diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,298888 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 42694, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "grad_norm": 4.622729721679287, + "learning_rate": 3.9032006245121e-09, + "loss": 0.5483, + "step": 1 + }, + { + "epoch": 0.0, + "grad_norm": 4.9874238212397355, + "learning_rate": 7.8064012490242e-09, + "loss": 0.54, + "step": 2 + }, + { + "epoch": 0.0, + "grad_norm": 4.5453329237395454, + "learning_rate": 1.1709601873536301e-08, + "loss": 0.51, + "step": 3 + }, + { + "epoch": 0.0, + "grad_norm": 4.771523459488045, + "learning_rate": 1.56128024980484e-08, + "loss": 0.5275, + "step": 4 + }, + { + "epoch": 0.0, + "grad_norm": 4.938423447703485, + "learning_rate": 1.95160031225605e-08, + "loss": 0.5251, + "step": 5 + }, + { + "epoch": 0.0, + "grad_norm": 4.746415280407646, + "learning_rate": 2.3419203747072602e-08, + "loss": 0.5373, + "step": 6 + }, + { + "epoch": 0.0, + "grad_norm": 5.103974884798016, + "learning_rate": 2.7322404371584703e-08, + "loss": 0.5306, + "step": 7 + }, + { + "epoch": 0.0, + "grad_norm": 4.8332911295043655, + "learning_rate": 3.12256049960968e-08, + "loss": 0.5391, + "step": 8 + }, + { + "epoch": 0.0, + "grad_norm": 4.914533251816914, + "learning_rate": 3.51288056206089e-08, + "loss": 0.5423, + "step": 9 + }, + { + "epoch": 0.0, + "grad_norm": 4.942202520877101, + "learning_rate": 3.9032006245121e-08, + "loss": 0.5286, + "step": 10 + }, + { + "epoch": 0.0, + "grad_norm": 4.672023119250784, + "learning_rate": 4.293520686963311e-08, + "loss": 0.5161, + "step": 11 + }, + { + "epoch": 0.0, + "grad_norm": 4.956795172054546, + "learning_rate": 4.6838407494145204e-08, + "loss": 0.5351, + "step": 12 + }, + { + "epoch": 0.0, + "grad_norm": 5.227337187609141, + "learning_rate": 5.0741608118657305e-08, + "loss": 0.5514, + "step": 13 + }, + { + "epoch": 0.0, + "grad_norm": 4.472700621071287, + "learning_rate": 5.4644808743169406e-08, + "loss": 0.513, + "step": 14 + }, + { + "epoch": 0.0, + "grad_norm": 4.576174860666567, + "learning_rate": 5.8548009367681506e-08, + "loss": 0.5277, + "step": 15 + }, + { + "epoch": 0.0, + "grad_norm": 4.910034497586738, + "learning_rate": 6.24512099921936e-08, + "loss": 0.5313, + "step": 16 + }, + { + "epoch": 0.0, + "grad_norm": 4.785311525245733, + "learning_rate": 6.635441061670571e-08, + "loss": 0.5216, + "step": 17 + }, + { + "epoch": 0.0, + "grad_norm": 4.474570928291394, + "learning_rate": 7.02576112412178e-08, + "loss": 0.509, + "step": 18 + }, + { + "epoch": 0.0, + "grad_norm": 4.869580247707969, + "learning_rate": 7.41608118657299e-08, + "loss": 0.5193, + "step": 19 + }, + { + "epoch": 0.0, + "grad_norm": 4.8940671477221045, + "learning_rate": 7.8064012490242e-08, + "loss": 0.5437, + "step": 20 + }, + { + "epoch": 0.0, + "grad_norm": 5.162550605877126, + "learning_rate": 8.19672131147541e-08, + "loss": 0.5574, + "step": 21 + }, + { + "epoch": 0.0, + "grad_norm": 5.0781329772719745, + "learning_rate": 8.587041373926622e-08, + "loss": 0.5311, + "step": 22 + }, + { + "epoch": 0.0, + "grad_norm": 4.802466955440946, + "learning_rate": 8.977361436377831e-08, + "loss": 0.5025, + "step": 23 + }, + { + "epoch": 0.0, + "grad_norm": 4.757025990013422, + "learning_rate": 9.367681498829041e-08, + "loss": 0.5397, + "step": 24 + }, + { + "epoch": 0.0, + "grad_norm": 4.580769103038741, + "learning_rate": 9.758001561280251e-08, + "loss": 0.556, + "step": 25 + }, + { + "epoch": 0.0, + "grad_norm": 5.063035505775675, + "learning_rate": 1.0148321623731461e-07, + "loss": 0.5073, + "step": 26 + }, + { + "epoch": 0.0, + "grad_norm": 4.393285113859669, + "learning_rate": 1.053864168618267e-07, + "loss": 0.5452, + "step": 27 + }, + { + "epoch": 0.0, + "grad_norm": 4.871040167590817, + "learning_rate": 1.0928961748633881e-07, + "loss": 0.5523, + "step": 28 + }, + { + "epoch": 0.0, + "grad_norm": 4.916487334487691, + "learning_rate": 1.1319281811085091e-07, + "loss": 0.5527, + "step": 29 + }, + { + "epoch": 0.0, + "grad_norm": 4.681130170031395, + "learning_rate": 1.1709601873536301e-07, + "loss": 0.5347, + "step": 30 + }, + { + "epoch": 0.0, + "grad_norm": 4.558809227525443, + "learning_rate": 1.2099921935987511e-07, + "loss": 0.5273, + "step": 31 + }, + { + "epoch": 0.0, + "grad_norm": 4.625041196215798, + "learning_rate": 1.249024199843872e-07, + "loss": 0.5429, + "step": 32 + }, + { + "epoch": 0.0, + "grad_norm": 4.416095401198277, + "learning_rate": 1.2880562060889932e-07, + "loss": 0.5248, + "step": 33 + }, + { + "epoch": 0.0, + "grad_norm": 4.603944493910623, + "learning_rate": 1.3270882123341143e-07, + "loss": 0.541, + "step": 34 + }, + { + "epoch": 0.0, + "grad_norm": 4.3215544422628405, + "learning_rate": 1.366120218579235e-07, + "loss": 0.5215, + "step": 35 + }, + { + "epoch": 0.0, + "grad_norm": 4.4355540740489605, + "learning_rate": 1.405152224824356e-07, + "loss": 0.5228, + "step": 36 + }, + { + "epoch": 0.0, + "grad_norm": 4.156788742243626, + "learning_rate": 1.4441842310694772e-07, + "loss": 0.5077, + "step": 37 + }, + { + "epoch": 0.0, + "grad_norm": 4.323971066639269, + "learning_rate": 1.483216237314598e-07, + "loss": 0.5298, + "step": 38 + }, + { + "epoch": 0.0, + "grad_norm": 4.035424996763108, + "learning_rate": 1.522248243559719e-07, + "loss": 0.4931, + "step": 39 + }, + { + "epoch": 0.0, + "grad_norm": 4.21943051677546, + "learning_rate": 1.56128024980484e-07, + "loss": 0.5398, + "step": 40 + }, + { + "epoch": 0.0, + "grad_norm": 4.222509976155011, + "learning_rate": 1.6003122560499612e-07, + "loss": 0.523, + "step": 41 + }, + { + "epoch": 0.0, + "grad_norm": 3.916317451405703, + "learning_rate": 1.639344262295082e-07, + "loss": 0.5433, + "step": 42 + }, + { + "epoch": 0.0, + "grad_norm": 4.0196983308015914, + "learning_rate": 1.6783762685402032e-07, + "loss": 0.5163, + "step": 43 + }, + { + "epoch": 0.0, + "grad_norm": 4.294682649688215, + "learning_rate": 1.7174082747853244e-07, + "loss": 0.5211, + "step": 44 + }, + { + "epoch": 0.0, + "grad_norm": 4.15139626010468, + "learning_rate": 1.756440281030445e-07, + "loss": 0.5184, + "step": 45 + }, + { + "epoch": 0.0, + "grad_norm": 3.726393229513805, + "learning_rate": 1.7954722872755661e-07, + "loss": 0.505, + "step": 46 + }, + { + "epoch": 0.0, + "grad_norm": 3.37925548303896, + "learning_rate": 1.834504293520687e-07, + "loss": 0.4903, + "step": 47 + }, + { + "epoch": 0.0, + "grad_norm": 2.8975201529597565, + "learning_rate": 1.8735362997658082e-07, + "loss": 0.4749, + "step": 48 + }, + { + "epoch": 0.0, + "grad_norm": 2.996512187875095, + "learning_rate": 1.912568306010929e-07, + "loss": 0.4784, + "step": 49 + }, + { + "epoch": 0.0, + "grad_norm": 2.7024299768318, + "learning_rate": 1.9516003122560502e-07, + "loss": 0.4745, + "step": 50 + }, + { + "epoch": 0.0, + "grad_norm": 3.048661356849958, + "learning_rate": 1.9906323185011713e-07, + "loss": 0.4738, + "step": 51 + }, + { + "epoch": 0.0, + "grad_norm": 2.6938356320245602, + "learning_rate": 2.0296643247462922e-07, + "loss": 0.4738, + "step": 52 + }, + { + "epoch": 0.0, + "grad_norm": 2.857796796791975, + "learning_rate": 2.068696330991413e-07, + "loss": 0.5026, + "step": 53 + }, + { + "epoch": 0.0, + "grad_norm": 2.7440602131187415, + "learning_rate": 2.107728337236534e-07, + "loss": 0.4981, + "step": 54 + }, + { + "epoch": 0.0, + "grad_norm": 2.6069629510706958, + "learning_rate": 2.146760343481655e-07, + "loss": 0.4931, + "step": 55 + }, + { + "epoch": 0.0, + "grad_norm": 2.593446222285576, + "learning_rate": 2.1857923497267762e-07, + "loss": 0.4815, + "step": 56 + }, + { + "epoch": 0.0, + "grad_norm": 2.425235101214889, + "learning_rate": 2.224824355971897e-07, + "loss": 0.4639, + "step": 57 + }, + { + "epoch": 0.0, + "grad_norm": 2.4653887582549388, + "learning_rate": 2.2638563622170182e-07, + "loss": 0.4747, + "step": 58 + }, + { + "epoch": 0.0, + "grad_norm": 2.1512577978720966, + "learning_rate": 2.3028883684621394e-07, + "loss": 0.4764, + "step": 59 + }, + { + "epoch": 0.0, + "grad_norm": 2.389804994952366, + "learning_rate": 2.3419203747072603e-07, + "loss": 0.4531, + "step": 60 + }, + { + "epoch": 0.0, + "grad_norm": 2.3966539116445094, + "learning_rate": 2.3809523809523811e-07, + "loss": 0.4674, + "step": 61 + }, + { + "epoch": 0.0, + "grad_norm": 2.293633641919212, + "learning_rate": 2.4199843871975023e-07, + "loss": 0.4779, + "step": 62 + }, + { + "epoch": 0.0, + "grad_norm": 1.9282054190146631, + "learning_rate": 2.459016393442623e-07, + "loss": 0.4524, + "step": 63 + }, + { + "epoch": 0.0, + "grad_norm": 1.7125508870682322, + "learning_rate": 2.498048399687744e-07, + "loss": 0.4179, + "step": 64 + }, + { + "epoch": 0.0, + "grad_norm": 1.7155722113406209, + "learning_rate": 2.5370804059328654e-07, + "loss": 0.4733, + "step": 65 + }, + { + "epoch": 0.0, + "grad_norm": 1.579095880980918, + "learning_rate": 2.5761124121779863e-07, + "loss": 0.4784, + "step": 66 + }, + { + "epoch": 0.0, + "grad_norm": 1.5536282684022076, + "learning_rate": 2.615144418423107e-07, + "loss": 0.4269, + "step": 67 + }, + { + "epoch": 0.0, + "grad_norm": 1.5235193645160208, + "learning_rate": 2.6541764246682286e-07, + "loss": 0.4388, + "step": 68 + }, + { + "epoch": 0.0, + "grad_norm": 1.4955293726691672, + "learning_rate": 2.693208430913349e-07, + "loss": 0.4523, + "step": 69 + }, + { + "epoch": 0.0, + "grad_norm": 1.3792296221118685, + "learning_rate": 2.73224043715847e-07, + "loss": 0.4584, + "step": 70 + }, + { + "epoch": 0.0, + "grad_norm": 1.342382803840417, + "learning_rate": 2.771272443403591e-07, + "loss": 0.4351, + "step": 71 + }, + { + "epoch": 0.0, + "grad_norm": 1.2782325828524457, + "learning_rate": 2.810304449648712e-07, + "loss": 0.4525, + "step": 72 + }, + { + "epoch": 0.0, + "grad_norm": 1.1864441639263879, + "learning_rate": 2.849336455893833e-07, + "loss": 0.4271, + "step": 73 + }, + { + "epoch": 0.0, + "grad_norm": 1.0900708651070972, + "learning_rate": 2.8883684621389544e-07, + "loss": 0.4387, + "step": 74 + }, + { + "epoch": 0.0, + "grad_norm": 1.1571683464380191, + "learning_rate": 2.927400468384075e-07, + "loss": 0.4029, + "step": 75 + }, + { + "epoch": 0.0, + "grad_norm": 1.0543627668256164, + "learning_rate": 2.966432474629196e-07, + "loss": 0.4398, + "step": 76 + }, + { + "epoch": 0.0, + "grad_norm": 1.3596442930379864, + "learning_rate": 3.005464480874317e-07, + "loss": 0.4323, + "step": 77 + }, + { + "epoch": 0.0, + "grad_norm": 1.058102853758717, + "learning_rate": 3.044496487119438e-07, + "loss": 0.4313, + "step": 78 + }, + { + "epoch": 0.0, + "grad_norm": 1.1093218857992502, + "learning_rate": 3.0835284933645593e-07, + "loss": 0.4474, + "step": 79 + }, + { + "epoch": 0.0, + "grad_norm": 1.12681798118543, + "learning_rate": 3.12256049960968e-07, + "loss": 0.4371, + "step": 80 + }, + { + "epoch": 0.0, + "grad_norm": 1.2531884097984334, + "learning_rate": 3.161592505854801e-07, + "loss": 0.4499, + "step": 81 + }, + { + "epoch": 0.0, + "grad_norm": 1.2258256056301933, + "learning_rate": 3.2006245120999224e-07, + "loss": 0.4222, + "step": 82 + }, + { + "epoch": 0.0, + "grad_norm": 1.195532558331112, + "learning_rate": 3.2396565183450433e-07, + "loss": 0.4466, + "step": 83 + }, + { + "epoch": 0.0, + "grad_norm": 1.0892772233242887, + "learning_rate": 3.278688524590164e-07, + "loss": 0.4272, + "step": 84 + }, + { + "epoch": 0.0, + "grad_norm": 1.0183091953336865, + "learning_rate": 3.3177205308352856e-07, + "loss": 0.4326, + "step": 85 + }, + { + "epoch": 0.0, + "grad_norm": 0.9809369017337862, + "learning_rate": 3.3567525370804065e-07, + "loss": 0.4127, + "step": 86 + }, + { + "epoch": 0.0, + "grad_norm": 1.1099685917276574, + "learning_rate": 3.3957845433255274e-07, + "loss": 0.4194, + "step": 87 + }, + { + "epoch": 0.0, + "grad_norm": 0.9832506757005745, + "learning_rate": 3.434816549570649e-07, + "loss": 0.431, + "step": 88 + }, + { + "epoch": 0.0, + "grad_norm": 0.9310636074872207, + "learning_rate": 3.473848555815769e-07, + "loss": 0.4062, + "step": 89 + }, + { + "epoch": 0.0, + "grad_norm": 0.9390862304596608, + "learning_rate": 3.51288056206089e-07, + "loss": 0.4176, + "step": 90 + }, + { + "epoch": 0.0, + "grad_norm": 1.0151679977414008, + "learning_rate": 3.551912568306011e-07, + "loss": 0.4021, + "step": 91 + }, + { + "epoch": 0.0, + "grad_norm": 0.9825107320866411, + "learning_rate": 3.5909445745511323e-07, + "loss": 0.4234, + "step": 92 + }, + { + "epoch": 0.0, + "grad_norm": 0.9484229899033223, + "learning_rate": 3.629976580796253e-07, + "loss": 0.4249, + "step": 93 + }, + { + "epoch": 0.0, + "grad_norm": 0.8835717883353453, + "learning_rate": 3.669008587041374e-07, + "loss": 0.41, + "step": 94 + }, + { + "epoch": 0.0, + "grad_norm": 0.9322549084740902, + "learning_rate": 3.7080405932864954e-07, + "loss": 0.4173, + "step": 95 + }, + { + "epoch": 0.0, + "grad_norm": 1.0184301857541327, + "learning_rate": 3.7470725995316163e-07, + "loss": 0.4179, + "step": 96 + }, + { + "epoch": 0.0, + "grad_norm": 0.871678964881241, + "learning_rate": 3.786104605776737e-07, + "loss": 0.4024, + "step": 97 + }, + { + "epoch": 0.0, + "grad_norm": 0.9079726857593808, + "learning_rate": 3.825136612021858e-07, + "loss": 0.41, + "step": 98 + }, + { + "epoch": 0.0, + "grad_norm": 0.9137816825133326, + "learning_rate": 3.8641686182669795e-07, + "loss": 0.3979, + "step": 99 + }, + { + "epoch": 0.0, + "grad_norm": 0.8605166206910289, + "learning_rate": 3.9032006245121003e-07, + "loss": 0.4048, + "step": 100 + }, + { + "epoch": 0.0, + "grad_norm": 0.8649526568124305, + "learning_rate": 3.942232630757221e-07, + "loss": 0.3991, + "step": 101 + }, + { + "epoch": 0.0, + "grad_norm": 0.8429804711556066, + "learning_rate": 3.9812646370023426e-07, + "loss": 0.392, + "step": 102 + }, + { + "epoch": 0.0, + "grad_norm": 0.9071801246586166, + "learning_rate": 4.0202966432474635e-07, + "loss": 0.4182, + "step": 103 + }, + { + "epoch": 0.0, + "grad_norm": 0.8653628602837699, + "learning_rate": 4.0593286494925844e-07, + "loss": 0.431, + "step": 104 + }, + { + "epoch": 0.0, + "grad_norm": 0.9057315997795852, + "learning_rate": 4.0983606557377047e-07, + "loss": 0.4221, + "step": 105 + }, + { + "epoch": 0.0, + "grad_norm": 0.8715118742847457, + "learning_rate": 4.137392661982826e-07, + "loss": 0.4202, + "step": 106 + }, + { + "epoch": 0.01, + "grad_norm": 0.9256478701405499, + "learning_rate": 4.176424668227947e-07, + "loss": 0.4147, + "step": 107 + }, + { + "epoch": 0.01, + "grad_norm": 0.7822126523481772, + "learning_rate": 4.215456674473068e-07, + "loss": 0.3861, + "step": 108 + }, + { + "epoch": 0.01, + "grad_norm": 0.8693223307081748, + "learning_rate": 4.2544886807181893e-07, + "loss": 0.4065, + "step": 109 + }, + { + "epoch": 0.01, + "grad_norm": 0.8697349675321939, + "learning_rate": 4.29352068696331e-07, + "loss": 0.4016, + "step": 110 + }, + { + "epoch": 0.01, + "grad_norm": 0.8945889816344377, + "learning_rate": 4.332552693208431e-07, + "loss": 0.4132, + "step": 111 + }, + { + "epoch": 0.01, + "grad_norm": 0.8526016397346817, + "learning_rate": 4.3715846994535524e-07, + "loss": 0.4008, + "step": 112 + }, + { + "epoch": 0.01, + "grad_norm": 0.8691083507125095, + "learning_rate": 4.4106167056986733e-07, + "loss": 0.4161, + "step": 113 + }, + { + "epoch": 0.01, + "grad_norm": 0.8107423805281441, + "learning_rate": 4.449648711943794e-07, + "loss": 0.4247, + "step": 114 + }, + { + "epoch": 0.01, + "grad_norm": 0.8793459657506867, + "learning_rate": 4.4886807181889156e-07, + "loss": 0.3904, + "step": 115 + }, + { + "epoch": 0.01, + "grad_norm": 0.9299864241047985, + "learning_rate": 4.5277127244340365e-07, + "loss": 0.436, + "step": 116 + }, + { + "epoch": 0.01, + "grad_norm": 0.8725264489737143, + "learning_rate": 4.5667447306791574e-07, + "loss": 0.384, + "step": 117 + }, + { + "epoch": 0.01, + "grad_norm": 0.880884707772859, + "learning_rate": 4.605776736924279e-07, + "loss": 0.4262, + "step": 118 + }, + { + "epoch": 0.01, + "grad_norm": 0.8026771754707254, + "learning_rate": 4.6448087431693996e-07, + "loss": 0.3692, + "step": 119 + }, + { + "epoch": 0.01, + "grad_norm": 0.8015380206629927, + "learning_rate": 4.6838407494145205e-07, + "loss": 0.3854, + "step": 120 + }, + { + "epoch": 0.01, + "grad_norm": 0.8638432623568536, + "learning_rate": 4.722872755659641e-07, + "loss": 0.4059, + "step": 121 + }, + { + "epoch": 0.01, + "grad_norm": 0.8561786878973998, + "learning_rate": 4.7619047619047623e-07, + "loss": 0.3854, + "step": 122 + }, + { + "epoch": 0.01, + "grad_norm": 0.8434541279647906, + "learning_rate": 4.800936768149883e-07, + "loss": 0.3985, + "step": 123 + }, + { + "epoch": 0.01, + "grad_norm": 0.8010029303324552, + "learning_rate": 4.839968774395005e-07, + "loss": 0.3955, + "step": 124 + }, + { + "epoch": 0.01, + "grad_norm": 0.8575199239511408, + "learning_rate": 4.879000780640125e-07, + "loss": 0.4276, + "step": 125 + }, + { + "epoch": 0.01, + "grad_norm": 0.9333959449873935, + "learning_rate": 4.918032786885246e-07, + "loss": 0.3787, + "step": 126 + }, + { + "epoch": 0.01, + "grad_norm": 0.8199887116385266, + "learning_rate": 4.957064793130368e-07, + "loss": 0.4148, + "step": 127 + }, + { + "epoch": 0.01, + "grad_norm": 0.8189211916310329, + "learning_rate": 4.996096799375488e-07, + "loss": 0.3949, + "step": 128 + }, + { + "epoch": 0.01, + "grad_norm": 0.8682936020259665, + "learning_rate": 5.035128805620609e-07, + "loss": 0.4029, + "step": 129 + }, + { + "epoch": 0.01, + "grad_norm": 0.850867465194446, + "learning_rate": 5.074160811865731e-07, + "loss": 0.3685, + "step": 130 + }, + { + "epoch": 0.01, + "grad_norm": 0.8460062322640085, + "learning_rate": 5.113192818110851e-07, + "loss": 0.3885, + "step": 131 + }, + { + "epoch": 0.01, + "grad_norm": 0.9334319433179203, + "learning_rate": 5.152224824355973e-07, + "loss": 0.4033, + "step": 132 + }, + { + "epoch": 0.01, + "grad_norm": 0.8798267253048739, + "learning_rate": 5.191256830601094e-07, + "loss": 0.4045, + "step": 133 + }, + { + "epoch": 0.01, + "grad_norm": 0.8595835375977054, + "learning_rate": 5.230288836846214e-07, + "loss": 0.4199, + "step": 134 + }, + { + "epoch": 0.01, + "grad_norm": 0.8094193552143988, + "learning_rate": 5.269320843091336e-07, + "loss": 0.3813, + "step": 135 + }, + { + "epoch": 0.01, + "grad_norm": 0.7766841480628787, + "learning_rate": 5.308352849336457e-07, + "loss": 0.3728, + "step": 136 + }, + { + "epoch": 0.01, + "grad_norm": 0.8863757481993635, + "learning_rate": 5.347384855581578e-07, + "loss": 0.4106, + "step": 137 + }, + { + "epoch": 0.01, + "grad_norm": 0.8394808241117865, + "learning_rate": 5.386416861826698e-07, + "loss": 0.3937, + "step": 138 + }, + { + "epoch": 0.01, + "grad_norm": 0.8704725239537591, + "learning_rate": 5.425448868071819e-07, + "loss": 0.4015, + "step": 139 + }, + { + "epoch": 0.01, + "grad_norm": 0.7665424854876717, + "learning_rate": 5.46448087431694e-07, + "loss": 0.3842, + "step": 140 + }, + { + "epoch": 0.01, + "grad_norm": 0.8290444464877001, + "learning_rate": 5.503512880562061e-07, + "loss": 0.3934, + "step": 141 + }, + { + "epoch": 0.01, + "grad_norm": 0.8390971274603042, + "learning_rate": 5.542544886807182e-07, + "loss": 0.3839, + "step": 142 + }, + { + "epoch": 0.01, + "grad_norm": 0.9273351295576844, + "learning_rate": 5.581576893052303e-07, + "loss": 0.3563, + "step": 143 + }, + { + "epoch": 0.01, + "grad_norm": 0.8232430355620092, + "learning_rate": 5.620608899297424e-07, + "loss": 0.3908, + "step": 144 + }, + { + "epoch": 0.01, + "grad_norm": 0.8828034707607485, + "learning_rate": 5.659640905542546e-07, + "loss": 0.3966, + "step": 145 + }, + { + "epoch": 0.01, + "grad_norm": 0.8837715130966162, + "learning_rate": 5.698672911787666e-07, + "loss": 0.3705, + "step": 146 + }, + { + "epoch": 0.01, + "grad_norm": 0.8157209537631375, + "learning_rate": 5.737704918032787e-07, + "loss": 0.3789, + "step": 147 + }, + { + "epoch": 0.01, + "grad_norm": 0.8402247679335287, + "learning_rate": 5.776736924277909e-07, + "loss": 0.384, + "step": 148 + }, + { + "epoch": 0.01, + "grad_norm": 0.7965742448405706, + "learning_rate": 5.815768930523029e-07, + "loss": 0.3724, + "step": 149 + }, + { + "epoch": 0.01, + "grad_norm": 0.9350150267765636, + "learning_rate": 5.85480093676815e-07, + "loss": 0.3886, + "step": 150 + }, + { + "epoch": 0.01, + "grad_norm": 0.8629841246695686, + "learning_rate": 5.893832943013272e-07, + "loss": 0.3886, + "step": 151 + }, + { + "epoch": 0.01, + "grad_norm": 0.8355352522588186, + "learning_rate": 5.932864949258392e-07, + "loss": 0.4003, + "step": 152 + }, + { + "epoch": 0.01, + "grad_norm": 0.8475807531624556, + "learning_rate": 5.971896955503513e-07, + "loss": 0.3891, + "step": 153 + }, + { + "epoch": 0.01, + "grad_norm": 0.8430332622455138, + "learning_rate": 6.010928961748634e-07, + "loss": 0.3811, + "step": 154 + }, + { + "epoch": 0.01, + "grad_norm": 0.8564816093873223, + "learning_rate": 6.049960967993755e-07, + "loss": 0.3828, + "step": 155 + }, + { + "epoch": 0.01, + "grad_norm": 0.8315223811544212, + "learning_rate": 6.088992974238876e-07, + "loss": 0.3739, + "step": 156 + }, + { + "epoch": 0.01, + "grad_norm": 0.8654116410134031, + "learning_rate": 6.128024980483997e-07, + "loss": 0.3954, + "step": 157 + }, + { + "epoch": 0.01, + "grad_norm": 0.8406165949837806, + "learning_rate": 6.167056986729119e-07, + "loss": 0.3828, + "step": 158 + }, + { + "epoch": 0.01, + "grad_norm": 0.8546330898187493, + "learning_rate": 6.206088992974239e-07, + "loss": 0.3869, + "step": 159 + }, + { + "epoch": 0.01, + "grad_norm": 1.0049134609008348, + "learning_rate": 6.24512099921936e-07, + "loss": 0.4003, + "step": 160 + }, + { + "epoch": 0.01, + "grad_norm": 1.0098992238779199, + "learning_rate": 6.284153005464482e-07, + "loss": 0.3772, + "step": 161 + }, + { + "epoch": 0.01, + "grad_norm": 0.8537267996296484, + "learning_rate": 6.323185011709602e-07, + "loss": 0.3869, + "step": 162 + }, + { + "epoch": 0.01, + "grad_norm": 1.0770995434944979, + "learning_rate": 6.362217017954723e-07, + "loss": 0.3699, + "step": 163 + }, + { + "epoch": 0.01, + "grad_norm": 0.9895019796252554, + "learning_rate": 6.401249024199845e-07, + "loss": 0.3697, + "step": 164 + }, + { + "epoch": 0.01, + "grad_norm": 0.8398164417005768, + "learning_rate": 6.440281030444965e-07, + "loss": 0.3744, + "step": 165 + }, + { + "epoch": 0.01, + "grad_norm": 1.0164297114139453, + "learning_rate": 6.479313036690087e-07, + "loss": 0.3899, + "step": 166 + }, + { + "epoch": 0.01, + "grad_norm": 0.8739587550577989, + "learning_rate": 6.518345042935208e-07, + "loss": 0.3801, + "step": 167 + }, + { + "epoch": 0.01, + "grad_norm": 0.8620478437871912, + "learning_rate": 6.557377049180328e-07, + "loss": 0.4036, + "step": 168 + }, + { + "epoch": 0.01, + "grad_norm": 0.8733728650221982, + "learning_rate": 6.59640905542545e-07, + "loss": 0.3754, + "step": 169 + }, + { + "epoch": 0.01, + "grad_norm": 0.8513921113064464, + "learning_rate": 6.635441061670571e-07, + "loss": 0.3728, + "step": 170 + }, + { + "epoch": 0.01, + "grad_norm": 0.8044710772963816, + "learning_rate": 6.674473067915692e-07, + "loss": 0.3674, + "step": 171 + }, + { + "epoch": 0.01, + "grad_norm": 0.8642165359896761, + "learning_rate": 6.713505074160813e-07, + "loss": 0.3933, + "step": 172 + }, + { + "epoch": 0.01, + "grad_norm": 0.842025818834614, + "learning_rate": 6.752537080405934e-07, + "loss": 0.3884, + "step": 173 + }, + { + "epoch": 0.01, + "grad_norm": 0.8190387791173283, + "learning_rate": 6.791569086651055e-07, + "loss": 0.3816, + "step": 174 + }, + { + "epoch": 0.01, + "grad_norm": 0.8514099801083872, + "learning_rate": 6.830601092896176e-07, + "loss": 0.3686, + "step": 175 + }, + { + "epoch": 0.01, + "grad_norm": 0.879852229257622, + "learning_rate": 6.869633099141298e-07, + "loss": 0.3905, + "step": 176 + }, + { + "epoch": 0.01, + "grad_norm": 0.8189034929478652, + "learning_rate": 6.908665105386417e-07, + "loss": 0.3885, + "step": 177 + }, + { + "epoch": 0.01, + "grad_norm": 0.8171522974714015, + "learning_rate": 6.947697111631538e-07, + "loss": 0.3936, + "step": 178 + }, + { + "epoch": 0.01, + "grad_norm": 0.8677961842949385, + "learning_rate": 6.986729117876659e-07, + "loss": 0.3729, + "step": 179 + }, + { + "epoch": 0.01, + "grad_norm": 0.8394881049157396, + "learning_rate": 7.02576112412178e-07, + "loss": 0.3808, + "step": 180 + }, + { + "epoch": 0.01, + "grad_norm": 0.8276512940997796, + "learning_rate": 7.064793130366901e-07, + "loss": 0.3706, + "step": 181 + }, + { + "epoch": 0.01, + "grad_norm": 0.7879185309946193, + "learning_rate": 7.103825136612022e-07, + "loss": 0.3727, + "step": 182 + }, + { + "epoch": 0.01, + "grad_norm": 0.8159215077864062, + "learning_rate": 7.142857142857143e-07, + "loss": 0.379, + "step": 183 + }, + { + "epoch": 0.01, + "grad_norm": 0.9179275023653323, + "learning_rate": 7.181889149102265e-07, + "loss": 0.4055, + "step": 184 + }, + { + "epoch": 0.01, + "grad_norm": 0.8143770557434388, + "learning_rate": 7.220921155347385e-07, + "loss": 0.3742, + "step": 185 + }, + { + "epoch": 0.01, + "grad_norm": 0.8065611541557082, + "learning_rate": 7.259953161592506e-07, + "loss": 0.3741, + "step": 186 + }, + { + "epoch": 0.01, + "grad_norm": 0.9488414891408218, + "learning_rate": 7.298985167837628e-07, + "loss": 0.388, + "step": 187 + }, + { + "epoch": 0.01, + "grad_norm": 0.7912907748044055, + "learning_rate": 7.338017174082748e-07, + "loss": 0.3529, + "step": 188 + }, + { + "epoch": 0.01, + "grad_norm": 0.8856310794509977, + "learning_rate": 7.377049180327869e-07, + "loss": 0.3678, + "step": 189 + }, + { + "epoch": 0.01, + "grad_norm": 0.7996437661915726, + "learning_rate": 7.416081186572991e-07, + "loss": 0.3801, + "step": 190 + }, + { + "epoch": 0.01, + "grad_norm": 0.8248744965873346, + "learning_rate": 7.455113192818111e-07, + "loss": 0.3917, + "step": 191 + }, + { + "epoch": 0.01, + "grad_norm": 0.8293545467547411, + "learning_rate": 7.494145199063233e-07, + "loss": 0.3992, + "step": 192 + }, + { + "epoch": 0.01, + "grad_norm": 0.8496822680632784, + "learning_rate": 7.533177205308354e-07, + "loss": 0.3576, + "step": 193 + }, + { + "epoch": 0.01, + "grad_norm": 0.8519314323035403, + "learning_rate": 7.572209211553474e-07, + "loss": 0.3687, + "step": 194 + }, + { + "epoch": 0.01, + "grad_norm": 0.8389642926270531, + "learning_rate": 7.611241217798596e-07, + "loss": 0.369, + "step": 195 + }, + { + "epoch": 0.01, + "grad_norm": 0.8281081648829327, + "learning_rate": 7.650273224043716e-07, + "loss": 0.3809, + "step": 196 + }, + { + "epoch": 0.01, + "grad_norm": 0.8724082065995259, + "learning_rate": 7.689305230288838e-07, + "loss": 0.3726, + "step": 197 + }, + { + "epoch": 0.01, + "grad_norm": 0.8432177164396634, + "learning_rate": 7.728337236533959e-07, + "loss": 0.3867, + "step": 198 + }, + { + "epoch": 0.01, + "grad_norm": 0.7997352132762979, + "learning_rate": 7.767369242779079e-07, + "loss": 0.3676, + "step": 199 + }, + { + "epoch": 0.01, + "grad_norm": 0.918306503419813, + "learning_rate": 7.806401249024201e-07, + "loss": 0.3704, + "step": 200 + }, + { + "epoch": 0.01, + "grad_norm": 0.8259143292954171, + "learning_rate": 7.845433255269322e-07, + "loss": 0.3639, + "step": 201 + }, + { + "epoch": 0.01, + "grad_norm": 0.9683099092434378, + "learning_rate": 7.884465261514442e-07, + "loss": 0.3841, + "step": 202 + }, + { + "epoch": 0.01, + "grad_norm": 0.935779296727939, + "learning_rate": 7.923497267759564e-07, + "loss": 0.3962, + "step": 203 + }, + { + "epoch": 0.01, + "grad_norm": 0.8824398243252304, + "learning_rate": 7.962529274004685e-07, + "loss": 0.3944, + "step": 204 + }, + { + "epoch": 0.01, + "grad_norm": 0.8624705316191792, + "learning_rate": 8.001561280249806e-07, + "loss": 0.3728, + "step": 205 + }, + { + "epoch": 0.01, + "grad_norm": 0.9335831229963681, + "learning_rate": 8.040593286494927e-07, + "loss": 0.3651, + "step": 206 + }, + { + "epoch": 0.01, + "grad_norm": 0.8536110174923287, + "learning_rate": 8.079625292740048e-07, + "loss": 0.3662, + "step": 207 + }, + { + "epoch": 0.01, + "grad_norm": 0.9195563571065184, + "learning_rate": 8.118657298985169e-07, + "loss": 0.3634, + "step": 208 + }, + { + "epoch": 0.01, + "grad_norm": 0.9168034533666126, + "learning_rate": 8.157689305230289e-07, + "loss": 0.3866, + "step": 209 + }, + { + "epoch": 0.01, + "grad_norm": 0.8915190951248373, + "learning_rate": 8.196721311475409e-07, + "loss": 0.3581, + "step": 210 + }, + { + "epoch": 0.01, + "grad_norm": 0.7508610533245367, + "learning_rate": 8.235753317720531e-07, + "loss": 0.3609, + "step": 211 + }, + { + "epoch": 0.01, + "grad_norm": 0.8458555968444709, + "learning_rate": 8.274785323965652e-07, + "loss": 0.3895, + "step": 212 + }, + { + "epoch": 0.01, + "grad_norm": 0.8370272432406768, + "learning_rate": 8.313817330210773e-07, + "loss": 0.3525, + "step": 213 + }, + { + "epoch": 0.01, + "grad_norm": 0.9121891938215305, + "learning_rate": 8.352849336455894e-07, + "loss": 0.3809, + "step": 214 + }, + { + "epoch": 0.01, + "grad_norm": 0.9731578753621841, + "learning_rate": 8.391881342701015e-07, + "loss": 0.3886, + "step": 215 + }, + { + "epoch": 0.01, + "grad_norm": 0.8617924848601851, + "learning_rate": 8.430913348946136e-07, + "loss": 0.3749, + "step": 216 + }, + { + "epoch": 0.01, + "grad_norm": 0.9266985942420548, + "learning_rate": 8.469945355191257e-07, + "loss": 0.3696, + "step": 217 + }, + { + "epoch": 0.01, + "grad_norm": 0.8283383828534994, + "learning_rate": 8.508977361436379e-07, + "loss": 0.3575, + "step": 218 + }, + { + "epoch": 0.01, + "grad_norm": 0.8267911525906587, + "learning_rate": 8.548009367681499e-07, + "loss": 0.3557, + "step": 219 + }, + { + "epoch": 0.01, + "grad_norm": 0.8669836754699147, + "learning_rate": 8.58704137392662e-07, + "loss": 0.3569, + "step": 220 + }, + { + "epoch": 0.01, + "grad_norm": 0.8727863985290725, + "learning_rate": 8.626073380171742e-07, + "loss": 0.4104, + "step": 221 + }, + { + "epoch": 0.01, + "grad_norm": 0.8502890001538393, + "learning_rate": 8.665105386416862e-07, + "loss": 0.3761, + "step": 222 + }, + { + "epoch": 0.01, + "grad_norm": 0.8432821638567171, + "learning_rate": 8.704137392661983e-07, + "loss": 0.3911, + "step": 223 + }, + { + "epoch": 0.01, + "grad_norm": 0.8478264204129208, + "learning_rate": 8.743169398907105e-07, + "loss": 0.3597, + "step": 224 + }, + { + "epoch": 0.01, + "grad_norm": 0.9926464471740118, + "learning_rate": 8.782201405152225e-07, + "loss": 0.3797, + "step": 225 + }, + { + "epoch": 0.01, + "grad_norm": 0.8596454816076141, + "learning_rate": 8.821233411397347e-07, + "loss": 0.3813, + "step": 226 + }, + { + "epoch": 0.01, + "grad_norm": 0.8311955008029471, + "learning_rate": 8.860265417642468e-07, + "loss": 0.3796, + "step": 227 + }, + { + "epoch": 0.01, + "grad_norm": 0.8451651537533872, + "learning_rate": 8.899297423887588e-07, + "loss": 0.375, + "step": 228 + }, + { + "epoch": 0.01, + "grad_norm": 0.9595522633911128, + "learning_rate": 8.93832943013271e-07, + "loss": 0.3551, + "step": 229 + }, + { + "epoch": 0.01, + "grad_norm": 0.8133573369556113, + "learning_rate": 8.977361436377831e-07, + "loss": 0.3568, + "step": 230 + }, + { + "epoch": 0.01, + "grad_norm": 0.8716155259439982, + "learning_rate": 9.016393442622952e-07, + "loss": 0.3765, + "step": 231 + }, + { + "epoch": 0.01, + "grad_norm": 0.8120406469827169, + "learning_rate": 9.055425448868073e-07, + "loss": 0.3579, + "step": 232 + }, + { + "epoch": 0.01, + "grad_norm": 0.8606318289485451, + "learning_rate": 9.094457455113194e-07, + "loss": 0.3803, + "step": 233 + }, + { + "epoch": 0.01, + "grad_norm": 0.8194303265100454, + "learning_rate": 9.133489461358315e-07, + "loss": 0.3523, + "step": 234 + }, + { + "epoch": 0.01, + "grad_norm": 0.8542841425369457, + "learning_rate": 9.172521467603436e-07, + "loss": 0.3689, + "step": 235 + }, + { + "epoch": 0.01, + "grad_norm": 0.8154494535834671, + "learning_rate": 9.211553473848558e-07, + "loss": 0.41, + "step": 236 + }, + { + "epoch": 0.01, + "grad_norm": 0.9016423923225648, + "learning_rate": 9.250585480093678e-07, + "loss": 0.3803, + "step": 237 + }, + { + "epoch": 0.01, + "grad_norm": 0.8101126516991347, + "learning_rate": 9.289617486338799e-07, + "loss": 0.3678, + "step": 238 + }, + { + "epoch": 0.01, + "grad_norm": 0.8150552739944517, + "learning_rate": 9.328649492583921e-07, + "loss": 0.3592, + "step": 239 + }, + { + "epoch": 0.01, + "grad_norm": 0.8075017563514328, + "learning_rate": 9.367681498829041e-07, + "loss": 0.3654, + "step": 240 + }, + { + "epoch": 0.01, + "grad_norm": 0.8639730715576405, + "learning_rate": 9.406713505074161e-07, + "loss": 0.3949, + "step": 241 + }, + { + "epoch": 0.01, + "grad_norm": 0.8649985737086766, + "learning_rate": 9.445745511319282e-07, + "loss": 0.3735, + "step": 242 + }, + { + "epoch": 0.01, + "grad_norm": 0.7779703538392265, + "learning_rate": 9.484777517564403e-07, + "loss": 0.3468, + "step": 243 + }, + { + "epoch": 0.01, + "grad_norm": 0.7756043331546959, + "learning_rate": 9.523809523809525e-07, + "loss": 0.3456, + "step": 244 + }, + { + "epoch": 0.01, + "grad_norm": 0.8869001301007062, + "learning_rate": 9.562841530054645e-07, + "loss": 0.3914, + "step": 245 + }, + { + "epoch": 0.01, + "grad_norm": 0.9152728351581307, + "learning_rate": 9.601873536299766e-07, + "loss": 0.357, + "step": 246 + }, + { + "epoch": 0.01, + "grad_norm": 0.8484547366701196, + "learning_rate": 9.640905542544888e-07, + "loss": 0.3653, + "step": 247 + }, + { + "epoch": 0.01, + "grad_norm": 0.9334156312717311, + "learning_rate": 9.67993754879001e-07, + "loss": 0.3741, + "step": 248 + }, + { + "epoch": 0.01, + "grad_norm": 0.8073477833175995, + "learning_rate": 9.718969555035128e-07, + "loss": 0.3528, + "step": 249 + }, + { + "epoch": 0.01, + "grad_norm": 0.7791154639115215, + "learning_rate": 9.75800156128025e-07, + "loss": 0.3458, + "step": 250 + }, + { + "epoch": 0.01, + "grad_norm": 0.827754253079892, + "learning_rate": 9.797033567525371e-07, + "loss": 0.3617, + "step": 251 + }, + { + "epoch": 0.01, + "grad_norm": 0.9181867159366851, + "learning_rate": 9.836065573770493e-07, + "loss": 0.3885, + "step": 252 + }, + { + "epoch": 0.01, + "grad_norm": 0.8738737477561861, + "learning_rate": 9.875097580015614e-07, + "loss": 0.3583, + "step": 253 + }, + { + "epoch": 0.01, + "grad_norm": 0.8483802367680947, + "learning_rate": 9.914129586260735e-07, + "loss": 0.3684, + "step": 254 + }, + { + "epoch": 0.01, + "grad_norm": 0.7893604640723723, + "learning_rate": 9.953161592505855e-07, + "loss": 0.358, + "step": 255 + }, + { + "epoch": 0.01, + "grad_norm": 0.8228454633646461, + "learning_rate": 9.992193598750976e-07, + "loss": 0.3606, + "step": 256 + }, + { + "epoch": 0.01, + "grad_norm": 0.8232485379021844, + "learning_rate": 1.0031225604996098e-06, + "loss": 0.3551, + "step": 257 + }, + { + "epoch": 0.01, + "grad_norm": 0.9054177513982167, + "learning_rate": 1.0070257611241219e-06, + "loss": 0.3733, + "step": 258 + }, + { + "epoch": 0.01, + "grad_norm": 0.8421630388784828, + "learning_rate": 1.010928961748634e-06, + "loss": 0.3767, + "step": 259 + }, + { + "epoch": 0.01, + "grad_norm": 0.8044634540922767, + "learning_rate": 1.0148321623731462e-06, + "loss": 0.3398, + "step": 260 + }, + { + "epoch": 0.01, + "grad_norm": 0.876967592916659, + "learning_rate": 1.018735362997658e-06, + "loss": 0.3588, + "step": 261 + }, + { + "epoch": 0.01, + "grad_norm": 0.786229799395679, + "learning_rate": 1.0226385636221702e-06, + "loss": 0.3389, + "step": 262 + }, + { + "epoch": 0.01, + "grad_norm": 0.8793620270887019, + "learning_rate": 1.0265417642466824e-06, + "loss": 0.3849, + "step": 263 + }, + { + "epoch": 0.01, + "grad_norm": 0.895174865533386, + "learning_rate": 1.0304449648711945e-06, + "loss": 0.3677, + "step": 264 + }, + { + "epoch": 0.01, + "grad_norm": 0.7438654971848939, + "learning_rate": 1.0343481654957067e-06, + "loss": 0.3643, + "step": 265 + }, + { + "epoch": 0.01, + "grad_norm": 0.8157931822853366, + "learning_rate": 1.0382513661202188e-06, + "loss": 0.38, + "step": 266 + }, + { + "epoch": 0.01, + "grad_norm": 0.8478226736861759, + "learning_rate": 1.0421545667447307e-06, + "loss": 0.3641, + "step": 267 + }, + { + "epoch": 0.01, + "grad_norm": 0.8455758508026294, + "learning_rate": 1.0460577673692429e-06, + "loss": 0.3597, + "step": 268 + }, + { + "epoch": 0.01, + "grad_norm": 0.8426668186541952, + "learning_rate": 1.049960967993755e-06, + "loss": 0.3597, + "step": 269 + }, + { + "epoch": 0.01, + "grad_norm": 0.8481281550900193, + "learning_rate": 1.0538641686182672e-06, + "loss": 0.3836, + "step": 270 + }, + { + "epoch": 0.01, + "grad_norm": 0.8338404829688861, + "learning_rate": 1.0577673692427793e-06, + "loss": 0.3817, + "step": 271 + }, + { + "epoch": 0.01, + "grad_norm": 0.8333230075189905, + "learning_rate": 1.0616705698672914e-06, + "loss": 0.3658, + "step": 272 + }, + { + "epoch": 0.01, + "grad_norm": 0.8566408699730564, + "learning_rate": 1.0655737704918034e-06, + "loss": 0.3748, + "step": 273 + }, + { + "epoch": 0.01, + "grad_norm": 0.9302395217241406, + "learning_rate": 1.0694769711163155e-06, + "loss": 0.3692, + "step": 274 + }, + { + "epoch": 0.01, + "grad_norm": 0.7792405316129761, + "learning_rate": 1.0733801717408274e-06, + "loss": 0.3531, + "step": 275 + }, + { + "epoch": 0.01, + "grad_norm": 0.785568066491356, + "learning_rate": 1.0772833723653396e-06, + "loss": 0.3335, + "step": 276 + }, + { + "epoch": 0.01, + "grad_norm": 0.8183096192847972, + "learning_rate": 1.0811865729898517e-06, + "loss": 0.3584, + "step": 277 + }, + { + "epoch": 0.01, + "grad_norm": 0.8707395551945405, + "learning_rate": 1.0850897736143639e-06, + "loss": 0.3592, + "step": 278 + }, + { + "epoch": 0.01, + "grad_norm": 0.8564331128175929, + "learning_rate": 1.088992974238876e-06, + "loss": 0.3557, + "step": 279 + }, + { + "epoch": 0.01, + "grad_norm": 0.75442588960338, + "learning_rate": 1.092896174863388e-06, + "loss": 0.3478, + "step": 280 + }, + { + "epoch": 0.01, + "grad_norm": 0.8675448386502503, + "learning_rate": 1.0967993754879e-06, + "loss": 0.3661, + "step": 281 + }, + { + "epoch": 0.01, + "grad_norm": 0.7844141655885469, + "learning_rate": 1.1007025761124122e-06, + "loss": 0.328, + "step": 282 + }, + { + "epoch": 0.01, + "grad_norm": 0.7907726555649885, + "learning_rate": 1.1046057767369243e-06, + "loss": 0.3695, + "step": 283 + }, + { + "epoch": 0.01, + "grad_norm": 0.7643962025408518, + "learning_rate": 1.1085089773614365e-06, + "loss": 0.3596, + "step": 284 + }, + { + "epoch": 0.01, + "grad_norm": 0.8649157492648186, + "learning_rate": 1.1124121779859486e-06, + "loss": 0.3287, + "step": 285 + }, + { + "epoch": 0.01, + "grad_norm": 0.8286153533040325, + "learning_rate": 1.1163153786104606e-06, + "loss": 0.3533, + "step": 286 + }, + { + "epoch": 0.01, + "grad_norm": 0.8994413511836561, + "learning_rate": 1.1202185792349727e-06, + "loss": 0.3723, + "step": 287 + }, + { + "epoch": 0.01, + "grad_norm": 0.7706577376571753, + "learning_rate": 1.1241217798594848e-06, + "loss": 0.3528, + "step": 288 + }, + { + "epoch": 0.01, + "grad_norm": 0.8695843397602406, + "learning_rate": 1.128024980483997e-06, + "loss": 0.3779, + "step": 289 + }, + { + "epoch": 0.01, + "grad_norm": 0.7945932437863785, + "learning_rate": 1.1319281811085091e-06, + "loss": 0.3446, + "step": 290 + }, + { + "epoch": 0.01, + "grad_norm": 0.7659475754747396, + "learning_rate": 1.1358313817330213e-06, + "loss": 0.3398, + "step": 291 + }, + { + "epoch": 0.01, + "grad_norm": 0.766518999389875, + "learning_rate": 1.1397345823575332e-06, + "loss": 0.338, + "step": 292 + }, + { + "epoch": 0.01, + "grad_norm": 0.8039349334530823, + "learning_rate": 1.1436377829820453e-06, + "loss": 0.3639, + "step": 293 + }, + { + "epoch": 0.01, + "grad_norm": 0.781255043681138, + "learning_rate": 1.1475409836065575e-06, + "loss": 0.3581, + "step": 294 + }, + { + "epoch": 0.01, + "grad_norm": 0.7817919359225565, + "learning_rate": 1.1514441842310696e-06, + "loss": 0.3305, + "step": 295 + }, + { + "epoch": 0.01, + "grad_norm": 0.7481266786865592, + "learning_rate": 1.1553473848555818e-06, + "loss": 0.3466, + "step": 296 + }, + { + "epoch": 0.01, + "grad_norm": 0.8593968591736764, + "learning_rate": 1.1592505854800939e-06, + "loss": 0.3608, + "step": 297 + }, + { + "epoch": 0.01, + "grad_norm": 0.7807133925661633, + "learning_rate": 1.1631537861046058e-06, + "loss": 0.3404, + "step": 298 + }, + { + "epoch": 0.01, + "grad_norm": 0.8399250415345166, + "learning_rate": 1.167056986729118e-06, + "loss": 0.3535, + "step": 299 + }, + { + "epoch": 0.01, + "grad_norm": 0.7767108289451815, + "learning_rate": 1.17096018735363e-06, + "loss": 0.3526, + "step": 300 + }, + { + "epoch": 0.01, + "grad_norm": 0.9231001485814859, + "learning_rate": 1.1748633879781422e-06, + "loss": 0.3504, + "step": 301 + }, + { + "epoch": 0.01, + "grad_norm": 0.8383685618989679, + "learning_rate": 1.1787665886026544e-06, + "loss": 0.3667, + "step": 302 + }, + { + "epoch": 0.01, + "grad_norm": 0.8522508371664709, + "learning_rate": 1.1826697892271665e-06, + "loss": 0.3692, + "step": 303 + }, + { + "epoch": 0.01, + "grad_norm": 0.8056954671293388, + "learning_rate": 1.1865729898516785e-06, + "loss": 0.3441, + "step": 304 + }, + { + "epoch": 0.01, + "grad_norm": 0.917591351570124, + "learning_rate": 1.1904761904761906e-06, + "loss": 0.3595, + "step": 305 + }, + { + "epoch": 0.01, + "grad_norm": 0.8931024407564067, + "learning_rate": 1.1943793911007025e-06, + "loss": 0.3568, + "step": 306 + }, + { + "epoch": 0.01, + "grad_norm": 0.8206328706115729, + "learning_rate": 1.1982825917252147e-06, + "loss": 0.3539, + "step": 307 + }, + { + "epoch": 0.01, + "grad_norm": 0.9266299245255428, + "learning_rate": 1.2021857923497268e-06, + "loss": 0.3446, + "step": 308 + }, + { + "epoch": 0.01, + "grad_norm": 0.7958288846588454, + "learning_rate": 1.206088992974239e-06, + "loss": 0.3573, + "step": 309 + }, + { + "epoch": 0.01, + "grad_norm": 0.9853350857336837, + "learning_rate": 1.209992193598751e-06, + "loss": 0.3708, + "step": 310 + }, + { + "epoch": 0.01, + "grad_norm": 0.8704429163609152, + "learning_rate": 1.2138953942232632e-06, + "loss": 0.3616, + "step": 311 + }, + { + "epoch": 0.01, + "grad_norm": 0.8901256306257386, + "learning_rate": 1.2177985948477752e-06, + "loss": 0.3632, + "step": 312 + }, + { + "epoch": 0.01, + "grad_norm": 0.9157374494579276, + "learning_rate": 1.2217017954722873e-06, + "loss": 0.3386, + "step": 313 + }, + { + "epoch": 0.01, + "grad_norm": 0.8865645877771393, + "learning_rate": 1.2256049960967994e-06, + "loss": 0.3583, + "step": 314 + }, + { + "epoch": 0.01, + "grad_norm": 0.9480676186417357, + "learning_rate": 1.2295081967213116e-06, + "loss": 0.3442, + "step": 315 + }, + { + "epoch": 0.01, + "grad_norm": 0.9670057857057475, + "learning_rate": 1.2334113973458237e-06, + "loss": 0.3599, + "step": 316 + }, + { + "epoch": 0.01, + "grad_norm": 0.8177542909994509, + "learning_rate": 1.2373145979703359e-06, + "loss": 0.3424, + "step": 317 + }, + { + "epoch": 0.01, + "grad_norm": 0.993529810533958, + "learning_rate": 1.2412177985948478e-06, + "loss": 0.3372, + "step": 318 + }, + { + "epoch": 0.01, + "grad_norm": 1.001047400913079, + "learning_rate": 1.24512099921936e-06, + "loss": 0.3562, + "step": 319 + }, + { + "epoch": 0.01, + "grad_norm": 0.9998908301919309, + "learning_rate": 1.249024199843872e-06, + "loss": 0.3725, + "step": 320 + }, + { + "epoch": 0.02, + "grad_norm": 1.0894599927783868, + "learning_rate": 1.2529274004683842e-06, + "loss": 0.3628, + "step": 321 + }, + { + "epoch": 0.02, + "grad_norm": 1.1128766999517847, + "learning_rate": 1.2568306010928963e-06, + "loss": 0.3613, + "step": 322 + }, + { + "epoch": 0.02, + "grad_norm": 0.817380825237106, + "learning_rate": 1.2607338017174085e-06, + "loss": 0.3446, + "step": 323 + }, + { + "epoch": 0.02, + "grad_norm": 0.8816290740598899, + "learning_rate": 1.2646370023419204e-06, + "loss": 0.3549, + "step": 324 + }, + { + "epoch": 0.02, + "grad_norm": 1.0795860709011016, + "learning_rate": 1.2685402029664326e-06, + "loss": 0.3616, + "step": 325 + }, + { + "epoch": 0.02, + "grad_norm": 0.8187722780129082, + "learning_rate": 1.2724434035909447e-06, + "loss": 0.3451, + "step": 326 + }, + { + "epoch": 0.02, + "grad_norm": 0.8514763148761405, + "learning_rate": 1.2763466042154568e-06, + "loss": 0.3548, + "step": 327 + }, + { + "epoch": 0.02, + "grad_norm": 1.0239629661846263, + "learning_rate": 1.280249804839969e-06, + "loss": 0.3375, + "step": 328 + }, + { + "epoch": 0.02, + "grad_norm": 0.8643184589149551, + "learning_rate": 1.2841530054644811e-06, + "loss": 0.3723, + "step": 329 + }, + { + "epoch": 0.02, + "grad_norm": 0.7688847165240739, + "learning_rate": 1.288056206088993e-06, + "loss": 0.3626, + "step": 330 + }, + { + "epoch": 0.02, + "grad_norm": 0.8019679337487924, + "learning_rate": 1.2919594067135052e-06, + "loss": 0.3458, + "step": 331 + }, + { + "epoch": 0.02, + "grad_norm": 0.8870607693004382, + "learning_rate": 1.2958626073380173e-06, + "loss": 0.349, + "step": 332 + }, + { + "epoch": 0.02, + "grad_norm": 0.813274208523567, + "learning_rate": 1.2997658079625295e-06, + "loss": 0.3817, + "step": 333 + }, + { + "epoch": 0.02, + "grad_norm": 0.7950670703846899, + "learning_rate": 1.3036690085870416e-06, + "loss": 0.3519, + "step": 334 + }, + { + "epoch": 0.02, + "grad_norm": 0.8002582670653976, + "learning_rate": 1.3075722092115535e-06, + "loss": 0.358, + "step": 335 + }, + { + "epoch": 0.02, + "grad_norm": 0.8438170613319562, + "learning_rate": 1.3114754098360657e-06, + "loss": 0.3725, + "step": 336 + }, + { + "epoch": 0.02, + "grad_norm": 0.8441469761018485, + "learning_rate": 1.3153786104605778e-06, + "loss": 0.3729, + "step": 337 + }, + { + "epoch": 0.02, + "grad_norm": 0.7795518160675929, + "learning_rate": 1.31928181108509e-06, + "loss": 0.3494, + "step": 338 + }, + { + "epoch": 0.02, + "grad_norm": 0.779972199092561, + "learning_rate": 1.323185011709602e-06, + "loss": 0.3433, + "step": 339 + }, + { + "epoch": 0.02, + "grad_norm": 0.768110797866881, + "learning_rate": 1.3270882123341142e-06, + "loss": 0.3342, + "step": 340 + }, + { + "epoch": 0.02, + "grad_norm": 0.7902255948559378, + "learning_rate": 1.3309914129586262e-06, + "loss": 0.3587, + "step": 341 + }, + { + "epoch": 0.02, + "grad_norm": 0.8741388204821082, + "learning_rate": 1.3348946135831383e-06, + "loss": 0.3692, + "step": 342 + }, + { + "epoch": 0.02, + "grad_norm": 0.8105155403995535, + "learning_rate": 1.3387978142076505e-06, + "loss": 0.3651, + "step": 343 + }, + { + "epoch": 0.02, + "grad_norm": 0.7660183347929093, + "learning_rate": 1.3427010148321626e-06, + "loss": 0.3383, + "step": 344 + }, + { + "epoch": 0.02, + "grad_norm": 0.784400512768931, + "learning_rate": 1.3466042154566747e-06, + "loss": 0.3359, + "step": 345 + }, + { + "epoch": 0.02, + "grad_norm": 0.8357944787060468, + "learning_rate": 1.3505074160811869e-06, + "loss": 0.3564, + "step": 346 + }, + { + "epoch": 0.02, + "grad_norm": 0.834558949504693, + "learning_rate": 1.3544106167056988e-06, + "loss": 0.382, + "step": 347 + }, + { + "epoch": 0.02, + "grad_norm": 0.7747547740089832, + "learning_rate": 1.358313817330211e-06, + "loss": 0.3488, + "step": 348 + }, + { + "epoch": 0.02, + "grad_norm": 0.794158243996675, + "learning_rate": 1.362217017954723e-06, + "loss": 0.3493, + "step": 349 + }, + { + "epoch": 0.02, + "grad_norm": 0.8226191807757942, + "learning_rate": 1.3661202185792352e-06, + "loss": 0.3567, + "step": 350 + }, + { + "epoch": 0.02, + "grad_norm": 0.7728555867369871, + "learning_rate": 1.3700234192037474e-06, + "loss": 0.3488, + "step": 351 + }, + { + "epoch": 0.02, + "grad_norm": 0.8006623254319585, + "learning_rate": 1.3739266198282595e-06, + "loss": 0.3562, + "step": 352 + }, + { + "epoch": 0.02, + "grad_norm": 0.7487582039811426, + "learning_rate": 1.3778298204527712e-06, + "loss": 0.3253, + "step": 353 + }, + { + "epoch": 0.02, + "grad_norm": 0.7802908968035446, + "learning_rate": 1.3817330210772834e-06, + "loss": 0.3278, + "step": 354 + }, + { + "epoch": 0.02, + "grad_norm": 0.8112031933685945, + "learning_rate": 1.3856362217017955e-06, + "loss": 0.3603, + "step": 355 + }, + { + "epoch": 0.02, + "grad_norm": 0.7858256598927056, + "learning_rate": 1.3895394223263076e-06, + "loss": 0.339, + "step": 356 + }, + { + "epoch": 0.02, + "grad_norm": 0.8503992905019468, + "learning_rate": 1.3934426229508196e-06, + "loss": 0.363, + "step": 357 + }, + { + "epoch": 0.02, + "grad_norm": 0.7722171391186934, + "learning_rate": 1.3973458235753317e-06, + "loss": 0.3301, + "step": 358 + }, + { + "epoch": 0.02, + "grad_norm": 0.7609929808068646, + "learning_rate": 1.4012490241998439e-06, + "loss": 0.3266, + "step": 359 + }, + { + "epoch": 0.02, + "grad_norm": 0.8205844547130833, + "learning_rate": 1.405152224824356e-06, + "loss": 0.3569, + "step": 360 + }, + { + "epoch": 0.02, + "grad_norm": 0.881581951184364, + "learning_rate": 1.4090554254488681e-06, + "loss": 0.3397, + "step": 361 + }, + { + "epoch": 0.02, + "grad_norm": 0.8794442379259257, + "learning_rate": 1.4129586260733803e-06, + "loss": 0.3331, + "step": 362 + }, + { + "epoch": 0.02, + "grad_norm": 0.8751525445249699, + "learning_rate": 1.4168618266978922e-06, + "loss": 0.3338, + "step": 363 + }, + { + "epoch": 0.02, + "grad_norm": 0.8282031567235338, + "learning_rate": 1.4207650273224043e-06, + "loss": 0.3586, + "step": 364 + }, + { + "epoch": 0.02, + "grad_norm": 0.802141115998238, + "learning_rate": 1.4246682279469165e-06, + "loss": 0.3384, + "step": 365 + }, + { + "epoch": 0.02, + "grad_norm": 0.8452042586333027, + "learning_rate": 1.4285714285714286e-06, + "loss": 0.335, + "step": 366 + }, + { + "epoch": 0.02, + "grad_norm": 0.8564701032071632, + "learning_rate": 1.4324746291959408e-06, + "loss": 0.3489, + "step": 367 + }, + { + "epoch": 0.02, + "grad_norm": 0.8031651754266314, + "learning_rate": 1.436377829820453e-06, + "loss": 0.359, + "step": 368 + }, + { + "epoch": 0.02, + "grad_norm": 0.8004393992736598, + "learning_rate": 1.4402810304449648e-06, + "loss": 0.3502, + "step": 369 + }, + { + "epoch": 0.02, + "grad_norm": 0.8794244081868436, + "learning_rate": 1.444184231069477e-06, + "loss": 0.3686, + "step": 370 + }, + { + "epoch": 0.02, + "grad_norm": 0.8474845011153278, + "learning_rate": 1.4480874316939891e-06, + "loss": 0.3602, + "step": 371 + }, + { + "epoch": 0.02, + "grad_norm": 0.850385798584401, + "learning_rate": 1.4519906323185013e-06, + "loss": 0.3676, + "step": 372 + }, + { + "epoch": 0.02, + "grad_norm": 0.801224430180162, + "learning_rate": 1.4558938329430134e-06, + "loss": 0.3602, + "step": 373 + }, + { + "epoch": 0.02, + "grad_norm": 0.7523538668966292, + "learning_rate": 1.4597970335675255e-06, + "loss": 0.3502, + "step": 374 + }, + { + "epoch": 0.02, + "grad_norm": 0.802511578909268, + "learning_rate": 1.4637002341920375e-06, + "loss": 0.348, + "step": 375 + }, + { + "epoch": 0.02, + "grad_norm": 0.759246040931667, + "learning_rate": 1.4676034348165496e-06, + "loss": 0.3405, + "step": 376 + }, + { + "epoch": 0.02, + "grad_norm": 0.7740017130875888, + "learning_rate": 1.4715066354410618e-06, + "loss": 0.3226, + "step": 377 + }, + { + "epoch": 0.02, + "grad_norm": 0.8619942512661257, + "learning_rate": 1.4754098360655739e-06, + "loss": 0.3627, + "step": 378 + }, + { + "epoch": 0.02, + "grad_norm": 0.7962185545607231, + "learning_rate": 1.479313036690086e-06, + "loss": 0.3561, + "step": 379 + }, + { + "epoch": 0.02, + "grad_norm": 0.8627791768706268, + "learning_rate": 1.4832162373145982e-06, + "loss": 0.3762, + "step": 380 + }, + { + "epoch": 0.02, + "grad_norm": 0.7702800702094907, + "learning_rate": 1.48711943793911e-06, + "loss": 0.3523, + "step": 381 + }, + { + "epoch": 0.02, + "grad_norm": 0.7383564115527774, + "learning_rate": 1.4910226385636222e-06, + "loss": 0.3227, + "step": 382 + }, + { + "epoch": 0.02, + "grad_norm": 0.814597968947982, + "learning_rate": 1.4949258391881344e-06, + "loss": 0.344, + "step": 383 + }, + { + "epoch": 0.02, + "grad_norm": 0.9353176371168589, + "learning_rate": 1.4988290398126465e-06, + "loss": 0.3785, + "step": 384 + }, + { + "epoch": 0.02, + "grad_norm": 0.7834930754783883, + "learning_rate": 1.5027322404371587e-06, + "loss": 0.3501, + "step": 385 + }, + { + "epoch": 0.02, + "grad_norm": 0.8204511568682297, + "learning_rate": 1.5066354410616708e-06, + "loss": 0.3768, + "step": 386 + }, + { + "epoch": 0.02, + "grad_norm": 0.7651396794596279, + "learning_rate": 1.5105386416861827e-06, + "loss": 0.3417, + "step": 387 + }, + { + "epoch": 0.02, + "grad_norm": 0.8676874311826211, + "learning_rate": 1.5144418423106949e-06, + "loss": 0.358, + "step": 388 + }, + { + "epoch": 0.02, + "grad_norm": 0.8568070450915463, + "learning_rate": 1.518345042935207e-06, + "loss": 0.3221, + "step": 389 + }, + { + "epoch": 0.02, + "grad_norm": 0.8932836715259755, + "learning_rate": 1.5222482435597192e-06, + "loss": 0.3584, + "step": 390 + }, + { + "epoch": 0.02, + "grad_norm": 0.8129000102178708, + "learning_rate": 1.5261514441842313e-06, + "loss": 0.3366, + "step": 391 + }, + { + "epoch": 0.02, + "grad_norm": 0.8426653382976391, + "learning_rate": 1.5300546448087432e-06, + "loss": 0.3706, + "step": 392 + }, + { + "epoch": 0.02, + "grad_norm": 0.802217831930987, + "learning_rate": 1.5339578454332554e-06, + "loss": 0.3671, + "step": 393 + }, + { + "epoch": 0.02, + "grad_norm": 0.8063571420227426, + "learning_rate": 1.5378610460577675e-06, + "loss": 0.3576, + "step": 394 + }, + { + "epoch": 0.02, + "grad_norm": 0.8112651659031157, + "learning_rate": 1.5417642466822796e-06, + "loss": 0.3312, + "step": 395 + }, + { + "epoch": 0.02, + "grad_norm": 0.8256835436570796, + "learning_rate": 1.5456674473067918e-06, + "loss": 0.3351, + "step": 396 + }, + { + "epoch": 0.02, + "grad_norm": 0.8402022792830862, + "learning_rate": 1.549570647931304e-06, + "loss": 0.3643, + "step": 397 + }, + { + "epoch": 0.02, + "grad_norm": 0.813526207768186, + "learning_rate": 1.5534738485558159e-06, + "loss": 0.3265, + "step": 398 + }, + { + "epoch": 0.02, + "grad_norm": 0.8091116971837633, + "learning_rate": 1.557377049180328e-06, + "loss": 0.3498, + "step": 399 + }, + { + "epoch": 0.02, + "grad_norm": 0.8140598758568126, + "learning_rate": 1.5612802498048401e-06, + "loss": 0.3721, + "step": 400 + }, + { + "epoch": 0.02, + "grad_norm": 0.797121709314926, + "learning_rate": 1.5651834504293523e-06, + "loss": 0.3281, + "step": 401 + }, + { + "epoch": 0.02, + "grad_norm": 0.7913627484323329, + "learning_rate": 1.5690866510538644e-06, + "loss": 0.349, + "step": 402 + }, + { + "epoch": 0.02, + "grad_norm": 0.7952131842491763, + "learning_rate": 1.5729898516783766e-06, + "loss": 0.3519, + "step": 403 + }, + { + "epoch": 0.02, + "grad_norm": 0.7994612454319783, + "learning_rate": 1.5768930523028885e-06, + "loss": 0.3381, + "step": 404 + }, + { + "epoch": 0.02, + "grad_norm": 0.848780452071145, + "learning_rate": 1.5807962529274006e-06, + "loss": 0.3542, + "step": 405 + }, + { + "epoch": 0.02, + "grad_norm": 0.8085383683680807, + "learning_rate": 1.5846994535519128e-06, + "loss": 0.3768, + "step": 406 + }, + { + "epoch": 0.02, + "grad_norm": 0.7806852450554176, + "learning_rate": 1.588602654176425e-06, + "loss": 0.3555, + "step": 407 + }, + { + "epoch": 0.02, + "grad_norm": 0.943604616216737, + "learning_rate": 1.592505854800937e-06, + "loss": 0.3581, + "step": 408 + }, + { + "epoch": 0.02, + "grad_norm": 0.8104476401182904, + "learning_rate": 1.5964090554254492e-06, + "loss": 0.3323, + "step": 409 + }, + { + "epoch": 0.02, + "grad_norm": 0.8905921061110477, + "learning_rate": 1.6003122560499611e-06, + "loss": 0.3529, + "step": 410 + }, + { + "epoch": 0.02, + "grad_norm": 0.8048260084533357, + "learning_rate": 1.6042154566744733e-06, + "loss": 0.3495, + "step": 411 + }, + { + "epoch": 0.02, + "grad_norm": 0.7640329879124377, + "learning_rate": 1.6081186572989854e-06, + "loss": 0.3387, + "step": 412 + }, + { + "epoch": 0.02, + "grad_norm": 0.933014397809207, + "learning_rate": 1.6120218579234975e-06, + "loss": 0.3413, + "step": 413 + }, + { + "epoch": 0.02, + "grad_norm": 0.7826236535012769, + "learning_rate": 1.6159250585480097e-06, + "loss": 0.3331, + "step": 414 + }, + { + "epoch": 0.02, + "grad_norm": 1.0190802204536085, + "learning_rate": 1.6198282591725218e-06, + "loss": 0.3534, + "step": 415 + }, + { + "epoch": 0.02, + "grad_norm": 0.8691956315281144, + "learning_rate": 1.6237314597970338e-06, + "loss": 0.3499, + "step": 416 + }, + { + "epoch": 0.02, + "grad_norm": 0.8043086326409871, + "learning_rate": 1.6276346604215457e-06, + "loss": 0.3573, + "step": 417 + }, + { + "epoch": 0.02, + "grad_norm": 0.8647530798784214, + "learning_rate": 1.6315378610460578e-06, + "loss": 0.3702, + "step": 418 + }, + { + "epoch": 0.02, + "grad_norm": 0.8913223909156825, + "learning_rate": 1.63544106167057e-06, + "loss": 0.3304, + "step": 419 + }, + { + "epoch": 0.02, + "grad_norm": 0.7730725085178504, + "learning_rate": 1.6393442622950819e-06, + "loss": 0.3477, + "step": 420 + }, + { + "epoch": 0.02, + "grad_norm": 0.9388426665671403, + "learning_rate": 1.643247462919594e-06, + "loss": 0.3385, + "step": 421 + }, + { + "epoch": 0.02, + "grad_norm": 0.8645221080995654, + "learning_rate": 1.6471506635441062e-06, + "loss": 0.357, + "step": 422 + }, + { + "epoch": 0.02, + "grad_norm": 0.8274723523201518, + "learning_rate": 1.6510538641686183e-06, + "loss": 0.3591, + "step": 423 + }, + { + "epoch": 0.02, + "grad_norm": 0.8929913836533515, + "learning_rate": 1.6549570647931305e-06, + "loss": 0.3386, + "step": 424 + }, + { + "epoch": 0.02, + "grad_norm": 1.0362822190573269, + "learning_rate": 1.6588602654176426e-06, + "loss": 0.3464, + "step": 425 + }, + { + "epoch": 0.02, + "grad_norm": 0.83422142999996, + "learning_rate": 1.6627634660421545e-06, + "loss": 0.3325, + "step": 426 + }, + { + "epoch": 0.02, + "grad_norm": 0.7559385796253701, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.3315, + "step": 427 + }, + { + "epoch": 0.02, + "grad_norm": 0.9445334816350274, + "learning_rate": 1.6705698672911788e-06, + "loss": 0.3385, + "step": 428 + }, + { + "epoch": 0.02, + "grad_norm": 0.9551099975907847, + "learning_rate": 1.674473067915691e-06, + "loss": 0.3505, + "step": 429 + }, + { + "epoch": 0.02, + "grad_norm": 0.8262714043431814, + "learning_rate": 1.678376268540203e-06, + "loss": 0.3307, + "step": 430 + }, + { + "epoch": 0.02, + "grad_norm": 0.844363251156397, + "learning_rate": 1.6822794691647152e-06, + "loss": 0.3399, + "step": 431 + }, + { + "epoch": 0.02, + "grad_norm": 0.9106213453465972, + "learning_rate": 1.6861826697892272e-06, + "loss": 0.3444, + "step": 432 + }, + { + "epoch": 0.02, + "grad_norm": 1.0347643384515455, + "learning_rate": 1.6900858704137393e-06, + "loss": 0.3518, + "step": 433 + }, + { + "epoch": 0.02, + "grad_norm": 0.7797503120162157, + "learning_rate": 1.6939890710382514e-06, + "loss": 0.3531, + "step": 434 + }, + { + "epoch": 0.02, + "grad_norm": 0.9271815408352906, + "learning_rate": 1.6978922716627636e-06, + "loss": 0.3428, + "step": 435 + }, + { + "epoch": 0.02, + "grad_norm": 0.8999507315772118, + "learning_rate": 1.7017954722872757e-06, + "loss": 0.3394, + "step": 436 + }, + { + "epoch": 0.02, + "grad_norm": 0.8077804915501534, + "learning_rate": 1.7056986729117879e-06, + "loss": 0.3466, + "step": 437 + }, + { + "epoch": 0.02, + "grad_norm": 0.8039344045918295, + "learning_rate": 1.7096018735362998e-06, + "loss": 0.346, + "step": 438 + }, + { + "epoch": 0.02, + "grad_norm": 0.8076175608440448, + "learning_rate": 1.713505074160812e-06, + "loss": 0.3303, + "step": 439 + }, + { + "epoch": 0.02, + "grad_norm": 0.7783493122245685, + "learning_rate": 1.717408274785324e-06, + "loss": 0.3333, + "step": 440 + }, + { + "epoch": 0.02, + "grad_norm": 0.7739166584299653, + "learning_rate": 1.7213114754098362e-06, + "loss": 0.3259, + "step": 441 + }, + { + "epoch": 0.02, + "grad_norm": 0.8752574513259891, + "learning_rate": 1.7252146760343483e-06, + "loss": 0.3529, + "step": 442 + }, + { + "epoch": 0.02, + "grad_norm": 0.8036210209664938, + "learning_rate": 1.7291178766588605e-06, + "loss": 0.3484, + "step": 443 + }, + { + "epoch": 0.02, + "grad_norm": 0.7855887385319112, + "learning_rate": 1.7330210772833724e-06, + "loss": 0.336, + "step": 444 + }, + { + "epoch": 0.02, + "grad_norm": 0.8691927310521814, + "learning_rate": 1.7369242779078846e-06, + "loss": 0.3689, + "step": 445 + }, + { + "epoch": 0.02, + "grad_norm": 0.819433098526287, + "learning_rate": 1.7408274785323967e-06, + "loss": 0.3593, + "step": 446 + }, + { + "epoch": 0.02, + "grad_norm": 0.8044045223097964, + "learning_rate": 1.7447306791569088e-06, + "loss": 0.3364, + "step": 447 + }, + { + "epoch": 0.02, + "grad_norm": 0.8553326165821837, + "learning_rate": 1.748633879781421e-06, + "loss": 0.3399, + "step": 448 + }, + { + "epoch": 0.02, + "grad_norm": 0.8097907722122035, + "learning_rate": 1.752537080405933e-06, + "loss": 0.3518, + "step": 449 + }, + { + "epoch": 0.02, + "grad_norm": 0.7829910801112184, + "learning_rate": 1.756440281030445e-06, + "loss": 0.3514, + "step": 450 + }, + { + "epoch": 0.02, + "grad_norm": 0.7682096424024997, + "learning_rate": 1.7603434816549572e-06, + "loss": 0.3385, + "step": 451 + }, + { + "epoch": 0.02, + "grad_norm": 0.8747405845664556, + "learning_rate": 1.7642466822794693e-06, + "loss": 0.3507, + "step": 452 + }, + { + "epoch": 0.02, + "grad_norm": 0.7584845821798897, + "learning_rate": 1.7681498829039815e-06, + "loss": 0.3403, + "step": 453 + }, + { + "epoch": 0.02, + "grad_norm": 0.7768556567715447, + "learning_rate": 1.7720530835284936e-06, + "loss": 0.3521, + "step": 454 + }, + { + "epoch": 0.02, + "grad_norm": 0.7986084451646666, + "learning_rate": 1.7759562841530055e-06, + "loss": 0.3485, + "step": 455 + }, + { + "epoch": 0.02, + "grad_norm": 0.8264624900959446, + "learning_rate": 1.7798594847775177e-06, + "loss": 0.3352, + "step": 456 + }, + { + "epoch": 0.02, + "grad_norm": 0.7540310841896886, + "learning_rate": 1.7837626854020298e-06, + "loss": 0.3198, + "step": 457 + }, + { + "epoch": 0.02, + "grad_norm": 0.8011072414974729, + "learning_rate": 1.787665886026542e-06, + "loss": 0.3391, + "step": 458 + }, + { + "epoch": 0.02, + "grad_norm": 0.7915728264209736, + "learning_rate": 1.791569086651054e-06, + "loss": 0.3282, + "step": 459 + }, + { + "epoch": 0.02, + "grad_norm": 0.7526082651562527, + "learning_rate": 1.7954722872755662e-06, + "loss": 0.3283, + "step": 460 + }, + { + "epoch": 0.02, + "grad_norm": 0.8065224994878701, + "learning_rate": 1.7993754879000782e-06, + "loss": 0.3265, + "step": 461 + }, + { + "epoch": 0.02, + "grad_norm": 0.720021576109131, + "learning_rate": 1.8032786885245903e-06, + "loss": 0.3378, + "step": 462 + }, + { + "epoch": 0.02, + "grad_norm": 0.8009645488357455, + "learning_rate": 1.8071818891491025e-06, + "loss": 0.3249, + "step": 463 + }, + { + "epoch": 0.02, + "grad_norm": 0.764199649808597, + "learning_rate": 1.8110850897736146e-06, + "loss": 0.3164, + "step": 464 + }, + { + "epoch": 0.02, + "grad_norm": 0.8392410128448871, + "learning_rate": 1.8149882903981267e-06, + "loss": 0.3486, + "step": 465 + }, + { + "epoch": 0.02, + "grad_norm": 0.8347846614582782, + "learning_rate": 1.8188914910226389e-06, + "loss": 0.3321, + "step": 466 + }, + { + "epoch": 0.02, + "grad_norm": 0.8041869658807121, + "learning_rate": 1.8227946916471508e-06, + "loss": 0.3424, + "step": 467 + }, + { + "epoch": 0.02, + "grad_norm": 0.8084940094695268, + "learning_rate": 1.826697892271663e-06, + "loss": 0.3264, + "step": 468 + }, + { + "epoch": 0.02, + "grad_norm": 0.7905325945807548, + "learning_rate": 1.830601092896175e-06, + "loss": 0.3278, + "step": 469 + }, + { + "epoch": 0.02, + "grad_norm": 0.8204026872768688, + "learning_rate": 1.8345042935206872e-06, + "loss": 0.3221, + "step": 470 + }, + { + "epoch": 0.02, + "grad_norm": 0.8547949164112751, + "learning_rate": 1.8384074941451994e-06, + "loss": 0.3592, + "step": 471 + }, + { + "epoch": 0.02, + "grad_norm": 0.8239886989922721, + "learning_rate": 1.8423106947697115e-06, + "loss": 0.3827, + "step": 472 + }, + { + "epoch": 0.02, + "grad_norm": 0.7940628180165971, + "learning_rate": 1.8462138953942234e-06, + "loss": 0.3259, + "step": 473 + }, + { + "epoch": 0.02, + "grad_norm": 0.7518707517527868, + "learning_rate": 1.8501170960187356e-06, + "loss": 0.3267, + "step": 474 + }, + { + "epoch": 0.02, + "grad_norm": 0.8054700350826289, + "learning_rate": 1.8540202966432477e-06, + "loss": 0.3479, + "step": 475 + }, + { + "epoch": 0.02, + "grad_norm": 0.7566510698996279, + "learning_rate": 1.8579234972677599e-06, + "loss": 0.3565, + "step": 476 + }, + { + "epoch": 0.02, + "grad_norm": 0.7744965121296424, + "learning_rate": 1.861826697892272e-06, + "loss": 0.3582, + "step": 477 + }, + { + "epoch": 0.02, + "grad_norm": 0.8333147887465753, + "learning_rate": 1.8657298985167841e-06, + "loss": 0.3582, + "step": 478 + }, + { + "epoch": 0.02, + "grad_norm": 0.8705498982783497, + "learning_rate": 1.869633099141296e-06, + "loss": 0.3747, + "step": 479 + }, + { + "epoch": 0.02, + "grad_norm": 0.8242185390839183, + "learning_rate": 1.8735362997658082e-06, + "loss": 0.3727, + "step": 480 + }, + { + "epoch": 0.02, + "grad_norm": 0.8938283079590679, + "learning_rate": 1.8774395003903201e-06, + "loss": 0.3371, + "step": 481 + }, + { + "epoch": 0.02, + "grad_norm": 0.8274775966846732, + "learning_rate": 1.8813427010148323e-06, + "loss": 0.3373, + "step": 482 + }, + { + "epoch": 0.02, + "grad_norm": 0.7834754439680777, + "learning_rate": 1.8852459016393442e-06, + "loss": 0.3409, + "step": 483 + }, + { + "epoch": 0.02, + "grad_norm": 0.802248199612125, + "learning_rate": 1.8891491022638563e-06, + "loss": 0.3189, + "step": 484 + }, + { + "epoch": 0.02, + "grad_norm": 0.8423818357808843, + "learning_rate": 1.8930523028883685e-06, + "loss": 0.3512, + "step": 485 + }, + { + "epoch": 0.02, + "grad_norm": 0.8779632662103783, + "learning_rate": 1.8969555035128806e-06, + "loss": 0.3421, + "step": 486 + }, + { + "epoch": 0.02, + "grad_norm": 0.9029760535205037, + "learning_rate": 1.9008587041373928e-06, + "loss": 0.348, + "step": 487 + }, + { + "epoch": 0.02, + "grad_norm": 0.8502347019942966, + "learning_rate": 1.904761904761905e-06, + "loss": 0.3604, + "step": 488 + }, + { + "epoch": 0.02, + "grad_norm": 0.7793366770665221, + "learning_rate": 1.908665105386417e-06, + "loss": 0.3323, + "step": 489 + }, + { + "epoch": 0.02, + "grad_norm": 0.8181185505467234, + "learning_rate": 1.912568306010929e-06, + "loss": 0.3516, + "step": 490 + }, + { + "epoch": 0.02, + "grad_norm": 0.816062272805071, + "learning_rate": 1.916471506635441e-06, + "loss": 0.3431, + "step": 491 + }, + { + "epoch": 0.02, + "grad_norm": 0.7872606792200858, + "learning_rate": 1.9203747072599533e-06, + "loss": 0.347, + "step": 492 + }, + { + "epoch": 0.02, + "grad_norm": 0.8189155770202825, + "learning_rate": 1.9242779078844654e-06, + "loss": 0.3297, + "step": 493 + }, + { + "epoch": 0.02, + "grad_norm": 0.8703111446645835, + "learning_rate": 1.9281811085089775e-06, + "loss": 0.3318, + "step": 494 + }, + { + "epoch": 0.02, + "grad_norm": 0.7293655995524029, + "learning_rate": 1.9320843091334897e-06, + "loss": 0.3136, + "step": 495 + }, + { + "epoch": 0.02, + "grad_norm": 0.8320973632647902, + "learning_rate": 1.935987509758002e-06, + "loss": 0.3467, + "step": 496 + }, + { + "epoch": 0.02, + "grad_norm": 0.8177091031563406, + "learning_rate": 1.939890710382514e-06, + "loss": 0.3463, + "step": 497 + }, + { + "epoch": 0.02, + "grad_norm": 0.7864970902017039, + "learning_rate": 1.9437939110070257e-06, + "loss": 0.3291, + "step": 498 + }, + { + "epoch": 0.02, + "grad_norm": 0.8802739456156831, + "learning_rate": 1.947697111631538e-06, + "loss": 0.352, + "step": 499 + }, + { + "epoch": 0.02, + "grad_norm": 0.8435116959311106, + "learning_rate": 1.95160031225605e-06, + "loss": 0.3355, + "step": 500 + }, + { + "epoch": 0.02, + "grad_norm": 0.8284973943393457, + "learning_rate": 1.955503512880562e-06, + "loss": 0.3348, + "step": 501 + }, + { + "epoch": 0.02, + "grad_norm": 0.758843137064754, + "learning_rate": 1.9594067135050742e-06, + "loss": 0.3312, + "step": 502 + }, + { + "epoch": 0.02, + "grad_norm": 0.798607297413246, + "learning_rate": 1.9633099141295864e-06, + "loss": 0.3558, + "step": 503 + }, + { + "epoch": 0.02, + "grad_norm": 0.7961600320688458, + "learning_rate": 1.9672131147540985e-06, + "loss": 0.3654, + "step": 504 + }, + { + "epoch": 0.02, + "grad_norm": 0.8081626037113192, + "learning_rate": 1.9711163153786107e-06, + "loss": 0.349, + "step": 505 + }, + { + "epoch": 0.02, + "grad_norm": 0.804960032378753, + "learning_rate": 1.975019516003123e-06, + "loss": 0.3245, + "step": 506 + }, + { + "epoch": 0.02, + "grad_norm": 0.8523743024265802, + "learning_rate": 1.978922716627635e-06, + "loss": 0.3566, + "step": 507 + }, + { + "epoch": 0.02, + "grad_norm": 0.7998472698069916, + "learning_rate": 1.982825917252147e-06, + "loss": 0.3394, + "step": 508 + }, + { + "epoch": 0.02, + "grad_norm": 0.7866075552176608, + "learning_rate": 1.986729117876659e-06, + "loss": 0.3334, + "step": 509 + }, + { + "epoch": 0.02, + "grad_norm": 0.8143604673651875, + "learning_rate": 1.990632318501171e-06, + "loss": 0.3584, + "step": 510 + }, + { + "epoch": 0.02, + "grad_norm": 0.8260591693822977, + "learning_rate": 1.994535519125683e-06, + "loss": 0.346, + "step": 511 + }, + { + "epoch": 0.02, + "grad_norm": 0.7724749985956596, + "learning_rate": 1.9984387197501952e-06, + "loss": 0.3464, + "step": 512 + }, + { + "epoch": 0.02, + "grad_norm": 0.8456122078869557, + "learning_rate": 2.0023419203747074e-06, + "loss": 0.3453, + "step": 513 + }, + { + "epoch": 0.02, + "grad_norm": 0.9175665164690844, + "learning_rate": 2.0062451209992195e-06, + "loss": 0.3529, + "step": 514 + }, + { + "epoch": 0.02, + "grad_norm": 0.8131362241534775, + "learning_rate": 2.0101483216237316e-06, + "loss": 0.3249, + "step": 515 + }, + { + "epoch": 0.02, + "grad_norm": 0.7875554564813882, + "learning_rate": 2.0140515222482438e-06, + "loss": 0.3314, + "step": 516 + }, + { + "epoch": 0.02, + "grad_norm": 0.8893803970873522, + "learning_rate": 2.017954722872756e-06, + "loss": 0.3485, + "step": 517 + }, + { + "epoch": 0.02, + "grad_norm": 0.851098871467831, + "learning_rate": 2.021857923497268e-06, + "loss": 0.3385, + "step": 518 + }, + { + "epoch": 0.02, + "grad_norm": 0.7980840276586699, + "learning_rate": 2.02576112412178e-06, + "loss": 0.3395, + "step": 519 + }, + { + "epoch": 0.02, + "grad_norm": 0.9884006798599159, + "learning_rate": 2.0296643247462923e-06, + "loss": 0.3595, + "step": 520 + }, + { + "epoch": 0.02, + "grad_norm": 0.9385561217923857, + "learning_rate": 2.033567525370804e-06, + "loss": 0.3552, + "step": 521 + }, + { + "epoch": 0.02, + "grad_norm": 0.7852475719899504, + "learning_rate": 2.037470725995316e-06, + "loss": 0.3384, + "step": 522 + }, + { + "epoch": 0.02, + "grad_norm": 0.9147349639084852, + "learning_rate": 2.0413739266198283e-06, + "loss": 0.3391, + "step": 523 + }, + { + "epoch": 0.02, + "grad_norm": 0.792848222220544, + "learning_rate": 2.0452771272443405e-06, + "loss": 0.3169, + "step": 524 + }, + { + "epoch": 0.02, + "grad_norm": 0.7521658886883144, + "learning_rate": 2.0491803278688526e-06, + "loss": 0.3265, + "step": 525 + }, + { + "epoch": 0.02, + "grad_norm": 0.8403936311289207, + "learning_rate": 2.0530835284933648e-06, + "loss": 0.3373, + "step": 526 + }, + { + "epoch": 0.02, + "grad_norm": 0.9251397978580324, + "learning_rate": 2.056986729117877e-06, + "loss": 0.336, + "step": 527 + }, + { + "epoch": 0.02, + "grad_norm": 0.7676392434229101, + "learning_rate": 2.060889929742389e-06, + "loss": 0.3464, + "step": 528 + }, + { + "epoch": 0.02, + "grad_norm": 0.812699929509099, + "learning_rate": 2.064793130366901e-06, + "loss": 0.3671, + "step": 529 + }, + { + "epoch": 0.02, + "grad_norm": 0.7716543087705485, + "learning_rate": 2.0686963309914133e-06, + "loss": 0.3308, + "step": 530 + }, + { + "epoch": 0.02, + "grad_norm": 0.7972143681944185, + "learning_rate": 2.0725995316159255e-06, + "loss": 0.3093, + "step": 531 + }, + { + "epoch": 0.02, + "grad_norm": 0.782879731854589, + "learning_rate": 2.0765027322404376e-06, + "loss": 0.3176, + "step": 532 + }, + { + "epoch": 0.02, + "grad_norm": 0.7416915004747296, + "learning_rate": 2.0804059328649493e-06, + "loss": 0.3187, + "step": 533 + }, + { + "epoch": 0.03, + "grad_norm": 0.8432968237623029, + "learning_rate": 2.0843091334894615e-06, + "loss": 0.3384, + "step": 534 + }, + { + "epoch": 0.03, + "grad_norm": 0.7618461243507044, + "learning_rate": 2.0882123341139736e-06, + "loss": 0.3223, + "step": 535 + }, + { + "epoch": 0.03, + "grad_norm": 0.8184523653891167, + "learning_rate": 2.0921155347384857e-06, + "loss": 0.331, + "step": 536 + }, + { + "epoch": 0.03, + "grad_norm": 0.8061009058222663, + "learning_rate": 2.096018735362998e-06, + "loss": 0.3677, + "step": 537 + }, + { + "epoch": 0.03, + "grad_norm": 0.7605093112940662, + "learning_rate": 2.09992193598751e-06, + "loss": 0.3277, + "step": 538 + }, + { + "epoch": 0.03, + "grad_norm": 0.776005431590662, + "learning_rate": 2.103825136612022e-06, + "loss": 0.3194, + "step": 539 + }, + { + "epoch": 0.03, + "grad_norm": 0.7690444714246482, + "learning_rate": 2.1077283372365343e-06, + "loss": 0.3265, + "step": 540 + }, + { + "epoch": 0.03, + "grad_norm": 0.7829947334201941, + "learning_rate": 2.1116315378610465e-06, + "loss": 0.3391, + "step": 541 + }, + { + "epoch": 0.03, + "grad_norm": 0.8448858469489935, + "learning_rate": 2.1155347384855586e-06, + "loss": 0.3608, + "step": 542 + }, + { + "epoch": 0.03, + "grad_norm": 0.8192097246862647, + "learning_rate": 2.1194379391100707e-06, + "loss": 0.3443, + "step": 543 + }, + { + "epoch": 0.03, + "grad_norm": 0.8177801352147176, + "learning_rate": 2.123341139734583e-06, + "loss": 0.3373, + "step": 544 + }, + { + "epoch": 0.03, + "grad_norm": 0.796544203626462, + "learning_rate": 2.1272443403590946e-06, + "loss": 0.3361, + "step": 545 + }, + { + "epoch": 0.03, + "grad_norm": 0.8322224622052117, + "learning_rate": 2.1311475409836067e-06, + "loss": 0.3549, + "step": 546 + }, + { + "epoch": 0.03, + "grad_norm": 0.8053915510469698, + "learning_rate": 2.135050741608119e-06, + "loss": 0.36, + "step": 547 + }, + { + "epoch": 0.03, + "grad_norm": 0.7917592617514915, + "learning_rate": 2.138953942232631e-06, + "loss": 0.311, + "step": 548 + }, + { + "epoch": 0.03, + "grad_norm": 0.7580255127182716, + "learning_rate": 2.1428571428571427e-06, + "loss": 0.3277, + "step": 549 + }, + { + "epoch": 0.03, + "grad_norm": 0.8822547698184935, + "learning_rate": 2.146760343481655e-06, + "loss": 0.3407, + "step": 550 + }, + { + "epoch": 0.03, + "grad_norm": 0.843271279476342, + "learning_rate": 2.150663544106167e-06, + "loss": 0.3486, + "step": 551 + }, + { + "epoch": 0.03, + "grad_norm": 0.8140940911711986, + "learning_rate": 2.154566744730679e-06, + "loss": 0.3329, + "step": 552 + }, + { + "epoch": 0.03, + "grad_norm": 0.9034552942380556, + "learning_rate": 2.1584699453551913e-06, + "loss": 0.3537, + "step": 553 + }, + { + "epoch": 0.03, + "grad_norm": 0.808335806406233, + "learning_rate": 2.1623731459797034e-06, + "loss": 0.3401, + "step": 554 + }, + { + "epoch": 0.03, + "grad_norm": 0.8634246156617748, + "learning_rate": 2.1662763466042156e-06, + "loss": 0.3329, + "step": 555 + }, + { + "epoch": 0.03, + "grad_norm": 0.8348786689394118, + "learning_rate": 2.1701795472287277e-06, + "loss": 0.3596, + "step": 556 + }, + { + "epoch": 0.03, + "grad_norm": 0.7816203798414081, + "learning_rate": 2.17408274785324e-06, + "loss": 0.3396, + "step": 557 + }, + { + "epoch": 0.03, + "grad_norm": 0.7823891520124928, + "learning_rate": 2.177985948477752e-06, + "loss": 0.3349, + "step": 558 + }, + { + "epoch": 0.03, + "grad_norm": 0.778370223851375, + "learning_rate": 2.181889149102264e-06, + "loss": 0.3397, + "step": 559 + }, + { + "epoch": 0.03, + "grad_norm": 0.8281540742887777, + "learning_rate": 2.185792349726776e-06, + "loss": 0.3365, + "step": 560 + }, + { + "epoch": 0.03, + "grad_norm": 0.8393024894786068, + "learning_rate": 2.189695550351288e-06, + "loss": 0.3372, + "step": 561 + }, + { + "epoch": 0.03, + "grad_norm": 0.8459807283262023, + "learning_rate": 2.1935987509758e-06, + "loss": 0.3499, + "step": 562 + }, + { + "epoch": 0.03, + "grad_norm": 0.7951522396776779, + "learning_rate": 2.1975019516003123e-06, + "loss": 0.3404, + "step": 563 + }, + { + "epoch": 0.03, + "grad_norm": 0.833758662299918, + "learning_rate": 2.2014051522248244e-06, + "loss": 0.3503, + "step": 564 + }, + { + "epoch": 0.03, + "grad_norm": 0.8026196775710626, + "learning_rate": 2.2053083528493366e-06, + "loss": 0.3315, + "step": 565 + }, + { + "epoch": 0.03, + "grad_norm": 0.7823428427861416, + "learning_rate": 2.2092115534738487e-06, + "loss": 0.3426, + "step": 566 + }, + { + "epoch": 0.03, + "grad_norm": 0.8244970571097272, + "learning_rate": 2.213114754098361e-06, + "loss": 0.3409, + "step": 567 + }, + { + "epoch": 0.03, + "grad_norm": 0.8197848051288171, + "learning_rate": 2.217017954722873e-06, + "loss": 0.3205, + "step": 568 + }, + { + "epoch": 0.03, + "grad_norm": 0.7890611354954277, + "learning_rate": 2.220921155347385e-06, + "loss": 0.3537, + "step": 569 + }, + { + "epoch": 0.03, + "grad_norm": 0.8957270945111125, + "learning_rate": 2.2248243559718973e-06, + "loss": 0.3298, + "step": 570 + }, + { + "epoch": 0.03, + "grad_norm": 0.8064455501463538, + "learning_rate": 2.2287275565964094e-06, + "loss": 0.3408, + "step": 571 + }, + { + "epoch": 0.03, + "grad_norm": 0.8259409832293229, + "learning_rate": 2.232630757220921e-06, + "loss": 0.3156, + "step": 572 + }, + { + "epoch": 0.03, + "grad_norm": 0.7843405470720111, + "learning_rate": 2.2365339578454333e-06, + "loss": 0.3431, + "step": 573 + }, + { + "epoch": 0.03, + "grad_norm": 0.7291240582266163, + "learning_rate": 2.2404371584699454e-06, + "loss": 0.3209, + "step": 574 + }, + { + "epoch": 0.03, + "grad_norm": 0.6926258757702719, + "learning_rate": 2.2443403590944575e-06, + "loss": 0.3193, + "step": 575 + }, + { + "epoch": 0.03, + "grad_norm": 0.8136685263039289, + "learning_rate": 2.2482435597189697e-06, + "loss": 0.3409, + "step": 576 + }, + { + "epoch": 0.03, + "grad_norm": 0.7786278946607379, + "learning_rate": 2.252146760343482e-06, + "loss": 0.3458, + "step": 577 + }, + { + "epoch": 0.03, + "grad_norm": 0.7629653236626819, + "learning_rate": 2.256049960967994e-06, + "loss": 0.3361, + "step": 578 + }, + { + "epoch": 0.03, + "grad_norm": 0.885397145765798, + "learning_rate": 2.259953161592506e-06, + "loss": 0.3513, + "step": 579 + }, + { + "epoch": 0.03, + "grad_norm": 0.8379005464133578, + "learning_rate": 2.2638563622170182e-06, + "loss": 0.3453, + "step": 580 + }, + { + "epoch": 0.03, + "grad_norm": 0.8003694377306825, + "learning_rate": 2.2677595628415304e-06, + "loss": 0.3382, + "step": 581 + }, + { + "epoch": 0.03, + "grad_norm": 0.7976949076061977, + "learning_rate": 2.2716627634660425e-06, + "loss": 0.3528, + "step": 582 + }, + { + "epoch": 0.03, + "grad_norm": 0.7606944770531175, + "learning_rate": 2.2755659640905547e-06, + "loss": 0.3209, + "step": 583 + }, + { + "epoch": 0.03, + "grad_norm": 0.8176711824838864, + "learning_rate": 2.2794691647150664e-06, + "loss": 0.33, + "step": 584 + }, + { + "epoch": 0.03, + "grad_norm": 0.8225948771042985, + "learning_rate": 2.2833723653395785e-06, + "loss": 0.3481, + "step": 585 + }, + { + "epoch": 0.03, + "grad_norm": 0.7864326962619818, + "learning_rate": 2.2872755659640907e-06, + "loss": 0.3433, + "step": 586 + }, + { + "epoch": 0.03, + "grad_norm": 0.7862938810335386, + "learning_rate": 2.291178766588603e-06, + "loss": 0.3508, + "step": 587 + }, + { + "epoch": 0.03, + "grad_norm": 0.7471045848938762, + "learning_rate": 2.295081967213115e-06, + "loss": 0.3332, + "step": 588 + }, + { + "epoch": 0.03, + "grad_norm": 0.7554708567751323, + "learning_rate": 2.298985167837627e-06, + "loss": 0.3187, + "step": 589 + }, + { + "epoch": 0.03, + "grad_norm": 0.7488333444906121, + "learning_rate": 2.3028883684621392e-06, + "loss": 0.324, + "step": 590 + }, + { + "epoch": 0.03, + "grad_norm": 0.7916469200082464, + "learning_rate": 2.3067915690866514e-06, + "loss": 0.3222, + "step": 591 + }, + { + "epoch": 0.03, + "grad_norm": 0.7789434123164025, + "learning_rate": 2.3106947697111635e-06, + "loss": 0.3498, + "step": 592 + }, + { + "epoch": 0.03, + "grad_norm": 0.8368553226904428, + "learning_rate": 2.3145979703356756e-06, + "loss": 0.3659, + "step": 593 + }, + { + "epoch": 0.03, + "grad_norm": 0.7750079989436724, + "learning_rate": 2.3185011709601878e-06, + "loss": 0.3265, + "step": 594 + }, + { + "epoch": 0.03, + "grad_norm": 0.816126471368532, + "learning_rate": 2.3224043715847e-06, + "loss": 0.3315, + "step": 595 + }, + { + "epoch": 0.03, + "grad_norm": 0.8118383918916602, + "learning_rate": 2.3263075722092116e-06, + "loss": 0.3291, + "step": 596 + }, + { + "epoch": 0.03, + "grad_norm": 0.8051406399258569, + "learning_rate": 2.3302107728337238e-06, + "loss": 0.3427, + "step": 597 + }, + { + "epoch": 0.03, + "grad_norm": 0.7621211317704851, + "learning_rate": 2.334113973458236e-06, + "loss": 0.3456, + "step": 598 + }, + { + "epoch": 0.03, + "grad_norm": 0.8308707652739893, + "learning_rate": 2.338017174082748e-06, + "loss": 0.3645, + "step": 599 + }, + { + "epoch": 0.03, + "grad_norm": 0.8660209210650036, + "learning_rate": 2.34192037470726e-06, + "loss": 0.3674, + "step": 600 + }, + { + "epoch": 0.03, + "grad_norm": 0.8312514605364693, + "learning_rate": 2.3458235753317723e-06, + "loss": 0.34, + "step": 601 + }, + { + "epoch": 0.03, + "grad_norm": 0.8801429915550012, + "learning_rate": 2.3497267759562845e-06, + "loss": 0.3617, + "step": 602 + }, + { + "epoch": 0.03, + "grad_norm": 0.8208042665808435, + "learning_rate": 2.3536299765807966e-06, + "loss": 0.3225, + "step": 603 + }, + { + "epoch": 0.03, + "grad_norm": 0.7462094770308701, + "learning_rate": 2.3575331772053088e-06, + "loss": 0.3364, + "step": 604 + }, + { + "epoch": 0.03, + "grad_norm": 0.8091057086684812, + "learning_rate": 2.361436377829821e-06, + "loss": 0.3311, + "step": 605 + }, + { + "epoch": 0.03, + "grad_norm": 0.8225695955214373, + "learning_rate": 2.365339578454333e-06, + "loss": 0.336, + "step": 606 + }, + { + "epoch": 0.03, + "grad_norm": 0.8149114966891813, + "learning_rate": 2.369242779078845e-06, + "loss": 0.3444, + "step": 607 + }, + { + "epoch": 0.03, + "grad_norm": 0.868508869614167, + "learning_rate": 2.373145979703357e-06, + "loss": 0.359, + "step": 608 + }, + { + "epoch": 0.03, + "grad_norm": 0.7823059396365701, + "learning_rate": 2.377049180327869e-06, + "loss": 0.3215, + "step": 609 + }, + { + "epoch": 0.03, + "grad_norm": 0.8442542971736989, + "learning_rate": 2.380952380952381e-06, + "loss": 0.3695, + "step": 610 + }, + { + "epoch": 0.03, + "grad_norm": 0.7734463953532689, + "learning_rate": 2.3848555815768933e-06, + "loss": 0.3155, + "step": 611 + }, + { + "epoch": 0.03, + "grad_norm": 0.776040340990316, + "learning_rate": 2.388758782201405e-06, + "loss": 0.334, + "step": 612 + }, + { + "epoch": 0.03, + "grad_norm": 0.8442630728872506, + "learning_rate": 2.392661982825917e-06, + "loss": 0.3381, + "step": 613 + }, + { + "epoch": 0.03, + "grad_norm": 0.8529305732296938, + "learning_rate": 2.3965651834504293e-06, + "loss": 0.3504, + "step": 614 + }, + { + "epoch": 0.03, + "grad_norm": 0.8448447635977847, + "learning_rate": 2.4004683840749415e-06, + "loss": 0.3529, + "step": 615 + }, + { + "epoch": 0.03, + "grad_norm": 0.8940693585726635, + "learning_rate": 2.4043715846994536e-06, + "loss": 0.3421, + "step": 616 + }, + { + "epoch": 0.03, + "grad_norm": 0.8079194826483432, + "learning_rate": 2.4082747853239657e-06, + "loss": 0.3156, + "step": 617 + }, + { + "epoch": 0.03, + "grad_norm": 0.7960200078427281, + "learning_rate": 2.412177985948478e-06, + "loss": 0.3675, + "step": 618 + }, + { + "epoch": 0.03, + "grad_norm": 0.776318123439197, + "learning_rate": 2.41608118657299e-06, + "loss": 0.3317, + "step": 619 + }, + { + "epoch": 0.03, + "grad_norm": 0.7936762898762251, + "learning_rate": 2.419984387197502e-06, + "loss": 0.3409, + "step": 620 + }, + { + "epoch": 0.03, + "grad_norm": 0.8348333964379586, + "learning_rate": 2.4238875878220143e-06, + "loss": 0.3369, + "step": 621 + }, + { + "epoch": 0.03, + "grad_norm": 0.796603314823004, + "learning_rate": 2.4277907884465265e-06, + "loss": 0.3196, + "step": 622 + }, + { + "epoch": 0.03, + "grad_norm": 0.7152377817881694, + "learning_rate": 2.431693989071038e-06, + "loss": 0.3497, + "step": 623 + }, + { + "epoch": 0.03, + "grad_norm": 0.8066787847079624, + "learning_rate": 2.4355971896955503e-06, + "loss": 0.3132, + "step": 624 + }, + { + "epoch": 0.03, + "grad_norm": 0.771806254662875, + "learning_rate": 2.4395003903200624e-06, + "loss": 0.3402, + "step": 625 + }, + { + "epoch": 0.03, + "grad_norm": 0.8927890026492508, + "learning_rate": 2.4434035909445746e-06, + "loss": 0.3244, + "step": 626 + }, + { + "epoch": 0.03, + "grad_norm": 0.8386291161482408, + "learning_rate": 2.4473067915690867e-06, + "loss": 0.3386, + "step": 627 + }, + { + "epoch": 0.03, + "grad_norm": 0.9151412413059024, + "learning_rate": 2.451209992193599e-06, + "loss": 0.3521, + "step": 628 + }, + { + "epoch": 0.03, + "grad_norm": 0.8023348195093023, + "learning_rate": 2.455113192818111e-06, + "loss": 0.3425, + "step": 629 + }, + { + "epoch": 0.03, + "grad_norm": 0.8951013803694953, + "learning_rate": 2.459016393442623e-06, + "loss": 0.3669, + "step": 630 + }, + { + "epoch": 0.03, + "grad_norm": 0.8498256722189474, + "learning_rate": 2.4629195940671353e-06, + "loss": 0.3383, + "step": 631 + }, + { + "epoch": 0.03, + "grad_norm": 0.8155112992464246, + "learning_rate": 2.4668227946916474e-06, + "loss": 0.334, + "step": 632 + }, + { + "epoch": 0.03, + "grad_norm": 0.8183420547857655, + "learning_rate": 2.4707259953161596e-06, + "loss": 0.3579, + "step": 633 + }, + { + "epoch": 0.03, + "grad_norm": 0.9084211502331642, + "learning_rate": 2.4746291959406717e-06, + "loss": 0.3423, + "step": 634 + }, + { + "epoch": 0.03, + "grad_norm": 0.8097793659548036, + "learning_rate": 2.4785323965651834e-06, + "loss": 0.3381, + "step": 635 + }, + { + "epoch": 0.03, + "grad_norm": 0.7629016132125738, + "learning_rate": 2.4824355971896956e-06, + "loss": 0.3411, + "step": 636 + }, + { + "epoch": 0.03, + "grad_norm": 0.906423479129678, + "learning_rate": 2.4863387978142077e-06, + "loss": 0.3609, + "step": 637 + }, + { + "epoch": 0.03, + "grad_norm": 0.7919661976209177, + "learning_rate": 2.49024199843872e-06, + "loss": 0.3301, + "step": 638 + }, + { + "epoch": 0.03, + "grad_norm": 0.805424102406669, + "learning_rate": 2.494145199063232e-06, + "loss": 0.348, + "step": 639 + }, + { + "epoch": 0.03, + "grad_norm": 0.8485367403383143, + "learning_rate": 2.498048399687744e-06, + "loss": 0.3386, + "step": 640 + }, + { + "epoch": 0.03, + "grad_norm": 0.766144493946584, + "learning_rate": 2.5019516003122563e-06, + "loss": 0.3277, + "step": 641 + }, + { + "epoch": 0.03, + "grad_norm": 0.8365029784346091, + "learning_rate": 2.5058548009367684e-06, + "loss": 0.3215, + "step": 642 + }, + { + "epoch": 0.03, + "grad_norm": 0.8521290543890946, + "learning_rate": 2.5097580015612806e-06, + "loss": 0.3258, + "step": 643 + }, + { + "epoch": 0.03, + "grad_norm": 0.8197005154682134, + "learning_rate": 2.5136612021857927e-06, + "loss": 0.3222, + "step": 644 + }, + { + "epoch": 0.03, + "grad_norm": 0.8893944638116964, + "learning_rate": 2.517564402810305e-06, + "loss": 0.3387, + "step": 645 + }, + { + "epoch": 0.03, + "grad_norm": 0.8568159129647409, + "learning_rate": 2.521467603434817e-06, + "loss": 0.3739, + "step": 646 + }, + { + "epoch": 0.03, + "grad_norm": 0.8252722425646443, + "learning_rate": 2.5253708040593287e-06, + "loss": 0.3629, + "step": 647 + }, + { + "epoch": 0.03, + "grad_norm": 0.7753439123265872, + "learning_rate": 2.529274004683841e-06, + "loss": 0.3514, + "step": 648 + }, + { + "epoch": 0.03, + "grad_norm": 0.8356188546927573, + "learning_rate": 2.533177205308353e-06, + "loss": 0.3162, + "step": 649 + }, + { + "epoch": 0.03, + "grad_norm": 0.7966738087447028, + "learning_rate": 2.537080405932865e-06, + "loss": 0.3405, + "step": 650 + }, + { + "epoch": 0.03, + "grad_norm": 0.8369322969138862, + "learning_rate": 2.5409836065573773e-06, + "loss": 0.3397, + "step": 651 + }, + { + "epoch": 0.03, + "grad_norm": 0.853806642287919, + "learning_rate": 2.5448868071818894e-06, + "loss": 0.333, + "step": 652 + }, + { + "epoch": 0.03, + "grad_norm": 0.8100218455026337, + "learning_rate": 2.5487900078064015e-06, + "loss": 0.3537, + "step": 653 + }, + { + "epoch": 0.03, + "grad_norm": 0.8339253808236127, + "learning_rate": 2.5526932084309137e-06, + "loss": 0.3567, + "step": 654 + }, + { + "epoch": 0.03, + "grad_norm": 0.7663236726118475, + "learning_rate": 2.556596409055426e-06, + "loss": 0.3315, + "step": 655 + }, + { + "epoch": 0.03, + "grad_norm": 0.7752952418477345, + "learning_rate": 2.560499609679938e-06, + "loss": 0.321, + "step": 656 + }, + { + "epoch": 0.03, + "grad_norm": 0.8192925488822871, + "learning_rate": 2.56440281030445e-06, + "loss": 0.3124, + "step": 657 + }, + { + "epoch": 0.03, + "grad_norm": 0.7589312094507481, + "learning_rate": 2.5683060109289622e-06, + "loss": 0.3265, + "step": 658 + }, + { + "epoch": 0.03, + "grad_norm": 0.8104224889312353, + "learning_rate": 2.572209211553474e-06, + "loss": 0.3372, + "step": 659 + }, + { + "epoch": 0.03, + "grad_norm": 0.7654378258806438, + "learning_rate": 2.576112412177986e-06, + "loss": 0.3416, + "step": 660 + }, + { + "epoch": 0.03, + "grad_norm": 0.8181306454442836, + "learning_rate": 2.5800156128024982e-06, + "loss": 0.3542, + "step": 661 + }, + { + "epoch": 0.03, + "grad_norm": 0.8051998490357577, + "learning_rate": 2.5839188134270104e-06, + "loss": 0.3226, + "step": 662 + }, + { + "epoch": 0.03, + "grad_norm": 0.7630363952276362, + "learning_rate": 2.5878220140515225e-06, + "loss": 0.3399, + "step": 663 + }, + { + "epoch": 0.03, + "grad_norm": 0.8426353402808956, + "learning_rate": 2.5917252146760347e-06, + "loss": 0.3429, + "step": 664 + }, + { + "epoch": 0.03, + "grad_norm": 0.7858997353361868, + "learning_rate": 2.595628415300547e-06, + "loss": 0.3313, + "step": 665 + }, + { + "epoch": 0.03, + "grad_norm": 0.7866297452184502, + "learning_rate": 2.599531615925059e-06, + "loss": 0.3273, + "step": 666 + }, + { + "epoch": 0.03, + "grad_norm": 0.8221860059287672, + "learning_rate": 2.603434816549571e-06, + "loss": 0.3213, + "step": 667 + }, + { + "epoch": 0.03, + "grad_norm": 0.8516859005441, + "learning_rate": 2.6073380171740832e-06, + "loss": 0.3355, + "step": 668 + }, + { + "epoch": 0.03, + "grad_norm": 0.8617959254185017, + "learning_rate": 2.6112412177985954e-06, + "loss": 0.3563, + "step": 669 + }, + { + "epoch": 0.03, + "grad_norm": 0.8167427412014266, + "learning_rate": 2.615144418423107e-06, + "loss": 0.3487, + "step": 670 + }, + { + "epoch": 0.03, + "grad_norm": 0.7964710848187857, + "learning_rate": 2.6190476190476192e-06, + "loss": 0.3512, + "step": 671 + }, + { + "epoch": 0.03, + "grad_norm": 0.7691816468401839, + "learning_rate": 2.6229508196721314e-06, + "loss": 0.3346, + "step": 672 + }, + { + "epoch": 0.03, + "grad_norm": 0.877804691869427, + "learning_rate": 2.6268540202966435e-06, + "loss": 0.3426, + "step": 673 + }, + { + "epoch": 0.03, + "grad_norm": 0.7923258060123479, + "learning_rate": 2.6307572209211556e-06, + "loss": 0.3429, + "step": 674 + }, + { + "epoch": 0.03, + "grad_norm": 0.7807139797630203, + "learning_rate": 2.6346604215456678e-06, + "loss": 0.3219, + "step": 675 + }, + { + "epoch": 0.03, + "grad_norm": 0.7899172759968026, + "learning_rate": 2.63856362217018e-06, + "loss": 0.3437, + "step": 676 + }, + { + "epoch": 0.03, + "grad_norm": 0.8198910429728323, + "learning_rate": 2.642466822794692e-06, + "loss": 0.3454, + "step": 677 + }, + { + "epoch": 0.03, + "grad_norm": 0.8529665656085381, + "learning_rate": 2.646370023419204e-06, + "loss": 0.3393, + "step": 678 + }, + { + "epoch": 0.03, + "grad_norm": 0.8215443956177911, + "learning_rate": 2.6502732240437163e-06, + "loss": 0.3164, + "step": 679 + }, + { + "epoch": 0.03, + "grad_norm": 0.8218102126971579, + "learning_rate": 2.6541764246682285e-06, + "loss": 0.3286, + "step": 680 + }, + { + "epoch": 0.03, + "grad_norm": 0.8034161970149091, + "learning_rate": 2.6580796252927406e-06, + "loss": 0.3287, + "step": 681 + }, + { + "epoch": 0.03, + "grad_norm": 0.7747677421244202, + "learning_rate": 2.6619828259172523e-06, + "loss": 0.3234, + "step": 682 + }, + { + "epoch": 0.03, + "grad_norm": 0.7658455310349103, + "learning_rate": 2.6658860265417645e-06, + "loss": 0.326, + "step": 683 + }, + { + "epoch": 0.03, + "grad_norm": 0.810968377218814, + "learning_rate": 2.6697892271662766e-06, + "loss": 0.3402, + "step": 684 + }, + { + "epoch": 0.03, + "grad_norm": 0.8442301712666332, + "learning_rate": 2.6736924277907888e-06, + "loss": 0.3499, + "step": 685 + }, + { + "epoch": 0.03, + "grad_norm": 0.8439489899628114, + "learning_rate": 2.677595628415301e-06, + "loss": 0.323, + "step": 686 + }, + { + "epoch": 0.03, + "grad_norm": 0.8614225957489333, + "learning_rate": 2.681498829039813e-06, + "loss": 0.33, + "step": 687 + }, + { + "epoch": 0.03, + "grad_norm": 0.8033934544338569, + "learning_rate": 2.685402029664325e-06, + "loss": 0.3377, + "step": 688 + }, + { + "epoch": 0.03, + "grad_norm": 0.8043456222224746, + "learning_rate": 2.6893052302888373e-06, + "loss": 0.3204, + "step": 689 + }, + { + "epoch": 0.03, + "grad_norm": 0.8096315000049439, + "learning_rate": 2.6932084309133495e-06, + "loss": 0.3195, + "step": 690 + }, + { + "epoch": 0.03, + "grad_norm": 0.7984721034381544, + "learning_rate": 2.6971116315378616e-06, + "loss": 0.3313, + "step": 691 + }, + { + "epoch": 0.03, + "grad_norm": 0.7805940232007262, + "learning_rate": 2.7010148321623738e-06, + "loss": 0.3403, + "step": 692 + }, + { + "epoch": 0.03, + "grad_norm": 0.7827369219619398, + "learning_rate": 2.704918032786886e-06, + "loss": 0.3397, + "step": 693 + }, + { + "epoch": 0.03, + "grad_norm": 0.8049744336281308, + "learning_rate": 2.7088212334113976e-06, + "loss": 0.3167, + "step": 694 + }, + { + "epoch": 0.03, + "grad_norm": 0.7631759404007294, + "learning_rate": 2.7127244340359097e-06, + "loss": 0.3218, + "step": 695 + }, + { + "epoch": 0.03, + "grad_norm": 0.7691376189334814, + "learning_rate": 2.716627634660422e-06, + "loss": 0.312, + "step": 696 + }, + { + "epoch": 0.03, + "grad_norm": 0.7838346235163963, + "learning_rate": 2.720530835284934e-06, + "loss": 0.3492, + "step": 697 + }, + { + "epoch": 0.03, + "grad_norm": 0.7862020941494849, + "learning_rate": 2.724434035909446e-06, + "loss": 0.3408, + "step": 698 + }, + { + "epoch": 0.03, + "grad_norm": 0.8570278601809549, + "learning_rate": 2.7283372365339583e-06, + "loss": 0.3641, + "step": 699 + }, + { + "epoch": 0.03, + "grad_norm": 0.8050586320636697, + "learning_rate": 2.7322404371584705e-06, + "loss": 0.3062, + "step": 700 + }, + { + "epoch": 0.03, + "grad_norm": 0.8177632339533573, + "learning_rate": 2.7361436377829826e-06, + "loss": 0.3332, + "step": 701 + }, + { + "epoch": 0.03, + "grad_norm": 0.779980016178864, + "learning_rate": 2.7400468384074947e-06, + "loss": 0.3122, + "step": 702 + }, + { + "epoch": 0.03, + "grad_norm": 0.8569443672980638, + "learning_rate": 2.743950039032007e-06, + "loss": 0.3361, + "step": 703 + }, + { + "epoch": 0.03, + "grad_norm": 0.7950587200221287, + "learning_rate": 2.747853239656519e-06, + "loss": 0.3497, + "step": 704 + }, + { + "epoch": 0.03, + "grad_norm": 0.7932447591678385, + "learning_rate": 2.7517564402810303e-06, + "loss": 0.3356, + "step": 705 + }, + { + "epoch": 0.03, + "grad_norm": 0.7541101258894791, + "learning_rate": 2.7556596409055424e-06, + "loss": 0.3446, + "step": 706 + }, + { + "epoch": 0.03, + "grad_norm": 0.8553408312023306, + "learning_rate": 2.7595628415300546e-06, + "loss": 0.3457, + "step": 707 + }, + { + "epoch": 0.03, + "grad_norm": 0.8124230545284448, + "learning_rate": 2.7634660421545667e-06, + "loss": 0.335, + "step": 708 + }, + { + "epoch": 0.03, + "grad_norm": 0.8496890969766349, + "learning_rate": 2.767369242779079e-06, + "loss": 0.3597, + "step": 709 + }, + { + "epoch": 0.03, + "grad_norm": 0.8451795950035875, + "learning_rate": 2.771272443403591e-06, + "loss": 0.3671, + "step": 710 + }, + { + "epoch": 0.03, + "grad_norm": 0.7715712362898155, + "learning_rate": 2.775175644028103e-06, + "loss": 0.3507, + "step": 711 + }, + { + "epoch": 0.03, + "grad_norm": 0.7788158978039329, + "learning_rate": 2.7790788446526153e-06, + "loss": 0.334, + "step": 712 + }, + { + "epoch": 0.03, + "grad_norm": 0.7978813760225691, + "learning_rate": 2.7829820452771274e-06, + "loss": 0.325, + "step": 713 + }, + { + "epoch": 0.03, + "grad_norm": 0.7763649021421601, + "learning_rate": 2.786885245901639e-06, + "loss": 0.3323, + "step": 714 + }, + { + "epoch": 0.03, + "grad_norm": 0.8332951878326039, + "learning_rate": 2.7907884465261513e-06, + "loss": 0.3337, + "step": 715 + }, + { + "epoch": 0.03, + "grad_norm": 0.8182831701712994, + "learning_rate": 2.7946916471506634e-06, + "loss": 0.3398, + "step": 716 + }, + { + "epoch": 0.03, + "grad_norm": 0.7181141025956902, + "learning_rate": 2.7985948477751756e-06, + "loss": 0.325, + "step": 717 + }, + { + "epoch": 0.03, + "grad_norm": 0.7495419041903385, + "learning_rate": 2.8024980483996877e-06, + "loss": 0.3321, + "step": 718 + }, + { + "epoch": 0.03, + "grad_norm": 0.7689848336555103, + "learning_rate": 2.8064012490242e-06, + "loss": 0.3336, + "step": 719 + }, + { + "epoch": 0.03, + "grad_norm": 0.8586580468246836, + "learning_rate": 2.810304449648712e-06, + "loss": 0.3506, + "step": 720 + }, + { + "epoch": 0.03, + "grad_norm": 0.7725515440599748, + "learning_rate": 2.814207650273224e-06, + "loss": 0.3364, + "step": 721 + }, + { + "epoch": 0.03, + "grad_norm": 0.8162205456560944, + "learning_rate": 2.8181108508977363e-06, + "loss": 0.3468, + "step": 722 + }, + { + "epoch": 0.03, + "grad_norm": 0.7342182698461299, + "learning_rate": 2.8220140515222484e-06, + "loss": 0.3511, + "step": 723 + }, + { + "epoch": 0.03, + "grad_norm": 0.7376438813335502, + "learning_rate": 2.8259172521467606e-06, + "loss": 0.3107, + "step": 724 + }, + { + "epoch": 0.03, + "grad_norm": 0.7853931944131775, + "learning_rate": 2.8298204527712727e-06, + "loss": 0.3332, + "step": 725 + }, + { + "epoch": 0.03, + "grad_norm": 0.8187099850589857, + "learning_rate": 2.8337236533957844e-06, + "loss": 0.3377, + "step": 726 + }, + { + "epoch": 0.03, + "grad_norm": 0.8274370486385725, + "learning_rate": 2.8376268540202966e-06, + "loss": 0.3496, + "step": 727 + }, + { + "epoch": 0.03, + "grad_norm": 0.7894338110040804, + "learning_rate": 2.8415300546448087e-06, + "loss": 0.3108, + "step": 728 + }, + { + "epoch": 0.03, + "grad_norm": 0.8427486897690427, + "learning_rate": 2.845433255269321e-06, + "loss": 0.3401, + "step": 729 + }, + { + "epoch": 0.03, + "grad_norm": 0.757772305733387, + "learning_rate": 2.849336455893833e-06, + "loss": 0.3213, + "step": 730 + }, + { + "epoch": 0.03, + "grad_norm": 0.8923657203583834, + "learning_rate": 2.853239656518345e-06, + "loss": 0.3354, + "step": 731 + }, + { + "epoch": 0.03, + "grad_norm": 0.7977685228531756, + "learning_rate": 2.8571428571428573e-06, + "loss": 0.3318, + "step": 732 + }, + { + "epoch": 0.03, + "grad_norm": 0.809048644715256, + "learning_rate": 2.8610460577673694e-06, + "loss": 0.3346, + "step": 733 + }, + { + "epoch": 0.03, + "grad_norm": 0.793338825118952, + "learning_rate": 2.8649492583918815e-06, + "loss": 0.3217, + "step": 734 + }, + { + "epoch": 0.03, + "grad_norm": 0.7942420180904798, + "learning_rate": 2.8688524590163937e-06, + "loss": 0.3377, + "step": 735 + }, + { + "epoch": 0.03, + "grad_norm": 0.8284604305290624, + "learning_rate": 2.872755659640906e-06, + "loss": 0.3268, + "step": 736 + }, + { + "epoch": 0.03, + "grad_norm": 0.8219416473293603, + "learning_rate": 2.8766588602654175e-06, + "loss": 0.3317, + "step": 737 + }, + { + "epoch": 0.03, + "grad_norm": 0.7896923942535435, + "learning_rate": 2.8805620608899297e-06, + "loss": 0.3154, + "step": 738 + }, + { + "epoch": 0.03, + "grad_norm": 0.8486812000557402, + "learning_rate": 2.884465261514442e-06, + "loss": 0.3261, + "step": 739 + }, + { + "epoch": 0.03, + "grad_norm": 0.7936555710601935, + "learning_rate": 2.888368462138954e-06, + "loss": 0.3116, + "step": 740 + }, + { + "epoch": 0.03, + "grad_norm": 0.7450683315725468, + "learning_rate": 2.892271662763466e-06, + "loss": 0.3053, + "step": 741 + }, + { + "epoch": 0.03, + "grad_norm": 0.8234800384273521, + "learning_rate": 2.8961748633879782e-06, + "loss": 0.3349, + "step": 742 + }, + { + "epoch": 0.03, + "grad_norm": 0.8156383945114665, + "learning_rate": 2.9000780640124904e-06, + "loss": 0.3381, + "step": 743 + }, + { + "epoch": 0.03, + "grad_norm": 0.8410110252236526, + "learning_rate": 2.9039812646370025e-06, + "loss": 0.354, + "step": 744 + }, + { + "epoch": 0.03, + "grad_norm": 0.916437311505579, + "learning_rate": 2.9078844652615147e-06, + "loss": 0.3396, + "step": 745 + }, + { + "epoch": 0.03, + "grad_norm": 0.7834399088297331, + "learning_rate": 2.911787665886027e-06, + "loss": 0.3251, + "step": 746 + }, + { + "epoch": 0.03, + "grad_norm": 0.8552350688832298, + "learning_rate": 2.915690866510539e-06, + "loss": 0.3303, + "step": 747 + }, + { + "epoch": 0.04, + "grad_norm": 0.7241434188411299, + "learning_rate": 2.919594067135051e-06, + "loss": 0.3089, + "step": 748 + }, + { + "epoch": 0.04, + "grad_norm": 0.7812130167151623, + "learning_rate": 2.923497267759563e-06, + "loss": 0.3188, + "step": 749 + }, + { + "epoch": 0.04, + "grad_norm": 0.8122469616416123, + "learning_rate": 2.927400468384075e-06, + "loss": 0.3372, + "step": 750 + }, + { + "epoch": 0.04, + "grad_norm": 0.8029421057407501, + "learning_rate": 2.931303669008587e-06, + "loss": 0.3301, + "step": 751 + }, + { + "epoch": 0.04, + "grad_norm": 0.7566646579184225, + "learning_rate": 2.9352068696330992e-06, + "loss": 0.3185, + "step": 752 + }, + { + "epoch": 0.04, + "grad_norm": 0.7968556552003517, + "learning_rate": 2.9391100702576114e-06, + "loss": 0.3448, + "step": 753 + }, + { + "epoch": 0.04, + "grad_norm": 0.8016038204651001, + "learning_rate": 2.9430132708821235e-06, + "loss": 0.333, + "step": 754 + }, + { + "epoch": 0.04, + "grad_norm": 0.8618607621424933, + "learning_rate": 2.9469164715066356e-06, + "loss": 0.3184, + "step": 755 + }, + { + "epoch": 0.04, + "grad_norm": 0.8302300219646712, + "learning_rate": 2.9508196721311478e-06, + "loss": 0.3337, + "step": 756 + }, + { + "epoch": 0.04, + "grad_norm": 0.799091014412622, + "learning_rate": 2.95472287275566e-06, + "loss": 0.325, + "step": 757 + }, + { + "epoch": 0.04, + "grad_norm": 0.7691156714790359, + "learning_rate": 2.958626073380172e-06, + "loss": 0.3311, + "step": 758 + }, + { + "epoch": 0.04, + "grad_norm": 0.8121559532747387, + "learning_rate": 2.962529274004684e-06, + "loss": 0.3274, + "step": 759 + }, + { + "epoch": 0.04, + "grad_norm": 0.8344345712853426, + "learning_rate": 2.9664324746291963e-06, + "loss": 0.3277, + "step": 760 + }, + { + "epoch": 0.04, + "grad_norm": 0.8253657271612495, + "learning_rate": 2.970335675253708e-06, + "loss": 0.3427, + "step": 761 + }, + { + "epoch": 0.04, + "grad_norm": 0.8156392379208636, + "learning_rate": 2.97423887587822e-06, + "loss": 0.3511, + "step": 762 + }, + { + "epoch": 0.04, + "grad_norm": 0.7541797466669938, + "learning_rate": 2.9781420765027323e-06, + "loss": 0.3347, + "step": 763 + }, + { + "epoch": 0.04, + "grad_norm": 0.8363411941041499, + "learning_rate": 2.9820452771272445e-06, + "loss": 0.3336, + "step": 764 + }, + { + "epoch": 0.04, + "grad_norm": 0.7204706114714415, + "learning_rate": 2.9859484777517566e-06, + "loss": 0.3185, + "step": 765 + }, + { + "epoch": 0.04, + "grad_norm": 0.8532660169644781, + "learning_rate": 2.9898516783762688e-06, + "loss": 0.3295, + "step": 766 + }, + { + "epoch": 0.04, + "grad_norm": 0.7282820054166352, + "learning_rate": 2.993754879000781e-06, + "loss": 0.3193, + "step": 767 + }, + { + "epoch": 0.04, + "grad_norm": 0.7971398304095669, + "learning_rate": 2.997658079625293e-06, + "loss": 0.3187, + "step": 768 + }, + { + "epoch": 0.04, + "grad_norm": 0.8190063663785068, + "learning_rate": 3.001561280249805e-06, + "loss": 0.3194, + "step": 769 + }, + { + "epoch": 0.04, + "grad_norm": 0.8616919609003421, + "learning_rate": 3.0054644808743173e-06, + "loss": 0.3084, + "step": 770 + }, + { + "epoch": 0.04, + "grad_norm": 0.811698617625455, + "learning_rate": 3.0093676814988295e-06, + "loss": 0.3349, + "step": 771 + }, + { + "epoch": 0.04, + "grad_norm": 0.7756913400165172, + "learning_rate": 3.0132708821233416e-06, + "loss": 0.3203, + "step": 772 + }, + { + "epoch": 0.04, + "grad_norm": 0.9150608313252299, + "learning_rate": 3.0171740827478533e-06, + "loss": 0.358, + "step": 773 + }, + { + "epoch": 0.04, + "grad_norm": 0.7522790028830695, + "learning_rate": 3.0210772833723655e-06, + "loss": 0.3302, + "step": 774 + }, + { + "epoch": 0.04, + "grad_norm": 0.7782927956049192, + "learning_rate": 3.0249804839968776e-06, + "loss": 0.3072, + "step": 775 + }, + { + "epoch": 0.04, + "grad_norm": 0.7995018967422126, + "learning_rate": 3.0288836846213897e-06, + "loss": 0.3155, + "step": 776 + }, + { + "epoch": 0.04, + "grad_norm": 0.8307098312387007, + "learning_rate": 3.032786885245902e-06, + "loss": 0.3338, + "step": 777 + }, + { + "epoch": 0.04, + "grad_norm": 0.7731822591171632, + "learning_rate": 3.036690085870414e-06, + "loss": 0.2989, + "step": 778 + }, + { + "epoch": 0.04, + "grad_norm": 0.841444102203878, + "learning_rate": 3.040593286494926e-06, + "loss": 0.3577, + "step": 779 + }, + { + "epoch": 0.04, + "grad_norm": 0.9089754300438045, + "learning_rate": 3.0444964871194383e-06, + "loss": 0.333, + "step": 780 + }, + { + "epoch": 0.04, + "grad_norm": 0.8462727412075749, + "learning_rate": 3.0483996877439504e-06, + "loss": 0.3414, + "step": 781 + }, + { + "epoch": 0.04, + "grad_norm": 0.8228273244503667, + "learning_rate": 3.0523028883684626e-06, + "loss": 0.3344, + "step": 782 + }, + { + "epoch": 0.04, + "grad_norm": 0.8447388776746985, + "learning_rate": 3.0562060889929747e-06, + "loss": 0.3137, + "step": 783 + }, + { + "epoch": 0.04, + "grad_norm": 0.8754394322361377, + "learning_rate": 3.0601092896174864e-06, + "loss": 0.3395, + "step": 784 + }, + { + "epoch": 0.04, + "grad_norm": 0.838103587023006, + "learning_rate": 3.0640124902419986e-06, + "loss": 0.3337, + "step": 785 + }, + { + "epoch": 0.04, + "grad_norm": 0.7500904029855162, + "learning_rate": 3.0679156908665107e-06, + "loss": 0.3338, + "step": 786 + }, + { + "epoch": 0.04, + "grad_norm": 0.8624169718221015, + "learning_rate": 3.071818891491023e-06, + "loss": 0.3436, + "step": 787 + }, + { + "epoch": 0.04, + "grad_norm": 1.052708452495042, + "learning_rate": 3.075722092115535e-06, + "loss": 0.3508, + "step": 788 + }, + { + "epoch": 0.04, + "grad_norm": 0.8891424545077748, + "learning_rate": 3.079625292740047e-06, + "loss": 0.3318, + "step": 789 + }, + { + "epoch": 0.04, + "grad_norm": 0.9109984378098749, + "learning_rate": 3.0835284933645593e-06, + "loss": 0.3675, + "step": 790 + }, + { + "epoch": 0.04, + "grad_norm": 1.0039460159844047, + "learning_rate": 3.0874316939890714e-06, + "loss": 0.3392, + "step": 791 + }, + { + "epoch": 0.04, + "grad_norm": 0.8005122878965755, + "learning_rate": 3.0913348946135836e-06, + "loss": 0.3538, + "step": 792 + }, + { + "epoch": 0.04, + "grad_norm": 0.7644232596544708, + "learning_rate": 3.0952380952380957e-06, + "loss": 0.3504, + "step": 793 + }, + { + "epoch": 0.04, + "grad_norm": 0.7994143003467342, + "learning_rate": 3.099141295862608e-06, + "loss": 0.3262, + "step": 794 + }, + { + "epoch": 0.04, + "grad_norm": 0.8221448503875004, + "learning_rate": 3.10304449648712e-06, + "loss": 0.3246, + "step": 795 + }, + { + "epoch": 0.04, + "grad_norm": 0.7647300450833371, + "learning_rate": 3.1069476971116317e-06, + "loss": 0.3218, + "step": 796 + }, + { + "epoch": 0.04, + "grad_norm": 0.8265657721600739, + "learning_rate": 3.110850897736144e-06, + "loss": 0.3511, + "step": 797 + }, + { + "epoch": 0.04, + "grad_norm": 0.8266120344513347, + "learning_rate": 3.114754098360656e-06, + "loss": 0.3158, + "step": 798 + }, + { + "epoch": 0.04, + "grad_norm": 0.8209161618407386, + "learning_rate": 3.118657298985168e-06, + "loss": 0.3275, + "step": 799 + }, + { + "epoch": 0.04, + "grad_norm": 0.7771361400125482, + "learning_rate": 3.1225604996096803e-06, + "loss": 0.3566, + "step": 800 + }, + { + "epoch": 0.04, + "grad_norm": 0.793282863891631, + "learning_rate": 3.1264637002341924e-06, + "loss": 0.3359, + "step": 801 + }, + { + "epoch": 0.04, + "grad_norm": 0.8635743381528056, + "learning_rate": 3.1303669008587046e-06, + "loss": 0.3387, + "step": 802 + }, + { + "epoch": 0.04, + "grad_norm": 0.8536329729255786, + "learning_rate": 3.1342701014832167e-06, + "loss": 0.3282, + "step": 803 + }, + { + "epoch": 0.04, + "grad_norm": 0.8376574890269507, + "learning_rate": 3.138173302107729e-06, + "loss": 0.3207, + "step": 804 + }, + { + "epoch": 0.04, + "grad_norm": 0.9280302526645755, + "learning_rate": 3.142076502732241e-06, + "loss": 0.3331, + "step": 805 + }, + { + "epoch": 0.04, + "grad_norm": 0.8134711744056579, + "learning_rate": 3.145979703356753e-06, + "loss": 0.3303, + "step": 806 + }, + { + "epoch": 0.04, + "grad_norm": 0.7960738984951738, + "learning_rate": 3.1498829039812653e-06, + "loss": 0.3396, + "step": 807 + }, + { + "epoch": 0.04, + "grad_norm": 0.8022116621207531, + "learning_rate": 3.153786104605777e-06, + "loss": 0.3508, + "step": 808 + }, + { + "epoch": 0.04, + "grad_norm": 0.916071245771324, + "learning_rate": 3.157689305230289e-06, + "loss": 0.3507, + "step": 809 + }, + { + "epoch": 0.04, + "grad_norm": 0.7857188471874412, + "learning_rate": 3.1615925058548013e-06, + "loss": 0.3443, + "step": 810 + }, + { + "epoch": 0.04, + "grad_norm": 0.7441787885157019, + "learning_rate": 3.1654957064793134e-06, + "loss": 0.326, + "step": 811 + }, + { + "epoch": 0.04, + "grad_norm": 0.7444036351495855, + "learning_rate": 3.1693989071038255e-06, + "loss": 0.3331, + "step": 812 + }, + { + "epoch": 0.04, + "grad_norm": 0.8392784058471788, + "learning_rate": 3.1733021077283377e-06, + "loss": 0.3325, + "step": 813 + }, + { + "epoch": 0.04, + "grad_norm": 0.803343334347028, + "learning_rate": 3.17720530835285e-06, + "loss": 0.3359, + "step": 814 + }, + { + "epoch": 0.04, + "grad_norm": 0.7522728429933897, + "learning_rate": 3.181108508977362e-06, + "loss": 0.3316, + "step": 815 + }, + { + "epoch": 0.04, + "grad_norm": 0.8134946439357837, + "learning_rate": 3.185011709601874e-06, + "loss": 0.3503, + "step": 816 + }, + { + "epoch": 0.04, + "grad_norm": 0.8752313960312206, + "learning_rate": 3.1889149102263862e-06, + "loss": 0.3666, + "step": 817 + }, + { + "epoch": 0.04, + "grad_norm": 0.7806651192201576, + "learning_rate": 3.1928181108508984e-06, + "loss": 0.3511, + "step": 818 + }, + { + "epoch": 0.04, + "grad_norm": 0.7680533420472494, + "learning_rate": 3.1967213114754105e-06, + "loss": 0.3318, + "step": 819 + }, + { + "epoch": 0.04, + "grad_norm": 0.83716733877137, + "learning_rate": 3.2006245120999222e-06, + "loss": 0.3374, + "step": 820 + }, + { + "epoch": 0.04, + "grad_norm": 0.8204507134205091, + "learning_rate": 3.2045277127244344e-06, + "loss": 0.3365, + "step": 821 + }, + { + "epoch": 0.04, + "grad_norm": 0.7842270747171057, + "learning_rate": 3.2084309133489465e-06, + "loss": 0.3227, + "step": 822 + }, + { + "epoch": 0.04, + "grad_norm": 0.7417054584959047, + "learning_rate": 3.2123341139734587e-06, + "loss": 0.3293, + "step": 823 + }, + { + "epoch": 0.04, + "grad_norm": 0.7188135181900579, + "learning_rate": 3.216237314597971e-06, + "loss": 0.316, + "step": 824 + }, + { + "epoch": 0.04, + "grad_norm": 0.8035596046328799, + "learning_rate": 3.220140515222483e-06, + "loss": 0.3372, + "step": 825 + }, + { + "epoch": 0.04, + "grad_norm": 0.7283596883233108, + "learning_rate": 3.224043715846995e-06, + "loss": 0.3278, + "step": 826 + }, + { + "epoch": 0.04, + "grad_norm": 0.7509435619880571, + "learning_rate": 3.2279469164715072e-06, + "loss": 0.3239, + "step": 827 + }, + { + "epoch": 0.04, + "grad_norm": 0.7211576057815914, + "learning_rate": 3.2318501170960194e-06, + "loss": 0.3213, + "step": 828 + }, + { + "epoch": 0.04, + "grad_norm": 0.726394665486096, + "learning_rate": 3.2357533177205315e-06, + "loss": 0.3183, + "step": 829 + }, + { + "epoch": 0.04, + "grad_norm": 0.7713345483006055, + "learning_rate": 3.2396565183450436e-06, + "loss": 0.3227, + "step": 830 + }, + { + "epoch": 0.04, + "grad_norm": 0.8340463553704909, + "learning_rate": 3.2435597189695554e-06, + "loss": 0.3416, + "step": 831 + }, + { + "epoch": 0.04, + "grad_norm": 0.7545648500036302, + "learning_rate": 3.2474629195940675e-06, + "loss": 0.3427, + "step": 832 + }, + { + "epoch": 0.04, + "grad_norm": 0.7312445566337831, + "learning_rate": 3.2513661202185792e-06, + "loss": 0.3327, + "step": 833 + }, + { + "epoch": 0.04, + "grad_norm": 0.8230221181091733, + "learning_rate": 3.2552693208430914e-06, + "loss": 0.3396, + "step": 834 + }, + { + "epoch": 0.04, + "grad_norm": 0.8009610524453586, + "learning_rate": 3.2591725214676035e-06, + "loss": 0.32, + "step": 835 + }, + { + "epoch": 0.04, + "grad_norm": 0.7457447458736481, + "learning_rate": 3.2630757220921156e-06, + "loss": 0.3063, + "step": 836 + }, + { + "epoch": 0.04, + "grad_norm": 0.7684066144698827, + "learning_rate": 3.2669789227166278e-06, + "loss": 0.3165, + "step": 837 + }, + { + "epoch": 0.04, + "grad_norm": 0.8522804128859687, + "learning_rate": 3.27088212334114e-06, + "loss": 0.3393, + "step": 838 + }, + { + "epoch": 0.04, + "grad_norm": 0.7635423777074165, + "learning_rate": 3.274785323965652e-06, + "loss": 0.3181, + "step": 839 + }, + { + "epoch": 0.04, + "grad_norm": 0.7636316455379989, + "learning_rate": 3.2786885245901638e-06, + "loss": 0.3481, + "step": 840 + }, + { + "epoch": 0.04, + "grad_norm": 0.7962142143126167, + "learning_rate": 3.282591725214676e-06, + "loss": 0.3634, + "step": 841 + }, + { + "epoch": 0.04, + "grad_norm": 0.797541005449039, + "learning_rate": 3.286494925839188e-06, + "loss": 0.341, + "step": 842 + }, + { + "epoch": 0.04, + "grad_norm": 0.7915566085751801, + "learning_rate": 3.2903981264637e-06, + "loss": 0.3378, + "step": 843 + }, + { + "epoch": 0.04, + "grad_norm": 0.8072465079859944, + "learning_rate": 3.2943013270882123e-06, + "loss": 0.3385, + "step": 844 + }, + { + "epoch": 0.04, + "grad_norm": 0.769720696479069, + "learning_rate": 3.2982045277127245e-06, + "loss": 0.3264, + "step": 845 + }, + { + "epoch": 0.04, + "grad_norm": 0.8117625605037423, + "learning_rate": 3.3021077283372366e-06, + "loss": 0.3332, + "step": 846 + }, + { + "epoch": 0.04, + "grad_norm": 0.8080889046006523, + "learning_rate": 3.3060109289617488e-06, + "loss": 0.3328, + "step": 847 + }, + { + "epoch": 0.04, + "grad_norm": 0.9152136741387104, + "learning_rate": 3.309914129586261e-06, + "loss": 0.3362, + "step": 848 + }, + { + "epoch": 0.04, + "grad_norm": 0.718083494197403, + "learning_rate": 3.313817330210773e-06, + "loss": 0.2867, + "step": 849 + }, + { + "epoch": 0.04, + "grad_norm": 0.775350908486815, + "learning_rate": 3.317720530835285e-06, + "loss": 0.312, + "step": 850 + }, + { + "epoch": 0.04, + "grad_norm": 0.8789946035530294, + "learning_rate": 3.321623731459797e-06, + "loss": 0.3269, + "step": 851 + }, + { + "epoch": 0.04, + "grad_norm": 0.793020921778407, + "learning_rate": 3.325526932084309e-06, + "loss": 0.3308, + "step": 852 + }, + { + "epoch": 0.04, + "grad_norm": 0.7259971758792938, + "learning_rate": 3.329430132708821e-06, + "loss": 0.3297, + "step": 853 + }, + { + "epoch": 0.04, + "grad_norm": 0.8153957475378928, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.3226, + "step": 854 + }, + { + "epoch": 0.04, + "grad_norm": 0.7650808299692285, + "learning_rate": 3.3372365339578455e-06, + "loss": 0.3195, + "step": 855 + }, + { + "epoch": 0.04, + "grad_norm": 0.8111772008170571, + "learning_rate": 3.3411397345823576e-06, + "loss": 0.3313, + "step": 856 + }, + { + "epoch": 0.04, + "grad_norm": 0.761890415525151, + "learning_rate": 3.3450429352068697e-06, + "loss": 0.3417, + "step": 857 + }, + { + "epoch": 0.04, + "grad_norm": 0.8398770092534883, + "learning_rate": 3.348946135831382e-06, + "loss": 0.3438, + "step": 858 + }, + { + "epoch": 0.04, + "grad_norm": 0.9113600145812258, + "learning_rate": 3.352849336455894e-06, + "loss": 0.3268, + "step": 859 + }, + { + "epoch": 0.04, + "grad_norm": 0.7327985633088402, + "learning_rate": 3.356752537080406e-06, + "loss": 0.3147, + "step": 860 + }, + { + "epoch": 0.04, + "grad_norm": 0.7747300652496784, + "learning_rate": 3.3606557377049183e-06, + "loss": 0.3369, + "step": 861 + }, + { + "epoch": 0.04, + "grad_norm": 0.8365518812699518, + "learning_rate": 3.3645589383294304e-06, + "loss": 0.3526, + "step": 862 + }, + { + "epoch": 0.04, + "grad_norm": 0.7376035705265241, + "learning_rate": 3.368462138953942e-06, + "loss": 0.3184, + "step": 863 + }, + { + "epoch": 0.04, + "grad_norm": 0.7768389737953629, + "learning_rate": 3.3723653395784543e-06, + "loss": 0.3366, + "step": 864 + }, + { + "epoch": 0.04, + "grad_norm": 0.7722353163854936, + "learning_rate": 3.3762685402029664e-06, + "loss": 0.3379, + "step": 865 + }, + { + "epoch": 0.04, + "grad_norm": 0.7644296862178812, + "learning_rate": 3.3801717408274786e-06, + "loss": 0.3139, + "step": 866 + }, + { + "epoch": 0.04, + "grad_norm": 0.7227476378947277, + "learning_rate": 3.3840749414519907e-06, + "loss": 0.32, + "step": 867 + }, + { + "epoch": 0.04, + "grad_norm": 0.8275473037758273, + "learning_rate": 3.387978142076503e-06, + "loss": 0.3553, + "step": 868 + }, + { + "epoch": 0.04, + "grad_norm": 0.865947688952979, + "learning_rate": 3.391881342701015e-06, + "loss": 0.3446, + "step": 869 + }, + { + "epoch": 0.04, + "grad_norm": 0.8130923373070527, + "learning_rate": 3.395784543325527e-06, + "loss": 0.3263, + "step": 870 + }, + { + "epoch": 0.04, + "grad_norm": 0.7676129307866603, + "learning_rate": 3.3996877439500393e-06, + "loss": 0.337, + "step": 871 + }, + { + "epoch": 0.04, + "grad_norm": 0.7797667888693276, + "learning_rate": 3.4035909445745514e-06, + "loss": 0.337, + "step": 872 + }, + { + "epoch": 0.04, + "grad_norm": 0.8253042591862586, + "learning_rate": 3.4074941451990636e-06, + "loss": 0.321, + "step": 873 + }, + { + "epoch": 0.04, + "grad_norm": 0.8117768546323659, + "learning_rate": 3.4113973458235757e-06, + "loss": 0.3355, + "step": 874 + }, + { + "epoch": 0.04, + "grad_norm": 0.7738043801327601, + "learning_rate": 3.4153005464480874e-06, + "loss": 0.3237, + "step": 875 + }, + { + "epoch": 0.04, + "grad_norm": 0.7271047040670663, + "learning_rate": 3.4192037470725996e-06, + "loss": 0.3158, + "step": 876 + }, + { + "epoch": 0.04, + "grad_norm": 0.8087840120826398, + "learning_rate": 3.4231069476971117e-06, + "loss": 0.3191, + "step": 877 + }, + { + "epoch": 0.04, + "grad_norm": 0.7941412633035108, + "learning_rate": 3.427010148321624e-06, + "loss": 0.3388, + "step": 878 + }, + { + "epoch": 0.04, + "grad_norm": 0.7877303502580432, + "learning_rate": 3.430913348946136e-06, + "loss": 0.3453, + "step": 879 + }, + { + "epoch": 0.04, + "grad_norm": 0.8154554299572914, + "learning_rate": 3.434816549570648e-06, + "loss": 0.3351, + "step": 880 + }, + { + "epoch": 0.04, + "grad_norm": 0.7704542209398879, + "learning_rate": 3.4387197501951603e-06, + "loss": 0.3334, + "step": 881 + }, + { + "epoch": 0.04, + "grad_norm": 0.7610813178384717, + "learning_rate": 3.4426229508196724e-06, + "loss": 0.3363, + "step": 882 + }, + { + "epoch": 0.04, + "grad_norm": 0.8123810668489388, + "learning_rate": 3.4465261514441846e-06, + "loss": 0.3122, + "step": 883 + }, + { + "epoch": 0.04, + "grad_norm": 0.8531157012395201, + "learning_rate": 3.4504293520686967e-06, + "loss": 0.3422, + "step": 884 + }, + { + "epoch": 0.04, + "grad_norm": 0.7917430412840865, + "learning_rate": 3.454332552693209e-06, + "loss": 0.363, + "step": 885 + }, + { + "epoch": 0.04, + "grad_norm": 0.7724838395015216, + "learning_rate": 3.458235753317721e-06, + "loss": 0.3266, + "step": 886 + }, + { + "epoch": 0.04, + "grad_norm": 0.7393769065386122, + "learning_rate": 3.4621389539422327e-06, + "loss": 0.2936, + "step": 887 + }, + { + "epoch": 0.04, + "grad_norm": 1.0559936009065718, + "learning_rate": 3.466042154566745e-06, + "loss": 0.3553, + "step": 888 + }, + { + "epoch": 0.04, + "grad_norm": 0.8293808528940515, + "learning_rate": 3.469945355191257e-06, + "loss": 0.3443, + "step": 889 + }, + { + "epoch": 0.04, + "grad_norm": 0.7635639412811581, + "learning_rate": 3.473848555815769e-06, + "loss": 0.3178, + "step": 890 + }, + { + "epoch": 0.04, + "grad_norm": 0.8130042775388293, + "learning_rate": 3.4777517564402813e-06, + "loss": 0.3094, + "step": 891 + }, + { + "epoch": 0.04, + "grad_norm": 0.9537634622389648, + "learning_rate": 3.4816549570647934e-06, + "loss": 0.3189, + "step": 892 + }, + { + "epoch": 0.04, + "grad_norm": 0.7418089558489026, + "learning_rate": 3.4855581576893055e-06, + "loss": 0.3367, + "step": 893 + }, + { + "epoch": 0.04, + "grad_norm": 0.956840469122639, + "learning_rate": 3.4894613583138177e-06, + "loss": 0.3492, + "step": 894 + }, + { + "epoch": 0.04, + "grad_norm": 0.8172849941417073, + "learning_rate": 3.49336455893833e-06, + "loss": 0.3452, + "step": 895 + }, + { + "epoch": 0.04, + "grad_norm": 0.7510868761341295, + "learning_rate": 3.497267759562842e-06, + "loss": 0.3177, + "step": 896 + }, + { + "epoch": 0.04, + "grad_norm": 0.8060394791917349, + "learning_rate": 3.501170960187354e-06, + "loss": 0.3353, + "step": 897 + }, + { + "epoch": 0.04, + "grad_norm": 0.806716986079775, + "learning_rate": 3.505074160811866e-06, + "loss": 0.3048, + "step": 898 + }, + { + "epoch": 0.04, + "grad_norm": 0.8600109143743565, + "learning_rate": 3.508977361436378e-06, + "loss": 0.3226, + "step": 899 + }, + { + "epoch": 0.04, + "grad_norm": 0.7713881667032363, + "learning_rate": 3.51288056206089e-06, + "loss": 0.3228, + "step": 900 + }, + { + "epoch": 0.04, + "grad_norm": 0.8015311464017524, + "learning_rate": 3.5167837626854022e-06, + "loss": 0.346, + "step": 901 + }, + { + "epoch": 0.04, + "grad_norm": 0.942495571454301, + "learning_rate": 3.5206869633099144e-06, + "loss": 0.3549, + "step": 902 + }, + { + "epoch": 0.04, + "grad_norm": 0.7194074341345336, + "learning_rate": 3.5245901639344265e-06, + "loss": 0.3305, + "step": 903 + }, + { + "epoch": 0.04, + "grad_norm": 0.7931051777866895, + "learning_rate": 3.5284933645589387e-06, + "loss": 0.3153, + "step": 904 + }, + { + "epoch": 0.04, + "grad_norm": 0.8029643392390938, + "learning_rate": 3.532396565183451e-06, + "loss": 0.3365, + "step": 905 + }, + { + "epoch": 0.04, + "grad_norm": 0.7611429213543123, + "learning_rate": 3.536299765807963e-06, + "loss": 0.3275, + "step": 906 + }, + { + "epoch": 0.04, + "grad_norm": 0.7455878952583745, + "learning_rate": 3.540202966432475e-06, + "loss": 0.3199, + "step": 907 + }, + { + "epoch": 0.04, + "grad_norm": 0.9290360237593249, + "learning_rate": 3.5441061670569872e-06, + "loss": 0.3595, + "step": 908 + }, + { + "epoch": 0.04, + "grad_norm": 0.7949664721782665, + "learning_rate": 3.5480093676814994e-06, + "loss": 0.3342, + "step": 909 + }, + { + "epoch": 0.04, + "grad_norm": 0.8055983880398003, + "learning_rate": 3.551912568306011e-06, + "loss": 0.3361, + "step": 910 + }, + { + "epoch": 0.04, + "grad_norm": 0.7882975494866825, + "learning_rate": 3.5558157689305232e-06, + "loss": 0.3192, + "step": 911 + }, + { + "epoch": 0.04, + "grad_norm": 0.7909086937151064, + "learning_rate": 3.5597189695550354e-06, + "loss": 0.3285, + "step": 912 + }, + { + "epoch": 0.04, + "grad_norm": 0.7765582277116957, + "learning_rate": 3.5636221701795475e-06, + "loss": 0.3142, + "step": 913 + }, + { + "epoch": 0.04, + "grad_norm": 0.8367425350249224, + "learning_rate": 3.5675253708040596e-06, + "loss": 0.3329, + "step": 914 + }, + { + "epoch": 0.04, + "grad_norm": 0.8409167641030671, + "learning_rate": 3.5714285714285718e-06, + "loss": 0.3098, + "step": 915 + }, + { + "epoch": 0.04, + "grad_norm": 0.8301927914526555, + "learning_rate": 3.575331772053084e-06, + "loss": 0.3414, + "step": 916 + }, + { + "epoch": 0.04, + "grad_norm": 0.8491807077348973, + "learning_rate": 3.579234972677596e-06, + "loss": 0.3327, + "step": 917 + }, + { + "epoch": 0.04, + "grad_norm": 0.8440584749190931, + "learning_rate": 3.583138173302108e-06, + "loss": 0.3315, + "step": 918 + }, + { + "epoch": 0.04, + "grad_norm": 0.8440893986467001, + "learning_rate": 3.5870413739266203e-06, + "loss": 0.3487, + "step": 919 + }, + { + "epoch": 0.04, + "grad_norm": 0.859290425354415, + "learning_rate": 3.5909445745511325e-06, + "loss": 0.3163, + "step": 920 + }, + { + "epoch": 0.04, + "grad_norm": 0.7919663481629915, + "learning_rate": 3.5948477751756446e-06, + "loss": 0.3058, + "step": 921 + }, + { + "epoch": 0.04, + "grad_norm": 0.8251123311409907, + "learning_rate": 3.5987509758001563e-06, + "loss": 0.3264, + "step": 922 + }, + { + "epoch": 0.04, + "grad_norm": 0.7946474838515742, + "learning_rate": 3.6026541764246685e-06, + "loss": 0.3276, + "step": 923 + }, + { + "epoch": 0.04, + "grad_norm": 0.8532649866655985, + "learning_rate": 3.6065573770491806e-06, + "loss": 0.3395, + "step": 924 + }, + { + "epoch": 0.04, + "grad_norm": 0.8548919937304804, + "learning_rate": 3.6104605776736928e-06, + "loss": 0.3369, + "step": 925 + }, + { + "epoch": 0.04, + "grad_norm": 0.8955005454822654, + "learning_rate": 3.614363778298205e-06, + "loss": 0.3207, + "step": 926 + }, + { + "epoch": 0.04, + "grad_norm": 0.7632648826424214, + "learning_rate": 3.618266978922717e-06, + "loss": 0.3437, + "step": 927 + }, + { + "epoch": 0.04, + "grad_norm": 0.8072860604476365, + "learning_rate": 3.622170179547229e-06, + "loss": 0.3255, + "step": 928 + }, + { + "epoch": 0.04, + "grad_norm": 0.8760802832467262, + "learning_rate": 3.6260733801717413e-06, + "loss": 0.3646, + "step": 929 + }, + { + "epoch": 0.04, + "grad_norm": 0.8656503814453173, + "learning_rate": 3.6299765807962535e-06, + "loss": 0.3252, + "step": 930 + }, + { + "epoch": 0.04, + "grad_norm": 0.8130250774495784, + "learning_rate": 3.6338797814207656e-06, + "loss": 0.3484, + "step": 931 + }, + { + "epoch": 0.04, + "grad_norm": 0.7909203448417327, + "learning_rate": 3.6377829820452777e-06, + "loss": 0.3218, + "step": 932 + }, + { + "epoch": 0.04, + "grad_norm": 0.6964803463182522, + "learning_rate": 3.64168618266979e-06, + "loss": 0.3119, + "step": 933 + }, + { + "epoch": 0.04, + "grad_norm": 0.8720178364428258, + "learning_rate": 3.6455893832943016e-06, + "loss": 0.3593, + "step": 934 + }, + { + "epoch": 0.04, + "grad_norm": 0.8584073671761802, + "learning_rate": 3.6494925839188137e-06, + "loss": 0.3391, + "step": 935 + }, + { + "epoch": 0.04, + "grad_norm": 0.7920450003494458, + "learning_rate": 3.653395784543326e-06, + "loss": 0.3336, + "step": 936 + }, + { + "epoch": 0.04, + "grad_norm": 0.8426930769580041, + "learning_rate": 3.657298985167838e-06, + "loss": 0.3311, + "step": 937 + }, + { + "epoch": 0.04, + "grad_norm": 0.8689801617084886, + "learning_rate": 3.66120218579235e-06, + "loss": 0.3394, + "step": 938 + }, + { + "epoch": 0.04, + "grad_norm": 0.8286000493056713, + "learning_rate": 3.6651053864168623e-06, + "loss": 0.3222, + "step": 939 + }, + { + "epoch": 0.04, + "grad_norm": 0.800336495310161, + "learning_rate": 3.6690085870413744e-06, + "loss": 0.3549, + "step": 940 + }, + { + "epoch": 0.04, + "grad_norm": 0.8617829184382465, + "learning_rate": 3.6729117876658866e-06, + "loss": 0.3202, + "step": 941 + }, + { + "epoch": 0.04, + "grad_norm": 0.8689088516749263, + "learning_rate": 3.6768149882903987e-06, + "loss": 0.3409, + "step": 942 + }, + { + "epoch": 0.04, + "grad_norm": 0.8013660151983368, + "learning_rate": 3.680718188914911e-06, + "loss": 0.3298, + "step": 943 + }, + { + "epoch": 0.04, + "grad_norm": 0.7557451681966474, + "learning_rate": 3.684621389539423e-06, + "loss": 0.3365, + "step": 944 + }, + { + "epoch": 0.04, + "grad_norm": 0.8439126755160394, + "learning_rate": 3.6885245901639347e-06, + "loss": 0.3452, + "step": 945 + }, + { + "epoch": 0.04, + "grad_norm": 0.8430614543303967, + "learning_rate": 3.692427790788447e-06, + "loss": 0.3352, + "step": 946 + }, + { + "epoch": 0.04, + "grad_norm": 0.7856132455875008, + "learning_rate": 3.696330991412959e-06, + "loss": 0.3264, + "step": 947 + }, + { + "epoch": 0.04, + "grad_norm": 0.7387629642401166, + "learning_rate": 3.700234192037471e-06, + "loss": 0.3119, + "step": 948 + }, + { + "epoch": 0.04, + "grad_norm": 0.813033092286266, + "learning_rate": 3.7041373926619833e-06, + "loss": 0.3565, + "step": 949 + }, + { + "epoch": 0.04, + "grad_norm": 0.7596858845415385, + "learning_rate": 3.7080405932864954e-06, + "loss": 0.3049, + "step": 950 + }, + { + "epoch": 0.04, + "grad_norm": 0.8340576417695552, + "learning_rate": 3.7119437939110076e-06, + "loss": 0.3385, + "step": 951 + }, + { + "epoch": 0.04, + "grad_norm": 0.7365498775997931, + "learning_rate": 3.7158469945355197e-06, + "loss": 0.3221, + "step": 952 + }, + { + "epoch": 0.04, + "grad_norm": 0.7615740593876276, + "learning_rate": 3.719750195160032e-06, + "loss": 0.3147, + "step": 953 + }, + { + "epoch": 0.04, + "grad_norm": 0.8722114257350929, + "learning_rate": 3.723653395784544e-06, + "loss": 0.3215, + "step": 954 + }, + { + "epoch": 0.04, + "grad_norm": 0.7598820261402331, + "learning_rate": 3.727556596409056e-06, + "loss": 0.326, + "step": 955 + }, + { + "epoch": 0.04, + "grad_norm": 0.8207602557816609, + "learning_rate": 3.7314597970335683e-06, + "loss": 0.3339, + "step": 956 + }, + { + "epoch": 0.04, + "grad_norm": 0.8192586863048623, + "learning_rate": 3.73536299765808e-06, + "loss": 0.3516, + "step": 957 + }, + { + "epoch": 0.04, + "grad_norm": 0.7709055368938971, + "learning_rate": 3.739266198282592e-06, + "loss": 0.3276, + "step": 958 + }, + { + "epoch": 0.04, + "grad_norm": 0.7502410411971993, + "learning_rate": 3.7431693989071043e-06, + "loss": 0.3234, + "step": 959 + }, + { + "epoch": 0.04, + "grad_norm": 0.8536371932479321, + "learning_rate": 3.7470725995316164e-06, + "loss": 0.3399, + "step": 960 + }, + { + "epoch": 0.05, + "grad_norm": 0.8389733897792988, + "learning_rate": 3.750975800156128e-06, + "loss": 0.3412, + "step": 961 + }, + { + "epoch": 0.05, + "grad_norm": 0.8053147767949144, + "learning_rate": 3.7548790007806403e-06, + "loss": 0.3406, + "step": 962 + }, + { + "epoch": 0.05, + "grad_norm": 0.8132929307436576, + "learning_rate": 3.7587822014051524e-06, + "loss": 0.3442, + "step": 963 + }, + { + "epoch": 0.05, + "grad_norm": 0.7546123830385424, + "learning_rate": 3.7626854020296646e-06, + "loss": 0.3253, + "step": 964 + }, + { + "epoch": 0.05, + "grad_norm": 0.8357659310098826, + "learning_rate": 3.7665886026541763e-06, + "loss": 0.322, + "step": 965 + }, + { + "epoch": 0.05, + "grad_norm": 0.739658053499859, + "learning_rate": 3.7704918032786884e-06, + "loss": 0.3454, + "step": 966 + }, + { + "epoch": 0.05, + "grad_norm": 0.7972718145466251, + "learning_rate": 3.7743950039032006e-06, + "loss": 0.3324, + "step": 967 + }, + { + "epoch": 0.05, + "grad_norm": 0.8432896818295975, + "learning_rate": 3.7782982045277127e-06, + "loss": 0.3257, + "step": 968 + }, + { + "epoch": 0.05, + "grad_norm": 0.8471889606538532, + "learning_rate": 3.782201405152225e-06, + "loss": 0.3561, + "step": 969 + }, + { + "epoch": 0.05, + "grad_norm": 0.8179039774211231, + "learning_rate": 3.786104605776737e-06, + "loss": 0.3296, + "step": 970 + }, + { + "epoch": 0.05, + "grad_norm": 0.9094698142095845, + "learning_rate": 3.790007806401249e-06, + "loss": 0.3572, + "step": 971 + }, + { + "epoch": 0.05, + "grad_norm": 0.8035942030412484, + "learning_rate": 3.7939110070257613e-06, + "loss": 0.3269, + "step": 972 + }, + { + "epoch": 0.05, + "grad_norm": 0.7625220616302513, + "learning_rate": 3.7978142076502734e-06, + "loss": 0.3254, + "step": 973 + }, + { + "epoch": 0.05, + "grad_norm": 0.8092451289897646, + "learning_rate": 3.8017174082747855e-06, + "loss": 0.3277, + "step": 974 + }, + { + "epoch": 0.05, + "grad_norm": 0.7539295998268221, + "learning_rate": 3.8056206088992977e-06, + "loss": 0.3027, + "step": 975 + }, + { + "epoch": 0.05, + "grad_norm": 0.8355907557653678, + "learning_rate": 3.80952380952381e-06, + "loss": 0.3549, + "step": 976 + }, + { + "epoch": 0.05, + "grad_norm": 0.7367614000037639, + "learning_rate": 3.8134270101483215e-06, + "loss": 0.3334, + "step": 977 + }, + { + "epoch": 0.05, + "grad_norm": 0.835979463148373, + "learning_rate": 3.817330210772834e-06, + "loss": 0.3266, + "step": 978 + }, + { + "epoch": 0.05, + "grad_norm": 0.80339628054785, + "learning_rate": 3.821233411397346e-06, + "loss": 0.325, + "step": 979 + }, + { + "epoch": 0.05, + "grad_norm": 0.8255250685515131, + "learning_rate": 3.825136612021858e-06, + "loss": 0.3362, + "step": 980 + }, + { + "epoch": 0.05, + "grad_norm": 0.7561570577557267, + "learning_rate": 3.82903981264637e-06, + "loss": 0.3303, + "step": 981 + }, + { + "epoch": 0.05, + "grad_norm": 0.8062603122203911, + "learning_rate": 3.832943013270882e-06, + "loss": 0.3207, + "step": 982 + }, + { + "epoch": 0.05, + "grad_norm": 0.7811281631079525, + "learning_rate": 3.836846213895394e-06, + "loss": 0.3275, + "step": 983 + }, + { + "epoch": 0.05, + "grad_norm": 0.8213392735769245, + "learning_rate": 3.8407494145199065e-06, + "loss": 0.3326, + "step": 984 + }, + { + "epoch": 0.05, + "grad_norm": 0.8465246372299461, + "learning_rate": 3.844652615144419e-06, + "loss": 0.3321, + "step": 985 + }, + { + "epoch": 0.05, + "grad_norm": 0.7825737985816031, + "learning_rate": 3.848555815768931e-06, + "loss": 0.3135, + "step": 986 + }, + { + "epoch": 0.05, + "grad_norm": 0.7129246477564651, + "learning_rate": 3.852459016393443e-06, + "loss": 0.3202, + "step": 987 + }, + { + "epoch": 0.05, + "grad_norm": 0.7411345770728354, + "learning_rate": 3.856362217017955e-06, + "loss": 0.3211, + "step": 988 + }, + { + "epoch": 0.05, + "grad_norm": 0.7596579543552242, + "learning_rate": 3.860265417642467e-06, + "loss": 0.3122, + "step": 989 + }, + { + "epoch": 0.05, + "grad_norm": 0.8013433597816115, + "learning_rate": 3.864168618266979e-06, + "loss": 0.3374, + "step": 990 + }, + { + "epoch": 0.05, + "grad_norm": 0.9370076083302152, + "learning_rate": 3.8680718188914915e-06, + "loss": 0.3078, + "step": 991 + }, + { + "epoch": 0.05, + "grad_norm": 0.7062535591471397, + "learning_rate": 3.871975019516004e-06, + "loss": 0.3173, + "step": 992 + }, + { + "epoch": 0.05, + "grad_norm": 0.8423517333186136, + "learning_rate": 3.875878220140516e-06, + "loss": 0.3155, + "step": 993 + }, + { + "epoch": 0.05, + "grad_norm": 0.7985684676633483, + "learning_rate": 3.879781420765028e-06, + "loss": 0.3269, + "step": 994 + }, + { + "epoch": 0.05, + "grad_norm": 0.7678495362241716, + "learning_rate": 3.883684621389539e-06, + "loss": 0.3198, + "step": 995 + }, + { + "epoch": 0.05, + "grad_norm": 0.7336264797959, + "learning_rate": 3.887587822014051e-06, + "loss": 0.3157, + "step": 996 + }, + { + "epoch": 0.05, + "grad_norm": 0.7860970439217574, + "learning_rate": 3.8914910226385635e-06, + "loss": 0.3321, + "step": 997 + }, + { + "epoch": 0.05, + "grad_norm": 0.74218741684949, + "learning_rate": 3.895394223263076e-06, + "loss": 0.3169, + "step": 998 + }, + { + "epoch": 0.05, + "grad_norm": 0.7517542650869181, + "learning_rate": 3.899297423887588e-06, + "loss": 0.3153, + "step": 999 + }, + { + "epoch": 0.05, + "grad_norm": 0.801465554671392, + "learning_rate": 3.9032006245121e-06, + "loss": 0.3269, + "step": 1000 + }, + { + "epoch": 0.05, + "grad_norm": 0.7868527073239706, + "learning_rate": 3.907103825136612e-06, + "loss": 0.3373, + "step": 1001 + }, + { + "epoch": 0.05, + "grad_norm": 0.804160864761319, + "learning_rate": 3.911007025761124e-06, + "loss": 0.3352, + "step": 1002 + }, + { + "epoch": 0.05, + "grad_norm": 0.8426601477000829, + "learning_rate": 3.914910226385636e-06, + "loss": 0.3324, + "step": 1003 + }, + { + "epoch": 0.05, + "grad_norm": 0.8920566829923665, + "learning_rate": 3.9188134270101485e-06, + "loss": 0.3408, + "step": 1004 + }, + { + "epoch": 0.05, + "grad_norm": 0.8282588490276088, + "learning_rate": 3.922716627634661e-06, + "loss": 0.3501, + "step": 1005 + }, + { + "epoch": 0.05, + "grad_norm": 0.7460949682365215, + "learning_rate": 3.926619828259173e-06, + "loss": 0.3336, + "step": 1006 + }, + { + "epoch": 0.05, + "grad_norm": 0.8711609145031212, + "learning_rate": 3.930523028883685e-06, + "loss": 0.3329, + "step": 1007 + }, + { + "epoch": 0.05, + "grad_norm": 0.8356857906134444, + "learning_rate": 3.934426229508197e-06, + "loss": 0.3377, + "step": 1008 + }, + { + "epoch": 0.05, + "grad_norm": 0.7673043430044278, + "learning_rate": 3.938329430132709e-06, + "loss": 0.3258, + "step": 1009 + }, + { + "epoch": 0.05, + "grad_norm": 0.7941049656040756, + "learning_rate": 3.942232630757221e-06, + "loss": 0.3052, + "step": 1010 + }, + { + "epoch": 0.05, + "grad_norm": 0.7121748038719863, + "learning_rate": 3.9461358313817335e-06, + "loss": 0.3143, + "step": 1011 + }, + { + "epoch": 0.05, + "grad_norm": 0.8463781646553441, + "learning_rate": 3.950039032006246e-06, + "loss": 0.3565, + "step": 1012 + }, + { + "epoch": 0.05, + "grad_norm": 0.8528091987633496, + "learning_rate": 3.953942232630758e-06, + "loss": 0.3291, + "step": 1013 + }, + { + "epoch": 0.05, + "grad_norm": 0.806708986779555, + "learning_rate": 3.95784543325527e-06, + "loss": 0.3347, + "step": 1014 + }, + { + "epoch": 0.05, + "grad_norm": 0.8037005023622362, + "learning_rate": 3.961748633879782e-06, + "loss": 0.3296, + "step": 1015 + }, + { + "epoch": 0.05, + "grad_norm": 0.791312271351172, + "learning_rate": 3.965651834504294e-06, + "loss": 0.3167, + "step": 1016 + }, + { + "epoch": 0.05, + "grad_norm": 0.761304011671788, + "learning_rate": 3.969555035128806e-06, + "loss": 0.3223, + "step": 1017 + }, + { + "epoch": 0.05, + "grad_norm": 0.832443552891366, + "learning_rate": 3.973458235753318e-06, + "loss": 0.3224, + "step": 1018 + }, + { + "epoch": 0.05, + "grad_norm": 0.9423490749563538, + "learning_rate": 3.97736143637783e-06, + "loss": 0.35, + "step": 1019 + }, + { + "epoch": 0.05, + "grad_norm": 0.7920397638065396, + "learning_rate": 3.981264637002342e-06, + "loss": 0.3383, + "step": 1020 + }, + { + "epoch": 0.05, + "grad_norm": 0.7909693782698485, + "learning_rate": 3.985167837626854e-06, + "loss": 0.3125, + "step": 1021 + }, + { + "epoch": 0.05, + "grad_norm": 0.8597376202893422, + "learning_rate": 3.989071038251366e-06, + "loss": 0.3253, + "step": 1022 + }, + { + "epoch": 0.05, + "grad_norm": 0.8934167196082788, + "learning_rate": 3.992974238875878e-06, + "loss": 0.349, + "step": 1023 + }, + { + "epoch": 0.05, + "grad_norm": 0.7741811787120841, + "learning_rate": 3.9968774395003904e-06, + "loss": 0.3209, + "step": 1024 + }, + { + "epoch": 0.05, + "grad_norm": 0.7970655032408883, + "learning_rate": 4.000780640124903e-06, + "loss": 0.3151, + "step": 1025 + }, + { + "epoch": 0.05, + "grad_norm": 0.7975903965146152, + "learning_rate": 4.004683840749415e-06, + "loss": 0.3496, + "step": 1026 + }, + { + "epoch": 0.05, + "grad_norm": 0.7843244476130126, + "learning_rate": 4.008587041373927e-06, + "loss": 0.3257, + "step": 1027 + }, + { + "epoch": 0.05, + "grad_norm": 0.7164307517223025, + "learning_rate": 4.012490241998439e-06, + "loss": 0.3077, + "step": 1028 + }, + { + "epoch": 0.05, + "grad_norm": 0.7942687617519383, + "learning_rate": 4.016393442622951e-06, + "loss": 0.3485, + "step": 1029 + }, + { + "epoch": 0.05, + "grad_norm": 0.8241373083463113, + "learning_rate": 4.020296643247463e-06, + "loss": 0.3363, + "step": 1030 + }, + { + "epoch": 0.05, + "grad_norm": 0.7809439780952002, + "learning_rate": 4.0241998438719754e-06, + "loss": 0.3297, + "step": 1031 + }, + { + "epoch": 0.05, + "grad_norm": 0.7004532899521256, + "learning_rate": 4.0281030444964876e-06, + "loss": 0.315, + "step": 1032 + }, + { + "epoch": 0.05, + "grad_norm": 0.8171960784407708, + "learning_rate": 4.032006245121e-06, + "loss": 0.3275, + "step": 1033 + }, + { + "epoch": 0.05, + "grad_norm": 0.7536411607022162, + "learning_rate": 4.035909445745512e-06, + "loss": 0.3152, + "step": 1034 + }, + { + "epoch": 0.05, + "grad_norm": 0.8398439956197662, + "learning_rate": 4.039812646370024e-06, + "loss": 0.3197, + "step": 1035 + }, + { + "epoch": 0.05, + "grad_norm": 0.7750455541647057, + "learning_rate": 4.043715846994536e-06, + "loss": 0.3149, + "step": 1036 + }, + { + "epoch": 0.05, + "grad_norm": 0.8192760096960554, + "learning_rate": 4.047619047619048e-06, + "loss": 0.3153, + "step": 1037 + }, + { + "epoch": 0.05, + "grad_norm": 0.7772789659363601, + "learning_rate": 4.05152224824356e-06, + "loss": 0.337, + "step": 1038 + }, + { + "epoch": 0.05, + "grad_norm": 0.794129215020833, + "learning_rate": 4.0554254488680726e-06, + "loss": 0.3133, + "step": 1039 + }, + { + "epoch": 0.05, + "grad_norm": 0.7430636574188977, + "learning_rate": 4.059328649492585e-06, + "loss": 0.3167, + "step": 1040 + }, + { + "epoch": 0.05, + "grad_norm": 0.7258586993887473, + "learning_rate": 4.063231850117097e-06, + "loss": 0.3039, + "step": 1041 + }, + { + "epoch": 0.05, + "grad_norm": 0.7142964857046481, + "learning_rate": 4.067135050741608e-06, + "loss": 0.3163, + "step": 1042 + }, + { + "epoch": 0.05, + "grad_norm": 0.8554870911499838, + "learning_rate": 4.07103825136612e-06, + "loss": 0.3549, + "step": 1043 + }, + { + "epoch": 0.05, + "grad_norm": 0.8095567330424142, + "learning_rate": 4.074941451990632e-06, + "loss": 0.3258, + "step": 1044 + }, + { + "epoch": 0.05, + "grad_norm": 0.7725747186393114, + "learning_rate": 4.0788446526151446e-06, + "loss": 0.3406, + "step": 1045 + }, + { + "epoch": 0.05, + "grad_norm": 0.8177815876841619, + "learning_rate": 4.082747853239657e-06, + "loss": 0.3362, + "step": 1046 + }, + { + "epoch": 0.05, + "grad_norm": 0.7846846849384338, + "learning_rate": 4.086651053864169e-06, + "loss": 0.3368, + "step": 1047 + }, + { + "epoch": 0.05, + "grad_norm": 0.7329859164097651, + "learning_rate": 4.090554254488681e-06, + "loss": 0.3149, + "step": 1048 + }, + { + "epoch": 0.05, + "grad_norm": 0.7518828854583361, + "learning_rate": 4.094457455113193e-06, + "loss": 0.3234, + "step": 1049 + }, + { + "epoch": 0.05, + "grad_norm": 0.7570870948356919, + "learning_rate": 4.098360655737705e-06, + "loss": 0.3262, + "step": 1050 + }, + { + "epoch": 0.05, + "grad_norm": 0.7815048592529774, + "learning_rate": 4.102263856362217e-06, + "loss": 0.3462, + "step": 1051 + }, + { + "epoch": 0.05, + "grad_norm": 0.7622841807644124, + "learning_rate": 4.1061670569867295e-06, + "loss": 0.3284, + "step": 1052 + }, + { + "epoch": 0.05, + "grad_norm": 0.7871223655339628, + "learning_rate": 4.110070257611242e-06, + "loss": 0.3286, + "step": 1053 + }, + { + "epoch": 0.05, + "grad_norm": 0.7103371177686691, + "learning_rate": 4.113973458235754e-06, + "loss": 0.3394, + "step": 1054 + }, + { + "epoch": 0.05, + "grad_norm": 0.7378037838581551, + "learning_rate": 4.117876658860266e-06, + "loss": 0.3226, + "step": 1055 + }, + { + "epoch": 0.05, + "grad_norm": 0.7557597715521295, + "learning_rate": 4.121779859484778e-06, + "loss": 0.3394, + "step": 1056 + }, + { + "epoch": 0.05, + "grad_norm": 0.814048254433811, + "learning_rate": 4.12568306010929e-06, + "loss": 0.3265, + "step": 1057 + }, + { + "epoch": 0.05, + "grad_norm": 0.7870604106780377, + "learning_rate": 4.129586260733802e-06, + "loss": 0.3336, + "step": 1058 + }, + { + "epoch": 0.05, + "grad_norm": 0.8232699770079824, + "learning_rate": 4.1334894613583145e-06, + "loss": 0.3473, + "step": 1059 + }, + { + "epoch": 0.05, + "grad_norm": 0.7713489596151366, + "learning_rate": 4.137392661982827e-06, + "loss": 0.3374, + "step": 1060 + }, + { + "epoch": 0.05, + "grad_norm": 0.7446546279052748, + "learning_rate": 4.141295862607339e-06, + "loss": 0.3219, + "step": 1061 + }, + { + "epoch": 0.05, + "grad_norm": 0.7613819279560381, + "learning_rate": 4.145199063231851e-06, + "loss": 0.3157, + "step": 1062 + }, + { + "epoch": 0.05, + "grad_norm": 0.8538321996694695, + "learning_rate": 4.149102263856363e-06, + "loss": 0.329, + "step": 1063 + }, + { + "epoch": 0.05, + "grad_norm": 0.7409984387736864, + "learning_rate": 4.153005464480875e-06, + "loss": 0.3156, + "step": 1064 + }, + { + "epoch": 0.05, + "grad_norm": 0.7911431659611726, + "learning_rate": 4.1569086651053865e-06, + "loss": 0.3419, + "step": 1065 + }, + { + "epoch": 0.05, + "grad_norm": 0.74657095242217, + "learning_rate": 4.160811865729899e-06, + "loss": 0.3367, + "step": 1066 + }, + { + "epoch": 0.05, + "grad_norm": 0.765269208802623, + "learning_rate": 4.164715066354411e-06, + "loss": 0.3194, + "step": 1067 + }, + { + "epoch": 0.05, + "grad_norm": 0.8043592990405705, + "learning_rate": 4.168618266978923e-06, + "loss": 0.3113, + "step": 1068 + }, + { + "epoch": 0.05, + "grad_norm": 0.8085620855377833, + "learning_rate": 4.172521467603435e-06, + "loss": 0.3647, + "step": 1069 + }, + { + "epoch": 0.05, + "grad_norm": 0.7990228122089519, + "learning_rate": 4.176424668227947e-06, + "loss": 0.3182, + "step": 1070 + }, + { + "epoch": 0.05, + "grad_norm": 0.9040155254741561, + "learning_rate": 4.180327868852459e-06, + "loss": 0.3515, + "step": 1071 + }, + { + "epoch": 0.05, + "grad_norm": 0.7899225631269892, + "learning_rate": 4.1842310694769715e-06, + "loss": 0.3473, + "step": 1072 + }, + { + "epoch": 0.05, + "grad_norm": 0.7690416713588238, + "learning_rate": 4.188134270101484e-06, + "loss": 0.3238, + "step": 1073 + }, + { + "epoch": 0.05, + "grad_norm": 0.7630156058147718, + "learning_rate": 4.192037470725996e-06, + "loss": 0.307, + "step": 1074 + }, + { + "epoch": 0.05, + "grad_norm": 0.8625253080202074, + "learning_rate": 4.195940671350508e-06, + "loss": 0.3093, + "step": 1075 + }, + { + "epoch": 0.05, + "grad_norm": 0.8129731923198934, + "learning_rate": 4.19984387197502e-06, + "loss": 0.3329, + "step": 1076 + }, + { + "epoch": 0.05, + "grad_norm": 0.7934095658317847, + "learning_rate": 4.203747072599532e-06, + "loss": 0.3389, + "step": 1077 + }, + { + "epoch": 0.05, + "grad_norm": 0.7339282651268476, + "learning_rate": 4.207650273224044e-06, + "loss": 0.3135, + "step": 1078 + }, + { + "epoch": 0.05, + "grad_norm": 0.7945362792054323, + "learning_rate": 4.2115534738485565e-06, + "loss": 0.3121, + "step": 1079 + }, + { + "epoch": 0.05, + "grad_norm": 0.7785411781814795, + "learning_rate": 4.215456674473069e-06, + "loss": 0.3157, + "step": 1080 + }, + { + "epoch": 0.05, + "grad_norm": 0.7245974787821303, + "learning_rate": 4.219359875097581e-06, + "loss": 0.3423, + "step": 1081 + }, + { + "epoch": 0.05, + "grad_norm": 0.8041426868294155, + "learning_rate": 4.223263075722093e-06, + "loss": 0.3332, + "step": 1082 + }, + { + "epoch": 0.05, + "grad_norm": 0.7626961137264079, + "learning_rate": 4.227166276346605e-06, + "loss": 0.329, + "step": 1083 + }, + { + "epoch": 0.05, + "grad_norm": 0.7399745734037926, + "learning_rate": 4.231069476971117e-06, + "loss": 0.3294, + "step": 1084 + }, + { + "epoch": 0.05, + "grad_norm": 0.9757282630972325, + "learning_rate": 4.234972677595629e-06, + "loss": 0.3196, + "step": 1085 + }, + { + "epoch": 0.05, + "grad_norm": 0.7709814845647732, + "learning_rate": 4.2388758782201415e-06, + "loss": 0.3601, + "step": 1086 + }, + { + "epoch": 0.05, + "grad_norm": 0.7085720097502731, + "learning_rate": 4.242779078844654e-06, + "loss": 0.3196, + "step": 1087 + }, + { + "epoch": 0.05, + "grad_norm": 0.7276270226935244, + "learning_rate": 4.246682279469166e-06, + "loss": 0.3057, + "step": 1088 + }, + { + "epoch": 0.05, + "grad_norm": 0.7154431407559079, + "learning_rate": 4.250585480093677e-06, + "loss": 0.3216, + "step": 1089 + }, + { + "epoch": 0.05, + "grad_norm": 0.8219937295500883, + "learning_rate": 4.254488680718189e-06, + "loss": 0.3393, + "step": 1090 + }, + { + "epoch": 0.05, + "grad_norm": 0.7696289547206345, + "learning_rate": 4.258391881342701e-06, + "loss": 0.3221, + "step": 1091 + }, + { + "epoch": 0.05, + "grad_norm": 0.8078598793710751, + "learning_rate": 4.2622950819672135e-06, + "loss": 0.352, + "step": 1092 + }, + { + "epoch": 0.05, + "grad_norm": 0.7878016353321163, + "learning_rate": 4.266198282591726e-06, + "loss": 0.3238, + "step": 1093 + }, + { + "epoch": 0.05, + "grad_norm": 0.8214492840177962, + "learning_rate": 4.270101483216238e-06, + "loss": 0.3408, + "step": 1094 + }, + { + "epoch": 0.05, + "grad_norm": 0.8315196647745042, + "learning_rate": 4.27400468384075e-06, + "loss": 0.3353, + "step": 1095 + }, + { + "epoch": 0.05, + "grad_norm": 0.7505995354116095, + "learning_rate": 4.277907884465262e-06, + "loss": 0.3593, + "step": 1096 + }, + { + "epoch": 0.05, + "grad_norm": 0.8479225598990868, + "learning_rate": 4.281811085089773e-06, + "loss": 0.352, + "step": 1097 + }, + { + "epoch": 0.05, + "grad_norm": 0.7807060160062745, + "learning_rate": 4.2857142857142855e-06, + "loss": 0.3185, + "step": 1098 + }, + { + "epoch": 0.05, + "grad_norm": 0.759009079029957, + "learning_rate": 4.289617486338798e-06, + "loss": 0.355, + "step": 1099 + }, + { + "epoch": 0.05, + "grad_norm": 0.8196708590415941, + "learning_rate": 4.29352068696331e-06, + "loss": 0.3389, + "step": 1100 + }, + { + "epoch": 0.05, + "grad_norm": 0.7949029707301941, + "learning_rate": 4.297423887587822e-06, + "loss": 0.3365, + "step": 1101 + }, + { + "epoch": 0.05, + "grad_norm": 0.7775805454162443, + "learning_rate": 4.301327088212334e-06, + "loss": 0.3392, + "step": 1102 + }, + { + "epoch": 0.05, + "grad_norm": 0.7482196592712799, + "learning_rate": 4.305230288836846e-06, + "loss": 0.3217, + "step": 1103 + }, + { + "epoch": 0.05, + "grad_norm": 0.8059045956958294, + "learning_rate": 4.309133489461358e-06, + "loss": 0.3463, + "step": 1104 + }, + { + "epoch": 0.05, + "grad_norm": 0.8128933000220175, + "learning_rate": 4.3130366900858704e-06, + "loss": 0.3233, + "step": 1105 + }, + { + "epoch": 0.05, + "grad_norm": 0.7737591374993412, + "learning_rate": 4.316939890710383e-06, + "loss": 0.3373, + "step": 1106 + }, + { + "epoch": 0.05, + "grad_norm": 0.8648982210545426, + "learning_rate": 4.320843091334895e-06, + "loss": 0.3066, + "step": 1107 + }, + { + "epoch": 0.05, + "grad_norm": 0.8381258266262805, + "learning_rate": 4.324746291959407e-06, + "loss": 0.337, + "step": 1108 + }, + { + "epoch": 0.05, + "grad_norm": 0.7819822885797783, + "learning_rate": 4.328649492583919e-06, + "loss": 0.3352, + "step": 1109 + }, + { + "epoch": 0.05, + "grad_norm": 0.7930983079225373, + "learning_rate": 4.332552693208431e-06, + "loss": 0.3258, + "step": 1110 + }, + { + "epoch": 0.05, + "grad_norm": 0.8068786105830926, + "learning_rate": 4.336455893832943e-06, + "loss": 0.3192, + "step": 1111 + }, + { + "epoch": 0.05, + "grad_norm": 0.8421270210500106, + "learning_rate": 4.3403590944574554e-06, + "loss": 0.333, + "step": 1112 + }, + { + "epoch": 0.05, + "grad_norm": 0.8311501988164857, + "learning_rate": 4.3442622950819676e-06, + "loss": 0.3459, + "step": 1113 + }, + { + "epoch": 0.05, + "grad_norm": 0.8733144807634394, + "learning_rate": 4.34816549570648e-06, + "loss": 0.3475, + "step": 1114 + }, + { + "epoch": 0.05, + "grad_norm": 0.7333203781218812, + "learning_rate": 4.352068696330992e-06, + "loss": 0.3278, + "step": 1115 + }, + { + "epoch": 0.05, + "grad_norm": 0.7907541011156831, + "learning_rate": 4.355971896955504e-06, + "loss": 0.3332, + "step": 1116 + }, + { + "epoch": 0.05, + "grad_norm": 0.870712017894598, + "learning_rate": 4.359875097580016e-06, + "loss": 0.3377, + "step": 1117 + }, + { + "epoch": 0.05, + "grad_norm": 0.75078089759963, + "learning_rate": 4.363778298204528e-06, + "loss": 0.3123, + "step": 1118 + }, + { + "epoch": 0.05, + "grad_norm": 0.7959570198877866, + "learning_rate": 4.36768149882904e-06, + "loss": 0.3587, + "step": 1119 + }, + { + "epoch": 0.05, + "grad_norm": 0.8199562253507233, + "learning_rate": 4.371584699453552e-06, + "loss": 0.3457, + "step": 1120 + }, + { + "epoch": 0.05, + "grad_norm": 0.7539389984923776, + "learning_rate": 4.375487900078064e-06, + "loss": 0.3314, + "step": 1121 + }, + { + "epoch": 0.05, + "grad_norm": 0.7774541586558678, + "learning_rate": 4.379391100702576e-06, + "loss": 0.3205, + "step": 1122 + }, + { + "epoch": 0.05, + "grad_norm": 0.7158416857315977, + "learning_rate": 4.383294301327088e-06, + "loss": 0.3128, + "step": 1123 + }, + { + "epoch": 0.05, + "grad_norm": 0.8019237726485963, + "learning_rate": 4.3871975019516e-06, + "loss": 0.3439, + "step": 1124 + }, + { + "epoch": 0.05, + "grad_norm": 0.7945134625901896, + "learning_rate": 4.391100702576112e-06, + "loss": 0.3417, + "step": 1125 + }, + { + "epoch": 0.05, + "grad_norm": 0.8030387119487601, + "learning_rate": 4.3950039032006245e-06, + "loss": 0.326, + "step": 1126 + }, + { + "epoch": 0.05, + "grad_norm": 0.7608939249537782, + "learning_rate": 4.398907103825137e-06, + "loss": 0.3106, + "step": 1127 + }, + { + "epoch": 0.05, + "grad_norm": 0.843899363072372, + "learning_rate": 4.402810304449649e-06, + "loss": 0.3412, + "step": 1128 + }, + { + "epoch": 0.05, + "grad_norm": 0.7520682965182158, + "learning_rate": 4.406713505074161e-06, + "loss": 0.3168, + "step": 1129 + }, + { + "epoch": 0.05, + "grad_norm": 0.8160462118117454, + "learning_rate": 4.410616705698673e-06, + "loss": 0.328, + "step": 1130 + }, + { + "epoch": 0.05, + "grad_norm": 0.7394665604249703, + "learning_rate": 4.414519906323185e-06, + "loss": 0.3201, + "step": 1131 + }, + { + "epoch": 0.05, + "grad_norm": 0.7723673763030374, + "learning_rate": 4.418423106947697e-06, + "loss": 0.3244, + "step": 1132 + }, + { + "epoch": 0.05, + "grad_norm": 0.717333581240294, + "learning_rate": 4.4223263075722095e-06, + "loss": 0.3157, + "step": 1133 + }, + { + "epoch": 0.05, + "grad_norm": 0.7536719020079228, + "learning_rate": 4.426229508196722e-06, + "loss": 0.3232, + "step": 1134 + }, + { + "epoch": 0.05, + "grad_norm": 0.7797077716990461, + "learning_rate": 4.430132708821234e-06, + "loss": 0.3314, + "step": 1135 + }, + { + "epoch": 0.05, + "grad_norm": 0.7879258123244073, + "learning_rate": 4.434035909445746e-06, + "loss": 0.3317, + "step": 1136 + }, + { + "epoch": 0.05, + "grad_norm": 0.7418047923204022, + "learning_rate": 4.437939110070258e-06, + "loss": 0.3287, + "step": 1137 + }, + { + "epoch": 0.05, + "grad_norm": 0.7857658507133573, + "learning_rate": 4.44184231069477e-06, + "loss": 0.3122, + "step": 1138 + }, + { + "epoch": 0.05, + "grad_norm": 0.773080902450866, + "learning_rate": 4.445745511319282e-06, + "loss": 0.306, + "step": 1139 + }, + { + "epoch": 0.05, + "grad_norm": 0.7521305802775401, + "learning_rate": 4.4496487119437945e-06, + "loss": 0.3356, + "step": 1140 + }, + { + "epoch": 0.05, + "grad_norm": 0.7887897219408405, + "learning_rate": 4.453551912568307e-06, + "loss": 0.3203, + "step": 1141 + }, + { + "epoch": 0.05, + "grad_norm": 0.8836903646248546, + "learning_rate": 4.457455113192819e-06, + "loss": 0.3242, + "step": 1142 + }, + { + "epoch": 0.05, + "grad_norm": 0.7574615135962971, + "learning_rate": 4.461358313817331e-06, + "loss": 0.3486, + "step": 1143 + }, + { + "epoch": 0.05, + "grad_norm": 0.7813213846959247, + "learning_rate": 4.465261514441842e-06, + "loss": 0.3189, + "step": 1144 + }, + { + "epoch": 0.05, + "grad_norm": 0.7729664775573555, + "learning_rate": 4.469164715066354e-06, + "loss": 0.3267, + "step": 1145 + }, + { + "epoch": 0.05, + "grad_norm": 0.8018999834961694, + "learning_rate": 4.4730679156908665e-06, + "loss": 0.3285, + "step": 1146 + }, + { + "epoch": 0.05, + "grad_norm": 0.7434686351484441, + "learning_rate": 4.476971116315379e-06, + "loss": 0.318, + "step": 1147 + }, + { + "epoch": 0.05, + "grad_norm": 0.7505204664861658, + "learning_rate": 4.480874316939891e-06, + "loss": 0.3182, + "step": 1148 + }, + { + "epoch": 0.05, + "grad_norm": 0.8528481712745205, + "learning_rate": 4.484777517564403e-06, + "loss": 0.3241, + "step": 1149 + }, + { + "epoch": 0.05, + "grad_norm": 0.7722934197072976, + "learning_rate": 4.488680718188915e-06, + "loss": 0.3472, + "step": 1150 + }, + { + "epoch": 0.05, + "grad_norm": 0.8744420640434646, + "learning_rate": 4.492583918813427e-06, + "loss": 0.3467, + "step": 1151 + }, + { + "epoch": 0.05, + "grad_norm": 0.7834143405648434, + "learning_rate": 4.496487119437939e-06, + "loss": 0.3375, + "step": 1152 + }, + { + "epoch": 0.05, + "grad_norm": 0.8180738040586236, + "learning_rate": 4.5003903200624515e-06, + "loss": 0.3301, + "step": 1153 + }, + { + "epoch": 0.05, + "grad_norm": 0.8069896316404841, + "learning_rate": 4.504293520686964e-06, + "loss": 0.3672, + "step": 1154 + }, + { + "epoch": 0.05, + "grad_norm": 0.777690308577652, + "learning_rate": 4.508196721311476e-06, + "loss": 0.3416, + "step": 1155 + }, + { + "epoch": 0.05, + "grad_norm": 0.8071548121534299, + "learning_rate": 4.512099921935988e-06, + "loss": 0.3027, + "step": 1156 + }, + { + "epoch": 0.05, + "grad_norm": 0.7825094984345531, + "learning_rate": 4.5160031225605e-06, + "loss": 0.3328, + "step": 1157 + }, + { + "epoch": 0.05, + "grad_norm": 0.8247099962264008, + "learning_rate": 4.519906323185012e-06, + "loss": 0.3487, + "step": 1158 + }, + { + "epoch": 0.05, + "grad_norm": 0.7914229657813877, + "learning_rate": 4.523809523809524e-06, + "loss": 0.3245, + "step": 1159 + }, + { + "epoch": 0.05, + "grad_norm": 0.8319241680100073, + "learning_rate": 4.5277127244340365e-06, + "loss": 0.3341, + "step": 1160 + }, + { + "epoch": 0.05, + "grad_norm": 0.7321441870462392, + "learning_rate": 4.531615925058549e-06, + "loss": 0.309, + "step": 1161 + }, + { + "epoch": 0.05, + "grad_norm": 0.7903653449296587, + "learning_rate": 4.535519125683061e-06, + "loss": 0.322, + "step": 1162 + }, + { + "epoch": 0.05, + "grad_norm": 0.7464466848416672, + "learning_rate": 4.539422326307573e-06, + "loss": 0.32, + "step": 1163 + }, + { + "epoch": 0.05, + "grad_norm": 0.7513299849162668, + "learning_rate": 4.543325526932085e-06, + "loss": 0.3301, + "step": 1164 + }, + { + "epoch": 0.05, + "grad_norm": 0.7455689059751355, + "learning_rate": 4.547228727556597e-06, + "loss": 0.2991, + "step": 1165 + }, + { + "epoch": 0.05, + "grad_norm": 0.7340372003213809, + "learning_rate": 4.551131928181109e-06, + "loss": 0.3245, + "step": 1166 + }, + { + "epoch": 0.05, + "grad_norm": 0.801099243879809, + "learning_rate": 4.5550351288056215e-06, + "loss": 0.3296, + "step": 1167 + }, + { + "epoch": 0.05, + "grad_norm": 0.8091049232703141, + "learning_rate": 4.558938329430133e-06, + "loss": 0.3268, + "step": 1168 + }, + { + "epoch": 0.05, + "grad_norm": 0.725198942359715, + "learning_rate": 4.562841530054645e-06, + "loss": 0.3347, + "step": 1169 + }, + { + "epoch": 0.05, + "grad_norm": 0.8091911176240195, + "learning_rate": 4.566744730679157e-06, + "loss": 0.3184, + "step": 1170 + }, + { + "epoch": 0.05, + "grad_norm": 0.7410087196063481, + "learning_rate": 4.570647931303669e-06, + "loss": 0.3367, + "step": 1171 + }, + { + "epoch": 0.05, + "grad_norm": 0.8058079757133377, + "learning_rate": 4.574551131928181e-06, + "loss": 0.3248, + "step": 1172 + }, + { + "epoch": 0.05, + "grad_norm": 0.8048682224120459, + "learning_rate": 4.5784543325526935e-06, + "loss": 0.3281, + "step": 1173 + }, + { + "epoch": 0.05, + "grad_norm": 0.784922943494407, + "learning_rate": 4.582357533177206e-06, + "loss": 0.3378, + "step": 1174 + }, + { + "epoch": 0.06, + "grad_norm": 0.7875099952505576, + "learning_rate": 4.586260733801718e-06, + "loss": 0.3485, + "step": 1175 + }, + { + "epoch": 0.06, + "grad_norm": 0.716861225618717, + "learning_rate": 4.59016393442623e-06, + "loss": 0.3114, + "step": 1176 + }, + { + "epoch": 0.06, + "grad_norm": 0.780846901801135, + "learning_rate": 4.594067135050742e-06, + "loss": 0.313, + "step": 1177 + }, + { + "epoch": 0.06, + "grad_norm": 0.7577791047890048, + "learning_rate": 4.597970335675254e-06, + "loss": 0.3397, + "step": 1178 + }, + { + "epoch": 0.06, + "grad_norm": 0.7690472310672557, + "learning_rate": 4.601873536299766e-06, + "loss": 0.3165, + "step": 1179 + }, + { + "epoch": 0.06, + "grad_norm": 0.7733795422584592, + "learning_rate": 4.6057767369242784e-06, + "loss": 0.3417, + "step": 1180 + }, + { + "epoch": 0.06, + "grad_norm": 0.8392925510948042, + "learning_rate": 4.609679937548791e-06, + "loss": 0.3424, + "step": 1181 + }, + { + "epoch": 0.06, + "grad_norm": 0.7111725081416846, + "learning_rate": 4.613583138173303e-06, + "loss": 0.296, + "step": 1182 + }, + { + "epoch": 0.06, + "grad_norm": 0.8513619451765813, + "learning_rate": 4.617486338797815e-06, + "loss": 0.3356, + "step": 1183 + }, + { + "epoch": 0.06, + "grad_norm": 0.7875945959998719, + "learning_rate": 4.621389539422327e-06, + "loss": 0.3311, + "step": 1184 + }, + { + "epoch": 0.06, + "grad_norm": 0.7669350867947335, + "learning_rate": 4.625292740046839e-06, + "loss": 0.3367, + "step": 1185 + }, + { + "epoch": 0.06, + "grad_norm": 0.8337841795115746, + "learning_rate": 4.629195940671351e-06, + "loss": 0.3224, + "step": 1186 + }, + { + "epoch": 0.06, + "grad_norm": 0.7717487122026646, + "learning_rate": 4.6330991412958634e-06, + "loss": 0.3048, + "step": 1187 + }, + { + "epoch": 0.06, + "grad_norm": 0.7007504837538838, + "learning_rate": 4.6370023419203756e-06, + "loss": 0.3107, + "step": 1188 + }, + { + "epoch": 0.06, + "grad_norm": 0.7785296339400172, + "learning_rate": 4.640905542544888e-06, + "loss": 0.3164, + "step": 1189 + }, + { + "epoch": 0.06, + "grad_norm": 0.7227534369783942, + "learning_rate": 4.6448087431694e-06, + "loss": 0.3075, + "step": 1190 + }, + { + "epoch": 0.06, + "grad_norm": 0.7524646935432423, + "learning_rate": 4.648711943793911e-06, + "loss": 0.3046, + "step": 1191 + }, + { + "epoch": 0.06, + "grad_norm": 0.8396265499663016, + "learning_rate": 4.652615144418423e-06, + "loss": 0.3391, + "step": 1192 + }, + { + "epoch": 0.06, + "grad_norm": 0.751680387631082, + "learning_rate": 4.6565183450429354e-06, + "loss": 0.3247, + "step": 1193 + }, + { + "epoch": 0.06, + "grad_norm": 0.7541148833501374, + "learning_rate": 4.6604215456674476e-06, + "loss": 0.3177, + "step": 1194 + }, + { + "epoch": 0.06, + "grad_norm": 0.768031674377618, + "learning_rate": 4.66432474629196e-06, + "loss": 0.3247, + "step": 1195 + }, + { + "epoch": 0.06, + "grad_norm": 0.865291775756713, + "learning_rate": 4.668227946916472e-06, + "loss": 0.3519, + "step": 1196 + }, + { + "epoch": 0.06, + "grad_norm": 0.78324554839197, + "learning_rate": 4.672131147540984e-06, + "loss": 0.3371, + "step": 1197 + }, + { + "epoch": 0.06, + "grad_norm": 0.7515151649070249, + "learning_rate": 4.676034348165496e-06, + "loss": 0.3103, + "step": 1198 + }, + { + "epoch": 0.06, + "grad_norm": 0.7658841521811186, + "learning_rate": 4.679937548790008e-06, + "loss": 0.3282, + "step": 1199 + }, + { + "epoch": 0.06, + "grad_norm": 0.7444163221472007, + "learning_rate": 4.68384074941452e-06, + "loss": 0.3084, + "step": 1200 + }, + { + "epoch": 0.06, + "grad_norm": 0.7900746582451963, + "learning_rate": 4.6877439500390326e-06, + "loss": 0.3335, + "step": 1201 + }, + { + "epoch": 0.06, + "grad_norm": 0.7772416466347553, + "learning_rate": 4.691647150663545e-06, + "loss": 0.3271, + "step": 1202 + }, + { + "epoch": 0.06, + "grad_norm": 0.7580981679055885, + "learning_rate": 4.695550351288057e-06, + "loss": 0.3307, + "step": 1203 + }, + { + "epoch": 0.06, + "grad_norm": 0.8473319565326328, + "learning_rate": 4.699453551912569e-06, + "loss": 0.3419, + "step": 1204 + }, + { + "epoch": 0.06, + "grad_norm": 0.7564314492569815, + "learning_rate": 4.703356752537081e-06, + "loss": 0.3253, + "step": 1205 + }, + { + "epoch": 0.06, + "grad_norm": 0.717025328451318, + "learning_rate": 4.707259953161593e-06, + "loss": 0.33, + "step": 1206 + }, + { + "epoch": 0.06, + "grad_norm": 0.7818112513165272, + "learning_rate": 4.711163153786105e-06, + "loss": 0.335, + "step": 1207 + }, + { + "epoch": 0.06, + "grad_norm": 0.8781303663180692, + "learning_rate": 4.7150663544106175e-06, + "loss": 0.3339, + "step": 1208 + }, + { + "epoch": 0.06, + "grad_norm": 0.8315431405979283, + "learning_rate": 4.71896955503513e-06, + "loss": 0.3444, + "step": 1209 + }, + { + "epoch": 0.06, + "grad_norm": 0.8189351184214387, + "learning_rate": 4.722872755659642e-06, + "loss": 0.3406, + "step": 1210 + }, + { + "epoch": 0.06, + "grad_norm": 0.735869229568121, + "learning_rate": 4.726775956284154e-06, + "loss": 0.3235, + "step": 1211 + }, + { + "epoch": 0.06, + "grad_norm": 0.8294830764067048, + "learning_rate": 4.730679156908666e-06, + "loss": 0.3367, + "step": 1212 + }, + { + "epoch": 0.06, + "grad_norm": 0.8354841612956521, + "learning_rate": 4.734582357533178e-06, + "loss": 0.3017, + "step": 1213 + }, + { + "epoch": 0.06, + "grad_norm": 0.8227247343353387, + "learning_rate": 4.73848555815769e-06, + "loss": 0.323, + "step": 1214 + }, + { + "epoch": 0.06, + "grad_norm": 0.824587040521365, + "learning_rate": 4.742388758782202e-06, + "loss": 0.3435, + "step": 1215 + }, + { + "epoch": 0.06, + "grad_norm": 0.7341508886043664, + "learning_rate": 4.746291959406714e-06, + "loss": 0.3226, + "step": 1216 + }, + { + "epoch": 0.06, + "grad_norm": 0.8777944438379092, + "learning_rate": 4.750195160031226e-06, + "loss": 0.3263, + "step": 1217 + }, + { + "epoch": 0.06, + "grad_norm": 0.7584736045922514, + "learning_rate": 4.754098360655738e-06, + "loss": 0.3137, + "step": 1218 + }, + { + "epoch": 0.06, + "grad_norm": 0.8176468832771439, + "learning_rate": 4.75800156128025e-06, + "loss": 0.3098, + "step": 1219 + }, + { + "epoch": 0.06, + "grad_norm": 0.9508349247770249, + "learning_rate": 4.761904761904762e-06, + "loss": 0.3522, + "step": 1220 + }, + { + "epoch": 0.06, + "grad_norm": 0.7255629930257858, + "learning_rate": 4.7658079625292745e-06, + "loss": 0.3204, + "step": 1221 + }, + { + "epoch": 0.06, + "grad_norm": 0.7177808258446199, + "learning_rate": 4.769711163153787e-06, + "loss": 0.3153, + "step": 1222 + }, + { + "epoch": 0.06, + "grad_norm": 0.8049481332762758, + "learning_rate": 4.773614363778298e-06, + "loss": 0.3197, + "step": 1223 + }, + { + "epoch": 0.06, + "grad_norm": 0.8104592910423961, + "learning_rate": 4.77751756440281e-06, + "loss": 0.3498, + "step": 1224 + }, + { + "epoch": 0.06, + "grad_norm": 0.8658183820118093, + "learning_rate": 4.781420765027322e-06, + "loss": 0.3196, + "step": 1225 + }, + { + "epoch": 0.06, + "grad_norm": 0.8302675732882627, + "learning_rate": 4.785323965651834e-06, + "loss": 0.3396, + "step": 1226 + }, + { + "epoch": 0.06, + "grad_norm": 0.7561091699667413, + "learning_rate": 4.7892271662763465e-06, + "loss": 0.3239, + "step": 1227 + }, + { + "epoch": 0.06, + "grad_norm": 0.7889787861290779, + "learning_rate": 4.793130366900859e-06, + "loss": 0.3338, + "step": 1228 + }, + { + "epoch": 0.06, + "grad_norm": 0.7059046769531109, + "learning_rate": 4.797033567525371e-06, + "loss": 0.3045, + "step": 1229 + }, + { + "epoch": 0.06, + "grad_norm": 0.7423340821227207, + "learning_rate": 4.800936768149883e-06, + "loss": 0.3386, + "step": 1230 + }, + { + "epoch": 0.06, + "grad_norm": 0.8360795460167664, + "learning_rate": 4.804839968774395e-06, + "loss": 0.3359, + "step": 1231 + }, + { + "epoch": 0.06, + "grad_norm": 0.8222573732108486, + "learning_rate": 4.808743169398907e-06, + "loss": 0.3371, + "step": 1232 + }, + { + "epoch": 0.06, + "grad_norm": 0.8485394399110076, + "learning_rate": 4.812646370023419e-06, + "loss": 0.3631, + "step": 1233 + }, + { + "epoch": 0.06, + "grad_norm": 0.794027479491186, + "learning_rate": 4.8165495706479315e-06, + "loss": 0.3463, + "step": 1234 + }, + { + "epoch": 0.06, + "grad_norm": 0.8185214444691623, + "learning_rate": 4.820452771272444e-06, + "loss": 0.3337, + "step": 1235 + }, + { + "epoch": 0.06, + "grad_norm": 0.8221224991308261, + "learning_rate": 4.824355971896956e-06, + "loss": 0.3285, + "step": 1236 + }, + { + "epoch": 0.06, + "grad_norm": 0.7919473301914005, + "learning_rate": 4.828259172521468e-06, + "loss": 0.346, + "step": 1237 + }, + { + "epoch": 0.06, + "grad_norm": 0.7244893762319451, + "learning_rate": 4.83216237314598e-06, + "loss": 0.3118, + "step": 1238 + }, + { + "epoch": 0.06, + "grad_norm": 0.8396373238994559, + "learning_rate": 4.836065573770492e-06, + "loss": 0.3577, + "step": 1239 + }, + { + "epoch": 0.06, + "grad_norm": 0.9020800742116587, + "learning_rate": 4.839968774395004e-06, + "loss": 0.3102, + "step": 1240 + }, + { + "epoch": 0.06, + "grad_norm": 0.7819052930665716, + "learning_rate": 4.8438719750195165e-06, + "loss": 0.32, + "step": 1241 + }, + { + "epoch": 0.06, + "grad_norm": 0.7497677853256657, + "learning_rate": 4.847775175644029e-06, + "loss": 0.3151, + "step": 1242 + }, + { + "epoch": 0.06, + "grad_norm": 0.7829821859204853, + "learning_rate": 4.851678376268541e-06, + "loss": 0.3434, + "step": 1243 + }, + { + "epoch": 0.06, + "grad_norm": 0.8186020024751871, + "learning_rate": 4.855581576893053e-06, + "loss": 0.3477, + "step": 1244 + }, + { + "epoch": 0.06, + "grad_norm": 0.7362084513663902, + "learning_rate": 4.859484777517565e-06, + "loss": 0.3035, + "step": 1245 + }, + { + "epoch": 0.06, + "grad_norm": 0.8505777937565062, + "learning_rate": 4.863387978142076e-06, + "loss": 0.3177, + "step": 1246 + }, + { + "epoch": 0.06, + "grad_norm": 0.8145690492577363, + "learning_rate": 4.8672911787665885e-06, + "loss": 0.3428, + "step": 1247 + }, + { + "epoch": 0.06, + "grad_norm": 0.7818545959585905, + "learning_rate": 4.871194379391101e-06, + "loss": 0.3122, + "step": 1248 + }, + { + "epoch": 0.06, + "grad_norm": 0.7960478442547329, + "learning_rate": 4.875097580015613e-06, + "loss": 0.3269, + "step": 1249 + }, + { + "epoch": 0.06, + "grad_norm": 0.7779048211833903, + "learning_rate": 4.879000780640125e-06, + "loss": 0.3283, + "step": 1250 + }, + { + "epoch": 0.06, + "grad_norm": 0.8249720164542179, + "learning_rate": 4.882903981264637e-06, + "loss": 0.3307, + "step": 1251 + }, + { + "epoch": 0.06, + "grad_norm": 0.7943877729716782, + "learning_rate": 4.886807181889149e-06, + "loss": 0.3079, + "step": 1252 + }, + { + "epoch": 0.06, + "grad_norm": 0.8034055560865014, + "learning_rate": 4.890710382513661e-06, + "loss": 0.321, + "step": 1253 + }, + { + "epoch": 0.06, + "grad_norm": 0.8410481455135758, + "learning_rate": 4.8946135831381735e-06, + "loss": 0.3193, + "step": 1254 + }, + { + "epoch": 0.06, + "grad_norm": 0.8264291944888741, + "learning_rate": 4.898516783762686e-06, + "loss": 0.3126, + "step": 1255 + }, + { + "epoch": 0.06, + "grad_norm": 0.8531688397120727, + "learning_rate": 4.902419984387198e-06, + "loss": 0.3136, + "step": 1256 + }, + { + "epoch": 0.06, + "grad_norm": 0.7685841005898182, + "learning_rate": 4.90632318501171e-06, + "loss": 0.3345, + "step": 1257 + }, + { + "epoch": 0.06, + "grad_norm": 0.7830586414062322, + "learning_rate": 4.910226385636222e-06, + "loss": 0.3315, + "step": 1258 + }, + { + "epoch": 0.06, + "grad_norm": 0.7813446500308224, + "learning_rate": 4.914129586260734e-06, + "loss": 0.3218, + "step": 1259 + }, + { + "epoch": 0.06, + "grad_norm": 0.7735809271337962, + "learning_rate": 4.918032786885246e-06, + "loss": 0.3165, + "step": 1260 + }, + { + "epoch": 0.06, + "grad_norm": 0.7361258705235983, + "learning_rate": 4.9219359875097584e-06, + "loss": 0.3013, + "step": 1261 + }, + { + "epoch": 0.06, + "grad_norm": 0.7728615677939132, + "learning_rate": 4.925839188134271e-06, + "loss": 0.3308, + "step": 1262 + }, + { + "epoch": 0.06, + "grad_norm": 0.7756592390849794, + "learning_rate": 4.929742388758783e-06, + "loss": 0.3228, + "step": 1263 + }, + { + "epoch": 0.06, + "grad_norm": 0.771770736441463, + "learning_rate": 4.933645589383295e-06, + "loss": 0.3148, + "step": 1264 + }, + { + "epoch": 0.06, + "grad_norm": 0.7056656979199595, + "learning_rate": 4.937548790007807e-06, + "loss": 0.3077, + "step": 1265 + }, + { + "epoch": 0.06, + "grad_norm": 0.7556295649413937, + "learning_rate": 4.941451990632319e-06, + "loss": 0.3192, + "step": 1266 + }, + { + "epoch": 0.06, + "grad_norm": 0.7550118187784288, + "learning_rate": 4.945355191256831e-06, + "loss": 0.3303, + "step": 1267 + }, + { + "epoch": 0.06, + "grad_norm": 0.7752972144288165, + "learning_rate": 4.9492583918813434e-06, + "loss": 0.3488, + "step": 1268 + }, + { + "epoch": 0.06, + "grad_norm": 0.7831803680685049, + "learning_rate": 4.9531615925058556e-06, + "loss": 0.3293, + "step": 1269 + }, + { + "epoch": 0.06, + "grad_norm": 0.748514262293179, + "learning_rate": 4.957064793130367e-06, + "loss": 0.3254, + "step": 1270 + }, + { + "epoch": 0.06, + "grad_norm": 0.8236744619318931, + "learning_rate": 4.960967993754879e-06, + "loss": 0.3477, + "step": 1271 + }, + { + "epoch": 0.06, + "grad_norm": 0.7638744825055725, + "learning_rate": 4.964871194379391e-06, + "loss": 0.3251, + "step": 1272 + }, + { + "epoch": 0.06, + "grad_norm": 0.7318747144658153, + "learning_rate": 4.968774395003903e-06, + "loss": 0.3181, + "step": 1273 + }, + { + "epoch": 0.06, + "grad_norm": 0.7828499676619863, + "learning_rate": 4.9726775956284154e-06, + "loss": 0.3324, + "step": 1274 + }, + { + "epoch": 0.06, + "grad_norm": 0.791571618969947, + "learning_rate": 4.9765807962529276e-06, + "loss": 0.3213, + "step": 1275 + }, + { + "epoch": 0.06, + "grad_norm": 0.7951307041122969, + "learning_rate": 4.98048399687744e-06, + "loss": 0.3091, + "step": 1276 + }, + { + "epoch": 0.06, + "grad_norm": 0.8255482691373225, + "learning_rate": 4.984387197501952e-06, + "loss": 0.3428, + "step": 1277 + }, + { + "epoch": 0.06, + "grad_norm": 0.708282737480509, + "learning_rate": 4.988290398126464e-06, + "loss": 0.3132, + "step": 1278 + }, + { + "epoch": 0.06, + "grad_norm": 0.7885071154472629, + "learning_rate": 4.992193598750976e-06, + "loss": 0.31, + "step": 1279 + }, + { + "epoch": 0.06, + "grad_norm": 0.777552679317253, + "learning_rate": 4.996096799375488e-06, + "loss": 0.3457, + "step": 1280 + }, + { + "epoch": 0.06, + "grad_norm": 0.764862573690078, + "learning_rate": 5e-06, + "loss": 0.3418, + "step": 1281 + }, + { + "epoch": 0.06, + "grad_norm": 0.7803137656050225, + "learning_rate": 4.999999992806565e-06, + "loss": 0.3037, + "step": 1282 + }, + { + "epoch": 0.06, + "grad_norm": 0.8110414517342917, + "learning_rate": 4.999999971226257e-06, + "loss": 0.3258, + "step": 1283 + }, + { + "epoch": 0.06, + "grad_norm": 0.7133219635181552, + "learning_rate": 4.9999999352590784e-06, + "loss": 0.3227, + "step": 1284 + }, + { + "epoch": 0.06, + "grad_norm": 0.7226255150842057, + "learning_rate": 4.999999884905028e-06, + "loss": 0.3035, + "step": 1285 + }, + { + "epoch": 0.06, + "grad_norm": 0.8308273029600334, + "learning_rate": 4.999999820164106e-06, + "loss": 0.3536, + "step": 1286 + }, + { + "epoch": 0.06, + "grad_norm": 0.7251481944860867, + "learning_rate": 4.999999741036315e-06, + "loss": 0.3498, + "step": 1287 + }, + { + "epoch": 0.06, + "grad_norm": 0.8174791747152399, + "learning_rate": 4.9999996475216525e-06, + "loss": 0.3196, + "step": 1288 + }, + { + "epoch": 0.06, + "grad_norm": 0.8069131948035422, + "learning_rate": 4.999999539620122e-06, + "loss": 0.3288, + "step": 1289 + }, + { + "epoch": 0.06, + "grad_norm": 0.7485398226862541, + "learning_rate": 4.999999417331721e-06, + "loss": 0.3261, + "step": 1290 + }, + { + "epoch": 0.06, + "grad_norm": 0.733823443482088, + "learning_rate": 4.9999992806564526e-06, + "loss": 0.3506, + "step": 1291 + }, + { + "epoch": 0.06, + "grad_norm": 0.9072100948677695, + "learning_rate": 4.999999129594316e-06, + "loss": 0.3435, + "step": 1292 + }, + { + "epoch": 0.06, + "grad_norm": 0.7612573448994624, + "learning_rate": 4.999998964145313e-06, + "loss": 0.3444, + "step": 1293 + }, + { + "epoch": 0.06, + "grad_norm": 0.7939437529928963, + "learning_rate": 4.999998784309444e-06, + "loss": 0.3264, + "step": 1294 + }, + { + "epoch": 0.06, + "grad_norm": 0.7609080159324426, + "learning_rate": 4.999998590086711e-06, + "loss": 0.3206, + "step": 1295 + }, + { + "epoch": 0.06, + "grad_norm": 0.7384256141372285, + "learning_rate": 4.999998381477114e-06, + "loss": 0.335, + "step": 1296 + }, + { + "epoch": 0.06, + "grad_norm": 0.6844258838632076, + "learning_rate": 4.999998158480655e-06, + "loss": 0.2974, + "step": 1297 + }, + { + "epoch": 0.06, + "grad_norm": 0.8054200488914997, + "learning_rate": 4.999997921097334e-06, + "loss": 0.3343, + "step": 1298 + }, + { + "epoch": 0.06, + "grad_norm": 0.7867870728390103, + "learning_rate": 4.999997669327155e-06, + "loss": 0.3235, + "step": 1299 + }, + { + "epoch": 0.06, + "grad_norm": 0.7975287047884598, + "learning_rate": 4.999997403170116e-06, + "loss": 0.316, + "step": 1300 + }, + { + "epoch": 0.06, + "grad_norm": 0.7509507853960665, + "learning_rate": 4.9999971226262215e-06, + "loss": 0.3171, + "step": 1301 + }, + { + "epoch": 0.06, + "grad_norm": 0.741425898796729, + "learning_rate": 4.999996827695472e-06, + "loss": 0.2881, + "step": 1302 + }, + { + "epoch": 0.06, + "grad_norm": 0.677456620155258, + "learning_rate": 4.999996518377867e-06, + "loss": 0.3076, + "step": 1303 + }, + { + "epoch": 0.06, + "grad_norm": 0.7650894674918964, + "learning_rate": 4.999996194673413e-06, + "loss": 0.3415, + "step": 1304 + }, + { + "epoch": 0.06, + "grad_norm": 0.6911346932621355, + "learning_rate": 4.999995856582108e-06, + "loss": 0.3017, + "step": 1305 + }, + { + "epoch": 0.06, + "grad_norm": 1.2509123669028706, + "learning_rate": 4.999995504103956e-06, + "loss": 0.3249, + "step": 1306 + }, + { + "epoch": 0.06, + "grad_norm": 0.7745646094052938, + "learning_rate": 4.999995137238957e-06, + "loss": 0.3375, + "step": 1307 + }, + { + "epoch": 0.06, + "grad_norm": 0.6935304153293038, + "learning_rate": 4.999994755987115e-06, + "loss": 0.308, + "step": 1308 + }, + { + "epoch": 0.06, + "grad_norm": 0.7912320672734515, + "learning_rate": 4.9999943603484315e-06, + "loss": 0.3292, + "step": 1309 + }, + { + "epoch": 0.06, + "grad_norm": 0.8507651477028199, + "learning_rate": 4.999993950322909e-06, + "loss": 0.3415, + "step": 1310 + }, + { + "epoch": 0.06, + "grad_norm": 0.7367441318116214, + "learning_rate": 4.99999352591055e-06, + "loss": 0.3186, + "step": 1311 + }, + { + "epoch": 0.06, + "grad_norm": 0.7730847121976596, + "learning_rate": 4.999993087111356e-06, + "loss": 0.3304, + "step": 1312 + }, + { + "epoch": 0.06, + "grad_norm": 0.7367448351282575, + "learning_rate": 4.99999263392533e-06, + "loss": 0.3136, + "step": 1313 + }, + { + "epoch": 0.06, + "grad_norm": 0.7274886410459188, + "learning_rate": 4.999992166352475e-06, + "loss": 0.3145, + "step": 1314 + }, + { + "epoch": 0.06, + "grad_norm": 0.6964111368551325, + "learning_rate": 4.999991684392793e-06, + "loss": 0.3253, + "step": 1315 + }, + { + "epoch": 0.06, + "grad_norm": 0.7866002292207801, + "learning_rate": 4.999991188046288e-06, + "loss": 0.3344, + "step": 1316 + }, + { + "epoch": 0.06, + "grad_norm": 0.7133987678637378, + "learning_rate": 4.999990677312961e-06, + "loss": 0.3118, + "step": 1317 + }, + { + "epoch": 0.06, + "grad_norm": 0.7517369159089695, + "learning_rate": 4.999990152192817e-06, + "loss": 0.3251, + "step": 1318 + }, + { + "epoch": 0.06, + "grad_norm": 0.728674657623226, + "learning_rate": 4.999989612685858e-06, + "loss": 0.2986, + "step": 1319 + }, + { + "epoch": 0.06, + "grad_norm": 0.7471289297645265, + "learning_rate": 4.999989058792087e-06, + "loss": 0.3412, + "step": 1320 + }, + { + "epoch": 0.06, + "grad_norm": 0.6840244527003992, + "learning_rate": 4.999988490511507e-06, + "loss": 0.3144, + "step": 1321 + }, + { + "epoch": 0.06, + "grad_norm": 0.7410554736281589, + "learning_rate": 4.9999879078441215e-06, + "loss": 0.3281, + "step": 1322 + }, + { + "epoch": 0.06, + "grad_norm": 0.7187145273078142, + "learning_rate": 4.999987310789935e-06, + "loss": 0.3048, + "step": 1323 + }, + { + "epoch": 0.06, + "grad_norm": 0.7563440638011815, + "learning_rate": 4.9999866993489485e-06, + "loss": 0.3233, + "step": 1324 + }, + { + "epoch": 0.06, + "grad_norm": 0.826675002511806, + "learning_rate": 4.999986073521168e-06, + "loss": 0.3373, + "step": 1325 + }, + { + "epoch": 0.06, + "grad_norm": 0.7260286045265256, + "learning_rate": 4.999985433306595e-06, + "loss": 0.3316, + "step": 1326 + }, + { + "epoch": 0.06, + "grad_norm": 0.8456393204595302, + "learning_rate": 4.999984778705235e-06, + "loss": 0.3164, + "step": 1327 + }, + { + "epoch": 0.06, + "grad_norm": 0.7625339001232265, + "learning_rate": 4.999984109717091e-06, + "loss": 0.3324, + "step": 1328 + }, + { + "epoch": 0.06, + "grad_norm": 0.707449583391776, + "learning_rate": 4.999983426342165e-06, + "loss": 0.3302, + "step": 1329 + }, + { + "epoch": 0.06, + "grad_norm": 0.7240693800042678, + "learning_rate": 4.9999827285804645e-06, + "loss": 0.3247, + "step": 1330 + }, + { + "epoch": 0.06, + "grad_norm": 0.7376891288395485, + "learning_rate": 4.9999820164319914e-06, + "loss": 0.3205, + "step": 1331 + }, + { + "epoch": 0.06, + "grad_norm": 0.7594742731199269, + "learning_rate": 4.99998128989675e-06, + "loss": 0.3314, + "step": 1332 + }, + { + "epoch": 0.06, + "grad_norm": 0.7393227447224908, + "learning_rate": 4.999980548974745e-06, + "loss": 0.3267, + "step": 1333 + }, + { + "epoch": 0.06, + "grad_norm": 0.7442323249759815, + "learning_rate": 4.99997979366598e-06, + "loss": 0.3219, + "step": 1334 + }, + { + "epoch": 0.06, + "grad_norm": 0.699437957714745, + "learning_rate": 4.9999790239704594e-06, + "loss": 0.2982, + "step": 1335 + }, + { + "epoch": 0.06, + "grad_norm": 0.7149296770155923, + "learning_rate": 4.999978239888188e-06, + "loss": 0.3221, + "step": 1336 + }, + { + "epoch": 0.06, + "grad_norm": 0.7507402452498291, + "learning_rate": 4.9999774414191706e-06, + "loss": 0.3158, + "step": 1337 + }, + { + "epoch": 0.06, + "grad_norm": 0.7132009971878002, + "learning_rate": 4.999976628563411e-06, + "loss": 0.314, + "step": 1338 + }, + { + "epoch": 0.06, + "grad_norm": 0.732670820746385, + "learning_rate": 4.999975801320915e-06, + "loss": 0.3112, + "step": 1339 + }, + { + "epoch": 0.06, + "grad_norm": 0.6960867630576326, + "learning_rate": 4.999974959691685e-06, + "loss": 0.3212, + "step": 1340 + }, + { + "epoch": 0.06, + "grad_norm": 0.7336747596530897, + "learning_rate": 4.999974103675729e-06, + "loss": 0.3138, + "step": 1341 + }, + { + "epoch": 0.06, + "grad_norm": 0.7376761786809612, + "learning_rate": 4.99997323327305e-06, + "loss": 0.3342, + "step": 1342 + }, + { + "epoch": 0.06, + "grad_norm": 0.7663278709694125, + "learning_rate": 4.999972348483653e-06, + "loss": 0.3336, + "step": 1343 + }, + { + "epoch": 0.06, + "grad_norm": 0.7182471284919185, + "learning_rate": 4.999971449307543e-06, + "loss": 0.3092, + "step": 1344 + }, + { + "epoch": 0.06, + "grad_norm": 0.6752330412016309, + "learning_rate": 4.999970535744726e-06, + "loss": 0.2974, + "step": 1345 + }, + { + "epoch": 0.06, + "grad_norm": 0.8053265039608123, + "learning_rate": 4.999969607795207e-06, + "loss": 0.3367, + "step": 1346 + }, + { + "epoch": 0.06, + "grad_norm": 0.7530869191830782, + "learning_rate": 4.999968665458992e-06, + "loss": 0.3015, + "step": 1347 + }, + { + "epoch": 0.06, + "grad_norm": 0.7315717862149176, + "learning_rate": 4.999967708736085e-06, + "loss": 0.3263, + "step": 1348 + }, + { + "epoch": 0.06, + "grad_norm": 0.7651342740255899, + "learning_rate": 4.999966737626492e-06, + "loss": 0.3268, + "step": 1349 + }, + { + "epoch": 0.06, + "grad_norm": 0.8765825368834491, + "learning_rate": 4.999965752130219e-06, + "loss": 0.3456, + "step": 1350 + }, + { + "epoch": 0.06, + "grad_norm": 0.7453257401213077, + "learning_rate": 4.999964752247271e-06, + "loss": 0.3182, + "step": 1351 + }, + { + "epoch": 0.06, + "grad_norm": 0.7207060754579225, + "learning_rate": 4.999963737977655e-06, + "loss": 0.314, + "step": 1352 + }, + { + "epoch": 0.06, + "grad_norm": 0.7723811117964019, + "learning_rate": 4.999962709321376e-06, + "loss": 0.3269, + "step": 1353 + }, + { + "epoch": 0.06, + "grad_norm": 0.7672414775940899, + "learning_rate": 4.99996166627844e-06, + "loss": 0.3405, + "step": 1354 + }, + { + "epoch": 0.06, + "grad_norm": 0.7376459107776716, + "learning_rate": 4.999960608848852e-06, + "loss": 0.3335, + "step": 1355 + }, + { + "epoch": 0.06, + "grad_norm": 0.731540370103192, + "learning_rate": 4.99995953703262e-06, + "loss": 0.3109, + "step": 1356 + }, + { + "epoch": 0.06, + "grad_norm": 0.7496925688781383, + "learning_rate": 4.999958450829749e-06, + "loss": 0.3233, + "step": 1357 + }, + { + "epoch": 0.06, + "grad_norm": 0.7924665179991935, + "learning_rate": 4.999957350240245e-06, + "loss": 0.314, + "step": 1358 + }, + { + "epoch": 0.06, + "grad_norm": 0.7467863568791201, + "learning_rate": 4.999956235264115e-06, + "loss": 0.3505, + "step": 1359 + }, + { + "epoch": 0.06, + "grad_norm": 0.7425333397549844, + "learning_rate": 4.9999551059013655e-06, + "loss": 0.3091, + "step": 1360 + }, + { + "epoch": 0.06, + "grad_norm": 0.7220379527817716, + "learning_rate": 4.999953962152002e-06, + "loss": 0.3104, + "step": 1361 + }, + { + "epoch": 0.06, + "grad_norm": 0.7363486132695186, + "learning_rate": 4.9999528040160325e-06, + "loss": 0.3321, + "step": 1362 + }, + { + "epoch": 0.06, + "grad_norm": 0.7845208892207514, + "learning_rate": 4.999951631493463e-06, + "loss": 0.3298, + "step": 1363 + }, + { + "epoch": 0.06, + "grad_norm": 0.7652113944199916, + "learning_rate": 4.999950444584299e-06, + "loss": 0.3367, + "step": 1364 + }, + { + "epoch": 0.06, + "grad_norm": 0.7699446091316131, + "learning_rate": 4.99994924328855e-06, + "loss": 0.3075, + "step": 1365 + }, + { + "epoch": 0.06, + "grad_norm": 0.7817358011925266, + "learning_rate": 4.9999480276062215e-06, + "loss": 0.3425, + "step": 1366 + }, + { + "epoch": 0.06, + "grad_norm": 0.7593612499750346, + "learning_rate": 4.999946797537319e-06, + "loss": 0.3364, + "step": 1367 + }, + { + "epoch": 0.06, + "grad_norm": 0.7729739330217843, + "learning_rate": 4.999945553081853e-06, + "loss": 0.3191, + "step": 1368 + }, + { + "epoch": 0.06, + "grad_norm": 0.8293784372725229, + "learning_rate": 4.9999442942398265e-06, + "loss": 0.3354, + "step": 1369 + }, + { + "epoch": 0.06, + "grad_norm": 0.8294068613949712, + "learning_rate": 4.999943021011251e-06, + "loss": 0.3411, + "step": 1370 + }, + { + "epoch": 0.06, + "grad_norm": 0.7718945851400763, + "learning_rate": 4.99994173339613e-06, + "loss": 0.3348, + "step": 1371 + }, + { + "epoch": 0.06, + "grad_norm": 0.7874737232528474, + "learning_rate": 4.999940431394473e-06, + "loss": 0.3185, + "step": 1372 + }, + { + "epoch": 0.06, + "grad_norm": 0.8004965991525651, + "learning_rate": 4.999939115006288e-06, + "loss": 0.3315, + "step": 1373 + }, + { + "epoch": 0.06, + "grad_norm": 0.7780665180412123, + "learning_rate": 4.999937784231581e-06, + "loss": 0.3253, + "step": 1374 + }, + { + "epoch": 0.06, + "grad_norm": 0.8331445053959585, + "learning_rate": 4.999936439070361e-06, + "loss": 0.3242, + "step": 1375 + }, + { + "epoch": 0.06, + "grad_norm": 0.7505248455802971, + "learning_rate": 4.999935079522635e-06, + "loss": 0.317, + "step": 1376 + }, + { + "epoch": 0.06, + "grad_norm": 0.7023018733888201, + "learning_rate": 4.99993370558841e-06, + "loss": 0.3306, + "step": 1377 + }, + { + "epoch": 0.06, + "grad_norm": 0.7457684082937429, + "learning_rate": 4.999932317267696e-06, + "loss": 0.3193, + "step": 1378 + }, + { + "epoch": 0.06, + "grad_norm": 0.7455622596608662, + "learning_rate": 4.9999309145605e-06, + "loss": 0.3207, + "step": 1379 + }, + { + "epoch": 0.06, + "grad_norm": 0.7284889032760635, + "learning_rate": 4.999929497466829e-06, + "loss": 0.313, + "step": 1380 + }, + { + "epoch": 0.06, + "grad_norm": 0.6766479267786115, + "learning_rate": 4.999928065986692e-06, + "loss": 0.2926, + "step": 1381 + }, + { + "epoch": 0.06, + "grad_norm": 0.7498029346052265, + "learning_rate": 4.999926620120098e-06, + "loss": 0.328, + "step": 1382 + }, + { + "epoch": 0.06, + "grad_norm": 0.7410110099764343, + "learning_rate": 4.999925159867055e-06, + "loss": 0.3283, + "step": 1383 + }, + { + "epoch": 0.06, + "grad_norm": 0.7320006488126742, + "learning_rate": 4.99992368522757e-06, + "loss": 0.3225, + "step": 1384 + }, + { + "epoch": 0.06, + "grad_norm": 0.7553792860215451, + "learning_rate": 4.999922196201653e-06, + "loss": 0.3147, + "step": 1385 + }, + { + "epoch": 0.06, + "grad_norm": 0.7864043415783705, + "learning_rate": 4.999920692789312e-06, + "loss": 0.3253, + "step": 1386 + }, + { + "epoch": 0.06, + "grad_norm": 0.7715901578313776, + "learning_rate": 4.9999191749905556e-06, + "loss": 0.3382, + "step": 1387 + }, + { + "epoch": 0.07, + "grad_norm": 0.728328630068724, + "learning_rate": 4.999917642805393e-06, + "loss": 0.3232, + "step": 1388 + }, + { + "epoch": 0.07, + "grad_norm": 0.7732240391462509, + "learning_rate": 4.999916096233832e-06, + "loss": 0.3304, + "step": 1389 + }, + { + "epoch": 0.07, + "grad_norm": 0.7240590240714608, + "learning_rate": 4.999914535275883e-06, + "loss": 0.3118, + "step": 1390 + }, + { + "epoch": 0.07, + "grad_norm": 0.7639291371417187, + "learning_rate": 4.9999129599315544e-06, + "loss": 0.3178, + "step": 1391 + }, + { + "epoch": 0.07, + "grad_norm": 0.7580636566443026, + "learning_rate": 4.999911370200854e-06, + "loss": 0.3178, + "step": 1392 + }, + { + "epoch": 0.07, + "grad_norm": 0.7975027762407995, + "learning_rate": 4.999909766083792e-06, + "loss": 0.3151, + "step": 1393 + }, + { + "epoch": 0.07, + "grad_norm": 0.8284257291499109, + "learning_rate": 4.999908147580379e-06, + "loss": 0.3376, + "step": 1394 + }, + { + "epoch": 0.07, + "grad_norm": 0.6870880785742166, + "learning_rate": 4.999906514690622e-06, + "loss": 0.3063, + "step": 1395 + }, + { + "epoch": 0.07, + "grad_norm": 0.750755239452954, + "learning_rate": 4.999904867414532e-06, + "loss": 0.3087, + "step": 1396 + }, + { + "epoch": 0.07, + "grad_norm": 0.7686038513243841, + "learning_rate": 4.999903205752117e-06, + "loss": 0.3053, + "step": 1397 + }, + { + "epoch": 0.07, + "grad_norm": 0.6955423395534619, + "learning_rate": 4.999901529703387e-06, + "loss": 0.3145, + "step": 1398 + }, + { + "epoch": 0.07, + "grad_norm": 0.7458412751939388, + "learning_rate": 4.999899839268354e-06, + "loss": 0.3094, + "step": 1399 + }, + { + "epoch": 0.07, + "grad_norm": 0.8556588701793159, + "learning_rate": 4.999898134447024e-06, + "loss": 0.3531, + "step": 1400 + }, + { + "epoch": 0.07, + "grad_norm": 0.7759435277767608, + "learning_rate": 4.999896415239409e-06, + "loss": 0.3136, + "step": 1401 + }, + { + "epoch": 0.07, + "grad_norm": 0.713043956245841, + "learning_rate": 4.9998946816455186e-06, + "loss": 0.3031, + "step": 1402 + }, + { + "epoch": 0.07, + "grad_norm": 0.721641871002048, + "learning_rate": 4.999892933665362e-06, + "loss": 0.3133, + "step": 1403 + }, + { + "epoch": 0.07, + "grad_norm": 0.7847844087621879, + "learning_rate": 4.9998911712989506e-06, + "loss": 0.3443, + "step": 1404 + }, + { + "epoch": 0.07, + "grad_norm": 0.7543272724175308, + "learning_rate": 4.9998893945462935e-06, + "loss": 0.3135, + "step": 1405 + }, + { + "epoch": 0.07, + "grad_norm": 0.7515858786415736, + "learning_rate": 4.999887603407402e-06, + "loss": 0.3236, + "step": 1406 + }, + { + "epoch": 0.07, + "grad_norm": 0.7276615670183029, + "learning_rate": 4.999885797882284e-06, + "loss": 0.3288, + "step": 1407 + }, + { + "epoch": 0.07, + "grad_norm": 0.7678786528139937, + "learning_rate": 4.999883977970953e-06, + "loss": 0.3086, + "step": 1408 + }, + { + "epoch": 0.07, + "grad_norm": 0.7839575915932258, + "learning_rate": 4.999882143673418e-06, + "loss": 0.3043, + "step": 1409 + }, + { + "epoch": 0.07, + "grad_norm": 0.7796041764180076, + "learning_rate": 4.999880294989689e-06, + "loss": 0.3272, + "step": 1410 + }, + { + "epoch": 0.07, + "grad_norm": 0.7574251431609124, + "learning_rate": 4.999878431919777e-06, + "loss": 0.3224, + "step": 1411 + }, + { + "epoch": 0.07, + "grad_norm": 0.8005703637422873, + "learning_rate": 4.999876554463694e-06, + "loss": 0.3437, + "step": 1412 + }, + { + "epoch": 0.07, + "grad_norm": 0.770765554335854, + "learning_rate": 4.9998746626214486e-06, + "loss": 0.335, + "step": 1413 + }, + { + "epoch": 0.07, + "grad_norm": 0.760272510637501, + "learning_rate": 4.999872756393054e-06, + "loss": 0.3254, + "step": 1414 + }, + { + "epoch": 0.07, + "grad_norm": 0.7500995242982944, + "learning_rate": 4.9998708357785185e-06, + "loss": 0.3016, + "step": 1415 + }, + { + "epoch": 0.07, + "grad_norm": 0.7675302714379824, + "learning_rate": 4.999868900777855e-06, + "loss": 0.3571, + "step": 1416 + }, + { + "epoch": 0.07, + "grad_norm": 0.7471692721093864, + "learning_rate": 4.999866951391076e-06, + "loss": 0.3136, + "step": 1417 + }, + { + "epoch": 0.07, + "grad_norm": 0.7413781941137431, + "learning_rate": 4.9998649876181895e-06, + "loss": 0.3259, + "step": 1418 + }, + { + "epoch": 0.07, + "grad_norm": 0.7385478956059102, + "learning_rate": 4.999863009459209e-06, + "loss": 0.3267, + "step": 1419 + }, + { + "epoch": 0.07, + "grad_norm": 0.7287171944674179, + "learning_rate": 4.9998610169141444e-06, + "loss": 0.3151, + "step": 1420 + }, + { + "epoch": 0.07, + "grad_norm": 0.8016330543785316, + "learning_rate": 4.999859009983009e-06, + "loss": 0.3311, + "step": 1421 + }, + { + "epoch": 0.07, + "grad_norm": 0.7659098906624392, + "learning_rate": 4.999856988665812e-06, + "loss": 0.3151, + "step": 1422 + }, + { + "epoch": 0.07, + "grad_norm": 0.7748308044546797, + "learning_rate": 4.999854952962568e-06, + "loss": 0.3208, + "step": 1423 + }, + { + "epoch": 0.07, + "grad_norm": 0.8162940742738379, + "learning_rate": 4.999852902873286e-06, + "loss": 0.3456, + "step": 1424 + }, + { + "epoch": 0.07, + "grad_norm": 0.8064536318089695, + "learning_rate": 4.999850838397978e-06, + "loss": 0.3314, + "step": 1425 + }, + { + "epoch": 0.07, + "grad_norm": 0.7051252301364939, + "learning_rate": 4.999848759536658e-06, + "loss": 0.3278, + "step": 1426 + }, + { + "epoch": 0.07, + "grad_norm": 0.7636065634705897, + "learning_rate": 4.999846666289337e-06, + "loss": 0.3212, + "step": 1427 + }, + { + "epoch": 0.07, + "grad_norm": 0.7985170581778217, + "learning_rate": 4.999844558656025e-06, + "loss": 0.3118, + "step": 1428 + }, + { + "epoch": 0.07, + "grad_norm": 0.696943496637969, + "learning_rate": 4.9998424366367386e-06, + "loss": 0.3355, + "step": 1429 + }, + { + "epoch": 0.07, + "grad_norm": 0.7678619919042976, + "learning_rate": 4.999840300231485e-06, + "loss": 0.3443, + "step": 1430 + }, + { + "epoch": 0.07, + "grad_norm": 0.7645393944549164, + "learning_rate": 4.9998381494402794e-06, + "loss": 0.3177, + "step": 1431 + }, + { + "epoch": 0.07, + "grad_norm": 0.7960209204280255, + "learning_rate": 4.999835984263134e-06, + "loss": 0.3061, + "step": 1432 + }, + { + "epoch": 0.07, + "grad_norm": 0.7852127326746268, + "learning_rate": 4.99983380470006e-06, + "loss": 0.3271, + "step": 1433 + }, + { + "epoch": 0.07, + "grad_norm": 0.8245427438276254, + "learning_rate": 4.999831610751071e-06, + "loss": 0.3367, + "step": 1434 + }, + { + "epoch": 0.07, + "grad_norm": 0.7055901907643555, + "learning_rate": 4.99982940241618e-06, + "loss": 0.295, + "step": 1435 + }, + { + "epoch": 0.07, + "grad_norm": 0.7743251183803946, + "learning_rate": 4.999827179695399e-06, + "loss": 0.3116, + "step": 1436 + }, + { + "epoch": 0.07, + "grad_norm": 0.7572678382918469, + "learning_rate": 4.9998249425887404e-06, + "loss": 0.3205, + "step": 1437 + }, + { + "epoch": 0.07, + "grad_norm": 0.7530058536620665, + "learning_rate": 4.999822691096218e-06, + "loss": 0.3261, + "step": 1438 + }, + { + "epoch": 0.07, + "grad_norm": 0.7935108533156817, + "learning_rate": 4.999820425217844e-06, + "loss": 0.337, + "step": 1439 + }, + { + "epoch": 0.07, + "grad_norm": 0.6879200841488003, + "learning_rate": 4.999818144953632e-06, + "loss": 0.2973, + "step": 1440 + }, + { + "epoch": 0.07, + "grad_norm": 0.7710208367699641, + "learning_rate": 4.999815850303595e-06, + "loss": 0.3216, + "step": 1441 + }, + { + "epoch": 0.07, + "grad_norm": 0.7768023803389901, + "learning_rate": 4.999813541267745e-06, + "loss": 0.3284, + "step": 1442 + }, + { + "epoch": 0.07, + "grad_norm": 0.7780844186171958, + "learning_rate": 4.999811217846098e-06, + "loss": 0.2999, + "step": 1443 + }, + { + "epoch": 0.07, + "grad_norm": 0.9005667750952311, + "learning_rate": 4.9998088800386645e-06, + "loss": 0.3403, + "step": 1444 + }, + { + "epoch": 0.07, + "grad_norm": 0.6656818167416058, + "learning_rate": 4.99980652784546e-06, + "loss": 0.3042, + "step": 1445 + }, + { + "epoch": 0.07, + "grad_norm": 0.7849902303955985, + "learning_rate": 4.999804161266497e-06, + "loss": 0.3059, + "step": 1446 + }, + { + "epoch": 0.07, + "grad_norm": 0.7511714916351702, + "learning_rate": 4.9998017803017894e-06, + "loss": 0.3003, + "step": 1447 + }, + { + "epoch": 0.07, + "grad_norm": 0.7499509656137348, + "learning_rate": 4.99979938495135e-06, + "loss": 0.3238, + "step": 1448 + }, + { + "epoch": 0.07, + "grad_norm": 0.7898348344375091, + "learning_rate": 4.999796975215195e-06, + "loss": 0.3292, + "step": 1449 + }, + { + "epoch": 0.07, + "grad_norm": 0.7759340245363471, + "learning_rate": 4.999794551093336e-06, + "loss": 0.3229, + "step": 1450 + }, + { + "epoch": 0.07, + "grad_norm": 0.7949613514841498, + "learning_rate": 4.999792112585787e-06, + "loss": 0.3254, + "step": 1451 + }, + { + "epoch": 0.07, + "grad_norm": 0.811030214076513, + "learning_rate": 4.999789659692564e-06, + "loss": 0.3408, + "step": 1452 + }, + { + "epoch": 0.07, + "grad_norm": 0.7863273664766185, + "learning_rate": 4.999787192413679e-06, + "loss": 0.2963, + "step": 1453 + }, + { + "epoch": 0.07, + "grad_norm": 0.8046246008440711, + "learning_rate": 4.999784710749146e-06, + "loss": 0.3058, + "step": 1454 + }, + { + "epoch": 0.07, + "grad_norm": 0.7505664921887646, + "learning_rate": 4.999782214698982e-06, + "loss": 0.3136, + "step": 1455 + }, + { + "epoch": 0.07, + "grad_norm": 0.7931554508127876, + "learning_rate": 4.9997797042631994e-06, + "loss": 0.3439, + "step": 1456 + }, + { + "epoch": 0.07, + "grad_norm": 0.7769282421116374, + "learning_rate": 4.999777179441812e-06, + "loss": 0.3191, + "step": 1457 + }, + { + "epoch": 0.07, + "grad_norm": 0.88297802817566, + "learning_rate": 4.999774640234836e-06, + "loss": 0.3402, + "step": 1458 + }, + { + "epoch": 0.07, + "grad_norm": 0.7878196274623868, + "learning_rate": 4.9997720866422845e-06, + "loss": 0.324, + "step": 1459 + }, + { + "epoch": 0.07, + "grad_norm": 0.7209205083679588, + "learning_rate": 4.9997695186641735e-06, + "loss": 0.3226, + "step": 1460 + }, + { + "epoch": 0.07, + "grad_norm": 0.7178222919139742, + "learning_rate": 4.999766936300517e-06, + "loss": 0.3329, + "step": 1461 + }, + { + "epoch": 0.07, + "grad_norm": 0.7331462724333657, + "learning_rate": 4.99976433955133e-06, + "loss": 0.3262, + "step": 1462 + }, + { + "epoch": 0.07, + "grad_norm": 0.6983354735218428, + "learning_rate": 4.999761728416628e-06, + "loss": 0.291, + "step": 1463 + }, + { + "epoch": 0.07, + "grad_norm": 0.7515257028444886, + "learning_rate": 4.999759102896425e-06, + "loss": 0.3174, + "step": 1464 + }, + { + "epoch": 0.07, + "grad_norm": 0.7171470173784439, + "learning_rate": 4.999756462990737e-06, + "loss": 0.3335, + "step": 1465 + }, + { + "epoch": 0.07, + "grad_norm": 0.6550264664467874, + "learning_rate": 4.999753808699579e-06, + "loss": 0.3158, + "step": 1466 + }, + { + "epoch": 0.07, + "grad_norm": 0.7401193701766067, + "learning_rate": 4.999751140022965e-06, + "loss": 0.3332, + "step": 1467 + }, + { + "epoch": 0.07, + "grad_norm": 0.7367393117315025, + "learning_rate": 4.9997484569609125e-06, + "loss": 0.3255, + "step": 1468 + }, + { + "epoch": 0.07, + "grad_norm": 0.7532629974878763, + "learning_rate": 4.999745759513436e-06, + "loss": 0.3427, + "step": 1469 + }, + { + "epoch": 0.07, + "grad_norm": 0.7589488653499541, + "learning_rate": 4.999743047680551e-06, + "loss": 0.3315, + "step": 1470 + }, + { + "epoch": 0.07, + "grad_norm": 0.7570716731130944, + "learning_rate": 4.999740321462272e-06, + "loss": 0.3199, + "step": 1471 + }, + { + "epoch": 0.07, + "grad_norm": 0.6830572060485115, + "learning_rate": 4.999737580858617e-06, + "loss": 0.3019, + "step": 1472 + }, + { + "epoch": 0.07, + "grad_norm": 0.7972202301333423, + "learning_rate": 4.9997348258696e-06, + "loss": 0.3377, + "step": 1473 + }, + { + "epoch": 0.07, + "grad_norm": 0.7408353535294735, + "learning_rate": 4.9997320564952364e-06, + "loss": 0.3244, + "step": 1474 + }, + { + "epoch": 0.07, + "grad_norm": 0.8500308092057246, + "learning_rate": 4.999729272735545e-06, + "loss": 0.3233, + "step": 1475 + }, + { + "epoch": 0.07, + "grad_norm": 0.6767041635007625, + "learning_rate": 4.9997264745905385e-06, + "loss": 0.3163, + "step": 1476 + }, + { + "epoch": 0.07, + "grad_norm": 0.7278111667740418, + "learning_rate": 4.999723662060235e-06, + "loss": 0.3325, + "step": 1477 + }, + { + "epoch": 0.07, + "grad_norm": 0.7577688724372197, + "learning_rate": 4.99972083514465e-06, + "loss": 0.3303, + "step": 1478 + }, + { + "epoch": 0.07, + "grad_norm": 0.7684262999074705, + "learning_rate": 4.9997179938438e-06, + "loss": 0.3365, + "step": 1479 + }, + { + "epoch": 0.07, + "grad_norm": 0.7207956773728583, + "learning_rate": 4.999715138157702e-06, + "loss": 0.3158, + "step": 1480 + }, + { + "epoch": 0.07, + "grad_norm": 0.660766685104361, + "learning_rate": 4.99971226808637e-06, + "loss": 0.304, + "step": 1481 + }, + { + "epoch": 0.07, + "grad_norm": 0.7828411923637231, + "learning_rate": 4.999709383629823e-06, + "loss": 0.316, + "step": 1482 + }, + { + "epoch": 0.07, + "grad_norm": 0.7390744229238987, + "learning_rate": 4.999706484788076e-06, + "loss": 0.3308, + "step": 1483 + }, + { + "epoch": 0.07, + "grad_norm": 0.7107513398493261, + "learning_rate": 4.9997035715611476e-06, + "loss": 0.3405, + "step": 1484 + }, + { + "epoch": 0.07, + "grad_norm": 0.7586528681082031, + "learning_rate": 4.999700643949053e-06, + "loss": 0.3364, + "step": 1485 + }, + { + "epoch": 0.07, + "grad_norm": 0.7621367392687403, + "learning_rate": 4.999697701951809e-06, + "loss": 0.3408, + "step": 1486 + }, + { + "epoch": 0.07, + "grad_norm": 0.6707334982158796, + "learning_rate": 4.999694745569434e-06, + "loss": 0.2888, + "step": 1487 + }, + { + "epoch": 0.07, + "grad_norm": 0.6949320169518288, + "learning_rate": 4.9996917748019435e-06, + "loss": 0.3206, + "step": 1488 + }, + { + "epoch": 0.07, + "grad_norm": 0.7900310038896375, + "learning_rate": 4.999688789649355e-06, + "loss": 0.3254, + "step": 1489 + }, + { + "epoch": 0.07, + "grad_norm": 0.734224741489078, + "learning_rate": 4.999685790111686e-06, + "loss": 0.3351, + "step": 1490 + }, + { + "epoch": 0.07, + "grad_norm": 0.7089561733450659, + "learning_rate": 4.9996827761889535e-06, + "loss": 0.3292, + "step": 1491 + }, + { + "epoch": 0.07, + "grad_norm": 0.8157798321741538, + "learning_rate": 4.999679747881174e-06, + "loss": 0.3257, + "step": 1492 + }, + { + "epoch": 0.07, + "grad_norm": 0.7462695793704598, + "learning_rate": 4.999676705188367e-06, + "loss": 0.3195, + "step": 1493 + }, + { + "epoch": 0.07, + "grad_norm": 0.7522877880571439, + "learning_rate": 4.999673648110549e-06, + "loss": 0.3082, + "step": 1494 + }, + { + "epoch": 0.07, + "grad_norm": 0.842096333993474, + "learning_rate": 4.999670576647736e-06, + "loss": 0.3408, + "step": 1495 + }, + { + "epoch": 0.07, + "grad_norm": 0.7886679015240067, + "learning_rate": 4.999667490799948e-06, + "loss": 0.319, + "step": 1496 + }, + { + "epoch": 0.07, + "grad_norm": 0.7329785823970988, + "learning_rate": 4.9996643905672024e-06, + "loss": 0.3237, + "step": 1497 + }, + { + "epoch": 0.07, + "grad_norm": 0.7920290208959585, + "learning_rate": 4.9996612759495155e-06, + "loss": 0.3168, + "step": 1498 + }, + { + "epoch": 0.07, + "grad_norm": 0.7016122990559039, + "learning_rate": 4.999658146946907e-06, + "loss": 0.3125, + "step": 1499 + }, + { + "epoch": 0.07, + "grad_norm": 0.7297207495928555, + "learning_rate": 4.999655003559394e-06, + "loss": 0.3271, + "step": 1500 + }, + { + "epoch": 0.07, + "grad_norm": 0.7888931666103407, + "learning_rate": 4.999651845786994e-06, + "loss": 0.3249, + "step": 1501 + }, + { + "epoch": 0.07, + "grad_norm": 0.7481178407915542, + "learning_rate": 4.999648673629727e-06, + "loss": 0.3176, + "step": 1502 + }, + { + "epoch": 0.07, + "grad_norm": 0.7070698464219193, + "learning_rate": 4.9996454870876085e-06, + "loss": 0.3128, + "step": 1503 + }, + { + "epoch": 0.07, + "grad_norm": 0.7242977001610423, + "learning_rate": 4.99964228616066e-06, + "loss": 0.3177, + "step": 1504 + }, + { + "epoch": 0.07, + "grad_norm": 0.7221293720659834, + "learning_rate": 4.999639070848898e-06, + "loss": 0.3165, + "step": 1505 + }, + { + "epoch": 0.07, + "grad_norm": 0.7520132804809743, + "learning_rate": 4.999635841152342e-06, + "loss": 0.3285, + "step": 1506 + }, + { + "epoch": 0.07, + "grad_norm": 0.7320463754698797, + "learning_rate": 4.999632597071008e-06, + "loss": 0.3169, + "step": 1507 + }, + { + "epoch": 0.07, + "grad_norm": 0.8043506670384957, + "learning_rate": 4.999629338604919e-06, + "loss": 0.3306, + "step": 1508 + }, + { + "epoch": 0.07, + "grad_norm": 0.7289483279065602, + "learning_rate": 4.99962606575409e-06, + "loss": 0.3004, + "step": 1509 + }, + { + "epoch": 0.07, + "grad_norm": 0.7508220128921895, + "learning_rate": 4.9996227785185415e-06, + "loss": 0.3173, + "step": 1510 + }, + { + "epoch": 0.07, + "grad_norm": 0.729523572859493, + "learning_rate": 4.999619476898292e-06, + "loss": 0.3284, + "step": 1511 + }, + { + "epoch": 0.07, + "grad_norm": 0.7277889113116073, + "learning_rate": 4.999616160893361e-06, + "loss": 0.3188, + "step": 1512 + }, + { + "epoch": 0.07, + "grad_norm": 0.7708974964846851, + "learning_rate": 4.9996128305037675e-06, + "loss": 0.3216, + "step": 1513 + }, + { + "epoch": 0.07, + "grad_norm": 0.7328097206962179, + "learning_rate": 4.999609485729531e-06, + "loss": 0.3101, + "step": 1514 + }, + { + "epoch": 0.07, + "grad_norm": 0.7900625240014929, + "learning_rate": 4.999606126570669e-06, + "loss": 0.3228, + "step": 1515 + }, + { + "epoch": 0.07, + "grad_norm": 0.7664824236357681, + "learning_rate": 4.999602753027202e-06, + "loss": 0.3436, + "step": 1516 + }, + { + "epoch": 0.07, + "grad_norm": 0.7795850783777316, + "learning_rate": 4.99959936509915e-06, + "loss": 0.3146, + "step": 1517 + }, + { + "epoch": 0.07, + "grad_norm": 0.8129287625859359, + "learning_rate": 4.999595962786532e-06, + "loss": 0.3286, + "step": 1518 + }, + { + "epoch": 0.07, + "grad_norm": 0.7631296667183622, + "learning_rate": 4.9995925460893664e-06, + "loss": 0.3145, + "step": 1519 + }, + { + "epoch": 0.07, + "grad_norm": 0.7700571251955761, + "learning_rate": 4.999589115007675e-06, + "loss": 0.3089, + "step": 1520 + }, + { + "epoch": 0.07, + "grad_norm": 0.7804865653166885, + "learning_rate": 4.999585669541476e-06, + "loss": 0.3189, + "step": 1521 + }, + { + "epoch": 0.07, + "grad_norm": 0.8046219415553443, + "learning_rate": 4.99958220969079e-06, + "loss": 0.3372, + "step": 1522 + }, + { + "epoch": 0.07, + "grad_norm": 0.7821502375022974, + "learning_rate": 4.999578735455636e-06, + "loss": 0.3187, + "step": 1523 + }, + { + "epoch": 0.07, + "grad_norm": 0.7280757898553232, + "learning_rate": 4.999575246836036e-06, + "loss": 0.3161, + "step": 1524 + }, + { + "epoch": 0.07, + "grad_norm": 0.8153552072473981, + "learning_rate": 4.999571743832007e-06, + "loss": 0.3429, + "step": 1525 + }, + { + "epoch": 0.07, + "grad_norm": 0.904142585099536, + "learning_rate": 4.999568226443572e-06, + "loss": 0.3415, + "step": 1526 + }, + { + "epoch": 0.07, + "grad_norm": 0.7654763676279214, + "learning_rate": 4.99956469467075e-06, + "loss": 0.3177, + "step": 1527 + }, + { + "epoch": 0.07, + "grad_norm": 0.7520403034344559, + "learning_rate": 4.999561148513561e-06, + "loss": 0.313, + "step": 1528 + }, + { + "epoch": 0.07, + "grad_norm": 0.7447390713355699, + "learning_rate": 4.999557587972026e-06, + "loss": 0.2968, + "step": 1529 + }, + { + "epoch": 0.07, + "grad_norm": 0.7924378365296926, + "learning_rate": 4.999554013046165e-06, + "loss": 0.3085, + "step": 1530 + }, + { + "epoch": 0.07, + "grad_norm": 0.8146373815794455, + "learning_rate": 4.9995504237359994e-06, + "loss": 0.3252, + "step": 1531 + }, + { + "epoch": 0.07, + "grad_norm": 0.7184854850470002, + "learning_rate": 4.999546820041549e-06, + "loss": 0.306, + "step": 1532 + }, + { + "epoch": 0.07, + "grad_norm": 0.7785688745390072, + "learning_rate": 4.9995432019628355e-06, + "loss": 0.3494, + "step": 1533 + }, + { + "epoch": 0.07, + "grad_norm": 0.7954621537509475, + "learning_rate": 4.999539569499878e-06, + "loss": 0.3089, + "step": 1534 + }, + { + "epoch": 0.07, + "grad_norm": 0.7117172999110207, + "learning_rate": 4.9995359226527e-06, + "loss": 0.3333, + "step": 1535 + }, + { + "epoch": 0.07, + "grad_norm": 0.8265710724847547, + "learning_rate": 4.9995322614213195e-06, + "loss": 0.3233, + "step": 1536 + }, + { + "epoch": 0.07, + "grad_norm": 0.7242801056379659, + "learning_rate": 4.99952858580576e-06, + "loss": 0.3178, + "step": 1537 + }, + { + "epoch": 0.07, + "grad_norm": 0.7164278565347454, + "learning_rate": 4.999524895806042e-06, + "loss": 0.3237, + "step": 1538 + }, + { + "epoch": 0.07, + "grad_norm": 0.7180861021551446, + "learning_rate": 4.999521191422185e-06, + "loss": 0.3354, + "step": 1539 + }, + { + "epoch": 0.07, + "grad_norm": 0.6976371542360692, + "learning_rate": 4.9995174726542135e-06, + "loss": 0.3083, + "step": 1540 + }, + { + "epoch": 0.07, + "grad_norm": 0.7035713162569168, + "learning_rate": 4.999513739502147e-06, + "loss": 0.3275, + "step": 1541 + }, + { + "epoch": 0.07, + "grad_norm": 0.7501275657201921, + "learning_rate": 4.999509991966006e-06, + "loss": 0.316, + "step": 1542 + }, + { + "epoch": 0.07, + "grad_norm": 0.6853848262537231, + "learning_rate": 4.9995062300458136e-06, + "loss": 0.3156, + "step": 1543 + }, + { + "epoch": 0.07, + "grad_norm": 0.7140777882207315, + "learning_rate": 4.999502453741591e-06, + "loss": 0.3376, + "step": 1544 + }, + { + "epoch": 0.07, + "grad_norm": 0.7091259844997619, + "learning_rate": 4.999498663053361e-06, + "loss": 0.3272, + "step": 1545 + }, + { + "epoch": 0.07, + "grad_norm": 0.7557288394547415, + "learning_rate": 4.999494857981143e-06, + "loss": 0.3151, + "step": 1546 + }, + { + "epoch": 0.07, + "grad_norm": 0.7194751115015698, + "learning_rate": 4.9994910385249614e-06, + "loss": 0.295, + "step": 1547 + }, + { + "epoch": 0.07, + "grad_norm": 0.7801434553746144, + "learning_rate": 4.999487204684836e-06, + "loss": 0.3296, + "step": 1548 + }, + { + "epoch": 0.07, + "grad_norm": 0.7949960143614095, + "learning_rate": 4.999483356460791e-06, + "loss": 0.2971, + "step": 1549 + }, + { + "epoch": 0.07, + "grad_norm": 0.7577336533172907, + "learning_rate": 4.999479493852847e-06, + "loss": 0.3231, + "step": 1550 + }, + { + "epoch": 0.07, + "grad_norm": 0.8611660494149874, + "learning_rate": 4.9994756168610274e-06, + "loss": 0.3315, + "step": 1551 + }, + { + "epoch": 0.07, + "grad_norm": 0.7115035048995083, + "learning_rate": 4.999471725485353e-06, + "loss": 0.3352, + "step": 1552 + }, + { + "epoch": 0.07, + "grad_norm": 0.720803129842589, + "learning_rate": 4.9994678197258475e-06, + "loss": 0.3211, + "step": 1553 + }, + { + "epoch": 0.07, + "grad_norm": 0.8126696640878327, + "learning_rate": 4.999463899582533e-06, + "loss": 0.3217, + "step": 1554 + }, + { + "epoch": 0.07, + "grad_norm": 0.7260695689138561, + "learning_rate": 4.999459965055432e-06, + "loss": 0.3066, + "step": 1555 + }, + { + "epoch": 0.07, + "grad_norm": 0.738263222998644, + "learning_rate": 4.999456016144566e-06, + "loss": 0.3319, + "step": 1556 + }, + { + "epoch": 0.07, + "grad_norm": 0.7454447197988133, + "learning_rate": 4.99945205284996e-06, + "loss": 0.3352, + "step": 1557 + }, + { + "epoch": 0.07, + "grad_norm": 0.7676085745630984, + "learning_rate": 4.999448075171636e-06, + "loss": 0.3159, + "step": 1558 + }, + { + "epoch": 0.07, + "grad_norm": 0.7544609261141583, + "learning_rate": 4.999444083109616e-06, + "loss": 0.3262, + "step": 1559 + }, + { + "epoch": 0.07, + "grad_norm": 0.7596667273216736, + "learning_rate": 4.999440076663923e-06, + "loss": 0.344, + "step": 1560 + }, + { + "epoch": 0.07, + "grad_norm": 0.7309542676809847, + "learning_rate": 4.9994360558345815e-06, + "loss": 0.3281, + "step": 1561 + }, + { + "epoch": 0.07, + "grad_norm": 0.7050190677894967, + "learning_rate": 4.9994320206216125e-06, + "loss": 0.3012, + "step": 1562 + }, + { + "epoch": 0.07, + "grad_norm": 0.7565841770688835, + "learning_rate": 4.9994279710250415e-06, + "loss": 0.3398, + "step": 1563 + }, + { + "epoch": 0.07, + "grad_norm": 0.7108711216627874, + "learning_rate": 4.999423907044891e-06, + "loss": 0.3161, + "step": 1564 + }, + { + "epoch": 0.07, + "grad_norm": 0.7468147261953452, + "learning_rate": 4.9994198286811825e-06, + "loss": 0.3256, + "step": 1565 + }, + { + "epoch": 0.07, + "grad_norm": 0.7200574534664601, + "learning_rate": 4.999415735933943e-06, + "loss": 0.313, + "step": 1566 + }, + { + "epoch": 0.07, + "grad_norm": 0.7762689688882982, + "learning_rate": 4.999411628803192e-06, + "loss": 0.3047, + "step": 1567 + }, + { + "epoch": 0.07, + "grad_norm": 0.744724846615189, + "learning_rate": 4.999407507288957e-06, + "loss": 0.3294, + "step": 1568 + }, + { + "epoch": 0.07, + "grad_norm": 0.7251104920048113, + "learning_rate": 4.999403371391259e-06, + "loss": 0.3146, + "step": 1569 + }, + { + "epoch": 0.07, + "grad_norm": 0.6783811370715859, + "learning_rate": 4.999399221110124e-06, + "loss": 0.3045, + "step": 1570 + }, + { + "epoch": 0.07, + "grad_norm": 0.7459990808042437, + "learning_rate": 4.999395056445574e-06, + "loss": 0.3399, + "step": 1571 + }, + { + "epoch": 0.07, + "grad_norm": 0.7886185310807565, + "learning_rate": 4.9993908773976335e-06, + "loss": 0.3208, + "step": 1572 + }, + { + "epoch": 0.07, + "grad_norm": 0.6951992277417983, + "learning_rate": 4.9993866839663265e-06, + "loss": 0.3117, + "step": 1573 + }, + { + "epoch": 0.07, + "grad_norm": 0.6618623193122845, + "learning_rate": 4.999382476151678e-06, + "loss": 0.3066, + "step": 1574 + }, + { + "epoch": 0.07, + "grad_norm": 0.7156384015624929, + "learning_rate": 4.999378253953711e-06, + "loss": 0.3158, + "step": 1575 + }, + { + "epoch": 0.07, + "grad_norm": 0.6727640514860471, + "learning_rate": 4.999374017372451e-06, + "loss": 0.3077, + "step": 1576 + }, + { + "epoch": 0.07, + "grad_norm": 0.7725779480492453, + "learning_rate": 4.999369766407921e-06, + "loss": 0.3327, + "step": 1577 + }, + { + "epoch": 0.07, + "grad_norm": 0.7170565022267472, + "learning_rate": 4.999365501060147e-06, + "loss": 0.3186, + "step": 1578 + }, + { + "epoch": 0.07, + "grad_norm": 0.6685425117008461, + "learning_rate": 4.999361221329152e-06, + "loss": 0.3082, + "step": 1579 + }, + { + "epoch": 0.07, + "grad_norm": 0.7645416182266488, + "learning_rate": 4.999356927214961e-06, + "loss": 0.3472, + "step": 1580 + }, + { + "epoch": 0.07, + "grad_norm": 0.7811348475060741, + "learning_rate": 4.9993526187176e-06, + "loss": 0.3389, + "step": 1581 + }, + { + "epoch": 0.07, + "grad_norm": 0.7354481208722943, + "learning_rate": 4.999348295837092e-06, + "loss": 0.3197, + "step": 1582 + }, + { + "epoch": 0.07, + "grad_norm": 0.7578029989797281, + "learning_rate": 4.9993439585734635e-06, + "loss": 0.3224, + "step": 1583 + }, + { + "epoch": 0.07, + "grad_norm": 0.7282013324265326, + "learning_rate": 4.999339606926738e-06, + "loss": 0.3228, + "step": 1584 + }, + { + "epoch": 0.07, + "grad_norm": 0.7545225962405183, + "learning_rate": 4.999335240896942e-06, + "loss": 0.3314, + "step": 1585 + }, + { + "epoch": 0.07, + "grad_norm": 0.7736047193816242, + "learning_rate": 4.9993308604841e-06, + "loss": 0.3271, + "step": 1586 + }, + { + "epoch": 0.07, + "grad_norm": 0.7338290995817963, + "learning_rate": 4.999326465688237e-06, + "loss": 0.3345, + "step": 1587 + }, + { + "epoch": 0.07, + "grad_norm": 0.7038057895773236, + "learning_rate": 4.999322056509378e-06, + "loss": 0.3099, + "step": 1588 + }, + { + "epoch": 0.07, + "grad_norm": 0.7088622231228038, + "learning_rate": 4.999317632947549e-06, + "loss": 0.3293, + "step": 1589 + }, + { + "epoch": 0.07, + "grad_norm": 0.6909281726507206, + "learning_rate": 4.999313195002774e-06, + "loss": 0.3007, + "step": 1590 + }, + { + "epoch": 0.07, + "grad_norm": 0.7022354236752141, + "learning_rate": 4.9993087426750805e-06, + "loss": 0.3131, + "step": 1591 + }, + { + "epoch": 0.07, + "grad_norm": 0.6834875964544871, + "learning_rate": 4.999304275964494e-06, + "loss": 0.3145, + "step": 1592 + }, + { + "epoch": 0.07, + "grad_norm": 0.722314853238451, + "learning_rate": 4.999299794871039e-06, + "loss": 0.33, + "step": 1593 + }, + { + "epoch": 0.07, + "grad_norm": 0.6815492027121889, + "learning_rate": 4.9992952993947425e-06, + "loss": 0.3054, + "step": 1594 + }, + { + "epoch": 0.07, + "grad_norm": 0.7161421756622602, + "learning_rate": 4.999290789535629e-06, + "loss": 0.3058, + "step": 1595 + }, + { + "epoch": 0.07, + "grad_norm": 0.7253518530301497, + "learning_rate": 4.999286265293726e-06, + "loss": 0.315, + "step": 1596 + }, + { + "epoch": 0.07, + "grad_norm": 0.7502641728986101, + "learning_rate": 4.9992817266690575e-06, + "loss": 0.3177, + "step": 1597 + }, + { + "epoch": 0.07, + "grad_norm": 0.7478340437518278, + "learning_rate": 4.999277173661652e-06, + "loss": 0.3201, + "step": 1598 + }, + { + "epoch": 0.07, + "grad_norm": 0.7435914801876982, + "learning_rate": 4.999272606271534e-06, + "loss": 0.3479, + "step": 1599 + }, + { + "epoch": 0.07, + "grad_norm": 0.7401637723219691, + "learning_rate": 4.99926802449873e-06, + "loss": 0.3264, + "step": 1600 + }, + { + "epoch": 0.07, + "grad_norm": 0.762264834843614, + "learning_rate": 4.999263428343267e-06, + "loss": 0.3176, + "step": 1601 + }, + { + "epoch": 0.08, + "grad_norm": 0.7422250110527431, + "learning_rate": 4.999258817805171e-06, + "loss": 0.3246, + "step": 1602 + }, + { + "epoch": 0.08, + "grad_norm": 0.7618582712375128, + "learning_rate": 4.999254192884469e-06, + "loss": 0.3052, + "step": 1603 + }, + { + "epoch": 0.08, + "grad_norm": 0.7339292728809115, + "learning_rate": 4.9992495535811866e-06, + "loss": 0.3336, + "step": 1604 + }, + { + "epoch": 0.08, + "grad_norm": 0.6609183575223918, + "learning_rate": 4.999244899895352e-06, + "loss": 0.2909, + "step": 1605 + }, + { + "epoch": 0.08, + "grad_norm": 0.7717855551799784, + "learning_rate": 4.999240231826991e-06, + "loss": 0.3104, + "step": 1606 + }, + { + "epoch": 0.08, + "grad_norm": 0.770074935019334, + "learning_rate": 4.9992355493761305e-06, + "loss": 0.3089, + "step": 1607 + }, + { + "epoch": 0.08, + "grad_norm": 0.7138351605496025, + "learning_rate": 4.999230852542797e-06, + "loss": 0.3028, + "step": 1608 + }, + { + "epoch": 0.08, + "grad_norm": 0.6836608666602769, + "learning_rate": 4.999226141327019e-06, + "loss": 0.3027, + "step": 1609 + }, + { + "epoch": 0.08, + "grad_norm": 0.76264224464993, + "learning_rate": 4.9992214157288225e-06, + "loss": 0.3292, + "step": 1610 + }, + { + "epoch": 0.08, + "grad_norm": 0.718124507624867, + "learning_rate": 4.999216675748235e-06, + "loss": 0.3268, + "step": 1611 + }, + { + "epoch": 0.08, + "grad_norm": 0.7966639628941184, + "learning_rate": 4.999211921385283e-06, + "loss": 0.2875, + "step": 1612 + }, + { + "epoch": 0.08, + "grad_norm": 0.7374922881004943, + "learning_rate": 4.999207152639995e-06, + "loss": 0.3247, + "step": 1613 + }, + { + "epoch": 0.08, + "grad_norm": 0.7982808240602456, + "learning_rate": 4.999202369512398e-06, + "loss": 0.3594, + "step": 1614 + }, + { + "epoch": 0.08, + "grad_norm": 0.6767326630254633, + "learning_rate": 4.9991975720025195e-06, + "loss": 0.3101, + "step": 1615 + }, + { + "epoch": 0.08, + "grad_norm": 0.7523780266203851, + "learning_rate": 4.999192760110387e-06, + "loss": 0.357, + "step": 1616 + }, + { + "epoch": 0.08, + "grad_norm": 0.747332102240822, + "learning_rate": 4.999187933836028e-06, + "loss": 0.3115, + "step": 1617 + }, + { + "epoch": 0.08, + "grad_norm": 0.7421595288962545, + "learning_rate": 4.999183093179472e-06, + "loss": 0.3332, + "step": 1618 + }, + { + "epoch": 0.08, + "grad_norm": 0.7152668717206908, + "learning_rate": 4.999178238140744e-06, + "loss": 0.322, + "step": 1619 + }, + { + "epoch": 0.08, + "grad_norm": 0.7610367437194858, + "learning_rate": 4.999173368719874e-06, + "loss": 0.3222, + "step": 1620 + }, + { + "epoch": 0.08, + "grad_norm": 0.7477361680979185, + "learning_rate": 4.999168484916889e-06, + "loss": 0.3118, + "step": 1621 + }, + { + "epoch": 0.08, + "grad_norm": 0.7850032291318698, + "learning_rate": 4.999163586731818e-06, + "loss": 0.3441, + "step": 1622 + }, + { + "epoch": 0.08, + "grad_norm": 0.7735174823279416, + "learning_rate": 4.999158674164688e-06, + "loss": 0.3193, + "step": 1623 + }, + { + "epoch": 0.08, + "grad_norm": 0.7013413381568773, + "learning_rate": 4.999153747215529e-06, + "loss": 0.3101, + "step": 1624 + }, + { + "epoch": 0.08, + "grad_norm": 0.7214332890281085, + "learning_rate": 4.999148805884368e-06, + "loss": 0.3121, + "step": 1625 + }, + { + "epoch": 0.08, + "grad_norm": 0.8038184289519242, + "learning_rate": 4.999143850171233e-06, + "loss": 0.3484, + "step": 1626 + }, + { + "epoch": 0.08, + "grad_norm": 0.701630024366284, + "learning_rate": 4.999138880076154e-06, + "loss": 0.3094, + "step": 1627 + }, + { + "epoch": 0.08, + "grad_norm": 0.7600781660106994, + "learning_rate": 4.999133895599159e-06, + "loss": 0.3072, + "step": 1628 + }, + { + "epoch": 0.08, + "grad_norm": 0.6825130713542163, + "learning_rate": 4.999128896740275e-06, + "loss": 0.3222, + "step": 1629 + }, + { + "epoch": 0.08, + "grad_norm": 0.8048960700229559, + "learning_rate": 4.999123883499534e-06, + "loss": 0.3541, + "step": 1630 + }, + { + "epoch": 0.08, + "grad_norm": 0.7438402680391148, + "learning_rate": 4.999118855876963e-06, + "loss": 0.3298, + "step": 1631 + }, + { + "epoch": 0.08, + "grad_norm": 0.7423778919481935, + "learning_rate": 4.999113813872591e-06, + "loss": 0.3108, + "step": 1632 + }, + { + "epoch": 0.08, + "grad_norm": 0.6614086120713557, + "learning_rate": 4.999108757486447e-06, + "loss": 0.2924, + "step": 1633 + }, + { + "epoch": 0.08, + "grad_norm": 0.7610039119180538, + "learning_rate": 4.99910368671856e-06, + "loss": 0.323, + "step": 1634 + }, + { + "epoch": 0.08, + "grad_norm": 0.7972689346898589, + "learning_rate": 4.99909860156896e-06, + "loss": 0.3461, + "step": 1635 + }, + { + "epoch": 0.08, + "grad_norm": 0.778524022555661, + "learning_rate": 4.999093502037675e-06, + "loss": 0.331, + "step": 1636 + }, + { + "epoch": 0.08, + "grad_norm": 0.7563034880823084, + "learning_rate": 4.999088388124736e-06, + "loss": 0.3155, + "step": 1637 + }, + { + "epoch": 0.08, + "grad_norm": 0.7375071135183848, + "learning_rate": 4.999083259830171e-06, + "loss": 0.3363, + "step": 1638 + }, + { + "epoch": 0.08, + "grad_norm": 0.7178744669189682, + "learning_rate": 4.9990781171540095e-06, + "loss": 0.312, + "step": 1639 + }, + { + "epoch": 0.08, + "grad_norm": 0.7452885061238755, + "learning_rate": 4.999072960096281e-06, + "loss": 0.299, + "step": 1640 + }, + { + "epoch": 0.08, + "grad_norm": 0.8048486837416327, + "learning_rate": 4.999067788657017e-06, + "loss": 0.3126, + "step": 1641 + }, + { + "epoch": 0.08, + "grad_norm": 0.7078401627495173, + "learning_rate": 4.999062602836246e-06, + "loss": 0.2944, + "step": 1642 + }, + { + "epoch": 0.08, + "grad_norm": 0.7133066584199885, + "learning_rate": 4.999057402633997e-06, + "loss": 0.3225, + "step": 1643 + }, + { + "epoch": 0.08, + "grad_norm": 0.7982168545728273, + "learning_rate": 4.999052188050301e-06, + "loss": 0.3328, + "step": 1644 + }, + { + "epoch": 0.08, + "grad_norm": 0.7828734967931195, + "learning_rate": 4.9990469590851875e-06, + "loss": 0.3507, + "step": 1645 + }, + { + "epoch": 0.08, + "grad_norm": 0.7487712520083271, + "learning_rate": 4.999041715738687e-06, + "loss": 0.3117, + "step": 1646 + }, + { + "epoch": 0.08, + "grad_norm": 0.7634019631197171, + "learning_rate": 4.99903645801083e-06, + "loss": 0.308, + "step": 1647 + }, + { + "epoch": 0.08, + "grad_norm": 0.7465538390064974, + "learning_rate": 4.999031185901646e-06, + "loss": 0.3188, + "step": 1648 + }, + { + "epoch": 0.08, + "grad_norm": 0.7519035176230956, + "learning_rate": 4.999025899411166e-06, + "loss": 0.3359, + "step": 1649 + }, + { + "epoch": 0.08, + "grad_norm": 0.7284348981755598, + "learning_rate": 4.99902059853942e-06, + "loss": 0.3101, + "step": 1650 + }, + { + "epoch": 0.08, + "grad_norm": 0.7997777546980201, + "learning_rate": 4.999015283286438e-06, + "loss": 0.3253, + "step": 1651 + }, + { + "epoch": 0.08, + "grad_norm": 0.7823712572386659, + "learning_rate": 4.999009953652252e-06, + "loss": 0.3314, + "step": 1652 + }, + { + "epoch": 0.08, + "grad_norm": 0.7064302203073027, + "learning_rate": 4.999004609636891e-06, + "loss": 0.3163, + "step": 1653 + }, + { + "epoch": 0.08, + "grad_norm": 0.7166746561933975, + "learning_rate": 4.998999251240387e-06, + "loss": 0.3065, + "step": 1654 + }, + { + "epoch": 0.08, + "grad_norm": 0.6986848236222497, + "learning_rate": 4.9989938784627705e-06, + "loss": 0.3219, + "step": 1655 + }, + { + "epoch": 0.08, + "grad_norm": 0.7686442518064631, + "learning_rate": 4.998988491304072e-06, + "loss": 0.3027, + "step": 1656 + }, + { + "epoch": 0.08, + "grad_norm": 0.755893272738977, + "learning_rate": 4.998983089764323e-06, + "loss": 0.2995, + "step": 1657 + }, + { + "epoch": 0.08, + "grad_norm": 0.7018002227971664, + "learning_rate": 4.998977673843554e-06, + "loss": 0.3252, + "step": 1658 + }, + { + "epoch": 0.08, + "grad_norm": 0.7473303608512185, + "learning_rate": 4.998972243541797e-06, + "loss": 0.3098, + "step": 1659 + }, + { + "epoch": 0.08, + "grad_norm": 0.7269801935768679, + "learning_rate": 4.998966798859083e-06, + "loss": 0.3143, + "step": 1660 + }, + { + "epoch": 0.08, + "grad_norm": 0.6651439520555769, + "learning_rate": 4.998961339795443e-06, + "loss": 0.3113, + "step": 1661 + }, + { + "epoch": 0.08, + "grad_norm": 0.7279211377369268, + "learning_rate": 4.998955866350908e-06, + "loss": 0.3282, + "step": 1662 + }, + { + "epoch": 0.08, + "grad_norm": 0.8173911671620676, + "learning_rate": 4.99895037852551e-06, + "loss": 0.3609, + "step": 1663 + }, + { + "epoch": 0.08, + "grad_norm": 0.7593340599501268, + "learning_rate": 4.998944876319282e-06, + "loss": 0.3186, + "step": 1664 + }, + { + "epoch": 0.08, + "grad_norm": 0.7068837359645022, + "learning_rate": 4.998939359732253e-06, + "loss": 0.3196, + "step": 1665 + }, + { + "epoch": 0.08, + "grad_norm": 0.682231439675626, + "learning_rate": 4.998933828764457e-06, + "loss": 0.3101, + "step": 1666 + }, + { + "epoch": 0.08, + "grad_norm": 0.7521152033836275, + "learning_rate": 4.998928283415924e-06, + "loss": 0.3275, + "step": 1667 + }, + { + "epoch": 0.08, + "grad_norm": 0.7693914390148405, + "learning_rate": 4.998922723686688e-06, + "loss": 0.3314, + "step": 1668 + }, + { + "epoch": 0.08, + "grad_norm": 0.7366946871456527, + "learning_rate": 4.998917149576779e-06, + "loss": 0.3171, + "step": 1669 + }, + { + "epoch": 0.08, + "grad_norm": 0.845667812934547, + "learning_rate": 4.99891156108623e-06, + "loss": 0.3451, + "step": 1670 + }, + { + "epoch": 0.08, + "grad_norm": 0.7178654067965274, + "learning_rate": 4.998905958215073e-06, + "loss": 0.2903, + "step": 1671 + }, + { + "epoch": 0.08, + "grad_norm": 0.7066536466637067, + "learning_rate": 4.99890034096334e-06, + "loss": 0.2995, + "step": 1672 + }, + { + "epoch": 0.08, + "grad_norm": 0.7456870650257225, + "learning_rate": 4.998894709331064e-06, + "loss": 0.3086, + "step": 1673 + }, + { + "epoch": 0.08, + "grad_norm": 0.7772952969313207, + "learning_rate": 4.998889063318277e-06, + "loss": 0.3154, + "step": 1674 + }, + { + "epoch": 0.08, + "grad_norm": 0.7819113104871929, + "learning_rate": 4.998883402925012e-06, + "loss": 0.3223, + "step": 1675 + }, + { + "epoch": 0.08, + "grad_norm": 0.7385555654934371, + "learning_rate": 4.998877728151301e-06, + "loss": 0.3289, + "step": 1676 + }, + { + "epoch": 0.08, + "grad_norm": 0.812804597323901, + "learning_rate": 4.9988720389971764e-06, + "loss": 0.3267, + "step": 1677 + }, + { + "epoch": 0.08, + "grad_norm": 0.6830459663416636, + "learning_rate": 4.998866335462671e-06, + "loss": 0.3006, + "step": 1678 + }, + { + "epoch": 0.08, + "grad_norm": 0.7057038157621082, + "learning_rate": 4.998860617547818e-06, + "loss": 0.3065, + "step": 1679 + }, + { + "epoch": 0.08, + "grad_norm": 0.7517042964753557, + "learning_rate": 4.998854885252651e-06, + "loss": 0.342, + "step": 1680 + }, + { + "epoch": 0.08, + "grad_norm": 0.7995839884466274, + "learning_rate": 4.998849138577201e-06, + "loss": 0.3209, + "step": 1681 + }, + { + "epoch": 0.08, + "grad_norm": 0.8667777013902617, + "learning_rate": 4.9988433775215025e-06, + "loss": 0.3242, + "step": 1682 + }, + { + "epoch": 0.08, + "grad_norm": 0.718172060955515, + "learning_rate": 4.998837602085589e-06, + "loss": 0.3124, + "step": 1683 + }, + { + "epoch": 0.08, + "grad_norm": 0.6412282870607331, + "learning_rate": 4.998831812269493e-06, + "loss": 0.2947, + "step": 1684 + }, + { + "epoch": 0.08, + "grad_norm": 0.7388983029115189, + "learning_rate": 4.998826008073248e-06, + "loss": 0.3257, + "step": 1685 + }, + { + "epoch": 0.08, + "grad_norm": 0.7843335896066234, + "learning_rate": 4.998820189496887e-06, + "loss": 0.3213, + "step": 1686 + }, + { + "epoch": 0.08, + "grad_norm": 0.7473124221595492, + "learning_rate": 4.998814356540445e-06, + "loss": 0.3196, + "step": 1687 + }, + { + "epoch": 0.08, + "grad_norm": 0.698651836867668, + "learning_rate": 4.9988085092039526e-06, + "loss": 0.2936, + "step": 1688 + }, + { + "epoch": 0.08, + "grad_norm": 0.6950129573704931, + "learning_rate": 4.9988026474874466e-06, + "loss": 0.3177, + "step": 1689 + }, + { + "epoch": 0.08, + "grad_norm": 0.7187400923327444, + "learning_rate": 4.998796771390959e-06, + "loss": 0.3354, + "step": 1690 + }, + { + "epoch": 0.08, + "grad_norm": 0.7866352982702991, + "learning_rate": 4.998790880914524e-06, + "loss": 0.3214, + "step": 1691 + }, + { + "epoch": 0.08, + "grad_norm": 0.7717076429244231, + "learning_rate": 4.998784976058175e-06, + "loss": 0.3509, + "step": 1692 + }, + { + "epoch": 0.08, + "grad_norm": 0.7367159944515886, + "learning_rate": 4.998779056821948e-06, + "loss": 0.3127, + "step": 1693 + }, + { + "epoch": 0.08, + "grad_norm": 0.7898077593407784, + "learning_rate": 4.998773123205874e-06, + "loss": 0.3282, + "step": 1694 + }, + { + "epoch": 0.08, + "grad_norm": 0.702623986236606, + "learning_rate": 4.99876717520999e-06, + "loss": 0.3236, + "step": 1695 + }, + { + "epoch": 0.08, + "grad_norm": 0.7459675042006468, + "learning_rate": 4.998761212834328e-06, + "loss": 0.3, + "step": 1696 + }, + { + "epoch": 0.08, + "grad_norm": 0.8257675007262679, + "learning_rate": 4.998755236078924e-06, + "loss": 0.3285, + "step": 1697 + }, + { + "epoch": 0.08, + "grad_norm": 0.7583060387154643, + "learning_rate": 4.998749244943811e-06, + "loss": 0.3237, + "step": 1698 + }, + { + "epoch": 0.08, + "grad_norm": 0.7081598458468251, + "learning_rate": 4.998743239429024e-06, + "loss": 0.3392, + "step": 1699 + }, + { + "epoch": 0.08, + "grad_norm": 0.7348255641070603, + "learning_rate": 4.998737219534599e-06, + "loss": 0.3175, + "step": 1700 + }, + { + "epoch": 0.08, + "grad_norm": 0.671216471334538, + "learning_rate": 4.998731185260568e-06, + "loss": 0.313, + "step": 1701 + }, + { + "epoch": 0.08, + "grad_norm": 0.7277886541867677, + "learning_rate": 4.9987251366069675e-06, + "loss": 0.306, + "step": 1702 + }, + { + "epoch": 0.08, + "grad_norm": 0.6416974430697506, + "learning_rate": 4.998719073573832e-06, + "loss": 0.286, + "step": 1703 + }, + { + "epoch": 0.08, + "grad_norm": 0.7179044539112954, + "learning_rate": 4.998712996161196e-06, + "loss": 0.3231, + "step": 1704 + }, + { + "epoch": 0.08, + "grad_norm": 0.681990052502104, + "learning_rate": 4.998706904369095e-06, + "loss": 0.3222, + "step": 1705 + }, + { + "epoch": 0.08, + "grad_norm": 0.7266064387011337, + "learning_rate": 4.9987007981975635e-06, + "loss": 0.3098, + "step": 1706 + }, + { + "epoch": 0.08, + "grad_norm": 0.7001457463251953, + "learning_rate": 4.998694677646637e-06, + "loss": 0.284, + "step": 1707 + }, + { + "epoch": 0.08, + "grad_norm": 0.7248509122251477, + "learning_rate": 4.99868854271635e-06, + "loss": 0.3158, + "step": 1708 + }, + { + "epoch": 0.08, + "grad_norm": 0.799168287325932, + "learning_rate": 4.998682393406739e-06, + "loss": 0.3545, + "step": 1709 + }, + { + "epoch": 0.08, + "grad_norm": 0.7620824972386876, + "learning_rate": 4.99867622971784e-06, + "loss": 0.3214, + "step": 1710 + }, + { + "epoch": 0.08, + "grad_norm": 0.7007858773476957, + "learning_rate": 4.998670051649686e-06, + "loss": 0.3282, + "step": 1711 + }, + { + "epoch": 0.08, + "grad_norm": 0.7624661827813749, + "learning_rate": 4.998663859202314e-06, + "loss": 0.345, + "step": 1712 + }, + { + "epoch": 0.08, + "grad_norm": 0.698826217843581, + "learning_rate": 4.99865765237576e-06, + "loss": 0.3119, + "step": 1713 + }, + { + "epoch": 0.08, + "grad_norm": 0.7414525308834549, + "learning_rate": 4.9986514311700594e-06, + "loss": 0.3161, + "step": 1714 + }, + { + "epoch": 0.08, + "grad_norm": 0.7546880433207858, + "learning_rate": 4.998645195585247e-06, + "loss": 0.3189, + "step": 1715 + }, + { + "epoch": 0.08, + "grad_norm": 0.7087605039097468, + "learning_rate": 4.9986389456213605e-06, + "loss": 0.3293, + "step": 1716 + }, + { + "epoch": 0.08, + "grad_norm": 0.6806453547338477, + "learning_rate": 4.9986326812784334e-06, + "loss": 0.301, + "step": 1717 + }, + { + "epoch": 0.08, + "grad_norm": 0.8185632363733483, + "learning_rate": 4.998626402556505e-06, + "loss": 0.3331, + "step": 1718 + }, + { + "epoch": 0.08, + "grad_norm": 0.7521392200791931, + "learning_rate": 4.998620109455608e-06, + "loss": 0.3104, + "step": 1719 + }, + { + "epoch": 0.08, + "grad_norm": 0.7303717397137349, + "learning_rate": 4.998613801975781e-06, + "loss": 0.3243, + "step": 1720 + }, + { + "epoch": 0.08, + "grad_norm": 0.6730159394452321, + "learning_rate": 4.99860748011706e-06, + "loss": 0.2941, + "step": 1721 + }, + { + "epoch": 0.08, + "grad_norm": 0.7419018851209224, + "learning_rate": 4.9986011438794806e-06, + "loss": 0.3058, + "step": 1722 + }, + { + "epoch": 0.08, + "grad_norm": 0.8771733096100495, + "learning_rate": 4.99859479326308e-06, + "loss": 0.3333, + "step": 1723 + }, + { + "epoch": 0.08, + "grad_norm": 0.7110613093535557, + "learning_rate": 4.9985884282678935e-06, + "loss": 0.3292, + "step": 1724 + }, + { + "epoch": 0.08, + "grad_norm": 0.7620087127352067, + "learning_rate": 4.9985820488939586e-06, + "loss": 0.3137, + "step": 1725 + }, + { + "epoch": 0.08, + "grad_norm": 0.8161361739283891, + "learning_rate": 4.998575655141312e-06, + "loss": 0.3184, + "step": 1726 + }, + { + "epoch": 0.08, + "grad_norm": 0.8586064027191836, + "learning_rate": 4.998569247009991e-06, + "loss": 0.3545, + "step": 1727 + }, + { + "epoch": 0.08, + "grad_norm": 0.8102411244562121, + "learning_rate": 4.998562824500032e-06, + "loss": 0.3515, + "step": 1728 + }, + { + "epoch": 0.08, + "grad_norm": 0.7897331926965182, + "learning_rate": 4.998556387611472e-06, + "loss": 0.3262, + "step": 1729 + }, + { + "epoch": 0.08, + "grad_norm": 0.7827400563794741, + "learning_rate": 4.998549936344348e-06, + "loss": 0.3297, + "step": 1730 + }, + { + "epoch": 0.08, + "grad_norm": 0.7545982491087264, + "learning_rate": 4.998543470698697e-06, + "loss": 0.3266, + "step": 1731 + }, + { + "epoch": 0.08, + "grad_norm": 0.7003527032758037, + "learning_rate": 4.998536990674556e-06, + "loss": 0.3189, + "step": 1732 + }, + { + "epoch": 0.08, + "grad_norm": 0.697372252464276, + "learning_rate": 4.998530496271963e-06, + "loss": 0.3155, + "step": 1733 + }, + { + "epoch": 0.08, + "grad_norm": 0.6960140155835915, + "learning_rate": 4.998523987490955e-06, + "loss": 0.3237, + "step": 1734 + }, + { + "epoch": 0.08, + "grad_norm": 0.6686493589477254, + "learning_rate": 4.998517464331569e-06, + "loss": 0.289, + "step": 1735 + }, + { + "epoch": 0.08, + "grad_norm": 0.7414978661150851, + "learning_rate": 4.998510926793844e-06, + "loss": 0.3329, + "step": 1736 + }, + { + "epoch": 0.08, + "grad_norm": 0.7673726921119707, + "learning_rate": 4.998504374877815e-06, + "loss": 0.3223, + "step": 1737 + }, + { + "epoch": 0.08, + "grad_norm": 0.6799599923184064, + "learning_rate": 4.998497808583522e-06, + "loss": 0.2776, + "step": 1738 + }, + { + "epoch": 0.08, + "grad_norm": 0.6879388426320046, + "learning_rate": 4.998491227911002e-06, + "loss": 0.3033, + "step": 1739 + }, + { + "epoch": 0.08, + "grad_norm": 0.7548048546962131, + "learning_rate": 4.998484632860293e-06, + "loss": 0.3172, + "step": 1740 + }, + { + "epoch": 0.08, + "grad_norm": 0.713617913934807, + "learning_rate": 4.998478023431434e-06, + "loss": 0.3085, + "step": 1741 + }, + { + "epoch": 0.08, + "grad_norm": 0.7690454012480247, + "learning_rate": 4.99847139962446e-06, + "loss": 0.298, + "step": 1742 + }, + { + "epoch": 0.08, + "grad_norm": 0.6950314898943085, + "learning_rate": 4.998464761439413e-06, + "loss": 0.3043, + "step": 1743 + }, + { + "epoch": 0.08, + "grad_norm": 0.7075000971046799, + "learning_rate": 4.998458108876328e-06, + "loss": 0.3103, + "step": 1744 + }, + { + "epoch": 0.08, + "grad_norm": 0.7798423173993134, + "learning_rate": 4.998451441935245e-06, + "loss": 0.344, + "step": 1745 + }, + { + "epoch": 0.08, + "grad_norm": 0.7452003721000116, + "learning_rate": 4.998444760616201e-06, + "loss": 0.3252, + "step": 1746 + }, + { + "epoch": 0.08, + "grad_norm": 0.6679886897551953, + "learning_rate": 4.998438064919237e-06, + "loss": 0.2998, + "step": 1747 + }, + { + "epoch": 0.08, + "grad_norm": 0.7206542598810673, + "learning_rate": 4.998431354844389e-06, + "loss": 0.3031, + "step": 1748 + }, + { + "epoch": 0.08, + "grad_norm": 0.7509049192178332, + "learning_rate": 4.998424630391696e-06, + "loss": 0.3183, + "step": 1749 + }, + { + "epoch": 0.08, + "grad_norm": 0.7546458022981962, + "learning_rate": 4.998417891561198e-06, + "loss": 0.3222, + "step": 1750 + }, + { + "epoch": 0.08, + "grad_norm": 0.6855404395700321, + "learning_rate": 4.998411138352933e-06, + "loss": 0.3101, + "step": 1751 + }, + { + "epoch": 0.08, + "grad_norm": 0.7238018142407787, + "learning_rate": 4.998404370766939e-06, + "loss": 0.3308, + "step": 1752 + }, + { + "epoch": 0.08, + "grad_norm": 0.7056564201596913, + "learning_rate": 4.998397588803256e-06, + "loss": 0.3107, + "step": 1753 + }, + { + "epoch": 0.08, + "grad_norm": 0.7632406240763014, + "learning_rate": 4.998390792461924e-06, + "loss": 0.3327, + "step": 1754 + }, + { + "epoch": 0.08, + "grad_norm": 0.7295120925574667, + "learning_rate": 4.9983839817429806e-06, + "loss": 0.3368, + "step": 1755 + }, + { + "epoch": 0.08, + "grad_norm": 0.7802718964125281, + "learning_rate": 4.998377156646465e-06, + "loss": 0.3168, + "step": 1756 + }, + { + "epoch": 0.08, + "grad_norm": 0.7057602056148283, + "learning_rate": 4.998370317172416e-06, + "loss": 0.3361, + "step": 1757 + }, + { + "epoch": 0.08, + "grad_norm": 0.7434180035006607, + "learning_rate": 4.998363463320874e-06, + "loss": 0.3106, + "step": 1758 + }, + { + "epoch": 0.08, + "grad_norm": 0.8954340677926301, + "learning_rate": 4.99835659509188e-06, + "loss": 0.3356, + "step": 1759 + }, + { + "epoch": 0.08, + "grad_norm": 0.7154793462938862, + "learning_rate": 4.998349712485469e-06, + "loss": 0.309, + "step": 1760 + }, + { + "epoch": 0.08, + "grad_norm": 0.7480020898829135, + "learning_rate": 4.998342815501686e-06, + "loss": 0.3204, + "step": 1761 + }, + { + "epoch": 0.08, + "grad_norm": 0.7163655607771257, + "learning_rate": 4.998335904140567e-06, + "loss": 0.3272, + "step": 1762 + }, + { + "epoch": 0.08, + "grad_norm": 0.7759190883810828, + "learning_rate": 4.998328978402152e-06, + "loss": 0.337, + "step": 1763 + }, + { + "epoch": 0.08, + "grad_norm": 0.7267166675328236, + "learning_rate": 4.9983220382864815e-06, + "loss": 0.2968, + "step": 1764 + }, + { + "epoch": 0.08, + "grad_norm": 0.7808124316361885, + "learning_rate": 4.998315083793597e-06, + "loss": 0.3239, + "step": 1765 + }, + { + "epoch": 0.08, + "grad_norm": 0.7935108783025467, + "learning_rate": 4.998308114923537e-06, + "loss": 0.3259, + "step": 1766 + }, + { + "epoch": 0.08, + "grad_norm": 0.7987650415729342, + "learning_rate": 4.998301131676341e-06, + "loss": 0.3263, + "step": 1767 + }, + { + "epoch": 0.08, + "grad_norm": 0.72807329199965, + "learning_rate": 4.9982941340520495e-06, + "loss": 0.3333, + "step": 1768 + }, + { + "epoch": 0.08, + "grad_norm": 0.7323147289935289, + "learning_rate": 4.998287122050704e-06, + "loss": 0.3261, + "step": 1769 + }, + { + "epoch": 0.08, + "grad_norm": 0.7242895678071853, + "learning_rate": 4.998280095672344e-06, + "loss": 0.3193, + "step": 1770 + }, + { + "epoch": 0.08, + "grad_norm": 0.7098091601813346, + "learning_rate": 4.99827305491701e-06, + "loss": 0.3403, + "step": 1771 + }, + { + "epoch": 0.08, + "grad_norm": 0.7367894092366094, + "learning_rate": 4.998265999784741e-06, + "loss": 0.3252, + "step": 1772 + }, + { + "epoch": 0.08, + "grad_norm": 0.7291537995218673, + "learning_rate": 4.998258930275581e-06, + "loss": 0.3172, + "step": 1773 + }, + { + "epoch": 0.08, + "grad_norm": 0.6870567079778375, + "learning_rate": 4.998251846389568e-06, + "loss": 0.3322, + "step": 1774 + }, + { + "epoch": 0.08, + "grad_norm": 0.7474369591725489, + "learning_rate": 4.998244748126744e-06, + "loss": 0.298, + "step": 1775 + }, + { + "epoch": 0.08, + "grad_norm": 0.7830366462232454, + "learning_rate": 4.998237635487149e-06, + "loss": 0.3519, + "step": 1776 + }, + { + "epoch": 0.08, + "grad_norm": 0.76818945844856, + "learning_rate": 4.998230508470824e-06, + "loss": 0.3322, + "step": 1777 + }, + { + "epoch": 0.08, + "grad_norm": 0.7109054411232639, + "learning_rate": 4.9982233670778114e-06, + "loss": 0.3274, + "step": 1778 + }, + { + "epoch": 0.08, + "grad_norm": 0.6986536477620593, + "learning_rate": 4.99821621130815e-06, + "loss": 0.3236, + "step": 1779 + }, + { + "epoch": 0.08, + "grad_norm": 0.7155236486233788, + "learning_rate": 4.998209041161883e-06, + "loss": 0.3102, + "step": 1780 + }, + { + "epoch": 0.08, + "grad_norm": 0.6801665064335161, + "learning_rate": 4.998201856639051e-06, + "loss": 0.3179, + "step": 1781 + }, + { + "epoch": 0.08, + "grad_norm": 0.7699799523838893, + "learning_rate": 4.998194657739695e-06, + "loss": 0.3387, + "step": 1782 + }, + { + "epoch": 0.08, + "grad_norm": 0.7606229931649146, + "learning_rate": 4.998187444463856e-06, + "loss": 0.328, + "step": 1783 + }, + { + "epoch": 0.08, + "grad_norm": 0.7139241231626434, + "learning_rate": 4.998180216811576e-06, + "loss": 0.3249, + "step": 1784 + }, + { + "epoch": 0.08, + "grad_norm": 0.7159153957303732, + "learning_rate": 4.998172974782898e-06, + "loss": 0.32, + "step": 1785 + }, + { + "epoch": 0.08, + "grad_norm": 0.7088580650004593, + "learning_rate": 4.998165718377862e-06, + "loss": 0.3247, + "step": 1786 + }, + { + "epoch": 0.08, + "grad_norm": 0.7077902237615663, + "learning_rate": 4.99815844759651e-06, + "loss": 0.2995, + "step": 1787 + }, + { + "epoch": 0.08, + "grad_norm": 0.7324174537367453, + "learning_rate": 4.998151162438884e-06, + "loss": 0.3106, + "step": 1788 + }, + { + "epoch": 0.08, + "grad_norm": 0.7263458637467279, + "learning_rate": 4.998143862905025e-06, + "loss": 0.3213, + "step": 1789 + }, + { + "epoch": 0.08, + "grad_norm": 0.7718814672642494, + "learning_rate": 4.998136548994977e-06, + "loss": 0.3161, + "step": 1790 + }, + { + "epoch": 0.08, + "grad_norm": 0.6959745622776874, + "learning_rate": 4.998129220708781e-06, + "loss": 0.318, + "step": 1791 + }, + { + "epoch": 0.08, + "grad_norm": 0.698710157275822, + "learning_rate": 4.998121878046479e-06, + "loss": 0.3104, + "step": 1792 + }, + { + "epoch": 0.08, + "grad_norm": 0.7351381021236111, + "learning_rate": 4.998114521008114e-06, + "loss": 0.3315, + "step": 1793 + }, + { + "epoch": 0.08, + "grad_norm": 0.7466049061178303, + "learning_rate": 4.998107149593727e-06, + "loss": 0.3333, + "step": 1794 + }, + { + "epoch": 0.08, + "grad_norm": 0.6928736768465505, + "learning_rate": 4.998099763803362e-06, + "loss": 0.3115, + "step": 1795 + }, + { + "epoch": 0.08, + "grad_norm": 0.6958381477494567, + "learning_rate": 4.99809236363706e-06, + "loss": 0.3035, + "step": 1796 + }, + { + "epoch": 0.08, + "grad_norm": 0.7602021574577486, + "learning_rate": 4.998084949094864e-06, + "loss": 0.3259, + "step": 1797 + }, + { + "epoch": 0.08, + "grad_norm": 0.6927924823999699, + "learning_rate": 4.998077520176818e-06, + "loss": 0.3103, + "step": 1798 + }, + { + "epoch": 0.08, + "grad_norm": 0.7498158760171405, + "learning_rate": 4.998070076882964e-06, + "loss": 0.336, + "step": 1799 + }, + { + "epoch": 0.08, + "grad_norm": 0.7453694620699397, + "learning_rate": 4.998062619213344e-06, + "loss": 0.326, + "step": 1800 + }, + { + "epoch": 0.08, + "grad_norm": 0.8068477086252633, + "learning_rate": 4.998055147168002e-06, + "loss": 0.3258, + "step": 1801 + }, + { + "epoch": 0.08, + "grad_norm": 0.6955121679811483, + "learning_rate": 4.99804766074698e-06, + "loss": 0.3047, + "step": 1802 + }, + { + "epoch": 0.08, + "grad_norm": 0.7262856926709291, + "learning_rate": 4.998040159950322e-06, + "loss": 0.3329, + "step": 1803 + }, + { + "epoch": 0.08, + "grad_norm": 0.6959419974806004, + "learning_rate": 4.998032644778069e-06, + "loss": 0.3158, + "step": 1804 + }, + { + "epoch": 0.08, + "grad_norm": 0.7187913266130985, + "learning_rate": 4.998025115230268e-06, + "loss": 0.315, + "step": 1805 + }, + { + "epoch": 0.08, + "grad_norm": 0.7366374624629235, + "learning_rate": 4.998017571306959e-06, + "loss": 0.2924, + "step": 1806 + }, + { + "epoch": 0.08, + "grad_norm": 0.7780051659615007, + "learning_rate": 4.9980100130081875e-06, + "loss": 0.322, + "step": 1807 + }, + { + "epoch": 0.08, + "grad_norm": 0.735962223319754, + "learning_rate": 4.998002440333996e-06, + "loss": 0.3041, + "step": 1808 + }, + { + "epoch": 0.08, + "grad_norm": 0.6795134987475824, + "learning_rate": 4.997994853284429e-06, + "loss": 0.3063, + "step": 1809 + }, + { + "epoch": 0.08, + "grad_norm": 0.6826684435710839, + "learning_rate": 4.997987251859528e-06, + "loss": 0.3128, + "step": 1810 + }, + { + "epoch": 0.08, + "grad_norm": 0.6823867138381928, + "learning_rate": 4.997979636059339e-06, + "loss": 0.3259, + "step": 1811 + }, + { + "epoch": 0.08, + "grad_norm": 0.7187396126467649, + "learning_rate": 4.997972005883905e-06, + "loss": 0.3303, + "step": 1812 + }, + { + "epoch": 0.08, + "grad_norm": 0.7243086563802056, + "learning_rate": 4.997964361333271e-06, + "loss": 0.3233, + "step": 1813 + }, + { + "epoch": 0.08, + "grad_norm": 0.7026059949246819, + "learning_rate": 4.997956702407478e-06, + "loss": 0.3241, + "step": 1814 + }, + { + "epoch": 0.09, + "grad_norm": 0.7479737242754531, + "learning_rate": 4.997949029106574e-06, + "loss": 0.3425, + "step": 1815 + }, + { + "epoch": 0.09, + "grad_norm": 0.8071354633995583, + "learning_rate": 4.997941341430599e-06, + "loss": 0.3341, + "step": 1816 + }, + { + "epoch": 0.09, + "grad_norm": 0.7533007935571685, + "learning_rate": 4.997933639379601e-06, + "loss": 0.3236, + "step": 1817 + }, + { + "epoch": 0.09, + "grad_norm": 0.7471213322037267, + "learning_rate": 4.9979259229536226e-06, + "loss": 0.3031, + "step": 1818 + }, + { + "epoch": 0.09, + "grad_norm": 0.7469831232903273, + "learning_rate": 4.997918192152707e-06, + "loss": 0.3242, + "step": 1819 + }, + { + "epoch": 0.09, + "grad_norm": 0.6691807061984908, + "learning_rate": 4.997910446976902e-06, + "loss": 0.3039, + "step": 1820 + }, + { + "epoch": 0.09, + "grad_norm": 0.6715047763051905, + "learning_rate": 4.997902687426249e-06, + "loss": 0.2996, + "step": 1821 + }, + { + "epoch": 0.09, + "grad_norm": 0.7139486870455246, + "learning_rate": 4.997894913500794e-06, + "loss": 0.3095, + "step": 1822 + }, + { + "epoch": 0.09, + "grad_norm": 0.7963261280517057, + "learning_rate": 4.997887125200582e-06, + "loss": 0.3223, + "step": 1823 + }, + { + "epoch": 0.09, + "grad_norm": 0.7668077363746179, + "learning_rate": 4.997879322525657e-06, + "loss": 0.3142, + "step": 1824 + }, + { + "epoch": 0.09, + "grad_norm": 0.7043956208805083, + "learning_rate": 4.997871505476064e-06, + "loss": 0.3076, + "step": 1825 + }, + { + "epoch": 0.09, + "grad_norm": 0.6809975385236566, + "learning_rate": 4.99786367405185e-06, + "loss": 0.3101, + "step": 1826 + }, + { + "epoch": 0.09, + "grad_norm": 0.778689259579579, + "learning_rate": 4.9978558282530565e-06, + "loss": 0.3115, + "step": 1827 + }, + { + "epoch": 0.09, + "grad_norm": 0.6838314546241825, + "learning_rate": 4.9978479680797305e-06, + "loss": 0.3072, + "step": 1828 + }, + { + "epoch": 0.09, + "grad_norm": 0.7190708957130343, + "learning_rate": 4.997840093531918e-06, + "loss": 0.3159, + "step": 1829 + }, + { + "epoch": 0.09, + "grad_norm": 0.7296557020481241, + "learning_rate": 4.997832204609664e-06, + "loss": 0.3293, + "step": 1830 + }, + { + "epoch": 0.09, + "grad_norm": 0.7415202359639985, + "learning_rate": 4.997824301313013e-06, + "loss": 0.3177, + "step": 1831 + }, + { + "epoch": 0.09, + "grad_norm": 0.6715278100003642, + "learning_rate": 4.997816383642012e-06, + "loss": 0.2917, + "step": 1832 + }, + { + "epoch": 0.09, + "grad_norm": 0.7219307230041185, + "learning_rate": 4.9978084515967035e-06, + "loss": 0.3166, + "step": 1833 + }, + { + "epoch": 0.09, + "grad_norm": 0.7188266363425205, + "learning_rate": 4.997800505177136e-06, + "loss": 0.3166, + "step": 1834 + }, + { + "epoch": 0.09, + "grad_norm": 0.7775991769574809, + "learning_rate": 4.997792544383354e-06, + "loss": 0.3458, + "step": 1835 + }, + { + "epoch": 0.09, + "grad_norm": 0.7098277819200154, + "learning_rate": 4.997784569215405e-06, + "loss": 0.3208, + "step": 1836 + }, + { + "epoch": 0.09, + "grad_norm": 0.7039087094141674, + "learning_rate": 4.997776579673332e-06, + "loss": 0.3056, + "step": 1837 + }, + { + "epoch": 0.09, + "grad_norm": 0.6895476967375671, + "learning_rate": 4.997768575757184e-06, + "loss": 0.3229, + "step": 1838 + }, + { + "epoch": 0.09, + "grad_norm": 0.7282488955405598, + "learning_rate": 4.997760557467005e-06, + "loss": 0.3411, + "step": 1839 + }, + { + "epoch": 0.09, + "grad_norm": 0.7111674495871648, + "learning_rate": 4.9977525248028415e-06, + "loss": 0.293, + "step": 1840 + }, + { + "epoch": 0.09, + "grad_norm": 0.7001478017670746, + "learning_rate": 4.99774447776474e-06, + "loss": 0.3185, + "step": 1841 + }, + { + "epoch": 0.09, + "grad_norm": 0.7386876078647513, + "learning_rate": 4.997736416352747e-06, + "loss": 0.3294, + "step": 1842 + }, + { + "epoch": 0.09, + "grad_norm": 0.7131410596494243, + "learning_rate": 4.997728340566909e-06, + "loss": 0.3088, + "step": 1843 + }, + { + "epoch": 0.09, + "grad_norm": 0.7554464345794174, + "learning_rate": 4.997720250407272e-06, + "loss": 0.3232, + "step": 1844 + }, + { + "epoch": 0.09, + "grad_norm": 0.7118736026163497, + "learning_rate": 4.997712145873883e-06, + "loss": 0.3355, + "step": 1845 + }, + { + "epoch": 0.09, + "grad_norm": 0.6931751775850544, + "learning_rate": 4.997704026966788e-06, + "loss": 0.3041, + "step": 1846 + }, + { + "epoch": 0.09, + "grad_norm": 0.7076553211120602, + "learning_rate": 4.997695893686034e-06, + "loss": 0.3203, + "step": 1847 + }, + { + "epoch": 0.09, + "grad_norm": 0.743248104282541, + "learning_rate": 4.997687746031669e-06, + "loss": 0.3065, + "step": 1848 + }, + { + "epoch": 0.09, + "grad_norm": 0.6857642940206522, + "learning_rate": 4.997679584003737e-06, + "loss": 0.2877, + "step": 1849 + }, + { + "epoch": 0.09, + "grad_norm": 0.7267861614460097, + "learning_rate": 4.9976714076022885e-06, + "loss": 0.3027, + "step": 1850 + }, + { + "epoch": 0.09, + "grad_norm": 0.6699607370263757, + "learning_rate": 4.997663216827367e-06, + "loss": 0.3041, + "step": 1851 + }, + { + "epoch": 0.09, + "grad_norm": 0.6911567970506886, + "learning_rate": 4.997655011679022e-06, + "loss": 0.3187, + "step": 1852 + }, + { + "epoch": 0.09, + "grad_norm": 0.7088719665933794, + "learning_rate": 4.997646792157301e-06, + "loss": 0.3051, + "step": 1853 + }, + { + "epoch": 0.09, + "grad_norm": 0.6639722550318254, + "learning_rate": 4.9976385582622496e-06, + "loss": 0.3168, + "step": 1854 + }, + { + "epoch": 0.09, + "grad_norm": 0.6526392683521721, + "learning_rate": 4.997630309993917e-06, + "loss": 0.2916, + "step": 1855 + }, + { + "epoch": 0.09, + "grad_norm": 0.6822839071237815, + "learning_rate": 4.99762204735235e-06, + "loss": 0.3091, + "step": 1856 + }, + { + "epoch": 0.09, + "grad_norm": 0.7366819568090066, + "learning_rate": 4.997613770337595e-06, + "loss": 0.3192, + "step": 1857 + }, + { + "epoch": 0.09, + "grad_norm": 0.7033342995649723, + "learning_rate": 4.9976054789497e-06, + "loss": 0.3006, + "step": 1858 + }, + { + "epoch": 0.09, + "grad_norm": 0.7056612060017199, + "learning_rate": 4.997597173188714e-06, + "loss": 0.3359, + "step": 1859 + }, + { + "epoch": 0.09, + "grad_norm": 0.7401241781969261, + "learning_rate": 4.997588853054683e-06, + "loss": 0.3362, + "step": 1860 + }, + { + "epoch": 0.09, + "grad_norm": 0.7269733388695065, + "learning_rate": 4.997580518547658e-06, + "loss": 0.3323, + "step": 1861 + }, + { + "epoch": 0.09, + "grad_norm": 0.7413527685377155, + "learning_rate": 4.9975721696676834e-06, + "loss": 0.3459, + "step": 1862 + }, + { + "epoch": 0.09, + "grad_norm": 0.681558143450639, + "learning_rate": 4.9975638064148086e-06, + "loss": 0.3082, + "step": 1863 + }, + { + "epoch": 0.09, + "grad_norm": 0.6930896963455039, + "learning_rate": 4.997555428789082e-06, + "loss": 0.3294, + "step": 1864 + }, + { + "epoch": 0.09, + "grad_norm": 0.6606396190758275, + "learning_rate": 4.997547036790552e-06, + "loss": 0.3255, + "step": 1865 + }, + { + "epoch": 0.09, + "grad_norm": 0.7297307947405649, + "learning_rate": 4.997538630419267e-06, + "loss": 0.333, + "step": 1866 + }, + { + "epoch": 0.09, + "grad_norm": 0.7329292581732605, + "learning_rate": 4.997530209675273e-06, + "loss": 0.3385, + "step": 1867 + }, + { + "epoch": 0.09, + "grad_norm": 0.7424093867254915, + "learning_rate": 4.997521774558622e-06, + "loss": 0.3108, + "step": 1868 + }, + { + "epoch": 0.09, + "grad_norm": 0.7188352838783755, + "learning_rate": 4.997513325069361e-06, + "loss": 0.3055, + "step": 1869 + }, + { + "epoch": 0.09, + "grad_norm": 0.666567765815909, + "learning_rate": 4.997504861207538e-06, + "loss": 0.3111, + "step": 1870 + }, + { + "epoch": 0.09, + "grad_norm": 0.8232753568530948, + "learning_rate": 4.997496382973202e-06, + "loss": 0.3287, + "step": 1871 + }, + { + "epoch": 0.09, + "grad_norm": 0.751013891356403, + "learning_rate": 4.997487890366403e-06, + "loss": 0.3426, + "step": 1872 + }, + { + "epoch": 0.09, + "grad_norm": 0.719654054311617, + "learning_rate": 4.9974793833871875e-06, + "loss": 0.3327, + "step": 1873 + }, + { + "epoch": 0.09, + "grad_norm": 0.7548392395678372, + "learning_rate": 4.997470862035606e-06, + "loss": 0.3453, + "step": 1874 + }, + { + "epoch": 0.09, + "grad_norm": 0.7068259307911579, + "learning_rate": 4.997462326311708e-06, + "loss": 0.309, + "step": 1875 + }, + { + "epoch": 0.09, + "grad_norm": 0.6890734110093145, + "learning_rate": 4.997453776215543e-06, + "loss": 0.3003, + "step": 1876 + }, + { + "epoch": 0.09, + "grad_norm": 0.7353684379308423, + "learning_rate": 4.997445211747158e-06, + "loss": 0.3294, + "step": 1877 + }, + { + "epoch": 0.09, + "grad_norm": 0.75396330749522, + "learning_rate": 4.997436632906604e-06, + "loss": 0.3164, + "step": 1878 + }, + { + "epoch": 0.09, + "grad_norm": 0.7146925613272879, + "learning_rate": 4.99742803969393e-06, + "loss": 0.359, + "step": 1879 + }, + { + "epoch": 0.09, + "grad_norm": 0.6837506254526113, + "learning_rate": 4.997419432109184e-06, + "loss": 0.3171, + "step": 1880 + }, + { + "epoch": 0.09, + "grad_norm": 0.7036093907929248, + "learning_rate": 4.997410810152419e-06, + "loss": 0.3332, + "step": 1881 + }, + { + "epoch": 0.09, + "grad_norm": 0.6909854235356081, + "learning_rate": 4.997402173823681e-06, + "loss": 0.3078, + "step": 1882 + }, + { + "epoch": 0.09, + "grad_norm": 0.7164450520194366, + "learning_rate": 4.9973935231230215e-06, + "loss": 0.3209, + "step": 1883 + }, + { + "epoch": 0.09, + "grad_norm": 0.6915842235057327, + "learning_rate": 4.9973848580504905e-06, + "loss": 0.3357, + "step": 1884 + }, + { + "epoch": 0.09, + "grad_norm": 0.6767201626593479, + "learning_rate": 4.9973761786061355e-06, + "loss": 0.3137, + "step": 1885 + }, + { + "epoch": 0.09, + "grad_norm": 0.6826660752961203, + "learning_rate": 4.997367484790011e-06, + "loss": 0.2857, + "step": 1886 + }, + { + "epoch": 0.09, + "grad_norm": 0.6783039996557115, + "learning_rate": 4.997358776602163e-06, + "loss": 0.3163, + "step": 1887 + }, + { + "epoch": 0.09, + "grad_norm": 0.7687018581489734, + "learning_rate": 4.997350054042642e-06, + "loss": 0.3214, + "step": 1888 + }, + { + "epoch": 0.09, + "grad_norm": 0.7212583900936821, + "learning_rate": 4.9973413171115e-06, + "loss": 0.3275, + "step": 1889 + }, + { + "epoch": 0.09, + "grad_norm": 0.7184324707098966, + "learning_rate": 4.997332565808787e-06, + "loss": 0.3223, + "step": 1890 + }, + { + "epoch": 0.09, + "grad_norm": 0.7663605858286447, + "learning_rate": 4.997323800134552e-06, + "loss": 0.3443, + "step": 1891 + }, + { + "epoch": 0.09, + "grad_norm": 0.7635449157288412, + "learning_rate": 4.997315020088846e-06, + "loss": 0.3441, + "step": 1892 + }, + { + "epoch": 0.09, + "grad_norm": 0.6837677725488568, + "learning_rate": 4.9973062256717204e-06, + "loss": 0.3307, + "step": 1893 + }, + { + "epoch": 0.09, + "grad_norm": 0.6599091374386534, + "learning_rate": 4.997297416883225e-06, + "loss": 0.31, + "step": 1894 + }, + { + "epoch": 0.09, + "grad_norm": 0.6688961670672441, + "learning_rate": 4.99728859372341e-06, + "loss": 0.3165, + "step": 1895 + }, + { + "epoch": 0.09, + "grad_norm": 0.7408658442788287, + "learning_rate": 4.997279756192328e-06, + "loss": 0.3239, + "step": 1896 + }, + { + "epoch": 0.09, + "grad_norm": 0.7298599595339883, + "learning_rate": 4.997270904290028e-06, + "loss": 0.3266, + "step": 1897 + }, + { + "epoch": 0.09, + "grad_norm": 0.7288308767288255, + "learning_rate": 4.997262038016561e-06, + "loss": 0.3129, + "step": 1898 + }, + { + "epoch": 0.09, + "grad_norm": 0.6771704459219822, + "learning_rate": 4.997253157371979e-06, + "loss": 0.2913, + "step": 1899 + }, + { + "epoch": 0.09, + "grad_norm": 0.6963374331826315, + "learning_rate": 4.997244262356333e-06, + "loss": 0.3022, + "step": 1900 + }, + { + "epoch": 0.09, + "grad_norm": 0.7836837735572619, + "learning_rate": 4.9972353529696745e-06, + "loss": 0.3424, + "step": 1901 + }, + { + "epoch": 0.09, + "grad_norm": 0.6990091642582053, + "learning_rate": 4.997226429212054e-06, + "loss": 0.3152, + "step": 1902 + }, + { + "epoch": 0.09, + "grad_norm": 0.7831222265900695, + "learning_rate": 4.997217491083523e-06, + "loss": 0.3257, + "step": 1903 + }, + { + "epoch": 0.09, + "grad_norm": 0.8036304138051275, + "learning_rate": 4.997208538584132e-06, + "loss": 0.3144, + "step": 1904 + }, + { + "epoch": 0.09, + "grad_norm": 0.7251270047858688, + "learning_rate": 4.997199571713934e-06, + "loss": 0.3186, + "step": 1905 + }, + { + "epoch": 0.09, + "grad_norm": 0.7116158139412175, + "learning_rate": 4.997190590472981e-06, + "loss": 0.3196, + "step": 1906 + }, + { + "epoch": 0.09, + "grad_norm": 0.7201846583418852, + "learning_rate": 4.9971815948613225e-06, + "loss": 0.3311, + "step": 1907 + }, + { + "epoch": 0.09, + "grad_norm": 0.7997702696957747, + "learning_rate": 4.997172584879012e-06, + "loss": 0.3325, + "step": 1908 + }, + { + "epoch": 0.09, + "grad_norm": 0.7668203809437942, + "learning_rate": 4.997163560526102e-06, + "loss": 0.3431, + "step": 1909 + }, + { + "epoch": 0.09, + "grad_norm": 0.7158109589347311, + "learning_rate": 4.9971545218026415e-06, + "loss": 0.3228, + "step": 1910 + }, + { + "epoch": 0.09, + "grad_norm": 0.7932227280705801, + "learning_rate": 4.9971454687086855e-06, + "loss": 0.3225, + "step": 1911 + }, + { + "epoch": 0.09, + "grad_norm": 0.735527862306773, + "learning_rate": 4.9971364012442845e-06, + "loss": 0.3296, + "step": 1912 + }, + { + "epoch": 0.09, + "grad_norm": 0.7158975375635732, + "learning_rate": 4.997127319409492e-06, + "loss": 0.3217, + "step": 1913 + }, + { + "epoch": 0.09, + "grad_norm": 0.7323482712540782, + "learning_rate": 4.997118223204358e-06, + "loss": 0.3233, + "step": 1914 + }, + { + "epoch": 0.09, + "grad_norm": 0.6880820913560425, + "learning_rate": 4.997109112628937e-06, + "loss": 0.2948, + "step": 1915 + }, + { + "epoch": 0.09, + "grad_norm": 0.7837134603822978, + "learning_rate": 4.997099987683281e-06, + "loss": 0.3248, + "step": 1916 + }, + { + "epoch": 0.09, + "grad_norm": 0.7071113985732169, + "learning_rate": 4.997090848367442e-06, + "loss": 0.3076, + "step": 1917 + }, + { + "epoch": 0.09, + "grad_norm": 0.6593355548184715, + "learning_rate": 4.997081694681473e-06, + "loss": 0.3171, + "step": 1918 + }, + { + "epoch": 0.09, + "grad_norm": 0.6876502834785655, + "learning_rate": 4.997072526625426e-06, + "loss": 0.2911, + "step": 1919 + }, + { + "epoch": 0.09, + "grad_norm": 0.8331073934271395, + "learning_rate": 4.997063344199354e-06, + "loss": 0.3339, + "step": 1920 + }, + { + "epoch": 0.09, + "grad_norm": 0.7133722159310094, + "learning_rate": 4.997054147403311e-06, + "loss": 0.3172, + "step": 1921 + }, + { + "epoch": 0.09, + "grad_norm": 0.6488035182272132, + "learning_rate": 4.997044936237349e-06, + "loss": 0.2892, + "step": 1922 + }, + { + "epoch": 0.09, + "grad_norm": 0.7490176843677582, + "learning_rate": 4.99703571070152e-06, + "loss": 0.3035, + "step": 1923 + }, + { + "epoch": 0.09, + "grad_norm": 0.7353834138289501, + "learning_rate": 4.997026470795878e-06, + "loss": 0.3317, + "step": 1924 + }, + { + "epoch": 0.09, + "grad_norm": 0.7761396775437668, + "learning_rate": 4.997017216520477e-06, + "loss": 0.3157, + "step": 1925 + }, + { + "epoch": 0.09, + "grad_norm": 0.6953386038853723, + "learning_rate": 4.9970079478753695e-06, + "loss": 0.3104, + "step": 1926 + }, + { + "epoch": 0.09, + "grad_norm": 0.6747279018109393, + "learning_rate": 4.996998664860608e-06, + "loss": 0.3084, + "step": 1927 + }, + { + "epoch": 0.09, + "grad_norm": 0.7223050296761307, + "learning_rate": 4.996989367476248e-06, + "loss": 0.3129, + "step": 1928 + }, + { + "epoch": 0.09, + "grad_norm": 0.7037064353240281, + "learning_rate": 4.996980055722341e-06, + "loss": 0.3059, + "step": 1929 + }, + { + "epoch": 0.09, + "grad_norm": 0.7104164777301738, + "learning_rate": 4.996970729598942e-06, + "loss": 0.3166, + "step": 1930 + }, + { + "epoch": 0.09, + "grad_norm": 0.6909175558052765, + "learning_rate": 4.996961389106104e-06, + "loss": 0.3187, + "step": 1931 + }, + { + "epoch": 0.09, + "grad_norm": 0.7254390781246494, + "learning_rate": 4.99695203424388e-06, + "loss": 0.3037, + "step": 1932 + }, + { + "epoch": 0.09, + "grad_norm": 0.7279276591343629, + "learning_rate": 4.996942665012326e-06, + "loss": 0.3166, + "step": 1933 + }, + { + "epoch": 0.09, + "grad_norm": 0.7594721863395164, + "learning_rate": 4.996933281411492e-06, + "loss": 0.3153, + "step": 1934 + }, + { + "epoch": 0.09, + "grad_norm": 0.6780087296646461, + "learning_rate": 4.996923883441437e-06, + "loss": 0.3045, + "step": 1935 + }, + { + "epoch": 0.09, + "grad_norm": 0.6821646650173121, + "learning_rate": 4.9969144711022115e-06, + "loss": 0.3107, + "step": 1936 + }, + { + "epoch": 0.09, + "grad_norm": 0.8667170497564675, + "learning_rate": 4.996905044393872e-06, + "loss": 0.3208, + "step": 1937 + }, + { + "epoch": 0.09, + "grad_norm": 0.7752206510634256, + "learning_rate": 4.99689560331647e-06, + "loss": 0.2991, + "step": 1938 + }, + { + "epoch": 0.09, + "grad_norm": 0.6663394309335908, + "learning_rate": 4.996886147870062e-06, + "loss": 0.2809, + "step": 1939 + }, + { + "epoch": 0.09, + "grad_norm": 0.68910794070722, + "learning_rate": 4.996876678054702e-06, + "loss": 0.316, + "step": 1940 + }, + { + "epoch": 0.09, + "grad_norm": 0.7448187981262238, + "learning_rate": 4.9968671938704435e-06, + "loss": 0.3179, + "step": 1941 + }, + { + "epoch": 0.09, + "grad_norm": 0.7997261717133721, + "learning_rate": 4.996857695317343e-06, + "loss": 0.3318, + "step": 1942 + }, + { + "epoch": 0.09, + "grad_norm": 0.7277065429822372, + "learning_rate": 4.996848182395453e-06, + "loss": 0.3313, + "step": 1943 + }, + { + "epoch": 0.09, + "grad_norm": 0.6995087059659084, + "learning_rate": 4.99683865510483e-06, + "loss": 0.2915, + "step": 1944 + }, + { + "epoch": 0.09, + "grad_norm": 0.7500194247164272, + "learning_rate": 4.9968291134455285e-06, + "loss": 0.3382, + "step": 1945 + }, + { + "epoch": 0.09, + "grad_norm": 0.8043447545238351, + "learning_rate": 4.9968195574176015e-06, + "loss": 0.3301, + "step": 1946 + }, + { + "epoch": 0.09, + "grad_norm": 0.7773128590339266, + "learning_rate": 4.9968099870211064e-06, + "loss": 0.3403, + "step": 1947 + }, + { + "epoch": 0.09, + "grad_norm": 0.7636779095414591, + "learning_rate": 4.996800402256098e-06, + "loss": 0.3302, + "step": 1948 + }, + { + "epoch": 0.09, + "grad_norm": 0.6776972292481492, + "learning_rate": 4.99679080312263e-06, + "loss": 0.3296, + "step": 1949 + }, + { + "epoch": 0.09, + "grad_norm": 0.7871746604257311, + "learning_rate": 4.996781189620759e-06, + "loss": 0.3243, + "step": 1950 + }, + { + "epoch": 0.09, + "grad_norm": 0.7381429331745297, + "learning_rate": 4.996771561750539e-06, + "loss": 0.3137, + "step": 1951 + }, + { + "epoch": 0.09, + "grad_norm": 0.6991042128244905, + "learning_rate": 4.996761919512026e-06, + "loss": 0.3176, + "step": 1952 + }, + { + "epoch": 0.09, + "grad_norm": 0.734969840078329, + "learning_rate": 4.996752262905277e-06, + "loss": 0.3088, + "step": 1953 + }, + { + "epoch": 0.09, + "grad_norm": 0.6579275817182733, + "learning_rate": 4.996742591930345e-06, + "loss": 0.3091, + "step": 1954 + }, + { + "epoch": 0.09, + "grad_norm": 0.7993589249036315, + "learning_rate": 4.9967329065872874e-06, + "loss": 0.3377, + "step": 1955 + }, + { + "epoch": 0.09, + "grad_norm": 0.7689596540842992, + "learning_rate": 4.99672320687616e-06, + "loss": 0.3274, + "step": 1956 + }, + { + "epoch": 0.09, + "grad_norm": 0.7900213165159051, + "learning_rate": 4.9967134927970175e-06, + "loss": 0.3239, + "step": 1957 + }, + { + "epoch": 0.09, + "grad_norm": 0.7159964306244458, + "learning_rate": 4.9967037643499166e-06, + "loss": 0.324, + "step": 1958 + }, + { + "epoch": 0.09, + "grad_norm": 0.7474630611415196, + "learning_rate": 4.996694021534913e-06, + "loss": 0.3453, + "step": 1959 + }, + { + "epoch": 0.09, + "grad_norm": 0.7966518428507672, + "learning_rate": 4.996684264352063e-06, + "loss": 0.3489, + "step": 1960 + }, + { + "epoch": 0.09, + "grad_norm": 0.7031007878620311, + "learning_rate": 4.996674492801422e-06, + "loss": 0.3055, + "step": 1961 + }, + { + "epoch": 0.09, + "grad_norm": 0.7913475176910331, + "learning_rate": 4.9966647068830476e-06, + "loss": 0.3318, + "step": 1962 + }, + { + "epoch": 0.09, + "grad_norm": 0.73957207620478, + "learning_rate": 4.996654906596995e-06, + "loss": 0.3295, + "step": 1963 + }, + { + "epoch": 0.09, + "grad_norm": 0.6773020245710241, + "learning_rate": 4.996645091943321e-06, + "loss": 0.2944, + "step": 1964 + }, + { + "epoch": 0.09, + "grad_norm": 0.6806928340542534, + "learning_rate": 4.996635262922082e-06, + "loss": 0.2932, + "step": 1965 + }, + { + "epoch": 0.09, + "grad_norm": 0.7585239426799351, + "learning_rate": 4.996625419533333e-06, + "loss": 0.3374, + "step": 1966 + }, + { + "epoch": 0.09, + "grad_norm": 0.7074513198076765, + "learning_rate": 4.996615561777134e-06, + "loss": 0.3207, + "step": 1967 + }, + { + "epoch": 0.09, + "grad_norm": 0.6914585274096812, + "learning_rate": 4.99660568965354e-06, + "loss": 0.33, + "step": 1968 + }, + { + "epoch": 0.09, + "grad_norm": 0.661062052777361, + "learning_rate": 4.996595803162607e-06, + "loss": 0.3086, + "step": 1969 + }, + { + "epoch": 0.09, + "grad_norm": 0.7115858274680142, + "learning_rate": 4.996585902304393e-06, + "loss": 0.3176, + "step": 1970 + }, + { + "epoch": 0.09, + "grad_norm": 0.6807019705232173, + "learning_rate": 4.9965759870789545e-06, + "loss": 0.3192, + "step": 1971 + }, + { + "epoch": 0.09, + "grad_norm": 0.7248476486564042, + "learning_rate": 4.996566057486348e-06, + "loss": 0.3337, + "step": 1972 + }, + { + "epoch": 0.09, + "grad_norm": 0.6904311121359333, + "learning_rate": 4.996556113526633e-06, + "loss": 0.3173, + "step": 1973 + }, + { + "epoch": 0.09, + "grad_norm": 0.7306640582016146, + "learning_rate": 4.996546155199864e-06, + "loss": 0.3212, + "step": 1974 + }, + { + "epoch": 0.09, + "grad_norm": 0.7356398855193282, + "learning_rate": 4.9965361825061e-06, + "loss": 0.3147, + "step": 1975 + }, + { + "epoch": 0.09, + "grad_norm": 0.6890633431582758, + "learning_rate": 4.996526195445397e-06, + "loss": 0.3178, + "step": 1976 + }, + { + "epoch": 0.09, + "grad_norm": 0.6618299416200297, + "learning_rate": 4.996516194017813e-06, + "loss": 0.3192, + "step": 1977 + }, + { + "epoch": 0.09, + "grad_norm": 0.7081314021779908, + "learning_rate": 4.996506178223406e-06, + "loss": 0.3064, + "step": 1978 + }, + { + "epoch": 0.09, + "grad_norm": 0.7051453717902891, + "learning_rate": 4.996496148062234e-06, + "loss": 0.3128, + "step": 1979 + }, + { + "epoch": 0.09, + "grad_norm": 0.7160342677676603, + "learning_rate": 4.996486103534354e-06, + "loss": 0.3328, + "step": 1980 + }, + { + "epoch": 0.09, + "grad_norm": 0.6961733703657984, + "learning_rate": 4.9964760446398236e-06, + "loss": 0.3247, + "step": 1981 + }, + { + "epoch": 0.09, + "grad_norm": 0.7030255026396931, + "learning_rate": 4.9964659713787015e-06, + "loss": 0.3086, + "step": 1982 + }, + { + "epoch": 0.09, + "grad_norm": 0.6797773801964319, + "learning_rate": 4.996455883751044e-06, + "loss": 0.3325, + "step": 1983 + }, + { + "epoch": 0.09, + "grad_norm": 0.7218973006376445, + "learning_rate": 4.996445781756912e-06, + "loss": 0.312, + "step": 1984 + }, + { + "epoch": 0.09, + "grad_norm": 0.7327849542558678, + "learning_rate": 4.996435665396361e-06, + "loss": 0.3184, + "step": 1985 + }, + { + "epoch": 0.09, + "grad_norm": 0.7751262674490096, + "learning_rate": 4.99642553466945e-06, + "loss": 0.3178, + "step": 1986 + }, + { + "epoch": 0.09, + "grad_norm": 0.6938787389743843, + "learning_rate": 4.996415389576238e-06, + "loss": 0.3096, + "step": 1987 + }, + { + "epoch": 0.09, + "grad_norm": 0.6902095563237467, + "learning_rate": 4.996405230116783e-06, + "loss": 0.3176, + "step": 1988 + }, + { + "epoch": 0.09, + "grad_norm": 0.732727021725934, + "learning_rate": 4.996395056291143e-06, + "loss": 0.3011, + "step": 1989 + }, + { + "epoch": 0.09, + "grad_norm": 0.6776226488820124, + "learning_rate": 4.996384868099378e-06, + "loss": 0.3317, + "step": 1990 + }, + { + "epoch": 0.09, + "grad_norm": 0.7337498477441999, + "learning_rate": 4.996374665541544e-06, + "loss": 0.3258, + "step": 1991 + }, + { + "epoch": 0.09, + "grad_norm": 0.7513878336937183, + "learning_rate": 4.996364448617702e-06, + "loss": 0.3062, + "step": 1992 + }, + { + "epoch": 0.09, + "grad_norm": 0.7546091481185564, + "learning_rate": 4.99635421732791e-06, + "loss": 0.3281, + "step": 1993 + }, + { + "epoch": 0.09, + "grad_norm": 0.749783182598125, + "learning_rate": 4.996343971672226e-06, + "loss": 0.3371, + "step": 1994 + }, + { + "epoch": 0.09, + "grad_norm": 0.6810520212312787, + "learning_rate": 4.996333711650711e-06, + "loss": 0.3087, + "step": 1995 + }, + { + "epoch": 0.09, + "grad_norm": 0.7519857883680776, + "learning_rate": 4.996323437263423e-06, + "loss": 0.3273, + "step": 1996 + }, + { + "epoch": 0.09, + "grad_norm": 0.7178022665080589, + "learning_rate": 4.99631314851042e-06, + "loss": 0.3119, + "step": 1997 + }, + { + "epoch": 0.09, + "grad_norm": 0.69700242113943, + "learning_rate": 4.996302845391763e-06, + "loss": 0.3036, + "step": 1998 + }, + { + "epoch": 0.09, + "grad_norm": 0.6275142874244114, + "learning_rate": 4.996292527907511e-06, + "loss": 0.3001, + "step": 1999 + }, + { + "epoch": 0.09, + "grad_norm": 0.6806431371033609, + "learning_rate": 4.996282196057722e-06, + "loss": 0.291, + "step": 2000 + }, + { + "epoch": 0.09, + "grad_norm": 0.7162968451659444, + "learning_rate": 4.996271849842457e-06, + "loss": 0.3276, + "step": 2001 + }, + { + "epoch": 0.09, + "grad_norm": 0.6793452324954328, + "learning_rate": 4.996261489261774e-06, + "loss": 0.3165, + "step": 2002 + }, + { + "epoch": 0.09, + "grad_norm": 0.7265893043004891, + "learning_rate": 4.996251114315734e-06, + "loss": 0.3072, + "step": 2003 + }, + { + "epoch": 0.09, + "grad_norm": 0.6727822894849818, + "learning_rate": 4.996240725004397e-06, + "loss": 0.3152, + "step": 2004 + }, + { + "epoch": 0.09, + "grad_norm": 0.6626612597708353, + "learning_rate": 4.996230321327821e-06, + "loss": 0.3148, + "step": 2005 + }, + { + "epoch": 0.09, + "grad_norm": 0.6930216608453353, + "learning_rate": 4.996219903286067e-06, + "loss": 0.3304, + "step": 2006 + }, + { + "epoch": 0.09, + "grad_norm": 0.6991611440649248, + "learning_rate": 4.996209470879195e-06, + "loss": 0.3182, + "step": 2007 + }, + { + "epoch": 0.09, + "grad_norm": 0.6943444922924938, + "learning_rate": 4.996199024107265e-06, + "loss": 0.3379, + "step": 2008 + }, + { + "epoch": 0.09, + "grad_norm": 0.7185752172120992, + "learning_rate": 4.9961885629703376e-06, + "loss": 0.3241, + "step": 2009 + }, + { + "epoch": 0.09, + "grad_norm": 0.6818338953878365, + "learning_rate": 4.996178087468471e-06, + "loss": 0.3252, + "step": 2010 + }, + { + "epoch": 0.09, + "grad_norm": 0.73129274465354, + "learning_rate": 4.996167597601729e-06, + "loss": 0.3165, + "step": 2011 + }, + { + "epoch": 0.09, + "grad_norm": 0.6668892045953531, + "learning_rate": 4.9961570933701684e-06, + "loss": 0.3105, + "step": 2012 + }, + { + "epoch": 0.09, + "grad_norm": 0.8048888466377019, + "learning_rate": 4.996146574773851e-06, + "loss": 0.3319, + "step": 2013 + }, + { + "epoch": 0.09, + "grad_norm": 0.7022578850602126, + "learning_rate": 4.996136041812838e-06, + "loss": 0.3033, + "step": 2014 + }, + { + "epoch": 0.09, + "grad_norm": 0.7214059308725413, + "learning_rate": 4.9961254944871895e-06, + "loss": 0.3067, + "step": 2015 + }, + { + "epoch": 0.09, + "grad_norm": 0.6004132207874183, + "learning_rate": 4.996114932796966e-06, + "loss": 0.2768, + "step": 2016 + }, + { + "epoch": 0.09, + "grad_norm": 0.7204367294706567, + "learning_rate": 4.996104356742228e-06, + "loss": 0.3145, + "step": 2017 + }, + { + "epoch": 0.09, + "grad_norm": 0.7532391253395982, + "learning_rate": 4.996093766323038e-06, + "loss": 0.326, + "step": 2018 + }, + { + "epoch": 0.09, + "grad_norm": 0.72512529105649, + "learning_rate": 4.996083161539455e-06, + "loss": 0.3114, + "step": 2019 + }, + { + "epoch": 0.09, + "grad_norm": 0.7229045385144908, + "learning_rate": 4.99607254239154e-06, + "loss": 0.3216, + "step": 2020 + }, + { + "epoch": 0.09, + "grad_norm": 0.704978826789479, + "learning_rate": 4.996061908879356e-06, + "loss": 0.3115, + "step": 2021 + }, + { + "epoch": 0.09, + "grad_norm": 0.69803821366478, + "learning_rate": 4.9960512610029625e-06, + "loss": 0.2988, + "step": 2022 + }, + { + "epoch": 0.09, + "grad_norm": 0.8070362772972249, + "learning_rate": 4.996040598762421e-06, + "loss": 0.3396, + "step": 2023 + }, + { + "epoch": 0.09, + "grad_norm": 0.6905061687146364, + "learning_rate": 4.996029922157795e-06, + "loss": 0.3064, + "step": 2024 + }, + { + "epoch": 0.09, + "grad_norm": 0.7330085432984124, + "learning_rate": 4.996019231189142e-06, + "loss": 0.3224, + "step": 2025 + }, + { + "epoch": 0.09, + "grad_norm": 0.7202361827496362, + "learning_rate": 4.996008525856527e-06, + "loss": 0.3285, + "step": 2026 + }, + { + "epoch": 0.09, + "grad_norm": 0.6806881546586806, + "learning_rate": 4.9959978061600104e-06, + "loss": 0.3034, + "step": 2027 + }, + { + "epoch": 0.1, + "grad_norm": 0.699354028503221, + "learning_rate": 4.995987072099653e-06, + "loss": 0.3048, + "step": 2028 + }, + { + "epoch": 0.1, + "grad_norm": 0.7528084838502914, + "learning_rate": 4.995976323675517e-06, + "loss": 0.3217, + "step": 2029 + }, + { + "epoch": 0.1, + "grad_norm": 0.7025373475943397, + "learning_rate": 4.995965560887666e-06, + "loss": 0.314, + "step": 2030 + }, + { + "epoch": 0.1, + "grad_norm": 0.7784791234767059, + "learning_rate": 4.995954783736159e-06, + "loss": 0.3566, + "step": 2031 + }, + { + "epoch": 0.1, + "grad_norm": 0.7263136612599541, + "learning_rate": 4.995943992221061e-06, + "loss": 0.3108, + "step": 2032 + }, + { + "epoch": 0.1, + "grad_norm": 0.7131279360509972, + "learning_rate": 4.995933186342431e-06, + "loss": 0.3237, + "step": 2033 + }, + { + "epoch": 0.1, + "grad_norm": 0.7419875917380949, + "learning_rate": 4.995922366100334e-06, + "loss": 0.3244, + "step": 2034 + }, + { + "epoch": 0.1, + "grad_norm": 0.7337273470801657, + "learning_rate": 4.99591153149483e-06, + "loss": 0.2912, + "step": 2035 + }, + { + "epoch": 0.1, + "grad_norm": 0.7601069349492571, + "learning_rate": 4.995900682525983e-06, + "loss": 0.3236, + "step": 2036 + }, + { + "epoch": 0.1, + "grad_norm": 0.7889723649976328, + "learning_rate": 4.9958898191938565e-06, + "loss": 0.3322, + "step": 2037 + }, + { + "epoch": 0.1, + "grad_norm": 0.6655029627480791, + "learning_rate": 4.99587894149851e-06, + "loss": 0.3122, + "step": 2038 + }, + { + "epoch": 0.1, + "grad_norm": 0.689591537280031, + "learning_rate": 4.995868049440008e-06, + "loss": 0.3221, + "step": 2039 + }, + { + "epoch": 0.1, + "grad_norm": 0.7386857331320142, + "learning_rate": 4.995857143018412e-06, + "loss": 0.3122, + "step": 2040 + }, + { + "epoch": 0.1, + "grad_norm": 0.698494224717507, + "learning_rate": 4.995846222233785e-06, + "loss": 0.3192, + "step": 2041 + }, + { + "epoch": 0.1, + "grad_norm": 0.7663458341407203, + "learning_rate": 4.995835287086192e-06, + "loss": 0.329, + "step": 2042 + }, + { + "epoch": 0.1, + "grad_norm": 0.6675677203996234, + "learning_rate": 4.995824337575693e-06, + "loss": 0.304, + "step": 2043 + }, + { + "epoch": 0.1, + "grad_norm": 0.7045633541467177, + "learning_rate": 4.995813373702352e-06, + "loss": 0.3349, + "step": 2044 + }, + { + "epoch": 0.1, + "grad_norm": 0.8001088806098321, + "learning_rate": 4.995802395466233e-06, + "loss": 0.3269, + "step": 2045 + }, + { + "epoch": 0.1, + "grad_norm": 0.6684643537490988, + "learning_rate": 4.995791402867399e-06, + "loss": 0.3306, + "step": 2046 + }, + { + "epoch": 0.1, + "grad_norm": 0.7017845606723002, + "learning_rate": 4.995780395905912e-06, + "loss": 0.3066, + "step": 2047 + }, + { + "epoch": 0.1, + "grad_norm": 0.7219907046506884, + "learning_rate": 4.995769374581836e-06, + "loss": 0.3286, + "step": 2048 + }, + { + "epoch": 0.1, + "grad_norm": 0.7611232678657989, + "learning_rate": 4.995758338895235e-06, + "loss": 0.322, + "step": 2049 + }, + { + "epoch": 0.1, + "grad_norm": 0.7297936592281694, + "learning_rate": 4.995747288846171e-06, + "loss": 0.3252, + "step": 2050 + }, + { + "epoch": 0.1, + "grad_norm": 0.662164079528115, + "learning_rate": 4.9957362244347104e-06, + "loss": 0.3119, + "step": 2051 + }, + { + "epoch": 0.1, + "grad_norm": 0.7512124248027041, + "learning_rate": 4.995725145660914e-06, + "loss": 0.329, + "step": 2052 + }, + { + "epoch": 0.1, + "grad_norm": 0.7364984112961053, + "learning_rate": 4.995714052524847e-06, + "loss": 0.3389, + "step": 2053 + }, + { + "epoch": 0.1, + "grad_norm": 0.7518378719297122, + "learning_rate": 4.9957029450265726e-06, + "loss": 0.3289, + "step": 2054 + }, + { + "epoch": 0.1, + "grad_norm": 0.7008184167259965, + "learning_rate": 4.995691823166155e-06, + "loss": 0.3247, + "step": 2055 + }, + { + "epoch": 0.1, + "grad_norm": 0.7318672838819901, + "learning_rate": 4.995680686943658e-06, + "loss": 0.3234, + "step": 2056 + }, + { + "epoch": 0.1, + "grad_norm": 0.748409492884662, + "learning_rate": 4.995669536359147e-06, + "loss": 0.3257, + "step": 2057 + }, + { + "epoch": 0.1, + "grad_norm": 0.7630862730616792, + "learning_rate": 4.995658371412684e-06, + "loss": 0.3105, + "step": 2058 + }, + { + "epoch": 0.1, + "grad_norm": 0.8310588706591965, + "learning_rate": 4.995647192104335e-06, + "loss": 0.3158, + "step": 2059 + }, + { + "epoch": 0.1, + "grad_norm": 0.6856091369346331, + "learning_rate": 4.995635998434163e-06, + "loss": 0.3159, + "step": 2060 + }, + { + "epoch": 0.1, + "grad_norm": 0.672522372694842, + "learning_rate": 4.9956247904022335e-06, + "loss": 0.3195, + "step": 2061 + }, + { + "epoch": 0.1, + "grad_norm": 0.7242321473645752, + "learning_rate": 4.99561356800861e-06, + "loss": 0.3331, + "step": 2062 + }, + { + "epoch": 0.1, + "grad_norm": 0.695451217035721, + "learning_rate": 4.995602331253359e-06, + "loss": 0.2921, + "step": 2063 + }, + { + "epoch": 0.1, + "grad_norm": 0.6699566919656599, + "learning_rate": 4.9955910801365425e-06, + "loss": 0.2949, + "step": 2064 + }, + { + "epoch": 0.1, + "grad_norm": 0.6948181594873838, + "learning_rate": 4.995579814658228e-06, + "loss": 0.3123, + "step": 2065 + }, + { + "epoch": 0.1, + "grad_norm": 0.6920526269980979, + "learning_rate": 4.995568534818478e-06, + "loss": 0.3286, + "step": 2066 + }, + { + "epoch": 0.1, + "grad_norm": 0.6859101146805546, + "learning_rate": 4.995557240617358e-06, + "loss": 0.3116, + "step": 2067 + }, + { + "epoch": 0.1, + "grad_norm": 0.7199519623103068, + "learning_rate": 4.9955459320549335e-06, + "loss": 0.3114, + "step": 2068 + }, + { + "epoch": 0.1, + "grad_norm": 0.6624799208975389, + "learning_rate": 4.995534609131269e-06, + "loss": 0.3002, + "step": 2069 + }, + { + "epoch": 0.1, + "grad_norm": 0.7345684193622452, + "learning_rate": 4.995523271846432e-06, + "loss": 0.3236, + "step": 2070 + }, + { + "epoch": 0.1, + "grad_norm": 0.7277376794213963, + "learning_rate": 4.995511920200483e-06, + "loss": 0.3196, + "step": 2071 + }, + { + "epoch": 0.1, + "grad_norm": 0.7966042320944818, + "learning_rate": 4.995500554193492e-06, + "loss": 0.3389, + "step": 2072 + }, + { + "epoch": 0.1, + "grad_norm": 0.7288973169737949, + "learning_rate": 4.995489173825522e-06, + "loss": 0.3176, + "step": 2073 + }, + { + "epoch": 0.1, + "grad_norm": 0.6599058328676276, + "learning_rate": 4.9954777790966395e-06, + "loss": 0.3114, + "step": 2074 + }, + { + "epoch": 0.1, + "grad_norm": 0.7456654896908892, + "learning_rate": 4.995466370006909e-06, + "loss": 0.3069, + "step": 2075 + }, + { + "epoch": 0.1, + "grad_norm": 0.7100040579208217, + "learning_rate": 4.995454946556397e-06, + "loss": 0.2897, + "step": 2076 + }, + { + "epoch": 0.1, + "grad_norm": 0.6940114034808494, + "learning_rate": 4.995443508745169e-06, + "loss": 0.2791, + "step": 2077 + }, + { + "epoch": 0.1, + "grad_norm": 0.7534207824449729, + "learning_rate": 4.99543205657329e-06, + "loss": 0.3247, + "step": 2078 + }, + { + "epoch": 0.1, + "grad_norm": 0.7053005832363709, + "learning_rate": 4.995420590040828e-06, + "loss": 0.3161, + "step": 2079 + }, + { + "epoch": 0.1, + "grad_norm": 0.7360936652114926, + "learning_rate": 4.9954091091478474e-06, + "loss": 0.3371, + "step": 2080 + }, + { + "epoch": 0.1, + "grad_norm": 0.7361277447061083, + "learning_rate": 4.995397613894414e-06, + "loss": 0.2998, + "step": 2081 + }, + { + "epoch": 0.1, + "grad_norm": 0.7578695238762561, + "learning_rate": 4.995386104280595e-06, + "loss": 0.3243, + "step": 2082 + }, + { + "epoch": 0.1, + "grad_norm": 0.6887447683194639, + "learning_rate": 4.995374580306456e-06, + "loss": 0.3052, + "step": 2083 + }, + { + "epoch": 0.1, + "grad_norm": 0.7351082814470324, + "learning_rate": 4.995363041972063e-06, + "loss": 0.3297, + "step": 2084 + }, + { + "epoch": 0.1, + "grad_norm": 0.8379569815257168, + "learning_rate": 4.995351489277484e-06, + "loss": 0.343, + "step": 2085 + }, + { + "epoch": 0.1, + "grad_norm": 0.6746011528273849, + "learning_rate": 4.995339922222784e-06, + "loss": 0.307, + "step": 2086 + }, + { + "epoch": 0.1, + "grad_norm": 0.7536579685652788, + "learning_rate": 4.99532834080803e-06, + "loss": 0.3046, + "step": 2087 + }, + { + "epoch": 0.1, + "grad_norm": 0.6048047823207747, + "learning_rate": 4.995316745033287e-06, + "loss": 0.2821, + "step": 2088 + }, + { + "epoch": 0.1, + "grad_norm": 0.7698779951598945, + "learning_rate": 4.9953051348986245e-06, + "loss": 0.3138, + "step": 2089 + }, + { + "epoch": 0.1, + "grad_norm": 0.7246611717852909, + "learning_rate": 4.995293510404109e-06, + "loss": 0.3161, + "step": 2090 + }, + { + "epoch": 0.1, + "grad_norm": 0.6797194895971466, + "learning_rate": 4.995281871549805e-06, + "loss": 0.3083, + "step": 2091 + }, + { + "epoch": 0.1, + "grad_norm": 0.7718555193688023, + "learning_rate": 4.995270218335782e-06, + "loss": 0.3206, + "step": 2092 + }, + { + "epoch": 0.1, + "grad_norm": 0.688538715738008, + "learning_rate": 4.995258550762106e-06, + "loss": 0.3081, + "step": 2093 + }, + { + "epoch": 0.1, + "grad_norm": 0.7251399736219075, + "learning_rate": 4.995246868828844e-06, + "loss": 0.3094, + "step": 2094 + }, + { + "epoch": 0.1, + "grad_norm": 0.7593002180282846, + "learning_rate": 4.995235172536063e-06, + "loss": 0.3035, + "step": 2095 + }, + { + "epoch": 0.1, + "grad_norm": 0.7568472233922776, + "learning_rate": 4.995223461883831e-06, + "loss": 0.2847, + "step": 2096 + }, + { + "epoch": 0.1, + "grad_norm": 0.690897163187103, + "learning_rate": 4.9952117368722155e-06, + "loss": 0.327, + "step": 2097 + }, + { + "epoch": 0.1, + "grad_norm": 0.7380133555465154, + "learning_rate": 4.995199997501283e-06, + "loss": 0.322, + "step": 2098 + }, + { + "epoch": 0.1, + "grad_norm": 0.7599174665252005, + "learning_rate": 4.995188243771102e-06, + "loss": 0.2968, + "step": 2099 + }, + { + "epoch": 0.1, + "grad_norm": 0.7543462461951145, + "learning_rate": 4.99517647568174e-06, + "loss": 0.3064, + "step": 2100 + }, + { + "epoch": 0.1, + "grad_norm": 0.6785276673999853, + "learning_rate": 4.995164693233264e-06, + "loss": 0.3072, + "step": 2101 + }, + { + "epoch": 0.1, + "grad_norm": 0.6801999540643456, + "learning_rate": 4.995152896425744e-06, + "loss": 0.3182, + "step": 2102 + }, + { + "epoch": 0.1, + "grad_norm": 0.7157938353120792, + "learning_rate": 4.995141085259244e-06, + "loss": 0.3507, + "step": 2103 + }, + { + "epoch": 0.1, + "grad_norm": 0.6634684405372138, + "learning_rate": 4.995129259733835e-06, + "loss": 0.303, + "step": 2104 + }, + { + "epoch": 0.1, + "grad_norm": 0.6948258480655293, + "learning_rate": 4.995117419849585e-06, + "loss": 0.3267, + "step": 2105 + }, + { + "epoch": 0.1, + "grad_norm": 0.7349833299145839, + "learning_rate": 4.99510556560656e-06, + "loss": 0.302, + "step": 2106 + }, + { + "epoch": 0.1, + "grad_norm": 0.6513771738931029, + "learning_rate": 4.99509369700483e-06, + "loss": 0.3055, + "step": 2107 + }, + { + "epoch": 0.1, + "grad_norm": 0.6573655422742545, + "learning_rate": 4.9950818140444634e-06, + "loss": 0.3086, + "step": 2108 + }, + { + "epoch": 0.1, + "grad_norm": 0.7298904017917056, + "learning_rate": 4.995069916725528e-06, + "loss": 0.3096, + "step": 2109 + }, + { + "epoch": 0.1, + "grad_norm": 0.6592191877525462, + "learning_rate": 4.995058005048092e-06, + "loss": 0.3017, + "step": 2110 + }, + { + "epoch": 0.1, + "grad_norm": 0.658047161169274, + "learning_rate": 4.995046079012225e-06, + "loss": 0.3229, + "step": 2111 + }, + { + "epoch": 0.1, + "grad_norm": 0.6641755663570664, + "learning_rate": 4.995034138617993e-06, + "loss": 0.3117, + "step": 2112 + }, + { + "epoch": 0.1, + "grad_norm": 0.7168422958712559, + "learning_rate": 4.995022183865469e-06, + "loss": 0.3104, + "step": 2113 + }, + { + "epoch": 0.1, + "grad_norm": 0.6412624758581381, + "learning_rate": 4.995010214754718e-06, + "loss": 0.2953, + "step": 2114 + }, + { + "epoch": 0.1, + "grad_norm": 0.707773277638653, + "learning_rate": 4.994998231285811e-06, + "loss": 0.2983, + "step": 2115 + }, + { + "epoch": 0.1, + "grad_norm": 0.6654860997231012, + "learning_rate": 4.9949862334588165e-06, + "loss": 0.2987, + "step": 2116 + }, + { + "epoch": 0.1, + "grad_norm": 0.6920216999662693, + "learning_rate": 4.994974221273802e-06, + "loss": 0.3077, + "step": 2117 + }, + { + "epoch": 0.1, + "grad_norm": 0.7043248811209329, + "learning_rate": 4.994962194730839e-06, + "loss": 0.3039, + "step": 2118 + }, + { + "epoch": 0.1, + "grad_norm": 0.6414661717906337, + "learning_rate": 4.994950153829995e-06, + "loss": 0.3026, + "step": 2119 + }, + { + "epoch": 0.1, + "grad_norm": 0.7656162583343783, + "learning_rate": 4.994938098571341e-06, + "loss": 0.3229, + "step": 2120 + }, + { + "epoch": 0.1, + "grad_norm": 0.7329448545541555, + "learning_rate": 4.994926028954944e-06, + "loss": 0.3036, + "step": 2121 + }, + { + "epoch": 0.1, + "grad_norm": 0.7504873821177167, + "learning_rate": 4.994913944980876e-06, + "loss": 0.3135, + "step": 2122 + }, + { + "epoch": 0.1, + "grad_norm": 0.6525584091372416, + "learning_rate": 4.994901846649205e-06, + "loss": 0.3079, + "step": 2123 + }, + { + "epoch": 0.1, + "grad_norm": 0.6677683010828573, + "learning_rate": 4.99488973396e-06, + "loss": 0.2989, + "step": 2124 + }, + { + "epoch": 0.1, + "grad_norm": 0.6383993767237515, + "learning_rate": 4.994877606913333e-06, + "loss": 0.3136, + "step": 2125 + }, + { + "epoch": 0.1, + "grad_norm": 0.7672218869300704, + "learning_rate": 4.994865465509272e-06, + "loss": 0.298, + "step": 2126 + }, + { + "epoch": 0.1, + "grad_norm": 0.7898948123791807, + "learning_rate": 4.994853309747887e-06, + "loss": 0.3217, + "step": 2127 + }, + { + "epoch": 0.1, + "grad_norm": 0.6512444266777496, + "learning_rate": 4.994841139629248e-06, + "loss": 0.3235, + "step": 2128 + }, + { + "epoch": 0.1, + "grad_norm": 0.6630778062645278, + "learning_rate": 4.994828955153426e-06, + "loss": 0.3049, + "step": 2129 + }, + { + "epoch": 0.1, + "grad_norm": 0.6674226556798483, + "learning_rate": 4.9948167563204896e-06, + "loss": 0.2838, + "step": 2130 + }, + { + "epoch": 0.1, + "grad_norm": 0.7183317086908528, + "learning_rate": 4.99480454313051e-06, + "loss": 0.3114, + "step": 2131 + }, + { + "epoch": 0.1, + "grad_norm": 0.7285979668034909, + "learning_rate": 4.9947923155835585e-06, + "loss": 0.2913, + "step": 2132 + }, + { + "epoch": 0.1, + "grad_norm": 0.7575939692668359, + "learning_rate": 4.994780073679703e-06, + "loss": 0.3207, + "step": 2133 + }, + { + "epoch": 0.1, + "grad_norm": 0.7961760710929432, + "learning_rate": 4.994767817419016e-06, + "loss": 0.3549, + "step": 2134 + }, + { + "epoch": 0.1, + "grad_norm": 0.6732189726251565, + "learning_rate": 4.994755546801566e-06, + "loss": 0.3404, + "step": 2135 + }, + { + "epoch": 0.1, + "grad_norm": 0.6589464539118469, + "learning_rate": 4.994743261827426e-06, + "loss": 0.3188, + "step": 2136 + }, + { + "epoch": 0.1, + "grad_norm": 0.7146730473388665, + "learning_rate": 4.9947309624966655e-06, + "loss": 0.2938, + "step": 2137 + }, + { + "epoch": 0.1, + "grad_norm": 0.7908326285223344, + "learning_rate": 4.994718648809355e-06, + "loss": 0.3134, + "step": 2138 + }, + { + "epoch": 0.1, + "grad_norm": 0.6470129845540165, + "learning_rate": 4.994706320765566e-06, + "loss": 0.3094, + "step": 2139 + }, + { + "epoch": 0.1, + "grad_norm": 0.7535951360308738, + "learning_rate": 4.994693978365369e-06, + "loss": 0.3014, + "step": 2140 + }, + { + "epoch": 0.1, + "grad_norm": 0.659742105053161, + "learning_rate": 4.994681621608835e-06, + "loss": 0.2986, + "step": 2141 + }, + { + "epoch": 0.1, + "grad_norm": 0.7234559568834276, + "learning_rate": 4.994669250496035e-06, + "loss": 0.3117, + "step": 2142 + }, + { + "epoch": 0.1, + "grad_norm": 0.7420546242600153, + "learning_rate": 4.994656865027041e-06, + "loss": 0.3263, + "step": 2143 + }, + { + "epoch": 0.1, + "grad_norm": 0.6878873848680856, + "learning_rate": 4.994644465201924e-06, + "loss": 0.316, + "step": 2144 + }, + { + "epoch": 0.1, + "grad_norm": 0.7130448330807805, + "learning_rate": 4.994632051020755e-06, + "loss": 0.316, + "step": 2145 + }, + { + "epoch": 0.1, + "grad_norm": 0.6985326407386208, + "learning_rate": 4.994619622483605e-06, + "loss": 0.3176, + "step": 2146 + }, + { + "epoch": 0.1, + "grad_norm": 0.6954824915563703, + "learning_rate": 4.994607179590546e-06, + "loss": 0.3202, + "step": 2147 + }, + { + "epoch": 0.1, + "grad_norm": 0.701608322872022, + "learning_rate": 4.9945947223416504e-06, + "loss": 0.3279, + "step": 2148 + }, + { + "epoch": 0.1, + "grad_norm": 0.7647580945393683, + "learning_rate": 4.994582250736989e-06, + "loss": 0.3218, + "step": 2149 + }, + { + "epoch": 0.1, + "grad_norm": 0.7116139743876687, + "learning_rate": 4.994569764776633e-06, + "loss": 0.3093, + "step": 2150 + }, + { + "epoch": 0.1, + "grad_norm": 0.7408421617761687, + "learning_rate": 4.994557264460656e-06, + "loss": 0.331, + "step": 2151 + }, + { + "epoch": 0.1, + "grad_norm": 0.7621433107967988, + "learning_rate": 4.994544749789129e-06, + "loss": 0.3443, + "step": 2152 + }, + { + "epoch": 0.1, + "grad_norm": 0.6856200874398263, + "learning_rate": 4.994532220762123e-06, + "loss": 0.3129, + "step": 2153 + }, + { + "epoch": 0.1, + "grad_norm": 0.6602667227673957, + "learning_rate": 4.994519677379711e-06, + "loss": 0.3036, + "step": 2154 + }, + { + "epoch": 0.1, + "grad_norm": 0.6575252306991487, + "learning_rate": 4.994507119641966e-06, + "loss": 0.3065, + "step": 2155 + }, + { + "epoch": 0.1, + "grad_norm": 0.6462179327425783, + "learning_rate": 4.9944945475489595e-06, + "loss": 0.3046, + "step": 2156 + }, + { + "epoch": 0.1, + "grad_norm": 0.8124030652977513, + "learning_rate": 4.9944819611007625e-06, + "loss": 0.328, + "step": 2157 + }, + { + "epoch": 0.1, + "grad_norm": 0.7396438437035795, + "learning_rate": 4.99446936029745e-06, + "loss": 0.3162, + "step": 2158 + }, + { + "epoch": 0.1, + "grad_norm": 0.7019138256179622, + "learning_rate": 4.994456745139093e-06, + "loss": 0.3488, + "step": 2159 + }, + { + "epoch": 0.1, + "grad_norm": 0.6706820652537383, + "learning_rate": 4.994444115625765e-06, + "loss": 0.3183, + "step": 2160 + }, + { + "epoch": 0.1, + "grad_norm": 0.6478576608051356, + "learning_rate": 4.9944314717575375e-06, + "loss": 0.2835, + "step": 2161 + }, + { + "epoch": 0.1, + "grad_norm": 0.760462593606468, + "learning_rate": 4.994418813534484e-06, + "loss": 0.3254, + "step": 2162 + }, + { + "epoch": 0.1, + "grad_norm": 0.6833273975277072, + "learning_rate": 4.994406140956677e-06, + "loss": 0.3048, + "step": 2163 + }, + { + "epoch": 0.1, + "grad_norm": 0.6962533105832606, + "learning_rate": 4.99439345402419e-06, + "loss": 0.3041, + "step": 2164 + }, + { + "epoch": 0.1, + "grad_norm": 0.641555512002947, + "learning_rate": 4.9943807527370945e-06, + "loss": 0.303, + "step": 2165 + }, + { + "epoch": 0.1, + "grad_norm": 0.7115814791514812, + "learning_rate": 4.994368037095466e-06, + "loss": 0.3173, + "step": 2166 + }, + { + "epoch": 0.1, + "grad_norm": 0.68661756878424, + "learning_rate": 4.994355307099375e-06, + "loss": 0.2896, + "step": 2167 + }, + { + "epoch": 0.1, + "grad_norm": 0.7005308344940274, + "learning_rate": 4.994342562748897e-06, + "loss": 0.3178, + "step": 2168 + }, + { + "epoch": 0.1, + "grad_norm": 0.7598988318670659, + "learning_rate": 4.994329804044105e-06, + "loss": 0.3271, + "step": 2169 + }, + { + "epoch": 0.1, + "grad_norm": 0.7648446254554273, + "learning_rate": 4.994317030985071e-06, + "loss": 0.2999, + "step": 2170 + }, + { + "epoch": 0.1, + "grad_norm": 0.7334109877095383, + "learning_rate": 4.99430424357187e-06, + "loss": 0.3137, + "step": 2171 + }, + { + "epoch": 0.1, + "grad_norm": 0.747867947590961, + "learning_rate": 4.994291441804575e-06, + "loss": 0.3125, + "step": 2172 + }, + { + "epoch": 0.1, + "grad_norm": 0.7601727862740293, + "learning_rate": 4.99427862568326e-06, + "loss": 0.3288, + "step": 2173 + }, + { + "epoch": 0.1, + "grad_norm": 0.6362727188523271, + "learning_rate": 4.994265795207998e-06, + "loss": 0.2884, + "step": 2174 + }, + { + "epoch": 0.1, + "grad_norm": 0.6996284726742361, + "learning_rate": 4.994252950378863e-06, + "loss": 0.2881, + "step": 2175 + }, + { + "epoch": 0.1, + "grad_norm": 0.7385240875157256, + "learning_rate": 4.994240091195929e-06, + "loss": 0.318, + "step": 2176 + }, + { + "epoch": 0.1, + "grad_norm": 0.669260171942932, + "learning_rate": 4.994227217659271e-06, + "loss": 0.2902, + "step": 2177 + }, + { + "epoch": 0.1, + "grad_norm": 1.0025843335596905, + "learning_rate": 4.994214329768961e-06, + "loss": 0.3365, + "step": 2178 + }, + { + "epoch": 0.1, + "grad_norm": 0.7006552906041222, + "learning_rate": 4.994201427525075e-06, + "loss": 0.3127, + "step": 2179 + }, + { + "epoch": 0.1, + "grad_norm": 0.7039834708528693, + "learning_rate": 4.994188510927687e-06, + "loss": 0.3154, + "step": 2180 + }, + { + "epoch": 0.1, + "grad_norm": 0.7259054005192246, + "learning_rate": 4.994175579976871e-06, + "loss": 0.3283, + "step": 2181 + }, + { + "epoch": 0.1, + "grad_norm": 0.6973919891396498, + "learning_rate": 4.994162634672701e-06, + "loss": 0.3392, + "step": 2182 + }, + { + "epoch": 0.1, + "grad_norm": 0.6791034522532114, + "learning_rate": 4.994149675015253e-06, + "loss": 0.3203, + "step": 2183 + }, + { + "epoch": 0.1, + "grad_norm": 0.6965934656143554, + "learning_rate": 4.9941367010046e-06, + "loss": 0.3107, + "step": 2184 + }, + { + "epoch": 0.1, + "grad_norm": 0.6675039819467468, + "learning_rate": 4.994123712640816e-06, + "loss": 0.32, + "step": 2185 + }, + { + "epoch": 0.1, + "grad_norm": 0.7316109438004123, + "learning_rate": 4.994110709923978e-06, + "loss": 0.3202, + "step": 2186 + }, + { + "epoch": 0.1, + "grad_norm": 0.6983565797571897, + "learning_rate": 4.99409769285416e-06, + "loss": 0.3081, + "step": 2187 + }, + { + "epoch": 0.1, + "grad_norm": 0.7412088874437971, + "learning_rate": 4.994084661431436e-06, + "loss": 0.3325, + "step": 2188 + }, + { + "epoch": 0.1, + "grad_norm": 0.6724119608299791, + "learning_rate": 4.9940716156558816e-06, + "loss": 0.3021, + "step": 2189 + }, + { + "epoch": 0.1, + "grad_norm": 0.695833221887371, + "learning_rate": 4.994058555527573e-06, + "loss": 0.3057, + "step": 2190 + }, + { + "epoch": 0.1, + "grad_norm": 0.6749668305608559, + "learning_rate": 4.994045481046582e-06, + "loss": 0.3174, + "step": 2191 + }, + { + "epoch": 0.1, + "grad_norm": 0.7420199556874306, + "learning_rate": 4.994032392212988e-06, + "loss": 0.3233, + "step": 2192 + }, + { + "epoch": 0.1, + "grad_norm": 0.6988257975609354, + "learning_rate": 4.9940192890268644e-06, + "loss": 0.3333, + "step": 2193 + }, + { + "epoch": 0.1, + "grad_norm": 0.6665813867609155, + "learning_rate": 4.994006171488286e-06, + "loss": 0.3095, + "step": 2194 + }, + { + "epoch": 0.1, + "grad_norm": 0.6703943285313823, + "learning_rate": 4.993993039597329e-06, + "loss": 0.3133, + "step": 2195 + }, + { + "epoch": 0.1, + "grad_norm": 0.692437597938566, + "learning_rate": 4.993979893354069e-06, + "loss": 0.333, + "step": 2196 + }, + { + "epoch": 0.1, + "grad_norm": 0.6874342900592044, + "learning_rate": 4.9939667327585815e-06, + "loss": 0.3311, + "step": 2197 + }, + { + "epoch": 0.1, + "grad_norm": 0.6833858980788423, + "learning_rate": 4.9939535578109425e-06, + "loss": 0.3231, + "step": 2198 + }, + { + "epoch": 0.1, + "grad_norm": 0.7434103746505378, + "learning_rate": 4.993940368511227e-06, + "loss": 0.2983, + "step": 2199 + }, + { + "epoch": 0.1, + "grad_norm": 0.7739676984897637, + "learning_rate": 4.993927164859512e-06, + "loss": 0.2952, + "step": 2200 + }, + { + "epoch": 0.1, + "grad_norm": 0.6969256187177068, + "learning_rate": 4.9939139468558736e-06, + "loss": 0.316, + "step": 2201 + }, + { + "epoch": 0.1, + "grad_norm": 0.6859521621412211, + "learning_rate": 4.993900714500386e-06, + "loss": 0.3172, + "step": 2202 + }, + { + "epoch": 0.1, + "grad_norm": 0.7804255329858026, + "learning_rate": 4.993887467793128e-06, + "loss": 0.3146, + "step": 2203 + }, + { + "epoch": 0.1, + "grad_norm": 0.6985589275901173, + "learning_rate": 4.993874206734173e-06, + "loss": 0.2983, + "step": 2204 + }, + { + "epoch": 0.1, + "grad_norm": 0.7170707568384548, + "learning_rate": 4.9938609313236e-06, + "loss": 0.308, + "step": 2205 + }, + { + "epoch": 0.1, + "grad_norm": 0.7964921598317342, + "learning_rate": 4.993847641561484e-06, + "loss": 0.3166, + "step": 2206 + }, + { + "epoch": 0.1, + "grad_norm": 0.7050736722252827, + "learning_rate": 4.993834337447901e-06, + "loss": 0.33, + "step": 2207 + }, + { + "epoch": 0.1, + "grad_norm": 0.7610705110272393, + "learning_rate": 4.993821018982928e-06, + "loss": 0.3261, + "step": 2208 + }, + { + "epoch": 0.1, + "grad_norm": 0.734991062645969, + "learning_rate": 4.9938076861666415e-06, + "loss": 0.33, + "step": 2209 + }, + { + "epoch": 0.1, + "grad_norm": 0.6732122236954349, + "learning_rate": 4.993794338999119e-06, + "loss": 0.3074, + "step": 2210 + }, + { + "epoch": 0.1, + "grad_norm": 0.7071262311859321, + "learning_rate": 4.993780977480438e-06, + "loss": 0.3189, + "step": 2211 + }, + { + "epoch": 0.1, + "grad_norm": 0.6950009398343167, + "learning_rate": 4.9937676016106735e-06, + "loss": 0.2956, + "step": 2212 + }, + { + "epoch": 0.1, + "grad_norm": 0.6155565621668154, + "learning_rate": 4.993754211389903e-06, + "loss": 0.2993, + "step": 2213 + }, + { + "epoch": 0.1, + "grad_norm": 0.7305084235499043, + "learning_rate": 4.9937408068182035e-06, + "loss": 0.3242, + "step": 2214 + }, + { + "epoch": 0.1, + "grad_norm": 0.7397057538069514, + "learning_rate": 4.993727387895653e-06, + "loss": 0.3198, + "step": 2215 + }, + { + "epoch": 0.1, + "grad_norm": 0.6357282792159281, + "learning_rate": 4.993713954622328e-06, + "loss": 0.296, + "step": 2216 + }, + { + "epoch": 0.1, + "grad_norm": 0.6824129953292024, + "learning_rate": 4.993700506998306e-06, + "loss": 0.3029, + "step": 2217 + }, + { + "epoch": 0.1, + "grad_norm": 0.7788433004460239, + "learning_rate": 4.993687045023665e-06, + "loss": 0.3392, + "step": 2218 + }, + { + "epoch": 0.1, + "grad_norm": 0.6742656407116071, + "learning_rate": 4.99367356869848e-06, + "loss": 0.2862, + "step": 2219 + }, + { + "epoch": 0.1, + "grad_norm": 0.7113659462078031, + "learning_rate": 4.993660078022833e-06, + "loss": 0.2817, + "step": 2220 + }, + { + "epoch": 0.1, + "grad_norm": 0.6844971535747861, + "learning_rate": 4.993646572996797e-06, + "loss": 0.3331, + "step": 2221 + }, + { + "epoch": 0.1, + "grad_norm": 0.6945151599053865, + "learning_rate": 4.993633053620453e-06, + "loss": 0.3085, + "step": 2222 + }, + { + "epoch": 0.1, + "grad_norm": 0.7406367580760873, + "learning_rate": 4.9936195198938765e-06, + "loss": 0.319, + "step": 2223 + }, + { + "epoch": 0.1, + "grad_norm": 0.6724021638834367, + "learning_rate": 4.993605971817146e-06, + "loss": 0.3046, + "step": 2224 + }, + { + "epoch": 0.1, + "grad_norm": 0.6627687184810518, + "learning_rate": 4.9935924093903405e-06, + "loss": 0.2992, + "step": 2225 + }, + { + "epoch": 0.1, + "grad_norm": 0.6929860023223073, + "learning_rate": 4.993578832613538e-06, + "loss": 0.2915, + "step": 2226 + }, + { + "epoch": 0.1, + "grad_norm": 0.7100248967999525, + "learning_rate": 4.993565241486816e-06, + "loss": 0.3165, + "step": 2227 + }, + { + "epoch": 0.1, + "grad_norm": 0.7277900674031972, + "learning_rate": 4.993551636010252e-06, + "loss": 0.312, + "step": 2228 + }, + { + "epoch": 0.1, + "grad_norm": 0.7366157787775478, + "learning_rate": 4.993538016183925e-06, + "loss": 0.3296, + "step": 2229 + }, + { + "epoch": 0.1, + "grad_norm": 0.6765847070967917, + "learning_rate": 4.993524382007914e-06, + "loss": 0.3103, + "step": 2230 + }, + { + "epoch": 0.1, + "grad_norm": 0.7146617125777828, + "learning_rate": 4.993510733482297e-06, + "loss": 0.3308, + "step": 2231 + }, + { + "epoch": 0.1, + "grad_norm": 0.7869348245946277, + "learning_rate": 4.993497070607152e-06, + "loss": 0.335, + "step": 2232 + }, + { + "epoch": 0.1, + "grad_norm": 0.7265530052096573, + "learning_rate": 4.993483393382558e-06, + "loss": 0.3313, + "step": 2233 + }, + { + "epoch": 0.1, + "grad_norm": 0.6971658373676896, + "learning_rate": 4.993469701808594e-06, + "loss": 0.3188, + "step": 2234 + }, + { + "epoch": 0.1, + "grad_norm": 0.7306551726369791, + "learning_rate": 4.9934559958853394e-06, + "loss": 0.3301, + "step": 2235 + }, + { + "epoch": 0.1, + "grad_norm": 0.7740126060600911, + "learning_rate": 4.993442275612871e-06, + "loss": 0.3308, + "step": 2236 + }, + { + "epoch": 0.1, + "grad_norm": 0.7221425559266359, + "learning_rate": 4.99342854099127e-06, + "loss": 0.3268, + "step": 2237 + }, + { + "epoch": 0.1, + "grad_norm": 0.7245485401590184, + "learning_rate": 4.993414792020613e-06, + "loss": 0.3093, + "step": 2238 + }, + { + "epoch": 0.1, + "grad_norm": 0.7273764206174397, + "learning_rate": 4.993401028700982e-06, + "loss": 0.3177, + "step": 2239 + }, + { + "epoch": 0.1, + "grad_norm": 0.7053459713265139, + "learning_rate": 4.993387251032454e-06, + "loss": 0.3182, + "step": 2240 + }, + { + "epoch": 0.1, + "grad_norm": 0.7234082567388366, + "learning_rate": 4.993373459015109e-06, + "loss": 0.3091, + "step": 2241 + }, + { + "epoch": 0.11, + "grad_norm": 0.7495635265639762, + "learning_rate": 4.993359652649027e-06, + "loss": 0.3108, + "step": 2242 + }, + { + "epoch": 0.11, + "grad_norm": 0.7166320595590765, + "learning_rate": 4.9933458319342875e-06, + "loss": 0.3283, + "step": 2243 + }, + { + "epoch": 0.11, + "grad_norm": 0.8850850695062106, + "learning_rate": 4.993331996870968e-06, + "loss": 0.3351, + "step": 2244 + }, + { + "epoch": 0.11, + "grad_norm": 0.7917277597578298, + "learning_rate": 4.993318147459151e-06, + "loss": 0.3252, + "step": 2245 + }, + { + "epoch": 0.11, + "grad_norm": 0.7150360920753059, + "learning_rate": 4.993304283698913e-06, + "loss": 0.2958, + "step": 2246 + }, + { + "epoch": 0.11, + "grad_norm": 0.6657136868905353, + "learning_rate": 4.993290405590336e-06, + "loss": 0.3073, + "step": 2247 + }, + { + "epoch": 0.11, + "grad_norm": 0.8374345387737329, + "learning_rate": 4.9932765131335e-06, + "loss": 0.3318, + "step": 2248 + }, + { + "epoch": 0.11, + "grad_norm": 0.8109352436324987, + "learning_rate": 4.993262606328485e-06, + "loss": 0.3372, + "step": 2249 + }, + { + "epoch": 0.11, + "grad_norm": 0.7110167483930909, + "learning_rate": 4.993248685175369e-06, + "loss": 0.3015, + "step": 2250 + }, + { + "epoch": 0.11, + "grad_norm": 0.6705980218774601, + "learning_rate": 4.993234749674234e-06, + "loss": 0.3021, + "step": 2251 + }, + { + "epoch": 0.11, + "grad_norm": 0.7649668845937174, + "learning_rate": 4.993220799825159e-06, + "loss": 0.3133, + "step": 2252 + }, + { + "epoch": 0.11, + "grad_norm": 0.7063203103546492, + "learning_rate": 4.993206835628226e-06, + "loss": 0.3021, + "step": 2253 + }, + { + "epoch": 0.11, + "grad_norm": 0.7032101296035645, + "learning_rate": 4.993192857083514e-06, + "loss": 0.2989, + "step": 2254 + }, + { + "epoch": 0.11, + "grad_norm": 0.7492453099052793, + "learning_rate": 4.993178864191104e-06, + "loss": 0.2988, + "step": 2255 + }, + { + "epoch": 0.11, + "grad_norm": 0.7574169509547419, + "learning_rate": 4.993164856951075e-06, + "loss": 0.3201, + "step": 2256 + }, + { + "epoch": 0.11, + "grad_norm": 0.6378415681054531, + "learning_rate": 4.99315083536351e-06, + "loss": 0.2817, + "step": 2257 + }, + { + "epoch": 0.11, + "grad_norm": 0.6757547787540222, + "learning_rate": 4.9931367994284876e-06, + "loss": 0.3119, + "step": 2258 + }, + { + "epoch": 0.11, + "grad_norm": 0.7560837298522554, + "learning_rate": 4.993122749146091e-06, + "loss": 0.3295, + "step": 2259 + }, + { + "epoch": 0.11, + "grad_norm": 0.692972514988318, + "learning_rate": 4.993108684516398e-06, + "loss": 0.2929, + "step": 2260 + }, + { + "epoch": 0.11, + "grad_norm": 0.6936460484408653, + "learning_rate": 4.9930946055394926e-06, + "loss": 0.3305, + "step": 2261 + }, + { + "epoch": 0.11, + "grad_norm": 0.6481165917210637, + "learning_rate": 4.993080512215453e-06, + "loss": 0.2981, + "step": 2262 + }, + { + "epoch": 0.11, + "grad_norm": 0.6809514223546914, + "learning_rate": 4.993066404544364e-06, + "loss": 0.3086, + "step": 2263 + }, + { + "epoch": 0.11, + "grad_norm": 0.7204052503572076, + "learning_rate": 4.993052282526301e-06, + "loss": 0.3258, + "step": 2264 + }, + { + "epoch": 0.11, + "grad_norm": 0.6991208263004631, + "learning_rate": 4.993038146161352e-06, + "loss": 0.3158, + "step": 2265 + }, + { + "epoch": 0.11, + "grad_norm": 0.7100982069896482, + "learning_rate": 4.993023995449593e-06, + "loss": 0.3113, + "step": 2266 + }, + { + "epoch": 0.11, + "grad_norm": 0.6365664930994619, + "learning_rate": 4.993009830391108e-06, + "loss": 0.2961, + "step": 2267 + }, + { + "epoch": 0.11, + "grad_norm": 0.6580170277111368, + "learning_rate": 4.992995650985978e-06, + "loss": 0.3346, + "step": 2268 + }, + { + "epoch": 0.11, + "grad_norm": 0.7343050502685629, + "learning_rate": 4.992981457234285e-06, + "loss": 0.3427, + "step": 2269 + }, + { + "epoch": 0.11, + "grad_norm": 0.691930159512099, + "learning_rate": 4.99296724913611e-06, + "loss": 0.3059, + "step": 2270 + }, + { + "epoch": 0.11, + "grad_norm": 0.6525299694798655, + "learning_rate": 4.9929530266915354e-06, + "loss": 0.3015, + "step": 2271 + }, + { + "epoch": 0.11, + "grad_norm": 0.7730659388975933, + "learning_rate": 4.992938789900643e-06, + "loss": 0.3085, + "step": 2272 + }, + { + "epoch": 0.11, + "grad_norm": 0.6940638256706138, + "learning_rate": 4.992924538763514e-06, + "loss": 0.3132, + "step": 2273 + }, + { + "epoch": 0.11, + "grad_norm": 0.6960520388387565, + "learning_rate": 4.99291027328023e-06, + "loss": 0.3076, + "step": 2274 + }, + { + "epoch": 0.11, + "grad_norm": 0.6665392473283404, + "learning_rate": 4.992895993450875e-06, + "loss": 0.312, + "step": 2275 + }, + { + "epoch": 0.11, + "grad_norm": 0.7194099922016544, + "learning_rate": 4.9928816992755295e-06, + "loss": 0.3063, + "step": 2276 + }, + { + "epoch": 0.11, + "grad_norm": 0.6765340393796643, + "learning_rate": 4.992867390754277e-06, + "loss": 0.301, + "step": 2277 + }, + { + "epoch": 0.11, + "grad_norm": 0.7062733478059953, + "learning_rate": 4.992853067887199e-06, + "loss": 0.3285, + "step": 2278 + }, + { + "epoch": 0.11, + "grad_norm": 0.6897313106103454, + "learning_rate": 4.992838730674378e-06, + "loss": 0.3201, + "step": 2279 + }, + { + "epoch": 0.11, + "grad_norm": 0.6723503332630821, + "learning_rate": 4.992824379115897e-06, + "loss": 0.3056, + "step": 2280 + }, + { + "epoch": 0.11, + "grad_norm": 0.7431548392919325, + "learning_rate": 4.9928100132118375e-06, + "loss": 0.3375, + "step": 2281 + }, + { + "epoch": 0.11, + "grad_norm": 0.677762901903657, + "learning_rate": 4.992795632962284e-06, + "loss": 0.2836, + "step": 2282 + }, + { + "epoch": 0.11, + "grad_norm": 0.6503899565774751, + "learning_rate": 4.9927812383673165e-06, + "loss": 0.298, + "step": 2283 + }, + { + "epoch": 0.11, + "grad_norm": 0.7665934127613793, + "learning_rate": 4.992766829427021e-06, + "loss": 0.3436, + "step": 2284 + }, + { + "epoch": 0.11, + "grad_norm": 0.6748924869440155, + "learning_rate": 4.992752406141479e-06, + "loss": 0.2985, + "step": 2285 + }, + { + "epoch": 0.11, + "grad_norm": 0.6877861607090004, + "learning_rate": 4.992737968510772e-06, + "loss": 0.3149, + "step": 2286 + }, + { + "epoch": 0.11, + "grad_norm": 0.6688569464827442, + "learning_rate": 4.992723516534987e-06, + "loss": 0.302, + "step": 2287 + }, + { + "epoch": 0.11, + "grad_norm": 0.696719472079597, + "learning_rate": 4.992709050214202e-06, + "loss": 0.3065, + "step": 2288 + }, + { + "epoch": 0.11, + "grad_norm": 0.6944017823222013, + "learning_rate": 4.992694569548504e-06, + "loss": 0.3169, + "step": 2289 + }, + { + "epoch": 0.11, + "grad_norm": 0.7315030760475426, + "learning_rate": 4.992680074537975e-06, + "loss": 0.3165, + "step": 2290 + }, + { + "epoch": 0.11, + "grad_norm": 0.610032267113296, + "learning_rate": 4.9926655651827e-06, + "loss": 0.2777, + "step": 2291 + }, + { + "epoch": 0.11, + "grad_norm": 0.7708883070201884, + "learning_rate": 4.99265104148276e-06, + "loss": 0.337, + "step": 2292 + }, + { + "epoch": 0.11, + "grad_norm": 0.6818760285280581, + "learning_rate": 4.99263650343824e-06, + "loss": 0.3499, + "step": 2293 + }, + { + "epoch": 0.11, + "grad_norm": 0.6986964743350761, + "learning_rate": 4.992621951049224e-06, + "loss": 0.3076, + "step": 2294 + }, + { + "epoch": 0.11, + "grad_norm": 0.7481203506009354, + "learning_rate": 4.992607384315794e-06, + "loss": 0.3286, + "step": 2295 + }, + { + "epoch": 0.11, + "grad_norm": 0.7720347470718703, + "learning_rate": 4.9925928032380354e-06, + "loss": 0.3321, + "step": 2296 + }, + { + "epoch": 0.11, + "grad_norm": 0.757292016494418, + "learning_rate": 4.992578207816032e-06, + "loss": 0.3163, + "step": 2297 + }, + { + "epoch": 0.11, + "grad_norm": 0.7110073644752205, + "learning_rate": 4.992563598049868e-06, + "loss": 0.3288, + "step": 2298 + }, + { + "epoch": 0.11, + "grad_norm": 0.7152599403741762, + "learning_rate": 4.992548973939626e-06, + "loss": 0.3102, + "step": 2299 + }, + { + "epoch": 0.11, + "grad_norm": 0.692028815189754, + "learning_rate": 4.992534335485392e-06, + "loss": 0.3073, + "step": 2300 + }, + { + "epoch": 0.11, + "grad_norm": 0.8033099931155931, + "learning_rate": 4.992519682687248e-06, + "loss": 0.3448, + "step": 2301 + }, + { + "epoch": 0.11, + "grad_norm": 0.6797939137603848, + "learning_rate": 4.99250501554528e-06, + "loss": 0.2985, + "step": 2302 + }, + { + "epoch": 0.11, + "grad_norm": 0.6742560224412237, + "learning_rate": 4.9924903340595735e-06, + "loss": 0.302, + "step": 2303 + }, + { + "epoch": 0.11, + "grad_norm": 0.7328918223619636, + "learning_rate": 4.992475638230211e-06, + "loss": 0.3243, + "step": 2304 + }, + { + "epoch": 0.11, + "grad_norm": 0.7505924700252627, + "learning_rate": 4.992460928057277e-06, + "loss": 0.329, + "step": 2305 + }, + { + "epoch": 0.11, + "grad_norm": 0.6587972791702824, + "learning_rate": 4.9924462035408575e-06, + "loss": 0.3018, + "step": 2306 + }, + { + "epoch": 0.11, + "grad_norm": 0.6555947232028648, + "learning_rate": 4.992431464681035e-06, + "loss": 0.3158, + "step": 2307 + }, + { + "epoch": 0.11, + "grad_norm": 0.7151405241381659, + "learning_rate": 4.9924167114778985e-06, + "loss": 0.3308, + "step": 2308 + }, + { + "epoch": 0.11, + "grad_norm": 0.6691702834061449, + "learning_rate": 4.9924019439315285e-06, + "loss": 0.3127, + "step": 2309 + }, + { + "epoch": 0.11, + "grad_norm": 0.6449015253995048, + "learning_rate": 4.9923871620420125e-06, + "loss": 0.2978, + "step": 2310 + }, + { + "epoch": 0.11, + "grad_norm": 0.7050535223033867, + "learning_rate": 4.992372365809434e-06, + "loss": 0.3112, + "step": 2311 + }, + { + "epoch": 0.11, + "grad_norm": 0.6868926430359059, + "learning_rate": 4.9923575552338795e-06, + "loss": 0.3113, + "step": 2312 + }, + { + "epoch": 0.11, + "grad_norm": 0.7387197743426941, + "learning_rate": 4.992342730315434e-06, + "loss": 0.321, + "step": 2313 + }, + { + "epoch": 0.11, + "grad_norm": 0.7705993541354828, + "learning_rate": 4.992327891054181e-06, + "loss": 0.329, + "step": 2314 + }, + { + "epoch": 0.11, + "grad_norm": 0.6720501330547278, + "learning_rate": 4.992313037450209e-06, + "loss": 0.3246, + "step": 2315 + }, + { + "epoch": 0.11, + "grad_norm": 0.7381405181936465, + "learning_rate": 4.992298169503602e-06, + "loss": 0.3207, + "step": 2316 + }, + { + "epoch": 0.11, + "grad_norm": 0.7057673304839985, + "learning_rate": 4.992283287214445e-06, + "loss": 0.3029, + "step": 2317 + }, + { + "epoch": 0.11, + "grad_norm": 0.6719595884223667, + "learning_rate": 4.992268390582824e-06, + "loss": 0.3154, + "step": 2318 + }, + { + "epoch": 0.11, + "grad_norm": 0.6597055091367027, + "learning_rate": 4.992253479608825e-06, + "loss": 0.308, + "step": 2319 + }, + { + "epoch": 0.11, + "grad_norm": 0.6546958248426524, + "learning_rate": 4.992238554292533e-06, + "loss": 0.2761, + "step": 2320 + }, + { + "epoch": 0.11, + "grad_norm": 0.6239507980727053, + "learning_rate": 4.992223614634035e-06, + "loss": 0.3043, + "step": 2321 + }, + { + "epoch": 0.11, + "grad_norm": 0.7106290758642707, + "learning_rate": 4.992208660633417e-06, + "loss": 0.3164, + "step": 2322 + }, + { + "epoch": 0.11, + "grad_norm": 0.7051291775465305, + "learning_rate": 4.992193692290764e-06, + "loss": 0.3225, + "step": 2323 + }, + { + "epoch": 0.11, + "grad_norm": 0.6713774641709707, + "learning_rate": 4.992178709606162e-06, + "loss": 0.3031, + "step": 2324 + }, + { + "epoch": 0.11, + "grad_norm": 0.6693995757714735, + "learning_rate": 4.9921637125797e-06, + "loss": 0.3324, + "step": 2325 + }, + { + "epoch": 0.11, + "grad_norm": 0.6524567316975368, + "learning_rate": 4.99214870121146e-06, + "loss": 0.3047, + "step": 2326 + }, + { + "epoch": 0.11, + "grad_norm": 0.698078899714943, + "learning_rate": 4.992133675501532e-06, + "loss": 0.3164, + "step": 2327 + }, + { + "epoch": 0.11, + "grad_norm": 0.888539611807526, + "learning_rate": 4.99211863545e-06, + "loss": 0.3295, + "step": 2328 + }, + { + "epoch": 0.11, + "grad_norm": 0.7000506449486106, + "learning_rate": 4.992103581056952e-06, + "loss": 0.2866, + "step": 2329 + }, + { + "epoch": 0.11, + "grad_norm": 0.7444297794929415, + "learning_rate": 4.992088512322475e-06, + "loss": 0.3238, + "step": 2330 + }, + { + "epoch": 0.11, + "grad_norm": 0.7458274475650195, + "learning_rate": 4.992073429246654e-06, + "loss": 0.3181, + "step": 2331 + }, + { + "epoch": 0.11, + "grad_norm": 0.6323066664941309, + "learning_rate": 4.992058331829577e-06, + "loss": 0.2798, + "step": 2332 + }, + { + "epoch": 0.11, + "grad_norm": 0.7505909304564012, + "learning_rate": 4.992043220071331e-06, + "loss": 0.3358, + "step": 2333 + }, + { + "epoch": 0.11, + "grad_norm": 0.6617940752007274, + "learning_rate": 4.9920280939720025e-06, + "loss": 0.3167, + "step": 2334 + }, + { + "epoch": 0.11, + "grad_norm": 0.7255140134913691, + "learning_rate": 4.992012953531679e-06, + "loss": 0.3356, + "step": 2335 + }, + { + "epoch": 0.11, + "grad_norm": 0.7313608026172641, + "learning_rate": 4.991997798750446e-06, + "loss": 0.331, + "step": 2336 + }, + { + "epoch": 0.11, + "grad_norm": 0.7197995545695549, + "learning_rate": 4.991982629628394e-06, + "loss": 0.3127, + "step": 2337 + }, + { + "epoch": 0.11, + "grad_norm": 0.7944081669770238, + "learning_rate": 4.9919674461656065e-06, + "loss": 0.3154, + "step": 2338 + }, + { + "epoch": 0.11, + "grad_norm": 0.6452866484250859, + "learning_rate": 4.991952248362174e-06, + "loss": 0.3018, + "step": 2339 + }, + { + "epoch": 0.11, + "grad_norm": 0.7016532431612533, + "learning_rate": 4.991937036218182e-06, + "loss": 0.3129, + "step": 2340 + }, + { + "epoch": 0.11, + "grad_norm": 0.7253054939790068, + "learning_rate": 4.991921809733719e-06, + "loss": 0.3238, + "step": 2341 + }, + { + "epoch": 0.11, + "grad_norm": 0.671797961635785, + "learning_rate": 4.991906568908871e-06, + "loss": 0.3099, + "step": 2342 + }, + { + "epoch": 0.11, + "grad_norm": 0.6971669627966228, + "learning_rate": 4.9918913137437285e-06, + "loss": 0.3167, + "step": 2343 + }, + { + "epoch": 0.11, + "grad_norm": 0.6996956752385861, + "learning_rate": 4.9918760442383775e-06, + "loss": 0.3039, + "step": 2344 + }, + { + "epoch": 0.11, + "grad_norm": 0.7637775735780707, + "learning_rate": 4.991860760392906e-06, + "loss": 0.3285, + "step": 2345 + }, + { + "epoch": 0.11, + "grad_norm": 0.6695367151225856, + "learning_rate": 4.991845462207402e-06, + "loss": 0.3146, + "step": 2346 + }, + { + "epoch": 0.11, + "grad_norm": 0.6701911452495309, + "learning_rate": 4.991830149681953e-06, + "loss": 0.2892, + "step": 2347 + }, + { + "epoch": 0.11, + "grad_norm": 0.7298845264268727, + "learning_rate": 4.991814822816649e-06, + "loss": 0.3006, + "step": 2348 + }, + { + "epoch": 0.11, + "grad_norm": 0.6813068191005663, + "learning_rate": 4.991799481611577e-06, + "loss": 0.3033, + "step": 2349 + }, + { + "epoch": 0.11, + "grad_norm": 0.6357524187198214, + "learning_rate": 4.9917841260668246e-06, + "loss": 0.271, + "step": 2350 + }, + { + "epoch": 0.11, + "grad_norm": 0.7229655725652769, + "learning_rate": 4.991768756182481e-06, + "loss": 0.3288, + "step": 2351 + }, + { + "epoch": 0.11, + "grad_norm": 0.7084478929282095, + "learning_rate": 4.991753371958634e-06, + "loss": 0.3203, + "step": 2352 + }, + { + "epoch": 0.11, + "grad_norm": 0.7148579284806731, + "learning_rate": 4.991737973395374e-06, + "loss": 0.2991, + "step": 2353 + }, + { + "epoch": 0.11, + "grad_norm": 0.7359865217126507, + "learning_rate": 4.991722560492787e-06, + "loss": 0.3189, + "step": 2354 + }, + { + "epoch": 0.11, + "grad_norm": 0.7909060039806096, + "learning_rate": 4.9917071332509635e-06, + "loss": 0.336, + "step": 2355 + }, + { + "epoch": 0.11, + "grad_norm": 0.6897327670455706, + "learning_rate": 4.991691691669992e-06, + "loss": 0.2932, + "step": 2356 + }, + { + "epoch": 0.11, + "grad_norm": 0.6820187845612918, + "learning_rate": 4.991676235749961e-06, + "loss": 0.3201, + "step": 2357 + }, + { + "epoch": 0.11, + "grad_norm": 0.7454837276729831, + "learning_rate": 4.991660765490959e-06, + "loss": 0.3247, + "step": 2358 + }, + { + "epoch": 0.11, + "grad_norm": 0.6688288760793715, + "learning_rate": 4.991645280893076e-06, + "loss": 0.2631, + "step": 2359 + }, + { + "epoch": 0.11, + "grad_norm": 0.6768849086601592, + "learning_rate": 4.9916297819563994e-06, + "loss": 0.3148, + "step": 2360 + }, + { + "epoch": 0.11, + "grad_norm": 0.677232258857392, + "learning_rate": 4.991614268681021e-06, + "loss": 0.2973, + "step": 2361 + }, + { + "epoch": 0.11, + "grad_norm": 0.6601725402809256, + "learning_rate": 4.9915987410670284e-06, + "loss": 0.2872, + "step": 2362 + }, + { + "epoch": 0.11, + "grad_norm": 0.6361593284239909, + "learning_rate": 4.991583199114512e-06, + "loss": 0.2928, + "step": 2363 + }, + { + "epoch": 0.11, + "grad_norm": 0.6741617474324861, + "learning_rate": 4.991567642823559e-06, + "loss": 0.3089, + "step": 2364 + }, + { + "epoch": 0.11, + "grad_norm": 0.7099619029348974, + "learning_rate": 4.9915520721942615e-06, + "loss": 0.3156, + "step": 2365 + }, + { + "epoch": 0.11, + "grad_norm": 0.6789990699107289, + "learning_rate": 4.991536487226708e-06, + "loss": 0.3071, + "step": 2366 + }, + { + "epoch": 0.11, + "grad_norm": 0.721690675395533, + "learning_rate": 4.991520887920988e-06, + "loss": 0.3246, + "step": 2367 + }, + { + "epoch": 0.11, + "grad_norm": 0.6574185880587174, + "learning_rate": 4.991505274277191e-06, + "loss": 0.3122, + "step": 2368 + }, + { + "epoch": 0.11, + "grad_norm": 0.6621068011224228, + "learning_rate": 4.991489646295408e-06, + "loss": 0.3115, + "step": 2369 + }, + { + "epoch": 0.11, + "grad_norm": 0.6493372574013051, + "learning_rate": 4.991474003975728e-06, + "loss": 0.3088, + "step": 2370 + }, + { + "epoch": 0.11, + "grad_norm": 0.7120702484689082, + "learning_rate": 4.991458347318242e-06, + "loss": 0.3081, + "step": 2371 + }, + { + "epoch": 0.11, + "grad_norm": 0.6750962596502782, + "learning_rate": 4.991442676323039e-06, + "loss": 0.3004, + "step": 2372 + }, + { + "epoch": 0.11, + "grad_norm": 0.7008837077440461, + "learning_rate": 4.9914269909902095e-06, + "loss": 0.3133, + "step": 2373 + }, + { + "epoch": 0.11, + "grad_norm": 0.6528575590803208, + "learning_rate": 4.991411291319844e-06, + "loss": 0.2891, + "step": 2374 + }, + { + "epoch": 0.11, + "grad_norm": 0.7705996595733008, + "learning_rate": 4.991395577312032e-06, + "loss": 0.3327, + "step": 2375 + }, + { + "epoch": 0.11, + "grad_norm": 0.7269728917693805, + "learning_rate": 4.991379848966865e-06, + "loss": 0.3004, + "step": 2376 + }, + { + "epoch": 0.11, + "grad_norm": 0.7448706850415588, + "learning_rate": 4.991364106284434e-06, + "loss": 0.3092, + "step": 2377 + }, + { + "epoch": 0.11, + "grad_norm": 0.8165393491509028, + "learning_rate": 4.991348349264828e-06, + "loss": 0.3212, + "step": 2378 + }, + { + "epoch": 0.11, + "grad_norm": 0.7428354847957275, + "learning_rate": 4.991332577908139e-06, + "loss": 0.3054, + "step": 2379 + }, + { + "epoch": 0.11, + "grad_norm": 0.7438945231178475, + "learning_rate": 4.991316792214457e-06, + "loss": 0.3209, + "step": 2380 + }, + { + "epoch": 0.11, + "grad_norm": 0.8520781927858059, + "learning_rate": 4.991300992183872e-06, + "loss": 0.3188, + "step": 2381 + }, + { + "epoch": 0.11, + "grad_norm": 0.7124170150301321, + "learning_rate": 4.991285177816477e-06, + "loss": 0.3234, + "step": 2382 + }, + { + "epoch": 0.11, + "grad_norm": 0.6964835284325164, + "learning_rate": 4.991269349112362e-06, + "loss": 0.3167, + "step": 2383 + }, + { + "epoch": 0.11, + "grad_norm": 0.7461094957710867, + "learning_rate": 4.991253506071617e-06, + "loss": 0.3055, + "step": 2384 + }, + { + "epoch": 0.11, + "grad_norm": 0.6907255095777912, + "learning_rate": 4.991237648694336e-06, + "loss": 0.3187, + "step": 2385 + }, + { + "epoch": 0.11, + "grad_norm": 0.6932633286159118, + "learning_rate": 4.9912217769806075e-06, + "loss": 0.3202, + "step": 2386 + }, + { + "epoch": 0.11, + "grad_norm": 0.7039605703267855, + "learning_rate": 4.991205890930523e-06, + "loss": 0.3281, + "step": 2387 + }, + { + "epoch": 0.11, + "grad_norm": 0.6807791819537211, + "learning_rate": 4.9911899905441755e-06, + "loss": 0.3191, + "step": 2388 + }, + { + "epoch": 0.11, + "grad_norm": 0.786009677736922, + "learning_rate": 4.9911740758216565e-06, + "loss": 0.3141, + "step": 2389 + }, + { + "epoch": 0.11, + "grad_norm": 0.6797531174513047, + "learning_rate": 4.991158146763056e-06, + "loss": 0.2922, + "step": 2390 + }, + { + "epoch": 0.11, + "grad_norm": 0.6751093304894836, + "learning_rate": 4.991142203368466e-06, + "loss": 0.3117, + "step": 2391 + }, + { + "epoch": 0.11, + "grad_norm": 0.6337425430019834, + "learning_rate": 4.991126245637979e-06, + "loss": 0.3005, + "step": 2392 + }, + { + "epoch": 0.11, + "grad_norm": 0.6929595558708915, + "learning_rate": 4.991110273571688e-06, + "loss": 0.323, + "step": 2393 + }, + { + "epoch": 0.11, + "grad_norm": 0.6865234828010763, + "learning_rate": 4.991094287169682e-06, + "loss": 0.2925, + "step": 2394 + }, + { + "epoch": 0.11, + "grad_norm": 0.6780430995504249, + "learning_rate": 4.991078286432055e-06, + "loss": 0.3196, + "step": 2395 + }, + { + "epoch": 0.11, + "grad_norm": 0.6942918987026917, + "learning_rate": 4.9910622713588984e-06, + "loss": 0.3369, + "step": 2396 + }, + { + "epoch": 0.11, + "grad_norm": 0.7310620079411794, + "learning_rate": 4.9910462419503046e-06, + "loss": 0.3288, + "step": 2397 + }, + { + "epoch": 0.11, + "grad_norm": 0.6888203463351311, + "learning_rate": 4.991030198206366e-06, + "loss": 0.2976, + "step": 2398 + }, + { + "epoch": 0.11, + "grad_norm": 0.7842437161730333, + "learning_rate": 4.991014140127174e-06, + "loss": 0.3274, + "step": 2399 + }, + { + "epoch": 0.11, + "grad_norm": 0.749123270010661, + "learning_rate": 4.990998067712822e-06, + "loss": 0.3327, + "step": 2400 + }, + { + "epoch": 0.11, + "grad_norm": 0.719409896295562, + "learning_rate": 4.990981980963403e-06, + "loss": 0.2994, + "step": 2401 + }, + { + "epoch": 0.11, + "grad_norm": 0.7951220034953331, + "learning_rate": 4.990965879879008e-06, + "loss": 0.3445, + "step": 2402 + }, + { + "epoch": 0.11, + "grad_norm": 0.721836173722458, + "learning_rate": 4.99094976445973e-06, + "loss": 0.3172, + "step": 2403 + }, + { + "epoch": 0.11, + "grad_norm": 0.7555709951764337, + "learning_rate": 4.990933634705664e-06, + "loss": 0.3343, + "step": 2404 + }, + { + "epoch": 0.11, + "grad_norm": 0.6905557378179727, + "learning_rate": 4.9909174906169e-06, + "loss": 0.3202, + "step": 2405 + }, + { + "epoch": 0.11, + "grad_norm": 0.7117924331328038, + "learning_rate": 4.990901332193531e-06, + "loss": 0.3047, + "step": 2406 + }, + { + "epoch": 0.11, + "grad_norm": 0.6883345771222072, + "learning_rate": 4.990885159435652e-06, + "loss": 0.3213, + "step": 2407 + }, + { + "epoch": 0.11, + "grad_norm": 0.6652981572634223, + "learning_rate": 4.990868972343355e-06, + "loss": 0.3221, + "step": 2408 + }, + { + "epoch": 0.11, + "grad_norm": 0.6757712346511, + "learning_rate": 4.990852770916732e-06, + "loss": 0.2932, + "step": 2409 + }, + { + "epoch": 0.11, + "grad_norm": 0.6772034831748057, + "learning_rate": 4.990836555155878e-06, + "loss": 0.3129, + "step": 2410 + }, + { + "epoch": 0.11, + "grad_norm": 0.7280216899954342, + "learning_rate": 4.990820325060885e-06, + "loss": 0.3253, + "step": 2411 + }, + { + "epoch": 0.11, + "grad_norm": 0.6773673022420167, + "learning_rate": 4.990804080631849e-06, + "loss": 0.3126, + "step": 2412 + }, + { + "epoch": 0.11, + "grad_norm": 0.6760054225962787, + "learning_rate": 4.99078782186886e-06, + "loss": 0.296, + "step": 2413 + }, + { + "epoch": 0.11, + "grad_norm": 0.6720381043238625, + "learning_rate": 4.990771548772013e-06, + "loss": 0.3028, + "step": 2414 + }, + { + "epoch": 0.11, + "grad_norm": 0.7723097984809802, + "learning_rate": 4.9907552613414025e-06, + "loss": 0.3259, + "step": 2415 + }, + { + "epoch": 0.11, + "grad_norm": 0.7394311932072927, + "learning_rate": 4.9907389595771216e-06, + "loss": 0.3173, + "step": 2416 + }, + { + "epoch": 0.11, + "grad_norm": 0.6352030267340605, + "learning_rate": 4.9907226434792635e-06, + "loss": 0.2863, + "step": 2417 + }, + { + "epoch": 0.11, + "grad_norm": 0.7096466164207457, + "learning_rate": 4.9907063130479224e-06, + "loss": 0.3123, + "step": 2418 + }, + { + "epoch": 0.11, + "grad_norm": 0.7416039343171802, + "learning_rate": 4.990689968283193e-06, + "loss": 0.2807, + "step": 2419 + }, + { + "epoch": 0.11, + "grad_norm": 0.7327699937476152, + "learning_rate": 4.99067360918517e-06, + "loss": 0.3228, + "step": 2420 + }, + { + "epoch": 0.11, + "grad_norm": 0.7519312929380826, + "learning_rate": 4.990657235753944e-06, + "loss": 0.3178, + "step": 2421 + }, + { + "epoch": 0.11, + "grad_norm": 0.7024147819767798, + "learning_rate": 4.990640847989613e-06, + "loss": 0.3038, + "step": 2422 + }, + { + "epoch": 0.11, + "grad_norm": 0.7246759675238348, + "learning_rate": 4.990624445892269e-06, + "loss": 0.3307, + "step": 2423 + }, + { + "epoch": 0.11, + "grad_norm": 0.6954254577988576, + "learning_rate": 4.9906080294620085e-06, + "loss": 0.3305, + "step": 2424 + }, + { + "epoch": 0.11, + "grad_norm": 0.6963221991779899, + "learning_rate": 4.990591598698924e-06, + "loss": 0.3174, + "step": 2425 + }, + { + "epoch": 0.11, + "grad_norm": 0.6771619604968555, + "learning_rate": 4.990575153603111e-06, + "loss": 0.32, + "step": 2426 + }, + { + "epoch": 0.11, + "grad_norm": 0.7430805252803147, + "learning_rate": 4.9905586941746645e-06, + "loss": 0.3154, + "step": 2427 + }, + { + "epoch": 0.11, + "grad_norm": 0.716425058116847, + "learning_rate": 4.9905422204136775e-06, + "loss": 0.3029, + "step": 2428 + }, + { + "epoch": 0.11, + "grad_norm": 0.7118726953540812, + "learning_rate": 4.9905257323202476e-06, + "loss": 0.3301, + "step": 2429 + }, + { + "epoch": 0.11, + "grad_norm": 0.681058723206773, + "learning_rate": 4.990509229894467e-06, + "loss": 0.3096, + "step": 2430 + }, + { + "epoch": 0.11, + "grad_norm": 0.7500763523758301, + "learning_rate": 4.9904927131364325e-06, + "loss": 0.3294, + "step": 2431 + }, + { + "epoch": 0.11, + "grad_norm": 0.7290818084187604, + "learning_rate": 4.990476182046237e-06, + "loss": 0.2924, + "step": 2432 + }, + { + "epoch": 0.11, + "grad_norm": 0.6623373402616922, + "learning_rate": 4.990459636623978e-06, + "loss": 0.2876, + "step": 2433 + }, + { + "epoch": 0.11, + "grad_norm": 0.6542355413879511, + "learning_rate": 4.99044307686975e-06, + "loss": 0.3432, + "step": 2434 + }, + { + "epoch": 0.11, + "grad_norm": 0.6821804382543978, + "learning_rate": 4.990426502783647e-06, + "loss": 0.3191, + "step": 2435 + }, + { + "epoch": 0.11, + "grad_norm": 0.6869901225718229, + "learning_rate": 4.9904099143657665e-06, + "loss": 0.3014, + "step": 2436 + }, + { + "epoch": 0.11, + "grad_norm": 0.7050173502934065, + "learning_rate": 4.990393311616203e-06, + "loss": 0.3337, + "step": 2437 + }, + { + "epoch": 0.11, + "grad_norm": 0.7412111033474408, + "learning_rate": 4.9903766945350504e-06, + "loss": 0.3161, + "step": 2438 + }, + { + "epoch": 0.11, + "grad_norm": 0.6839166372751112, + "learning_rate": 4.990360063122407e-06, + "loss": 0.2977, + "step": 2439 + }, + { + "epoch": 0.11, + "grad_norm": 0.6152042439889196, + "learning_rate": 4.990343417378367e-06, + "loss": 0.3046, + "step": 2440 + }, + { + "epoch": 0.11, + "grad_norm": 0.6776908385258404, + "learning_rate": 4.990326757303028e-06, + "loss": 0.3283, + "step": 2441 + }, + { + "epoch": 0.11, + "grad_norm": 0.7193089418854844, + "learning_rate": 4.990310082896482e-06, + "loss": 0.3276, + "step": 2442 + }, + { + "epoch": 0.11, + "grad_norm": 0.7311686006014344, + "learning_rate": 4.9902933941588295e-06, + "loss": 0.324, + "step": 2443 + }, + { + "epoch": 0.11, + "grad_norm": 0.7206160441567077, + "learning_rate": 4.990276691090164e-06, + "loss": 0.3212, + "step": 2444 + }, + { + "epoch": 0.11, + "grad_norm": 0.6370820867058148, + "learning_rate": 4.990259973690581e-06, + "loss": 0.3024, + "step": 2445 + }, + { + "epoch": 0.11, + "grad_norm": 0.6868634370742973, + "learning_rate": 4.9902432419601785e-06, + "loss": 0.3342, + "step": 2446 + }, + { + "epoch": 0.11, + "grad_norm": 0.6850334429575595, + "learning_rate": 4.990226495899051e-06, + "loss": 0.2956, + "step": 2447 + }, + { + "epoch": 0.11, + "grad_norm": 0.7445223705994136, + "learning_rate": 4.990209735507298e-06, + "loss": 0.3242, + "step": 2448 + }, + { + "epoch": 0.11, + "grad_norm": 0.645769400070969, + "learning_rate": 4.990192960785012e-06, + "loss": 0.3117, + "step": 2449 + }, + { + "epoch": 0.11, + "grad_norm": 0.7079208960635979, + "learning_rate": 4.9901761717322915e-06, + "loss": 0.3344, + "step": 2450 + }, + { + "epoch": 0.11, + "grad_norm": 0.7062945305080015, + "learning_rate": 4.9901593683492335e-06, + "loss": 0.3081, + "step": 2451 + }, + { + "epoch": 0.11, + "grad_norm": 0.6626775420637546, + "learning_rate": 4.990142550635935e-06, + "loss": 0.2944, + "step": 2452 + }, + { + "epoch": 0.11, + "grad_norm": 0.7352959493387669, + "learning_rate": 4.990125718592491e-06, + "loss": 0.328, + "step": 2453 + }, + { + "epoch": 0.11, + "grad_norm": 0.6362086369080926, + "learning_rate": 4.990108872218999e-06, + "loss": 0.2856, + "step": 2454 + }, + { + "epoch": 0.12, + "grad_norm": 0.6396669218227405, + "learning_rate": 4.990092011515557e-06, + "loss": 0.3113, + "step": 2455 + }, + { + "epoch": 0.12, + "grad_norm": 0.6733489005459731, + "learning_rate": 4.990075136482262e-06, + "loss": 0.3129, + "step": 2456 + }, + { + "epoch": 0.12, + "grad_norm": 0.6257268699810145, + "learning_rate": 4.9900582471192094e-06, + "loss": 0.2913, + "step": 2457 + }, + { + "epoch": 0.12, + "grad_norm": 0.6371716171989381, + "learning_rate": 4.990041343426498e-06, + "loss": 0.3092, + "step": 2458 + }, + { + "epoch": 0.12, + "grad_norm": 0.696602631101336, + "learning_rate": 4.990024425404224e-06, + "loss": 0.3331, + "step": 2459 + }, + { + "epoch": 0.12, + "grad_norm": 0.6779242016529771, + "learning_rate": 4.990007493052485e-06, + "loss": 0.2964, + "step": 2460 + }, + { + "epoch": 0.12, + "grad_norm": 0.6954440157597109, + "learning_rate": 4.98999054637138e-06, + "loss": 0.3144, + "step": 2461 + }, + { + "epoch": 0.12, + "grad_norm": 0.6498928432875791, + "learning_rate": 4.9899735853610045e-06, + "loss": 0.291, + "step": 2462 + }, + { + "epoch": 0.12, + "grad_norm": 0.6528936410616636, + "learning_rate": 4.989956610021457e-06, + "loss": 0.2993, + "step": 2463 + }, + { + "epoch": 0.12, + "grad_norm": 0.7000846016375903, + "learning_rate": 4.9899396203528345e-06, + "loss": 0.3127, + "step": 2464 + }, + { + "epoch": 0.12, + "grad_norm": 0.6875031672939942, + "learning_rate": 4.9899226163552365e-06, + "loss": 0.3134, + "step": 2465 + }, + { + "epoch": 0.12, + "grad_norm": 0.727083379518094, + "learning_rate": 4.989905598028758e-06, + "loss": 0.339, + "step": 2466 + }, + { + "epoch": 0.12, + "grad_norm": 0.7000746056193832, + "learning_rate": 4.989888565373499e-06, + "loss": 0.3213, + "step": 2467 + }, + { + "epoch": 0.12, + "grad_norm": 0.7793588631412453, + "learning_rate": 4.989871518389559e-06, + "loss": 0.3441, + "step": 2468 + }, + { + "epoch": 0.12, + "grad_norm": 0.8011989311858084, + "learning_rate": 4.989854457077032e-06, + "loss": 0.3102, + "step": 2469 + }, + { + "epoch": 0.12, + "grad_norm": 0.6605872717000557, + "learning_rate": 4.989837381436019e-06, + "loss": 0.2928, + "step": 2470 + }, + { + "epoch": 0.12, + "grad_norm": 0.6786356791759671, + "learning_rate": 4.989820291466619e-06, + "loss": 0.3187, + "step": 2471 + }, + { + "epoch": 0.12, + "grad_norm": 0.7766365549897377, + "learning_rate": 4.989803187168927e-06, + "loss": 0.3265, + "step": 2472 + }, + { + "epoch": 0.12, + "grad_norm": 0.7334446172750475, + "learning_rate": 4.989786068543045e-06, + "loss": 0.3256, + "step": 2473 + }, + { + "epoch": 0.12, + "grad_norm": 0.6696447409154154, + "learning_rate": 4.989768935589069e-06, + "loss": 0.3238, + "step": 2474 + }, + { + "epoch": 0.12, + "grad_norm": 0.6429021467295299, + "learning_rate": 4.989751788307099e-06, + "loss": 0.3032, + "step": 2475 + }, + { + "epoch": 0.12, + "grad_norm": 0.705010196258185, + "learning_rate": 4.9897346266972325e-06, + "loss": 0.2985, + "step": 2476 + }, + { + "epoch": 0.12, + "grad_norm": 0.6947774202436862, + "learning_rate": 4.9897174507595694e-06, + "loss": 0.2982, + "step": 2477 + }, + { + "epoch": 0.12, + "grad_norm": 0.7060260420794753, + "learning_rate": 4.9897002604942085e-06, + "loss": 0.2935, + "step": 2478 + }, + { + "epoch": 0.12, + "grad_norm": 0.6922017923641826, + "learning_rate": 4.989683055901248e-06, + "loss": 0.3047, + "step": 2479 + }, + { + "epoch": 0.12, + "grad_norm": 0.6887069945753617, + "learning_rate": 4.989665836980788e-06, + "loss": 0.3147, + "step": 2480 + }, + { + "epoch": 0.12, + "grad_norm": 0.7266362808214661, + "learning_rate": 4.9896486037329265e-06, + "loss": 0.3064, + "step": 2481 + }, + { + "epoch": 0.12, + "grad_norm": 0.6806720502535538, + "learning_rate": 4.989631356157763e-06, + "loss": 0.2995, + "step": 2482 + }, + { + "epoch": 0.12, + "grad_norm": 0.6357604900431841, + "learning_rate": 4.989614094255396e-06, + "loss": 0.3005, + "step": 2483 + }, + { + "epoch": 0.12, + "grad_norm": 0.7220973922029662, + "learning_rate": 4.989596818025926e-06, + "loss": 0.3113, + "step": 2484 + }, + { + "epoch": 0.12, + "grad_norm": 0.8159896570184343, + "learning_rate": 4.989579527469452e-06, + "loss": 0.3342, + "step": 2485 + }, + { + "epoch": 0.12, + "grad_norm": 0.6963448076079461, + "learning_rate": 4.989562222586074e-06, + "loss": 0.2898, + "step": 2486 + }, + { + "epoch": 0.12, + "grad_norm": 0.7240607355878613, + "learning_rate": 4.989544903375891e-06, + "loss": 0.3201, + "step": 2487 + }, + { + "epoch": 0.12, + "grad_norm": 0.7005062049090963, + "learning_rate": 4.989527569839004e-06, + "loss": 0.3255, + "step": 2488 + }, + { + "epoch": 0.12, + "grad_norm": 0.6637184193402471, + "learning_rate": 4.989510221975509e-06, + "loss": 0.2948, + "step": 2489 + }, + { + "epoch": 0.12, + "grad_norm": 0.6199790250294851, + "learning_rate": 4.9894928597855094e-06, + "loss": 0.2963, + "step": 2490 + }, + { + "epoch": 0.12, + "grad_norm": 0.6781961297577891, + "learning_rate": 4.989475483269105e-06, + "loss": 0.3148, + "step": 2491 + }, + { + "epoch": 0.12, + "grad_norm": 0.7177021818931334, + "learning_rate": 4.989458092426395e-06, + "loss": 0.3278, + "step": 2492 + }, + { + "epoch": 0.12, + "grad_norm": 0.7349709876228968, + "learning_rate": 4.9894406872574785e-06, + "loss": 0.313, + "step": 2493 + }, + { + "epoch": 0.12, + "grad_norm": 0.7485843933252829, + "learning_rate": 4.9894232677624565e-06, + "loss": 0.3271, + "step": 2494 + }, + { + "epoch": 0.12, + "grad_norm": 0.6808032525270925, + "learning_rate": 4.98940583394143e-06, + "loss": 0.2976, + "step": 2495 + }, + { + "epoch": 0.12, + "grad_norm": 0.7322569309205689, + "learning_rate": 4.989388385794498e-06, + "loss": 0.3071, + "step": 2496 + }, + { + "epoch": 0.12, + "grad_norm": 0.7241434145214645, + "learning_rate": 4.989370923321761e-06, + "loss": 0.3036, + "step": 2497 + }, + { + "epoch": 0.12, + "grad_norm": 0.7285213863259554, + "learning_rate": 4.989353446523321e-06, + "loss": 0.3078, + "step": 2498 + }, + { + "epoch": 0.12, + "grad_norm": 0.7034274665603591, + "learning_rate": 4.989335955399277e-06, + "loss": 0.318, + "step": 2499 + }, + { + "epoch": 0.12, + "grad_norm": 0.7176095544281921, + "learning_rate": 4.989318449949731e-06, + "loss": 0.3164, + "step": 2500 + }, + { + "epoch": 0.12, + "grad_norm": 0.7146056992606665, + "learning_rate": 4.989300930174782e-06, + "loss": 0.3191, + "step": 2501 + }, + { + "epoch": 0.12, + "grad_norm": 0.7558163196617218, + "learning_rate": 4.989283396074532e-06, + "loss": 0.2839, + "step": 2502 + }, + { + "epoch": 0.12, + "grad_norm": 0.6640074803033309, + "learning_rate": 4.989265847649083e-06, + "loss": 0.3045, + "step": 2503 + }, + { + "epoch": 0.12, + "grad_norm": 0.6621415936153033, + "learning_rate": 4.989248284898533e-06, + "loss": 0.2922, + "step": 2504 + }, + { + "epoch": 0.12, + "grad_norm": 0.7461213389579124, + "learning_rate": 4.989230707822987e-06, + "loss": 0.3059, + "step": 2505 + }, + { + "epoch": 0.12, + "grad_norm": 0.6492646422311312, + "learning_rate": 4.989213116422542e-06, + "loss": 0.2889, + "step": 2506 + }, + { + "epoch": 0.12, + "grad_norm": 0.7654323211498955, + "learning_rate": 4.989195510697301e-06, + "loss": 0.3273, + "step": 2507 + }, + { + "epoch": 0.12, + "grad_norm": 0.7315414843790528, + "learning_rate": 4.989177890647367e-06, + "loss": 0.3342, + "step": 2508 + }, + { + "epoch": 0.12, + "grad_norm": 0.7239423683488871, + "learning_rate": 4.989160256272838e-06, + "loss": 0.3165, + "step": 2509 + }, + { + "epoch": 0.12, + "grad_norm": 0.6768216599654596, + "learning_rate": 4.989142607573818e-06, + "loss": 0.2881, + "step": 2510 + }, + { + "epoch": 0.12, + "grad_norm": 0.7280642272290861, + "learning_rate": 4.989124944550409e-06, + "loss": 0.3023, + "step": 2511 + }, + { + "epoch": 0.12, + "grad_norm": 0.7725633416736322, + "learning_rate": 4.989107267202711e-06, + "loss": 0.312, + "step": 2512 + }, + { + "epoch": 0.12, + "grad_norm": 0.7439169526134014, + "learning_rate": 4.989089575530826e-06, + "loss": 0.3211, + "step": 2513 + }, + { + "epoch": 0.12, + "grad_norm": 0.6591187481624474, + "learning_rate": 4.989071869534855e-06, + "loss": 0.3018, + "step": 2514 + }, + { + "epoch": 0.12, + "grad_norm": 0.7126026215264374, + "learning_rate": 4.989054149214902e-06, + "loss": 0.3204, + "step": 2515 + }, + { + "epoch": 0.12, + "grad_norm": 0.7058506917968352, + "learning_rate": 4.989036414571069e-06, + "loss": 0.3182, + "step": 2516 + }, + { + "epoch": 0.12, + "grad_norm": 0.7294414778703925, + "learning_rate": 4.989018665603456e-06, + "loss": 0.3225, + "step": 2517 + }, + { + "epoch": 0.12, + "grad_norm": 0.6736178655906577, + "learning_rate": 4.9890009023121665e-06, + "loss": 0.2912, + "step": 2518 + }, + { + "epoch": 0.12, + "grad_norm": 0.6414317057357551, + "learning_rate": 4.988983124697302e-06, + "loss": 0.3108, + "step": 2519 + }, + { + "epoch": 0.12, + "grad_norm": 0.7158724428672455, + "learning_rate": 4.988965332758965e-06, + "loss": 0.3212, + "step": 2520 + }, + { + "epoch": 0.12, + "grad_norm": 0.7780529193125982, + "learning_rate": 4.988947526497259e-06, + "loss": 0.3455, + "step": 2521 + }, + { + "epoch": 0.12, + "grad_norm": 0.6800331029981759, + "learning_rate": 4.988929705912285e-06, + "loss": 0.288, + "step": 2522 + }, + { + "epoch": 0.12, + "grad_norm": 0.7240972275684893, + "learning_rate": 4.988911871004146e-06, + "loss": 0.3254, + "step": 2523 + }, + { + "epoch": 0.12, + "grad_norm": 0.667094931125728, + "learning_rate": 4.9888940217729455e-06, + "loss": 0.313, + "step": 2524 + }, + { + "epoch": 0.12, + "grad_norm": 0.6296732876094374, + "learning_rate": 4.988876158218784e-06, + "loss": 0.2945, + "step": 2525 + }, + { + "epoch": 0.12, + "grad_norm": 0.6972116115013175, + "learning_rate": 4.988858280341768e-06, + "loss": 0.3064, + "step": 2526 + }, + { + "epoch": 0.12, + "grad_norm": 0.7600692948760438, + "learning_rate": 4.988840388141996e-06, + "loss": 0.3174, + "step": 2527 + }, + { + "epoch": 0.12, + "grad_norm": 0.6363500903798379, + "learning_rate": 4.988822481619574e-06, + "loss": 0.3144, + "step": 2528 + }, + { + "epoch": 0.12, + "grad_norm": 0.682879179160212, + "learning_rate": 4.988804560774604e-06, + "loss": 0.2901, + "step": 2529 + }, + { + "epoch": 0.12, + "grad_norm": 0.7394581661341556, + "learning_rate": 4.988786625607189e-06, + "loss": 0.3323, + "step": 2530 + }, + { + "epoch": 0.12, + "grad_norm": 0.6629766631595073, + "learning_rate": 4.988768676117433e-06, + "loss": 0.3172, + "step": 2531 + }, + { + "epoch": 0.12, + "grad_norm": 0.7045164593225907, + "learning_rate": 4.988750712305439e-06, + "loss": 0.3285, + "step": 2532 + }, + { + "epoch": 0.12, + "grad_norm": 0.7254338123438372, + "learning_rate": 4.98873273417131e-06, + "loss": 0.309, + "step": 2533 + }, + { + "epoch": 0.12, + "grad_norm": 0.6428356914388451, + "learning_rate": 4.988714741715149e-06, + "loss": 0.3067, + "step": 2534 + }, + { + "epoch": 0.12, + "grad_norm": 0.664309588564329, + "learning_rate": 4.988696734937061e-06, + "loss": 0.2961, + "step": 2535 + }, + { + "epoch": 0.12, + "grad_norm": 0.6669117127094563, + "learning_rate": 4.9886787138371484e-06, + "loss": 0.3035, + "step": 2536 + }, + { + "epoch": 0.12, + "grad_norm": 0.7511399777127872, + "learning_rate": 4.9886606784155155e-06, + "loss": 0.3293, + "step": 2537 + }, + { + "epoch": 0.12, + "grad_norm": 0.7288270316485492, + "learning_rate": 4.9886426286722655e-06, + "loss": 0.3221, + "step": 2538 + }, + { + "epoch": 0.12, + "grad_norm": 0.6509824731651909, + "learning_rate": 4.988624564607503e-06, + "loss": 0.289, + "step": 2539 + }, + { + "epoch": 0.12, + "grad_norm": 0.7240302030652209, + "learning_rate": 4.988606486221331e-06, + "loss": 0.331, + "step": 2540 + }, + { + "epoch": 0.12, + "grad_norm": 0.6613089707463681, + "learning_rate": 4.988588393513855e-06, + "loss": 0.2856, + "step": 2541 + }, + { + "epoch": 0.12, + "grad_norm": 0.6839890284636726, + "learning_rate": 4.988570286485178e-06, + "loss": 0.2994, + "step": 2542 + }, + { + "epoch": 0.12, + "grad_norm": 0.7634488727069186, + "learning_rate": 4.988552165135405e-06, + "loss": 0.3138, + "step": 2543 + }, + { + "epoch": 0.12, + "grad_norm": 0.6823041377065954, + "learning_rate": 4.988534029464639e-06, + "loss": 0.3203, + "step": 2544 + }, + { + "epoch": 0.12, + "grad_norm": 0.7169827143959852, + "learning_rate": 4.988515879472985e-06, + "loss": 0.3059, + "step": 2545 + }, + { + "epoch": 0.12, + "grad_norm": 0.7882821539969701, + "learning_rate": 4.988497715160548e-06, + "loss": 0.309, + "step": 2546 + }, + { + "epoch": 0.12, + "grad_norm": 0.6611751633794805, + "learning_rate": 4.988479536527432e-06, + "loss": 0.3001, + "step": 2547 + }, + { + "epoch": 0.12, + "grad_norm": 0.7058031400985368, + "learning_rate": 4.988461343573742e-06, + "loss": 0.3263, + "step": 2548 + }, + { + "epoch": 0.12, + "grad_norm": 0.6739460038628883, + "learning_rate": 4.988443136299582e-06, + "loss": 0.2982, + "step": 2549 + }, + { + "epoch": 0.12, + "grad_norm": 0.6810796585611433, + "learning_rate": 4.9884249147050574e-06, + "loss": 0.3063, + "step": 2550 + }, + { + "epoch": 0.12, + "grad_norm": 0.8453776935210562, + "learning_rate": 4.988406678790273e-06, + "loss": 0.332, + "step": 2551 + }, + { + "epoch": 0.12, + "grad_norm": 0.7104137697601194, + "learning_rate": 4.988388428555334e-06, + "loss": 0.3184, + "step": 2552 + }, + { + "epoch": 0.12, + "grad_norm": 0.6695897818381483, + "learning_rate": 4.988370164000344e-06, + "loss": 0.2942, + "step": 2553 + }, + { + "epoch": 0.12, + "grad_norm": 0.7037197053675502, + "learning_rate": 4.988351885125409e-06, + "loss": 0.3234, + "step": 2554 + }, + { + "epoch": 0.12, + "grad_norm": 0.6175476162818516, + "learning_rate": 4.988333591930636e-06, + "loss": 0.2797, + "step": 2555 + }, + { + "epoch": 0.12, + "grad_norm": 0.6657174964668005, + "learning_rate": 4.988315284416127e-06, + "loss": 0.3066, + "step": 2556 + }, + { + "epoch": 0.12, + "grad_norm": 0.6854663531833657, + "learning_rate": 4.9882969625819895e-06, + "loss": 0.2863, + "step": 2557 + }, + { + "epoch": 0.12, + "grad_norm": 0.6368521711189166, + "learning_rate": 4.988278626428328e-06, + "loss": 0.299, + "step": 2558 + }, + { + "epoch": 0.12, + "grad_norm": 0.6756578010332885, + "learning_rate": 4.988260275955249e-06, + "loss": 0.297, + "step": 2559 + }, + { + "epoch": 0.12, + "grad_norm": 0.6973198181629864, + "learning_rate": 4.988241911162857e-06, + "loss": 0.322, + "step": 2560 + }, + { + "epoch": 0.12, + "grad_norm": 0.8070162723751143, + "learning_rate": 4.988223532051259e-06, + "loss": 0.329, + "step": 2561 + }, + { + "epoch": 0.12, + "grad_norm": 0.6869338541529664, + "learning_rate": 4.98820513862056e-06, + "loss": 0.333, + "step": 2562 + }, + { + "epoch": 0.12, + "grad_norm": 0.6734769524076113, + "learning_rate": 4.988186730870865e-06, + "loss": 0.2913, + "step": 2563 + }, + { + "epoch": 0.12, + "grad_norm": 0.7254299429037464, + "learning_rate": 4.9881683088022806e-06, + "loss": 0.3385, + "step": 2564 + }, + { + "epoch": 0.12, + "grad_norm": 0.6928674158872561, + "learning_rate": 4.988149872414913e-06, + "loss": 0.2941, + "step": 2565 + }, + { + "epoch": 0.12, + "grad_norm": 0.7142949103382032, + "learning_rate": 4.988131421708869e-06, + "loss": 0.314, + "step": 2566 + }, + { + "epoch": 0.12, + "grad_norm": 0.6936719970270498, + "learning_rate": 4.988112956684254e-06, + "loss": 0.302, + "step": 2567 + }, + { + "epoch": 0.12, + "grad_norm": 0.7064986790286651, + "learning_rate": 4.988094477341174e-06, + "loss": 0.2937, + "step": 2568 + }, + { + "epoch": 0.12, + "grad_norm": 0.6970369257487562, + "learning_rate": 4.988075983679736e-06, + "loss": 0.3041, + "step": 2569 + }, + { + "epoch": 0.12, + "grad_norm": 0.6736884227310062, + "learning_rate": 4.988057475700045e-06, + "loss": 0.3067, + "step": 2570 + }, + { + "epoch": 0.12, + "grad_norm": 0.7462941351635272, + "learning_rate": 4.988038953402209e-06, + "loss": 0.3149, + "step": 2571 + }, + { + "epoch": 0.12, + "grad_norm": 0.7112155117608525, + "learning_rate": 4.988020416786335e-06, + "loss": 0.3244, + "step": 2572 + }, + { + "epoch": 0.12, + "grad_norm": 0.6692027943550227, + "learning_rate": 4.988001865852528e-06, + "loss": 0.3098, + "step": 2573 + }, + { + "epoch": 0.12, + "grad_norm": 0.7273025266069253, + "learning_rate": 4.987983300600896e-06, + "loss": 0.312, + "step": 2574 + }, + { + "epoch": 0.12, + "grad_norm": 0.694223205081879, + "learning_rate": 4.987964721031545e-06, + "loss": 0.3188, + "step": 2575 + }, + { + "epoch": 0.12, + "grad_norm": 0.7545567873431591, + "learning_rate": 4.987946127144583e-06, + "loss": 0.3314, + "step": 2576 + }, + { + "epoch": 0.12, + "grad_norm": 0.6845139031587791, + "learning_rate": 4.987927518940116e-06, + "loss": 0.3098, + "step": 2577 + }, + { + "epoch": 0.12, + "grad_norm": 0.6575433088786312, + "learning_rate": 4.9879088964182515e-06, + "loss": 0.302, + "step": 2578 + }, + { + "epoch": 0.12, + "grad_norm": 0.7557713276706663, + "learning_rate": 4.987890259579097e-06, + "loss": 0.3072, + "step": 2579 + }, + { + "epoch": 0.12, + "grad_norm": 0.6874230511393818, + "learning_rate": 4.987871608422759e-06, + "loss": 0.3165, + "step": 2580 + }, + { + "epoch": 0.12, + "grad_norm": 0.6692888443635425, + "learning_rate": 4.987852942949345e-06, + "loss": 0.3084, + "step": 2581 + }, + { + "epoch": 0.12, + "grad_norm": 0.6937320127701615, + "learning_rate": 4.987834263158963e-06, + "loss": 0.3099, + "step": 2582 + }, + { + "epoch": 0.12, + "grad_norm": 0.6595792724236078, + "learning_rate": 4.98781556905172e-06, + "loss": 0.3067, + "step": 2583 + }, + { + "epoch": 0.12, + "grad_norm": 0.6770108983271594, + "learning_rate": 4.9877968606277225e-06, + "loss": 0.3106, + "step": 2584 + }, + { + "epoch": 0.12, + "grad_norm": 0.6324561944814804, + "learning_rate": 4.98777813788708e-06, + "loss": 0.3212, + "step": 2585 + }, + { + "epoch": 0.12, + "grad_norm": 0.7610670379321801, + "learning_rate": 4.987759400829901e-06, + "loss": 0.3289, + "step": 2586 + }, + { + "epoch": 0.12, + "grad_norm": 0.6634657838495456, + "learning_rate": 4.987740649456291e-06, + "loss": 0.3192, + "step": 2587 + }, + { + "epoch": 0.12, + "grad_norm": 0.7638620722030118, + "learning_rate": 4.9877218837663586e-06, + "loss": 0.3201, + "step": 2588 + }, + { + "epoch": 0.12, + "grad_norm": 0.7005446848472232, + "learning_rate": 4.987703103760212e-06, + "loss": 0.3246, + "step": 2589 + }, + { + "epoch": 0.12, + "grad_norm": 0.7310635979756521, + "learning_rate": 4.9876843094379595e-06, + "loss": 0.3253, + "step": 2590 + }, + { + "epoch": 0.12, + "grad_norm": 0.7119317737158616, + "learning_rate": 4.987665500799709e-06, + "loss": 0.3149, + "step": 2591 + }, + { + "epoch": 0.12, + "grad_norm": 0.6851559139957113, + "learning_rate": 4.987646677845569e-06, + "loss": 0.3029, + "step": 2592 + }, + { + "epoch": 0.12, + "grad_norm": 0.6894126208398874, + "learning_rate": 4.9876278405756475e-06, + "loss": 0.2995, + "step": 2593 + }, + { + "epoch": 0.12, + "grad_norm": 0.7018071706679382, + "learning_rate": 4.987608988990052e-06, + "loss": 0.3051, + "step": 2594 + }, + { + "epoch": 0.12, + "grad_norm": 0.7260262976740165, + "learning_rate": 4.987590123088894e-06, + "loss": 0.33, + "step": 2595 + }, + { + "epoch": 0.12, + "grad_norm": 0.6564120482116753, + "learning_rate": 4.987571242872279e-06, + "loss": 0.3037, + "step": 2596 + }, + { + "epoch": 0.12, + "grad_norm": 0.6682019777288305, + "learning_rate": 4.9875523483403165e-06, + "loss": 0.3146, + "step": 2597 + }, + { + "epoch": 0.12, + "grad_norm": 0.6254509209005744, + "learning_rate": 4.9875334394931154e-06, + "loss": 0.2958, + "step": 2598 + }, + { + "epoch": 0.12, + "grad_norm": 0.7413136746016084, + "learning_rate": 4.987514516330785e-06, + "loss": 0.3235, + "step": 2599 + }, + { + "epoch": 0.12, + "grad_norm": 0.811641416833189, + "learning_rate": 4.987495578853434e-06, + "loss": 0.3158, + "step": 2600 + }, + { + "epoch": 0.12, + "grad_norm": 0.7216203057367442, + "learning_rate": 4.987476627061171e-06, + "loss": 0.3352, + "step": 2601 + }, + { + "epoch": 0.12, + "grad_norm": 0.630697231383496, + "learning_rate": 4.987457660954105e-06, + "loss": 0.2784, + "step": 2602 + }, + { + "epoch": 0.12, + "grad_norm": 0.6707172033875742, + "learning_rate": 4.9874386805323446e-06, + "loss": 0.3026, + "step": 2603 + }, + { + "epoch": 0.12, + "grad_norm": 0.660686710171611, + "learning_rate": 4.987419685796001e-06, + "loss": 0.3139, + "step": 2604 + }, + { + "epoch": 0.12, + "grad_norm": 0.701390283082083, + "learning_rate": 4.9874006767451825e-06, + "loss": 0.3234, + "step": 2605 + }, + { + "epoch": 0.12, + "grad_norm": 0.7351729136497146, + "learning_rate": 4.987381653379997e-06, + "loss": 0.3128, + "step": 2606 + }, + { + "epoch": 0.12, + "grad_norm": 0.632900122826865, + "learning_rate": 4.987362615700556e-06, + "loss": 0.2909, + "step": 2607 + }, + { + "epoch": 0.12, + "grad_norm": 0.6639497078801305, + "learning_rate": 4.987343563706969e-06, + "loss": 0.2938, + "step": 2608 + }, + { + "epoch": 0.12, + "grad_norm": 0.725088790221481, + "learning_rate": 4.987324497399344e-06, + "loss": 0.3106, + "step": 2609 + }, + { + "epoch": 0.12, + "grad_norm": 0.6838652848002703, + "learning_rate": 4.987305416777791e-06, + "loss": 0.3029, + "step": 2610 + }, + { + "epoch": 0.12, + "grad_norm": 0.6851788722665033, + "learning_rate": 4.987286321842422e-06, + "loss": 0.3102, + "step": 2611 + }, + { + "epoch": 0.12, + "grad_norm": 0.6089600785746957, + "learning_rate": 4.987267212593345e-06, + "loss": 0.2951, + "step": 2612 + }, + { + "epoch": 0.12, + "grad_norm": 0.6664775011857139, + "learning_rate": 4.98724808903067e-06, + "loss": 0.2915, + "step": 2613 + }, + { + "epoch": 0.12, + "grad_norm": 0.6794336760894079, + "learning_rate": 4.987228951154507e-06, + "loss": 0.3115, + "step": 2614 + }, + { + "epoch": 0.12, + "grad_norm": 0.6867784635686116, + "learning_rate": 4.987209798964967e-06, + "loss": 0.3108, + "step": 2615 + }, + { + "epoch": 0.12, + "grad_norm": 0.6660254419400036, + "learning_rate": 4.98719063246216e-06, + "loss": 0.3051, + "step": 2616 + }, + { + "epoch": 0.12, + "grad_norm": 0.6248847852332967, + "learning_rate": 4.987171451646196e-06, + "loss": 0.3053, + "step": 2617 + }, + { + "epoch": 0.12, + "grad_norm": 0.7193580985790888, + "learning_rate": 4.987152256517185e-06, + "loss": 0.3403, + "step": 2618 + }, + { + "epoch": 0.12, + "grad_norm": 0.6588089872937163, + "learning_rate": 4.987133047075238e-06, + "loss": 0.3104, + "step": 2619 + }, + { + "epoch": 0.12, + "grad_norm": 0.699702252575233, + "learning_rate": 4.9871138233204655e-06, + "loss": 0.3025, + "step": 2620 + }, + { + "epoch": 0.12, + "grad_norm": 0.6805096997856174, + "learning_rate": 4.9870945852529785e-06, + "loss": 0.3103, + "step": 2621 + }, + { + "epoch": 0.12, + "grad_norm": 0.718934550555825, + "learning_rate": 4.987075332872887e-06, + "loss": 0.3259, + "step": 2622 + }, + { + "epoch": 0.12, + "grad_norm": 0.7320695804255767, + "learning_rate": 4.987056066180302e-06, + "loss": 0.3111, + "step": 2623 + }, + { + "epoch": 0.12, + "grad_norm": 0.6676872614684224, + "learning_rate": 4.987036785175334e-06, + "loss": 0.3033, + "step": 2624 + }, + { + "epoch": 0.12, + "grad_norm": 0.6622712824087398, + "learning_rate": 4.9870174898580944e-06, + "loss": 0.2855, + "step": 2625 + }, + { + "epoch": 0.12, + "grad_norm": 0.6133836946103636, + "learning_rate": 4.986998180228695e-06, + "loss": 0.3008, + "step": 2626 + }, + { + "epoch": 0.12, + "grad_norm": 0.6519633431305055, + "learning_rate": 4.986978856287246e-06, + "loss": 0.3158, + "step": 2627 + }, + { + "epoch": 0.12, + "grad_norm": 0.7060064155205813, + "learning_rate": 4.986959518033858e-06, + "loss": 0.2881, + "step": 2628 + }, + { + "epoch": 0.12, + "grad_norm": 0.6617304589126916, + "learning_rate": 4.986940165468644e-06, + "loss": 0.3014, + "step": 2629 + }, + { + "epoch": 0.12, + "grad_norm": 0.6916815324984398, + "learning_rate": 4.986920798591714e-06, + "loss": 0.3213, + "step": 2630 + }, + { + "epoch": 0.12, + "grad_norm": 0.665660291578906, + "learning_rate": 4.986901417403179e-06, + "loss": 0.3157, + "step": 2631 + }, + { + "epoch": 0.12, + "grad_norm": 0.7103949930813817, + "learning_rate": 4.9868820219031535e-06, + "loss": 0.3101, + "step": 2632 + }, + { + "epoch": 0.12, + "grad_norm": 0.6689162450817072, + "learning_rate": 4.9868626120917455e-06, + "loss": 0.2917, + "step": 2633 + }, + { + "epoch": 0.12, + "grad_norm": 0.7704283383296351, + "learning_rate": 4.986843187969068e-06, + "loss": 0.325, + "step": 2634 + }, + { + "epoch": 0.12, + "grad_norm": 0.6685232619102731, + "learning_rate": 4.986823749535233e-06, + "loss": 0.314, + "step": 2635 + }, + { + "epoch": 0.12, + "grad_norm": 0.6126743017518813, + "learning_rate": 4.9868042967903535e-06, + "loss": 0.2905, + "step": 2636 + }, + { + "epoch": 0.12, + "grad_norm": 0.6692530758276065, + "learning_rate": 4.98678482973454e-06, + "loss": 0.3101, + "step": 2637 + }, + { + "epoch": 0.12, + "grad_norm": 0.712457785501523, + "learning_rate": 4.986765348367905e-06, + "loss": 0.3338, + "step": 2638 + }, + { + "epoch": 0.12, + "grad_norm": 0.7037399050766334, + "learning_rate": 4.98674585269056e-06, + "loss": 0.3194, + "step": 2639 + }, + { + "epoch": 0.12, + "grad_norm": 0.6693581510158645, + "learning_rate": 4.986726342702617e-06, + "loss": 0.3234, + "step": 2640 + }, + { + "epoch": 0.12, + "grad_norm": 0.7507621429877559, + "learning_rate": 4.986706818404189e-06, + "loss": 0.3397, + "step": 2641 + }, + { + "epoch": 0.12, + "grad_norm": 0.668076676065211, + "learning_rate": 4.9866872797953895e-06, + "loss": 0.3189, + "step": 2642 + }, + { + "epoch": 0.12, + "grad_norm": 0.6987145119459186, + "learning_rate": 4.9866677268763285e-06, + "loss": 0.3196, + "step": 2643 + }, + { + "epoch": 0.12, + "grad_norm": 0.7000197675953901, + "learning_rate": 4.98664815964712e-06, + "loss": 0.3155, + "step": 2644 + }, + { + "epoch": 0.12, + "grad_norm": 0.7189732826578218, + "learning_rate": 4.986628578107877e-06, + "loss": 0.3092, + "step": 2645 + }, + { + "epoch": 0.12, + "grad_norm": 0.630866291703158, + "learning_rate": 4.9866089822587106e-06, + "loss": 0.2913, + "step": 2646 + }, + { + "epoch": 0.12, + "grad_norm": 0.681962276329209, + "learning_rate": 4.9865893720997346e-06, + "loss": 0.3025, + "step": 2647 + }, + { + "epoch": 0.12, + "grad_norm": 0.6607323379408168, + "learning_rate": 4.986569747631063e-06, + "loss": 0.3114, + "step": 2648 + }, + { + "epoch": 0.12, + "grad_norm": 0.6367293245300596, + "learning_rate": 4.986550108852806e-06, + "loss": 0.3008, + "step": 2649 + }, + { + "epoch": 0.12, + "grad_norm": 0.6618355919698719, + "learning_rate": 4.986530455765079e-06, + "loss": 0.3086, + "step": 2650 + }, + { + "epoch": 0.12, + "grad_norm": 0.6528035571834094, + "learning_rate": 4.9865107883679945e-06, + "loss": 0.3071, + "step": 2651 + }, + { + "epoch": 0.12, + "grad_norm": 0.6927147001599878, + "learning_rate": 4.986491106661664e-06, + "loss": 0.3143, + "step": 2652 + }, + { + "epoch": 0.12, + "grad_norm": 0.6227649443774443, + "learning_rate": 4.986471410646203e-06, + "loss": 0.3064, + "step": 2653 + }, + { + "epoch": 0.12, + "grad_norm": 0.6572889269228513, + "learning_rate": 4.986451700321724e-06, + "loss": 0.3114, + "step": 2654 + }, + { + "epoch": 0.12, + "grad_norm": 0.6545727574431969, + "learning_rate": 4.98643197568834e-06, + "loss": 0.3119, + "step": 2655 + }, + { + "epoch": 0.12, + "grad_norm": 0.736161714148256, + "learning_rate": 4.986412236746165e-06, + "loss": 0.2989, + "step": 2656 + }, + { + "epoch": 0.12, + "grad_norm": 0.6759289587889629, + "learning_rate": 4.986392483495313e-06, + "loss": 0.3038, + "step": 2657 + }, + { + "epoch": 0.12, + "grad_norm": 0.6584244338564449, + "learning_rate": 4.9863727159358965e-06, + "loss": 0.3139, + "step": 2658 + }, + { + "epoch": 0.12, + "grad_norm": 0.7187017475937235, + "learning_rate": 4.98635293406803e-06, + "loss": 0.3097, + "step": 2659 + }, + { + "epoch": 0.12, + "grad_norm": 0.7983517242355249, + "learning_rate": 4.9863331378918285e-06, + "loss": 0.3429, + "step": 2660 + }, + { + "epoch": 0.12, + "grad_norm": 0.6955413166701154, + "learning_rate": 4.986313327407404e-06, + "loss": 0.2814, + "step": 2661 + }, + { + "epoch": 0.12, + "grad_norm": 0.6515538574843641, + "learning_rate": 4.986293502614871e-06, + "loss": 0.293, + "step": 2662 + }, + { + "epoch": 0.12, + "grad_norm": 0.6810986611289185, + "learning_rate": 4.986273663514344e-06, + "loss": 0.3023, + "step": 2663 + }, + { + "epoch": 0.12, + "grad_norm": 0.6761492909575139, + "learning_rate": 4.986253810105937e-06, + "loss": 0.2937, + "step": 2664 + }, + { + "epoch": 0.12, + "grad_norm": 0.6520333660034692, + "learning_rate": 4.986233942389763e-06, + "loss": 0.307, + "step": 2665 + }, + { + "epoch": 0.12, + "grad_norm": 0.7277968418688002, + "learning_rate": 4.986214060365939e-06, + "loss": 0.3153, + "step": 2666 + }, + { + "epoch": 0.12, + "grad_norm": 0.6710197040581062, + "learning_rate": 4.986194164034578e-06, + "loss": 0.3011, + "step": 2667 + }, + { + "epoch": 0.12, + "grad_norm": 0.6600039994246751, + "learning_rate": 4.986174253395793e-06, + "loss": 0.2942, + "step": 2668 + }, + { + "epoch": 0.13, + "grad_norm": 0.7094010219884277, + "learning_rate": 4.986154328449702e-06, + "loss": 0.3153, + "step": 2669 + }, + { + "epoch": 0.13, + "grad_norm": 0.6532478226652974, + "learning_rate": 4.9861343891964165e-06, + "loss": 0.2989, + "step": 2670 + }, + { + "epoch": 0.13, + "grad_norm": 0.7062806229715204, + "learning_rate": 4.986114435636053e-06, + "loss": 0.311, + "step": 2671 + }, + { + "epoch": 0.13, + "grad_norm": 0.7076051433581456, + "learning_rate": 4.986094467768725e-06, + "loss": 0.291, + "step": 2672 + }, + { + "epoch": 0.13, + "grad_norm": 0.697563174021034, + "learning_rate": 4.986074485594549e-06, + "loss": 0.3042, + "step": 2673 + }, + { + "epoch": 0.13, + "grad_norm": 0.6400876504248478, + "learning_rate": 4.986054489113639e-06, + "loss": 0.3046, + "step": 2674 + }, + { + "epoch": 0.13, + "grad_norm": 0.7165167553772435, + "learning_rate": 4.98603447832611e-06, + "loss": 0.3217, + "step": 2675 + }, + { + "epoch": 0.13, + "grad_norm": 0.6736576853854938, + "learning_rate": 4.986014453232078e-06, + "loss": 0.311, + "step": 2676 + }, + { + "epoch": 0.13, + "grad_norm": 0.644960838160625, + "learning_rate": 4.985994413831657e-06, + "loss": 0.325, + "step": 2677 + }, + { + "epoch": 0.13, + "grad_norm": 0.6601854396248257, + "learning_rate": 4.985974360124963e-06, + "loss": 0.3162, + "step": 2678 + }, + { + "epoch": 0.13, + "grad_norm": 0.6322511503933199, + "learning_rate": 4.985954292112112e-06, + "loss": 0.2938, + "step": 2679 + }, + { + "epoch": 0.13, + "grad_norm": 0.6214562297562297, + "learning_rate": 4.985934209793218e-06, + "loss": 0.3091, + "step": 2680 + }, + { + "epoch": 0.13, + "grad_norm": 0.6081989063698556, + "learning_rate": 4.985914113168398e-06, + "loss": 0.2889, + "step": 2681 + }, + { + "epoch": 0.13, + "grad_norm": 0.6533508080523244, + "learning_rate": 4.985894002237766e-06, + "loss": 0.3021, + "step": 2682 + }, + { + "epoch": 0.13, + "grad_norm": 0.7101776776849726, + "learning_rate": 4.985873877001441e-06, + "loss": 0.3212, + "step": 2683 + }, + { + "epoch": 0.13, + "grad_norm": 0.6369378266417303, + "learning_rate": 4.985853737459535e-06, + "loss": 0.2893, + "step": 2684 + }, + { + "epoch": 0.13, + "grad_norm": 0.6903457446356897, + "learning_rate": 4.985833583612166e-06, + "loss": 0.3125, + "step": 2685 + }, + { + "epoch": 0.13, + "grad_norm": 0.7010224592812112, + "learning_rate": 4.985813415459449e-06, + "loss": 0.3344, + "step": 2686 + }, + { + "epoch": 0.13, + "grad_norm": 0.7342719201749003, + "learning_rate": 4.985793233001502e-06, + "loss": 0.3221, + "step": 2687 + }, + { + "epoch": 0.13, + "grad_norm": 0.7028221455548029, + "learning_rate": 4.985773036238439e-06, + "loss": 0.3223, + "step": 2688 + }, + { + "epoch": 0.13, + "grad_norm": 0.6164646874752331, + "learning_rate": 4.9857528251703765e-06, + "loss": 0.2935, + "step": 2689 + }, + { + "epoch": 0.13, + "grad_norm": 0.6760327975478746, + "learning_rate": 4.985732599797431e-06, + "loss": 0.3157, + "step": 2690 + }, + { + "epoch": 0.13, + "grad_norm": 0.7444907145941833, + "learning_rate": 4.98571236011972e-06, + "loss": 0.3373, + "step": 2691 + }, + { + "epoch": 0.13, + "grad_norm": 0.7411865065000474, + "learning_rate": 4.985692106137359e-06, + "loss": 0.3169, + "step": 2692 + }, + { + "epoch": 0.13, + "grad_norm": 0.61333862942202, + "learning_rate": 4.985671837850464e-06, + "loss": 0.2922, + "step": 2693 + }, + { + "epoch": 0.13, + "grad_norm": 0.6430786045175184, + "learning_rate": 4.985651555259153e-06, + "loss": 0.2939, + "step": 2694 + }, + { + "epoch": 0.13, + "grad_norm": 0.6772348813406737, + "learning_rate": 4.985631258363542e-06, + "loss": 0.3187, + "step": 2695 + }, + { + "epoch": 0.13, + "grad_norm": 0.6755026375058879, + "learning_rate": 4.985610947163748e-06, + "loss": 0.2807, + "step": 2696 + }, + { + "epoch": 0.13, + "grad_norm": 0.9090991632888398, + "learning_rate": 4.985590621659887e-06, + "loss": 0.3064, + "step": 2697 + }, + { + "epoch": 0.13, + "grad_norm": 0.6957715683178097, + "learning_rate": 4.985570281852078e-06, + "loss": 0.3078, + "step": 2698 + }, + { + "epoch": 0.13, + "grad_norm": 0.7019579065310796, + "learning_rate": 4.985549927740435e-06, + "loss": 0.3085, + "step": 2699 + }, + { + "epoch": 0.13, + "grad_norm": 0.6597866613102564, + "learning_rate": 4.985529559325079e-06, + "loss": 0.2775, + "step": 2700 + }, + { + "epoch": 0.13, + "grad_norm": 0.6822166102400778, + "learning_rate": 4.9855091766061234e-06, + "loss": 0.3068, + "step": 2701 + }, + { + "epoch": 0.13, + "grad_norm": 0.7908361802917686, + "learning_rate": 4.985488779583688e-06, + "loss": 0.3221, + "step": 2702 + }, + { + "epoch": 0.13, + "grad_norm": 0.7134557695557282, + "learning_rate": 4.985468368257889e-06, + "loss": 0.3128, + "step": 2703 + }, + { + "epoch": 0.13, + "grad_norm": 0.6974508521813934, + "learning_rate": 4.985447942628843e-06, + "loss": 0.3141, + "step": 2704 + }, + { + "epoch": 0.13, + "grad_norm": 0.7007772311582975, + "learning_rate": 4.985427502696671e-06, + "loss": 0.3206, + "step": 2705 + }, + { + "epoch": 0.13, + "grad_norm": 0.7069290585840607, + "learning_rate": 4.985407048461487e-06, + "loss": 0.3265, + "step": 2706 + }, + { + "epoch": 0.13, + "grad_norm": 0.6765347919744771, + "learning_rate": 4.985386579923411e-06, + "loss": 0.3086, + "step": 2707 + }, + { + "epoch": 0.13, + "grad_norm": 0.6926054643301713, + "learning_rate": 4.985366097082559e-06, + "loss": 0.3091, + "step": 2708 + }, + { + "epoch": 0.13, + "grad_norm": 0.6283202261350292, + "learning_rate": 4.985345599939051e-06, + "loss": 0.292, + "step": 2709 + }, + { + "epoch": 0.13, + "grad_norm": 0.6683509889241858, + "learning_rate": 4.985325088493003e-06, + "loss": 0.3024, + "step": 2710 + }, + { + "epoch": 0.13, + "grad_norm": 0.8351471303574877, + "learning_rate": 4.985304562744534e-06, + "loss": 0.2952, + "step": 2711 + }, + { + "epoch": 0.13, + "grad_norm": 0.6596935611411917, + "learning_rate": 4.985284022693762e-06, + "loss": 0.2874, + "step": 2712 + }, + { + "epoch": 0.13, + "grad_norm": 0.7303208467400482, + "learning_rate": 4.985263468340804e-06, + "loss": 0.2931, + "step": 2713 + }, + { + "epoch": 0.13, + "grad_norm": 0.8137347367384566, + "learning_rate": 4.985242899685781e-06, + "loss": 0.305, + "step": 2714 + }, + { + "epoch": 0.13, + "grad_norm": 0.6947314106271173, + "learning_rate": 4.9852223167288085e-06, + "loss": 0.317, + "step": 2715 + }, + { + "epoch": 0.13, + "grad_norm": 0.6945574216801129, + "learning_rate": 4.985201719470006e-06, + "loss": 0.3072, + "step": 2716 + }, + { + "epoch": 0.13, + "grad_norm": 0.6579427315207388, + "learning_rate": 4.9851811079094935e-06, + "loss": 0.2755, + "step": 2717 + }, + { + "epoch": 0.13, + "grad_norm": 0.7456472616927775, + "learning_rate": 4.985160482047388e-06, + "loss": 0.3103, + "step": 2718 + }, + { + "epoch": 0.13, + "grad_norm": 0.7678285572959176, + "learning_rate": 4.9851398418838085e-06, + "loss": 0.3182, + "step": 2719 + }, + { + "epoch": 0.13, + "grad_norm": 0.6580565803883347, + "learning_rate": 4.985119187418873e-06, + "loss": 0.2832, + "step": 2720 + }, + { + "epoch": 0.13, + "grad_norm": 0.7109711539865722, + "learning_rate": 4.985098518652702e-06, + "loss": 0.3117, + "step": 2721 + }, + { + "epoch": 0.13, + "grad_norm": 0.7444870868667772, + "learning_rate": 4.985077835585413e-06, + "loss": 0.3094, + "step": 2722 + }, + { + "epoch": 0.13, + "grad_norm": 0.7256281532638628, + "learning_rate": 4.985057138217127e-06, + "loss": 0.3077, + "step": 2723 + }, + { + "epoch": 0.13, + "grad_norm": 0.6696714659519352, + "learning_rate": 4.985036426547961e-06, + "loss": 0.3207, + "step": 2724 + }, + { + "epoch": 0.13, + "grad_norm": 0.6648891063990117, + "learning_rate": 4.985015700578035e-06, + "loss": 0.312, + "step": 2725 + }, + { + "epoch": 0.13, + "grad_norm": 0.6547806568377581, + "learning_rate": 4.984994960307468e-06, + "loss": 0.2905, + "step": 2726 + }, + { + "epoch": 0.13, + "grad_norm": 0.7111021810788002, + "learning_rate": 4.984974205736379e-06, + "loss": 0.3045, + "step": 2727 + }, + { + "epoch": 0.13, + "grad_norm": 0.6743766919776626, + "learning_rate": 4.984953436864889e-06, + "loss": 0.3079, + "step": 2728 + }, + { + "epoch": 0.13, + "grad_norm": 0.6427703055799978, + "learning_rate": 4.984932653693116e-06, + "loss": 0.3094, + "step": 2729 + }, + { + "epoch": 0.13, + "grad_norm": 0.6441565854284128, + "learning_rate": 4.984911856221181e-06, + "loss": 0.3109, + "step": 2730 + }, + { + "epoch": 0.13, + "grad_norm": 0.7320153583036513, + "learning_rate": 4.9848910444492015e-06, + "loss": 0.3149, + "step": 2731 + }, + { + "epoch": 0.13, + "grad_norm": 0.7532002057103185, + "learning_rate": 4.9848702183772995e-06, + "loss": 0.3261, + "step": 2732 + }, + { + "epoch": 0.13, + "grad_norm": 0.7253309226033688, + "learning_rate": 4.984849378005594e-06, + "loss": 0.3031, + "step": 2733 + }, + { + "epoch": 0.13, + "grad_norm": 0.674185262665623, + "learning_rate": 4.984828523334204e-06, + "loss": 0.295, + "step": 2734 + }, + { + "epoch": 0.13, + "grad_norm": 0.6303052988295248, + "learning_rate": 4.984807654363252e-06, + "loss": 0.2802, + "step": 2735 + }, + { + "epoch": 0.13, + "grad_norm": 0.7147151759368848, + "learning_rate": 4.984786771092855e-06, + "loss": 0.3229, + "step": 2736 + }, + { + "epoch": 0.13, + "grad_norm": 0.6934056778835322, + "learning_rate": 4.9847658735231355e-06, + "loss": 0.3324, + "step": 2737 + }, + { + "epoch": 0.13, + "grad_norm": 0.7448749171519544, + "learning_rate": 4.984744961654212e-06, + "loss": 0.3116, + "step": 2738 + }, + { + "epoch": 0.13, + "grad_norm": 0.7000226333628277, + "learning_rate": 4.984724035486206e-06, + "loss": 0.3193, + "step": 2739 + }, + { + "epoch": 0.13, + "grad_norm": 0.7051558937344505, + "learning_rate": 4.9847030950192385e-06, + "loss": 0.322, + "step": 2740 + }, + { + "epoch": 0.13, + "grad_norm": 0.7586060912158282, + "learning_rate": 4.984682140253429e-06, + "loss": 0.3182, + "step": 2741 + }, + { + "epoch": 0.13, + "grad_norm": 0.6519683793905946, + "learning_rate": 4.984661171188897e-06, + "loss": 0.3256, + "step": 2742 + }, + { + "epoch": 0.13, + "grad_norm": 0.6786415676868995, + "learning_rate": 4.9846401878257655e-06, + "loss": 0.3079, + "step": 2743 + }, + { + "epoch": 0.13, + "grad_norm": 0.7731899948827805, + "learning_rate": 4.984619190164154e-06, + "loss": 0.3182, + "step": 2744 + }, + { + "epoch": 0.13, + "grad_norm": 0.6928221953875281, + "learning_rate": 4.9845981782041845e-06, + "loss": 0.3018, + "step": 2745 + }, + { + "epoch": 0.13, + "grad_norm": 0.6857909223638868, + "learning_rate": 4.984577151945976e-06, + "loss": 0.3013, + "step": 2746 + }, + { + "epoch": 0.13, + "grad_norm": 0.7031269057653435, + "learning_rate": 4.98455611138965e-06, + "loss": 0.3069, + "step": 2747 + }, + { + "epoch": 0.13, + "grad_norm": 0.6912560690440933, + "learning_rate": 4.984535056535329e-06, + "loss": 0.3067, + "step": 2748 + }, + { + "epoch": 0.13, + "grad_norm": 0.6870875565938638, + "learning_rate": 4.984513987383133e-06, + "loss": 0.3174, + "step": 2749 + }, + { + "epoch": 0.13, + "grad_norm": 0.658515487727245, + "learning_rate": 4.9844929039331835e-06, + "loss": 0.3093, + "step": 2750 + }, + { + "epoch": 0.13, + "grad_norm": 0.7184811700318372, + "learning_rate": 4.984471806185601e-06, + "loss": 0.3169, + "step": 2751 + }, + { + "epoch": 0.13, + "grad_norm": 0.6881563610371892, + "learning_rate": 4.984450694140509e-06, + "loss": 0.2953, + "step": 2752 + }, + { + "epoch": 0.13, + "grad_norm": 0.7691889466723798, + "learning_rate": 4.984429567798027e-06, + "loss": 0.3263, + "step": 2753 + }, + { + "epoch": 0.13, + "grad_norm": 0.7441412872427494, + "learning_rate": 4.984408427158277e-06, + "loss": 0.3041, + "step": 2754 + }, + { + "epoch": 0.13, + "grad_norm": 0.6911123891719066, + "learning_rate": 4.984387272221382e-06, + "loss": 0.3018, + "step": 2755 + }, + { + "epoch": 0.13, + "grad_norm": 0.6492130931784367, + "learning_rate": 4.984366102987461e-06, + "loss": 0.305, + "step": 2756 + }, + { + "epoch": 0.13, + "grad_norm": 0.6985000123980021, + "learning_rate": 4.984344919456639e-06, + "loss": 0.3154, + "step": 2757 + }, + { + "epoch": 0.13, + "grad_norm": 0.6548082257709306, + "learning_rate": 4.984323721629035e-06, + "loss": 0.3118, + "step": 2758 + }, + { + "epoch": 0.13, + "grad_norm": 0.6525785465514359, + "learning_rate": 4.984302509504774e-06, + "loss": 0.3042, + "step": 2759 + }, + { + "epoch": 0.13, + "grad_norm": 0.6929632583692081, + "learning_rate": 4.9842812830839755e-06, + "loss": 0.3244, + "step": 2760 + }, + { + "epoch": 0.13, + "grad_norm": 0.6898471411789359, + "learning_rate": 4.984260042366763e-06, + "loss": 0.3184, + "step": 2761 + }, + { + "epoch": 0.13, + "grad_norm": 0.7268552340206348, + "learning_rate": 4.9842387873532575e-06, + "loss": 0.3175, + "step": 2762 + }, + { + "epoch": 0.13, + "grad_norm": 0.6359196393190956, + "learning_rate": 4.984217518043583e-06, + "loss": 0.2935, + "step": 2763 + }, + { + "epoch": 0.13, + "grad_norm": 0.7992437473503239, + "learning_rate": 4.984196234437861e-06, + "loss": 0.3019, + "step": 2764 + }, + { + "epoch": 0.13, + "grad_norm": 0.6634220442348884, + "learning_rate": 4.984174936536214e-06, + "loss": 0.3046, + "step": 2765 + }, + { + "epoch": 0.13, + "grad_norm": 0.667438225169493, + "learning_rate": 4.984153624338765e-06, + "loss": 0.3036, + "step": 2766 + }, + { + "epoch": 0.13, + "grad_norm": 0.7228466311353479, + "learning_rate": 4.984132297845635e-06, + "loss": 0.328, + "step": 2767 + }, + { + "epoch": 0.13, + "grad_norm": 0.7713256112160293, + "learning_rate": 4.984110957056949e-06, + "loss": 0.315, + "step": 2768 + }, + { + "epoch": 0.13, + "grad_norm": 0.7475936915866939, + "learning_rate": 4.984089601972829e-06, + "loss": 0.3439, + "step": 2769 + }, + { + "epoch": 0.13, + "grad_norm": 0.6750979395922051, + "learning_rate": 4.984068232593397e-06, + "loss": 0.306, + "step": 2770 + }, + { + "epoch": 0.13, + "grad_norm": 0.6571707202164975, + "learning_rate": 4.984046848918777e-06, + "loss": 0.2991, + "step": 2771 + }, + { + "epoch": 0.13, + "grad_norm": 0.7520984596707564, + "learning_rate": 4.984025450949092e-06, + "loss": 0.3272, + "step": 2772 + }, + { + "epoch": 0.13, + "grad_norm": 0.679103633991206, + "learning_rate": 4.984004038684465e-06, + "loss": 0.3047, + "step": 2773 + }, + { + "epoch": 0.13, + "grad_norm": 0.6839823220681328, + "learning_rate": 4.983982612125018e-06, + "loss": 0.3067, + "step": 2774 + }, + { + "epoch": 0.13, + "grad_norm": 0.8158368923471491, + "learning_rate": 4.983961171270876e-06, + "loss": 0.3328, + "step": 2775 + }, + { + "epoch": 0.13, + "grad_norm": 0.7221932817611965, + "learning_rate": 4.983939716122161e-06, + "loss": 0.3086, + "step": 2776 + }, + { + "epoch": 0.13, + "grad_norm": 0.655282927345717, + "learning_rate": 4.983918246678999e-06, + "loss": 0.2855, + "step": 2777 + }, + { + "epoch": 0.13, + "grad_norm": 0.6759510806318528, + "learning_rate": 4.98389676294151e-06, + "loss": 0.3046, + "step": 2778 + }, + { + "epoch": 0.13, + "grad_norm": 0.6530992386166202, + "learning_rate": 4.983875264909821e-06, + "loss": 0.3043, + "step": 2779 + }, + { + "epoch": 0.13, + "grad_norm": 0.6250381415692378, + "learning_rate": 4.9838537525840536e-06, + "loss": 0.2966, + "step": 2780 + }, + { + "epoch": 0.13, + "grad_norm": 0.7010338407777167, + "learning_rate": 4.983832225964332e-06, + "loss": 0.3106, + "step": 2781 + }, + { + "epoch": 0.13, + "grad_norm": 0.7060499950345298, + "learning_rate": 4.98381068505078e-06, + "loss": 0.3158, + "step": 2782 + }, + { + "epoch": 0.13, + "grad_norm": 0.7248655129044128, + "learning_rate": 4.983789129843522e-06, + "loss": 0.35, + "step": 2783 + }, + { + "epoch": 0.13, + "grad_norm": 0.6621012157054731, + "learning_rate": 4.983767560342681e-06, + "loss": 0.3078, + "step": 2784 + }, + { + "epoch": 0.13, + "grad_norm": 0.6763996262839521, + "learning_rate": 4.983745976548384e-06, + "loss": 0.3006, + "step": 2785 + }, + { + "epoch": 0.13, + "grad_norm": 0.6711684433762368, + "learning_rate": 4.9837243784607516e-06, + "loss": 0.3448, + "step": 2786 + }, + { + "epoch": 0.13, + "grad_norm": 0.6943865319022802, + "learning_rate": 4.983702766079909e-06, + "loss": 0.3028, + "step": 2787 + }, + { + "epoch": 0.13, + "grad_norm": 0.6578762387718345, + "learning_rate": 4.9836811394059834e-06, + "loss": 0.2962, + "step": 2788 + }, + { + "epoch": 0.13, + "grad_norm": 0.6087569692381789, + "learning_rate": 4.9836594984390964e-06, + "loss": 0.2984, + "step": 2789 + }, + { + "epoch": 0.13, + "grad_norm": 0.7285972063914142, + "learning_rate": 4.983637843179372e-06, + "loss": 0.317, + "step": 2790 + }, + { + "epoch": 0.13, + "grad_norm": 0.7440575152415403, + "learning_rate": 4.9836161736269375e-06, + "loss": 0.3338, + "step": 2791 + }, + { + "epoch": 0.13, + "grad_norm": 0.6912001109022753, + "learning_rate": 4.9835944897819154e-06, + "loss": 0.3049, + "step": 2792 + }, + { + "epoch": 0.13, + "grad_norm": 0.6605540483398785, + "learning_rate": 4.983572791644431e-06, + "loss": 0.3053, + "step": 2793 + }, + { + "epoch": 0.13, + "grad_norm": 0.6965579074954796, + "learning_rate": 4.9835510792146095e-06, + "loss": 0.3276, + "step": 2794 + }, + { + "epoch": 0.13, + "grad_norm": 0.7225302429423766, + "learning_rate": 4.983529352492576e-06, + "loss": 0.3175, + "step": 2795 + }, + { + "epoch": 0.13, + "grad_norm": 0.6962279177448402, + "learning_rate": 4.9835076114784554e-06, + "loss": 0.3144, + "step": 2796 + }, + { + "epoch": 0.13, + "grad_norm": 0.7106303875621603, + "learning_rate": 4.983485856172373e-06, + "loss": 0.3204, + "step": 2797 + }, + { + "epoch": 0.13, + "grad_norm": 0.7290723337458733, + "learning_rate": 4.983464086574453e-06, + "loss": 0.3035, + "step": 2798 + }, + { + "epoch": 0.13, + "grad_norm": 0.7140696379610183, + "learning_rate": 4.983442302684822e-06, + "loss": 0.3055, + "step": 2799 + }, + { + "epoch": 0.13, + "grad_norm": 0.6597762392226483, + "learning_rate": 4.9834205045036046e-06, + "loss": 0.3086, + "step": 2800 + }, + { + "epoch": 0.13, + "grad_norm": 0.7536662851161271, + "learning_rate": 4.9833986920309255e-06, + "loss": 0.3245, + "step": 2801 + }, + { + "epoch": 0.13, + "grad_norm": 0.6334236723335062, + "learning_rate": 4.983376865266913e-06, + "loss": 0.2839, + "step": 2802 + }, + { + "epoch": 0.13, + "grad_norm": 0.6949281459424254, + "learning_rate": 4.983355024211689e-06, + "loss": 0.3068, + "step": 2803 + }, + { + "epoch": 0.13, + "grad_norm": 0.7176638293605295, + "learning_rate": 4.983333168865382e-06, + "loss": 0.3092, + "step": 2804 + }, + { + "epoch": 0.13, + "grad_norm": 0.714749291652377, + "learning_rate": 4.983311299228116e-06, + "loss": 0.3089, + "step": 2805 + }, + { + "epoch": 0.13, + "grad_norm": 0.7293723349558504, + "learning_rate": 4.983289415300018e-06, + "loss": 0.3041, + "step": 2806 + }, + { + "epoch": 0.13, + "grad_norm": 0.629780551454618, + "learning_rate": 4.9832675170812135e-06, + "loss": 0.2973, + "step": 2807 + }, + { + "epoch": 0.13, + "grad_norm": 0.6604929985095718, + "learning_rate": 4.983245604571828e-06, + "loss": 0.2971, + "step": 2808 + }, + { + "epoch": 0.13, + "grad_norm": 0.6982112701741372, + "learning_rate": 4.983223677771989e-06, + "loss": 0.3157, + "step": 2809 + }, + { + "epoch": 0.13, + "grad_norm": 0.6723908356939835, + "learning_rate": 4.983201736681822e-06, + "loss": 0.3047, + "step": 2810 + }, + { + "epoch": 0.13, + "grad_norm": 0.7164283199887137, + "learning_rate": 4.9831797813014525e-06, + "loss": 0.3132, + "step": 2811 + }, + { + "epoch": 0.13, + "grad_norm": 0.6677694875067862, + "learning_rate": 4.983157811631008e-06, + "loss": 0.2963, + "step": 2812 + }, + { + "epoch": 0.13, + "grad_norm": 0.6950116692682682, + "learning_rate": 4.983135827670614e-06, + "loss": 0.3276, + "step": 2813 + }, + { + "epoch": 0.13, + "grad_norm": 0.6610663174149385, + "learning_rate": 4.983113829420398e-06, + "loss": 0.3163, + "step": 2814 + }, + { + "epoch": 0.13, + "grad_norm": 0.6425238595623931, + "learning_rate": 4.983091816880486e-06, + "loss": 0.3042, + "step": 2815 + }, + { + "epoch": 0.13, + "grad_norm": 0.681548974266561, + "learning_rate": 4.983069790051004e-06, + "loss": 0.3013, + "step": 2816 + }, + { + "epoch": 0.13, + "grad_norm": 0.7077324345625784, + "learning_rate": 4.98304774893208e-06, + "loss": 0.3073, + "step": 2817 + }, + { + "epoch": 0.13, + "grad_norm": 0.7155162898900909, + "learning_rate": 4.98302569352384e-06, + "loss": 0.3166, + "step": 2818 + }, + { + "epoch": 0.13, + "grad_norm": 0.7344490960209316, + "learning_rate": 4.983003623826412e-06, + "loss": 0.3414, + "step": 2819 + }, + { + "epoch": 0.13, + "grad_norm": 0.6496983495425597, + "learning_rate": 4.982981539839921e-06, + "loss": 0.3061, + "step": 2820 + }, + { + "epoch": 0.13, + "grad_norm": 0.7000524568281578, + "learning_rate": 4.982959441564496e-06, + "loss": 0.2972, + "step": 2821 + }, + { + "epoch": 0.13, + "grad_norm": 0.6765823843370966, + "learning_rate": 4.982937329000264e-06, + "loss": 0.2997, + "step": 2822 + }, + { + "epoch": 0.13, + "grad_norm": 0.7047448234344201, + "learning_rate": 4.982915202147351e-06, + "loss": 0.3177, + "step": 2823 + }, + { + "epoch": 0.13, + "grad_norm": 0.732822270185039, + "learning_rate": 4.982893061005885e-06, + "loss": 0.314, + "step": 2824 + }, + { + "epoch": 0.13, + "grad_norm": 0.6311974074850603, + "learning_rate": 4.982870905575993e-06, + "loss": 0.2946, + "step": 2825 + }, + { + "epoch": 0.13, + "grad_norm": 0.6395363008214913, + "learning_rate": 4.982848735857804e-06, + "loss": 0.2979, + "step": 2826 + }, + { + "epoch": 0.13, + "grad_norm": 0.6397298801766539, + "learning_rate": 4.982826551851444e-06, + "loss": 0.3031, + "step": 2827 + }, + { + "epoch": 0.13, + "grad_norm": 0.709853810218756, + "learning_rate": 4.982804353557041e-06, + "loss": 0.3428, + "step": 2828 + }, + { + "epoch": 0.13, + "grad_norm": 0.7198002082034077, + "learning_rate": 4.982782140974724e-06, + "loss": 0.3477, + "step": 2829 + }, + { + "epoch": 0.13, + "grad_norm": 0.6543329822480511, + "learning_rate": 4.982759914104619e-06, + "loss": 0.296, + "step": 2830 + }, + { + "epoch": 0.13, + "grad_norm": 0.6661750939046363, + "learning_rate": 4.982737672946855e-06, + "loss": 0.3188, + "step": 2831 + }, + { + "epoch": 0.13, + "grad_norm": 0.6627322078205536, + "learning_rate": 4.9827154175015605e-06, + "loss": 0.3179, + "step": 2832 + }, + { + "epoch": 0.13, + "grad_norm": 0.7694101688457367, + "learning_rate": 4.9826931477688615e-06, + "loss": 0.3099, + "step": 2833 + }, + { + "epoch": 0.13, + "grad_norm": 0.7358998479286714, + "learning_rate": 4.982670863748888e-06, + "loss": 0.3152, + "step": 2834 + }, + { + "epoch": 0.13, + "grad_norm": 0.6375817449277336, + "learning_rate": 4.982648565441767e-06, + "loss": 0.2969, + "step": 2835 + }, + { + "epoch": 0.13, + "grad_norm": 0.674129629871453, + "learning_rate": 4.982626252847628e-06, + "loss": 0.3088, + "step": 2836 + }, + { + "epoch": 0.13, + "grad_norm": 0.7793755233341537, + "learning_rate": 4.982603925966599e-06, + "loss": 0.3391, + "step": 2837 + }, + { + "epoch": 0.13, + "grad_norm": 0.7082102739267263, + "learning_rate": 4.9825815847988085e-06, + "loss": 0.3304, + "step": 2838 + }, + { + "epoch": 0.13, + "grad_norm": 0.7134271204625757, + "learning_rate": 4.982559229344385e-06, + "loss": 0.2895, + "step": 2839 + }, + { + "epoch": 0.13, + "grad_norm": 0.6925814383692093, + "learning_rate": 4.982536859603457e-06, + "loss": 0.3191, + "step": 2840 + }, + { + "epoch": 0.13, + "grad_norm": 0.6982690124585428, + "learning_rate": 4.982514475576153e-06, + "loss": 0.3169, + "step": 2841 + }, + { + "epoch": 0.13, + "grad_norm": 0.7035195762409365, + "learning_rate": 4.982492077262602e-06, + "loss": 0.3162, + "step": 2842 + }, + { + "epoch": 0.13, + "grad_norm": 0.6650524734619729, + "learning_rate": 4.982469664662933e-06, + "loss": 0.311, + "step": 2843 + }, + { + "epoch": 0.13, + "grad_norm": 0.696647482799808, + "learning_rate": 4.982447237777276e-06, + "loss": 0.3121, + "step": 2844 + }, + { + "epoch": 0.13, + "grad_norm": 0.6646094832620065, + "learning_rate": 4.982424796605758e-06, + "loss": 0.3023, + "step": 2845 + }, + { + "epoch": 0.13, + "grad_norm": 0.7159763060832582, + "learning_rate": 4.98240234114851e-06, + "loss": 0.3257, + "step": 2846 + }, + { + "epoch": 0.13, + "grad_norm": 0.7880057636605249, + "learning_rate": 4.9823798714056605e-06, + "loss": 0.3217, + "step": 2847 + }, + { + "epoch": 0.13, + "grad_norm": 0.6360308025891539, + "learning_rate": 4.982357387377338e-06, + "loss": 0.3148, + "step": 2848 + }, + { + "epoch": 0.13, + "grad_norm": 0.7108321676991639, + "learning_rate": 4.9823348890636735e-06, + "loss": 0.3101, + "step": 2849 + }, + { + "epoch": 0.13, + "grad_norm": 0.6845068028960403, + "learning_rate": 4.9823123764647955e-06, + "loss": 0.2923, + "step": 2850 + }, + { + "epoch": 0.13, + "grad_norm": 0.6638311499320386, + "learning_rate": 4.982289849580834e-06, + "loss": 0.2941, + "step": 2851 + }, + { + "epoch": 0.13, + "grad_norm": 0.6837366424962289, + "learning_rate": 4.9822673084119165e-06, + "loss": 0.3197, + "step": 2852 + }, + { + "epoch": 0.13, + "grad_norm": 0.657037315797205, + "learning_rate": 4.9822447529581764e-06, + "loss": 0.2906, + "step": 2853 + }, + { + "epoch": 0.13, + "grad_norm": 0.791812413015299, + "learning_rate": 4.982222183219741e-06, + "loss": 0.3356, + "step": 2854 + }, + { + "epoch": 0.13, + "grad_norm": 0.6485466473834178, + "learning_rate": 4.982199599196741e-06, + "loss": 0.282, + "step": 2855 + }, + { + "epoch": 0.13, + "grad_norm": 0.6826499048004787, + "learning_rate": 4.982177000889306e-06, + "loss": 0.3146, + "step": 2856 + }, + { + "epoch": 0.13, + "grad_norm": 0.6991965577972341, + "learning_rate": 4.982154388297566e-06, + "loss": 0.31, + "step": 2857 + }, + { + "epoch": 0.13, + "grad_norm": 0.668112343847632, + "learning_rate": 4.982131761421651e-06, + "loss": 0.2979, + "step": 2858 + }, + { + "epoch": 0.13, + "grad_norm": 0.7288607151377567, + "learning_rate": 4.982109120261692e-06, + "loss": 0.3206, + "step": 2859 + }, + { + "epoch": 0.13, + "grad_norm": 0.6847481447737396, + "learning_rate": 4.98208646481782e-06, + "loss": 0.3129, + "step": 2860 + }, + { + "epoch": 0.13, + "grad_norm": 0.7377267725355848, + "learning_rate": 4.982063795090163e-06, + "loss": 0.3274, + "step": 2861 + }, + { + "epoch": 0.13, + "grad_norm": 0.6491162467654843, + "learning_rate": 4.982041111078853e-06, + "loss": 0.309, + "step": 2862 + }, + { + "epoch": 0.13, + "grad_norm": 0.6453518032601665, + "learning_rate": 4.98201841278402e-06, + "loss": 0.3039, + "step": 2863 + }, + { + "epoch": 0.13, + "grad_norm": 0.6828987724932518, + "learning_rate": 4.981995700205795e-06, + "loss": 0.3347, + "step": 2864 + }, + { + "epoch": 0.13, + "grad_norm": 0.7003639795352645, + "learning_rate": 4.981972973344309e-06, + "loss": 0.3132, + "step": 2865 + }, + { + "epoch": 0.13, + "grad_norm": 0.724494407580755, + "learning_rate": 4.981950232199692e-06, + "loss": 0.3202, + "step": 2866 + }, + { + "epoch": 0.13, + "grad_norm": 0.731821540541618, + "learning_rate": 4.981927476772075e-06, + "loss": 0.303, + "step": 2867 + }, + { + "epoch": 0.13, + "grad_norm": 0.7014098666340283, + "learning_rate": 4.98190470706159e-06, + "loss": 0.3177, + "step": 2868 + }, + { + "epoch": 0.13, + "grad_norm": 0.7545191597038859, + "learning_rate": 4.981881923068366e-06, + "loss": 0.306, + "step": 2869 + }, + { + "epoch": 0.13, + "grad_norm": 0.6839357771433319, + "learning_rate": 4.981859124792537e-06, + "loss": 0.3097, + "step": 2870 + }, + { + "epoch": 0.13, + "grad_norm": 0.7217831152840676, + "learning_rate": 4.981836312234231e-06, + "loss": 0.303, + "step": 2871 + }, + { + "epoch": 0.13, + "grad_norm": 1.1871330973832201, + "learning_rate": 4.9818134853935815e-06, + "loss": 0.3228, + "step": 2872 + }, + { + "epoch": 0.13, + "grad_norm": 0.646778177394147, + "learning_rate": 4.981790644270718e-06, + "loss": 0.2924, + "step": 2873 + }, + { + "epoch": 0.13, + "grad_norm": 0.7230647438279234, + "learning_rate": 4.981767788865774e-06, + "loss": 0.3205, + "step": 2874 + }, + { + "epoch": 0.13, + "grad_norm": 0.7043246824695116, + "learning_rate": 4.981744919178882e-06, + "loss": 0.3067, + "step": 2875 + }, + { + "epoch": 0.13, + "grad_norm": 0.7176838087439297, + "learning_rate": 4.981722035210169e-06, + "loss": 0.3216, + "step": 2876 + }, + { + "epoch": 0.13, + "grad_norm": 0.709893211199756, + "learning_rate": 4.98169913695977e-06, + "loss": 0.2996, + "step": 2877 + }, + { + "epoch": 0.13, + "grad_norm": 0.7427818259703104, + "learning_rate": 4.981676224427817e-06, + "loss": 0.3031, + "step": 2878 + }, + { + "epoch": 0.13, + "grad_norm": 0.66562009639244, + "learning_rate": 4.981653297614441e-06, + "loss": 0.2998, + "step": 2879 + }, + { + "epoch": 0.13, + "grad_norm": 0.6805524426430655, + "learning_rate": 4.981630356519774e-06, + "loss": 0.3116, + "step": 2880 + }, + { + "epoch": 0.13, + "grad_norm": 0.6723182415120426, + "learning_rate": 4.981607401143947e-06, + "loss": 0.3244, + "step": 2881 + }, + { + "epoch": 0.14, + "grad_norm": 0.690456166475555, + "learning_rate": 4.981584431487095e-06, + "loss": 0.3033, + "step": 2882 + }, + { + "epoch": 0.14, + "grad_norm": 0.6708977014050947, + "learning_rate": 4.981561447549347e-06, + "loss": 0.3077, + "step": 2883 + }, + { + "epoch": 0.14, + "grad_norm": 0.6428508974188366, + "learning_rate": 4.981538449330836e-06, + "loss": 0.304, + "step": 2884 + }, + { + "epoch": 0.14, + "grad_norm": 0.7012445174673048, + "learning_rate": 4.981515436831697e-06, + "loss": 0.3372, + "step": 2885 + }, + { + "epoch": 0.14, + "grad_norm": 0.6837771078707225, + "learning_rate": 4.981492410052058e-06, + "loss": 0.2955, + "step": 2886 + }, + { + "epoch": 0.14, + "grad_norm": 0.6929453666562796, + "learning_rate": 4.9814693689920555e-06, + "loss": 0.2995, + "step": 2887 + }, + { + "epoch": 0.14, + "grad_norm": 0.6555601393630603, + "learning_rate": 4.98144631365182e-06, + "loss": 0.2915, + "step": 2888 + }, + { + "epoch": 0.14, + "grad_norm": 0.6811265712013894, + "learning_rate": 4.9814232440314845e-06, + "loss": 0.2999, + "step": 2889 + }, + { + "epoch": 0.14, + "grad_norm": 0.689158718564099, + "learning_rate": 4.981400160131182e-06, + "loss": 0.3004, + "step": 2890 + }, + { + "epoch": 0.14, + "grad_norm": 0.6543872446108063, + "learning_rate": 4.981377061951045e-06, + "loss": 0.3015, + "step": 2891 + }, + { + "epoch": 0.14, + "grad_norm": 0.6006401616845479, + "learning_rate": 4.981353949491207e-06, + "loss": 0.2874, + "step": 2892 + }, + { + "epoch": 0.14, + "grad_norm": 0.6576207961810799, + "learning_rate": 4.981330822751801e-06, + "loss": 0.3031, + "step": 2893 + }, + { + "epoch": 0.14, + "grad_norm": 0.8243971037356641, + "learning_rate": 4.981307681732959e-06, + "loss": 0.3409, + "step": 2894 + }, + { + "epoch": 0.14, + "grad_norm": 0.6545386436449976, + "learning_rate": 4.981284526434815e-06, + "loss": 0.3141, + "step": 2895 + }, + { + "epoch": 0.14, + "grad_norm": 0.6562476165631536, + "learning_rate": 4.981261356857503e-06, + "loss": 0.3173, + "step": 2896 + }, + { + "epoch": 0.14, + "grad_norm": 0.6260657907545268, + "learning_rate": 4.981238173001155e-06, + "loss": 0.3108, + "step": 2897 + }, + { + "epoch": 0.14, + "grad_norm": 0.6730050253990131, + "learning_rate": 4.981214974865906e-06, + "loss": 0.3017, + "step": 2898 + }, + { + "epoch": 0.14, + "grad_norm": 0.6820846936593642, + "learning_rate": 4.981191762451887e-06, + "loss": 0.3156, + "step": 2899 + }, + { + "epoch": 0.14, + "grad_norm": 0.7504873277675628, + "learning_rate": 4.9811685357592335e-06, + "loss": 0.3103, + "step": 2900 + }, + { + "epoch": 0.14, + "grad_norm": 0.6653596274127646, + "learning_rate": 4.981145294788079e-06, + "loss": 0.3062, + "step": 2901 + }, + { + "epoch": 0.14, + "grad_norm": 0.7279057756640062, + "learning_rate": 4.981122039538556e-06, + "loss": 0.3189, + "step": 2902 + }, + { + "epoch": 0.14, + "grad_norm": 0.6327185360536858, + "learning_rate": 4.9810987700108006e-06, + "loss": 0.3018, + "step": 2903 + }, + { + "epoch": 0.14, + "grad_norm": 0.6954776076308335, + "learning_rate": 4.9810754862049446e-06, + "loss": 0.3154, + "step": 2904 + }, + { + "epoch": 0.14, + "grad_norm": 0.7244939845220488, + "learning_rate": 4.9810521881211226e-06, + "loss": 0.3074, + "step": 2905 + }, + { + "epoch": 0.14, + "grad_norm": 0.7377403305845263, + "learning_rate": 4.981028875759469e-06, + "loss": 0.3162, + "step": 2906 + }, + { + "epoch": 0.14, + "grad_norm": 0.7475041173073599, + "learning_rate": 4.9810055491201185e-06, + "loss": 0.3209, + "step": 2907 + }, + { + "epoch": 0.14, + "grad_norm": 0.692710183360766, + "learning_rate": 4.980982208203204e-06, + "loss": 0.3422, + "step": 2908 + }, + { + "epoch": 0.14, + "grad_norm": 0.6353637703353041, + "learning_rate": 4.980958853008861e-06, + "loss": 0.3126, + "step": 2909 + }, + { + "epoch": 0.14, + "grad_norm": 0.7103667782654531, + "learning_rate": 4.980935483537222e-06, + "loss": 0.3081, + "step": 2910 + }, + { + "epoch": 0.14, + "grad_norm": 0.6597696845158647, + "learning_rate": 4.980912099788424e-06, + "loss": 0.3257, + "step": 2911 + }, + { + "epoch": 0.14, + "grad_norm": 0.693068448034552, + "learning_rate": 4.9808887017626e-06, + "loss": 0.3365, + "step": 2912 + }, + { + "epoch": 0.14, + "grad_norm": 0.6226520476194174, + "learning_rate": 4.980865289459886e-06, + "loss": 0.294, + "step": 2913 + }, + { + "epoch": 0.14, + "grad_norm": 0.6311503892793299, + "learning_rate": 4.980841862880415e-06, + "loss": 0.3013, + "step": 2914 + }, + { + "epoch": 0.14, + "grad_norm": 0.6758086419529246, + "learning_rate": 4.9808184220243225e-06, + "loss": 0.2875, + "step": 2915 + }, + { + "epoch": 0.14, + "grad_norm": 0.6625359038819385, + "learning_rate": 4.980794966891743e-06, + "loss": 0.2951, + "step": 2916 + }, + { + "epoch": 0.14, + "grad_norm": 0.6102301935387882, + "learning_rate": 4.980771497482814e-06, + "loss": 0.2799, + "step": 2917 + }, + { + "epoch": 0.14, + "grad_norm": 0.6721505530441206, + "learning_rate": 4.980748013797666e-06, + "loss": 0.3071, + "step": 2918 + }, + { + "epoch": 0.14, + "grad_norm": 0.6634152139480582, + "learning_rate": 4.980724515836438e-06, + "loss": 0.3177, + "step": 2919 + }, + { + "epoch": 0.14, + "grad_norm": 0.596814400520631, + "learning_rate": 4.980701003599264e-06, + "loss": 0.2765, + "step": 2920 + }, + { + "epoch": 0.14, + "grad_norm": 0.6856935465961926, + "learning_rate": 4.9806774770862796e-06, + "loss": 0.324, + "step": 2921 + }, + { + "epoch": 0.14, + "grad_norm": 0.6646243037478532, + "learning_rate": 4.980653936297619e-06, + "loss": 0.3263, + "step": 2922 + }, + { + "epoch": 0.14, + "grad_norm": 0.6663506690397718, + "learning_rate": 4.980630381233419e-06, + "loss": 0.3068, + "step": 2923 + }, + { + "epoch": 0.14, + "grad_norm": 0.6503113947276866, + "learning_rate": 4.980606811893814e-06, + "loss": 0.3124, + "step": 2924 + }, + { + "epoch": 0.14, + "grad_norm": 0.6368664318600611, + "learning_rate": 4.9805832282789414e-06, + "loss": 0.2921, + "step": 2925 + }, + { + "epoch": 0.14, + "grad_norm": 0.6173056132687096, + "learning_rate": 4.980559630388934e-06, + "loss": 0.3111, + "step": 2926 + }, + { + "epoch": 0.14, + "grad_norm": 0.677583707526189, + "learning_rate": 4.980536018223931e-06, + "loss": 0.2944, + "step": 2927 + }, + { + "epoch": 0.14, + "grad_norm": 0.6945186139024356, + "learning_rate": 4.980512391784066e-06, + "loss": 0.3113, + "step": 2928 + }, + { + "epoch": 0.14, + "grad_norm": 0.6896329094718376, + "learning_rate": 4.980488751069476e-06, + "loss": 0.3228, + "step": 2929 + }, + { + "epoch": 0.14, + "grad_norm": 0.6320093698648173, + "learning_rate": 4.980465096080297e-06, + "loss": 0.3052, + "step": 2930 + }, + { + "epoch": 0.14, + "grad_norm": 0.7207370374153755, + "learning_rate": 4.980441426816663e-06, + "loss": 0.309, + "step": 2931 + }, + { + "epoch": 0.14, + "grad_norm": 0.6543812826604832, + "learning_rate": 4.980417743278714e-06, + "loss": 0.3037, + "step": 2932 + }, + { + "epoch": 0.14, + "grad_norm": 0.6450865696432099, + "learning_rate": 4.980394045466583e-06, + "loss": 0.2972, + "step": 2933 + }, + { + "epoch": 0.14, + "grad_norm": 0.6469346670915633, + "learning_rate": 4.980370333380409e-06, + "loss": 0.2994, + "step": 2934 + }, + { + "epoch": 0.14, + "grad_norm": 0.7223415693301952, + "learning_rate": 4.980346607020327e-06, + "loss": 0.3226, + "step": 2935 + }, + { + "epoch": 0.14, + "grad_norm": 0.6462239435407594, + "learning_rate": 4.9803228663864725e-06, + "loss": 0.3018, + "step": 2936 + }, + { + "epoch": 0.14, + "grad_norm": 0.6990031747789872, + "learning_rate": 4.980299111478985e-06, + "loss": 0.2998, + "step": 2937 + }, + { + "epoch": 0.14, + "grad_norm": 0.6438034322924944, + "learning_rate": 4.980275342297997e-06, + "loss": 0.2771, + "step": 2938 + }, + { + "epoch": 0.14, + "grad_norm": 0.6361989791759148, + "learning_rate": 4.980251558843649e-06, + "loss": 0.31, + "step": 2939 + }, + { + "epoch": 0.14, + "grad_norm": 0.6643454305947235, + "learning_rate": 4.980227761116078e-06, + "loss": 0.3037, + "step": 2940 + }, + { + "epoch": 0.14, + "grad_norm": 0.6722055240805427, + "learning_rate": 4.980203949115418e-06, + "loss": 0.298, + "step": 2941 + }, + { + "epoch": 0.14, + "grad_norm": 0.6797437284176747, + "learning_rate": 4.980180122841808e-06, + "loss": 0.3071, + "step": 2942 + }, + { + "epoch": 0.14, + "grad_norm": 0.6715402653575973, + "learning_rate": 4.980156282295386e-06, + "loss": 0.2802, + "step": 2943 + }, + { + "epoch": 0.14, + "grad_norm": 0.7020600515856688, + "learning_rate": 4.980132427476287e-06, + "loss": 0.3074, + "step": 2944 + }, + { + "epoch": 0.14, + "grad_norm": 0.6501000099641986, + "learning_rate": 4.9801085583846486e-06, + "loss": 0.2907, + "step": 2945 + }, + { + "epoch": 0.14, + "grad_norm": 0.6016038238452592, + "learning_rate": 4.9800846750206096e-06, + "loss": 0.2884, + "step": 2946 + }, + { + "epoch": 0.14, + "grad_norm": 0.6777874886480868, + "learning_rate": 4.980060777384306e-06, + "loss": 0.3421, + "step": 2947 + }, + { + "epoch": 0.14, + "grad_norm": 0.6854544792588891, + "learning_rate": 4.980036865475877e-06, + "loss": 0.3143, + "step": 2948 + }, + { + "epoch": 0.14, + "grad_norm": 0.6460514400112426, + "learning_rate": 4.980012939295458e-06, + "loss": 0.2924, + "step": 2949 + }, + { + "epoch": 0.14, + "grad_norm": 0.6762125447562104, + "learning_rate": 4.979988998843188e-06, + "loss": 0.3373, + "step": 2950 + }, + { + "epoch": 0.14, + "grad_norm": 0.6862152717141914, + "learning_rate": 4.979965044119205e-06, + "loss": 0.3007, + "step": 2951 + }, + { + "epoch": 0.14, + "grad_norm": 0.6386565818701556, + "learning_rate": 4.979941075123647e-06, + "loss": 0.3205, + "step": 2952 + }, + { + "epoch": 0.14, + "grad_norm": 0.721385968067536, + "learning_rate": 4.979917091856651e-06, + "loss": 0.3118, + "step": 2953 + }, + { + "epoch": 0.14, + "grad_norm": 0.6735103946117452, + "learning_rate": 4.979893094318356e-06, + "loss": 0.309, + "step": 2954 + }, + { + "epoch": 0.14, + "grad_norm": 0.764800614249626, + "learning_rate": 4.979869082508898e-06, + "loss": 0.3092, + "step": 2955 + }, + { + "epoch": 0.14, + "grad_norm": 0.6528481089296237, + "learning_rate": 4.9798450564284175e-06, + "loss": 0.2967, + "step": 2956 + }, + { + "epoch": 0.14, + "grad_norm": 0.6728524102762574, + "learning_rate": 4.979821016077053e-06, + "loss": 0.3181, + "step": 2957 + }, + { + "epoch": 0.14, + "grad_norm": 0.6044784656182688, + "learning_rate": 4.97979696145494e-06, + "loss": 0.298, + "step": 2958 + }, + { + "epoch": 0.14, + "grad_norm": 0.7306609732975013, + "learning_rate": 4.97977289256222e-06, + "loss": 0.3319, + "step": 2959 + }, + { + "epoch": 0.14, + "grad_norm": 0.6671464336216066, + "learning_rate": 4.979748809399031e-06, + "loss": 0.2968, + "step": 2960 + }, + { + "epoch": 0.14, + "grad_norm": 0.6099403086699732, + "learning_rate": 4.979724711965509e-06, + "loss": 0.2892, + "step": 2961 + }, + { + "epoch": 0.14, + "grad_norm": 0.6458244143398582, + "learning_rate": 4.9797006002617954e-06, + "loss": 0.2936, + "step": 2962 + }, + { + "epoch": 0.14, + "grad_norm": 0.6775753502716788, + "learning_rate": 4.979676474288028e-06, + "loss": 0.2988, + "step": 2963 + }, + { + "epoch": 0.14, + "grad_norm": 0.6182678502409639, + "learning_rate": 4.979652334044346e-06, + "loss": 0.2888, + "step": 2964 + }, + { + "epoch": 0.14, + "grad_norm": 0.6344210560201493, + "learning_rate": 4.979628179530888e-06, + "loss": 0.2991, + "step": 2965 + }, + { + "epoch": 0.14, + "grad_norm": 0.6291479810410957, + "learning_rate": 4.9796040107477925e-06, + "loss": 0.2928, + "step": 2966 + }, + { + "epoch": 0.14, + "grad_norm": 0.6903576794065232, + "learning_rate": 4.9795798276952e-06, + "loss": 0.3161, + "step": 2967 + }, + { + "epoch": 0.14, + "grad_norm": 0.6522985875409987, + "learning_rate": 4.979555630373248e-06, + "loss": 0.3066, + "step": 2968 + }, + { + "epoch": 0.14, + "grad_norm": 0.6382018333837904, + "learning_rate": 4.979531418782078e-06, + "loss": 0.3056, + "step": 2969 + }, + { + "epoch": 0.14, + "grad_norm": 0.6902451955745146, + "learning_rate": 4.979507192921826e-06, + "loss": 0.325, + "step": 2970 + }, + { + "epoch": 0.14, + "grad_norm": 0.6445419909191075, + "learning_rate": 4.979482952792634e-06, + "loss": 0.3101, + "step": 2971 + }, + { + "epoch": 0.14, + "grad_norm": 0.6989777346115954, + "learning_rate": 4.979458698394641e-06, + "loss": 0.3064, + "step": 2972 + }, + { + "epoch": 0.14, + "grad_norm": 0.7232908736221767, + "learning_rate": 4.979434429727987e-06, + "loss": 0.3369, + "step": 2973 + }, + { + "epoch": 0.14, + "grad_norm": 0.7817332338698583, + "learning_rate": 4.9794101467928104e-06, + "loss": 0.3193, + "step": 2974 + }, + { + "epoch": 0.14, + "grad_norm": 0.661952808351297, + "learning_rate": 4.979385849589251e-06, + "loss": 0.3139, + "step": 2975 + }, + { + "epoch": 0.14, + "grad_norm": 0.6981358705928171, + "learning_rate": 4.97936153811745e-06, + "loss": 0.3228, + "step": 2976 + }, + { + "epoch": 0.14, + "grad_norm": 0.7135109189079495, + "learning_rate": 4.979337212377546e-06, + "loss": 0.3184, + "step": 2977 + }, + { + "epoch": 0.14, + "grad_norm": 0.7046319823416547, + "learning_rate": 4.97931287236968e-06, + "loss": 0.3022, + "step": 2978 + }, + { + "epoch": 0.14, + "grad_norm": 0.7204466112201489, + "learning_rate": 4.979288518093991e-06, + "loss": 0.2989, + "step": 2979 + }, + { + "epoch": 0.14, + "grad_norm": 0.6693175944195726, + "learning_rate": 4.97926414955062e-06, + "loss": 0.2977, + "step": 2980 + }, + { + "epoch": 0.14, + "grad_norm": 0.6613990487426928, + "learning_rate": 4.979239766739707e-06, + "loss": 0.2886, + "step": 2981 + }, + { + "epoch": 0.14, + "grad_norm": 0.7921219815326375, + "learning_rate": 4.979215369661393e-06, + "loss": 0.3244, + "step": 2982 + }, + { + "epoch": 0.14, + "grad_norm": 0.6753521767575321, + "learning_rate": 4.979190958315816e-06, + "loss": 0.2929, + "step": 2983 + }, + { + "epoch": 0.14, + "grad_norm": 0.7423708659973618, + "learning_rate": 4.97916653270312e-06, + "loss": 0.3178, + "step": 2984 + }, + { + "epoch": 0.14, + "grad_norm": 0.7462640914270692, + "learning_rate": 4.979142092823442e-06, + "loss": 0.3153, + "step": 2985 + }, + { + "epoch": 0.14, + "grad_norm": 0.6457459548717508, + "learning_rate": 4.979117638676926e-06, + "loss": 0.2797, + "step": 2986 + }, + { + "epoch": 0.14, + "grad_norm": 0.6957773386482031, + "learning_rate": 4.97909317026371e-06, + "loss": 0.3067, + "step": 2987 + }, + { + "epoch": 0.14, + "grad_norm": 0.6775644142391595, + "learning_rate": 4.979068687583937e-06, + "loss": 0.3105, + "step": 2988 + }, + { + "epoch": 0.14, + "grad_norm": 0.6756564100726558, + "learning_rate": 4.979044190637745e-06, + "loss": 0.2991, + "step": 2989 + }, + { + "epoch": 0.14, + "grad_norm": 0.5896990067852704, + "learning_rate": 4.979019679425278e-06, + "loss": 0.2944, + "step": 2990 + }, + { + "epoch": 0.14, + "grad_norm": 0.7314540587292746, + "learning_rate": 4.978995153946676e-06, + "loss": 0.3115, + "step": 2991 + }, + { + "epoch": 0.14, + "grad_norm": 0.7354830329367372, + "learning_rate": 4.97897061420208e-06, + "loss": 0.3209, + "step": 2992 + }, + { + "epoch": 0.14, + "grad_norm": 0.6616748450455244, + "learning_rate": 4.97894606019163e-06, + "loss": 0.3253, + "step": 2993 + }, + { + "epoch": 0.14, + "grad_norm": 0.652247639834857, + "learning_rate": 4.978921491915469e-06, + "loss": 0.3133, + "step": 2994 + }, + { + "epoch": 0.14, + "grad_norm": 0.7279338870534404, + "learning_rate": 4.9788969093737385e-06, + "loss": 0.3148, + "step": 2995 + }, + { + "epoch": 0.14, + "grad_norm": 0.6691524022923612, + "learning_rate": 4.978872312566579e-06, + "loss": 0.2884, + "step": 2996 + }, + { + "epoch": 0.14, + "grad_norm": 0.745761840934808, + "learning_rate": 4.9788477014941326e-06, + "loss": 0.3219, + "step": 2997 + }, + { + "epoch": 0.14, + "grad_norm": 0.6411169814884573, + "learning_rate": 4.97882307615654e-06, + "loss": 0.3113, + "step": 2998 + }, + { + "epoch": 0.14, + "grad_norm": 0.6624491571760536, + "learning_rate": 4.978798436553945e-06, + "loss": 0.3179, + "step": 2999 + }, + { + "epoch": 0.14, + "grad_norm": 0.6992997293496878, + "learning_rate": 4.978773782686486e-06, + "loss": 0.301, + "step": 3000 + }, + { + "epoch": 0.14, + "grad_norm": 0.6244983208797048, + "learning_rate": 4.978749114554308e-06, + "loss": 0.2965, + "step": 3001 + }, + { + "epoch": 0.14, + "grad_norm": 0.6923493090226205, + "learning_rate": 4.9787244321575515e-06, + "loss": 0.3119, + "step": 3002 + }, + { + "epoch": 0.14, + "grad_norm": 0.6909966090497235, + "learning_rate": 4.9786997354963595e-06, + "loss": 0.3045, + "step": 3003 + }, + { + "epoch": 0.14, + "grad_norm": 0.6298801087073285, + "learning_rate": 4.9786750245708734e-06, + "loss": 0.3014, + "step": 3004 + }, + { + "epoch": 0.14, + "grad_norm": 0.6619380558750827, + "learning_rate": 4.978650299381235e-06, + "loss": 0.2917, + "step": 3005 + }, + { + "epoch": 0.14, + "grad_norm": 0.7161252014275274, + "learning_rate": 4.978625559927588e-06, + "loss": 0.3235, + "step": 3006 + }, + { + "epoch": 0.14, + "grad_norm": 0.6678550041040788, + "learning_rate": 4.978600806210073e-06, + "loss": 0.3225, + "step": 3007 + }, + { + "epoch": 0.14, + "grad_norm": 0.6354690742997318, + "learning_rate": 4.978576038228834e-06, + "loss": 0.3226, + "step": 3008 + }, + { + "epoch": 0.14, + "grad_norm": 0.6470425806355691, + "learning_rate": 4.9785512559840125e-06, + "loss": 0.3128, + "step": 3009 + }, + { + "epoch": 0.14, + "grad_norm": 0.6410848096955444, + "learning_rate": 4.978526459475751e-06, + "loss": 0.3339, + "step": 3010 + }, + { + "epoch": 0.14, + "grad_norm": 0.6096615379385852, + "learning_rate": 4.9785016487041934e-06, + "loss": 0.2879, + "step": 3011 + }, + { + "epoch": 0.14, + "grad_norm": 0.6504932590580387, + "learning_rate": 4.9784768236694815e-06, + "loss": 0.2643, + "step": 3012 + }, + { + "epoch": 0.14, + "grad_norm": 0.6385635157863873, + "learning_rate": 4.978451984371759e-06, + "loss": 0.306, + "step": 3013 + }, + { + "epoch": 0.14, + "grad_norm": 0.6678151212218691, + "learning_rate": 4.9784271308111675e-06, + "loss": 0.3329, + "step": 3014 + }, + { + "epoch": 0.14, + "grad_norm": 0.7208581008401224, + "learning_rate": 4.978402262987852e-06, + "loss": 0.31, + "step": 3015 + }, + { + "epoch": 0.14, + "grad_norm": 0.7101623161800386, + "learning_rate": 4.978377380901952e-06, + "loss": 0.3294, + "step": 3016 + }, + { + "epoch": 0.14, + "grad_norm": 0.6783794576507698, + "learning_rate": 4.9783524845536144e-06, + "loss": 0.3192, + "step": 3017 + }, + { + "epoch": 0.14, + "grad_norm": 0.6633502667902396, + "learning_rate": 4.978327573942982e-06, + "loss": 0.3032, + "step": 3018 + }, + { + "epoch": 0.14, + "grad_norm": 0.7005732042927495, + "learning_rate": 4.978302649070197e-06, + "loss": 0.3022, + "step": 3019 + }, + { + "epoch": 0.14, + "grad_norm": 0.7008389334858319, + "learning_rate": 4.978277709935402e-06, + "loss": 0.3078, + "step": 3020 + }, + { + "epoch": 0.14, + "grad_norm": 0.6528585596450588, + "learning_rate": 4.978252756538743e-06, + "loss": 0.3055, + "step": 3021 + }, + { + "epoch": 0.14, + "grad_norm": 0.6694982893837929, + "learning_rate": 4.978227788880362e-06, + "loss": 0.3086, + "step": 3022 + }, + { + "epoch": 0.14, + "grad_norm": 0.690861255151805, + "learning_rate": 4.978202806960402e-06, + "loss": 0.3345, + "step": 3023 + }, + { + "epoch": 0.14, + "grad_norm": 0.6891817472005949, + "learning_rate": 4.978177810779008e-06, + "loss": 0.3115, + "step": 3024 + }, + { + "epoch": 0.14, + "grad_norm": 0.7206084767120006, + "learning_rate": 4.9781528003363245e-06, + "loss": 0.3114, + "step": 3025 + }, + { + "epoch": 0.14, + "grad_norm": 0.6935705620070478, + "learning_rate": 4.978127775632494e-06, + "loss": 0.3088, + "step": 3026 + }, + { + "epoch": 0.14, + "grad_norm": 0.7076794345110523, + "learning_rate": 4.978102736667661e-06, + "loss": 0.3286, + "step": 3027 + }, + { + "epoch": 0.14, + "grad_norm": 0.6840717997459809, + "learning_rate": 4.978077683441969e-06, + "loss": 0.3163, + "step": 3028 + }, + { + "epoch": 0.14, + "grad_norm": 0.6747999606369702, + "learning_rate": 4.9780526159555645e-06, + "loss": 0.312, + "step": 3029 + }, + { + "epoch": 0.14, + "grad_norm": 0.6261720412219386, + "learning_rate": 4.978027534208588e-06, + "loss": 0.2835, + "step": 3030 + }, + { + "epoch": 0.14, + "grad_norm": 0.6510148244028909, + "learning_rate": 4.9780024382011875e-06, + "loss": 0.2993, + "step": 3031 + }, + { + "epoch": 0.14, + "grad_norm": 0.6726888768388725, + "learning_rate": 4.977977327933504e-06, + "loss": 0.2835, + "step": 3032 + }, + { + "epoch": 0.14, + "grad_norm": 0.8376852172219259, + "learning_rate": 4.977952203405685e-06, + "loss": 0.3404, + "step": 3033 + }, + { + "epoch": 0.14, + "grad_norm": 0.6959316724365567, + "learning_rate": 4.977927064617874e-06, + "loss": 0.3123, + "step": 3034 + }, + { + "epoch": 0.14, + "grad_norm": 0.6887316229204565, + "learning_rate": 4.977901911570215e-06, + "loss": 0.3155, + "step": 3035 + }, + { + "epoch": 0.14, + "grad_norm": 0.6611732219203684, + "learning_rate": 4.9778767442628535e-06, + "loss": 0.3036, + "step": 3036 + }, + { + "epoch": 0.14, + "grad_norm": 0.68247544841761, + "learning_rate": 4.977851562695935e-06, + "loss": 0.3108, + "step": 3037 + }, + { + "epoch": 0.14, + "grad_norm": 0.7261559239957988, + "learning_rate": 4.977826366869602e-06, + "loss": 0.2967, + "step": 3038 + }, + { + "epoch": 0.14, + "grad_norm": 0.7027897936077918, + "learning_rate": 4.977801156784001e-06, + "loss": 0.3223, + "step": 3039 + }, + { + "epoch": 0.14, + "grad_norm": 0.6555789315962111, + "learning_rate": 4.9777759324392784e-06, + "loss": 0.3085, + "step": 3040 + }, + { + "epoch": 0.14, + "grad_norm": 0.6391099759326939, + "learning_rate": 4.977750693835578e-06, + "loss": 0.2902, + "step": 3041 + }, + { + "epoch": 0.14, + "grad_norm": 0.6570298094997383, + "learning_rate": 4.977725440973045e-06, + "loss": 0.3036, + "step": 3042 + }, + { + "epoch": 0.14, + "grad_norm": 0.7401828677595584, + "learning_rate": 4.977700173851824e-06, + "loss": 0.3331, + "step": 3043 + }, + { + "epoch": 0.14, + "grad_norm": 0.7186324951959578, + "learning_rate": 4.977674892472062e-06, + "loss": 0.3267, + "step": 3044 + }, + { + "epoch": 0.14, + "grad_norm": 0.7692751639023974, + "learning_rate": 4.9776495968339034e-06, + "loss": 0.2961, + "step": 3045 + }, + { + "epoch": 0.14, + "grad_norm": 0.6456655656957209, + "learning_rate": 4.977624286937493e-06, + "loss": 0.2988, + "step": 3046 + }, + { + "epoch": 0.14, + "grad_norm": 0.7076467406817543, + "learning_rate": 4.977598962782979e-06, + "loss": 0.3136, + "step": 3047 + }, + { + "epoch": 0.14, + "grad_norm": 0.740956035798324, + "learning_rate": 4.977573624370506e-06, + "loss": 0.3029, + "step": 3048 + }, + { + "epoch": 0.14, + "grad_norm": 0.6865426154203156, + "learning_rate": 4.977548271700219e-06, + "loss": 0.2898, + "step": 3049 + }, + { + "epoch": 0.14, + "grad_norm": 0.7394295404879839, + "learning_rate": 4.977522904772264e-06, + "loss": 0.307, + "step": 3050 + }, + { + "epoch": 0.14, + "grad_norm": 0.7394559927989817, + "learning_rate": 4.977497523586788e-06, + "loss": 0.3107, + "step": 3051 + }, + { + "epoch": 0.14, + "grad_norm": 0.7407073512811426, + "learning_rate": 4.977472128143936e-06, + "loss": 0.3418, + "step": 3052 + }, + { + "epoch": 0.14, + "grad_norm": 0.7180181495679662, + "learning_rate": 4.977446718443855e-06, + "loss": 0.3085, + "step": 3053 + }, + { + "epoch": 0.14, + "grad_norm": 0.6894796969358534, + "learning_rate": 4.97742129448669e-06, + "loss": 0.3051, + "step": 3054 + }, + { + "epoch": 0.14, + "grad_norm": 0.7234824624871982, + "learning_rate": 4.977395856272589e-06, + "loss": 0.3113, + "step": 3055 + }, + { + "epoch": 0.14, + "grad_norm": 0.6270742500621832, + "learning_rate": 4.9773704038016975e-06, + "loss": 0.3206, + "step": 3056 + }, + { + "epoch": 0.14, + "grad_norm": 0.7069721883347555, + "learning_rate": 4.977344937074161e-06, + "loss": 0.3018, + "step": 3057 + }, + { + "epoch": 0.14, + "grad_norm": 0.6503340923135553, + "learning_rate": 4.9773194560901286e-06, + "loss": 0.2987, + "step": 3058 + }, + { + "epoch": 0.14, + "grad_norm": 0.6629051795233837, + "learning_rate": 4.977293960849744e-06, + "loss": 0.2895, + "step": 3059 + }, + { + "epoch": 0.14, + "grad_norm": 0.6612723248569033, + "learning_rate": 4.977268451353156e-06, + "loss": 0.3235, + "step": 3060 + }, + { + "epoch": 0.14, + "grad_norm": 0.609699547833597, + "learning_rate": 4.977242927600511e-06, + "loss": 0.2756, + "step": 3061 + }, + { + "epoch": 0.14, + "grad_norm": 0.755214279262774, + "learning_rate": 4.977217389591955e-06, + "loss": 0.3111, + "step": 3062 + }, + { + "epoch": 0.14, + "grad_norm": 0.6979055842149194, + "learning_rate": 4.977191837327635e-06, + "loss": 0.2968, + "step": 3063 + }, + { + "epoch": 0.14, + "grad_norm": 0.6657437457967155, + "learning_rate": 4.9771662708076995e-06, + "loss": 0.3074, + "step": 3064 + }, + { + "epoch": 0.14, + "grad_norm": 0.7137108000855773, + "learning_rate": 4.977140690032294e-06, + "loss": 0.3311, + "step": 3065 + }, + { + "epoch": 0.14, + "grad_norm": 0.6735692584981807, + "learning_rate": 4.977115095001567e-06, + "loss": 0.3236, + "step": 3066 + }, + { + "epoch": 0.14, + "grad_norm": 0.729487882310804, + "learning_rate": 4.977089485715666e-06, + "loss": 0.3017, + "step": 3067 + }, + { + "epoch": 0.14, + "grad_norm": 0.716925798313675, + "learning_rate": 4.977063862174737e-06, + "loss": 0.3063, + "step": 3068 + }, + { + "epoch": 0.14, + "grad_norm": 0.7256174172848481, + "learning_rate": 4.9770382243789275e-06, + "loss": 0.3158, + "step": 3069 + }, + { + "epoch": 0.14, + "grad_norm": 0.7488148296912267, + "learning_rate": 4.977012572328386e-06, + "loss": 0.3076, + "step": 3070 + }, + { + "epoch": 0.14, + "grad_norm": 0.6604583972378147, + "learning_rate": 4.976986906023259e-06, + "loss": 0.2966, + "step": 3071 + }, + { + "epoch": 0.14, + "grad_norm": 0.6686488603021048, + "learning_rate": 4.976961225463696e-06, + "loss": 0.3158, + "step": 3072 + }, + { + "epoch": 0.14, + "grad_norm": 0.708206412455035, + "learning_rate": 4.976935530649843e-06, + "loss": 0.307, + "step": 3073 + }, + { + "epoch": 0.14, + "grad_norm": 0.6578891485357465, + "learning_rate": 4.976909821581849e-06, + "loss": 0.281, + "step": 3074 + }, + { + "epoch": 0.14, + "grad_norm": 0.8671617143365045, + "learning_rate": 4.976884098259861e-06, + "loss": 0.3423, + "step": 3075 + }, + { + "epoch": 0.14, + "grad_norm": 0.6195952949585365, + "learning_rate": 4.9768583606840285e-06, + "loss": 0.2921, + "step": 3076 + }, + { + "epoch": 0.14, + "grad_norm": 0.638914278904986, + "learning_rate": 4.976832608854498e-06, + "loss": 0.3126, + "step": 3077 + }, + { + "epoch": 0.14, + "grad_norm": 0.694387664536154, + "learning_rate": 4.976806842771418e-06, + "loss": 0.3063, + "step": 3078 + }, + { + "epoch": 0.14, + "grad_norm": 0.6785767612788691, + "learning_rate": 4.9767810624349375e-06, + "loss": 0.3126, + "step": 3079 + }, + { + "epoch": 0.14, + "grad_norm": 0.8200625592534825, + "learning_rate": 4.976755267845205e-06, + "loss": 0.3459, + "step": 3080 + }, + { + "epoch": 0.14, + "grad_norm": 0.7203309083234158, + "learning_rate": 4.976729459002367e-06, + "loss": 0.3096, + "step": 3081 + }, + { + "epoch": 0.14, + "grad_norm": 0.6921058820039908, + "learning_rate": 4.976703635906575e-06, + "loss": 0.2986, + "step": 3082 + }, + { + "epoch": 0.14, + "grad_norm": 0.628955308655588, + "learning_rate": 4.9766777985579765e-06, + "loss": 0.3008, + "step": 3083 + }, + { + "epoch": 0.14, + "grad_norm": 0.6221078000545739, + "learning_rate": 4.976651946956718e-06, + "loss": 0.3061, + "step": 3084 + }, + { + "epoch": 0.14, + "grad_norm": 0.7382106719661001, + "learning_rate": 4.976626081102951e-06, + "loss": 0.2948, + "step": 3085 + }, + { + "epoch": 0.14, + "grad_norm": 0.7243779386065178, + "learning_rate": 4.976600200996823e-06, + "loss": 0.3022, + "step": 3086 + }, + { + "epoch": 0.14, + "grad_norm": 0.665546589473621, + "learning_rate": 4.976574306638484e-06, + "loss": 0.3108, + "step": 3087 + }, + { + "epoch": 0.14, + "grad_norm": 0.6415137429274448, + "learning_rate": 4.976548398028082e-06, + "loss": 0.2958, + "step": 3088 + }, + { + "epoch": 0.14, + "grad_norm": 0.6739518463027209, + "learning_rate": 4.976522475165766e-06, + "loss": 0.3061, + "step": 3089 + }, + { + "epoch": 0.14, + "grad_norm": 0.6164618134446558, + "learning_rate": 4.9764965380516864e-06, + "loss": 0.3068, + "step": 3090 + }, + { + "epoch": 0.14, + "grad_norm": 0.7218544022876243, + "learning_rate": 4.976470586685991e-06, + "loss": 0.3151, + "step": 3091 + }, + { + "epoch": 0.14, + "grad_norm": 0.6766367188049776, + "learning_rate": 4.97644462106883e-06, + "loss": 0.3094, + "step": 3092 + }, + { + "epoch": 0.14, + "grad_norm": 0.6208634560796986, + "learning_rate": 4.976418641200353e-06, + "loss": 0.3034, + "step": 3093 + }, + { + "epoch": 0.14, + "grad_norm": 0.659168679220853, + "learning_rate": 4.9763926470807074e-06, + "loss": 0.2948, + "step": 3094 + }, + { + "epoch": 0.14, + "grad_norm": 0.9367996685692764, + "learning_rate": 4.976366638710046e-06, + "loss": 0.298, + "step": 3095 + }, + { + "epoch": 0.15, + "grad_norm": 0.6428541583162125, + "learning_rate": 4.9763406160885175e-06, + "loss": 0.3025, + "step": 3096 + }, + { + "epoch": 0.15, + "grad_norm": 0.6273492199057696, + "learning_rate": 4.97631457921627e-06, + "loss": 0.3055, + "step": 3097 + }, + { + "epoch": 0.15, + "grad_norm": 0.6923268576699759, + "learning_rate": 4.976288528093456e-06, + "loss": 0.3288, + "step": 3098 + }, + { + "epoch": 0.15, + "grad_norm": 0.8303570656861711, + "learning_rate": 4.9762624627202225e-06, + "loss": 0.3266, + "step": 3099 + }, + { + "epoch": 0.15, + "grad_norm": 0.717726766794788, + "learning_rate": 4.976236383096721e-06, + "loss": 0.3042, + "step": 3100 + }, + { + "epoch": 0.15, + "grad_norm": 0.659236084027623, + "learning_rate": 4.976210289223102e-06, + "loss": 0.3211, + "step": 3101 + }, + { + "epoch": 0.15, + "grad_norm": 0.6696689533180805, + "learning_rate": 4.9761841810995145e-06, + "loss": 0.2993, + "step": 3102 + }, + { + "epoch": 0.15, + "grad_norm": 0.6621274703839616, + "learning_rate": 4.9761580587261105e-06, + "loss": 0.3213, + "step": 3103 + }, + { + "epoch": 0.15, + "grad_norm": 0.7188460034644938, + "learning_rate": 4.976131922103039e-06, + "loss": 0.3243, + "step": 3104 + }, + { + "epoch": 0.15, + "grad_norm": 0.6704185981218383, + "learning_rate": 4.976105771230451e-06, + "loss": 0.2785, + "step": 3105 + }, + { + "epoch": 0.15, + "grad_norm": 0.6720114795441703, + "learning_rate": 4.976079606108495e-06, + "loss": 0.3225, + "step": 3106 + }, + { + "epoch": 0.15, + "grad_norm": 0.7305089074067107, + "learning_rate": 4.976053426737324e-06, + "loss": 0.3288, + "step": 3107 + }, + { + "epoch": 0.15, + "grad_norm": 0.7325645473915474, + "learning_rate": 4.976027233117088e-06, + "loss": 0.3236, + "step": 3108 + }, + { + "epoch": 0.15, + "grad_norm": 0.6711939187665613, + "learning_rate": 4.976001025247938e-06, + "loss": 0.2988, + "step": 3109 + }, + { + "epoch": 0.15, + "grad_norm": 0.6616594624720675, + "learning_rate": 4.9759748031300234e-06, + "loss": 0.3243, + "step": 3110 + }, + { + "epoch": 0.15, + "grad_norm": 0.6705657500263612, + "learning_rate": 4.975948566763497e-06, + "loss": 0.312, + "step": 3111 + }, + { + "epoch": 0.15, + "grad_norm": 0.8152402605616357, + "learning_rate": 4.97592231614851e-06, + "loss": 0.322, + "step": 3112 + }, + { + "epoch": 0.15, + "grad_norm": 0.7870166650594661, + "learning_rate": 4.97589605128521e-06, + "loss": 0.3215, + "step": 3113 + }, + { + "epoch": 0.15, + "grad_norm": 0.705697243570219, + "learning_rate": 4.975869772173751e-06, + "loss": 0.3113, + "step": 3114 + }, + { + "epoch": 0.15, + "grad_norm": 0.6984176328406172, + "learning_rate": 4.975843478814285e-06, + "loss": 0.3117, + "step": 3115 + }, + { + "epoch": 0.15, + "grad_norm": 0.7880094972004186, + "learning_rate": 4.975817171206961e-06, + "loss": 0.3093, + "step": 3116 + }, + { + "epoch": 0.15, + "grad_norm": 0.6455665309875842, + "learning_rate": 4.975790849351932e-06, + "loss": 0.2705, + "step": 3117 + }, + { + "epoch": 0.15, + "grad_norm": 0.6764762750986725, + "learning_rate": 4.975764513249349e-06, + "loss": 0.3185, + "step": 3118 + }, + { + "epoch": 0.15, + "grad_norm": 0.6528838312380406, + "learning_rate": 4.9757381628993624e-06, + "loss": 0.2896, + "step": 3119 + }, + { + "epoch": 0.15, + "grad_norm": 0.7283336679659015, + "learning_rate": 4.975711798302126e-06, + "loss": 0.3017, + "step": 3120 + }, + { + "epoch": 0.15, + "grad_norm": 0.6794291577917159, + "learning_rate": 4.975685419457791e-06, + "loss": 0.3039, + "step": 3121 + }, + { + "epoch": 0.15, + "grad_norm": 0.6791916690701325, + "learning_rate": 4.975659026366507e-06, + "loss": 0.2968, + "step": 3122 + }, + { + "epoch": 0.15, + "grad_norm": 0.6084119756179807, + "learning_rate": 4.975632619028429e-06, + "loss": 0.263, + "step": 3123 + }, + { + "epoch": 0.15, + "grad_norm": 0.6710764791245993, + "learning_rate": 4.975606197443706e-06, + "loss": 0.3086, + "step": 3124 + }, + { + "epoch": 0.15, + "grad_norm": 0.6531607732622622, + "learning_rate": 4.975579761612493e-06, + "loss": 0.3052, + "step": 3125 + }, + { + "epoch": 0.15, + "grad_norm": 0.6695536880710568, + "learning_rate": 4.975553311534939e-06, + "loss": 0.294, + "step": 3126 + }, + { + "epoch": 0.15, + "grad_norm": 0.7544117269853265, + "learning_rate": 4.9755268472112e-06, + "loss": 0.319, + "step": 3127 + }, + { + "epoch": 0.15, + "grad_norm": 0.6952246214258846, + "learning_rate": 4.975500368641425e-06, + "loss": 0.3142, + "step": 3128 + }, + { + "epoch": 0.15, + "grad_norm": 0.7010424926257115, + "learning_rate": 4.9754738758257684e-06, + "loss": 0.3151, + "step": 3129 + }, + { + "epoch": 0.15, + "grad_norm": 0.6619685247339846, + "learning_rate": 4.975447368764381e-06, + "loss": 0.3012, + "step": 3130 + }, + { + "epoch": 0.15, + "grad_norm": 0.6637314051767675, + "learning_rate": 4.975420847457416e-06, + "loss": 0.3124, + "step": 3131 + }, + { + "epoch": 0.15, + "grad_norm": 0.6919394231930645, + "learning_rate": 4.975394311905027e-06, + "loss": 0.3237, + "step": 3132 + }, + { + "epoch": 0.15, + "grad_norm": 0.7982328602161759, + "learning_rate": 4.975367762107365e-06, + "loss": 0.3354, + "step": 3133 + }, + { + "epoch": 0.15, + "grad_norm": 0.6642336199585558, + "learning_rate": 4.975341198064585e-06, + "loss": 0.3229, + "step": 3134 + }, + { + "epoch": 0.15, + "grad_norm": 0.6684035996817128, + "learning_rate": 4.975314619776838e-06, + "loss": 0.3314, + "step": 3135 + }, + { + "epoch": 0.15, + "grad_norm": 0.7543808796818245, + "learning_rate": 4.975288027244277e-06, + "loss": 0.3233, + "step": 3136 + }, + { + "epoch": 0.15, + "grad_norm": 0.7054572704231693, + "learning_rate": 4.9752614204670555e-06, + "loss": 0.3255, + "step": 3137 + }, + { + "epoch": 0.15, + "grad_norm": 0.7537798302054556, + "learning_rate": 4.975234799445327e-06, + "loss": 0.332, + "step": 3138 + }, + { + "epoch": 0.15, + "grad_norm": 0.656112374298139, + "learning_rate": 4.975208164179244e-06, + "loss": 0.2896, + "step": 3139 + }, + { + "epoch": 0.15, + "grad_norm": 0.6457532209227682, + "learning_rate": 4.975181514668961e-06, + "loss": 0.2911, + "step": 3140 + }, + { + "epoch": 0.15, + "grad_norm": 0.672448944359245, + "learning_rate": 4.975154850914629e-06, + "loss": 0.3033, + "step": 3141 + }, + { + "epoch": 0.15, + "grad_norm": 0.6752970726213818, + "learning_rate": 4.975128172916405e-06, + "loss": 0.3111, + "step": 3142 + }, + { + "epoch": 0.15, + "grad_norm": 0.6840753877270026, + "learning_rate": 4.975101480674439e-06, + "loss": 0.3034, + "step": 3143 + }, + { + "epoch": 0.15, + "grad_norm": 0.7380859926474068, + "learning_rate": 4.975074774188886e-06, + "loss": 0.3408, + "step": 3144 + }, + { + "epoch": 0.15, + "grad_norm": 0.6304242976016087, + "learning_rate": 4.9750480534599e-06, + "loss": 0.2842, + "step": 3145 + }, + { + "epoch": 0.15, + "grad_norm": 0.6423747623027455, + "learning_rate": 4.9750213184876354e-06, + "loss": 0.3071, + "step": 3146 + }, + { + "epoch": 0.15, + "grad_norm": 0.6805170692153831, + "learning_rate": 4.974994569272244e-06, + "loss": 0.3195, + "step": 3147 + }, + { + "epoch": 0.15, + "grad_norm": 0.6692316051109336, + "learning_rate": 4.9749678058138816e-06, + "loss": 0.3033, + "step": 3148 + }, + { + "epoch": 0.15, + "grad_norm": 0.654576939940573, + "learning_rate": 4.974941028112702e-06, + "loss": 0.3134, + "step": 3149 + }, + { + "epoch": 0.15, + "grad_norm": 0.6961540724218068, + "learning_rate": 4.974914236168858e-06, + "loss": 0.3104, + "step": 3150 + }, + { + "epoch": 0.15, + "grad_norm": 0.622297854264455, + "learning_rate": 4.9748874299825045e-06, + "loss": 0.3071, + "step": 3151 + }, + { + "epoch": 0.15, + "grad_norm": 0.6697808917817195, + "learning_rate": 4.974860609553796e-06, + "loss": 0.3228, + "step": 3152 + }, + { + "epoch": 0.15, + "grad_norm": 0.6920971545703286, + "learning_rate": 4.974833774882887e-06, + "loss": 0.305, + "step": 3153 + }, + { + "epoch": 0.15, + "grad_norm": 0.6046296047485855, + "learning_rate": 4.974806925969931e-06, + "loss": 0.2848, + "step": 3154 + }, + { + "epoch": 0.15, + "grad_norm": 0.6606302760731616, + "learning_rate": 4.974780062815085e-06, + "loss": 0.3135, + "step": 3155 + }, + { + "epoch": 0.15, + "grad_norm": 0.6972632122745962, + "learning_rate": 4.9747531854185e-06, + "loss": 0.3244, + "step": 3156 + }, + { + "epoch": 0.15, + "grad_norm": 0.695936728554159, + "learning_rate": 4.974726293780333e-06, + "loss": 0.2912, + "step": 3157 + }, + { + "epoch": 0.15, + "grad_norm": 0.7017550416893562, + "learning_rate": 4.974699387900738e-06, + "loss": 0.304, + "step": 3158 + }, + { + "epoch": 0.15, + "grad_norm": 0.6594925411274708, + "learning_rate": 4.974672467779869e-06, + "loss": 0.3008, + "step": 3159 + }, + { + "epoch": 0.15, + "grad_norm": 0.7085897180656224, + "learning_rate": 4.974645533417883e-06, + "loss": 0.3057, + "step": 3160 + }, + { + "epoch": 0.15, + "grad_norm": 0.6897204654110072, + "learning_rate": 4.974618584814935e-06, + "loss": 0.3123, + "step": 3161 + }, + { + "epoch": 0.15, + "grad_norm": 0.671585114621687, + "learning_rate": 4.974591621971177e-06, + "loss": 0.304, + "step": 3162 + }, + { + "epoch": 0.15, + "grad_norm": 0.6795417964857438, + "learning_rate": 4.974564644886768e-06, + "loss": 0.2965, + "step": 3163 + }, + { + "epoch": 0.15, + "grad_norm": 0.6445443631743831, + "learning_rate": 4.97453765356186e-06, + "loss": 0.2921, + "step": 3164 + }, + { + "epoch": 0.15, + "grad_norm": 0.7218426945036918, + "learning_rate": 4.974510647996611e-06, + "loss": 0.3053, + "step": 3165 + }, + { + "epoch": 0.15, + "grad_norm": 0.6808986919904879, + "learning_rate": 4.974483628191174e-06, + "loss": 0.2982, + "step": 3166 + }, + { + "epoch": 0.15, + "grad_norm": 0.7480942061197398, + "learning_rate": 4.974456594145707e-06, + "loss": 0.3075, + "step": 3167 + }, + { + "epoch": 0.15, + "grad_norm": 0.726518183358616, + "learning_rate": 4.974429545860363e-06, + "loss": 0.3087, + "step": 3168 + }, + { + "epoch": 0.15, + "grad_norm": 0.7232853883993895, + "learning_rate": 4.974402483335299e-06, + "loss": 0.287, + "step": 3169 + }, + { + "epoch": 0.15, + "grad_norm": 0.6746819789892432, + "learning_rate": 4.974375406570671e-06, + "loss": 0.3002, + "step": 3170 + }, + { + "epoch": 0.15, + "grad_norm": 0.7138916236518111, + "learning_rate": 4.9743483155666345e-06, + "loss": 0.3288, + "step": 3171 + }, + { + "epoch": 0.15, + "grad_norm": 0.6477171911935731, + "learning_rate": 4.974321210323345e-06, + "loss": 0.3036, + "step": 3172 + }, + { + "epoch": 0.15, + "grad_norm": 0.7091375567075604, + "learning_rate": 4.974294090840958e-06, + "loss": 0.3069, + "step": 3173 + }, + { + "epoch": 0.15, + "grad_norm": 0.6769956082494489, + "learning_rate": 4.974266957119633e-06, + "loss": 0.2937, + "step": 3174 + }, + { + "epoch": 0.15, + "grad_norm": 0.6639327662957786, + "learning_rate": 4.974239809159521e-06, + "loss": 0.3096, + "step": 3175 + }, + { + "epoch": 0.15, + "grad_norm": 0.6700964330934187, + "learning_rate": 4.974212646960782e-06, + "loss": 0.2804, + "step": 3176 + }, + { + "epoch": 0.15, + "grad_norm": 0.7264355985357308, + "learning_rate": 4.974185470523571e-06, + "loss": 0.3081, + "step": 3177 + }, + { + "epoch": 0.15, + "grad_norm": 0.6813583805024244, + "learning_rate": 4.974158279848045e-06, + "loss": 0.3194, + "step": 3178 + }, + { + "epoch": 0.15, + "grad_norm": 0.6943989058385741, + "learning_rate": 4.974131074934359e-06, + "loss": 0.3271, + "step": 3179 + }, + { + "epoch": 0.15, + "grad_norm": 0.635711069345906, + "learning_rate": 4.974103855782671e-06, + "loss": 0.297, + "step": 3180 + }, + { + "epoch": 0.15, + "grad_norm": 0.6993869200879856, + "learning_rate": 4.974076622393136e-06, + "loss": 0.2825, + "step": 3181 + }, + { + "epoch": 0.15, + "grad_norm": 0.7550720852175578, + "learning_rate": 4.974049374765913e-06, + "loss": 0.3311, + "step": 3182 + }, + { + "epoch": 0.15, + "grad_norm": 0.6824198393035944, + "learning_rate": 4.974022112901158e-06, + "loss": 0.3164, + "step": 3183 + }, + { + "epoch": 0.15, + "grad_norm": 0.6813363220179817, + "learning_rate": 4.973994836799026e-06, + "loss": 0.2996, + "step": 3184 + }, + { + "epoch": 0.15, + "grad_norm": 0.691485828508293, + "learning_rate": 4.973967546459677e-06, + "loss": 0.3102, + "step": 3185 + }, + { + "epoch": 0.15, + "grad_norm": 0.697356370445739, + "learning_rate": 4.973940241883267e-06, + "loss": 0.3124, + "step": 3186 + }, + { + "epoch": 0.15, + "grad_norm": 0.7051388938852045, + "learning_rate": 4.973912923069951e-06, + "loss": 0.2948, + "step": 3187 + }, + { + "epoch": 0.15, + "grad_norm": 0.6902009554302355, + "learning_rate": 4.973885590019889e-06, + "loss": 0.301, + "step": 3188 + }, + { + "epoch": 0.15, + "grad_norm": 0.7206157428726775, + "learning_rate": 4.973858242733237e-06, + "loss": 0.3314, + "step": 3189 + }, + { + "epoch": 0.15, + "grad_norm": 0.6604961322980587, + "learning_rate": 4.973830881210153e-06, + "loss": 0.3101, + "step": 3190 + }, + { + "epoch": 0.15, + "grad_norm": 0.6498596402598587, + "learning_rate": 4.9738035054507935e-06, + "loss": 0.2745, + "step": 3191 + }, + { + "epoch": 0.15, + "grad_norm": 0.6938854197123395, + "learning_rate": 4.973776115455316e-06, + "loss": 0.3257, + "step": 3192 + }, + { + "epoch": 0.15, + "grad_norm": 0.745915708349064, + "learning_rate": 4.973748711223881e-06, + "loss": 0.3188, + "step": 3193 + }, + { + "epoch": 0.15, + "grad_norm": 0.7163575143021312, + "learning_rate": 4.973721292756641e-06, + "loss": 0.3254, + "step": 3194 + }, + { + "epoch": 0.15, + "grad_norm": 0.690615064784393, + "learning_rate": 4.973693860053759e-06, + "loss": 0.3065, + "step": 3195 + }, + { + "epoch": 0.15, + "grad_norm": 0.6818639478735496, + "learning_rate": 4.973666413115389e-06, + "loss": 0.305, + "step": 3196 + }, + { + "epoch": 0.15, + "grad_norm": 0.6864054994074072, + "learning_rate": 4.973638951941692e-06, + "loss": 0.31, + "step": 3197 + }, + { + "epoch": 0.15, + "grad_norm": 0.6475451055701894, + "learning_rate": 4.973611476532823e-06, + "loss": 0.2772, + "step": 3198 + }, + { + "epoch": 0.15, + "grad_norm": 0.6690707404480594, + "learning_rate": 4.973583986888943e-06, + "loss": 0.3038, + "step": 3199 + }, + { + "epoch": 0.15, + "grad_norm": 0.7199230454984236, + "learning_rate": 4.9735564830102075e-06, + "loss": 0.3086, + "step": 3200 + }, + { + "epoch": 0.15, + "grad_norm": 0.6893082299271855, + "learning_rate": 4.973528964896778e-06, + "loss": 0.2983, + "step": 3201 + }, + { + "epoch": 0.15, + "grad_norm": 0.7317071041377692, + "learning_rate": 4.97350143254881e-06, + "loss": 0.3264, + "step": 3202 + }, + { + "epoch": 0.15, + "grad_norm": 0.6663805979339916, + "learning_rate": 4.973473885966462e-06, + "loss": 0.3165, + "step": 3203 + }, + { + "epoch": 0.15, + "grad_norm": 0.6959752648647526, + "learning_rate": 4.973446325149894e-06, + "loss": 0.3218, + "step": 3204 + }, + { + "epoch": 0.15, + "grad_norm": 0.6799809970035542, + "learning_rate": 4.973418750099265e-06, + "loss": 0.3136, + "step": 3205 + }, + { + "epoch": 0.15, + "grad_norm": 0.6123841004400639, + "learning_rate": 4.973391160814732e-06, + "loss": 0.2899, + "step": 3206 + }, + { + "epoch": 0.15, + "grad_norm": 0.6832703495798172, + "learning_rate": 4.973363557296455e-06, + "loss": 0.2997, + "step": 3207 + }, + { + "epoch": 0.15, + "grad_norm": 0.801310664905761, + "learning_rate": 4.9733359395445926e-06, + "loss": 0.3143, + "step": 3208 + }, + { + "epoch": 0.15, + "grad_norm": 0.6695630947261402, + "learning_rate": 4.973308307559303e-06, + "loss": 0.2979, + "step": 3209 + }, + { + "epoch": 0.15, + "grad_norm": 0.7857698137071673, + "learning_rate": 4.973280661340746e-06, + "loss": 0.3077, + "step": 3210 + }, + { + "epoch": 0.15, + "grad_norm": 0.6513445103632505, + "learning_rate": 4.97325300088908e-06, + "loss": 0.3168, + "step": 3211 + }, + { + "epoch": 0.15, + "grad_norm": 0.6813598154396838, + "learning_rate": 4.973225326204464e-06, + "loss": 0.3085, + "step": 3212 + }, + { + "epoch": 0.15, + "grad_norm": 0.6068009110426618, + "learning_rate": 4.9731976372870585e-06, + "loss": 0.3119, + "step": 3213 + }, + { + "epoch": 0.15, + "grad_norm": 0.6538504332170891, + "learning_rate": 4.973169934137023e-06, + "loss": 0.2931, + "step": 3214 + }, + { + "epoch": 0.15, + "grad_norm": 0.6885837807483988, + "learning_rate": 4.9731422167545155e-06, + "loss": 0.2989, + "step": 3215 + }, + { + "epoch": 0.15, + "grad_norm": 0.7342853376288654, + "learning_rate": 4.973114485139696e-06, + "loss": 0.3206, + "step": 3216 + }, + { + "epoch": 0.15, + "grad_norm": 0.670430508481302, + "learning_rate": 4.9730867392927246e-06, + "loss": 0.3075, + "step": 3217 + }, + { + "epoch": 0.15, + "grad_norm": 0.6107219670781068, + "learning_rate": 4.97305897921376e-06, + "loss": 0.2987, + "step": 3218 + }, + { + "epoch": 0.15, + "grad_norm": 0.6502409859534586, + "learning_rate": 4.973031204902963e-06, + "loss": 0.3069, + "step": 3219 + }, + { + "epoch": 0.15, + "grad_norm": 0.6421993940609388, + "learning_rate": 4.973003416360493e-06, + "loss": 0.2843, + "step": 3220 + }, + { + "epoch": 0.15, + "grad_norm": 0.6929324689937736, + "learning_rate": 4.97297561358651e-06, + "loss": 0.3058, + "step": 3221 + }, + { + "epoch": 0.15, + "grad_norm": 0.6932002241104093, + "learning_rate": 4.9729477965811735e-06, + "loss": 0.3126, + "step": 3222 + }, + { + "epoch": 0.15, + "grad_norm": 0.6424278416982133, + "learning_rate": 4.972919965344645e-06, + "loss": 0.3048, + "step": 3223 + }, + { + "epoch": 0.15, + "grad_norm": 0.6606594325856375, + "learning_rate": 4.9728921198770825e-06, + "loss": 0.2909, + "step": 3224 + }, + { + "epoch": 0.15, + "grad_norm": 0.6793210175464343, + "learning_rate": 4.9728642601786475e-06, + "loss": 0.3054, + "step": 3225 + }, + { + "epoch": 0.15, + "grad_norm": 0.6546105199497717, + "learning_rate": 4.972836386249501e-06, + "loss": 0.299, + "step": 3226 + }, + { + "epoch": 0.15, + "grad_norm": 0.7604839946317288, + "learning_rate": 4.972808498089802e-06, + "loss": 0.3396, + "step": 3227 + }, + { + "epoch": 0.15, + "grad_norm": 0.6589708514039305, + "learning_rate": 4.972780595699711e-06, + "loss": 0.3161, + "step": 3228 + }, + { + "epoch": 0.15, + "grad_norm": 0.695067713435095, + "learning_rate": 4.97275267907939e-06, + "loss": 0.3134, + "step": 3229 + }, + { + "epoch": 0.15, + "grad_norm": 0.7263044047881404, + "learning_rate": 4.972724748228999e-06, + "loss": 0.3291, + "step": 3230 + }, + { + "epoch": 0.15, + "grad_norm": 0.6753208063827687, + "learning_rate": 4.9726968031486985e-06, + "loss": 0.2832, + "step": 3231 + }, + { + "epoch": 0.15, + "grad_norm": 0.640899741701776, + "learning_rate": 4.9726688438386494e-06, + "loss": 0.3168, + "step": 3232 + }, + { + "epoch": 0.15, + "grad_norm": 0.6275847959277713, + "learning_rate": 4.972640870299012e-06, + "loss": 0.305, + "step": 3233 + }, + { + "epoch": 0.15, + "grad_norm": 0.6510014774654888, + "learning_rate": 4.972612882529948e-06, + "loss": 0.3005, + "step": 3234 + }, + { + "epoch": 0.15, + "grad_norm": 0.6904122101910353, + "learning_rate": 4.972584880531619e-06, + "loss": 0.3206, + "step": 3235 + }, + { + "epoch": 0.15, + "grad_norm": 0.6961497043002625, + "learning_rate": 4.972556864304185e-06, + "loss": 0.3047, + "step": 3236 + }, + { + "epoch": 0.15, + "grad_norm": 0.6940923543323565, + "learning_rate": 4.972528833847807e-06, + "loss": 0.3383, + "step": 3237 + }, + { + "epoch": 0.15, + "grad_norm": 0.6729081009568226, + "learning_rate": 4.972500789162649e-06, + "loss": 0.3322, + "step": 3238 + }, + { + "epoch": 0.15, + "grad_norm": 0.6218373912943967, + "learning_rate": 4.972472730248869e-06, + "loss": 0.2916, + "step": 3239 + }, + { + "epoch": 0.15, + "grad_norm": 0.6631285420019067, + "learning_rate": 4.97244465710663e-06, + "loss": 0.3064, + "step": 3240 + }, + { + "epoch": 0.15, + "grad_norm": 0.6864319118201903, + "learning_rate": 4.972416569736092e-06, + "loss": 0.2827, + "step": 3241 + }, + { + "epoch": 0.15, + "grad_norm": 0.7811036193862453, + "learning_rate": 4.97238846813742e-06, + "loss": 0.3225, + "step": 3242 + }, + { + "epoch": 0.15, + "grad_norm": 0.7057271344024982, + "learning_rate": 4.972360352310774e-06, + "loss": 0.3003, + "step": 3243 + }, + { + "epoch": 0.15, + "grad_norm": 0.6990904048534994, + "learning_rate": 4.972332222256314e-06, + "loss": 0.31, + "step": 3244 + }, + { + "epoch": 0.15, + "grad_norm": 0.6566429532776057, + "learning_rate": 4.972304077974205e-06, + "loss": 0.3168, + "step": 3245 + }, + { + "epoch": 0.15, + "grad_norm": 0.668835273540177, + "learning_rate": 4.972275919464606e-06, + "loss": 0.3, + "step": 3246 + }, + { + "epoch": 0.15, + "grad_norm": 0.7302395974438441, + "learning_rate": 4.9722477467276816e-06, + "loss": 0.3089, + "step": 3247 + }, + { + "epoch": 0.15, + "grad_norm": 0.6719920537903835, + "learning_rate": 4.9722195597635925e-06, + "loss": 0.3143, + "step": 3248 + }, + { + "epoch": 0.15, + "grad_norm": 0.7381200598191917, + "learning_rate": 4.972191358572501e-06, + "loss": 0.3161, + "step": 3249 + }, + { + "epoch": 0.15, + "grad_norm": 0.6396777286212424, + "learning_rate": 4.97216314315457e-06, + "loss": 0.2975, + "step": 3250 + }, + { + "epoch": 0.15, + "grad_norm": 0.7004473496213856, + "learning_rate": 4.972134913509961e-06, + "loss": 0.3309, + "step": 3251 + }, + { + "epoch": 0.15, + "grad_norm": 0.6617968513629569, + "learning_rate": 4.972106669638837e-06, + "loss": 0.3177, + "step": 3252 + }, + { + "epoch": 0.15, + "grad_norm": 0.6687817478973818, + "learning_rate": 4.972078411541361e-06, + "loss": 0.3234, + "step": 3253 + }, + { + "epoch": 0.15, + "grad_norm": 0.6654715686909081, + "learning_rate": 4.972050139217694e-06, + "loss": 0.297, + "step": 3254 + }, + { + "epoch": 0.15, + "grad_norm": 0.6595061337200192, + "learning_rate": 4.972021852668001e-06, + "loss": 0.315, + "step": 3255 + }, + { + "epoch": 0.15, + "grad_norm": 0.7032699590942766, + "learning_rate": 4.971993551892442e-06, + "loss": 0.3143, + "step": 3256 + }, + { + "epoch": 0.15, + "grad_norm": 0.7042432699471265, + "learning_rate": 4.971965236891183e-06, + "loss": 0.3111, + "step": 3257 + }, + { + "epoch": 0.15, + "grad_norm": 0.6775354191720786, + "learning_rate": 4.971936907664385e-06, + "loss": 0.3202, + "step": 3258 + }, + { + "epoch": 0.15, + "grad_norm": 0.6554453959453443, + "learning_rate": 4.971908564212211e-06, + "loss": 0.3124, + "step": 3259 + }, + { + "epoch": 0.15, + "grad_norm": 0.6585729433039264, + "learning_rate": 4.971880206534825e-06, + "loss": 0.3074, + "step": 3260 + }, + { + "epoch": 0.15, + "grad_norm": 0.7312889742422728, + "learning_rate": 4.971851834632388e-06, + "loss": 0.3341, + "step": 3261 + }, + { + "epoch": 0.15, + "grad_norm": 0.6836606214856308, + "learning_rate": 4.971823448505067e-06, + "loss": 0.3151, + "step": 3262 + }, + { + "epoch": 0.15, + "grad_norm": 0.6361463517146924, + "learning_rate": 4.971795048153023e-06, + "loss": 0.313, + "step": 3263 + }, + { + "epoch": 0.15, + "grad_norm": 0.6709666848284852, + "learning_rate": 4.9717666335764194e-06, + "loss": 0.2896, + "step": 3264 + }, + { + "epoch": 0.15, + "grad_norm": 0.6347077088603799, + "learning_rate": 4.97173820477542e-06, + "loss": 0.2947, + "step": 3265 + }, + { + "epoch": 0.15, + "grad_norm": 0.6057793551841573, + "learning_rate": 4.971709761750189e-06, + "loss": 0.312, + "step": 3266 + }, + { + "epoch": 0.15, + "grad_norm": 0.6090311570890187, + "learning_rate": 4.971681304500888e-06, + "loss": 0.3009, + "step": 3267 + }, + { + "epoch": 0.15, + "grad_norm": 0.6252680637738341, + "learning_rate": 4.971652833027683e-06, + "loss": 0.2951, + "step": 3268 + }, + { + "epoch": 0.15, + "grad_norm": 0.6632583844371368, + "learning_rate": 4.971624347330739e-06, + "loss": 0.3086, + "step": 3269 + }, + { + "epoch": 0.15, + "grad_norm": 0.7107632564484516, + "learning_rate": 4.971595847410216e-06, + "loss": 0.3189, + "step": 3270 + }, + { + "epoch": 0.15, + "grad_norm": 0.6505439970215822, + "learning_rate": 4.971567333266281e-06, + "loss": 0.3007, + "step": 3271 + }, + { + "epoch": 0.15, + "grad_norm": 0.7036311741936294, + "learning_rate": 4.971538804899097e-06, + "loss": 0.3127, + "step": 3272 + }, + { + "epoch": 0.15, + "grad_norm": 0.6680023806872709, + "learning_rate": 4.971510262308828e-06, + "loss": 0.3166, + "step": 3273 + }, + { + "epoch": 0.15, + "grad_norm": 0.6392994370935939, + "learning_rate": 4.971481705495639e-06, + "loss": 0.2968, + "step": 3274 + }, + { + "epoch": 0.15, + "grad_norm": 0.657004448811326, + "learning_rate": 4.971453134459694e-06, + "loss": 0.2952, + "step": 3275 + }, + { + "epoch": 0.15, + "grad_norm": 0.662430244373718, + "learning_rate": 4.971424549201157e-06, + "loss": 0.2944, + "step": 3276 + }, + { + "epoch": 0.15, + "grad_norm": 0.6230697454962578, + "learning_rate": 4.971395949720194e-06, + "loss": 0.2834, + "step": 3277 + }, + { + "epoch": 0.15, + "grad_norm": 0.6411402937281718, + "learning_rate": 4.971367336016968e-06, + "loss": 0.3066, + "step": 3278 + }, + { + "epoch": 0.15, + "grad_norm": 0.6730133867988157, + "learning_rate": 4.971338708091643e-06, + "loss": 0.3187, + "step": 3279 + }, + { + "epoch": 0.15, + "grad_norm": 0.621560870909766, + "learning_rate": 4.971310065944386e-06, + "loss": 0.2908, + "step": 3280 + }, + { + "epoch": 0.15, + "grad_norm": 0.6802221461018401, + "learning_rate": 4.971281409575361e-06, + "loss": 0.3368, + "step": 3281 + }, + { + "epoch": 0.15, + "grad_norm": 0.6567291624661407, + "learning_rate": 4.971252738984732e-06, + "loss": 0.3044, + "step": 3282 + }, + { + "epoch": 0.15, + "grad_norm": 0.6679575673474563, + "learning_rate": 4.9712240541726644e-06, + "loss": 0.3003, + "step": 3283 + }, + { + "epoch": 0.15, + "grad_norm": 0.7454792218012393, + "learning_rate": 4.9711953551393235e-06, + "loss": 0.3283, + "step": 3284 + }, + { + "epoch": 0.15, + "grad_norm": 0.6578488468983894, + "learning_rate": 4.9711666418848745e-06, + "loss": 0.3105, + "step": 3285 + }, + { + "epoch": 0.15, + "grad_norm": 0.751076368615146, + "learning_rate": 4.9711379144094835e-06, + "loss": 0.3089, + "step": 3286 + }, + { + "epoch": 0.15, + "grad_norm": 0.6480938838764851, + "learning_rate": 4.971109172713314e-06, + "loss": 0.3018, + "step": 3287 + }, + { + "epoch": 0.15, + "grad_norm": 0.6694505039927621, + "learning_rate": 4.971080416796533e-06, + "loss": 0.2924, + "step": 3288 + }, + { + "epoch": 0.15, + "grad_norm": 0.7032137882978832, + "learning_rate": 4.971051646659304e-06, + "loss": 0.3133, + "step": 3289 + }, + { + "epoch": 0.15, + "grad_norm": 0.6701017159791918, + "learning_rate": 4.971022862301795e-06, + "loss": 0.3102, + "step": 3290 + }, + { + "epoch": 0.15, + "grad_norm": 0.6825408266202231, + "learning_rate": 4.9709940637241705e-06, + "loss": 0.3329, + "step": 3291 + }, + { + "epoch": 0.15, + "grad_norm": 0.6862233963306584, + "learning_rate": 4.970965250926595e-06, + "loss": 0.3287, + "step": 3292 + }, + { + "epoch": 0.15, + "grad_norm": 0.7056046598348609, + "learning_rate": 4.970936423909237e-06, + "loss": 0.2987, + "step": 3293 + }, + { + "epoch": 0.15, + "grad_norm": 0.7087302744096199, + "learning_rate": 4.97090758267226e-06, + "loss": 0.3199, + "step": 3294 + }, + { + "epoch": 0.15, + "grad_norm": 0.6904075216270879, + "learning_rate": 4.970878727215831e-06, + "loss": 0.3197, + "step": 3295 + }, + { + "epoch": 0.15, + "grad_norm": 0.7385070144921659, + "learning_rate": 4.970849857540116e-06, + "loss": 0.3248, + "step": 3296 + }, + { + "epoch": 0.15, + "grad_norm": 0.6374716238093078, + "learning_rate": 4.970820973645282e-06, + "loss": 0.3026, + "step": 3297 + }, + { + "epoch": 0.15, + "grad_norm": 0.6800409976192051, + "learning_rate": 4.970792075531493e-06, + "loss": 0.2908, + "step": 3298 + }, + { + "epoch": 0.15, + "grad_norm": 0.6125050812731417, + "learning_rate": 4.9707631631989174e-06, + "loss": 0.2937, + "step": 3299 + }, + { + "epoch": 0.15, + "grad_norm": 0.7443215144935681, + "learning_rate": 4.970734236647721e-06, + "loss": 0.3203, + "step": 3300 + }, + { + "epoch": 0.15, + "grad_norm": 0.6598122299910438, + "learning_rate": 4.97070529587807e-06, + "loss": 0.3045, + "step": 3301 + }, + { + "epoch": 0.15, + "grad_norm": 0.6988204830817629, + "learning_rate": 4.970676340890131e-06, + "loss": 0.3049, + "step": 3302 + }, + { + "epoch": 0.15, + "grad_norm": 0.6951771416110896, + "learning_rate": 4.97064737168407e-06, + "loss": 0.3351, + "step": 3303 + }, + { + "epoch": 0.15, + "grad_norm": 0.6353831187457075, + "learning_rate": 4.970618388260055e-06, + "loss": 0.2882, + "step": 3304 + }, + { + "epoch": 0.15, + "grad_norm": 0.6384316837681608, + "learning_rate": 4.970589390618251e-06, + "loss": 0.2914, + "step": 3305 + }, + { + "epoch": 0.15, + "grad_norm": 0.6785834067701161, + "learning_rate": 4.970560378758827e-06, + "loss": 0.2976, + "step": 3306 + }, + { + "epoch": 0.15, + "grad_norm": 0.623426582671745, + "learning_rate": 4.970531352681949e-06, + "loss": 0.3009, + "step": 3307 + }, + { + "epoch": 0.15, + "grad_norm": 0.6384777495014755, + "learning_rate": 4.9705023123877836e-06, + "loss": 0.2796, + "step": 3308 + }, + { + "epoch": 0.16, + "grad_norm": 0.7302808905749637, + "learning_rate": 4.970473257876498e-06, + "loss": 0.3107, + "step": 3309 + }, + { + "epoch": 0.16, + "grad_norm": 0.6862286719098859, + "learning_rate": 4.97044418914826e-06, + "loss": 0.2939, + "step": 3310 + }, + { + "epoch": 0.16, + "grad_norm": 0.6646029602970444, + "learning_rate": 4.970415106203237e-06, + "loss": 0.2997, + "step": 3311 + }, + { + "epoch": 0.16, + "grad_norm": 0.6479261055492358, + "learning_rate": 4.970386009041596e-06, + "loss": 0.2986, + "step": 3312 + }, + { + "epoch": 0.16, + "grad_norm": 0.7036114863013374, + "learning_rate": 4.970356897663504e-06, + "loss": 0.3111, + "step": 3313 + }, + { + "epoch": 0.16, + "grad_norm": 0.7591278061764847, + "learning_rate": 4.97032777206913e-06, + "loss": 0.3062, + "step": 3314 + }, + { + "epoch": 0.16, + "grad_norm": 0.6389615160161469, + "learning_rate": 4.97029863225864e-06, + "loss": 0.3043, + "step": 3315 + }, + { + "epoch": 0.16, + "grad_norm": 0.6951655666077042, + "learning_rate": 4.9702694782322015e-06, + "loss": 0.3209, + "step": 3316 + }, + { + "epoch": 0.16, + "grad_norm": 0.6852194699372498, + "learning_rate": 4.970240309989984e-06, + "loss": 0.3102, + "step": 3317 + }, + { + "epoch": 0.16, + "grad_norm": 0.7127718829451639, + "learning_rate": 4.970211127532154e-06, + "loss": 0.3138, + "step": 3318 + }, + { + "epoch": 0.16, + "grad_norm": 0.7003066715613847, + "learning_rate": 4.970181930858879e-06, + "loss": 0.2878, + "step": 3319 + }, + { + "epoch": 0.16, + "grad_norm": 0.7179457272533443, + "learning_rate": 4.970152719970329e-06, + "loss": 0.3199, + "step": 3320 + }, + { + "epoch": 0.16, + "grad_norm": 0.6050291033978676, + "learning_rate": 4.970123494866671e-06, + "loss": 0.2801, + "step": 3321 + }, + { + "epoch": 0.16, + "grad_norm": 0.7079089525130537, + "learning_rate": 4.970094255548073e-06, + "loss": 0.3093, + "step": 3322 + }, + { + "epoch": 0.16, + "grad_norm": 0.6606038891439958, + "learning_rate": 4.970065002014702e-06, + "loss": 0.301, + "step": 3323 + }, + { + "epoch": 0.16, + "grad_norm": 0.6958499950602322, + "learning_rate": 4.970035734266729e-06, + "loss": 0.3159, + "step": 3324 + }, + { + "epoch": 0.16, + "grad_norm": 0.6496889799060828, + "learning_rate": 4.970006452304322e-06, + "loss": 0.3, + "step": 3325 + }, + { + "epoch": 0.16, + "grad_norm": 0.7199394542601798, + "learning_rate": 4.9699771561276474e-06, + "loss": 0.3094, + "step": 3326 + }, + { + "epoch": 0.16, + "grad_norm": 0.6624095174430409, + "learning_rate": 4.969947845736876e-06, + "loss": 0.3098, + "step": 3327 + }, + { + "epoch": 0.16, + "grad_norm": 0.6188827797616617, + "learning_rate": 4.969918521132175e-06, + "loss": 0.3104, + "step": 3328 + }, + { + "epoch": 0.16, + "grad_norm": 0.6485475557282167, + "learning_rate": 4.969889182313713e-06, + "loss": 0.2968, + "step": 3329 + }, + { + "epoch": 0.16, + "grad_norm": 0.6820803344689503, + "learning_rate": 4.96985982928166e-06, + "loss": 0.3014, + "step": 3330 + }, + { + "epoch": 0.16, + "grad_norm": 0.6761631707698712, + "learning_rate": 4.969830462036184e-06, + "loss": 0.3184, + "step": 3331 + }, + { + "epoch": 0.16, + "grad_norm": 0.6583961709060958, + "learning_rate": 4.969801080577455e-06, + "loss": 0.2976, + "step": 3332 + }, + { + "epoch": 0.16, + "grad_norm": 0.7486106853588954, + "learning_rate": 4.969771684905642e-06, + "loss": 0.3245, + "step": 3333 + }, + { + "epoch": 0.16, + "grad_norm": 0.6284940079521152, + "learning_rate": 4.9697422750209134e-06, + "loss": 0.2929, + "step": 3334 + }, + { + "epoch": 0.16, + "grad_norm": 0.6252421108805043, + "learning_rate": 4.969712850923439e-06, + "loss": 0.3021, + "step": 3335 + }, + { + "epoch": 0.16, + "grad_norm": 0.6702389228550659, + "learning_rate": 4.969683412613388e-06, + "loss": 0.3134, + "step": 3336 + }, + { + "epoch": 0.16, + "grad_norm": 0.6286365771788543, + "learning_rate": 4.969653960090929e-06, + "loss": 0.3034, + "step": 3337 + }, + { + "epoch": 0.16, + "grad_norm": 0.6765726142152317, + "learning_rate": 4.969624493356232e-06, + "loss": 0.3154, + "step": 3338 + }, + { + "epoch": 0.16, + "grad_norm": 0.6652907916491871, + "learning_rate": 4.9695950124094675e-06, + "loss": 0.3355, + "step": 3339 + }, + { + "epoch": 0.16, + "grad_norm": 0.6305610253898746, + "learning_rate": 4.969565517250804e-06, + "loss": 0.2985, + "step": 3340 + }, + { + "epoch": 0.16, + "grad_norm": 0.6387595133547307, + "learning_rate": 4.969536007880412e-06, + "loss": 0.3167, + "step": 3341 + }, + { + "epoch": 0.16, + "grad_norm": 0.6188959190218792, + "learning_rate": 4.969506484298461e-06, + "loss": 0.3147, + "step": 3342 + }, + { + "epoch": 0.16, + "grad_norm": 0.6378675476502227, + "learning_rate": 4.96947694650512e-06, + "loss": 0.2847, + "step": 3343 + }, + { + "epoch": 0.16, + "grad_norm": 0.6628555517682636, + "learning_rate": 4.969447394500561e-06, + "loss": 0.3219, + "step": 3344 + }, + { + "epoch": 0.16, + "grad_norm": 0.6878191939648686, + "learning_rate": 4.969417828284952e-06, + "loss": 0.3247, + "step": 3345 + }, + { + "epoch": 0.16, + "grad_norm": 0.6359650663910151, + "learning_rate": 4.969388247858464e-06, + "loss": 0.3025, + "step": 3346 + }, + { + "epoch": 0.16, + "grad_norm": 0.63751393187577, + "learning_rate": 4.969358653221268e-06, + "loss": 0.302, + "step": 3347 + }, + { + "epoch": 0.16, + "grad_norm": 0.6445679040157073, + "learning_rate": 4.969329044373534e-06, + "loss": 0.2998, + "step": 3348 + }, + { + "epoch": 0.16, + "grad_norm": 0.6759104003915639, + "learning_rate": 4.969299421315431e-06, + "loss": 0.3182, + "step": 3349 + }, + { + "epoch": 0.16, + "grad_norm": 0.6439350004854352, + "learning_rate": 4.96926978404713e-06, + "loss": 0.2883, + "step": 3350 + }, + { + "epoch": 0.16, + "grad_norm": 0.6817589107434322, + "learning_rate": 4.969240132568803e-06, + "loss": 0.2883, + "step": 3351 + }, + { + "epoch": 0.16, + "grad_norm": 0.647441175718101, + "learning_rate": 4.96921046688062e-06, + "loss": 0.2976, + "step": 3352 + }, + { + "epoch": 0.16, + "grad_norm": 0.6029983072523059, + "learning_rate": 4.969180786982751e-06, + "loss": 0.2835, + "step": 3353 + }, + { + "epoch": 0.16, + "grad_norm": 0.646510423671779, + "learning_rate": 4.969151092875367e-06, + "loss": 0.2763, + "step": 3354 + }, + { + "epoch": 0.16, + "grad_norm": 0.6561194948619423, + "learning_rate": 4.969121384558639e-06, + "loss": 0.3122, + "step": 3355 + }, + { + "epoch": 0.16, + "grad_norm": 0.6925329734671374, + "learning_rate": 4.969091662032738e-06, + "loss": 0.3185, + "step": 3356 + }, + { + "epoch": 0.16, + "grad_norm": 0.6906957041655972, + "learning_rate": 4.969061925297836e-06, + "loss": 0.2982, + "step": 3357 + }, + { + "epoch": 0.16, + "grad_norm": 0.6859502926492226, + "learning_rate": 4.9690321743541015e-06, + "loss": 0.3019, + "step": 3358 + }, + { + "epoch": 0.16, + "grad_norm": 0.6949606050582727, + "learning_rate": 4.969002409201709e-06, + "loss": 0.3246, + "step": 3359 + }, + { + "epoch": 0.16, + "grad_norm": 0.6531260883111191, + "learning_rate": 4.968972629840827e-06, + "loss": 0.2916, + "step": 3360 + }, + { + "epoch": 0.16, + "grad_norm": 0.6344962838416676, + "learning_rate": 4.968942836271628e-06, + "loss": 0.3073, + "step": 3361 + }, + { + "epoch": 0.16, + "grad_norm": 0.7120058259733749, + "learning_rate": 4.968913028494285e-06, + "loss": 0.3069, + "step": 3362 + }, + { + "epoch": 0.16, + "grad_norm": 0.6977198762251349, + "learning_rate": 4.968883206508966e-06, + "loss": 0.3135, + "step": 3363 + }, + { + "epoch": 0.16, + "grad_norm": 0.7231799345398543, + "learning_rate": 4.968853370315846e-06, + "loss": 0.3163, + "step": 3364 + }, + { + "epoch": 0.16, + "grad_norm": 0.723014747181904, + "learning_rate": 4.9688235199150955e-06, + "loss": 0.305, + "step": 3365 + }, + { + "epoch": 0.16, + "grad_norm": 0.6340815634519422, + "learning_rate": 4.968793655306886e-06, + "loss": 0.291, + "step": 3366 + }, + { + "epoch": 0.16, + "grad_norm": 0.6176266773247702, + "learning_rate": 4.968763776491389e-06, + "loss": 0.3029, + "step": 3367 + }, + { + "epoch": 0.16, + "grad_norm": 0.6353369707349245, + "learning_rate": 4.968733883468777e-06, + "loss": 0.3086, + "step": 3368 + }, + { + "epoch": 0.16, + "grad_norm": 0.6177964789281678, + "learning_rate": 4.968703976239223e-06, + "loss": 0.2936, + "step": 3369 + }, + { + "epoch": 0.16, + "grad_norm": 0.6880193612330779, + "learning_rate": 4.968674054802897e-06, + "loss": 0.3268, + "step": 3370 + }, + { + "epoch": 0.16, + "grad_norm": 0.650977505340899, + "learning_rate": 4.968644119159973e-06, + "loss": 0.3138, + "step": 3371 + }, + { + "epoch": 0.16, + "grad_norm": 0.6472720852630887, + "learning_rate": 4.9686141693106224e-06, + "loss": 0.3091, + "step": 3372 + }, + { + "epoch": 0.16, + "grad_norm": 0.6617614411558547, + "learning_rate": 4.968584205255017e-06, + "loss": 0.3155, + "step": 3373 + }, + { + "epoch": 0.16, + "grad_norm": 0.7244315504943746, + "learning_rate": 4.968554226993331e-06, + "loss": 0.3127, + "step": 3374 + }, + { + "epoch": 0.16, + "grad_norm": 0.6845135645723774, + "learning_rate": 4.968524234525736e-06, + "loss": 0.3164, + "step": 3375 + }, + { + "epoch": 0.16, + "grad_norm": 0.6510824030401108, + "learning_rate": 4.968494227852403e-06, + "loss": 0.3105, + "step": 3376 + }, + { + "epoch": 0.16, + "grad_norm": 0.6802329629364813, + "learning_rate": 4.968464206973508e-06, + "loss": 0.3048, + "step": 3377 + }, + { + "epoch": 0.16, + "grad_norm": 0.6845863790624869, + "learning_rate": 4.96843417188922e-06, + "loss": 0.3068, + "step": 3378 + }, + { + "epoch": 0.16, + "grad_norm": 0.6416408036993533, + "learning_rate": 4.968404122599715e-06, + "loss": 0.3224, + "step": 3379 + }, + { + "epoch": 0.16, + "grad_norm": 0.6686613102127152, + "learning_rate": 4.968374059105164e-06, + "loss": 0.3246, + "step": 3380 + }, + { + "epoch": 0.16, + "grad_norm": 0.6553388916819008, + "learning_rate": 4.968343981405741e-06, + "loss": 0.2805, + "step": 3381 + }, + { + "epoch": 0.16, + "grad_norm": 0.6669352369189859, + "learning_rate": 4.968313889501619e-06, + "loss": 0.2976, + "step": 3382 + }, + { + "epoch": 0.16, + "grad_norm": 0.7205675719405906, + "learning_rate": 4.968283783392971e-06, + "loss": 0.3014, + "step": 3383 + }, + { + "epoch": 0.16, + "grad_norm": 0.6549969814529539, + "learning_rate": 4.96825366307997e-06, + "loss": 0.3075, + "step": 3384 + }, + { + "epoch": 0.16, + "grad_norm": 0.6285099559225316, + "learning_rate": 4.96822352856279e-06, + "loss": 0.3188, + "step": 3385 + }, + { + "epoch": 0.16, + "grad_norm": 0.6884543672259118, + "learning_rate": 4.968193379841603e-06, + "loss": 0.308, + "step": 3386 + }, + { + "epoch": 0.16, + "grad_norm": 0.6401037408266127, + "learning_rate": 4.968163216916584e-06, + "loss": 0.2952, + "step": 3387 + }, + { + "epoch": 0.16, + "grad_norm": 0.6903057038508503, + "learning_rate": 4.968133039787906e-06, + "loss": 0.3403, + "step": 3388 + }, + { + "epoch": 0.16, + "grad_norm": 0.6774388244341045, + "learning_rate": 4.968102848455743e-06, + "loss": 0.3104, + "step": 3389 + }, + { + "epoch": 0.16, + "grad_norm": 0.5729777024306506, + "learning_rate": 4.968072642920268e-06, + "loss": 0.2817, + "step": 3390 + }, + { + "epoch": 0.16, + "grad_norm": 0.6699944709989725, + "learning_rate": 4.9680424231816555e-06, + "loss": 0.3179, + "step": 3391 + }, + { + "epoch": 0.16, + "grad_norm": 0.6650488020432269, + "learning_rate": 4.968012189240079e-06, + "loss": 0.3065, + "step": 3392 + }, + { + "epoch": 0.16, + "grad_norm": 0.6288524438435954, + "learning_rate": 4.967981941095713e-06, + "loss": 0.2967, + "step": 3393 + }, + { + "epoch": 0.16, + "grad_norm": 0.6633726456895555, + "learning_rate": 4.9679516787487305e-06, + "loss": 0.2972, + "step": 3394 + }, + { + "epoch": 0.16, + "grad_norm": 0.6300012254897606, + "learning_rate": 4.9679214021993075e-06, + "loss": 0.2761, + "step": 3395 + }, + { + "epoch": 0.16, + "grad_norm": 0.6774475885255693, + "learning_rate": 4.967891111447616e-06, + "loss": 0.3097, + "step": 3396 + }, + { + "epoch": 0.16, + "grad_norm": 0.6709929183972987, + "learning_rate": 4.967860806493832e-06, + "loss": 0.314, + "step": 3397 + }, + { + "epoch": 0.16, + "grad_norm": 0.6267328677791072, + "learning_rate": 4.967830487338129e-06, + "loss": 0.3069, + "step": 3398 + }, + { + "epoch": 0.16, + "grad_norm": 0.6648103795714004, + "learning_rate": 4.967800153980682e-06, + "loss": 0.2893, + "step": 3399 + }, + { + "epoch": 0.16, + "grad_norm": 0.6830730706831809, + "learning_rate": 4.967769806421666e-06, + "loss": 0.3144, + "step": 3400 + }, + { + "epoch": 0.16, + "grad_norm": 0.6754415027479043, + "learning_rate": 4.967739444661254e-06, + "loss": 0.303, + "step": 3401 + }, + { + "epoch": 0.16, + "grad_norm": 0.6693280028784325, + "learning_rate": 4.967709068699622e-06, + "loss": 0.3081, + "step": 3402 + }, + { + "epoch": 0.16, + "grad_norm": 0.7103880361664633, + "learning_rate": 4.967678678536945e-06, + "loss": 0.3007, + "step": 3403 + }, + { + "epoch": 0.16, + "grad_norm": 0.7257640773630752, + "learning_rate": 4.967648274173398e-06, + "loss": 0.3181, + "step": 3404 + }, + { + "epoch": 0.16, + "grad_norm": 0.6434222379059561, + "learning_rate": 4.967617855609154e-06, + "loss": 0.2972, + "step": 3405 + }, + { + "epoch": 0.16, + "grad_norm": 0.6067760790291001, + "learning_rate": 4.96758742284439e-06, + "loss": 0.3014, + "step": 3406 + }, + { + "epoch": 0.16, + "grad_norm": 0.621598034752574, + "learning_rate": 4.967556975879281e-06, + "loss": 0.3085, + "step": 3407 + }, + { + "epoch": 0.16, + "grad_norm": 0.6600944444364549, + "learning_rate": 4.967526514714001e-06, + "loss": 0.3026, + "step": 3408 + }, + { + "epoch": 0.16, + "grad_norm": 0.6630119127748713, + "learning_rate": 4.967496039348727e-06, + "loss": 0.3108, + "step": 3409 + }, + { + "epoch": 0.16, + "grad_norm": 0.648938503159328, + "learning_rate": 4.967465549783633e-06, + "loss": 0.302, + "step": 3410 + }, + { + "epoch": 0.16, + "grad_norm": 0.6118861486401367, + "learning_rate": 4.967435046018894e-06, + "loss": 0.3005, + "step": 3411 + }, + { + "epoch": 0.16, + "grad_norm": 0.6382761496917141, + "learning_rate": 4.967404528054688e-06, + "loss": 0.306, + "step": 3412 + }, + { + "epoch": 0.16, + "grad_norm": 0.6358586480175769, + "learning_rate": 4.967373995891188e-06, + "loss": 0.3109, + "step": 3413 + }, + { + "epoch": 0.16, + "grad_norm": 0.6555684992154505, + "learning_rate": 4.967343449528572e-06, + "loss": 0.2981, + "step": 3414 + }, + { + "epoch": 0.16, + "grad_norm": 0.704395812558234, + "learning_rate": 4.9673128889670135e-06, + "loss": 0.2932, + "step": 3415 + }, + { + "epoch": 0.16, + "grad_norm": 0.6195908692051428, + "learning_rate": 4.96728231420669e-06, + "loss": 0.2998, + "step": 3416 + }, + { + "epoch": 0.16, + "grad_norm": 0.6531819372349852, + "learning_rate": 4.967251725247777e-06, + "loss": 0.2985, + "step": 3417 + }, + { + "epoch": 0.16, + "grad_norm": 0.682820875094087, + "learning_rate": 4.96722112209045e-06, + "loss": 0.3131, + "step": 3418 + }, + { + "epoch": 0.16, + "grad_norm": 0.6745645837628049, + "learning_rate": 4.967190504734886e-06, + "loss": 0.2804, + "step": 3419 + }, + { + "epoch": 0.16, + "grad_norm": 0.5955324893478968, + "learning_rate": 4.96715987318126e-06, + "loss": 0.2982, + "step": 3420 + }, + { + "epoch": 0.16, + "grad_norm": 0.6563292488745064, + "learning_rate": 4.96712922742975e-06, + "loss": 0.2829, + "step": 3421 + }, + { + "epoch": 0.16, + "grad_norm": 0.6843042217558536, + "learning_rate": 4.967098567480531e-06, + "loss": 0.2886, + "step": 3422 + }, + { + "epoch": 0.16, + "grad_norm": 0.6842703207178971, + "learning_rate": 4.96706789333378e-06, + "loss": 0.3227, + "step": 3423 + }, + { + "epoch": 0.16, + "grad_norm": 0.7275329383502238, + "learning_rate": 4.967037204989672e-06, + "loss": 0.3233, + "step": 3424 + }, + { + "epoch": 0.16, + "grad_norm": 0.6634477011988483, + "learning_rate": 4.967006502448386e-06, + "loss": 0.2941, + "step": 3425 + }, + { + "epoch": 0.16, + "grad_norm": 0.6852638278588341, + "learning_rate": 4.966975785710099e-06, + "loss": 0.3009, + "step": 3426 + }, + { + "epoch": 0.16, + "grad_norm": 0.6781271715216793, + "learning_rate": 4.966945054774984e-06, + "loss": 0.3135, + "step": 3427 + }, + { + "epoch": 0.16, + "grad_norm": 0.66181466904244, + "learning_rate": 4.9669143096432215e-06, + "loss": 0.2995, + "step": 3428 + }, + { + "epoch": 0.16, + "grad_norm": 0.6500337396156831, + "learning_rate": 4.966883550314987e-06, + "loss": 0.3149, + "step": 3429 + }, + { + "epoch": 0.16, + "grad_norm": 0.6637636667164385, + "learning_rate": 4.966852776790458e-06, + "loss": 0.3075, + "step": 3430 + }, + { + "epoch": 0.16, + "grad_norm": 0.6451816884030155, + "learning_rate": 4.966821989069811e-06, + "loss": 0.3114, + "step": 3431 + }, + { + "epoch": 0.16, + "grad_norm": 0.6838828086296613, + "learning_rate": 4.966791187153224e-06, + "loss": 0.2937, + "step": 3432 + }, + { + "epoch": 0.16, + "grad_norm": 0.6656970745928591, + "learning_rate": 4.966760371040873e-06, + "loss": 0.3139, + "step": 3433 + }, + { + "epoch": 0.16, + "grad_norm": 0.7068237075254736, + "learning_rate": 4.966729540732936e-06, + "loss": 0.3318, + "step": 3434 + }, + { + "epoch": 0.16, + "grad_norm": 0.7045755734229551, + "learning_rate": 4.9666986962295906e-06, + "loss": 0.3407, + "step": 3435 + }, + { + "epoch": 0.16, + "grad_norm": 0.6549504752472107, + "learning_rate": 4.966667837531015e-06, + "loss": 0.2863, + "step": 3436 + }, + { + "epoch": 0.16, + "grad_norm": 0.6615649082627856, + "learning_rate": 4.9666369646373845e-06, + "loss": 0.3041, + "step": 3437 + }, + { + "epoch": 0.16, + "grad_norm": 0.6727879612093063, + "learning_rate": 4.96660607754888e-06, + "loss": 0.3037, + "step": 3438 + }, + { + "epoch": 0.16, + "grad_norm": 0.6652027925827134, + "learning_rate": 4.966575176265676e-06, + "loss": 0.2923, + "step": 3439 + }, + { + "epoch": 0.16, + "grad_norm": 0.6038235244178337, + "learning_rate": 4.966544260787952e-06, + "loss": 0.2956, + "step": 3440 + }, + { + "epoch": 0.16, + "grad_norm": 0.6456453690164063, + "learning_rate": 4.966513331115887e-06, + "loss": 0.2838, + "step": 3441 + }, + { + "epoch": 0.16, + "grad_norm": 0.6564052958298291, + "learning_rate": 4.966482387249656e-06, + "loss": 0.3139, + "step": 3442 + }, + { + "epoch": 0.16, + "grad_norm": 0.6506569823854772, + "learning_rate": 4.9664514291894394e-06, + "loss": 0.2983, + "step": 3443 + }, + { + "epoch": 0.16, + "grad_norm": 0.703771675717859, + "learning_rate": 4.966420456935415e-06, + "loss": 0.289, + "step": 3444 + }, + { + "epoch": 0.16, + "grad_norm": 0.6469152662976023, + "learning_rate": 4.966389470487761e-06, + "loss": 0.3144, + "step": 3445 + }, + { + "epoch": 0.16, + "grad_norm": 0.6298428215889007, + "learning_rate": 4.966358469846655e-06, + "loss": 0.2993, + "step": 3446 + }, + { + "epoch": 0.16, + "grad_norm": 0.6574713776203185, + "learning_rate": 4.9663274550122764e-06, + "loss": 0.3059, + "step": 3447 + }, + { + "epoch": 0.16, + "grad_norm": 0.6059680398302292, + "learning_rate": 4.966296425984802e-06, + "loss": 0.2912, + "step": 3448 + }, + { + "epoch": 0.16, + "grad_norm": 0.6213514532003285, + "learning_rate": 4.966265382764413e-06, + "loss": 0.3086, + "step": 3449 + }, + { + "epoch": 0.16, + "grad_norm": 0.6963776729639058, + "learning_rate": 4.966234325351286e-06, + "loss": 0.3206, + "step": 3450 + }, + { + "epoch": 0.16, + "grad_norm": 0.6350283112103439, + "learning_rate": 4.9662032537456006e-06, + "loss": 0.2908, + "step": 3451 + }, + { + "epoch": 0.16, + "grad_norm": 0.7103076616056372, + "learning_rate": 4.966172167947535e-06, + "loss": 0.317, + "step": 3452 + }, + { + "epoch": 0.16, + "grad_norm": 0.6594769221775657, + "learning_rate": 4.966141067957269e-06, + "loss": 0.2958, + "step": 3453 + }, + { + "epoch": 0.16, + "grad_norm": 0.6481970050201922, + "learning_rate": 4.96610995377498e-06, + "loss": 0.3139, + "step": 3454 + }, + { + "epoch": 0.16, + "grad_norm": 0.7278052788750831, + "learning_rate": 4.9660788254008485e-06, + "loss": 0.3406, + "step": 3455 + }, + { + "epoch": 0.16, + "grad_norm": 0.685301388434883, + "learning_rate": 4.966047682835053e-06, + "loss": 0.299, + "step": 3456 + }, + { + "epoch": 0.16, + "grad_norm": 0.657010170933295, + "learning_rate": 4.966016526077773e-06, + "loss": 0.3159, + "step": 3457 + }, + { + "epoch": 0.16, + "grad_norm": 0.6508935123189824, + "learning_rate": 4.9659853551291885e-06, + "loss": 0.3049, + "step": 3458 + }, + { + "epoch": 0.16, + "grad_norm": 0.6688875258129111, + "learning_rate": 4.965954169989476e-06, + "loss": 0.3076, + "step": 3459 + }, + { + "epoch": 0.16, + "grad_norm": 0.6536497894490383, + "learning_rate": 4.96592297065882e-06, + "loss": 0.3024, + "step": 3460 + }, + { + "epoch": 0.16, + "grad_norm": 0.6905456177976459, + "learning_rate": 4.9658917571373945e-06, + "loss": 0.3177, + "step": 3461 + }, + { + "epoch": 0.16, + "grad_norm": 0.6398894954828066, + "learning_rate": 4.965860529425383e-06, + "loss": 0.2962, + "step": 3462 + }, + { + "epoch": 0.16, + "grad_norm": 0.6372954028820373, + "learning_rate": 4.965829287522964e-06, + "loss": 0.3154, + "step": 3463 + }, + { + "epoch": 0.16, + "grad_norm": 0.5978607372765657, + "learning_rate": 4.9657980314303166e-06, + "loss": 0.2726, + "step": 3464 + }, + { + "epoch": 0.16, + "grad_norm": 0.6648941661610799, + "learning_rate": 4.965766761147621e-06, + "loss": 0.2845, + "step": 3465 + }, + { + "epoch": 0.16, + "grad_norm": 0.6997139929113699, + "learning_rate": 4.965735476675059e-06, + "loss": 0.3258, + "step": 3466 + }, + { + "epoch": 0.16, + "grad_norm": 0.6466623839404014, + "learning_rate": 4.965704178012808e-06, + "loss": 0.2878, + "step": 3467 + }, + { + "epoch": 0.16, + "grad_norm": 0.6813198944620333, + "learning_rate": 4.965672865161049e-06, + "loss": 0.3143, + "step": 3468 + }, + { + "epoch": 0.16, + "grad_norm": 0.6693205947360903, + "learning_rate": 4.965641538119963e-06, + "loss": 0.3061, + "step": 3469 + }, + { + "epoch": 0.16, + "grad_norm": 0.6219803083054185, + "learning_rate": 4.965610196889729e-06, + "loss": 0.2886, + "step": 3470 + }, + { + "epoch": 0.16, + "grad_norm": 0.6278605971033077, + "learning_rate": 4.965578841470529e-06, + "loss": 0.3122, + "step": 3471 + }, + { + "epoch": 0.16, + "grad_norm": 0.6732808783804621, + "learning_rate": 4.965547471862541e-06, + "loss": 0.3096, + "step": 3472 + }, + { + "epoch": 0.16, + "grad_norm": 0.6866491641147656, + "learning_rate": 4.965516088065948e-06, + "loss": 0.3209, + "step": 3473 + }, + { + "epoch": 0.16, + "grad_norm": 0.6137829100011645, + "learning_rate": 4.965484690080929e-06, + "loss": 0.3076, + "step": 3474 + }, + { + "epoch": 0.16, + "grad_norm": 0.7094071400824788, + "learning_rate": 4.965453277907666e-06, + "loss": 0.3413, + "step": 3475 + }, + { + "epoch": 0.16, + "grad_norm": 0.6869512442444557, + "learning_rate": 4.9654218515463385e-06, + "loss": 0.2955, + "step": 3476 + }, + { + "epoch": 0.16, + "grad_norm": 0.7054184432889758, + "learning_rate": 4.9653904109971285e-06, + "loss": 0.3043, + "step": 3477 + }, + { + "epoch": 0.16, + "grad_norm": 0.6415036167592748, + "learning_rate": 4.965358956260216e-06, + "loss": 0.3039, + "step": 3478 + }, + { + "epoch": 0.16, + "grad_norm": 0.6952426902019585, + "learning_rate": 4.9653274873357825e-06, + "loss": 0.3219, + "step": 3479 + }, + { + "epoch": 0.16, + "grad_norm": 0.6281315674671674, + "learning_rate": 4.965296004224008e-06, + "loss": 0.2926, + "step": 3480 + }, + { + "epoch": 0.16, + "grad_norm": 0.6804275176737136, + "learning_rate": 4.965264506925076e-06, + "loss": 0.3206, + "step": 3481 + }, + { + "epoch": 0.16, + "grad_norm": 0.7289364080465224, + "learning_rate": 4.965232995439166e-06, + "loss": 0.3182, + "step": 3482 + }, + { + "epoch": 0.16, + "grad_norm": 0.6209742528788837, + "learning_rate": 4.965201469766459e-06, + "loss": 0.3055, + "step": 3483 + }, + { + "epoch": 0.16, + "grad_norm": 0.64234574988949, + "learning_rate": 4.9651699299071375e-06, + "loss": 0.3173, + "step": 3484 + }, + { + "epoch": 0.16, + "grad_norm": 0.6468490354222002, + "learning_rate": 4.9651383758613835e-06, + "loss": 0.3202, + "step": 3485 + }, + { + "epoch": 0.16, + "grad_norm": 0.6566754272816404, + "learning_rate": 4.965106807629377e-06, + "loss": 0.2833, + "step": 3486 + }, + { + "epoch": 0.16, + "grad_norm": 0.6926410359252726, + "learning_rate": 4.9650752252113e-06, + "loss": 0.3161, + "step": 3487 + }, + { + "epoch": 0.16, + "grad_norm": 0.7095942151644057, + "learning_rate": 4.9650436286073355e-06, + "loss": 0.3083, + "step": 3488 + }, + { + "epoch": 0.16, + "grad_norm": 0.6512690860030629, + "learning_rate": 4.965012017817664e-06, + "loss": 0.2962, + "step": 3489 + }, + { + "epoch": 0.16, + "grad_norm": 0.6220295966077791, + "learning_rate": 4.964980392842468e-06, + "loss": 0.3063, + "step": 3490 + }, + { + "epoch": 0.16, + "grad_norm": 0.656846838078545, + "learning_rate": 4.96494875368193e-06, + "loss": 0.3004, + "step": 3491 + }, + { + "epoch": 0.16, + "grad_norm": 0.6543881059734019, + "learning_rate": 4.96491710033623e-06, + "loss": 0.3246, + "step": 3492 + }, + { + "epoch": 0.16, + "grad_norm": 0.6345036850492254, + "learning_rate": 4.964885432805553e-06, + "loss": 0.2908, + "step": 3493 + }, + { + "epoch": 0.16, + "grad_norm": 0.7619510485048341, + "learning_rate": 4.964853751090079e-06, + "loss": 0.3177, + "step": 3494 + }, + { + "epoch": 0.16, + "grad_norm": 0.6521140665562739, + "learning_rate": 4.9648220551899916e-06, + "loss": 0.3049, + "step": 3495 + }, + { + "epoch": 0.16, + "grad_norm": 0.7858164762938912, + "learning_rate": 4.964790345105472e-06, + "loss": 0.3165, + "step": 3496 + }, + { + "epoch": 0.16, + "grad_norm": 0.6771058771707748, + "learning_rate": 4.964758620836705e-06, + "loss": 0.3108, + "step": 3497 + }, + { + "epoch": 0.16, + "grad_norm": 0.6512734999816213, + "learning_rate": 4.96472688238387e-06, + "loss": 0.2781, + "step": 3498 + }, + { + "epoch": 0.16, + "grad_norm": 0.6893515310667888, + "learning_rate": 4.964695129747152e-06, + "loss": 0.3326, + "step": 3499 + }, + { + "epoch": 0.16, + "grad_norm": 0.7467598270391363, + "learning_rate": 4.964663362926734e-06, + "loss": 0.3039, + "step": 3500 + }, + { + "epoch": 0.16, + "grad_norm": 0.657100199939505, + "learning_rate": 4.964631581922797e-06, + "loss": 0.3041, + "step": 3501 + }, + { + "epoch": 0.16, + "grad_norm": 0.6743086678904111, + "learning_rate": 4.964599786735524e-06, + "loss": 0.3073, + "step": 3502 + }, + { + "epoch": 0.16, + "grad_norm": 0.7237611514147487, + "learning_rate": 4.964567977365099e-06, + "loss": 0.3384, + "step": 3503 + }, + { + "epoch": 0.16, + "grad_norm": 0.6417333922832205, + "learning_rate": 4.9645361538117056e-06, + "loss": 0.3115, + "step": 3504 + }, + { + "epoch": 0.16, + "grad_norm": 0.6769890734590144, + "learning_rate": 4.964504316075525e-06, + "loss": 0.3128, + "step": 3505 + }, + { + "epoch": 0.16, + "grad_norm": 0.6402040914500164, + "learning_rate": 4.964472464156742e-06, + "loss": 0.3045, + "step": 3506 + }, + { + "epoch": 0.16, + "grad_norm": 0.6541453979646806, + "learning_rate": 4.96444059805554e-06, + "loss": 0.3211, + "step": 3507 + }, + { + "epoch": 0.16, + "grad_norm": 0.715586458337534, + "learning_rate": 4.964408717772101e-06, + "loss": 0.3207, + "step": 3508 + }, + { + "epoch": 0.16, + "grad_norm": 0.6949103998180217, + "learning_rate": 4.9643768233066096e-06, + "loss": 0.3147, + "step": 3509 + }, + { + "epoch": 0.16, + "grad_norm": 0.7222136741583585, + "learning_rate": 4.964344914659248e-06, + "loss": 0.3314, + "step": 3510 + }, + { + "epoch": 0.16, + "grad_norm": 0.6298666986757088, + "learning_rate": 4.964312991830201e-06, + "loss": 0.296, + "step": 3511 + }, + { + "epoch": 0.16, + "grad_norm": 0.6410751080789302, + "learning_rate": 4.964281054819654e-06, + "loss": 0.2867, + "step": 3512 + }, + { + "epoch": 0.16, + "grad_norm": 0.6807929819689099, + "learning_rate": 4.9642491036277875e-06, + "loss": 0.2936, + "step": 3513 + }, + { + "epoch": 0.16, + "grad_norm": 0.683353620077659, + "learning_rate": 4.964217138254787e-06, + "loss": 0.3017, + "step": 3514 + }, + { + "epoch": 0.16, + "grad_norm": 0.7093493036260424, + "learning_rate": 4.964185158700835e-06, + "loss": 0.3208, + "step": 3515 + }, + { + "epoch": 0.16, + "grad_norm": 0.7073342880162289, + "learning_rate": 4.964153164966118e-06, + "loss": 0.3253, + "step": 3516 + }, + { + "epoch": 0.16, + "grad_norm": 0.6192177439689824, + "learning_rate": 4.964121157050819e-06, + "loss": 0.2921, + "step": 3517 + }, + { + "epoch": 0.16, + "grad_norm": 0.671832008530866, + "learning_rate": 4.964089134955122e-06, + "loss": 0.3041, + "step": 3518 + }, + { + "epoch": 0.16, + "grad_norm": 0.6732264617576768, + "learning_rate": 4.964057098679211e-06, + "loss": 0.286, + "step": 3519 + }, + { + "epoch": 0.16, + "grad_norm": 0.7865057374677676, + "learning_rate": 4.964025048223271e-06, + "loss": 0.3252, + "step": 3520 + }, + { + "epoch": 0.16, + "grad_norm": 0.6890363936232602, + "learning_rate": 4.963992983587486e-06, + "loss": 0.3103, + "step": 3521 + }, + { + "epoch": 0.16, + "grad_norm": 0.6661900278775075, + "learning_rate": 4.963960904772041e-06, + "loss": 0.2883, + "step": 3522 + }, + { + "epoch": 0.17, + "grad_norm": 0.6514894103345559, + "learning_rate": 4.963928811777119e-06, + "loss": 0.3093, + "step": 3523 + }, + { + "epoch": 0.17, + "grad_norm": 0.681729852410718, + "learning_rate": 4.963896704602908e-06, + "loss": 0.3156, + "step": 3524 + }, + { + "epoch": 0.17, + "grad_norm": 0.7197989981243194, + "learning_rate": 4.963864583249589e-06, + "loss": 0.3074, + "step": 3525 + }, + { + "epoch": 0.17, + "grad_norm": 0.6643351440961649, + "learning_rate": 4.963832447717349e-06, + "loss": 0.2973, + "step": 3526 + }, + { + "epoch": 0.17, + "grad_norm": 0.71840660919547, + "learning_rate": 4.963800298006373e-06, + "loss": 0.3218, + "step": 3527 + }, + { + "epoch": 0.17, + "grad_norm": 0.6334602772469382, + "learning_rate": 4.963768134116845e-06, + "loss": 0.3139, + "step": 3528 + }, + { + "epoch": 0.17, + "grad_norm": 0.6062706901463952, + "learning_rate": 4.963735956048952e-06, + "loss": 0.2686, + "step": 3529 + }, + { + "epoch": 0.17, + "grad_norm": 0.6515429163332376, + "learning_rate": 4.963703763802876e-06, + "loss": 0.3062, + "step": 3530 + }, + { + "epoch": 0.17, + "grad_norm": 0.6706683761032628, + "learning_rate": 4.9636715573788055e-06, + "loss": 0.299, + "step": 3531 + }, + { + "epoch": 0.17, + "grad_norm": 0.7130062284968551, + "learning_rate": 4.963639336776923e-06, + "loss": 0.308, + "step": 3532 + }, + { + "epoch": 0.17, + "grad_norm": 0.7946464792609433, + "learning_rate": 4.9636071019974165e-06, + "loss": 0.333, + "step": 3533 + }, + { + "epoch": 0.17, + "grad_norm": 0.6469401787903721, + "learning_rate": 4.96357485304047e-06, + "loss": 0.3013, + "step": 3534 + }, + { + "epoch": 0.17, + "grad_norm": 0.6580396482068978, + "learning_rate": 4.9635425899062696e-06, + "loss": 0.2966, + "step": 3535 + }, + { + "epoch": 0.17, + "grad_norm": 0.6387622541006915, + "learning_rate": 4.963510312595e-06, + "loss": 0.2955, + "step": 3536 + }, + { + "epoch": 0.17, + "grad_norm": 0.6897209306587815, + "learning_rate": 4.963478021106849e-06, + "loss": 0.3273, + "step": 3537 + }, + { + "epoch": 0.17, + "grad_norm": 0.7096638318170363, + "learning_rate": 4.963445715442e-06, + "loss": 0.3055, + "step": 3538 + }, + { + "epoch": 0.17, + "grad_norm": 0.7354994698676673, + "learning_rate": 4.9634133956006406e-06, + "loss": 0.3192, + "step": 3539 + }, + { + "epoch": 0.17, + "grad_norm": 0.6845497869836895, + "learning_rate": 4.963381061582956e-06, + "loss": 0.2978, + "step": 3540 + }, + { + "epoch": 0.17, + "grad_norm": 0.6623434717231161, + "learning_rate": 4.963348713389132e-06, + "loss": 0.2985, + "step": 3541 + }, + { + "epoch": 0.17, + "grad_norm": 0.6947265888826017, + "learning_rate": 4.963316351019356e-06, + "loss": 0.3287, + "step": 3542 + }, + { + "epoch": 0.17, + "grad_norm": 0.6739264354291511, + "learning_rate": 4.963283974473813e-06, + "loss": 0.3099, + "step": 3543 + }, + { + "epoch": 0.17, + "grad_norm": 0.7128587668207332, + "learning_rate": 4.963251583752691e-06, + "loss": 0.3201, + "step": 3544 + }, + { + "epoch": 0.17, + "grad_norm": 0.6641100887239867, + "learning_rate": 4.963219178856174e-06, + "loss": 0.3001, + "step": 3545 + }, + { + "epoch": 0.17, + "grad_norm": 0.7733087935901235, + "learning_rate": 4.963186759784451e-06, + "loss": 0.3181, + "step": 3546 + }, + { + "epoch": 0.17, + "grad_norm": 0.6468692012239414, + "learning_rate": 4.9631543265377066e-06, + "loss": 0.3028, + "step": 3547 + }, + { + "epoch": 0.17, + "grad_norm": 0.6975780025139846, + "learning_rate": 4.9631218791161285e-06, + "loss": 0.3163, + "step": 3548 + }, + { + "epoch": 0.17, + "grad_norm": 0.6842848488144475, + "learning_rate": 4.963089417519903e-06, + "loss": 0.2928, + "step": 3549 + }, + { + "epoch": 0.17, + "grad_norm": 0.7029328431950528, + "learning_rate": 4.963056941749217e-06, + "loss": 0.288, + "step": 3550 + }, + { + "epoch": 0.17, + "grad_norm": 0.6765444172550752, + "learning_rate": 4.9630244518042565e-06, + "loss": 0.3025, + "step": 3551 + }, + { + "epoch": 0.17, + "grad_norm": 0.6206148415013623, + "learning_rate": 4.96299194768521e-06, + "loss": 0.2807, + "step": 3552 + }, + { + "epoch": 0.17, + "grad_norm": 0.6892972543318243, + "learning_rate": 4.962959429392264e-06, + "loss": 0.3141, + "step": 3553 + }, + { + "epoch": 0.17, + "grad_norm": 0.6847197122158525, + "learning_rate": 4.962926896925605e-06, + "loss": 0.3049, + "step": 3554 + }, + { + "epoch": 0.17, + "grad_norm": 0.6436412849498535, + "learning_rate": 4.962894350285421e-06, + "loss": 0.3133, + "step": 3555 + }, + { + "epoch": 0.17, + "grad_norm": 0.6645170974808872, + "learning_rate": 4.9628617894718996e-06, + "loss": 0.3037, + "step": 3556 + }, + { + "epoch": 0.17, + "grad_norm": 0.6658552040386531, + "learning_rate": 4.962829214485227e-06, + "loss": 0.3129, + "step": 3557 + }, + { + "epoch": 0.17, + "grad_norm": 0.6091939618095806, + "learning_rate": 4.962796625325591e-06, + "loss": 0.2902, + "step": 3558 + }, + { + "epoch": 0.17, + "grad_norm": 0.6953535859565844, + "learning_rate": 4.962764021993181e-06, + "loss": 0.3057, + "step": 3559 + }, + { + "epoch": 0.17, + "grad_norm": 0.6710173586123958, + "learning_rate": 4.9627314044881814e-06, + "loss": 0.321, + "step": 3560 + }, + { + "epoch": 0.17, + "grad_norm": 0.730202043789514, + "learning_rate": 4.962698772810782e-06, + "loss": 0.3216, + "step": 3561 + }, + { + "epoch": 0.17, + "grad_norm": 0.6535486042934673, + "learning_rate": 4.96266612696117e-06, + "loss": 0.2992, + "step": 3562 + }, + { + "epoch": 0.17, + "grad_norm": 0.6914198828750368, + "learning_rate": 4.962633466939534e-06, + "loss": 0.3216, + "step": 3563 + }, + { + "epoch": 0.17, + "grad_norm": 0.6765318912103537, + "learning_rate": 4.962600792746061e-06, + "loss": 0.3056, + "step": 3564 + }, + { + "epoch": 0.17, + "grad_norm": 0.6935162508022299, + "learning_rate": 4.9625681043809396e-06, + "loss": 0.3042, + "step": 3565 + }, + { + "epoch": 0.17, + "grad_norm": 0.6246050269387451, + "learning_rate": 4.962535401844357e-06, + "loss": 0.2879, + "step": 3566 + }, + { + "epoch": 0.17, + "grad_norm": 0.6420749320734751, + "learning_rate": 4.962502685136502e-06, + "loss": 0.2911, + "step": 3567 + }, + { + "epoch": 0.17, + "grad_norm": 0.7142526117727966, + "learning_rate": 4.962469954257564e-06, + "loss": 0.3238, + "step": 3568 + }, + { + "epoch": 0.17, + "grad_norm": 0.6370648017773498, + "learning_rate": 4.96243720920773e-06, + "loss": 0.314, + "step": 3569 + }, + { + "epoch": 0.17, + "grad_norm": 0.6152203171964065, + "learning_rate": 4.962404449987189e-06, + "loss": 0.2913, + "step": 3570 + }, + { + "epoch": 0.17, + "grad_norm": 0.6812656360068707, + "learning_rate": 4.9623716765961285e-06, + "loss": 0.3269, + "step": 3571 + }, + { + "epoch": 0.17, + "grad_norm": 0.6504201833598272, + "learning_rate": 4.9623388890347375e-06, + "loss": 0.3142, + "step": 3572 + }, + { + "epoch": 0.17, + "grad_norm": 0.7045688022144254, + "learning_rate": 4.962306087303206e-06, + "loss": 0.3114, + "step": 3573 + }, + { + "epoch": 0.17, + "grad_norm": 0.6638831420976437, + "learning_rate": 4.9622732714017215e-06, + "loss": 0.3051, + "step": 3574 + }, + { + "epoch": 0.17, + "grad_norm": 0.6563157253313547, + "learning_rate": 4.9622404413304735e-06, + "loss": 0.2988, + "step": 3575 + }, + { + "epoch": 0.17, + "grad_norm": 0.6429937841221067, + "learning_rate": 4.962207597089651e-06, + "loss": 0.3071, + "step": 3576 + }, + { + "epoch": 0.17, + "grad_norm": 0.719770410679172, + "learning_rate": 4.9621747386794426e-06, + "loss": 0.325, + "step": 3577 + }, + { + "epoch": 0.17, + "grad_norm": 0.7012940904093565, + "learning_rate": 4.962141866100037e-06, + "loss": 0.3276, + "step": 3578 + }, + { + "epoch": 0.17, + "grad_norm": 0.6807569945980584, + "learning_rate": 4.962108979351623e-06, + "loss": 0.298, + "step": 3579 + }, + { + "epoch": 0.17, + "grad_norm": 0.6735356669564657, + "learning_rate": 4.962076078434392e-06, + "loss": 0.3114, + "step": 3580 + }, + { + "epoch": 0.17, + "grad_norm": 0.6575112182911408, + "learning_rate": 4.962043163348531e-06, + "loss": 0.3037, + "step": 3581 + }, + { + "epoch": 0.17, + "grad_norm": 0.6514534881391238, + "learning_rate": 4.962010234094231e-06, + "loss": 0.3192, + "step": 3582 + }, + { + "epoch": 0.17, + "grad_norm": 0.6208648653765999, + "learning_rate": 4.9619772906716815e-06, + "loss": 0.302, + "step": 3583 + }, + { + "epoch": 0.17, + "grad_norm": 0.6804587260010118, + "learning_rate": 4.96194433308107e-06, + "loss": 0.3185, + "step": 3584 + }, + { + "epoch": 0.17, + "grad_norm": 0.7720768095342098, + "learning_rate": 4.961911361322589e-06, + "loss": 0.3254, + "step": 3585 + }, + { + "epoch": 0.17, + "grad_norm": 0.635341049059702, + "learning_rate": 4.9618783753964266e-06, + "loss": 0.285, + "step": 3586 + }, + { + "epoch": 0.17, + "grad_norm": 0.635633583053057, + "learning_rate": 4.9618453753027715e-06, + "loss": 0.3171, + "step": 3587 + }, + { + "epoch": 0.17, + "grad_norm": 0.7077991974138533, + "learning_rate": 4.961812361041817e-06, + "loss": 0.3249, + "step": 3588 + }, + { + "epoch": 0.17, + "grad_norm": 0.6724140499532713, + "learning_rate": 4.9617793326137496e-06, + "loss": 0.3155, + "step": 3589 + }, + { + "epoch": 0.17, + "grad_norm": 0.6928883594346019, + "learning_rate": 4.961746290018762e-06, + "loss": 0.3136, + "step": 3590 + }, + { + "epoch": 0.17, + "grad_norm": 0.6863743823373195, + "learning_rate": 4.961713233257043e-06, + "loss": 0.291, + "step": 3591 + }, + { + "epoch": 0.17, + "grad_norm": 0.7160848845369857, + "learning_rate": 4.961680162328783e-06, + "loss": 0.3049, + "step": 3592 + }, + { + "epoch": 0.17, + "grad_norm": 0.6845989655656619, + "learning_rate": 4.961647077234171e-06, + "loss": 0.294, + "step": 3593 + }, + { + "epoch": 0.17, + "grad_norm": 0.70502779647904, + "learning_rate": 4.9616139779734e-06, + "loss": 0.3114, + "step": 3594 + }, + { + "epoch": 0.17, + "grad_norm": 0.6810998446641695, + "learning_rate": 4.96158086454666e-06, + "loss": 0.3043, + "step": 3595 + }, + { + "epoch": 0.17, + "grad_norm": 0.670911604386175, + "learning_rate": 4.96154773695414e-06, + "loss": 0.2941, + "step": 3596 + }, + { + "epoch": 0.17, + "grad_norm": 0.6840998611285642, + "learning_rate": 4.961514595196032e-06, + "loss": 0.3141, + "step": 3597 + }, + { + "epoch": 0.17, + "grad_norm": 0.7095380508638588, + "learning_rate": 4.961481439272525e-06, + "loss": 0.2865, + "step": 3598 + }, + { + "epoch": 0.17, + "grad_norm": 0.7280081350808516, + "learning_rate": 4.961448269183811e-06, + "loss": 0.3261, + "step": 3599 + }, + { + "epoch": 0.17, + "grad_norm": 0.6675120328792702, + "learning_rate": 4.9614150849300825e-06, + "loss": 0.2989, + "step": 3600 + }, + { + "epoch": 0.17, + "grad_norm": 0.6796084679866232, + "learning_rate": 4.961381886511528e-06, + "loss": 0.2895, + "step": 3601 + }, + { + "epoch": 0.17, + "grad_norm": 0.6702380610220419, + "learning_rate": 4.961348673928339e-06, + "loss": 0.3174, + "step": 3602 + }, + { + "epoch": 0.17, + "grad_norm": 0.7038959781985313, + "learning_rate": 4.961315447180707e-06, + "loss": 0.3183, + "step": 3603 + }, + { + "epoch": 0.17, + "grad_norm": 0.6420811640957474, + "learning_rate": 4.961282206268824e-06, + "loss": 0.2833, + "step": 3604 + }, + { + "epoch": 0.17, + "grad_norm": 0.6478802986858357, + "learning_rate": 4.96124895119288e-06, + "loss": 0.3081, + "step": 3605 + }, + { + "epoch": 0.17, + "grad_norm": 0.6721660500095951, + "learning_rate": 4.961215681953067e-06, + "loss": 0.3072, + "step": 3606 + }, + { + "epoch": 0.17, + "grad_norm": 0.6313759019568335, + "learning_rate": 4.961182398549577e-06, + "loss": 0.303, + "step": 3607 + }, + { + "epoch": 0.17, + "grad_norm": 0.7220317254566834, + "learning_rate": 4.961149100982599e-06, + "loss": 0.2956, + "step": 3608 + }, + { + "epoch": 0.17, + "grad_norm": 0.662830830931359, + "learning_rate": 4.961115789252328e-06, + "loss": 0.2995, + "step": 3609 + }, + { + "epoch": 0.17, + "grad_norm": 0.6826029866213171, + "learning_rate": 4.961082463358954e-06, + "loss": 0.3014, + "step": 3610 + }, + { + "epoch": 0.17, + "grad_norm": 0.6404842359159343, + "learning_rate": 4.9610491233026695e-06, + "loss": 0.3029, + "step": 3611 + }, + { + "epoch": 0.17, + "grad_norm": 0.7056438658897478, + "learning_rate": 4.961015769083664e-06, + "loss": 0.3241, + "step": 3612 + }, + { + "epoch": 0.17, + "grad_norm": 0.6686724826158416, + "learning_rate": 4.960982400702134e-06, + "loss": 0.3147, + "step": 3613 + }, + { + "epoch": 0.17, + "grad_norm": 0.6537047985229776, + "learning_rate": 4.960949018158267e-06, + "loss": 0.3101, + "step": 3614 + }, + { + "epoch": 0.17, + "grad_norm": 0.7207228429606153, + "learning_rate": 4.960915621452257e-06, + "loss": 0.3225, + "step": 3615 + }, + { + "epoch": 0.17, + "grad_norm": 0.6381934286318042, + "learning_rate": 4.960882210584297e-06, + "loss": 0.3167, + "step": 3616 + }, + { + "epoch": 0.17, + "grad_norm": 0.6381980423246569, + "learning_rate": 4.960848785554578e-06, + "loss": 0.2929, + "step": 3617 + }, + { + "epoch": 0.17, + "grad_norm": 0.6706254900617865, + "learning_rate": 4.960815346363293e-06, + "loss": 0.2872, + "step": 3618 + }, + { + "epoch": 0.17, + "grad_norm": 0.6875119651964064, + "learning_rate": 4.960781893010633e-06, + "loss": 0.301, + "step": 3619 + }, + { + "epoch": 0.17, + "grad_norm": 0.7140321043680492, + "learning_rate": 4.960748425496793e-06, + "loss": 0.3236, + "step": 3620 + }, + { + "epoch": 0.17, + "grad_norm": 0.7086306592871755, + "learning_rate": 4.960714943821964e-06, + "loss": 0.3066, + "step": 3621 + }, + { + "epoch": 0.17, + "grad_norm": 0.7252609472878898, + "learning_rate": 4.9606814479863385e-06, + "loss": 0.3024, + "step": 3622 + }, + { + "epoch": 0.17, + "grad_norm": 0.6883218699569074, + "learning_rate": 4.96064793799011e-06, + "loss": 0.2906, + "step": 3623 + }, + { + "epoch": 0.17, + "grad_norm": 0.6968112950502138, + "learning_rate": 4.960614413833471e-06, + "loss": 0.3152, + "step": 3624 + }, + { + "epoch": 0.17, + "grad_norm": 0.720250322679075, + "learning_rate": 4.9605808755166145e-06, + "loss": 0.3126, + "step": 3625 + }, + { + "epoch": 0.17, + "grad_norm": 0.693077645492975, + "learning_rate": 4.9605473230397335e-06, + "loss": 0.3349, + "step": 3626 + }, + { + "epoch": 0.17, + "grad_norm": 0.6220212320823005, + "learning_rate": 4.960513756403021e-06, + "loss": 0.2948, + "step": 3627 + }, + { + "epoch": 0.17, + "grad_norm": 0.6832789388626215, + "learning_rate": 4.960480175606671e-06, + "loss": 0.2993, + "step": 3628 + }, + { + "epoch": 0.17, + "grad_norm": 0.7108707198268306, + "learning_rate": 4.960446580650875e-06, + "loss": 0.3233, + "step": 3629 + }, + { + "epoch": 0.17, + "grad_norm": 0.7049607794328268, + "learning_rate": 4.9604129715358285e-06, + "loss": 0.3189, + "step": 3630 + }, + { + "epoch": 0.17, + "grad_norm": 0.6603367511391072, + "learning_rate": 4.960379348261722e-06, + "loss": 0.315, + "step": 3631 + }, + { + "epoch": 0.17, + "grad_norm": 0.7018650110678603, + "learning_rate": 4.960345710828752e-06, + "loss": 0.3062, + "step": 3632 + }, + { + "epoch": 0.17, + "grad_norm": 0.7018540516253927, + "learning_rate": 4.960312059237111e-06, + "loss": 0.3115, + "step": 3633 + }, + { + "epoch": 0.17, + "grad_norm": 0.6909172691712056, + "learning_rate": 4.960278393486993e-06, + "loss": 0.3192, + "step": 3634 + }, + { + "epoch": 0.17, + "grad_norm": 0.6658668443785584, + "learning_rate": 4.960244713578589e-06, + "loss": 0.3377, + "step": 3635 + }, + { + "epoch": 0.17, + "grad_norm": 0.6851416003198981, + "learning_rate": 4.9602110195120964e-06, + "loss": 0.2977, + "step": 3636 + }, + { + "epoch": 0.17, + "grad_norm": 0.6904795837876903, + "learning_rate": 4.960177311287708e-06, + "loss": 0.3203, + "step": 3637 + }, + { + "epoch": 0.17, + "grad_norm": 0.6508494822720842, + "learning_rate": 4.9601435889056174e-06, + "loss": 0.2908, + "step": 3638 + }, + { + "epoch": 0.17, + "grad_norm": 0.6409518378830136, + "learning_rate": 4.960109852366018e-06, + "loss": 0.2863, + "step": 3639 + }, + { + "epoch": 0.17, + "grad_norm": 0.610947995879343, + "learning_rate": 4.960076101669106e-06, + "loss": 0.2968, + "step": 3640 + }, + { + "epoch": 0.17, + "grad_norm": 0.6604392648968497, + "learning_rate": 4.960042336815074e-06, + "loss": 0.2927, + "step": 3641 + }, + { + "epoch": 0.17, + "grad_norm": 0.7026052080107118, + "learning_rate": 4.960008557804116e-06, + "loss": 0.2943, + "step": 3642 + }, + { + "epoch": 0.17, + "grad_norm": 0.665664344425831, + "learning_rate": 4.959974764636427e-06, + "loss": 0.298, + "step": 3643 + }, + { + "epoch": 0.17, + "grad_norm": 0.707237452659747, + "learning_rate": 4.959940957312202e-06, + "loss": 0.3243, + "step": 3644 + }, + { + "epoch": 0.17, + "grad_norm": 0.7019064541264489, + "learning_rate": 4.9599071358316355e-06, + "loss": 0.3184, + "step": 3645 + }, + { + "epoch": 0.17, + "grad_norm": 0.6633977663240037, + "learning_rate": 4.959873300194921e-06, + "loss": 0.3043, + "step": 3646 + }, + { + "epoch": 0.17, + "grad_norm": 0.6952643306088327, + "learning_rate": 4.959839450402254e-06, + "loss": 0.3054, + "step": 3647 + }, + { + "epoch": 0.17, + "grad_norm": 0.742025612333006, + "learning_rate": 4.959805586453829e-06, + "loss": 0.3061, + "step": 3648 + }, + { + "epoch": 0.17, + "grad_norm": 0.7284382928623251, + "learning_rate": 4.959771708349841e-06, + "loss": 0.3213, + "step": 3649 + }, + { + "epoch": 0.17, + "grad_norm": 0.7055789475122335, + "learning_rate": 4.959737816090486e-06, + "loss": 0.343, + "step": 3650 + }, + { + "epoch": 0.17, + "grad_norm": 0.6673120558234535, + "learning_rate": 4.9597039096759575e-06, + "loss": 0.304, + "step": 3651 + }, + { + "epoch": 0.17, + "grad_norm": 0.8114339245497142, + "learning_rate": 4.959669989106451e-06, + "loss": 0.3077, + "step": 3652 + }, + { + "epoch": 0.17, + "grad_norm": 0.6888479545803418, + "learning_rate": 4.959636054382162e-06, + "loss": 0.3018, + "step": 3653 + }, + { + "epoch": 0.17, + "grad_norm": 0.6807178154338008, + "learning_rate": 4.959602105503286e-06, + "loss": 0.2985, + "step": 3654 + }, + { + "epoch": 0.17, + "grad_norm": 0.6464234700243526, + "learning_rate": 4.959568142470018e-06, + "loss": 0.3069, + "step": 3655 + }, + { + "epoch": 0.17, + "grad_norm": 0.6941780639411497, + "learning_rate": 4.959534165282554e-06, + "loss": 0.3018, + "step": 3656 + }, + { + "epoch": 0.17, + "grad_norm": 0.6969351308849965, + "learning_rate": 4.9595001739410886e-06, + "loss": 0.3048, + "step": 3657 + }, + { + "epoch": 0.17, + "grad_norm": 0.7183685609645173, + "learning_rate": 4.959466168445818e-06, + "loss": 0.3, + "step": 3658 + }, + { + "epoch": 0.17, + "grad_norm": 0.6664472121656249, + "learning_rate": 4.959432148796937e-06, + "loss": 0.3175, + "step": 3659 + }, + { + "epoch": 0.17, + "grad_norm": 0.7033911006843407, + "learning_rate": 4.959398114994644e-06, + "loss": 0.2983, + "step": 3660 + }, + { + "epoch": 0.17, + "grad_norm": 0.7082482876124213, + "learning_rate": 4.959364067039131e-06, + "loss": 0.3118, + "step": 3661 + }, + { + "epoch": 0.17, + "grad_norm": 0.784308483093203, + "learning_rate": 4.959330004930597e-06, + "loss": 0.3245, + "step": 3662 + }, + { + "epoch": 0.17, + "grad_norm": 0.6755079264414691, + "learning_rate": 4.959295928669236e-06, + "loss": 0.2951, + "step": 3663 + }, + { + "epoch": 0.17, + "grad_norm": 0.681731577686721, + "learning_rate": 4.959261838255246e-06, + "loss": 0.3029, + "step": 3664 + }, + { + "epoch": 0.17, + "grad_norm": 0.6686290560676441, + "learning_rate": 4.959227733688822e-06, + "loss": 0.3126, + "step": 3665 + }, + { + "epoch": 0.17, + "grad_norm": 0.638685840179734, + "learning_rate": 4.95919361497016e-06, + "loss": 0.3077, + "step": 3666 + }, + { + "epoch": 0.17, + "grad_norm": 0.5768047760641626, + "learning_rate": 4.959159482099458e-06, + "loss": 0.2764, + "step": 3667 + }, + { + "epoch": 0.17, + "grad_norm": 0.6190840429924739, + "learning_rate": 4.95912533507691e-06, + "loss": 0.2917, + "step": 3668 + }, + { + "epoch": 0.17, + "grad_norm": 0.6964080333244289, + "learning_rate": 4.959091173902715e-06, + "loss": 0.3166, + "step": 3669 + }, + { + "epoch": 0.17, + "grad_norm": 0.665113046304739, + "learning_rate": 4.959056998577067e-06, + "loss": 0.2954, + "step": 3670 + }, + { + "epoch": 0.17, + "grad_norm": 0.6947779401145283, + "learning_rate": 4.9590228091001645e-06, + "loss": 0.3031, + "step": 3671 + }, + { + "epoch": 0.17, + "grad_norm": 0.6648751316640864, + "learning_rate": 4.958988605472205e-06, + "loss": 0.3098, + "step": 3672 + }, + { + "epoch": 0.17, + "grad_norm": 0.6425125862371125, + "learning_rate": 4.958954387693383e-06, + "loss": 0.3041, + "step": 3673 + }, + { + "epoch": 0.17, + "grad_norm": 0.6353442959835225, + "learning_rate": 4.958920155763896e-06, + "loss": 0.2913, + "step": 3674 + }, + { + "epoch": 0.17, + "grad_norm": 0.679852076711212, + "learning_rate": 4.958885909683942e-06, + "loss": 0.3175, + "step": 3675 + }, + { + "epoch": 0.17, + "grad_norm": 0.6706808336601867, + "learning_rate": 4.958851649453718e-06, + "loss": 0.2883, + "step": 3676 + }, + { + "epoch": 0.17, + "grad_norm": 0.651919005670173, + "learning_rate": 4.958817375073421e-06, + "loss": 0.3256, + "step": 3677 + }, + { + "epoch": 0.17, + "grad_norm": 0.6193481219791098, + "learning_rate": 4.958783086543248e-06, + "loss": 0.3081, + "step": 3678 + }, + { + "epoch": 0.17, + "grad_norm": 0.6992567177910312, + "learning_rate": 4.958748783863395e-06, + "loss": 0.3187, + "step": 3679 + }, + { + "epoch": 0.17, + "grad_norm": 0.6447904345172644, + "learning_rate": 4.9587144670340614e-06, + "loss": 0.3166, + "step": 3680 + }, + { + "epoch": 0.17, + "grad_norm": 0.7301840868795018, + "learning_rate": 4.958680136055445e-06, + "loss": 0.3529, + "step": 3681 + }, + { + "epoch": 0.17, + "grad_norm": 0.60775496305513, + "learning_rate": 4.958645790927741e-06, + "loss": 0.3147, + "step": 3682 + }, + { + "epoch": 0.17, + "grad_norm": 0.6942170373313539, + "learning_rate": 4.958611431651149e-06, + "loss": 0.3066, + "step": 3683 + }, + { + "epoch": 0.17, + "grad_norm": 0.6862911880663698, + "learning_rate": 4.958577058225866e-06, + "loss": 0.2939, + "step": 3684 + }, + { + "epoch": 0.17, + "grad_norm": 0.6544833420713664, + "learning_rate": 4.958542670652091e-06, + "loss": 0.3155, + "step": 3685 + }, + { + "epoch": 0.17, + "grad_norm": 0.6787080031368596, + "learning_rate": 4.95850826893002e-06, + "loss": 0.3217, + "step": 3686 + }, + { + "epoch": 0.17, + "grad_norm": 0.6616982185138659, + "learning_rate": 4.958473853059852e-06, + "loss": 0.3106, + "step": 3687 + }, + { + "epoch": 0.17, + "grad_norm": 0.5890148371855489, + "learning_rate": 4.958439423041784e-06, + "loss": 0.2943, + "step": 3688 + }, + { + "epoch": 0.17, + "grad_norm": 0.664629344545347, + "learning_rate": 4.958404978876016e-06, + "loss": 0.33, + "step": 3689 + }, + { + "epoch": 0.17, + "grad_norm": 0.6664437730099032, + "learning_rate": 4.9583705205627454e-06, + "loss": 0.3029, + "step": 3690 + }, + { + "epoch": 0.17, + "grad_norm": 0.6623965324322186, + "learning_rate": 4.95833604810217e-06, + "loss": 0.3124, + "step": 3691 + }, + { + "epoch": 0.17, + "grad_norm": 0.6409962504697018, + "learning_rate": 4.958301561494488e-06, + "loss": 0.3219, + "step": 3692 + }, + { + "epoch": 0.17, + "grad_norm": 0.6822816657999984, + "learning_rate": 4.958267060739899e-06, + "loss": 0.33, + "step": 3693 + }, + { + "epoch": 0.17, + "grad_norm": 0.7071815698586355, + "learning_rate": 4.958232545838601e-06, + "loss": 0.2993, + "step": 3694 + }, + { + "epoch": 0.17, + "grad_norm": 0.6532139931150612, + "learning_rate": 4.958198016790792e-06, + "loss": 0.3058, + "step": 3695 + }, + { + "epoch": 0.17, + "grad_norm": 0.6563199729194366, + "learning_rate": 4.958163473596672e-06, + "loss": 0.3101, + "step": 3696 + }, + { + "epoch": 0.17, + "grad_norm": 0.7310221187615719, + "learning_rate": 4.958128916256439e-06, + "loss": 0.3316, + "step": 3697 + }, + { + "epoch": 0.17, + "grad_norm": 0.6280325438562813, + "learning_rate": 4.958094344770292e-06, + "loss": 0.3178, + "step": 3698 + }, + { + "epoch": 0.17, + "grad_norm": 0.6430240171852373, + "learning_rate": 4.958059759138428e-06, + "loss": 0.294, + "step": 3699 + }, + { + "epoch": 0.17, + "grad_norm": 0.6785595451222093, + "learning_rate": 4.9580251593610504e-06, + "loss": 0.3068, + "step": 3700 + }, + { + "epoch": 0.17, + "grad_norm": 0.6750961568485454, + "learning_rate": 4.9579905454383545e-06, + "loss": 0.3155, + "step": 3701 + }, + { + "epoch": 0.17, + "grad_norm": 0.6491864164470899, + "learning_rate": 4.957955917370541e-06, + "loss": 0.3305, + "step": 3702 + }, + { + "epoch": 0.17, + "grad_norm": 0.6800552472540977, + "learning_rate": 4.957921275157809e-06, + "loss": 0.3133, + "step": 3703 + }, + { + "epoch": 0.17, + "grad_norm": 0.6817041112931528, + "learning_rate": 4.957886618800358e-06, + "loss": 0.3289, + "step": 3704 + }, + { + "epoch": 0.17, + "grad_norm": 0.6760957953308356, + "learning_rate": 4.957851948298387e-06, + "loss": 0.2908, + "step": 3705 + }, + { + "epoch": 0.17, + "grad_norm": 0.6774619082035219, + "learning_rate": 4.957817263652096e-06, + "loss": 0.315, + "step": 3706 + }, + { + "epoch": 0.17, + "grad_norm": 0.5970244573152493, + "learning_rate": 4.957782564861684e-06, + "loss": 0.298, + "step": 3707 + }, + { + "epoch": 0.17, + "grad_norm": 0.6717837092049953, + "learning_rate": 4.957747851927351e-06, + "loss": 0.3184, + "step": 3708 + }, + { + "epoch": 0.17, + "grad_norm": 0.7268784575643834, + "learning_rate": 4.9577131248492985e-06, + "loss": 0.3135, + "step": 3709 + }, + { + "epoch": 0.17, + "grad_norm": 0.7304469260189919, + "learning_rate": 4.9576783836277234e-06, + "loss": 0.3272, + "step": 3710 + }, + { + "epoch": 0.17, + "grad_norm": 0.6113296781816023, + "learning_rate": 4.957643628262827e-06, + "loss": 0.3057, + "step": 3711 + }, + { + "epoch": 0.17, + "grad_norm": 0.6241517336538518, + "learning_rate": 4.957608858754809e-06, + "loss": 0.3043, + "step": 3712 + }, + { + "epoch": 0.17, + "grad_norm": 0.6275230667967374, + "learning_rate": 4.957574075103869e-06, + "loss": 0.2882, + "step": 3713 + }, + { + "epoch": 0.17, + "grad_norm": 0.6210297247174288, + "learning_rate": 4.957539277310209e-06, + "loss": 0.306, + "step": 3714 + }, + { + "epoch": 0.17, + "grad_norm": 0.6816674481308945, + "learning_rate": 4.9575044653740285e-06, + "loss": 0.3122, + "step": 3715 + }, + { + "epoch": 0.17, + "grad_norm": 0.6350436945414795, + "learning_rate": 4.9574696392955265e-06, + "loss": 0.2977, + "step": 3716 + }, + { + "epoch": 0.17, + "grad_norm": 0.6054069446517701, + "learning_rate": 4.957434799074905e-06, + "loss": 0.299, + "step": 3717 + }, + { + "epoch": 0.17, + "grad_norm": 0.6875574169845918, + "learning_rate": 4.957399944712364e-06, + "loss": 0.3223, + "step": 3718 + }, + { + "epoch": 0.17, + "grad_norm": 0.6791505074944582, + "learning_rate": 4.957365076208103e-06, + "loss": 0.3134, + "step": 3719 + }, + { + "epoch": 0.17, + "grad_norm": 0.6294003820867861, + "learning_rate": 4.9573301935623245e-06, + "loss": 0.29, + "step": 3720 + }, + { + "epoch": 0.17, + "grad_norm": 0.7183245239740692, + "learning_rate": 4.957295296775229e-06, + "loss": 0.3188, + "step": 3721 + }, + { + "epoch": 0.17, + "grad_norm": 0.6777497374193041, + "learning_rate": 4.957260385847015e-06, + "loss": 0.303, + "step": 3722 + }, + { + "epoch": 0.17, + "grad_norm": 0.7143594284980629, + "learning_rate": 4.957225460777886e-06, + "loss": 0.3287, + "step": 3723 + }, + { + "epoch": 0.17, + "grad_norm": 0.7898862218553862, + "learning_rate": 4.957190521568042e-06, + "loss": 0.3339, + "step": 3724 + }, + { + "epoch": 0.17, + "grad_norm": 0.6744815917616712, + "learning_rate": 4.957155568217684e-06, + "loss": 0.3221, + "step": 3725 + }, + { + "epoch": 0.17, + "grad_norm": 0.6879346868861232, + "learning_rate": 4.957120600727013e-06, + "loss": 0.3124, + "step": 3726 + }, + { + "epoch": 0.17, + "grad_norm": 0.7067395740907835, + "learning_rate": 4.957085619096231e-06, + "loss": 0.3027, + "step": 3727 + }, + { + "epoch": 0.17, + "grad_norm": 0.6393021878956638, + "learning_rate": 4.957050623325539e-06, + "loss": 0.2949, + "step": 3728 + }, + { + "epoch": 0.17, + "grad_norm": 0.6621160612548815, + "learning_rate": 4.957015613415138e-06, + "loss": 0.3036, + "step": 3729 + }, + { + "epoch": 0.17, + "grad_norm": 0.6819931160458901, + "learning_rate": 4.95698058936523e-06, + "loss": 0.3185, + "step": 3730 + }, + { + "epoch": 0.17, + "grad_norm": 0.6662592399709985, + "learning_rate": 4.956945551176016e-06, + "loss": 0.3118, + "step": 3731 + }, + { + "epoch": 0.17, + "grad_norm": 0.6520972489753759, + "learning_rate": 4.9569104988476975e-06, + "loss": 0.285, + "step": 3732 + }, + { + "epoch": 0.17, + "grad_norm": 0.6368010059298652, + "learning_rate": 4.956875432380477e-06, + "loss": 0.297, + "step": 3733 + }, + { + "epoch": 0.17, + "grad_norm": 0.6215177486318567, + "learning_rate": 4.956840351774556e-06, + "loss": 0.2751, + "step": 3734 + }, + { + "epoch": 0.17, + "grad_norm": 0.6241567577270442, + "learning_rate": 4.956805257030135e-06, + "loss": 0.2893, + "step": 3735 + }, + { + "epoch": 0.18, + "grad_norm": 0.6626082601383543, + "learning_rate": 4.956770148147419e-06, + "loss": 0.3255, + "step": 3736 + }, + { + "epoch": 0.18, + "grad_norm": 0.7155675690205063, + "learning_rate": 4.9567350251266075e-06, + "loss": 0.3228, + "step": 3737 + }, + { + "epoch": 0.18, + "grad_norm": 0.6729949327916983, + "learning_rate": 4.956699887967904e-06, + "loss": 0.3126, + "step": 3738 + }, + { + "epoch": 0.18, + "grad_norm": 0.6602157184808664, + "learning_rate": 4.956664736671509e-06, + "loss": 0.3046, + "step": 3739 + }, + { + "epoch": 0.18, + "grad_norm": 0.644056936896296, + "learning_rate": 4.9566295712376265e-06, + "loss": 0.2968, + "step": 3740 + }, + { + "epoch": 0.18, + "grad_norm": 0.6617695881918844, + "learning_rate": 4.956594391666458e-06, + "loss": 0.2968, + "step": 3741 + }, + { + "epoch": 0.18, + "grad_norm": 0.6995125862064424, + "learning_rate": 4.956559197958207e-06, + "loss": 0.3131, + "step": 3742 + }, + { + "epoch": 0.18, + "grad_norm": 0.6548908043386106, + "learning_rate": 4.9565239901130745e-06, + "loss": 0.3143, + "step": 3743 + }, + { + "epoch": 0.18, + "grad_norm": 0.6243218334941747, + "learning_rate": 4.9564887681312645e-06, + "loss": 0.3002, + "step": 3744 + }, + { + "epoch": 0.18, + "grad_norm": 0.6498999872807134, + "learning_rate": 4.9564535320129786e-06, + "loss": 0.3114, + "step": 3745 + }, + { + "epoch": 0.18, + "grad_norm": 0.664604477546171, + "learning_rate": 4.95641828175842e-06, + "loss": 0.3005, + "step": 3746 + }, + { + "epoch": 0.18, + "grad_norm": 0.6493955292665344, + "learning_rate": 4.956383017367792e-06, + "loss": 0.2973, + "step": 3747 + }, + { + "epoch": 0.18, + "grad_norm": 0.6422665697315185, + "learning_rate": 4.9563477388412965e-06, + "loss": 0.2987, + "step": 3748 + }, + { + "epoch": 0.18, + "grad_norm": 0.6763771003135712, + "learning_rate": 4.956312446179137e-06, + "loss": 0.3064, + "step": 3749 + }, + { + "epoch": 0.18, + "grad_norm": 0.6742419875589394, + "learning_rate": 4.956277139381518e-06, + "loss": 0.32, + "step": 3750 + }, + { + "epoch": 0.18, + "grad_norm": 0.5935021996172818, + "learning_rate": 4.95624181844864e-06, + "loss": 0.3058, + "step": 3751 + }, + { + "epoch": 0.18, + "grad_norm": 0.7057018238156969, + "learning_rate": 4.956206483380709e-06, + "loss": 0.3168, + "step": 3752 + }, + { + "epoch": 0.18, + "grad_norm": 0.6860303192704905, + "learning_rate": 4.956171134177926e-06, + "loss": 0.3059, + "step": 3753 + }, + { + "epoch": 0.18, + "grad_norm": 0.6728828827117264, + "learning_rate": 4.956135770840495e-06, + "loss": 0.296, + "step": 3754 + }, + { + "epoch": 0.18, + "grad_norm": 0.6613974649503525, + "learning_rate": 4.956100393368621e-06, + "loss": 0.2825, + "step": 3755 + }, + { + "epoch": 0.18, + "grad_norm": 0.6412137225763401, + "learning_rate": 4.956065001762507e-06, + "loss": 0.3027, + "step": 3756 + }, + { + "epoch": 0.18, + "grad_norm": 0.7250902840376898, + "learning_rate": 4.956029596022356e-06, + "loss": 0.3177, + "step": 3757 + }, + { + "epoch": 0.18, + "grad_norm": 0.6650248966164864, + "learning_rate": 4.955994176148371e-06, + "loss": 0.3196, + "step": 3758 + }, + { + "epoch": 0.18, + "grad_norm": 0.6228279909549999, + "learning_rate": 4.9559587421407575e-06, + "loss": 0.3073, + "step": 3759 + }, + { + "epoch": 0.18, + "grad_norm": 0.632061418833055, + "learning_rate": 4.955923293999718e-06, + "loss": 0.3082, + "step": 3760 + }, + { + "epoch": 0.18, + "grad_norm": 0.6507916429641172, + "learning_rate": 4.955887831725457e-06, + "loss": 0.3214, + "step": 3761 + }, + { + "epoch": 0.18, + "grad_norm": 0.6825254317195222, + "learning_rate": 4.955852355318181e-06, + "loss": 0.3114, + "step": 3762 + }, + { + "epoch": 0.18, + "grad_norm": 0.63244234928474, + "learning_rate": 4.95581686477809e-06, + "loss": 0.3022, + "step": 3763 + }, + { + "epoch": 0.18, + "grad_norm": 0.6548802885230385, + "learning_rate": 4.955781360105391e-06, + "loss": 0.3065, + "step": 3764 + }, + { + "epoch": 0.18, + "grad_norm": 0.6767204516857956, + "learning_rate": 4.955745841300287e-06, + "loss": 0.311, + "step": 3765 + }, + { + "epoch": 0.18, + "grad_norm": 0.7032105475813829, + "learning_rate": 4.955710308362983e-06, + "loss": 0.3177, + "step": 3766 + }, + { + "epoch": 0.18, + "grad_norm": 0.6617955675952766, + "learning_rate": 4.955674761293683e-06, + "loss": 0.3483, + "step": 3767 + }, + { + "epoch": 0.18, + "grad_norm": 0.6478688499966516, + "learning_rate": 4.955639200092593e-06, + "loss": 0.2874, + "step": 3768 + }, + { + "epoch": 0.18, + "grad_norm": 0.6137204201262089, + "learning_rate": 4.955603624759916e-06, + "loss": 0.3014, + "step": 3769 + }, + { + "epoch": 0.18, + "grad_norm": 0.5705660754687994, + "learning_rate": 4.955568035295857e-06, + "loss": 0.2824, + "step": 3770 + }, + { + "epoch": 0.18, + "grad_norm": 0.6169477138285894, + "learning_rate": 4.9555324317006214e-06, + "loss": 0.2969, + "step": 3771 + }, + { + "epoch": 0.18, + "grad_norm": 0.6865789343493441, + "learning_rate": 4.9554968139744144e-06, + "loss": 0.3128, + "step": 3772 + }, + { + "epoch": 0.18, + "grad_norm": 0.6913963123608945, + "learning_rate": 4.9554611821174405e-06, + "loss": 0.3197, + "step": 3773 + }, + { + "epoch": 0.18, + "grad_norm": 0.6723390929578422, + "learning_rate": 4.955425536129904e-06, + "loss": 0.3138, + "step": 3774 + }, + { + "epoch": 0.18, + "grad_norm": 0.6523485413310461, + "learning_rate": 4.95538987601201e-06, + "loss": 0.3111, + "step": 3775 + }, + { + "epoch": 0.18, + "grad_norm": 0.6606768530010558, + "learning_rate": 4.9553542017639655e-06, + "loss": 0.3372, + "step": 3776 + }, + { + "epoch": 0.18, + "grad_norm": 0.6646597143035937, + "learning_rate": 4.955318513385975e-06, + "loss": 0.3118, + "step": 3777 + }, + { + "epoch": 0.18, + "grad_norm": 0.6571352957100912, + "learning_rate": 4.955282810878243e-06, + "loss": 0.3227, + "step": 3778 + }, + { + "epoch": 0.18, + "grad_norm": 0.7954776294886046, + "learning_rate": 4.9552470942409745e-06, + "loss": 0.3264, + "step": 3779 + }, + { + "epoch": 0.18, + "grad_norm": 0.7266462253410153, + "learning_rate": 4.955211363474378e-06, + "loss": 0.3196, + "step": 3780 + }, + { + "epoch": 0.18, + "grad_norm": 0.7047630091321079, + "learning_rate": 4.9551756185786555e-06, + "loss": 0.2976, + "step": 3781 + }, + { + "epoch": 0.18, + "grad_norm": 0.602441511975903, + "learning_rate": 4.9551398595540155e-06, + "loss": 0.2848, + "step": 3782 + }, + { + "epoch": 0.18, + "grad_norm": 0.6527056697448925, + "learning_rate": 4.955104086400663e-06, + "loss": 0.2895, + "step": 3783 + }, + { + "epoch": 0.18, + "grad_norm": 0.6243537093625596, + "learning_rate": 4.955068299118803e-06, + "loss": 0.2921, + "step": 3784 + }, + { + "epoch": 0.18, + "grad_norm": 0.7199693220231292, + "learning_rate": 4.955032497708642e-06, + "loss": 0.3073, + "step": 3785 + }, + { + "epoch": 0.18, + "grad_norm": 0.7168950315572221, + "learning_rate": 4.954996682170385e-06, + "loss": 0.3102, + "step": 3786 + }, + { + "epoch": 0.18, + "grad_norm": 0.6435264558550668, + "learning_rate": 4.95496085250424e-06, + "loss": 0.3177, + "step": 3787 + }, + { + "epoch": 0.18, + "grad_norm": 0.6312465711380998, + "learning_rate": 4.954925008710413e-06, + "loss": 0.3017, + "step": 3788 + }, + { + "epoch": 0.18, + "grad_norm": 0.6439144928952655, + "learning_rate": 4.954889150789109e-06, + "loss": 0.2936, + "step": 3789 + }, + { + "epoch": 0.18, + "grad_norm": 0.7602679746442265, + "learning_rate": 4.9548532787405346e-06, + "loss": 0.3108, + "step": 3790 + }, + { + "epoch": 0.18, + "grad_norm": 0.6237227709823835, + "learning_rate": 4.954817392564897e-06, + "loss": 0.2904, + "step": 3791 + }, + { + "epoch": 0.18, + "grad_norm": 0.6431190084249112, + "learning_rate": 4.954781492262403e-06, + "loss": 0.3019, + "step": 3792 + }, + { + "epoch": 0.18, + "grad_norm": 0.6447170258837911, + "learning_rate": 4.9547455778332575e-06, + "loss": 0.3041, + "step": 3793 + }, + { + "epoch": 0.18, + "grad_norm": 0.682703325270086, + "learning_rate": 4.954709649277668e-06, + "loss": 0.3274, + "step": 3794 + }, + { + "epoch": 0.18, + "grad_norm": 0.6492236478099049, + "learning_rate": 4.954673706595841e-06, + "loss": 0.2977, + "step": 3795 + }, + { + "epoch": 0.18, + "grad_norm": 0.6149368891920085, + "learning_rate": 4.954637749787986e-06, + "loss": 0.2942, + "step": 3796 + }, + { + "epoch": 0.18, + "grad_norm": 0.6485092593350104, + "learning_rate": 4.954601778854306e-06, + "loss": 0.3004, + "step": 3797 + }, + { + "epoch": 0.18, + "grad_norm": 0.6680870435914187, + "learning_rate": 4.95456579379501e-06, + "loss": 0.3, + "step": 3798 + }, + { + "epoch": 0.18, + "grad_norm": 0.6267675363466689, + "learning_rate": 4.954529794610305e-06, + "loss": 0.2827, + "step": 3799 + }, + { + "epoch": 0.18, + "grad_norm": 0.6892091686852478, + "learning_rate": 4.954493781300398e-06, + "loss": 0.3059, + "step": 3800 + }, + { + "epoch": 0.18, + "grad_norm": 0.6116838627106528, + "learning_rate": 4.954457753865496e-06, + "loss": 0.2911, + "step": 3801 + }, + { + "epoch": 0.18, + "grad_norm": 0.6963826651391127, + "learning_rate": 4.9544217123058055e-06, + "loss": 0.288, + "step": 3802 + }, + { + "epoch": 0.18, + "grad_norm": 0.593537320037102, + "learning_rate": 4.954385656621537e-06, + "loss": 0.2965, + "step": 3803 + }, + { + "epoch": 0.18, + "grad_norm": 0.6572777015133263, + "learning_rate": 4.954349586812894e-06, + "loss": 0.3037, + "step": 3804 + }, + { + "epoch": 0.18, + "grad_norm": 0.6387658616285461, + "learning_rate": 4.954313502880087e-06, + "loss": 0.3058, + "step": 3805 + }, + { + "epoch": 0.18, + "grad_norm": 0.6324743276031467, + "learning_rate": 4.9542774048233236e-06, + "loss": 0.3002, + "step": 3806 + }, + { + "epoch": 0.18, + "grad_norm": 0.6195394931467697, + "learning_rate": 4.9542412926428084e-06, + "loss": 0.3104, + "step": 3807 + }, + { + "epoch": 0.18, + "grad_norm": 0.5844826741821396, + "learning_rate": 4.954205166338753e-06, + "loss": 0.3186, + "step": 3808 + }, + { + "epoch": 0.18, + "grad_norm": 0.635015728944429, + "learning_rate": 4.954169025911363e-06, + "loss": 0.3156, + "step": 3809 + }, + { + "epoch": 0.18, + "grad_norm": 0.684195361238336, + "learning_rate": 4.954132871360848e-06, + "loss": 0.2912, + "step": 3810 + }, + { + "epoch": 0.18, + "grad_norm": 0.6284846928250545, + "learning_rate": 4.9540967026874145e-06, + "loss": 0.2893, + "step": 3811 + }, + { + "epoch": 0.18, + "grad_norm": 0.6883236535857055, + "learning_rate": 4.954060519891271e-06, + "loss": 0.3266, + "step": 3812 + }, + { + "epoch": 0.18, + "grad_norm": 0.6765674200870546, + "learning_rate": 4.954024322972626e-06, + "loss": 0.3224, + "step": 3813 + }, + { + "epoch": 0.18, + "grad_norm": 0.6057113242886419, + "learning_rate": 4.953988111931689e-06, + "loss": 0.2753, + "step": 3814 + }, + { + "epoch": 0.18, + "grad_norm": 0.6587593678525668, + "learning_rate": 4.953951886768666e-06, + "loss": 0.3142, + "step": 3815 + }, + { + "epoch": 0.18, + "grad_norm": 0.692137498196956, + "learning_rate": 4.953915647483767e-06, + "loss": 0.3235, + "step": 3816 + }, + { + "epoch": 0.18, + "grad_norm": 0.6001029610761947, + "learning_rate": 4.9538793940772e-06, + "loss": 0.3048, + "step": 3817 + }, + { + "epoch": 0.18, + "grad_norm": 0.6594802752564866, + "learning_rate": 4.953843126549174e-06, + "loss": 0.3194, + "step": 3818 + }, + { + "epoch": 0.18, + "grad_norm": 0.6452811267300098, + "learning_rate": 4.953806844899898e-06, + "loss": 0.3026, + "step": 3819 + }, + { + "epoch": 0.18, + "grad_norm": 0.6196256421776781, + "learning_rate": 4.95377054912958e-06, + "loss": 0.2945, + "step": 3820 + }, + { + "epoch": 0.18, + "grad_norm": 0.6580476604433857, + "learning_rate": 4.9537342392384295e-06, + "loss": 0.3057, + "step": 3821 + }, + { + "epoch": 0.18, + "grad_norm": 0.6857565196950721, + "learning_rate": 4.953697915226654e-06, + "loss": 0.3043, + "step": 3822 + }, + { + "epoch": 0.18, + "grad_norm": 0.7456004007337436, + "learning_rate": 4.953661577094465e-06, + "loss": 0.31, + "step": 3823 + }, + { + "epoch": 0.18, + "grad_norm": 0.664651823946127, + "learning_rate": 4.95362522484207e-06, + "loss": 0.3057, + "step": 3824 + }, + { + "epoch": 0.18, + "grad_norm": 0.770812945910472, + "learning_rate": 4.9535888584696776e-06, + "loss": 0.3018, + "step": 3825 + }, + { + "epoch": 0.18, + "grad_norm": 0.6244445673765677, + "learning_rate": 4.953552477977499e-06, + "loss": 0.2962, + "step": 3826 + }, + { + "epoch": 0.18, + "grad_norm": 0.6329410330292136, + "learning_rate": 4.953516083365742e-06, + "loss": 0.2991, + "step": 3827 + }, + { + "epoch": 0.18, + "grad_norm": 0.7253463297866423, + "learning_rate": 4.953479674634618e-06, + "loss": 0.2877, + "step": 3828 + }, + { + "epoch": 0.18, + "grad_norm": 0.6235486817364045, + "learning_rate": 4.953443251784334e-06, + "loss": 0.2885, + "step": 3829 + }, + { + "epoch": 0.18, + "grad_norm": 0.6157822790229128, + "learning_rate": 4.9534068148151e-06, + "loss": 0.3003, + "step": 3830 + }, + { + "epoch": 0.18, + "grad_norm": 0.6382447875285903, + "learning_rate": 4.953370363727128e-06, + "loss": 0.3007, + "step": 3831 + }, + { + "epoch": 0.18, + "grad_norm": 0.6930263498092591, + "learning_rate": 4.953333898520626e-06, + "loss": 0.3224, + "step": 3832 + }, + { + "epoch": 0.18, + "grad_norm": 0.628986605724357, + "learning_rate": 4.953297419195802e-06, + "loss": 0.3144, + "step": 3833 + }, + { + "epoch": 0.18, + "grad_norm": 0.6342918654488737, + "learning_rate": 4.953260925752871e-06, + "loss": 0.307, + "step": 3834 + }, + { + "epoch": 0.18, + "grad_norm": 0.6882941050780148, + "learning_rate": 4.953224418192038e-06, + "loss": 0.3086, + "step": 3835 + }, + { + "epoch": 0.18, + "grad_norm": 0.6371699153593882, + "learning_rate": 4.953187896513516e-06, + "loss": 0.301, + "step": 3836 + }, + { + "epoch": 0.18, + "grad_norm": 0.6372467323526152, + "learning_rate": 4.953151360717513e-06, + "loss": 0.2997, + "step": 3837 + }, + { + "epoch": 0.18, + "grad_norm": 0.6730091995910144, + "learning_rate": 4.953114810804242e-06, + "loss": 0.3, + "step": 3838 + }, + { + "epoch": 0.18, + "grad_norm": 0.6745486492115671, + "learning_rate": 4.95307824677391e-06, + "loss": 0.3382, + "step": 3839 + }, + { + "epoch": 0.18, + "grad_norm": 0.604878769695645, + "learning_rate": 4.953041668626731e-06, + "loss": 0.2928, + "step": 3840 + }, + { + "epoch": 0.18, + "grad_norm": 0.6529854397311117, + "learning_rate": 4.953005076362913e-06, + "loss": 0.3288, + "step": 3841 + }, + { + "epoch": 0.18, + "grad_norm": 0.6305627767661223, + "learning_rate": 4.952968469982667e-06, + "loss": 0.2892, + "step": 3842 + }, + { + "epoch": 0.18, + "grad_norm": 0.6792964777757876, + "learning_rate": 4.952931849486206e-06, + "loss": 0.308, + "step": 3843 + }, + { + "epoch": 0.18, + "grad_norm": 0.6378099277796465, + "learning_rate": 4.952895214873737e-06, + "loss": 0.2978, + "step": 3844 + }, + { + "epoch": 0.18, + "grad_norm": 0.6801983988543489, + "learning_rate": 4.952858566145472e-06, + "loss": 0.3204, + "step": 3845 + }, + { + "epoch": 0.18, + "grad_norm": 0.6575354835718736, + "learning_rate": 4.952821903301623e-06, + "loss": 0.3205, + "step": 3846 + }, + { + "epoch": 0.18, + "grad_norm": 0.646068724632923, + "learning_rate": 4.9527852263424e-06, + "loss": 0.305, + "step": 3847 + }, + { + "epoch": 0.18, + "grad_norm": 0.6855244940850691, + "learning_rate": 4.952748535268016e-06, + "loss": 0.296, + "step": 3848 + }, + { + "epoch": 0.18, + "grad_norm": 0.6530882972738677, + "learning_rate": 4.952711830078679e-06, + "loss": 0.2974, + "step": 3849 + }, + { + "epoch": 0.18, + "grad_norm": 0.576533778188705, + "learning_rate": 4.952675110774603e-06, + "loss": 0.2769, + "step": 3850 + }, + { + "epoch": 0.18, + "grad_norm": 0.6544812723109635, + "learning_rate": 4.952638377355998e-06, + "loss": 0.3087, + "step": 3851 + }, + { + "epoch": 0.18, + "grad_norm": 0.7129633703346069, + "learning_rate": 4.952601629823075e-06, + "loss": 0.332, + "step": 3852 + }, + { + "epoch": 0.18, + "grad_norm": 0.6293656567911837, + "learning_rate": 4.952564868176047e-06, + "loss": 0.3071, + "step": 3853 + }, + { + "epoch": 0.18, + "grad_norm": 0.6110529028756692, + "learning_rate": 4.952528092415124e-06, + "loss": 0.2923, + "step": 3854 + }, + { + "epoch": 0.18, + "grad_norm": 0.6419245653286838, + "learning_rate": 4.952491302540518e-06, + "loss": 0.2875, + "step": 3855 + }, + { + "epoch": 0.18, + "grad_norm": 0.6321835246877708, + "learning_rate": 4.952454498552441e-06, + "loss": 0.29, + "step": 3856 + }, + { + "epoch": 0.18, + "grad_norm": 0.6753296582083702, + "learning_rate": 4.952417680451106e-06, + "loss": 0.3023, + "step": 3857 + }, + { + "epoch": 0.18, + "grad_norm": 0.6346421668837727, + "learning_rate": 4.952380848236722e-06, + "loss": 0.3211, + "step": 3858 + }, + { + "epoch": 0.18, + "grad_norm": 0.6483967002291722, + "learning_rate": 4.952344001909504e-06, + "loss": 0.2944, + "step": 3859 + }, + { + "epoch": 0.18, + "grad_norm": 0.667891000697319, + "learning_rate": 4.9523071414696615e-06, + "loss": 0.3081, + "step": 3860 + }, + { + "epoch": 0.18, + "grad_norm": 0.5929815033573294, + "learning_rate": 4.952270266917408e-06, + "loss": 0.2988, + "step": 3861 + }, + { + "epoch": 0.18, + "grad_norm": 0.6656622288991725, + "learning_rate": 4.952233378252956e-06, + "loss": 0.3185, + "step": 3862 + }, + { + "epoch": 0.18, + "grad_norm": 0.6145603577636161, + "learning_rate": 4.952196475476517e-06, + "loss": 0.278, + "step": 3863 + }, + { + "epoch": 0.18, + "grad_norm": 0.6938460091665332, + "learning_rate": 4.952159558588303e-06, + "loss": 0.312, + "step": 3864 + }, + { + "epoch": 0.18, + "grad_norm": 0.6597777073439282, + "learning_rate": 4.952122627588528e-06, + "loss": 0.3306, + "step": 3865 + }, + { + "epoch": 0.18, + "grad_norm": 0.6769885036773343, + "learning_rate": 4.952085682477403e-06, + "loss": 0.2908, + "step": 3866 + }, + { + "epoch": 0.18, + "grad_norm": 0.615377393086395, + "learning_rate": 4.952048723255142e-06, + "loss": 0.2787, + "step": 3867 + }, + { + "epoch": 0.18, + "grad_norm": 0.6823500917538109, + "learning_rate": 4.952011749921956e-06, + "loss": 0.3222, + "step": 3868 + }, + { + "epoch": 0.18, + "grad_norm": 0.6354050521313009, + "learning_rate": 4.951974762478059e-06, + "loss": 0.3077, + "step": 3869 + }, + { + "epoch": 0.18, + "grad_norm": 0.6806202103644033, + "learning_rate": 4.951937760923664e-06, + "loss": 0.3166, + "step": 3870 + }, + { + "epoch": 0.18, + "grad_norm": 0.6367206229738928, + "learning_rate": 4.9519007452589825e-06, + "loss": 0.3212, + "step": 3871 + }, + { + "epoch": 0.18, + "grad_norm": 0.6463868385554403, + "learning_rate": 4.951863715484229e-06, + "loss": 0.2996, + "step": 3872 + }, + { + "epoch": 0.18, + "grad_norm": 0.6466340225324219, + "learning_rate": 4.951826671599615e-06, + "loss": 0.2732, + "step": 3873 + }, + { + "epoch": 0.18, + "grad_norm": 0.7274143424091667, + "learning_rate": 4.951789613605357e-06, + "loss": 0.3142, + "step": 3874 + }, + { + "epoch": 0.18, + "grad_norm": 0.728355485917629, + "learning_rate": 4.951752541501664e-06, + "loss": 0.3056, + "step": 3875 + }, + { + "epoch": 0.18, + "grad_norm": 0.6674009017384275, + "learning_rate": 4.951715455288753e-06, + "loss": 0.3112, + "step": 3876 + }, + { + "epoch": 0.18, + "grad_norm": 0.726311686067271, + "learning_rate": 4.951678354966834e-06, + "loss": 0.3102, + "step": 3877 + }, + { + "epoch": 0.18, + "grad_norm": 0.6736735704077752, + "learning_rate": 4.951641240536123e-06, + "loss": 0.2943, + "step": 3878 + }, + { + "epoch": 0.18, + "grad_norm": 0.7052039359066079, + "learning_rate": 4.951604111996834e-06, + "loss": 0.3052, + "step": 3879 + }, + { + "epoch": 0.18, + "grad_norm": 0.6671803198342157, + "learning_rate": 4.951566969349178e-06, + "loss": 0.3132, + "step": 3880 + }, + { + "epoch": 0.18, + "grad_norm": 0.6695452588130605, + "learning_rate": 4.951529812593371e-06, + "loss": 0.2968, + "step": 3881 + }, + { + "epoch": 0.18, + "grad_norm": 0.7280653715166338, + "learning_rate": 4.951492641729626e-06, + "loss": 0.3292, + "step": 3882 + }, + { + "epoch": 0.18, + "grad_norm": 0.6745466703776889, + "learning_rate": 4.9514554567581565e-06, + "loss": 0.3029, + "step": 3883 + }, + { + "epoch": 0.18, + "grad_norm": 0.6522919310521477, + "learning_rate": 4.9514182576791775e-06, + "loss": 0.3149, + "step": 3884 + }, + { + "epoch": 0.18, + "grad_norm": 0.6497961472866646, + "learning_rate": 4.951381044492902e-06, + "loss": 0.3075, + "step": 3885 + }, + { + "epoch": 0.18, + "grad_norm": 0.6185731186432837, + "learning_rate": 4.951343817199545e-06, + "loss": 0.3002, + "step": 3886 + }, + { + "epoch": 0.18, + "grad_norm": 0.6671718493302634, + "learning_rate": 4.9513065757993214e-06, + "loss": 0.3152, + "step": 3887 + }, + { + "epoch": 0.18, + "grad_norm": 0.6494815246566065, + "learning_rate": 4.951269320292444e-06, + "loss": 0.3027, + "step": 3888 + }, + { + "epoch": 0.18, + "grad_norm": 0.6669338507926097, + "learning_rate": 4.9512320506791274e-06, + "loss": 0.3089, + "step": 3889 + }, + { + "epoch": 0.18, + "grad_norm": 0.6351302629814924, + "learning_rate": 4.951194766959587e-06, + "loss": 0.3015, + "step": 3890 + }, + { + "epoch": 0.18, + "grad_norm": 0.6700979031512542, + "learning_rate": 4.951157469134036e-06, + "loss": 0.3162, + "step": 3891 + }, + { + "epoch": 0.18, + "grad_norm": 0.6305412411511966, + "learning_rate": 4.95112015720269e-06, + "loss": 0.291, + "step": 3892 + }, + { + "epoch": 0.18, + "grad_norm": 0.6782890032920235, + "learning_rate": 4.951082831165764e-06, + "loss": 0.3074, + "step": 3893 + }, + { + "epoch": 0.18, + "grad_norm": 0.7301534223898111, + "learning_rate": 4.951045491023473e-06, + "loss": 0.3119, + "step": 3894 + }, + { + "epoch": 0.18, + "grad_norm": 0.6703207274288979, + "learning_rate": 4.95100813677603e-06, + "loss": 0.3196, + "step": 3895 + }, + { + "epoch": 0.18, + "grad_norm": 0.6176043434933147, + "learning_rate": 4.9509707684236515e-06, + "loss": 0.3018, + "step": 3896 + }, + { + "epoch": 0.18, + "grad_norm": 0.6904681511935216, + "learning_rate": 4.9509333859665525e-06, + "loss": 0.3288, + "step": 3897 + }, + { + "epoch": 0.18, + "grad_norm": 0.6843013261486285, + "learning_rate": 4.950895989404948e-06, + "loss": 0.2923, + "step": 3898 + }, + { + "epoch": 0.18, + "grad_norm": 0.6206228135226102, + "learning_rate": 4.950858578739053e-06, + "loss": 0.3004, + "step": 3899 + }, + { + "epoch": 0.18, + "grad_norm": 0.6711023439810306, + "learning_rate": 4.950821153969082e-06, + "loss": 0.3133, + "step": 3900 + }, + { + "epoch": 0.18, + "grad_norm": 0.6918559695502714, + "learning_rate": 4.950783715095252e-06, + "loss": 0.3262, + "step": 3901 + }, + { + "epoch": 0.18, + "grad_norm": 0.6785557586188156, + "learning_rate": 4.9507462621177784e-06, + "loss": 0.3124, + "step": 3902 + }, + { + "epoch": 0.18, + "grad_norm": 0.6343779211041368, + "learning_rate": 4.9507087950368744e-06, + "loss": 0.2847, + "step": 3903 + }, + { + "epoch": 0.18, + "grad_norm": 0.6201983031855963, + "learning_rate": 4.950671313852758e-06, + "loss": 0.2955, + "step": 3904 + }, + { + "epoch": 0.18, + "grad_norm": 0.6579042764251729, + "learning_rate": 4.950633818565645e-06, + "loss": 0.283, + "step": 3905 + }, + { + "epoch": 0.18, + "grad_norm": 0.7474216611349931, + "learning_rate": 4.95059630917575e-06, + "loss": 0.3304, + "step": 3906 + }, + { + "epoch": 0.18, + "grad_norm": 0.6596008241851985, + "learning_rate": 4.950558785683288e-06, + "loss": 0.2985, + "step": 3907 + }, + { + "epoch": 0.18, + "grad_norm": 0.6674766207453474, + "learning_rate": 4.950521248088477e-06, + "loss": 0.3087, + "step": 3908 + }, + { + "epoch": 0.18, + "grad_norm": 0.6638535823202008, + "learning_rate": 4.950483696391533e-06, + "loss": 0.3013, + "step": 3909 + }, + { + "epoch": 0.18, + "grad_norm": 0.6699366715538422, + "learning_rate": 4.95044613059267e-06, + "loss": 0.3267, + "step": 3910 + }, + { + "epoch": 0.18, + "grad_norm": 0.708734058468736, + "learning_rate": 4.9504085506921055e-06, + "loss": 0.3075, + "step": 3911 + }, + { + "epoch": 0.18, + "grad_norm": 0.6897765080175224, + "learning_rate": 4.950370956690056e-06, + "loss": 0.3223, + "step": 3912 + }, + { + "epoch": 0.18, + "grad_norm": 0.6161319836414754, + "learning_rate": 4.950333348586737e-06, + "loss": 0.2924, + "step": 3913 + }, + { + "epoch": 0.18, + "grad_norm": 0.6274886804667097, + "learning_rate": 4.950295726382366e-06, + "loss": 0.2947, + "step": 3914 + }, + { + "epoch": 0.18, + "grad_norm": 0.6348230888850753, + "learning_rate": 4.950258090077159e-06, + "loss": 0.2945, + "step": 3915 + }, + { + "epoch": 0.18, + "grad_norm": 0.6276787555918011, + "learning_rate": 4.950220439671333e-06, + "loss": 0.306, + "step": 3916 + }, + { + "epoch": 0.18, + "grad_norm": 0.6968863726772772, + "learning_rate": 4.950182775165103e-06, + "loss": 0.3112, + "step": 3917 + }, + { + "epoch": 0.18, + "grad_norm": 0.6162590271020141, + "learning_rate": 4.950145096558687e-06, + "loss": 0.3133, + "step": 3918 + }, + { + "epoch": 0.18, + "grad_norm": 0.654240371819918, + "learning_rate": 4.9501074038523025e-06, + "loss": 0.2865, + "step": 3919 + }, + { + "epoch": 0.18, + "grad_norm": 0.6586156446075343, + "learning_rate": 4.950069697046166e-06, + "loss": 0.3074, + "step": 3920 + }, + { + "epoch": 0.18, + "grad_norm": 0.673953263719859, + "learning_rate": 4.950031976140494e-06, + "loss": 0.3289, + "step": 3921 + }, + { + "epoch": 0.18, + "grad_norm": 0.66617159363869, + "learning_rate": 4.949994241135503e-06, + "loss": 0.2987, + "step": 3922 + }, + { + "epoch": 0.18, + "grad_norm": 0.6681442070869108, + "learning_rate": 4.9499564920314116e-06, + "loss": 0.3042, + "step": 3923 + }, + { + "epoch": 0.18, + "grad_norm": 0.6802211162063929, + "learning_rate": 4.9499187288284355e-06, + "loss": 0.3425, + "step": 3924 + }, + { + "epoch": 0.18, + "grad_norm": 0.6423642298679146, + "learning_rate": 4.949880951526794e-06, + "loss": 0.2891, + "step": 3925 + }, + { + "epoch": 0.18, + "grad_norm": 0.7183667521683209, + "learning_rate": 4.949843160126703e-06, + "loss": 0.3245, + "step": 3926 + }, + { + "epoch": 0.18, + "grad_norm": 0.6604712370586171, + "learning_rate": 4.94980535462838e-06, + "loss": 0.286, + "step": 3927 + }, + { + "epoch": 0.18, + "grad_norm": 0.5628237801749574, + "learning_rate": 4.949767535032043e-06, + "loss": 0.2696, + "step": 3928 + }, + { + "epoch": 0.18, + "grad_norm": 0.6403467151595718, + "learning_rate": 4.94972970133791e-06, + "loss": 0.3128, + "step": 3929 + }, + { + "epoch": 0.18, + "grad_norm": 0.7843278532005523, + "learning_rate": 4.9496918535461976e-06, + "loss": 0.3389, + "step": 3930 + }, + { + "epoch": 0.18, + "grad_norm": 0.6514220323808496, + "learning_rate": 4.9496539916571255e-06, + "loss": 0.2954, + "step": 3931 + }, + { + "epoch": 0.18, + "grad_norm": 0.6857185944474273, + "learning_rate": 4.9496161156709095e-06, + "loss": 0.3384, + "step": 3932 + }, + { + "epoch": 0.18, + "grad_norm": 0.6132260224363009, + "learning_rate": 4.949578225587769e-06, + "loss": 0.2814, + "step": 3933 + }, + { + "epoch": 0.18, + "grad_norm": 0.7417342912923326, + "learning_rate": 4.949540321407921e-06, + "loss": 0.3129, + "step": 3934 + }, + { + "epoch": 0.18, + "grad_norm": 0.6048659469081206, + "learning_rate": 4.949502403131583e-06, + "loss": 0.3023, + "step": 3935 + }, + { + "epoch": 0.18, + "grad_norm": 0.6472939072464807, + "learning_rate": 4.949464470758976e-06, + "loss": 0.2764, + "step": 3936 + }, + { + "epoch": 0.18, + "grad_norm": 0.6876517521224476, + "learning_rate": 4.949426524290316e-06, + "loss": 0.311, + "step": 3937 + }, + { + "epoch": 0.18, + "grad_norm": 0.6658593478998952, + "learning_rate": 4.949388563725822e-06, + "loss": 0.3225, + "step": 3938 + }, + { + "epoch": 0.18, + "grad_norm": 0.6982193305215572, + "learning_rate": 4.949350589065713e-06, + "loss": 0.2987, + "step": 3939 + }, + { + "epoch": 0.18, + "grad_norm": 0.6164802441838267, + "learning_rate": 4.9493126003102065e-06, + "loss": 0.3107, + "step": 3940 + }, + { + "epoch": 0.18, + "grad_norm": 0.6636565395530204, + "learning_rate": 4.9492745974595216e-06, + "loss": 0.291, + "step": 3941 + }, + { + "epoch": 0.18, + "grad_norm": 0.6606329858744575, + "learning_rate": 4.949236580513877e-06, + "loss": 0.3109, + "step": 3942 + }, + { + "epoch": 0.18, + "grad_norm": 0.7042762367231757, + "learning_rate": 4.949198549473492e-06, + "loss": 0.2978, + "step": 3943 + }, + { + "epoch": 0.18, + "grad_norm": 0.6610493125381501, + "learning_rate": 4.9491605043385835e-06, + "loss": 0.3137, + "step": 3944 + }, + { + "epoch": 0.18, + "grad_norm": 0.6274154639802467, + "learning_rate": 4.949122445109374e-06, + "loss": 0.3092, + "step": 3945 + }, + { + "epoch": 0.18, + "grad_norm": 0.660735061613187, + "learning_rate": 4.949084371786078e-06, + "loss": 0.2957, + "step": 3946 + }, + { + "epoch": 0.18, + "grad_norm": 0.6505321994083009, + "learning_rate": 4.949046284368919e-06, + "loss": 0.3215, + "step": 3947 + }, + { + "epoch": 0.18, + "grad_norm": 0.6437008629830809, + "learning_rate": 4.949008182858113e-06, + "loss": 0.2769, + "step": 3948 + }, + { + "epoch": 0.18, + "grad_norm": 0.6733097953009933, + "learning_rate": 4.948970067253881e-06, + "loss": 0.3036, + "step": 3949 + }, + { + "epoch": 0.19, + "grad_norm": 0.6278823893478783, + "learning_rate": 4.948931937556442e-06, + "loss": 0.3067, + "step": 3950 + }, + { + "epoch": 0.19, + "grad_norm": 0.6196851503418588, + "learning_rate": 4.948893793766014e-06, + "loss": 0.3097, + "step": 3951 + }, + { + "epoch": 0.19, + "grad_norm": 0.6648903368310763, + "learning_rate": 4.948855635882819e-06, + "loss": 0.3031, + "step": 3952 + }, + { + "epoch": 0.19, + "grad_norm": 0.7269564843448098, + "learning_rate": 4.948817463907074e-06, + "loss": 0.3294, + "step": 3953 + }, + { + "epoch": 0.19, + "grad_norm": 0.6385120338989279, + "learning_rate": 4.9487792778390014e-06, + "loss": 0.3046, + "step": 3954 + }, + { + "epoch": 0.19, + "grad_norm": 0.6247868509098149, + "learning_rate": 4.948741077678819e-06, + "loss": 0.2893, + "step": 3955 + }, + { + "epoch": 0.19, + "grad_norm": 0.6205482615380392, + "learning_rate": 4.9487028634267475e-06, + "loss": 0.2889, + "step": 3956 + }, + { + "epoch": 0.19, + "grad_norm": 0.6799878664434692, + "learning_rate": 4.948664635083006e-06, + "loss": 0.3115, + "step": 3957 + }, + { + "epoch": 0.19, + "grad_norm": 0.6635373522391816, + "learning_rate": 4.948626392647815e-06, + "loss": 0.3076, + "step": 3958 + }, + { + "epoch": 0.19, + "grad_norm": 0.6509507162613966, + "learning_rate": 4.948588136121395e-06, + "loss": 0.2884, + "step": 3959 + }, + { + "epoch": 0.19, + "grad_norm": 0.6136343003141659, + "learning_rate": 4.948549865503965e-06, + "loss": 0.2994, + "step": 3960 + }, + { + "epoch": 0.19, + "grad_norm": 0.609053921138647, + "learning_rate": 4.948511580795746e-06, + "loss": 0.293, + "step": 3961 + }, + { + "epoch": 0.19, + "grad_norm": 0.6538497259839825, + "learning_rate": 4.948473281996959e-06, + "loss": 0.2834, + "step": 3962 + }, + { + "epoch": 0.19, + "grad_norm": 0.6728093679517096, + "learning_rate": 4.948434969107822e-06, + "loss": 0.3063, + "step": 3963 + }, + { + "epoch": 0.19, + "grad_norm": 0.6528870502854779, + "learning_rate": 4.948396642128559e-06, + "loss": 0.3232, + "step": 3964 + }, + { + "epoch": 0.19, + "grad_norm": 0.6669238214821401, + "learning_rate": 4.948358301059388e-06, + "loss": 0.308, + "step": 3965 + }, + { + "epoch": 0.19, + "grad_norm": 0.6514497087307065, + "learning_rate": 4.94831994590053e-06, + "loss": 0.3024, + "step": 3966 + }, + { + "epoch": 0.19, + "grad_norm": 0.6107176695038865, + "learning_rate": 4.9482815766522075e-06, + "loss": 0.2855, + "step": 3967 + }, + { + "epoch": 0.19, + "grad_norm": 0.6427523272484069, + "learning_rate": 4.948243193314639e-06, + "loss": 0.3127, + "step": 3968 + }, + { + "epoch": 0.19, + "grad_norm": 0.6300986836126059, + "learning_rate": 4.9482047958880455e-06, + "loss": 0.305, + "step": 3969 + }, + { + "epoch": 0.19, + "grad_norm": 0.6741435910380809, + "learning_rate": 4.94816638437265e-06, + "loss": 0.3192, + "step": 3970 + }, + { + "epoch": 0.19, + "grad_norm": 0.7235803508441121, + "learning_rate": 4.9481279587686715e-06, + "loss": 0.3143, + "step": 3971 + }, + { + "epoch": 0.19, + "grad_norm": 0.6826236383347479, + "learning_rate": 4.948089519076332e-06, + "loss": 0.3004, + "step": 3972 + }, + { + "epoch": 0.19, + "grad_norm": 0.6606006308986659, + "learning_rate": 4.9480510652958525e-06, + "loss": 0.2941, + "step": 3973 + }, + { + "epoch": 0.19, + "grad_norm": 0.6927407078450962, + "learning_rate": 4.948012597427455e-06, + "loss": 0.2944, + "step": 3974 + }, + { + "epoch": 0.19, + "grad_norm": 0.6861844239786287, + "learning_rate": 4.947974115471359e-06, + "loss": 0.3216, + "step": 3975 + }, + { + "epoch": 0.19, + "grad_norm": 0.7011065258090498, + "learning_rate": 4.947935619427788e-06, + "loss": 0.2998, + "step": 3976 + }, + { + "epoch": 0.19, + "grad_norm": 0.680250596621615, + "learning_rate": 4.947897109296963e-06, + "loss": 0.3195, + "step": 3977 + }, + { + "epoch": 0.19, + "grad_norm": 0.66128836062599, + "learning_rate": 4.947858585079106e-06, + "loss": 0.3305, + "step": 3978 + }, + { + "epoch": 0.19, + "grad_norm": 0.6818930733972258, + "learning_rate": 4.947820046774437e-06, + "loss": 0.3241, + "step": 3979 + }, + { + "epoch": 0.19, + "grad_norm": 0.6642742919001419, + "learning_rate": 4.947781494383179e-06, + "loss": 0.3009, + "step": 3980 + }, + { + "epoch": 0.19, + "grad_norm": 0.6345695918561203, + "learning_rate": 4.947742927905554e-06, + "loss": 0.2976, + "step": 3981 + }, + { + "epoch": 0.19, + "grad_norm": 0.6656821151431106, + "learning_rate": 4.9477043473417844e-06, + "loss": 0.3099, + "step": 3982 + }, + { + "epoch": 0.19, + "grad_norm": 0.6930919262497441, + "learning_rate": 4.94766575269209e-06, + "loss": 0.3037, + "step": 3983 + }, + { + "epoch": 0.19, + "grad_norm": 0.666885965327191, + "learning_rate": 4.9476271439566955e-06, + "loss": 0.2847, + "step": 3984 + }, + { + "epoch": 0.19, + "grad_norm": 0.679000538353933, + "learning_rate": 4.947588521135821e-06, + "loss": 0.3216, + "step": 3985 + }, + { + "epoch": 0.19, + "grad_norm": 0.6463745145614769, + "learning_rate": 4.947549884229691e-06, + "loss": 0.3003, + "step": 3986 + }, + { + "epoch": 0.19, + "grad_norm": 0.7438162171474378, + "learning_rate": 4.947511233238525e-06, + "loss": 0.3105, + "step": 3987 + }, + { + "epoch": 0.19, + "grad_norm": 0.6591293845092312, + "learning_rate": 4.947472568162548e-06, + "loss": 0.3197, + "step": 3988 + }, + { + "epoch": 0.19, + "grad_norm": 0.5979033318288396, + "learning_rate": 4.947433889001982e-06, + "loss": 0.2837, + "step": 3989 + }, + { + "epoch": 0.19, + "grad_norm": 0.647753163181197, + "learning_rate": 4.947395195757049e-06, + "loss": 0.291, + "step": 3990 + }, + { + "epoch": 0.19, + "grad_norm": 0.6909971849241102, + "learning_rate": 4.947356488427971e-06, + "loss": 0.3181, + "step": 3991 + }, + { + "epoch": 0.19, + "grad_norm": 0.6340257628027509, + "learning_rate": 4.947317767014972e-06, + "loss": 0.3032, + "step": 3992 + }, + { + "epoch": 0.19, + "grad_norm": 0.6279732045125143, + "learning_rate": 4.947279031518274e-06, + "loss": 0.2908, + "step": 3993 + }, + { + "epoch": 0.19, + "grad_norm": 0.6781615031972513, + "learning_rate": 4.947240281938101e-06, + "loss": 0.3104, + "step": 3994 + }, + { + "epoch": 0.19, + "grad_norm": 0.6874031210042395, + "learning_rate": 4.947201518274674e-06, + "loss": 0.3058, + "step": 3995 + }, + { + "epoch": 0.19, + "grad_norm": 0.6202072521330736, + "learning_rate": 4.947162740528219e-06, + "loss": 0.2956, + "step": 3996 + }, + { + "epoch": 0.19, + "grad_norm": 0.736186472666844, + "learning_rate": 4.947123948698956e-06, + "loss": 0.3043, + "step": 3997 + }, + { + "epoch": 0.19, + "grad_norm": 0.6804924012149006, + "learning_rate": 4.947085142787111e-06, + "loss": 0.3092, + "step": 3998 + }, + { + "epoch": 0.19, + "grad_norm": 0.7002335267262974, + "learning_rate": 4.9470463227929045e-06, + "loss": 0.3125, + "step": 3999 + }, + { + "epoch": 0.19, + "grad_norm": 0.6948965760608342, + "learning_rate": 4.947007488716562e-06, + "loss": 0.3003, + "step": 4000 + }, + { + "epoch": 0.19, + "grad_norm": 0.6463040354037872, + "learning_rate": 4.946968640558307e-06, + "loss": 0.3056, + "step": 4001 + }, + { + "epoch": 0.19, + "grad_norm": 0.6696435940729352, + "learning_rate": 4.946929778318363e-06, + "loss": 0.3103, + "step": 4002 + }, + { + "epoch": 0.19, + "grad_norm": 0.6123970071735837, + "learning_rate": 4.946890901996952e-06, + "loss": 0.2952, + "step": 4003 + }, + { + "epoch": 0.19, + "grad_norm": 0.6731440222283925, + "learning_rate": 4.946852011594299e-06, + "loss": 0.286, + "step": 4004 + }, + { + "epoch": 0.19, + "grad_norm": 0.7086137582180966, + "learning_rate": 4.9468131071106285e-06, + "loss": 0.2967, + "step": 4005 + }, + { + "epoch": 0.19, + "grad_norm": 0.6299162955026192, + "learning_rate": 4.946774188546163e-06, + "loss": 0.3012, + "step": 4006 + }, + { + "epoch": 0.19, + "grad_norm": 0.5828398978130501, + "learning_rate": 4.946735255901127e-06, + "loss": 0.2745, + "step": 4007 + }, + { + "epoch": 0.19, + "grad_norm": 0.6690716612590937, + "learning_rate": 4.9466963091757446e-06, + "loss": 0.3345, + "step": 4008 + }, + { + "epoch": 0.19, + "grad_norm": 0.6685616092090618, + "learning_rate": 4.946657348370239e-06, + "loss": 0.3056, + "step": 4009 + }, + { + "epoch": 0.19, + "grad_norm": 0.6270102849636614, + "learning_rate": 4.946618373484836e-06, + "loss": 0.2965, + "step": 4010 + }, + { + "epoch": 0.19, + "grad_norm": 0.6203807049692027, + "learning_rate": 4.9465793845197606e-06, + "loss": 0.2975, + "step": 4011 + }, + { + "epoch": 0.19, + "grad_norm": 0.6492791792665498, + "learning_rate": 4.946540381475234e-06, + "loss": 0.2884, + "step": 4012 + }, + { + "epoch": 0.19, + "grad_norm": 0.631858564022216, + "learning_rate": 4.9465013643514825e-06, + "loss": 0.3111, + "step": 4013 + }, + { + "epoch": 0.19, + "grad_norm": 0.6241410734656001, + "learning_rate": 4.946462333148732e-06, + "loss": 0.3025, + "step": 4014 + }, + { + "epoch": 0.19, + "grad_norm": 0.647680396361955, + "learning_rate": 4.946423287867204e-06, + "loss": 0.3065, + "step": 4015 + }, + { + "epoch": 0.19, + "grad_norm": 0.6958073893635972, + "learning_rate": 4.946384228507126e-06, + "loss": 0.2937, + "step": 4016 + }, + { + "epoch": 0.19, + "grad_norm": 1.056434764257981, + "learning_rate": 4.946345155068721e-06, + "loss": 0.306, + "step": 4017 + }, + { + "epoch": 0.19, + "grad_norm": 0.6399977984100117, + "learning_rate": 4.946306067552214e-06, + "loss": 0.3038, + "step": 4018 + }, + { + "epoch": 0.19, + "grad_norm": 0.6407805815432042, + "learning_rate": 4.9462669659578315e-06, + "loss": 0.2923, + "step": 4019 + }, + { + "epoch": 0.19, + "grad_norm": 0.617666967227774, + "learning_rate": 4.946227850285798e-06, + "loss": 0.286, + "step": 4020 + }, + { + "epoch": 0.19, + "grad_norm": 0.7114699989071811, + "learning_rate": 4.946188720536337e-06, + "loss": 0.3251, + "step": 4021 + }, + { + "epoch": 0.19, + "grad_norm": 0.6719297303330776, + "learning_rate": 4.946149576709675e-06, + "loss": 0.2959, + "step": 4022 + }, + { + "epoch": 0.19, + "grad_norm": 0.7231490451272335, + "learning_rate": 4.946110418806036e-06, + "loss": 0.31, + "step": 4023 + }, + { + "epoch": 0.19, + "grad_norm": 0.6795599188492125, + "learning_rate": 4.946071246825648e-06, + "loss": 0.3053, + "step": 4024 + }, + { + "epoch": 0.19, + "grad_norm": 0.6371725011861904, + "learning_rate": 4.946032060768734e-06, + "loss": 0.3239, + "step": 4025 + }, + { + "epoch": 0.19, + "grad_norm": 0.6505441789227641, + "learning_rate": 4.945992860635519e-06, + "loss": 0.3104, + "step": 4026 + }, + { + "epoch": 0.19, + "grad_norm": 0.6883772884317736, + "learning_rate": 4.945953646426232e-06, + "loss": 0.2994, + "step": 4027 + }, + { + "epoch": 0.19, + "grad_norm": 0.7463740351335906, + "learning_rate": 4.945914418141095e-06, + "loss": 0.2942, + "step": 4028 + }, + { + "epoch": 0.19, + "grad_norm": 0.6876262772221503, + "learning_rate": 4.9458751757803365e-06, + "loss": 0.2927, + "step": 4029 + }, + { + "epoch": 0.19, + "grad_norm": 0.6565757172426933, + "learning_rate": 4.9458359193441805e-06, + "loss": 0.3229, + "step": 4030 + }, + { + "epoch": 0.19, + "grad_norm": 0.6764059421219064, + "learning_rate": 4.9457966488328535e-06, + "loss": 0.3279, + "step": 4031 + }, + { + "epoch": 0.19, + "grad_norm": 0.609052778819772, + "learning_rate": 4.9457573642465815e-06, + "loss": 0.2966, + "step": 4032 + }, + { + "epoch": 0.19, + "grad_norm": 0.6849800503278759, + "learning_rate": 4.945718065585591e-06, + "loss": 0.3193, + "step": 4033 + }, + { + "epoch": 0.19, + "grad_norm": 0.7085945242467979, + "learning_rate": 4.945678752850107e-06, + "loss": 0.3001, + "step": 4034 + }, + { + "epoch": 0.19, + "grad_norm": 0.6711784788386089, + "learning_rate": 4.945639426040357e-06, + "loss": 0.2923, + "step": 4035 + }, + { + "epoch": 0.19, + "grad_norm": 0.6762028364639064, + "learning_rate": 4.945600085156566e-06, + "loss": 0.3163, + "step": 4036 + }, + { + "epoch": 0.19, + "grad_norm": 0.615892860267046, + "learning_rate": 4.945560730198963e-06, + "loss": 0.2957, + "step": 4037 + }, + { + "epoch": 0.19, + "grad_norm": 0.6615497537142055, + "learning_rate": 4.945521361167771e-06, + "loss": 0.3179, + "step": 4038 + }, + { + "epoch": 0.19, + "grad_norm": 0.6779255772696079, + "learning_rate": 4.945481978063219e-06, + "loss": 0.3119, + "step": 4039 + }, + { + "epoch": 0.19, + "grad_norm": 0.642064766588064, + "learning_rate": 4.945442580885533e-06, + "loss": 0.2996, + "step": 4040 + }, + { + "epoch": 0.19, + "grad_norm": 0.6242205474600128, + "learning_rate": 4.945403169634939e-06, + "loss": 0.2994, + "step": 4041 + }, + { + "epoch": 0.19, + "grad_norm": 0.6770770228249772, + "learning_rate": 4.945363744311664e-06, + "loss": 0.2996, + "step": 4042 + }, + { + "epoch": 0.19, + "grad_norm": 0.6015218119097001, + "learning_rate": 4.945324304915936e-06, + "loss": 0.2864, + "step": 4043 + }, + { + "epoch": 0.19, + "grad_norm": 0.6162962689049805, + "learning_rate": 4.9452848514479814e-06, + "loss": 0.2846, + "step": 4044 + }, + { + "epoch": 0.19, + "grad_norm": 0.6238779990436835, + "learning_rate": 4.9452453839080275e-06, + "loss": 0.2842, + "step": 4045 + }, + { + "epoch": 0.19, + "grad_norm": 0.6500167559235598, + "learning_rate": 4.9452059022963e-06, + "loss": 0.2909, + "step": 4046 + }, + { + "epoch": 0.19, + "grad_norm": 0.6809930227127255, + "learning_rate": 4.945166406613027e-06, + "loss": 0.3184, + "step": 4047 + }, + { + "epoch": 0.19, + "grad_norm": 0.5817051882442985, + "learning_rate": 4.945126896858436e-06, + "loss": 0.2917, + "step": 4048 + }, + { + "epoch": 0.19, + "grad_norm": 0.619619665319572, + "learning_rate": 4.945087373032755e-06, + "loss": 0.2975, + "step": 4049 + }, + { + "epoch": 0.19, + "grad_norm": 0.6715203322094637, + "learning_rate": 4.945047835136211e-06, + "loss": 0.323, + "step": 4050 + }, + { + "epoch": 0.19, + "grad_norm": 0.6907206935431491, + "learning_rate": 4.94500828316903e-06, + "loss": 0.2982, + "step": 4051 + }, + { + "epoch": 0.19, + "grad_norm": 0.6313029643350748, + "learning_rate": 4.944968717131441e-06, + "loss": 0.3026, + "step": 4052 + }, + { + "epoch": 0.19, + "grad_norm": 0.7186612696828485, + "learning_rate": 4.944929137023672e-06, + "loss": 0.2854, + "step": 4053 + }, + { + "epoch": 0.19, + "grad_norm": 0.719452369747068, + "learning_rate": 4.944889542845951e-06, + "loss": 0.3223, + "step": 4054 + }, + { + "epoch": 0.19, + "grad_norm": 0.7138271316447846, + "learning_rate": 4.944849934598504e-06, + "loss": 0.2986, + "step": 4055 + }, + { + "epoch": 0.19, + "grad_norm": 0.6695386538215767, + "learning_rate": 4.94481031228156e-06, + "loss": 0.2857, + "step": 4056 + }, + { + "epoch": 0.19, + "grad_norm": 0.6309283869072212, + "learning_rate": 4.944770675895349e-06, + "loss": 0.2808, + "step": 4057 + }, + { + "epoch": 0.19, + "grad_norm": 0.6566257093179256, + "learning_rate": 4.944731025440095e-06, + "loss": 0.3099, + "step": 4058 + }, + { + "epoch": 0.19, + "grad_norm": 0.5895292717749633, + "learning_rate": 4.94469136091603e-06, + "loss": 0.2954, + "step": 4059 + }, + { + "epoch": 0.19, + "grad_norm": 0.6968774191750897, + "learning_rate": 4.9446516823233795e-06, + "loss": 0.3106, + "step": 4060 + }, + { + "epoch": 0.19, + "grad_norm": 0.6660930437586527, + "learning_rate": 4.944611989662373e-06, + "loss": 0.3127, + "step": 4061 + }, + { + "epoch": 0.19, + "grad_norm": 0.6777853319064056, + "learning_rate": 4.94457228293324e-06, + "loss": 0.2962, + "step": 4062 + }, + { + "epoch": 0.19, + "grad_norm": 0.6647785041910057, + "learning_rate": 4.944532562136207e-06, + "loss": 0.3226, + "step": 4063 + }, + { + "epoch": 0.19, + "grad_norm": 0.6649997435454427, + "learning_rate": 4.944492827271504e-06, + "loss": 0.2905, + "step": 4064 + }, + { + "epoch": 0.19, + "grad_norm": 0.650592516958424, + "learning_rate": 4.94445307833936e-06, + "loss": 0.2987, + "step": 4065 + }, + { + "epoch": 0.19, + "grad_norm": 0.6873429099878308, + "learning_rate": 4.944413315340001e-06, + "loss": 0.2991, + "step": 4066 + }, + { + "epoch": 0.19, + "grad_norm": 0.7129572857755725, + "learning_rate": 4.944373538273659e-06, + "loss": 0.3228, + "step": 4067 + }, + { + "epoch": 0.19, + "grad_norm": 0.720066914269198, + "learning_rate": 4.944333747140562e-06, + "loss": 0.3169, + "step": 4068 + }, + { + "epoch": 0.19, + "grad_norm": 0.7173104338697783, + "learning_rate": 4.944293941940938e-06, + "loss": 0.3262, + "step": 4069 + }, + { + "epoch": 0.19, + "grad_norm": 0.6245944918113665, + "learning_rate": 4.944254122675016e-06, + "loss": 0.3258, + "step": 4070 + }, + { + "epoch": 0.19, + "grad_norm": 0.6506344222223426, + "learning_rate": 4.944214289343027e-06, + "loss": 0.3063, + "step": 4071 + }, + { + "epoch": 0.19, + "grad_norm": 0.6766487589683423, + "learning_rate": 4.944174441945199e-06, + "loss": 0.2819, + "step": 4072 + }, + { + "epoch": 0.19, + "grad_norm": 0.6477755603289902, + "learning_rate": 4.9441345804817605e-06, + "loss": 0.3015, + "step": 4073 + }, + { + "epoch": 0.19, + "grad_norm": 0.6782299151274053, + "learning_rate": 4.9440947049529435e-06, + "loss": 0.3003, + "step": 4074 + }, + { + "epoch": 0.19, + "grad_norm": 0.6478883310019935, + "learning_rate": 4.944054815358974e-06, + "loss": 0.3004, + "step": 4075 + }, + { + "epoch": 0.19, + "grad_norm": 0.6388899429772594, + "learning_rate": 4.944014911700085e-06, + "loss": 0.2902, + "step": 4076 + }, + { + "epoch": 0.19, + "grad_norm": 0.6508246997724356, + "learning_rate": 4.943974993976503e-06, + "loss": 0.3207, + "step": 4077 + }, + { + "epoch": 0.19, + "grad_norm": 0.7100455383878176, + "learning_rate": 4.9439350621884595e-06, + "loss": 0.3021, + "step": 4078 + }, + { + "epoch": 0.19, + "grad_norm": 0.6261266849415391, + "learning_rate": 4.943895116336184e-06, + "loss": 0.2764, + "step": 4079 + }, + { + "epoch": 0.19, + "grad_norm": 0.6528802091460985, + "learning_rate": 4.943855156419907e-06, + "loss": 0.3208, + "step": 4080 + }, + { + "epoch": 0.19, + "grad_norm": 0.6255832837263752, + "learning_rate": 4.943815182439858e-06, + "loss": 0.2945, + "step": 4081 + }, + { + "epoch": 0.19, + "grad_norm": 0.6254182695441898, + "learning_rate": 4.943775194396265e-06, + "loss": 0.2979, + "step": 4082 + }, + { + "epoch": 0.19, + "grad_norm": 0.6206138023161015, + "learning_rate": 4.943735192289361e-06, + "loss": 0.3112, + "step": 4083 + }, + { + "epoch": 0.19, + "grad_norm": 0.644868334251421, + "learning_rate": 4.943695176119376e-06, + "loss": 0.3137, + "step": 4084 + }, + { + "epoch": 0.19, + "grad_norm": 0.717473932110518, + "learning_rate": 4.943655145886539e-06, + "loss": 0.3074, + "step": 4085 + }, + { + "epoch": 0.19, + "grad_norm": 0.6050138296431044, + "learning_rate": 4.94361510159108e-06, + "loss": 0.2992, + "step": 4086 + }, + { + "epoch": 0.19, + "grad_norm": 0.6148089838385821, + "learning_rate": 4.943575043233231e-06, + "loss": 0.2825, + "step": 4087 + }, + { + "epoch": 0.19, + "grad_norm": 0.6769806335652231, + "learning_rate": 4.943534970813222e-06, + "loss": 0.2996, + "step": 4088 + }, + { + "epoch": 0.19, + "grad_norm": 0.6581465905534803, + "learning_rate": 4.943494884331282e-06, + "loss": 0.2951, + "step": 4089 + }, + { + "epoch": 0.19, + "grad_norm": 0.6394452451495053, + "learning_rate": 4.943454783787644e-06, + "loss": 0.3119, + "step": 4090 + }, + { + "epoch": 0.19, + "grad_norm": 0.637693307439015, + "learning_rate": 4.943414669182539e-06, + "loss": 0.2985, + "step": 4091 + }, + { + "epoch": 0.19, + "grad_norm": 0.6185736486519982, + "learning_rate": 4.943374540516196e-06, + "loss": 0.2951, + "step": 4092 + }, + { + "epoch": 0.19, + "grad_norm": 0.7137331448637823, + "learning_rate": 4.943334397788846e-06, + "loss": 0.3214, + "step": 4093 + }, + { + "epoch": 0.19, + "grad_norm": 0.6729389933093118, + "learning_rate": 4.943294241000721e-06, + "loss": 0.2974, + "step": 4094 + }, + { + "epoch": 0.19, + "grad_norm": 0.6544911240689266, + "learning_rate": 4.943254070152052e-06, + "loss": 0.323, + "step": 4095 + }, + { + "epoch": 0.19, + "grad_norm": 0.6307486287723785, + "learning_rate": 4.94321388524307e-06, + "loss": 0.2934, + "step": 4096 + }, + { + "epoch": 0.19, + "grad_norm": 0.6826314206008418, + "learning_rate": 4.943173686274005e-06, + "loss": 0.2958, + "step": 4097 + }, + { + "epoch": 0.19, + "grad_norm": 0.7167698642439724, + "learning_rate": 4.943133473245091e-06, + "loss": 0.3129, + "step": 4098 + }, + { + "epoch": 0.19, + "grad_norm": 0.7264539515881033, + "learning_rate": 4.9430932461565575e-06, + "loss": 0.3059, + "step": 4099 + }, + { + "epoch": 0.19, + "grad_norm": 0.61644830990953, + "learning_rate": 4.943053005008635e-06, + "loss": 0.3075, + "step": 4100 + }, + { + "epoch": 0.19, + "grad_norm": 0.6347567861706847, + "learning_rate": 4.943012749801559e-06, + "loss": 0.2985, + "step": 4101 + }, + { + "epoch": 0.19, + "grad_norm": 0.665240038940821, + "learning_rate": 4.942972480535557e-06, + "loss": 0.2739, + "step": 4102 + }, + { + "epoch": 0.19, + "grad_norm": 0.6340101727567423, + "learning_rate": 4.9429321972108624e-06, + "loss": 0.2863, + "step": 4103 + }, + { + "epoch": 0.19, + "grad_norm": 0.6747082834588, + "learning_rate": 4.942891899827708e-06, + "loss": 0.3038, + "step": 4104 + }, + { + "epoch": 0.19, + "grad_norm": 0.6503186116087462, + "learning_rate": 4.942851588386324e-06, + "loss": 0.3108, + "step": 4105 + }, + { + "epoch": 0.19, + "grad_norm": 0.6415081072712722, + "learning_rate": 4.9428112628869425e-06, + "loss": 0.2858, + "step": 4106 + }, + { + "epoch": 0.19, + "grad_norm": 0.7003380004804693, + "learning_rate": 4.942770923329797e-06, + "loss": 0.3224, + "step": 4107 + }, + { + "epoch": 0.19, + "grad_norm": 0.6326384474411696, + "learning_rate": 4.942730569715119e-06, + "loss": 0.2943, + "step": 4108 + }, + { + "epoch": 0.19, + "grad_norm": 0.6198777569007651, + "learning_rate": 4.94269020204314e-06, + "loss": 0.2945, + "step": 4109 + }, + { + "epoch": 0.19, + "grad_norm": 0.6284866463330124, + "learning_rate": 4.942649820314092e-06, + "loss": 0.3222, + "step": 4110 + }, + { + "epoch": 0.19, + "grad_norm": 0.6614469782428811, + "learning_rate": 4.94260942452821e-06, + "loss": 0.3133, + "step": 4111 + }, + { + "epoch": 0.19, + "grad_norm": 0.6977629948004583, + "learning_rate": 4.942569014685724e-06, + "loss": 0.3088, + "step": 4112 + }, + { + "epoch": 0.19, + "grad_norm": 0.6673090849165911, + "learning_rate": 4.942528590786867e-06, + "loss": 0.2892, + "step": 4113 + }, + { + "epoch": 0.19, + "grad_norm": 0.6365463660019918, + "learning_rate": 4.942488152831873e-06, + "loss": 0.2811, + "step": 4114 + }, + { + "epoch": 0.19, + "grad_norm": 0.6797860676624364, + "learning_rate": 4.942447700820972e-06, + "loss": 0.3105, + "step": 4115 + }, + { + "epoch": 0.19, + "grad_norm": 0.6266442883566442, + "learning_rate": 4.942407234754399e-06, + "loss": 0.2916, + "step": 4116 + }, + { + "epoch": 0.19, + "grad_norm": 0.6588311076644136, + "learning_rate": 4.942366754632386e-06, + "loss": 0.3035, + "step": 4117 + }, + { + "epoch": 0.19, + "grad_norm": 0.6836797319363849, + "learning_rate": 4.942326260455167e-06, + "loss": 0.2843, + "step": 4118 + }, + { + "epoch": 0.19, + "grad_norm": 0.6372033311516753, + "learning_rate": 4.942285752222973e-06, + "loss": 0.3115, + "step": 4119 + }, + { + "epoch": 0.19, + "grad_norm": 0.6079992250321575, + "learning_rate": 4.942245229936039e-06, + "loss": 0.2982, + "step": 4120 + }, + { + "epoch": 0.19, + "grad_norm": 0.6532451587724025, + "learning_rate": 4.9422046935945975e-06, + "loss": 0.306, + "step": 4121 + }, + { + "epoch": 0.19, + "grad_norm": 0.6485491975665721, + "learning_rate": 4.942164143198882e-06, + "loss": 0.3029, + "step": 4122 + }, + { + "epoch": 0.19, + "grad_norm": 0.6207974525798321, + "learning_rate": 4.942123578749125e-06, + "loss": 0.3092, + "step": 4123 + }, + { + "epoch": 0.19, + "grad_norm": 0.6202883485558583, + "learning_rate": 4.9420830002455615e-06, + "loss": 0.279, + "step": 4124 + }, + { + "epoch": 0.19, + "grad_norm": 0.6517647535691435, + "learning_rate": 4.942042407688423e-06, + "loss": 0.306, + "step": 4125 + }, + { + "epoch": 0.19, + "grad_norm": 0.6752871402619917, + "learning_rate": 4.942001801077946e-06, + "loss": 0.3121, + "step": 4126 + }, + { + "epoch": 0.19, + "grad_norm": 0.6187110045047849, + "learning_rate": 4.9419611804143605e-06, + "loss": 0.308, + "step": 4127 + }, + { + "epoch": 0.19, + "grad_norm": 0.6815090129746666, + "learning_rate": 4.941920545697904e-06, + "loss": 0.3138, + "step": 4128 + }, + { + "epoch": 0.19, + "grad_norm": 0.6686087539565762, + "learning_rate": 4.941879896928807e-06, + "loss": 0.3061, + "step": 4129 + }, + { + "epoch": 0.19, + "grad_norm": 0.660355084307782, + "learning_rate": 4.941839234107305e-06, + "loss": 0.3259, + "step": 4130 + }, + { + "epoch": 0.19, + "grad_norm": 0.6200241907585098, + "learning_rate": 4.941798557233633e-06, + "loss": 0.3101, + "step": 4131 + }, + { + "epoch": 0.19, + "grad_norm": 0.6398798407568975, + "learning_rate": 4.941757866308024e-06, + "loss": 0.2946, + "step": 4132 + }, + { + "epoch": 0.19, + "grad_norm": 0.6895995411503758, + "learning_rate": 4.941717161330712e-06, + "loss": 0.2974, + "step": 4133 + }, + { + "epoch": 0.19, + "grad_norm": 0.6511794848651576, + "learning_rate": 4.94167644230193e-06, + "loss": 0.3122, + "step": 4134 + }, + { + "epoch": 0.19, + "grad_norm": 0.6322237345767258, + "learning_rate": 4.941635709221915e-06, + "loss": 0.3064, + "step": 4135 + }, + { + "epoch": 0.19, + "grad_norm": 0.6728204013705917, + "learning_rate": 4.9415949620909e-06, + "loss": 0.3142, + "step": 4136 + }, + { + "epoch": 0.19, + "grad_norm": 0.6343797521622473, + "learning_rate": 4.94155420090912e-06, + "loss": 0.3017, + "step": 4137 + }, + { + "epoch": 0.19, + "grad_norm": 0.6402992914631411, + "learning_rate": 4.941513425676808e-06, + "loss": 0.2826, + "step": 4138 + }, + { + "epoch": 0.19, + "grad_norm": 0.7328090466642322, + "learning_rate": 4.941472636394201e-06, + "loss": 0.323, + "step": 4139 + }, + { + "epoch": 0.19, + "grad_norm": 0.7305690229067504, + "learning_rate": 4.941431833061533e-06, + "loss": 0.3111, + "step": 4140 + }, + { + "epoch": 0.19, + "grad_norm": 0.632051322063656, + "learning_rate": 4.941391015679038e-06, + "loss": 0.3171, + "step": 4141 + }, + { + "epoch": 0.19, + "grad_norm": 0.631831517818316, + "learning_rate": 4.941350184246951e-06, + "loss": 0.3097, + "step": 4142 + }, + { + "epoch": 0.19, + "grad_norm": 0.6842786157734789, + "learning_rate": 4.941309338765508e-06, + "loss": 0.2954, + "step": 4143 + }, + { + "epoch": 0.19, + "grad_norm": 0.604713092602542, + "learning_rate": 4.941268479234942e-06, + "loss": 0.2933, + "step": 4144 + }, + { + "epoch": 0.19, + "grad_norm": 0.6510344189426738, + "learning_rate": 4.94122760565549e-06, + "loss": 0.3076, + "step": 4145 + }, + { + "epoch": 0.19, + "grad_norm": 0.608393534720883, + "learning_rate": 4.941186718027388e-06, + "loss": 0.2987, + "step": 4146 + }, + { + "epoch": 0.19, + "grad_norm": 0.6590722500234707, + "learning_rate": 4.941145816350868e-06, + "loss": 0.3047, + "step": 4147 + }, + { + "epoch": 0.19, + "grad_norm": 0.6935630475154986, + "learning_rate": 4.941104900626169e-06, + "loss": 0.292, + "step": 4148 + }, + { + "epoch": 0.19, + "grad_norm": 0.6673052518764858, + "learning_rate": 4.941063970853524e-06, + "loss": 0.3162, + "step": 4149 + }, + { + "epoch": 0.19, + "grad_norm": 0.6835645274666473, + "learning_rate": 4.94102302703317e-06, + "loss": 0.3222, + "step": 4150 + }, + { + "epoch": 0.19, + "grad_norm": 0.6086800216153062, + "learning_rate": 4.9409820691653415e-06, + "loss": 0.3002, + "step": 4151 + }, + { + "epoch": 0.19, + "grad_norm": 0.5897163008580615, + "learning_rate": 4.940941097250274e-06, + "loss": 0.2792, + "step": 4152 + }, + { + "epoch": 0.19, + "grad_norm": 0.6830898621621222, + "learning_rate": 4.940900111288206e-06, + "loss": 0.2904, + "step": 4153 + }, + { + "epoch": 0.19, + "grad_norm": 0.6441274599804213, + "learning_rate": 4.94085911127937e-06, + "loss": 0.3083, + "step": 4154 + }, + { + "epoch": 0.19, + "grad_norm": 0.6786433431709745, + "learning_rate": 4.940818097224004e-06, + "loss": 0.3203, + "step": 4155 + }, + { + "epoch": 0.19, + "grad_norm": 0.687524983564155, + "learning_rate": 4.940777069122342e-06, + "loss": 0.3092, + "step": 4156 + }, + { + "epoch": 0.19, + "grad_norm": 0.643311954373158, + "learning_rate": 4.940736026974623e-06, + "loss": 0.3085, + "step": 4157 + }, + { + "epoch": 0.19, + "grad_norm": 0.6607644930624998, + "learning_rate": 4.9406949707810806e-06, + "loss": 0.2957, + "step": 4158 + }, + { + "epoch": 0.19, + "grad_norm": 0.684191828013537, + "learning_rate": 4.940653900541952e-06, + "loss": 0.3009, + "step": 4159 + }, + { + "epoch": 0.19, + "grad_norm": 0.6653989104858317, + "learning_rate": 4.940612816257474e-06, + "loss": 0.2911, + "step": 4160 + }, + { + "epoch": 0.19, + "grad_norm": 0.6308129916413564, + "learning_rate": 4.9405717179278835e-06, + "loss": 0.308, + "step": 4161 + }, + { + "epoch": 0.19, + "grad_norm": 0.6482944033951525, + "learning_rate": 4.940530605553415e-06, + "loss": 0.2844, + "step": 4162 + }, + { + "epoch": 0.2, + "grad_norm": 0.6418853123557304, + "learning_rate": 4.940489479134306e-06, + "loss": 0.3134, + "step": 4163 + }, + { + "epoch": 0.2, + "grad_norm": 0.6764277085371114, + "learning_rate": 4.940448338670795e-06, + "loss": 0.3012, + "step": 4164 + }, + { + "epoch": 0.2, + "grad_norm": 0.6243043328637422, + "learning_rate": 4.9404071841631165e-06, + "loss": 0.2717, + "step": 4165 + }, + { + "epoch": 0.2, + "grad_norm": 0.6654355120627087, + "learning_rate": 4.940366015611507e-06, + "loss": 0.2936, + "step": 4166 + }, + { + "epoch": 0.2, + "grad_norm": 0.6611542917848251, + "learning_rate": 4.940324833016206e-06, + "loss": 0.3127, + "step": 4167 + }, + { + "epoch": 0.2, + "grad_norm": 0.6534500527204972, + "learning_rate": 4.9402836363774475e-06, + "loss": 0.3079, + "step": 4168 + }, + { + "epoch": 0.2, + "grad_norm": 0.6683199140497549, + "learning_rate": 4.940242425695471e-06, + "loss": 0.3021, + "step": 4169 + }, + { + "epoch": 0.2, + "grad_norm": 0.6556015954203259, + "learning_rate": 4.940201200970512e-06, + "loss": 0.3242, + "step": 4170 + }, + { + "epoch": 0.2, + "grad_norm": 0.6884579252000916, + "learning_rate": 4.940159962202809e-06, + "loss": 0.3051, + "step": 4171 + }, + { + "epoch": 0.2, + "grad_norm": 0.6048085254247872, + "learning_rate": 4.9401187093925984e-06, + "loss": 0.3022, + "step": 4172 + }, + { + "epoch": 0.2, + "grad_norm": 0.6245738594493908, + "learning_rate": 4.940077442540118e-06, + "loss": 0.2927, + "step": 4173 + }, + { + "epoch": 0.2, + "grad_norm": 0.5928165431992007, + "learning_rate": 4.9400361616456055e-06, + "loss": 0.2805, + "step": 4174 + }, + { + "epoch": 0.2, + "grad_norm": 0.6732288968905489, + "learning_rate": 4.939994866709298e-06, + "loss": 0.2958, + "step": 4175 + }, + { + "epoch": 0.2, + "grad_norm": 0.660680666019072, + "learning_rate": 4.9399535577314326e-06, + "loss": 0.2927, + "step": 4176 + }, + { + "epoch": 0.2, + "grad_norm": 0.6833967372070826, + "learning_rate": 4.939912234712249e-06, + "loss": 0.3023, + "step": 4177 + }, + { + "epoch": 0.2, + "grad_norm": 0.6969214720327701, + "learning_rate": 4.939870897651983e-06, + "loss": 0.3225, + "step": 4178 + }, + { + "epoch": 0.2, + "grad_norm": 0.650058137568874, + "learning_rate": 4.939829546550874e-06, + "loss": 0.3081, + "step": 4179 + }, + { + "epoch": 0.2, + "grad_norm": 0.6825402409039648, + "learning_rate": 4.9397881814091575e-06, + "loss": 0.3165, + "step": 4180 + }, + { + "epoch": 0.2, + "grad_norm": 0.713363912574367, + "learning_rate": 4.939746802227075e-06, + "loss": 0.3216, + "step": 4181 + }, + { + "epoch": 0.2, + "grad_norm": 0.6441712111062389, + "learning_rate": 4.939705409004862e-06, + "loss": 0.2874, + "step": 4182 + }, + { + "epoch": 0.2, + "grad_norm": 0.656644525181994, + "learning_rate": 4.939664001742758e-06, + "loss": 0.3043, + "step": 4183 + }, + { + "epoch": 0.2, + "grad_norm": 0.7259575831196572, + "learning_rate": 4.939622580441e-06, + "loss": 0.3098, + "step": 4184 + }, + { + "epoch": 0.2, + "grad_norm": 0.7222619020004604, + "learning_rate": 4.939581145099828e-06, + "loss": 0.315, + "step": 4185 + }, + { + "epoch": 0.2, + "grad_norm": 0.6688624061953945, + "learning_rate": 4.9395396957194795e-06, + "loss": 0.296, + "step": 4186 + }, + { + "epoch": 0.2, + "grad_norm": 0.6366066553999672, + "learning_rate": 4.939498232300193e-06, + "loss": 0.2951, + "step": 4187 + }, + { + "epoch": 0.2, + "grad_norm": 0.61628158469505, + "learning_rate": 4.939456754842207e-06, + "loss": 0.2743, + "step": 4188 + }, + { + "epoch": 0.2, + "grad_norm": 0.6783985237227866, + "learning_rate": 4.939415263345762e-06, + "loss": 0.2852, + "step": 4189 + }, + { + "epoch": 0.2, + "grad_norm": 0.6881088665115965, + "learning_rate": 4.939373757811093e-06, + "loss": 0.2976, + "step": 4190 + }, + { + "epoch": 0.2, + "grad_norm": 0.6124903044668188, + "learning_rate": 4.939332238238443e-06, + "loss": 0.2945, + "step": 4191 + }, + { + "epoch": 0.2, + "grad_norm": 0.6689417949168224, + "learning_rate": 4.939290704628048e-06, + "loss": 0.3106, + "step": 4192 + }, + { + "epoch": 0.2, + "grad_norm": 0.6949882354109607, + "learning_rate": 4.939249156980149e-06, + "loss": 0.319, + "step": 4193 + }, + { + "epoch": 0.2, + "grad_norm": 0.6299243986474148, + "learning_rate": 4.939207595294983e-06, + "loss": 0.2871, + "step": 4194 + }, + { + "epoch": 0.2, + "grad_norm": 0.665970845729932, + "learning_rate": 4.939166019572792e-06, + "loss": 0.3281, + "step": 4195 + }, + { + "epoch": 0.2, + "grad_norm": 0.6553341837629645, + "learning_rate": 4.939124429813813e-06, + "loss": 0.3103, + "step": 4196 + }, + { + "epoch": 0.2, + "grad_norm": 0.7377901068912297, + "learning_rate": 4.939082826018286e-06, + "loss": 0.3147, + "step": 4197 + }, + { + "epoch": 0.2, + "grad_norm": 0.6069548623849501, + "learning_rate": 4.939041208186449e-06, + "loss": 0.2875, + "step": 4198 + }, + { + "epoch": 0.2, + "grad_norm": 0.6872257178605964, + "learning_rate": 4.9389995763185435e-06, + "loss": 0.3011, + "step": 4199 + }, + { + "epoch": 0.2, + "grad_norm": 0.681010674379607, + "learning_rate": 4.938957930414809e-06, + "loss": 0.294, + "step": 4200 + }, + { + "epoch": 0.2, + "grad_norm": 0.6793703284645963, + "learning_rate": 4.938916270475485e-06, + "loss": 0.3106, + "step": 4201 + }, + { + "epoch": 0.2, + "grad_norm": 0.601478697820927, + "learning_rate": 4.938874596500811e-06, + "loss": 0.2943, + "step": 4202 + }, + { + "epoch": 0.2, + "grad_norm": 0.6309138087541395, + "learning_rate": 4.938832908491025e-06, + "loss": 0.2996, + "step": 4203 + }, + { + "epoch": 0.2, + "grad_norm": 0.714511239571989, + "learning_rate": 4.938791206446371e-06, + "loss": 0.2895, + "step": 4204 + }, + { + "epoch": 0.2, + "grad_norm": 0.6463083360692565, + "learning_rate": 4.938749490367084e-06, + "loss": 0.2961, + "step": 4205 + }, + { + "epoch": 0.2, + "grad_norm": 0.6390446162738385, + "learning_rate": 4.9387077602534086e-06, + "loss": 0.2809, + "step": 4206 + }, + { + "epoch": 0.2, + "grad_norm": 0.640700325661885, + "learning_rate": 4.938666016105582e-06, + "loss": 0.3241, + "step": 4207 + }, + { + "epoch": 0.2, + "grad_norm": 0.6811396571583231, + "learning_rate": 4.938624257923845e-06, + "loss": 0.3386, + "step": 4208 + }, + { + "epoch": 0.2, + "grad_norm": 0.6525280485529813, + "learning_rate": 4.93858248570844e-06, + "loss": 0.3091, + "step": 4209 + }, + { + "epoch": 0.2, + "grad_norm": 0.7522069788351038, + "learning_rate": 4.938540699459604e-06, + "loss": 0.3208, + "step": 4210 + }, + { + "epoch": 0.2, + "grad_norm": 0.6900292450306414, + "learning_rate": 4.93849889917758e-06, + "loss": 0.2915, + "step": 4211 + }, + { + "epoch": 0.2, + "grad_norm": 0.6018812986047284, + "learning_rate": 4.938457084862608e-06, + "loss": 0.2837, + "step": 4212 + }, + { + "epoch": 0.2, + "grad_norm": 0.6301841708731635, + "learning_rate": 4.938415256514928e-06, + "loss": 0.2985, + "step": 4213 + }, + { + "epoch": 0.2, + "grad_norm": 0.6514208801883623, + "learning_rate": 4.93837341413478e-06, + "loss": 0.2923, + "step": 4214 + }, + { + "epoch": 0.2, + "grad_norm": 0.7115874138527223, + "learning_rate": 4.938331557722408e-06, + "loss": 0.2956, + "step": 4215 + }, + { + "epoch": 0.2, + "grad_norm": 0.648007640600105, + "learning_rate": 4.93828968727805e-06, + "loss": 0.2988, + "step": 4216 + }, + { + "epoch": 0.2, + "grad_norm": 0.6693044262206057, + "learning_rate": 4.938247802801946e-06, + "loss": 0.3098, + "step": 4217 + }, + { + "epoch": 0.2, + "grad_norm": 0.6236046702322466, + "learning_rate": 4.938205904294341e-06, + "loss": 0.3007, + "step": 4218 + }, + { + "epoch": 0.2, + "grad_norm": 0.6052909277904271, + "learning_rate": 4.938163991755473e-06, + "loss": 0.308, + "step": 4219 + }, + { + "epoch": 0.2, + "grad_norm": 0.6464404675538252, + "learning_rate": 4.938122065185583e-06, + "loss": 0.3045, + "step": 4220 + }, + { + "epoch": 0.2, + "grad_norm": 0.7700065777683092, + "learning_rate": 4.938080124584915e-06, + "loss": 0.3202, + "step": 4221 + }, + { + "epoch": 0.2, + "grad_norm": 0.6481808862391903, + "learning_rate": 4.938038169953707e-06, + "loss": 0.3224, + "step": 4222 + }, + { + "epoch": 0.2, + "grad_norm": 0.6862569620187376, + "learning_rate": 4.9379962012922036e-06, + "loss": 0.3138, + "step": 4223 + }, + { + "epoch": 0.2, + "grad_norm": 0.6717599938904384, + "learning_rate": 4.937954218600644e-06, + "loss": 0.3255, + "step": 4224 + }, + { + "epoch": 0.2, + "grad_norm": 0.648705793150037, + "learning_rate": 4.937912221879271e-06, + "loss": 0.3125, + "step": 4225 + }, + { + "epoch": 0.2, + "grad_norm": 0.6691683273749116, + "learning_rate": 4.937870211128326e-06, + "loss": 0.3204, + "step": 4226 + }, + { + "epoch": 0.2, + "grad_norm": 0.6965100193691286, + "learning_rate": 4.93782818634805e-06, + "loss": 0.3097, + "step": 4227 + }, + { + "epoch": 0.2, + "grad_norm": 0.6718281773873149, + "learning_rate": 4.937786147538686e-06, + "loss": 0.3192, + "step": 4228 + }, + { + "epoch": 0.2, + "grad_norm": 0.7033105134331614, + "learning_rate": 4.937744094700475e-06, + "loss": 0.2934, + "step": 4229 + }, + { + "epoch": 0.2, + "grad_norm": 0.6154848805721683, + "learning_rate": 4.937702027833661e-06, + "loss": 0.3115, + "step": 4230 + }, + { + "epoch": 0.2, + "grad_norm": 0.6627520364900018, + "learning_rate": 4.937659946938483e-06, + "loss": 0.303, + "step": 4231 + }, + { + "epoch": 0.2, + "grad_norm": 0.6502166802747167, + "learning_rate": 4.9376178520151855e-06, + "loss": 0.2972, + "step": 4232 + }, + { + "epoch": 0.2, + "grad_norm": 0.6471718549238404, + "learning_rate": 4.937575743064009e-06, + "loss": 0.3057, + "step": 4233 + }, + { + "epoch": 0.2, + "grad_norm": 0.6774626212357046, + "learning_rate": 4.937533620085197e-06, + "loss": 0.3181, + "step": 4234 + }, + { + "epoch": 0.2, + "grad_norm": 0.6097926128515423, + "learning_rate": 4.937491483078992e-06, + "loss": 0.288, + "step": 4235 + }, + { + "epoch": 0.2, + "grad_norm": 0.6899936373527461, + "learning_rate": 4.937449332045637e-06, + "loss": 0.2985, + "step": 4236 + }, + { + "epoch": 0.2, + "grad_norm": 0.6289807159596459, + "learning_rate": 4.9374071669853715e-06, + "loss": 0.3074, + "step": 4237 + }, + { + "epoch": 0.2, + "grad_norm": 0.6646847546994074, + "learning_rate": 4.937364987898442e-06, + "loss": 0.2917, + "step": 4238 + }, + { + "epoch": 0.2, + "grad_norm": 0.6520855789095588, + "learning_rate": 4.937322794785089e-06, + "loss": 0.3139, + "step": 4239 + }, + { + "epoch": 0.2, + "grad_norm": 0.6964020707383516, + "learning_rate": 4.937280587645556e-06, + "loss": 0.3015, + "step": 4240 + }, + { + "epoch": 0.2, + "grad_norm": 0.6145329909973687, + "learning_rate": 4.937238366480087e-06, + "loss": 0.2692, + "step": 4241 + }, + { + "epoch": 0.2, + "grad_norm": 0.7335010393881785, + "learning_rate": 4.9371961312889225e-06, + "loss": 0.3077, + "step": 4242 + }, + { + "epoch": 0.2, + "grad_norm": 0.648134349938646, + "learning_rate": 4.937153882072306e-06, + "loss": 0.316, + "step": 4243 + }, + { + "epoch": 0.2, + "grad_norm": 0.6096587452435731, + "learning_rate": 4.937111618830484e-06, + "loss": 0.2828, + "step": 4244 + }, + { + "epoch": 0.2, + "grad_norm": 0.7224199714549407, + "learning_rate": 4.937069341563695e-06, + "loss": 0.3202, + "step": 4245 + }, + { + "epoch": 0.2, + "grad_norm": 0.6005261797467649, + "learning_rate": 4.937027050272185e-06, + "loss": 0.2929, + "step": 4246 + }, + { + "epoch": 0.2, + "grad_norm": 0.6520958092043545, + "learning_rate": 4.936984744956198e-06, + "loss": 0.3124, + "step": 4247 + }, + { + "epoch": 0.2, + "grad_norm": 0.6321068145886485, + "learning_rate": 4.936942425615974e-06, + "loss": 0.3049, + "step": 4248 + }, + { + "epoch": 0.2, + "grad_norm": 0.7070940965186946, + "learning_rate": 4.936900092251761e-06, + "loss": 0.3218, + "step": 4249 + }, + { + "epoch": 0.2, + "grad_norm": 0.6182150667429792, + "learning_rate": 4.9368577448638e-06, + "loss": 0.3126, + "step": 4250 + }, + { + "epoch": 0.2, + "grad_norm": 0.6210876028949713, + "learning_rate": 4.9368153834523346e-06, + "loss": 0.2846, + "step": 4251 + }, + { + "epoch": 0.2, + "grad_norm": 0.6268510638145587, + "learning_rate": 4.936773008017609e-06, + "loss": 0.3017, + "step": 4252 + }, + { + "epoch": 0.2, + "grad_norm": 0.6400644576262882, + "learning_rate": 4.936730618559868e-06, + "loss": 0.2867, + "step": 4253 + }, + { + "epoch": 0.2, + "grad_norm": 0.6210965123361114, + "learning_rate": 4.936688215079354e-06, + "loss": 0.2871, + "step": 4254 + }, + { + "epoch": 0.2, + "grad_norm": 0.6215456460570467, + "learning_rate": 4.936645797576312e-06, + "loss": 0.3095, + "step": 4255 + }, + { + "epoch": 0.2, + "grad_norm": 0.6140142101668649, + "learning_rate": 4.936603366050986e-06, + "loss": 0.3075, + "step": 4256 + }, + { + "epoch": 0.2, + "grad_norm": 0.676079888182186, + "learning_rate": 4.93656092050362e-06, + "loss": 0.3163, + "step": 4257 + }, + { + "epoch": 0.2, + "grad_norm": 0.6185334776853224, + "learning_rate": 4.936518460934458e-06, + "loss": 0.2873, + "step": 4258 + }, + { + "epoch": 0.2, + "grad_norm": 0.6476066929761994, + "learning_rate": 4.936475987343745e-06, + "loss": 0.2939, + "step": 4259 + }, + { + "epoch": 0.2, + "grad_norm": 0.5765860063937199, + "learning_rate": 4.936433499731725e-06, + "loss": 0.2717, + "step": 4260 + }, + { + "epoch": 0.2, + "grad_norm": 0.6573638339564868, + "learning_rate": 4.936390998098643e-06, + "loss": 0.3157, + "step": 4261 + }, + { + "epoch": 0.2, + "grad_norm": 0.6583512255495342, + "learning_rate": 4.936348482444743e-06, + "loss": 0.3182, + "step": 4262 + }, + { + "epoch": 0.2, + "grad_norm": 0.6852786605137863, + "learning_rate": 4.93630595277027e-06, + "loss": 0.3271, + "step": 4263 + }, + { + "epoch": 0.2, + "grad_norm": 0.7097016904409551, + "learning_rate": 4.9362634090754675e-06, + "loss": 0.3294, + "step": 4264 + }, + { + "epoch": 0.2, + "grad_norm": 0.6544302629921404, + "learning_rate": 4.9362208513605826e-06, + "loss": 0.3115, + "step": 4265 + }, + { + "epoch": 0.2, + "grad_norm": 0.6341540771294116, + "learning_rate": 4.936178279625858e-06, + "loss": 0.3021, + "step": 4266 + }, + { + "epoch": 0.2, + "grad_norm": 0.665930934217816, + "learning_rate": 4.93613569387154e-06, + "loss": 0.3001, + "step": 4267 + }, + { + "epoch": 0.2, + "grad_norm": 0.6363986860447337, + "learning_rate": 4.936093094097874e-06, + "loss": 0.3019, + "step": 4268 + }, + { + "epoch": 0.2, + "grad_norm": 0.6709033363841166, + "learning_rate": 4.936050480305104e-06, + "loss": 0.3182, + "step": 4269 + }, + { + "epoch": 0.2, + "grad_norm": 0.6679906433208914, + "learning_rate": 4.936007852493476e-06, + "loss": 0.3154, + "step": 4270 + }, + { + "epoch": 0.2, + "grad_norm": 0.6509844453559771, + "learning_rate": 4.935965210663235e-06, + "loss": 0.2903, + "step": 4271 + }, + { + "epoch": 0.2, + "grad_norm": 0.6340379520104161, + "learning_rate": 4.935922554814626e-06, + "loss": 0.3168, + "step": 4272 + }, + { + "epoch": 0.2, + "grad_norm": 0.6336042962469038, + "learning_rate": 4.935879884947896e-06, + "loss": 0.3091, + "step": 4273 + }, + { + "epoch": 0.2, + "grad_norm": 0.6650087733608286, + "learning_rate": 4.935837201063289e-06, + "loss": 0.3041, + "step": 4274 + }, + { + "epoch": 0.2, + "grad_norm": 0.601998793217193, + "learning_rate": 4.935794503161051e-06, + "loss": 0.3105, + "step": 4275 + }, + { + "epoch": 0.2, + "grad_norm": 0.6712022286108309, + "learning_rate": 4.935751791241428e-06, + "loss": 0.3073, + "step": 4276 + }, + { + "epoch": 0.2, + "grad_norm": 0.7362383427964769, + "learning_rate": 4.935709065304665e-06, + "loss": 0.3033, + "step": 4277 + }, + { + "epoch": 0.2, + "grad_norm": 0.7280973867956524, + "learning_rate": 4.935666325351009e-06, + "loss": 0.3063, + "step": 4278 + }, + { + "epoch": 0.2, + "grad_norm": 0.6847463226263703, + "learning_rate": 4.935623571380706e-06, + "loss": 0.2978, + "step": 4279 + }, + { + "epoch": 0.2, + "grad_norm": 0.6314115674331612, + "learning_rate": 4.935580803394001e-06, + "loss": 0.3029, + "step": 4280 + }, + { + "epoch": 0.2, + "grad_norm": 0.6555497992104744, + "learning_rate": 4.9355380213911405e-06, + "loss": 0.3104, + "step": 4281 + }, + { + "epoch": 0.2, + "grad_norm": 0.6691274414674875, + "learning_rate": 4.935495225372371e-06, + "loss": 0.308, + "step": 4282 + }, + { + "epoch": 0.2, + "grad_norm": 0.6299097070089956, + "learning_rate": 4.935452415337939e-06, + "loss": 0.2733, + "step": 4283 + }, + { + "epoch": 0.2, + "grad_norm": 0.6631587851003573, + "learning_rate": 4.935409591288089e-06, + "loss": 0.3091, + "step": 4284 + }, + { + "epoch": 0.2, + "grad_norm": 0.641360597843259, + "learning_rate": 4.9353667532230706e-06, + "loss": 0.3071, + "step": 4285 + }, + { + "epoch": 0.2, + "grad_norm": 0.6666902231627266, + "learning_rate": 4.9353239011431284e-06, + "loss": 0.3198, + "step": 4286 + }, + { + "epoch": 0.2, + "grad_norm": 0.649669625049403, + "learning_rate": 4.9352810350485095e-06, + "loss": 0.3106, + "step": 4287 + }, + { + "epoch": 0.2, + "grad_norm": 0.7043698642390054, + "learning_rate": 4.935238154939459e-06, + "loss": 0.2923, + "step": 4288 + }, + { + "epoch": 0.2, + "grad_norm": 0.6352589719774874, + "learning_rate": 4.9351952608162255e-06, + "loss": 0.2977, + "step": 4289 + }, + { + "epoch": 0.2, + "grad_norm": 0.6398878044084061, + "learning_rate": 4.935152352679056e-06, + "loss": 0.2974, + "step": 4290 + }, + { + "epoch": 0.2, + "grad_norm": 0.6477792016933916, + "learning_rate": 4.935109430528196e-06, + "loss": 0.3075, + "step": 4291 + }, + { + "epoch": 0.2, + "grad_norm": 0.6410940089209384, + "learning_rate": 4.935066494363894e-06, + "loss": 0.2888, + "step": 4292 + }, + { + "epoch": 0.2, + "grad_norm": 0.7148361094975859, + "learning_rate": 4.9350235441863956e-06, + "loss": 0.318, + "step": 4293 + }, + { + "epoch": 0.2, + "grad_norm": 0.6347225727643427, + "learning_rate": 4.934980579995949e-06, + "loss": 0.2926, + "step": 4294 + }, + { + "epoch": 0.2, + "grad_norm": 0.6892951968695588, + "learning_rate": 4.934937601792802e-06, + "loss": 0.3238, + "step": 4295 + }, + { + "epoch": 0.2, + "grad_norm": 0.6527091990778614, + "learning_rate": 4.9348946095772e-06, + "loss": 0.3065, + "step": 4296 + }, + { + "epoch": 0.2, + "grad_norm": 0.7012321540965558, + "learning_rate": 4.9348516033493925e-06, + "loss": 0.3057, + "step": 4297 + }, + { + "epoch": 0.2, + "grad_norm": 0.6894088031386522, + "learning_rate": 4.934808583109625e-06, + "loss": 0.3153, + "step": 4298 + }, + { + "epoch": 0.2, + "grad_norm": 0.6693994931869798, + "learning_rate": 4.934765548858146e-06, + "loss": 0.3047, + "step": 4299 + }, + { + "epoch": 0.2, + "grad_norm": 0.6618855580090117, + "learning_rate": 4.9347225005952035e-06, + "loss": 0.3049, + "step": 4300 + }, + { + "epoch": 0.2, + "grad_norm": 0.6578674867507429, + "learning_rate": 4.934679438321045e-06, + "loss": 0.3034, + "step": 4301 + }, + { + "epoch": 0.2, + "grad_norm": 0.6897468086786607, + "learning_rate": 4.934636362035918e-06, + "loss": 0.2852, + "step": 4302 + }, + { + "epoch": 0.2, + "grad_norm": 0.6352331414215208, + "learning_rate": 4.934593271740072e-06, + "loss": 0.303, + "step": 4303 + }, + { + "epoch": 0.2, + "grad_norm": 0.6742238291468476, + "learning_rate": 4.934550167433752e-06, + "loss": 0.2927, + "step": 4304 + }, + { + "epoch": 0.2, + "grad_norm": 0.6446578312710344, + "learning_rate": 4.934507049117209e-06, + "loss": 0.3101, + "step": 4305 + }, + { + "epoch": 0.2, + "grad_norm": 0.6922774281213533, + "learning_rate": 4.934463916790689e-06, + "loss": 0.3208, + "step": 4306 + }, + { + "epoch": 0.2, + "grad_norm": 0.6321329509569132, + "learning_rate": 4.934420770454441e-06, + "loss": 0.3063, + "step": 4307 + }, + { + "epoch": 0.2, + "grad_norm": 0.6611684478892351, + "learning_rate": 4.934377610108714e-06, + "loss": 0.2923, + "step": 4308 + }, + { + "epoch": 0.2, + "grad_norm": 0.6671874437356626, + "learning_rate": 4.934334435753755e-06, + "loss": 0.292, + "step": 4309 + }, + { + "epoch": 0.2, + "grad_norm": 0.6998096488071737, + "learning_rate": 4.9342912473898135e-06, + "loss": 0.3273, + "step": 4310 + }, + { + "epoch": 0.2, + "grad_norm": 0.6016346782623055, + "learning_rate": 4.934248045017138e-06, + "loss": 0.2787, + "step": 4311 + }, + { + "epoch": 0.2, + "grad_norm": 0.6447519245731846, + "learning_rate": 4.934204828635976e-06, + "loss": 0.2849, + "step": 4312 + }, + { + "epoch": 0.2, + "grad_norm": 0.5528309302916804, + "learning_rate": 4.934161598246577e-06, + "loss": 0.2837, + "step": 4313 + }, + { + "epoch": 0.2, + "grad_norm": 0.6686181747594405, + "learning_rate": 4.934118353849191e-06, + "loss": 0.2933, + "step": 4314 + }, + { + "epoch": 0.2, + "grad_norm": 0.7262005217155555, + "learning_rate": 4.934075095444065e-06, + "loss": 0.319, + "step": 4315 + }, + { + "epoch": 0.2, + "grad_norm": 0.6937718567223268, + "learning_rate": 4.934031823031449e-06, + "loss": 0.3086, + "step": 4316 + }, + { + "epoch": 0.2, + "grad_norm": 0.703581078261803, + "learning_rate": 4.9339885366115904e-06, + "loss": 0.3079, + "step": 4317 + }, + { + "epoch": 0.2, + "grad_norm": 0.6588978152159801, + "learning_rate": 4.933945236184741e-06, + "loss": 0.2843, + "step": 4318 + }, + { + "epoch": 0.2, + "grad_norm": 0.6607709533235281, + "learning_rate": 4.933901921751147e-06, + "loss": 0.2931, + "step": 4319 + }, + { + "epoch": 0.2, + "grad_norm": 0.5822221331362756, + "learning_rate": 4.9338585933110605e-06, + "loss": 0.2778, + "step": 4320 + }, + { + "epoch": 0.2, + "grad_norm": 0.6651306742319352, + "learning_rate": 4.933815250864729e-06, + "loss": 0.2921, + "step": 4321 + }, + { + "epoch": 0.2, + "grad_norm": 0.6675527310614991, + "learning_rate": 4.9337718944124025e-06, + "loss": 0.3205, + "step": 4322 + }, + { + "epoch": 0.2, + "grad_norm": 0.6011851120159789, + "learning_rate": 4.933728523954331e-06, + "loss": 0.2845, + "step": 4323 + }, + { + "epoch": 0.2, + "grad_norm": 0.6778153777785397, + "learning_rate": 4.933685139490763e-06, + "loss": 0.2989, + "step": 4324 + }, + { + "epoch": 0.2, + "grad_norm": 0.6768453511173526, + "learning_rate": 4.9336417410219485e-06, + "loss": 0.3053, + "step": 4325 + }, + { + "epoch": 0.2, + "grad_norm": 0.6175548281535788, + "learning_rate": 4.933598328548137e-06, + "loss": 0.2949, + "step": 4326 + }, + { + "epoch": 0.2, + "grad_norm": 0.6846240497408438, + "learning_rate": 4.933554902069579e-06, + "loss": 0.3117, + "step": 4327 + }, + { + "epoch": 0.2, + "grad_norm": 0.6576300512447794, + "learning_rate": 4.933511461586526e-06, + "loss": 0.2986, + "step": 4328 + }, + { + "epoch": 0.2, + "grad_norm": 0.6509301857799977, + "learning_rate": 4.933468007099224e-06, + "loss": 0.2964, + "step": 4329 + }, + { + "epoch": 0.2, + "grad_norm": 0.5964385112414027, + "learning_rate": 4.933424538607926e-06, + "loss": 0.2794, + "step": 4330 + }, + { + "epoch": 0.2, + "grad_norm": 0.6630108831114617, + "learning_rate": 4.9333810561128815e-06, + "loss": 0.3104, + "step": 4331 + }, + { + "epoch": 0.2, + "grad_norm": 0.669131980613362, + "learning_rate": 4.9333375596143405e-06, + "loss": 0.3018, + "step": 4332 + }, + { + "epoch": 0.2, + "grad_norm": 0.6795069126437588, + "learning_rate": 4.9332940491125535e-06, + "loss": 0.31, + "step": 4333 + }, + { + "epoch": 0.2, + "grad_norm": 0.6711854735388263, + "learning_rate": 4.933250524607771e-06, + "loss": 0.299, + "step": 4334 + }, + { + "epoch": 0.2, + "grad_norm": 0.6017992648874781, + "learning_rate": 4.933206986100243e-06, + "loss": 0.2868, + "step": 4335 + }, + { + "epoch": 0.2, + "grad_norm": 0.6382601199330635, + "learning_rate": 4.933163433590221e-06, + "loss": 0.3012, + "step": 4336 + }, + { + "epoch": 0.2, + "grad_norm": 0.6378885907446955, + "learning_rate": 4.9331198670779546e-06, + "loss": 0.2973, + "step": 4337 + }, + { + "epoch": 0.2, + "grad_norm": 0.6874540018486377, + "learning_rate": 4.9330762865636945e-06, + "loss": 0.3182, + "step": 4338 + }, + { + "epoch": 0.2, + "grad_norm": 0.6357871285267934, + "learning_rate": 4.933032692047693e-06, + "loss": 0.2887, + "step": 4339 + }, + { + "epoch": 0.2, + "grad_norm": 0.6581891271192593, + "learning_rate": 4.932989083530199e-06, + "loss": 0.3005, + "step": 4340 + }, + { + "epoch": 0.2, + "grad_norm": 0.683041419882167, + "learning_rate": 4.932945461011463e-06, + "loss": 0.3121, + "step": 4341 + }, + { + "epoch": 0.2, + "grad_norm": 0.615158416267059, + "learning_rate": 4.9329018244917396e-06, + "loss": 0.3006, + "step": 4342 + }, + { + "epoch": 0.2, + "grad_norm": 0.6597761242894131, + "learning_rate": 4.932858173971277e-06, + "loss": 0.2962, + "step": 4343 + }, + { + "epoch": 0.2, + "grad_norm": 0.6604315642992126, + "learning_rate": 4.932814509450326e-06, + "loss": 0.3142, + "step": 4344 + }, + { + "epoch": 0.2, + "grad_norm": 0.6295785761068077, + "learning_rate": 4.932770830929141e-06, + "loss": 0.317, + "step": 4345 + }, + { + "epoch": 0.2, + "grad_norm": 0.660444979914469, + "learning_rate": 4.93272713840797e-06, + "loss": 0.3044, + "step": 4346 + }, + { + "epoch": 0.2, + "grad_norm": 0.6613703074673201, + "learning_rate": 4.932683431887066e-06, + "loss": 0.3222, + "step": 4347 + }, + { + "epoch": 0.2, + "grad_norm": 0.6178765630912544, + "learning_rate": 4.93263971136668e-06, + "loss": 0.3111, + "step": 4348 + }, + { + "epoch": 0.2, + "grad_norm": 0.6573796331288045, + "learning_rate": 4.932595976847064e-06, + "loss": 0.3013, + "step": 4349 + }, + { + "epoch": 0.2, + "grad_norm": 0.6368525809269094, + "learning_rate": 4.93255222832847e-06, + "loss": 0.3234, + "step": 4350 + }, + { + "epoch": 0.2, + "grad_norm": 0.6468961769732541, + "learning_rate": 4.9325084658111496e-06, + "loss": 0.3307, + "step": 4351 + }, + { + "epoch": 0.2, + "grad_norm": 0.612039689583627, + "learning_rate": 4.9324646892953535e-06, + "loss": 0.3144, + "step": 4352 + }, + { + "epoch": 0.2, + "grad_norm": 0.6795752143181633, + "learning_rate": 4.932420898781335e-06, + "loss": 0.3299, + "step": 4353 + }, + { + "epoch": 0.2, + "grad_norm": 0.6806070020175328, + "learning_rate": 4.932377094269345e-06, + "loss": 0.3034, + "step": 4354 + }, + { + "epoch": 0.2, + "grad_norm": 0.643790415820373, + "learning_rate": 4.932333275759637e-06, + "loss": 0.3298, + "step": 4355 + }, + { + "epoch": 0.2, + "grad_norm": 0.5825798412382219, + "learning_rate": 4.932289443252462e-06, + "loss": 0.2803, + "step": 4356 + }, + { + "epoch": 0.2, + "grad_norm": 0.6556234037568299, + "learning_rate": 4.932245596748072e-06, + "loss": 0.3223, + "step": 4357 + }, + { + "epoch": 0.2, + "grad_norm": 0.6268513723800507, + "learning_rate": 4.9322017362467216e-06, + "loss": 0.3184, + "step": 4358 + }, + { + "epoch": 0.2, + "grad_norm": 0.6915362514299522, + "learning_rate": 4.93215786174866e-06, + "loss": 0.3161, + "step": 4359 + }, + { + "epoch": 0.2, + "grad_norm": 0.6568896258805925, + "learning_rate": 4.932113973254142e-06, + "loss": 0.3178, + "step": 4360 + }, + { + "epoch": 0.2, + "grad_norm": 0.6594634073413123, + "learning_rate": 4.932070070763419e-06, + "loss": 0.3203, + "step": 4361 + }, + { + "epoch": 0.2, + "grad_norm": 0.6406446047773646, + "learning_rate": 4.932026154276744e-06, + "loss": 0.2986, + "step": 4362 + }, + { + "epoch": 0.2, + "grad_norm": 0.7115189435938546, + "learning_rate": 4.931982223794369e-06, + "loss": 0.3101, + "step": 4363 + }, + { + "epoch": 0.2, + "grad_norm": 0.6122516716044054, + "learning_rate": 4.931938279316548e-06, + "loss": 0.2872, + "step": 4364 + }, + { + "epoch": 0.2, + "grad_norm": 0.5998832480046006, + "learning_rate": 4.931894320843534e-06, + "loss": 0.2868, + "step": 4365 + }, + { + "epoch": 0.2, + "grad_norm": 0.7039044957513444, + "learning_rate": 4.931850348375579e-06, + "loss": 0.3251, + "step": 4366 + }, + { + "epoch": 0.2, + "grad_norm": 0.6528245114186524, + "learning_rate": 4.931806361912936e-06, + "loss": 0.2968, + "step": 4367 + }, + { + "epoch": 0.2, + "grad_norm": 0.7193364783647047, + "learning_rate": 4.93176236145586e-06, + "loss": 0.311, + "step": 4368 + }, + { + "epoch": 0.2, + "grad_norm": 0.6753859640023959, + "learning_rate": 4.931718347004601e-06, + "loss": 0.3144, + "step": 4369 + }, + { + "epoch": 0.2, + "grad_norm": 0.6390304658764332, + "learning_rate": 4.931674318559416e-06, + "loss": 0.3069, + "step": 4370 + }, + { + "epoch": 0.2, + "grad_norm": 0.6280134191099735, + "learning_rate": 4.931630276120555e-06, + "loss": 0.3093, + "step": 4371 + }, + { + "epoch": 0.2, + "grad_norm": 0.6128861235023645, + "learning_rate": 4.931586219688273e-06, + "loss": 0.2884, + "step": 4372 + }, + { + "epoch": 0.2, + "grad_norm": 0.6143911844724484, + "learning_rate": 4.931542149262825e-06, + "loss": 0.3052, + "step": 4373 + }, + { + "epoch": 0.2, + "grad_norm": 0.5682666496388501, + "learning_rate": 4.931498064844462e-06, + "loss": 0.277, + "step": 4374 + }, + { + "epoch": 0.2, + "grad_norm": 0.6468193802584029, + "learning_rate": 4.931453966433439e-06, + "loss": 0.2822, + "step": 4375 + }, + { + "epoch": 0.2, + "grad_norm": 0.6563894498557792, + "learning_rate": 4.931409854030009e-06, + "loss": 0.2879, + "step": 4376 + }, + { + "epoch": 0.21, + "grad_norm": 0.6480233117043074, + "learning_rate": 4.931365727634427e-06, + "loss": 0.2934, + "step": 4377 + }, + { + "epoch": 0.21, + "grad_norm": 0.6823230781342008, + "learning_rate": 4.931321587246946e-06, + "loss": 0.3112, + "step": 4378 + }, + { + "epoch": 0.21, + "grad_norm": 0.6021990811435083, + "learning_rate": 4.93127743286782e-06, + "loss": 0.2743, + "step": 4379 + }, + { + "epoch": 0.21, + "grad_norm": 0.6912471384950183, + "learning_rate": 4.931233264497304e-06, + "loss": 0.3332, + "step": 4380 + }, + { + "epoch": 0.21, + "grad_norm": 0.6257025316921443, + "learning_rate": 4.931189082135652e-06, + "loss": 0.3076, + "step": 4381 + }, + { + "epoch": 0.21, + "grad_norm": 0.6474523081666915, + "learning_rate": 4.931144885783118e-06, + "loss": 0.3125, + "step": 4382 + }, + { + "epoch": 0.21, + "grad_norm": 0.6828415420776818, + "learning_rate": 4.931100675439955e-06, + "loss": 0.3158, + "step": 4383 + }, + { + "epoch": 0.21, + "grad_norm": 0.6372818457864952, + "learning_rate": 4.931056451106419e-06, + "loss": 0.3106, + "step": 4384 + }, + { + "epoch": 0.21, + "grad_norm": 0.6331789867427792, + "learning_rate": 4.931012212782765e-06, + "loss": 0.2957, + "step": 4385 + }, + { + "epoch": 0.21, + "grad_norm": 0.6697899950834043, + "learning_rate": 4.930967960469246e-06, + "loss": 0.2998, + "step": 4386 + }, + { + "epoch": 0.21, + "grad_norm": 0.6025667506528898, + "learning_rate": 4.930923694166118e-06, + "loss": 0.3073, + "step": 4387 + }, + { + "epoch": 0.21, + "grad_norm": 0.6742174389564954, + "learning_rate": 4.9308794138736334e-06, + "loss": 0.3122, + "step": 4388 + }, + { + "epoch": 0.21, + "grad_norm": 0.6576707909338951, + "learning_rate": 4.930835119592051e-06, + "loss": 0.305, + "step": 4389 + }, + { + "epoch": 0.21, + "grad_norm": 0.6324061788113203, + "learning_rate": 4.930790811321622e-06, + "loss": 0.3126, + "step": 4390 + }, + { + "epoch": 0.21, + "grad_norm": 0.634493264222439, + "learning_rate": 4.930746489062603e-06, + "loss": 0.3026, + "step": 4391 + }, + { + "epoch": 0.21, + "grad_norm": 0.712460107557727, + "learning_rate": 4.93070215281525e-06, + "loss": 0.2991, + "step": 4392 + }, + { + "epoch": 0.21, + "grad_norm": 0.6785092656318267, + "learning_rate": 4.930657802579815e-06, + "loss": 0.3203, + "step": 4393 + }, + { + "epoch": 0.21, + "grad_norm": 0.6541306207848187, + "learning_rate": 4.930613438356557e-06, + "loss": 0.3151, + "step": 4394 + }, + { + "epoch": 0.21, + "grad_norm": 0.7098397373937927, + "learning_rate": 4.93056906014573e-06, + "loss": 0.3038, + "step": 4395 + }, + { + "epoch": 0.21, + "grad_norm": 0.6920522490799274, + "learning_rate": 4.930524667947588e-06, + "loss": 0.2783, + "step": 4396 + }, + { + "epoch": 0.21, + "grad_norm": 0.5741431704404987, + "learning_rate": 4.930480261762387e-06, + "loss": 0.282, + "step": 4397 + }, + { + "epoch": 0.21, + "grad_norm": 0.7079597413364511, + "learning_rate": 4.930435841590384e-06, + "loss": 0.3228, + "step": 4398 + }, + { + "epoch": 0.21, + "grad_norm": 0.6402726030647493, + "learning_rate": 4.930391407431833e-06, + "loss": 0.3026, + "step": 4399 + }, + { + "epoch": 0.21, + "grad_norm": 0.8252825746403726, + "learning_rate": 4.93034695928699e-06, + "loss": 0.2685, + "step": 4400 + }, + { + "epoch": 0.21, + "grad_norm": 0.6295060935234387, + "learning_rate": 4.930302497156112e-06, + "loss": 0.3005, + "step": 4401 + }, + { + "epoch": 0.21, + "grad_norm": 0.7254474397868759, + "learning_rate": 4.930258021039453e-06, + "loss": 0.3122, + "step": 4402 + }, + { + "epoch": 0.21, + "grad_norm": 0.7268561303730519, + "learning_rate": 4.93021353093727e-06, + "loss": 0.2984, + "step": 4403 + }, + { + "epoch": 0.21, + "grad_norm": 0.6628869509709043, + "learning_rate": 4.9301690268498204e-06, + "loss": 0.3071, + "step": 4404 + }, + { + "epoch": 0.21, + "grad_norm": 0.6601639569747484, + "learning_rate": 4.930124508777358e-06, + "loss": 0.3022, + "step": 4405 + }, + { + "epoch": 0.21, + "grad_norm": 0.6759740294031514, + "learning_rate": 4.930079976720139e-06, + "loss": 0.3074, + "step": 4406 + }, + { + "epoch": 0.21, + "grad_norm": 0.626459115859232, + "learning_rate": 4.930035430678421e-06, + "loss": 0.2946, + "step": 4407 + }, + { + "epoch": 0.21, + "grad_norm": 0.6304846915344932, + "learning_rate": 4.9299908706524605e-06, + "loss": 0.2978, + "step": 4408 + }, + { + "epoch": 0.21, + "grad_norm": 0.6705556251737627, + "learning_rate": 4.929946296642512e-06, + "loss": 0.2987, + "step": 4409 + }, + { + "epoch": 0.21, + "grad_norm": 0.7431844741784033, + "learning_rate": 4.929901708648835e-06, + "loss": 0.3362, + "step": 4410 + }, + { + "epoch": 0.21, + "grad_norm": 0.6830608007459501, + "learning_rate": 4.929857106671683e-06, + "loss": 0.2812, + "step": 4411 + }, + { + "epoch": 0.21, + "grad_norm": 0.6668876276981174, + "learning_rate": 4.9298124907113145e-06, + "loss": 0.2931, + "step": 4412 + }, + { + "epoch": 0.21, + "grad_norm": 0.6561357460471147, + "learning_rate": 4.929767860767986e-06, + "loss": 0.3066, + "step": 4413 + }, + { + "epoch": 0.21, + "grad_norm": 0.6449691766738963, + "learning_rate": 4.929723216841954e-06, + "loss": 0.3068, + "step": 4414 + }, + { + "epoch": 0.21, + "grad_norm": 0.6919868661632343, + "learning_rate": 4.929678558933475e-06, + "loss": 0.3409, + "step": 4415 + }, + { + "epoch": 0.21, + "grad_norm": 0.6773628143060986, + "learning_rate": 4.929633887042807e-06, + "loss": 0.3266, + "step": 4416 + }, + { + "epoch": 0.21, + "grad_norm": 0.6841370073597205, + "learning_rate": 4.929589201170207e-06, + "loss": 0.3138, + "step": 4417 + }, + { + "epoch": 0.21, + "grad_norm": 0.6202297390210262, + "learning_rate": 4.929544501315932e-06, + "loss": 0.2953, + "step": 4418 + }, + { + "epoch": 0.21, + "grad_norm": 0.6078126282703337, + "learning_rate": 4.929499787480238e-06, + "loss": 0.3, + "step": 4419 + }, + { + "epoch": 0.21, + "grad_norm": 0.6851338317533511, + "learning_rate": 4.929455059663384e-06, + "loss": 0.3036, + "step": 4420 + }, + { + "epoch": 0.21, + "grad_norm": 0.6948121370102972, + "learning_rate": 4.929410317865627e-06, + "loss": 0.2921, + "step": 4421 + }, + { + "epoch": 0.21, + "grad_norm": 0.6868579488580818, + "learning_rate": 4.929365562087224e-06, + "loss": 0.3249, + "step": 4422 + }, + { + "epoch": 0.21, + "grad_norm": 0.6568429875314463, + "learning_rate": 4.929320792328433e-06, + "loss": 0.3092, + "step": 4423 + }, + { + "epoch": 0.21, + "grad_norm": 0.6253498682143068, + "learning_rate": 4.929276008589511e-06, + "loss": 0.3026, + "step": 4424 + }, + { + "epoch": 0.21, + "grad_norm": 0.6272973777818328, + "learning_rate": 4.9292312108707165e-06, + "loss": 0.2947, + "step": 4425 + }, + { + "epoch": 0.21, + "grad_norm": 0.6207120772508524, + "learning_rate": 4.9291863991723065e-06, + "loss": 0.2741, + "step": 4426 + }, + { + "epoch": 0.21, + "grad_norm": 0.6874198374016196, + "learning_rate": 4.92914157349454e-06, + "loss": 0.3076, + "step": 4427 + }, + { + "epoch": 0.21, + "grad_norm": 0.655256644773478, + "learning_rate": 4.929096733837674e-06, + "loss": 0.2934, + "step": 4428 + }, + { + "epoch": 0.21, + "grad_norm": 0.6572135392311848, + "learning_rate": 4.929051880201967e-06, + "loss": 0.279, + "step": 4429 + }, + { + "epoch": 0.21, + "grad_norm": 0.7129297937691496, + "learning_rate": 4.929007012587677e-06, + "loss": 0.2851, + "step": 4430 + }, + { + "epoch": 0.21, + "grad_norm": 0.6917709469123174, + "learning_rate": 4.928962130995061e-06, + "loss": 0.3244, + "step": 4431 + }, + { + "epoch": 0.21, + "grad_norm": 0.6269496782451589, + "learning_rate": 4.92891723542438e-06, + "loss": 0.2949, + "step": 4432 + }, + { + "epoch": 0.21, + "grad_norm": 0.6574525932087604, + "learning_rate": 4.9288723258758895e-06, + "loss": 0.3112, + "step": 4433 + }, + { + "epoch": 0.21, + "grad_norm": 0.6461060323214031, + "learning_rate": 4.92882740234985e-06, + "loss": 0.2973, + "step": 4434 + }, + { + "epoch": 0.21, + "grad_norm": 0.6345106910766878, + "learning_rate": 4.928782464846519e-06, + "loss": 0.3017, + "step": 4435 + }, + { + "epoch": 0.21, + "grad_norm": 0.654442776190022, + "learning_rate": 4.928737513366155e-06, + "loss": 0.3112, + "step": 4436 + }, + { + "epoch": 0.21, + "grad_norm": 0.7041201841494911, + "learning_rate": 4.928692547909017e-06, + "loss": 0.3305, + "step": 4437 + }, + { + "epoch": 0.21, + "grad_norm": 0.677295288836208, + "learning_rate": 4.928647568475365e-06, + "loss": 0.3134, + "step": 4438 + }, + { + "epoch": 0.21, + "grad_norm": 0.6756816478974931, + "learning_rate": 4.928602575065456e-06, + "loss": 0.3247, + "step": 4439 + }, + { + "epoch": 0.21, + "grad_norm": 0.5750927571006492, + "learning_rate": 4.92855756767955e-06, + "loss": 0.2922, + "step": 4440 + }, + { + "epoch": 0.21, + "grad_norm": 0.6680857082370398, + "learning_rate": 4.928512546317905e-06, + "loss": 0.2862, + "step": 4441 + }, + { + "epoch": 0.21, + "grad_norm": 0.6631457489663308, + "learning_rate": 4.928467510980781e-06, + "loss": 0.3053, + "step": 4442 + }, + { + "epoch": 0.21, + "grad_norm": 0.6295343254273811, + "learning_rate": 4.928422461668436e-06, + "loss": 0.2911, + "step": 4443 + }, + { + "epoch": 0.21, + "grad_norm": 0.6898474578139467, + "learning_rate": 4.9283773983811314e-06, + "loss": 0.327, + "step": 4444 + }, + { + "epoch": 0.21, + "grad_norm": 0.5922350089255672, + "learning_rate": 4.928332321119124e-06, + "loss": 0.3205, + "step": 4445 + }, + { + "epoch": 0.21, + "grad_norm": 0.6687926857005492, + "learning_rate": 4.928287229882675e-06, + "loss": 0.3296, + "step": 4446 + }, + { + "epoch": 0.21, + "grad_norm": 0.6624646143427426, + "learning_rate": 4.928242124672043e-06, + "loss": 0.3025, + "step": 4447 + }, + { + "epoch": 0.21, + "grad_norm": 0.6194349800932485, + "learning_rate": 4.928197005487489e-06, + "loss": 0.2947, + "step": 4448 + }, + { + "epoch": 0.21, + "grad_norm": 0.640037110265729, + "learning_rate": 4.928151872329271e-06, + "loss": 0.2852, + "step": 4449 + }, + { + "epoch": 0.21, + "grad_norm": 0.6950066445005875, + "learning_rate": 4.928106725197649e-06, + "loss": 0.3252, + "step": 4450 + }, + { + "epoch": 0.21, + "grad_norm": 0.6571754894137806, + "learning_rate": 4.928061564092883e-06, + "loss": 0.3081, + "step": 4451 + }, + { + "epoch": 0.21, + "grad_norm": 0.6539331765954127, + "learning_rate": 4.928016389015232e-06, + "loss": 0.3103, + "step": 4452 + }, + { + "epoch": 0.21, + "grad_norm": 0.6910927340459722, + "learning_rate": 4.927971199964959e-06, + "loss": 0.319, + "step": 4453 + }, + { + "epoch": 0.21, + "grad_norm": 0.7113065225121853, + "learning_rate": 4.927925996942321e-06, + "loss": 0.3222, + "step": 4454 + }, + { + "epoch": 0.21, + "grad_norm": 0.7013536565245575, + "learning_rate": 4.927880779947579e-06, + "loss": 0.3338, + "step": 4455 + }, + { + "epoch": 0.21, + "grad_norm": 0.6880328643141408, + "learning_rate": 4.927835548980994e-06, + "loss": 0.3276, + "step": 4456 + }, + { + "epoch": 0.21, + "grad_norm": 0.6370444429649221, + "learning_rate": 4.927790304042824e-06, + "loss": 0.3032, + "step": 4457 + }, + { + "epoch": 0.21, + "grad_norm": 0.6309204922905849, + "learning_rate": 4.927745045133332e-06, + "loss": 0.3005, + "step": 4458 + }, + { + "epoch": 0.21, + "grad_norm": 0.6654617228772759, + "learning_rate": 4.927699772252778e-06, + "loss": 0.3114, + "step": 4459 + }, + { + "epoch": 0.21, + "grad_norm": 0.6607557902570279, + "learning_rate": 4.927654485401422e-06, + "loss": 0.2692, + "step": 4460 + }, + { + "epoch": 0.21, + "grad_norm": 0.6185907336730514, + "learning_rate": 4.927609184579523e-06, + "loss": 0.2901, + "step": 4461 + }, + { + "epoch": 0.21, + "grad_norm": 0.633708036941045, + "learning_rate": 4.927563869787345e-06, + "loss": 0.3129, + "step": 4462 + }, + { + "epoch": 0.21, + "grad_norm": 0.6472463406987137, + "learning_rate": 4.927518541025147e-06, + "loss": 0.3132, + "step": 4463 + }, + { + "epoch": 0.21, + "grad_norm": 0.7138312820839836, + "learning_rate": 4.927473198293189e-06, + "loss": 0.332, + "step": 4464 + }, + { + "epoch": 0.21, + "grad_norm": 0.667618595288727, + "learning_rate": 4.927427841591734e-06, + "loss": 0.3116, + "step": 4465 + }, + { + "epoch": 0.21, + "grad_norm": 0.719104575802436, + "learning_rate": 4.9273824709210405e-06, + "loss": 0.2935, + "step": 4466 + }, + { + "epoch": 0.21, + "grad_norm": 0.6244483217224905, + "learning_rate": 4.927337086281372e-06, + "loss": 0.2993, + "step": 4467 + }, + { + "epoch": 0.21, + "grad_norm": 0.6439622988822955, + "learning_rate": 4.927291687672988e-06, + "loss": 0.3275, + "step": 4468 + }, + { + "epoch": 0.21, + "grad_norm": 0.636133959934106, + "learning_rate": 4.927246275096151e-06, + "loss": 0.2748, + "step": 4469 + }, + { + "epoch": 0.21, + "grad_norm": 0.6475157737027254, + "learning_rate": 4.927200848551122e-06, + "loss": 0.3154, + "step": 4470 + }, + { + "epoch": 0.21, + "grad_norm": 0.6989385465904451, + "learning_rate": 4.927155408038161e-06, + "loss": 0.3166, + "step": 4471 + }, + { + "epoch": 0.21, + "grad_norm": 0.7245177077101411, + "learning_rate": 4.927109953557532e-06, + "loss": 0.3173, + "step": 4472 + }, + { + "epoch": 0.21, + "grad_norm": 0.5764487353828057, + "learning_rate": 4.927064485109494e-06, + "loss": 0.2742, + "step": 4473 + }, + { + "epoch": 0.21, + "grad_norm": 0.6106967927749954, + "learning_rate": 4.927019002694311e-06, + "loss": 0.2988, + "step": 4474 + }, + { + "epoch": 0.21, + "grad_norm": 0.649039895063909, + "learning_rate": 4.9269735063122424e-06, + "loss": 0.3172, + "step": 4475 + }, + { + "epoch": 0.21, + "grad_norm": 0.6976299101070069, + "learning_rate": 4.926927995963553e-06, + "loss": 0.3104, + "step": 4476 + }, + { + "epoch": 0.21, + "grad_norm": 0.6419245298869971, + "learning_rate": 4.926882471648502e-06, + "loss": 0.305, + "step": 4477 + }, + { + "epoch": 0.21, + "grad_norm": 0.6525870031717338, + "learning_rate": 4.9268369333673514e-06, + "loss": 0.3049, + "step": 4478 + }, + { + "epoch": 0.21, + "grad_norm": 0.741861053350525, + "learning_rate": 4.926791381120366e-06, + "loss": 0.3145, + "step": 4479 + }, + { + "epoch": 0.21, + "grad_norm": 0.6298175088796799, + "learning_rate": 4.926745814907805e-06, + "loss": 0.3031, + "step": 4480 + }, + { + "epoch": 0.21, + "grad_norm": 0.6686664424020168, + "learning_rate": 4.926700234729932e-06, + "loss": 0.3053, + "step": 4481 + }, + { + "epoch": 0.21, + "grad_norm": 0.6371966199996749, + "learning_rate": 4.9266546405870095e-06, + "loss": 0.2921, + "step": 4482 + }, + { + "epoch": 0.21, + "grad_norm": 0.6169112654346999, + "learning_rate": 4.926609032479299e-06, + "loss": 0.2942, + "step": 4483 + }, + { + "epoch": 0.21, + "grad_norm": 0.6755194845196444, + "learning_rate": 4.926563410407063e-06, + "loss": 0.3029, + "step": 4484 + }, + { + "epoch": 0.21, + "grad_norm": 0.6670996507767957, + "learning_rate": 4.926517774370565e-06, + "loss": 0.3153, + "step": 4485 + }, + { + "epoch": 0.21, + "grad_norm": 0.644714534756908, + "learning_rate": 4.926472124370067e-06, + "loss": 0.3043, + "step": 4486 + }, + { + "epoch": 0.21, + "grad_norm": 0.6174622591437556, + "learning_rate": 4.926426460405832e-06, + "loss": 0.2932, + "step": 4487 + }, + { + "epoch": 0.21, + "grad_norm": 0.664226740207951, + "learning_rate": 4.926380782478123e-06, + "loss": 0.3077, + "step": 4488 + }, + { + "epoch": 0.21, + "grad_norm": 0.5872178983495393, + "learning_rate": 4.926335090587201e-06, + "loss": 0.287, + "step": 4489 + }, + { + "epoch": 0.21, + "grad_norm": 0.6278919032826058, + "learning_rate": 4.926289384733332e-06, + "loss": 0.3131, + "step": 4490 + }, + { + "epoch": 0.21, + "grad_norm": 0.612181625831464, + "learning_rate": 4.926243664916776e-06, + "loss": 0.2716, + "step": 4491 + }, + { + "epoch": 0.21, + "grad_norm": 0.6568876097302112, + "learning_rate": 4.9261979311377985e-06, + "loss": 0.3091, + "step": 4492 + }, + { + "epoch": 0.21, + "grad_norm": 0.6209786601956355, + "learning_rate": 4.9261521833966615e-06, + "loss": 0.2994, + "step": 4493 + }, + { + "epoch": 0.21, + "grad_norm": 0.700131476620929, + "learning_rate": 4.926106421693629e-06, + "loss": 0.3164, + "step": 4494 + }, + { + "epoch": 0.21, + "grad_norm": 0.6507324315655947, + "learning_rate": 4.9260606460289625e-06, + "loss": 0.2978, + "step": 4495 + }, + { + "epoch": 0.21, + "grad_norm": 0.6266453639325512, + "learning_rate": 4.926014856402928e-06, + "loss": 0.2854, + "step": 4496 + }, + { + "epoch": 0.21, + "grad_norm": 0.6678585157378778, + "learning_rate": 4.9259690528157875e-06, + "loss": 0.3037, + "step": 4497 + }, + { + "epoch": 0.21, + "grad_norm": 0.6154491255192899, + "learning_rate": 4.925923235267804e-06, + "loss": 0.2848, + "step": 4498 + }, + { + "epoch": 0.21, + "grad_norm": 0.7001360377920539, + "learning_rate": 4.925877403759243e-06, + "loss": 0.3274, + "step": 4499 + }, + { + "epoch": 0.21, + "grad_norm": 0.6528627369828501, + "learning_rate": 4.925831558290367e-06, + "loss": 0.2871, + "step": 4500 + }, + { + "epoch": 0.21, + "grad_norm": 0.6832028414189534, + "learning_rate": 4.925785698861439e-06, + "loss": 0.309, + "step": 4501 + }, + { + "epoch": 0.21, + "grad_norm": 0.6404512594002214, + "learning_rate": 4.925739825472726e-06, + "loss": 0.3064, + "step": 4502 + }, + { + "epoch": 0.21, + "grad_norm": 0.577023496299867, + "learning_rate": 4.925693938124488e-06, + "loss": 0.2992, + "step": 4503 + }, + { + "epoch": 0.21, + "grad_norm": 0.6550802850196321, + "learning_rate": 4.925648036816992e-06, + "loss": 0.2969, + "step": 4504 + }, + { + "epoch": 0.21, + "grad_norm": 0.6288605491403765, + "learning_rate": 4.925602121550502e-06, + "loss": 0.2953, + "step": 4505 + }, + { + "epoch": 0.21, + "grad_norm": 0.6377067402695463, + "learning_rate": 4.92555619232528e-06, + "loss": 0.3012, + "step": 4506 + }, + { + "epoch": 0.21, + "grad_norm": 0.7147660379493258, + "learning_rate": 4.925510249141593e-06, + "loss": 0.306, + "step": 4507 + }, + { + "epoch": 0.21, + "grad_norm": 0.714553628020211, + "learning_rate": 4.925464291999704e-06, + "loss": 0.3199, + "step": 4508 + }, + { + "epoch": 0.21, + "grad_norm": 0.678363100573355, + "learning_rate": 4.925418320899877e-06, + "loss": 0.3149, + "step": 4509 + }, + { + "epoch": 0.21, + "grad_norm": 0.6795559379776098, + "learning_rate": 4.925372335842377e-06, + "loss": 0.2837, + "step": 4510 + }, + { + "epoch": 0.21, + "grad_norm": 0.5974344819315637, + "learning_rate": 4.92532633682747e-06, + "loss": 0.2756, + "step": 4511 + }, + { + "epoch": 0.21, + "grad_norm": 0.6350555453988977, + "learning_rate": 4.925280323855418e-06, + "loss": 0.2812, + "step": 4512 + }, + { + "epoch": 0.21, + "grad_norm": 0.7320977354612084, + "learning_rate": 4.925234296926488e-06, + "loss": 0.3128, + "step": 4513 + }, + { + "epoch": 0.21, + "grad_norm": 0.6561027098534131, + "learning_rate": 4.925188256040944e-06, + "loss": 0.2927, + "step": 4514 + }, + { + "epoch": 0.21, + "grad_norm": 0.6274795660916729, + "learning_rate": 4.925142201199052e-06, + "loss": 0.2864, + "step": 4515 + }, + { + "epoch": 0.21, + "grad_norm": 0.6852541994848502, + "learning_rate": 4.925096132401075e-06, + "loss": 0.2927, + "step": 4516 + }, + { + "epoch": 0.21, + "grad_norm": 0.6960059251766032, + "learning_rate": 4.92505004964728e-06, + "loss": 0.3089, + "step": 4517 + }, + { + "epoch": 0.21, + "grad_norm": 0.6536896155229162, + "learning_rate": 4.925003952937931e-06, + "loss": 0.2998, + "step": 4518 + }, + { + "epoch": 0.21, + "grad_norm": 0.6741570974625475, + "learning_rate": 4.924957842273294e-06, + "loss": 0.305, + "step": 4519 + }, + { + "epoch": 0.21, + "grad_norm": 0.6264843069402526, + "learning_rate": 4.924911717653634e-06, + "loss": 0.3137, + "step": 4520 + }, + { + "epoch": 0.21, + "grad_norm": 0.6626169519839777, + "learning_rate": 4.924865579079217e-06, + "loss": 0.3147, + "step": 4521 + }, + { + "epoch": 0.21, + "grad_norm": 0.6486421472692034, + "learning_rate": 4.924819426550308e-06, + "loss": 0.3005, + "step": 4522 + }, + { + "epoch": 0.21, + "grad_norm": 0.6062814478089581, + "learning_rate": 4.924773260067171e-06, + "loss": 0.295, + "step": 4523 + }, + { + "epoch": 0.21, + "grad_norm": 0.6153609058195016, + "learning_rate": 4.924727079630075e-06, + "loss": 0.2908, + "step": 4524 + }, + { + "epoch": 0.21, + "grad_norm": 0.6719003771471443, + "learning_rate": 4.924680885239284e-06, + "loss": 0.3051, + "step": 4525 + }, + { + "epoch": 0.21, + "grad_norm": 0.7022460875437452, + "learning_rate": 4.924634676895063e-06, + "loss": 0.3033, + "step": 4526 + }, + { + "epoch": 0.21, + "grad_norm": 0.6131103933530557, + "learning_rate": 4.92458845459768e-06, + "loss": 0.2977, + "step": 4527 + }, + { + "epoch": 0.21, + "grad_norm": 0.5980915270000645, + "learning_rate": 4.9245422183474e-06, + "loss": 0.2894, + "step": 4528 + }, + { + "epoch": 0.21, + "grad_norm": 0.6641319677711318, + "learning_rate": 4.924495968144488e-06, + "loss": 0.3028, + "step": 4529 + }, + { + "epoch": 0.21, + "grad_norm": 0.6298510004200859, + "learning_rate": 4.924449703989211e-06, + "loss": 0.299, + "step": 4530 + }, + { + "epoch": 0.21, + "grad_norm": 0.6564397180602507, + "learning_rate": 4.9244034258818365e-06, + "loss": 0.3146, + "step": 4531 + }, + { + "epoch": 0.21, + "grad_norm": 0.6605534758508583, + "learning_rate": 4.924357133822628e-06, + "loss": 0.2926, + "step": 4532 + }, + { + "epoch": 0.21, + "grad_norm": 0.7007670800020913, + "learning_rate": 4.924310827811855e-06, + "loss": 0.2923, + "step": 4533 + }, + { + "epoch": 0.21, + "grad_norm": 0.6480840672206956, + "learning_rate": 4.924264507849782e-06, + "loss": 0.3105, + "step": 4534 + }, + { + "epoch": 0.21, + "grad_norm": 0.6560304308901874, + "learning_rate": 4.924218173936675e-06, + "loss": 0.3105, + "step": 4535 + }, + { + "epoch": 0.21, + "grad_norm": 0.6528242205841888, + "learning_rate": 4.924171826072804e-06, + "loss": 0.2966, + "step": 4536 + }, + { + "epoch": 0.21, + "grad_norm": 0.7271470788097215, + "learning_rate": 4.9241254642584315e-06, + "loss": 0.3224, + "step": 4537 + }, + { + "epoch": 0.21, + "grad_norm": 0.6882800094392971, + "learning_rate": 4.9240790884938265e-06, + "loss": 0.3146, + "step": 4538 + }, + { + "epoch": 0.21, + "grad_norm": 0.6633956247920071, + "learning_rate": 4.924032698779256e-06, + "loss": 0.3077, + "step": 4539 + }, + { + "epoch": 0.21, + "grad_norm": 0.6578209551460945, + "learning_rate": 4.923986295114986e-06, + "loss": 0.3127, + "step": 4540 + }, + { + "epoch": 0.21, + "grad_norm": 0.6539348220735014, + "learning_rate": 4.923939877501285e-06, + "loss": 0.3048, + "step": 4541 + }, + { + "epoch": 0.21, + "grad_norm": 0.6595726927937876, + "learning_rate": 4.923893445938419e-06, + "loss": 0.3028, + "step": 4542 + }, + { + "epoch": 0.21, + "grad_norm": 0.6673780761384033, + "learning_rate": 4.923847000426656e-06, + "loss": 0.325, + "step": 4543 + }, + { + "epoch": 0.21, + "grad_norm": 0.644534642193851, + "learning_rate": 4.923800540966261e-06, + "loss": 0.3233, + "step": 4544 + }, + { + "epoch": 0.21, + "grad_norm": 0.6209787190180615, + "learning_rate": 4.923754067557505e-06, + "loss": 0.2825, + "step": 4545 + }, + { + "epoch": 0.21, + "grad_norm": 0.6332285769470354, + "learning_rate": 4.923707580200653e-06, + "loss": 0.2877, + "step": 4546 + }, + { + "epoch": 0.21, + "grad_norm": 0.6649358552263912, + "learning_rate": 4.923661078895972e-06, + "loss": 0.2923, + "step": 4547 + }, + { + "epoch": 0.21, + "grad_norm": 0.6867038334468127, + "learning_rate": 4.923614563643732e-06, + "loss": 0.3051, + "step": 4548 + }, + { + "epoch": 0.21, + "grad_norm": 0.6630853412467658, + "learning_rate": 4.923568034444198e-06, + "loss": 0.3203, + "step": 4549 + }, + { + "epoch": 0.21, + "grad_norm": 0.6446207851667675, + "learning_rate": 4.9235214912976394e-06, + "loss": 0.297, + "step": 4550 + }, + { + "epoch": 0.21, + "grad_norm": 0.5861806614430553, + "learning_rate": 4.923474934204324e-06, + "loss": 0.285, + "step": 4551 + }, + { + "epoch": 0.21, + "grad_norm": 0.6543573655749679, + "learning_rate": 4.92342836316452e-06, + "loss": 0.3165, + "step": 4552 + }, + { + "epoch": 0.21, + "grad_norm": 0.5999907445875508, + "learning_rate": 4.923381778178495e-06, + "loss": 0.2818, + "step": 4553 + }, + { + "epoch": 0.21, + "grad_norm": 0.6578521135484489, + "learning_rate": 4.923335179246516e-06, + "loss": 0.329, + "step": 4554 + }, + { + "epoch": 0.21, + "grad_norm": 0.5875618727801298, + "learning_rate": 4.9232885663688525e-06, + "loss": 0.2902, + "step": 4555 + }, + { + "epoch": 0.21, + "grad_norm": 0.6614291459534196, + "learning_rate": 4.923241939545773e-06, + "loss": 0.2947, + "step": 4556 + }, + { + "epoch": 0.21, + "grad_norm": 0.6940160697121641, + "learning_rate": 4.923195298777544e-06, + "loss": 0.318, + "step": 4557 + }, + { + "epoch": 0.21, + "grad_norm": 0.6512688040630048, + "learning_rate": 4.923148644064436e-06, + "loss": 0.3172, + "step": 4558 + }, + { + "epoch": 0.21, + "grad_norm": 0.6057247338026178, + "learning_rate": 4.923101975406717e-06, + "loss": 0.282, + "step": 4559 + }, + { + "epoch": 0.21, + "grad_norm": 0.6570229675353221, + "learning_rate": 4.923055292804654e-06, + "loss": 0.3085, + "step": 4560 + }, + { + "epoch": 0.21, + "grad_norm": 0.5973202785756488, + "learning_rate": 4.923008596258517e-06, + "loss": 0.2684, + "step": 4561 + }, + { + "epoch": 0.21, + "grad_norm": 0.6421583628550253, + "learning_rate": 4.9229618857685755e-06, + "loss": 0.3094, + "step": 4562 + }, + { + "epoch": 0.21, + "grad_norm": 0.6310489195655733, + "learning_rate": 4.9229151613350966e-06, + "loss": 0.2747, + "step": 4563 + }, + { + "epoch": 0.21, + "grad_norm": 0.6642183772958913, + "learning_rate": 4.92286842295835e-06, + "loss": 0.2995, + "step": 4564 + }, + { + "epoch": 0.21, + "grad_norm": 0.702239809439023, + "learning_rate": 4.922821670638604e-06, + "loss": 0.3191, + "step": 4565 + }, + { + "epoch": 0.21, + "grad_norm": 0.6262345004068487, + "learning_rate": 4.922774904376129e-06, + "loss": 0.3157, + "step": 4566 + }, + { + "epoch": 0.21, + "grad_norm": 0.6348053149222856, + "learning_rate": 4.922728124171193e-06, + "loss": 0.3155, + "step": 4567 + }, + { + "epoch": 0.21, + "grad_norm": 0.6421756559848522, + "learning_rate": 4.922681330024066e-06, + "loss": 0.309, + "step": 4568 + }, + { + "epoch": 0.21, + "grad_norm": 0.6123948448943998, + "learning_rate": 4.922634521935017e-06, + "loss": 0.2734, + "step": 4569 + }, + { + "epoch": 0.21, + "grad_norm": 0.6159196475225955, + "learning_rate": 4.922587699904314e-06, + "loss": 0.2946, + "step": 4570 + }, + { + "epoch": 0.21, + "grad_norm": 0.645104020176425, + "learning_rate": 4.922540863932229e-06, + "loss": 0.2873, + "step": 4571 + }, + { + "epoch": 0.21, + "grad_norm": 0.6149053254637892, + "learning_rate": 4.92249401401903e-06, + "loss": 0.2825, + "step": 4572 + }, + { + "epoch": 0.21, + "grad_norm": 0.6716015151893557, + "learning_rate": 4.922447150164987e-06, + "loss": 0.3222, + "step": 4573 + }, + { + "epoch": 0.21, + "grad_norm": 0.6657359871315198, + "learning_rate": 4.922400272370368e-06, + "loss": 0.3111, + "step": 4574 + }, + { + "epoch": 0.21, + "grad_norm": 0.5995591289743971, + "learning_rate": 4.922353380635446e-06, + "loss": 0.2943, + "step": 4575 + }, + { + "epoch": 0.21, + "grad_norm": 0.638348553758986, + "learning_rate": 4.922306474960489e-06, + "loss": 0.3067, + "step": 4576 + }, + { + "epoch": 0.21, + "grad_norm": 0.6537637701425527, + "learning_rate": 4.922259555345766e-06, + "loss": 0.3045, + "step": 4577 + }, + { + "epoch": 0.21, + "grad_norm": 0.6586391651778001, + "learning_rate": 4.922212621791549e-06, + "loss": 0.3315, + "step": 4578 + }, + { + "epoch": 0.21, + "grad_norm": 0.7427415993980501, + "learning_rate": 4.922165674298106e-06, + "loss": 0.3037, + "step": 4579 + }, + { + "epoch": 0.21, + "grad_norm": 0.6423213271810424, + "learning_rate": 4.922118712865709e-06, + "loss": 0.3099, + "step": 4580 + }, + { + "epoch": 0.21, + "grad_norm": 0.5945349292317494, + "learning_rate": 4.922071737494628e-06, + "loss": 0.3105, + "step": 4581 + }, + { + "epoch": 0.21, + "grad_norm": 0.7062897813312039, + "learning_rate": 4.922024748185133e-06, + "loss": 0.3272, + "step": 4582 + }, + { + "epoch": 0.21, + "grad_norm": 0.6503170917128496, + "learning_rate": 4.921977744937494e-06, + "loss": 0.2828, + "step": 4583 + }, + { + "epoch": 0.21, + "grad_norm": 0.7005702860217456, + "learning_rate": 4.921930727751981e-06, + "loss": 0.3059, + "step": 4584 + }, + { + "epoch": 0.21, + "grad_norm": 0.7008044048065711, + "learning_rate": 4.9218836966288665e-06, + "loss": 0.3183, + "step": 4585 + }, + { + "epoch": 0.21, + "grad_norm": 0.6281764712298719, + "learning_rate": 4.92183665156842e-06, + "loss": 0.3138, + "step": 4586 + }, + { + "epoch": 0.21, + "grad_norm": 0.6279173393981322, + "learning_rate": 4.921789592570912e-06, + "loss": 0.3133, + "step": 4587 + }, + { + "epoch": 0.21, + "grad_norm": 0.6079860822578063, + "learning_rate": 4.9217425196366134e-06, + "loss": 0.2945, + "step": 4588 + }, + { + "epoch": 0.21, + "grad_norm": 0.664788877370541, + "learning_rate": 4.921695432765795e-06, + "loss": 0.3079, + "step": 4589 + }, + { + "epoch": 0.22, + "grad_norm": 0.6927913751162266, + "learning_rate": 4.921648331958729e-06, + "loss": 0.305, + "step": 4590 + }, + { + "epoch": 0.22, + "grad_norm": 0.6469791250167011, + "learning_rate": 4.921601217215684e-06, + "loss": 0.3053, + "step": 4591 + }, + { + "epoch": 0.22, + "grad_norm": 0.6453360303035577, + "learning_rate": 4.921554088536934e-06, + "loss": 0.3039, + "step": 4592 + }, + { + "epoch": 0.22, + "grad_norm": 0.7185349827859864, + "learning_rate": 4.9215069459227496e-06, + "loss": 0.317, + "step": 4593 + }, + { + "epoch": 0.22, + "grad_norm": 0.6884824893750112, + "learning_rate": 4.921459789373399e-06, + "loss": 0.3134, + "step": 4594 + }, + { + "epoch": 0.22, + "grad_norm": 0.7496360705693766, + "learning_rate": 4.921412618889157e-06, + "loss": 0.3242, + "step": 4595 + }, + { + "epoch": 0.22, + "grad_norm": 0.6009729658102253, + "learning_rate": 4.921365434470295e-06, + "loss": 0.2839, + "step": 4596 + }, + { + "epoch": 0.22, + "grad_norm": 0.6557826174916532, + "learning_rate": 4.921318236117083e-06, + "loss": 0.3144, + "step": 4597 + }, + { + "epoch": 0.22, + "grad_norm": 0.6506468659960883, + "learning_rate": 4.921271023829793e-06, + "loss": 0.2856, + "step": 4598 + }, + { + "epoch": 0.22, + "grad_norm": 0.6427303867331313, + "learning_rate": 4.921223797608696e-06, + "loss": 0.3054, + "step": 4599 + }, + { + "epoch": 0.22, + "grad_norm": 0.6556700591448696, + "learning_rate": 4.9211765574540655e-06, + "loss": 0.2728, + "step": 4600 + }, + { + "epoch": 0.22, + "grad_norm": 0.7103110925917483, + "learning_rate": 4.921129303366173e-06, + "loss": 0.3246, + "step": 4601 + }, + { + "epoch": 0.22, + "grad_norm": 0.6220887645465705, + "learning_rate": 4.921082035345288e-06, + "loss": 0.3188, + "step": 4602 + }, + { + "epoch": 0.22, + "grad_norm": 0.6156503700081077, + "learning_rate": 4.921034753391686e-06, + "loss": 0.3003, + "step": 4603 + }, + { + "epoch": 0.22, + "grad_norm": 0.5701603476575733, + "learning_rate": 4.920987457505637e-06, + "loss": 0.266, + "step": 4604 + }, + { + "epoch": 0.22, + "grad_norm": 0.6354815720553649, + "learning_rate": 4.920940147687413e-06, + "loss": 0.2969, + "step": 4605 + }, + { + "epoch": 0.22, + "grad_norm": 0.599612763674905, + "learning_rate": 4.920892823937287e-06, + "loss": 0.2905, + "step": 4606 + }, + { + "epoch": 0.22, + "grad_norm": 0.6084768617215734, + "learning_rate": 4.9208454862555325e-06, + "loss": 0.2828, + "step": 4607 + }, + { + "epoch": 0.22, + "grad_norm": 0.6823048297721882, + "learning_rate": 4.92079813464242e-06, + "loss": 0.3255, + "step": 4608 + }, + { + "epoch": 0.22, + "grad_norm": 0.6387043057512019, + "learning_rate": 4.920750769098223e-06, + "loss": 0.3028, + "step": 4609 + }, + { + "epoch": 0.22, + "grad_norm": 0.6543725158657582, + "learning_rate": 4.9207033896232135e-06, + "loss": 0.3126, + "step": 4610 + }, + { + "epoch": 0.22, + "grad_norm": 0.640712480029257, + "learning_rate": 4.920655996217664e-06, + "loss": 0.2866, + "step": 4611 + }, + { + "epoch": 0.22, + "grad_norm": 0.60714234303834, + "learning_rate": 4.920608588881848e-06, + "loss": 0.305, + "step": 4612 + }, + { + "epoch": 0.22, + "grad_norm": 0.6570117438573411, + "learning_rate": 4.920561167616038e-06, + "loss": 0.3138, + "step": 4613 + }, + { + "epoch": 0.22, + "grad_norm": 0.6485681273634838, + "learning_rate": 4.920513732420507e-06, + "loss": 0.3122, + "step": 4614 + }, + { + "epoch": 0.22, + "grad_norm": 0.6388042200197137, + "learning_rate": 4.920466283295528e-06, + "loss": 0.3097, + "step": 4615 + }, + { + "epoch": 0.22, + "grad_norm": 0.6717327692376269, + "learning_rate": 4.920418820241373e-06, + "loss": 0.3112, + "step": 4616 + }, + { + "epoch": 0.22, + "grad_norm": 0.6432346609588325, + "learning_rate": 4.9203713432583165e-06, + "loss": 0.3019, + "step": 4617 + }, + { + "epoch": 0.22, + "grad_norm": 0.6178035182987959, + "learning_rate": 4.920323852346631e-06, + "loss": 0.3091, + "step": 4618 + }, + { + "epoch": 0.22, + "grad_norm": 0.6657805969017181, + "learning_rate": 4.920276347506591e-06, + "loss": 0.2897, + "step": 4619 + }, + { + "epoch": 0.22, + "grad_norm": 0.6543817206592588, + "learning_rate": 4.920228828738468e-06, + "loss": 0.2795, + "step": 4620 + }, + { + "epoch": 0.22, + "grad_norm": 0.6999257252292103, + "learning_rate": 4.920181296042537e-06, + "loss": 0.315, + "step": 4621 + }, + { + "epoch": 0.22, + "grad_norm": 0.5808673482702043, + "learning_rate": 4.920133749419071e-06, + "loss": 0.2906, + "step": 4622 + }, + { + "epoch": 0.22, + "grad_norm": 0.6390209789031241, + "learning_rate": 4.9200861888683434e-06, + "loss": 0.3018, + "step": 4623 + }, + { + "epoch": 0.22, + "grad_norm": 0.5979078894770896, + "learning_rate": 4.920038614390628e-06, + "loss": 0.2806, + "step": 4624 + }, + { + "epoch": 0.22, + "grad_norm": 0.626944798838219, + "learning_rate": 4.919991025986198e-06, + "loss": 0.2954, + "step": 4625 + }, + { + "epoch": 0.22, + "grad_norm": 0.630938519525427, + "learning_rate": 4.919943423655329e-06, + "loss": 0.3172, + "step": 4626 + }, + { + "epoch": 0.22, + "grad_norm": 0.6809965706813415, + "learning_rate": 4.9198958073982925e-06, + "loss": 0.3044, + "step": 4627 + }, + { + "epoch": 0.22, + "grad_norm": 0.6759085002033419, + "learning_rate": 4.919848177215365e-06, + "loss": 0.3013, + "step": 4628 + }, + { + "epoch": 0.22, + "grad_norm": 0.6391825468235061, + "learning_rate": 4.9198005331068185e-06, + "loss": 0.2828, + "step": 4629 + }, + { + "epoch": 0.22, + "grad_norm": 0.6244587054301652, + "learning_rate": 4.919752875072929e-06, + "loss": 0.3115, + "step": 4630 + }, + { + "epoch": 0.22, + "grad_norm": 0.6481823440003496, + "learning_rate": 4.919705203113969e-06, + "loss": 0.3023, + "step": 4631 + }, + { + "epoch": 0.22, + "grad_norm": 0.6967002542481644, + "learning_rate": 4.9196575172302155e-06, + "loss": 0.3104, + "step": 4632 + }, + { + "epoch": 0.22, + "grad_norm": 0.5879820639737037, + "learning_rate": 4.919609817421939e-06, + "loss": 0.263, + "step": 4633 + }, + { + "epoch": 0.22, + "grad_norm": 0.603987326766066, + "learning_rate": 4.919562103689417e-06, + "loss": 0.2994, + "step": 4634 + }, + { + "epoch": 0.22, + "grad_norm": 0.6901537958101457, + "learning_rate": 4.919514376032923e-06, + "loss": 0.2972, + "step": 4635 + }, + { + "epoch": 0.22, + "grad_norm": 0.6086650076590844, + "learning_rate": 4.919466634452733e-06, + "loss": 0.2756, + "step": 4636 + }, + { + "epoch": 0.22, + "grad_norm": 0.6799650508865167, + "learning_rate": 4.919418878949119e-06, + "loss": 0.311, + "step": 4637 + }, + { + "epoch": 0.22, + "grad_norm": 0.7073206175425172, + "learning_rate": 4.9193711095223584e-06, + "loss": 0.3285, + "step": 4638 + }, + { + "epoch": 0.22, + "grad_norm": 0.6468175021904542, + "learning_rate": 4.919323326172726e-06, + "loss": 0.2792, + "step": 4639 + }, + { + "epoch": 0.22, + "grad_norm": 0.6156131354870613, + "learning_rate": 4.919275528900495e-06, + "loss": 0.3216, + "step": 4640 + }, + { + "epoch": 0.22, + "grad_norm": 0.6450150574617112, + "learning_rate": 4.919227717705941e-06, + "loss": 0.3154, + "step": 4641 + }, + { + "epoch": 0.22, + "grad_norm": 0.6574026284220841, + "learning_rate": 4.91917989258934e-06, + "loss": 0.3127, + "step": 4642 + }, + { + "epoch": 0.22, + "grad_norm": 0.6643203209704499, + "learning_rate": 4.919132053550966e-06, + "loss": 0.3187, + "step": 4643 + }, + { + "epoch": 0.22, + "grad_norm": 0.6201182700542179, + "learning_rate": 4.9190842005910955e-06, + "loss": 0.293, + "step": 4644 + }, + { + "epoch": 0.22, + "grad_norm": 0.6297163142211779, + "learning_rate": 4.9190363337100036e-06, + "loss": 0.2964, + "step": 4645 + }, + { + "epoch": 0.22, + "grad_norm": 0.5824649502876073, + "learning_rate": 4.918988452907966e-06, + "loss": 0.2979, + "step": 4646 + }, + { + "epoch": 0.22, + "grad_norm": 0.6420578963808022, + "learning_rate": 4.9189405581852575e-06, + "loss": 0.3104, + "step": 4647 + }, + { + "epoch": 0.22, + "grad_norm": 0.7046269192986725, + "learning_rate": 4.918892649542154e-06, + "loss": 0.3043, + "step": 4648 + }, + { + "epoch": 0.22, + "grad_norm": 0.6791513501339436, + "learning_rate": 4.918844726978931e-06, + "loss": 0.2999, + "step": 4649 + }, + { + "epoch": 0.22, + "grad_norm": 0.6746384159249698, + "learning_rate": 4.918796790495865e-06, + "loss": 0.3119, + "step": 4650 + }, + { + "epoch": 0.22, + "grad_norm": 0.6617211177233525, + "learning_rate": 4.918748840093231e-06, + "loss": 0.2866, + "step": 4651 + }, + { + "epoch": 0.22, + "grad_norm": 0.6177055528507687, + "learning_rate": 4.918700875771306e-06, + "loss": 0.2861, + "step": 4652 + }, + { + "epoch": 0.22, + "grad_norm": 0.6668373501500584, + "learning_rate": 4.918652897530365e-06, + "loss": 0.3024, + "step": 4653 + }, + { + "epoch": 0.22, + "grad_norm": 0.6674510634037614, + "learning_rate": 4.918604905370684e-06, + "loss": 0.3088, + "step": 4654 + }, + { + "epoch": 0.22, + "grad_norm": 0.6445155937708686, + "learning_rate": 4.91855689929254e-06, + "loss": 0.3084, + "step": 4655 + }, + { + "epoch": 0.22, + "grad_norm": 0.6629805535073241, + "learning_rate": 4.91850887929621e-06, + "loss": 0.3075, + "step": 4656 + }, + { + "epoch": 0.22, + "grad_norm": 0.6495964359526777, + "learning_rate": 4.918460845381968e-06, + "loss": 0.3052, + "step": 4657 + }, + { + "epoch": 0.22, + "grad_norm": 0.6617750527618876, + "learning_rate": 4.918412797550092e-06, + "loss": 0.3144, + "step": 4658 + }, + { + "epoch": 0.22, + "grad_norm": 0.6695189746931611, + "learning_rate": 4.918364735800858e-06, + "loss": 0.3146, + "step": 4659 + }, + { + "epoch": 0.22, + "grad_norm": 0.6719135330381957, + "learning_rate": 4.918316660134543e-06, + "loss": 0.3113, + "step": 4660 + }, + { + "epoch": 0.22, + "grad_norm": 0.6184385001897809, + "learning_rate": 4.918268570551424e-06, + "loss": 0.3169, + "step": 4661 + }, + { + "epoch": 0.22, + "grad_norm": 0.6890683857936853, + "learning_rate": 4.918220467051776e-06, + "loss": 0.3068, + "step": 4662 + }, + { + "epoch": 0.22, + "grad_norm": 0.630646049059073, + "learning_rate": 4.918172349635878e-06, + "loss": 0.2915, + "step": 4663 + }, + { + "epoch": 0.22, + "grad_norm": 0.6310240924226657, + "learning_rate": 4.918124218304006e-06, + "loss": 0.3189, + "step": 4664 + }, + { + "epoch": 0.22, + "grad_norm": 0.6564143549987344, + "learning_rate": 4.918076073056436e-06, + "loss": 0.2998, + "step": 4665 + }, + { + "epoch": 0.22, + "grad_norm": 0.6527132457344403, + "learning_rate": 4.918027913893446e-06, + "loss": 0.2978, + "step": 4666 + }, + { + "epoch": 0.22, + "grad_norm": 0.6677866198449403, + "learning_rate": 4.917979740815314e-06, + "loss": 0.3081, + "step": 4667 + }, + { + "epoch": 0.22, + "grad_norm": 0.6534822281942936, + "learning_rate": 4.917931553822315e-06, + "loss": 0.3105, + "step": 4668 + }, + { + "epoch": 0.22, + "grad_norm": 0.6349484386068629, + "learning_rate": 4.917883352914729e-06, + "loss": 0.3048, + "step": 4669 + }, + { + "epoch": 0.22, + "grad_norm": 0.6210774281738634, + "learning_rate": 4.917835138092831e-06, + "loss": 0.2894, + "step": 4670 + }, + { + "epoch": 0.22, + "grad_norm": 0.7007914477404632, + "learning_rate": 4.917786909356901e-06, + "loss": 0.3066, + "step": 4671 + }, + { + "epoch": 0.22, + "grad_norm": 0.6112344660918914, + "learning_rate": 4.917738666707214e-06, + "loss": 0.2681, + "step": 4672 + }, + { + "epoch": 0.22, + "grad_norm": 0.7008251145812795, + "learning_rate": 4.917690410144048e-06, + "loss": 0.3083, + "step": 4673 + }, + { + "epoch": 0.22, + "grad_norm": 0.6768041505470049, + "learning_rate": 4.917642139667682e-06, + "loss": 0.2986, + "step": 4674 + }, + { + "epoch": 0.22, + "grad_norm": 0.6635934327876942, + "learning_rate": 4.917593855278393e-06, + "loss": 0.3064, + "step": 4675 + }, + { + "epoch": 0.22, + "grad_norm": 0.5858481648444755, + "learning_rate": 4.91754555697646e-06, + "loss": 0.2905, + "step": 4676 + }, + { + "epoch": 0.22, + "grad_norm": 0.6625282252307694, + "learning_rate": 4.917497244762158e-06, + "loss": 0.2924, + "step": 4677 + }, + { + "epoch": 0.22, + "grad_norm": 0.7128714995246705, + "learning_rate": 4.917448918635769e-06, + "loss": 0.3198, + "step": 4678 + }, + { + "epoch": 0.22, + "grad_norm": 0.7224386270465862, + "learning_rate": 4.917400578597567e-06, + "loss": 0.309, + "step": 4679 + }, + { + "epoch": 0.22, + "grad_norm": 0.6592394075255144, + "learning_rate": 4.917352224647834e-06, + "loss": 0.2899, + "step": 4680 + }, + { + "epoch": 0.22, + "grad_norm": 0.606227597473977, + "learning_rate": 4.917303856786846e-06, + "loss": 0.292, + "step": 4681 + }, + { + "epoch": 0.22, + "grad_norm": 0.5791680096607743, + "learning_rate": 4.917255475014881e-06, + "loss": 0.2986, + "step": 4682 + }, + { + "epoch": 0.22, + "grad_norm": 0.6659621719757085, + "learning_rate": 4.91720707933222e-06, + "loss": 0.3233, + "step": 4683 + }, + { + "epoch": 0.22, + "grad_norm": 0.6291129982458655, + "learning_rate": 4.917158669739138e-06, + "loss": 0.2979, + "step": 4684 + }, + { + "epoch": 0.22, + "grad_norm": 0.607602871046246, + "learning_rate": 4.9171102462359165e-06, + "loss": 0.3003, + "step": 4685 + }, + { + "epoch": 0.22, + "grad_norm": 0.6859218085052302, + "learning_rate": 4.917061808822833e-06, + "loss": 0.3161, + "step": 4686 + }, + { + "epoch": 0.22, + "grad_norm": 0.653683471512791, + "learning_rate": 4.917013357500167e-06, + "loss": 0.3152, + "step": 4687 + }, + { + "epoch": 0.22, + "grad_norm": 0.6961064183102886, + "learning_rate": 4.916964892268195e-06, + "loss": 0.3124, + "step": 4688 + }, + { + "epoch": 0.22, + "grad_norm": 0.7537404650545658, + "learning_rate": 4.9169164131271985e-06, + "loss": 0.3325, + "step": 4689 + }, + { + "epoch": 0.22, + "grad_norm": 0.5853206064987486, + "learning_rate": 4.916867920077455e-06, + "loss": 0.2988, + "step": 4690 + }, + { + "epoch": 0.22, + "grad_norm": 0.6728288811717145, + "learning_rate": 4.9168194131192445e-06, + "loss": 0.3205, + "step": 4691 + }, + { + "epoch": 0.22, + "grad_norm": 0.6622787370966358, + "learning_rate": 4.916770892252846e-06, + "loss": 0.3061, + "step": 4692 + }, + { + "epoch": 0.22, + "grad_norm": 0.6271057785174585, + "learning_rate": 4.916722357478538e-06, + "loss": 0.2852, + "step": 4693 + }, + { + "epoch": 0.22, + "grad_norm": 0.6814522770043056, + "learning_rate": 4.9166738087965995e-06, + "loss": 0.3322, + "step": 4694 + }, + { + "epoch": 0.22, + "grad_norm": 0.5769116111916841, + "learning_rate": 4.916625246207311e-06, + "loss": 0.2948, + "step": 4695 + }, + { + "epoch": 0.22, + "grad_norm": 0.6173755919230178, + "learning_rate": 4.916576669710953e-06, + "loss": 0.3021, + "step": 4696 + }, + { + "epoch": 0.22, + "grad_norm": 0.6433309971632742, + "learning_rate": 4.916528079307803e-06, + "loss": 0.2952, + "step": 4697 + }, + { + "epoch": 0.22, + "grad_norm": 0.6309635130243462, + "learning_rate": 4.91647947499814e-06, + "loss": 0.2835, + "step": 4698 + }, + { + "epoch": 0.22, + "grad_norm": 0.7197522225186105, + "learning_rate": 4.916430856782246e-06, + "loss": 0.3011, + "step": 4699 + }, + { + "epoch": 0.22, + "grad_norm": 0.6373660149554168, + "learning_rate": 4.9163822246604e-06, + "loss": 0.2948, + "step": 4700 + }, + { + "epoch": 0.22, + "grad_norm": 0.6802173036414846, + "learning_rate": 4.916333578632881e-06, + "loss": 0.298, + "step": 4701 + }, + { + "epoch": 0.22, + "grad_norm": 0.6006803775422077, + "learning_rate": 4.91628491869997e-06, + "loss": 0.3023, + "step": 4702 + }, + { + "epoch": 0.22, + "grad_norm": 0.6655665491399155, + "learning_rate": 4.916236244861946e-06, + "loss": 0.3049, + "step": 4703 + }, + { + "epoch": 0.22, + "grad_norm": 0.6360615788886047, + "learning_rate": 4.916187557119091e-06, + "loss": 0.2985, + "step": 4704 + }, + { + "epoch": 0.22, + "grad_norm": 0.6456331970587739, + "learning_rate": 4.916138855471682e-06, + "loss": 0.3131, + "step": 4705 + }, + { + "epoch": 0.22, + "grad_norm": 0.6961865377329493, + "learning_rate": 4.916090139920003e-06, + "loss": 0.2869, + "step": 4706 + }, + { + "epoch": 0.22, + "grad_norm": 0.6515189424258969, + "learning_rate": 4.916041410464332e-06, + "loss": 0.2935, + "step": 4707 + }, + { + "epoch": 0.22, + "grad_norm": 0.6910779575097085, + "learning_rate": 4.91599266710495e-06, + "loss": 0.3168, + "step": 4708 + }, + { + "epoch": 0.22, + "grad_norm": 0.6489348205875948, + "learning_rate": 4.915943909842137e-06, + "loss": 0.2848, + "step": 4709 + }, + { + "epoch": 0.22, + "grad_norm": 0.6203679736965066, + "learning_rate": 4.9158951386761734e-06, + "loss": 0.2882, + "step": 4710 + }, + { + "epoch": 0.22, + "grad_norm": 0.6139099487759837, + "learning_rate": 4.915846353607342e-06, + "loss": 0.2905, + "step": 4711 + }, + { + "epoch": 0.22, + "grad_norm": 0.6750876617505711, + "learning_rate": 4.915797554635921e-06, + "loss": 0.2963, + "step": 4712 + }, + { + "epoch": 0.22, + "grad_norm": 0.6551796351653764, + "learning_rate": 4.915748741762192e-06, + "loss": 0.2962, + "step": 4713 + }, + { + "epoch": 0.22, + "grad_norm": 0.6365898135785991, + "learning_rate": 4.915699914986437e-06, + "loss": 0.3121, + "step": 4714 + }, + { + "epoch": 0.22, + "grad_norm": 0.6857564295920738, + "learning_rate": 4.915651074308936e-06, + "loss": 0.317, + "step": 4715 + }, + { + "epoch": 0.22, + "grad_norm": 0.6356573193455815, + "learning_rate": 4.91560221972997e-06, + "loss": 0.3039, + "step": 4716 + }, + { + "epoch": 0.22, + "grad_norm": 0.6117571888691118, + "learning_rate": 4.91555335124982e-06, + "loss": 0.2763, + "step": 4717 + }, + { + "epoch": 0.22, + "grad_norm": 0.6488750320494839, + "learning_rate": 4.915504468868769e-06, + "loss": 0.2832, + "step": 4718 + }, + { + "epoch": 0.22, + "grad_norm": 0.6763170303065337, + "learning_rate": 4.9154555725870955e-06, + "loss": 0.2997, + "step": 4719 + }, + { + "epoch": 0.22, + "grad_norm": 0.6315320915095025, + "learning_rate": 4.915406662405083e-06, + "loss": 0.3125, + "step": 4720 + }, + { + "epoch": 0.22, + "grad_norm": 0.6090096852546022, + "learning_rate": 4.915357738323012e-06, + "loss": 0.2991, + "step": 4721 + }, + { + "epoch": 0.22, + "grad_norm": 0.6336041889046143, + "learning_rate": 4.9153088003411645e-06, + "loss": 0.3044, + "step": 4722 + }, + { + "epoch": 0.22, + "grad_norm": 0.651515135620815, + "learning_rate": 4.915259848459821e-06, + "loss": 0.3055, + "step": 4723 + }, + { + "epoch": 0.22, + "grad_norm": 0.6170029962752971, + "learning_rate": 4.915210882679265e-06, + "loss": 0.3104, + "step": 4724 + }, + { + "epoch": 0.22, + "grad_norm": 0.6087494038881424, + "learning_rate": 4.915161902999777e-06, + "loss": 0.3222, + "step": 4725 + }, + { + "epoch": 0.22, + "grad_norm": 0.6171286174107941, + "learning_rate": 4.91511290942164e-06, + "loss": 0.3026, + "step": 4726 + }, + { + "epoch": 0.22, + "grad_norm": 0.6255939634086025, + "learning_rate": 4.915063901945134e-06, + "loss": 0.3051, + "step": 4727 + }, + { + "epoch": 0.22, + "grad_norm": 0.7179481943821372, + "learning_rate": 4.915014880570543e-06, + "loss": 0.3165, + "step": 4728 + }, + { + "epoch": 0.22, + "grad_norm": 0.6976159014169668, + "learning_rate": 4.914965845298149e-06, + "loss": 0.3306, + "step": 4729 + }, + { + "epoch": 0.22, + "grad_norm": 0.6642358565160235, + "learning_rate": 4.914916796128232e-06, + "loss": 0.3043, + "step": 4730 + }, + { + "epoch": 0.22, + "grad_norm": 0.6265041876135546, + "learning_rate": 4.914867733061077e-06, + "loss": 0.3016, + "step": 4731 + }, + { + "epoch": 0.22, + "grad_norm": 0.7131013942976969, + "learning_rate": 4.914818656096965e-06, + "loss": 0.3169, + "step": 4732 + }, + { + "epoch": 0.22, + "grad_norm": 0.6027952566819451, + "learning_rate": 4.914769565236179e-06, + "loss": 0.2902, + "step": 4733 + }, + { + "epoch": 0.22, + "grad_norm": 0.6654590590017119, + "learning_rate": 4.914720460479e-06, + "loss": 0.311, + "step": 4734 + }, + { + "epoch": 0.22, + "grad_norm": 0.7132180235710306, + "learning_rate": 4.914671341825712e-06, + "loss": 0.3118, + "step": 4735 + }, + { + "epoch": 0.22, + "grad_norm": 0.6760275391581045, + "learning_rate": 4.914622209276597e-06, + "loss": 0.2909, + "step": 4736 + }, + { + "epoch": 0.22, + "grad_norm": 0.592870474535652, + "learning_rate": 4.914573062831939e-06, + "loss": 0.2888, + "step": 4737 + }, + { + "epoch": 0.22, + "grad_norm": 0.7011658621184228, + "learning_rate": 4.914523902492019e-06, + "loss": 0.2976, + "step": 4738 + }, + { + "epoch": 0.22, + "grad_norm": 0.6620072147997381, + "learning_rate": 4.914474728257122e-06, + "loss": 0.3037, + "step": 4739 + }, + { + "epoch": 0.22, + "grad_norm": 0.6387296873112804, + "learning_rate": 4.914425540127529e-06, + "loss": 0.3156, + "step": 4740 + }, + { + "epoch": 0.22, + "grad_norm": 0.6967781939836509, + "learning_rate": 4.914376338103524e-06, + "loss": 0.2974, + "step": 4741 + }, + { + "epoch": 0.22, + "grad_norm": 0.6521148575111595, + "learning_rate": 4.91432712218539e-06, + "loss": 0.3234, + "step": 4742 + }, + { + "epoch": 0.22, + "grad_norm": 0.6079013779847154, + "learning_rate": 4.914277892373409e-06, + "loss": 0.2909, + "step": 4743 + }, + { + "epoch": 0.22, + "grad_norm": 0.6231922869327325, + "learning_rate": 4.914228648667867e-06, + "loss": 0.3034, + "step": 4744 + }, + { + "epoch": 0.22, + "grad_norm": 0.6375730365593395, + "learning_rate": 4.914179391069046e-06, + "loss": 0.2917, + "step": 4745 + }, + { + "epoch": 0.22, + "grad_norm": 0.6088903532408108, + "learning_rate": 4.914130119577228e-06, + "loss": 0.3012, + "step": 4746 + }, + { + "epoch": 0.22, + "grad_norm": 0.6907133538459269, + "learning_rate": 4.914080834192699e-06, + "loss": 0.3239, + "step": 4747 + }, + { + "epoch": 0.22, + "grad_norm": 0.6270659782489301, + "learning_rate": 4.914031534915742e-06, + "loss": 0.3065, + "step": 4748 + }, + { + "epoch": 0.22, + "grad_norm": 0.623169906413525, + "learning_rate": 4.91398222174664e-06, + "loss": 0.3088, + "step": 4749 + }, + { + "epoch": 0.22, + "grad_norm": 0.6722532095468662, + "learning_rate": 4.913932894685677e-06, + "loss": 0.2954, + "step": 4750 + }, + { + "epoch": 0.22, + "grad_norm": 0.660990246757122, + "learning_rate": 4.913883553733136e-06, + "loss": 0.3088, + "step": 4751 + }, + { + "epoch": 0.22, + "grad_norm": 0.7581312410412591, + "learning_rate": 4.9138341988893025e-06, + "loss": 0.3125, + "step": 4752 + }, + { + "epoch": 0.22, + "grad_norm": 0.6970144465062915, + "learning_rate": 4.91378483015446e-06, + "loss": 0.3057, + "step": 4753 + }, + { + "epoch": 0.22, + "grad_norm": 0.6120704666417933, + "learning_rate": 4.913735447528892e-06, + "loss": 0.2985, + "step": 4754 + }, + { + "epoch": 0.22, + "grad_norm": 0.6673864679025044, + "learning_rate": 4.913686051012885e-06, + "loss": 0.3179, + "step": 4755 + }, + { + "epoch": 0.22, + "grad_norm": 0.7011064653409748, + "learning_rate": 4.913636640606719e-06, + "loss": 0.293, + "step": 4756 + }, + { + "epoch": 0.22, + "grad_norm": 0.6521746657212177, + "learning_rate": 4.9135872163106824e-06, + "loss": 0.3029, + "step": 4757 + }, + { + "epoch": 0.22, + "grad_norm": 0.6433443473679628, + "learning_rate": 4.913537778125057e-06, + "loss": 0.3076, + "step": 4758 + }, + { + "epoch": 0.22, + "grad_norm": 0.6830641431980108, + "learning_rate": 4.913488326050129e-06, + "loss": 0.3141, + "step": 4759 + }, + { + "epoch": 0.22, + "grad_norm": 0.7122407625985115, + "learning_rate": 4.9134388600861816e-06, + "loss": 0.3177, + "step": 4760 + }, + { + "epoch": 0.22, + "grad_norm": 0.6569837674586347, + "learning_rate": 4.913389380233501e-06, + "loss": 0.305, + "step": 4761 + }, + { + "epoch": 0.22, + "grad_norm": 0.6709719237427477, + "learning_rate": 4.91333988649237e-06, + "loss": 0.3134, + "step": 4762 + }, + { + "epoch": 0.22, + "grad_norm": 0.6663308311228373, + "learning_rate": 4.913290378863075e-06, + "loss": 0.3159, + "step": 4763 + }, + { + "epoch": 0.22, + "grad_norm": 0.6008918589545276, + "learning_rate": 4.913240857345901e-06, + "loss": 0.2741, + "step": 4764 + }, + { + "epoch": 0.22, + "grad_norm": 0.6746189764886209, + "learning_rate": 4.913191321941132e-06, + "loss": 0.3033, + "step": 4765 + }, + { + "epoch": 0.22, + "grad_norm": 0.6221114971349353, + "learning_rate": 4.913141772649054e-06, + "loss": 0.3198, + "step": 4766 + }, + { + "epoch": 0.22, + "grad_norm": 0.6312017441697149, + "learning_rate": 4.9130922094699504e-06, + "loss": 0.3102, + "step": 4767 + }, + { + "epoch": 0.22, + "grad_norm": 0.6320719174165474, + "learning_rate": 4.913042632404108e-06, + "loss": 0.2939, + "step": 4768 + }, + { + "epoch": 0.22, + "grad_norm": 0.6205332750657601, + "learning_rate": 4.912993041451812e-06, + "loss": 0.3135, + "step": 4769 + }, + { + "epoch": 0.22, + "grad_norm": 0.6606488551842469, + "learning_rate": 4.912943436613348e-06, + "loss": 0.3129, + "step": 4770 + }, + { + "epoch": 0.22, + "grad_norm": 0.5822606775198145, + "learning_rate": 4.912893817889001e-06, + "loss": 0.2837, + "step": 4771 + }, + { + "epoch": 0.22, + "grad_norm": 0.6125300204515292, + "learning_rate": 4.912844185279056e-06, + "loss": 0.2947, + "step": 4772 + }, + { + "epoch": 0.22, + "grad_norm": 0.6400377222072344, + "learning_rate": 4.9127945387837995e-06, + "loss": 0.278, + "step": 4773 + }, + { + "epoch": 0.22, + "grad_norm": 0.6101530767245031, + "learning_rate": 4.912744878403516e-06, + "loss": 0.3103, + "step": 4774 + }, + { + "epoch": 0.22, + "grad_norm": 0.6500718756248658, + "learning_rate": 4.912695204138494e-06, + "loss": 0.3079, + "step": 4775 + }, + { + "epoch": 0.22, + "grad_norm": 0.6506468603044561, + "learning_rate": 4.912645515989015e-06, + "loss": 0.2927, + "step": 4776 + }, + { + "epoch": 0.22, + "grad_norm": 0.6671685697284879, + "learning_rate": 4.9125958139553695e-06, + "loss": 0.3202, + "step": 4777 + }, + { + "epoch": 0.22, + "grad_norm": 0.7275468654753984, + "learning_rate": 4.9125460980378405e-06, + "loss": 0.3302, + "step": 4778 + }, + { + "epoch": 0.22, + "grad_norm": 0.649310085309838, + "learning_rate": 4.9124963682367156e-06, + "loss": 0.3197, + "step": 4779 + }, + { + "epoch": 0.22, + "grad_norm": 0.6210169510835925, + "learning_rate": 4.912446624552279e-06, + "loss": 0.3122, + "step": 4780 + }, + { + "epoch": 0.22, + "grad_norm": 0.6388581659460779, + "learning_rate": 4.912396866984821e-06, + "loss": 0.3131, + "step": 4781 + }, + { + "epoch": 0.22, + "grad_norm": 0.5951640332139397, + "learning_rate": 4.912347095534623e-06, + "loss": 0.2985, + "step": 4782 + }, + { + "epoch": 0.22, + "grad_norm": 0.6132136510147432, + "learning_rate": 4.912297310201975e-06, + "loss": 0.2897, + "step": 4783 + }, + { + "epoch": 0.22, + "grad_norm": 0.6705719398048903, + "learning_rate": 4.912247510987162e-06, + "loss": 0.2853, + "step": 4784 + }, + { + "epoch": 0.22, + "grad_norm": 0.6614987417434678, + "learning_rate": 4.912197697890471e-06, + "loss": 0.3149, + "step": 4785 + }, + { + "epoch": 0.22, + "grad_norm": 0.6884136549922983, + "learning_rate": 4.912147870912189e-06, + "loss": 0.301, + "step": 4786 + }, + { + "epoch": 0.22, + "grad_norm": 0.6301676607548392, + "learning_rate": 4.912098030052601e-06, + "loss": 0.3023, + "step": 4787 + }, + { + "epoch": 0.22, + "grad_norm": 0.6454295745466244, + "learning_rate": 4.912048175311997e-06, + "loss": 0.3103, + "step": 4788 + }, + { + "epoch": 0.22, + "grad_norm": 0.6715279368831083, + "learning_rate": 4.91199830669066e-06, + "loss": 0.3091, + "step": 4789 + }, + { + "epoch": 0.22, + "grad_norm": 0.5896436171567991, + "learning_rate": 4.91194842418888e-06, + "loss": 0.292, + "step": 4790 + }, + { + "epoch": 0.22, + "grad_norm": 0.6391932006411144, + "learning_rate": 4.911898527806942e-06, + "loss": 0.2983, + "step": 4791 + }, + { + "epoch": 0.22, + "grad_norm": 0.6609214878928429, + "learning_rate": 4.911848617545135e-06, + "loss": 0.3056, + "step": 4792 + }, + { + "epoch": 0.22, + "grad_norm": 0.6953903781334551, + "learning_rate": 4.911798693403746e-06, + "loss": 0.3095, + "step": 4793 + }, + { + "epoch": 0.22, + "grad_norm": 0.6639802787616015, + "learning_rate": 4.911748755383061e-06, + "loss": 0.2943, + "step": 4794 + }, + { + "epoch": 0.22, + "grad_norm": 0.6439897146760032, + "learning_rate": 4.911698803483368e-06, + "loss": 0.3056, + "step": 4795 + }, + { + "epoch": 0.22, + "grad_norm": 0.6443574454649527, + "learning_rate": 4.911648837704955e-06, + "loss": 0.3035, + "step": 4796 + }, + { + "epoch": 0.22, + "grad_norm": 0.6943493026825515, + "learning_rate": 4.9115988580481085e-06, + "loss": 0.3147, + "step": 4797 + }, + { + "epoch": 0.22, + "grad_norm": 0.6819683974679126, + "learning_rate": 4.911548864513117e-06, + "loss": 0.3054, + "step": 4798 + }, + { + "epoch": 0.22, + "grad_norm": 0.6636920125907301, + "learning_rate": 4.911498857100268e-06, + "loss": 0.3227, + "step": 4799 + }, + { + "epoch": 0.22, + "grad_norm": 0.6416917845660871, + "learning_rate": 4.9114488358098485e-06, + "loss": 0.2979, + "step": 4800 + }, + { + "epoch": 0.22, + "grad_norm": 0.6223655169948561, + "learning_rate": 4.911398800642148e-06, + "loss": 0.3088, + "step": 4801 + }, + { + "epoch": 0.22, + "grad_norm": 0.6436763809286503, + "learning_rate": 4.911348751597453e-06, + "loss": 0.3087, + "step": 4802 + }, + { + "epoch": 0.22, + "grad_norm": 0.6064018967210065, + "learning_rate": 4.911298688676053e-06, + "loss": 0.3006, + "step": 4803 + }, + { + "epoch": 0.23, + "grad_norm": 0.6521560035936225, + "learning_rate": 4.911248611878234e-06, + "loss": 0.3123, + "step": 4804 + }, + { + "epoch": 0.23, + "grad_norm": 0.6430431823198839, + "learning_rate": 4.911198521204285e-06, + "loss": 0.3052, + "step": 4805 + }, + { + "epoch": 0.23, + "grad_norm": 0.6650993777482658, + "learning_rate": 4.911148416654495e-06, + "loss": 0.3057, + "step": 4806 + }, + { + "epoch": 0.23, + "grad_norm": 0.6152977203872838, + "learning_rate": 4.911098298229152e-06, + "loss": 0.2909, + "step": 4807 + }, + { + "epoch": 0.23, + "grad_norm": 0.6653000466128979, + "learning_rate": 4.911048165928545e-06, + "loss": 0.2968, + "step": 4808 + }, + { + "epoch": 0.23, + "grad_norm": 0.6240499687780728, + "learning_rate": 4.91099801975296e-06, + "loss": 0.3002, + "step": 4809 + }, + { + "epoch": 0.23, + "grad_norm": 0.6419034231021984, + "learning_rate": 4.910947859702689e-06, + "loss": 0.2886, + "step": 4810 + }, + { + "epoch": 0.23, + "grad_norm": 0.6352071404476337, + "learning_rate": 4.910897685778019e-06, + "loss": 0.3049, + "step": 4811 + }, + { + "epoch": 0.23, + "grad_norm": 0.6654963225546116, + "learning_rate": 4.910847497979237e-06, + "loss": 0.2992, + "step": 4812 + }, + { + "epoch": 0.23, + "grad_norm": 0.7034061496022909, + "learning_rate": 4.910797296306635e-06, + "loss": 0.328, + "step": 4813 + }, + { + "epoch": 0.23, + "grad_norm": 0.6117552309326021, + "learning_rate": 4.910747080760501e-06, + "loss": 0.2778, + "step": 4814 + }, + { + "epoch": 0.23, + "grad_norm": 0.6459208234840148, + "learning_rate": 4.910696851341122e-06, + "loss": 0.3276, + "step": 4815 + }, + { + "epoch": 0.23, + "grad_norm": 0.6874451318619211, + "learning_rate": 4.9106466080487905e-06, + "loss": 0.3116, + "step": 4816 + }, + { + "epoch": 0.23, + "grad_norm": 0.7020771673465714, + "learning_rate": 4.910596350883791e-06, + "loss": 0.3099, + "step": 4817 + }, + { + "epoch": 0.23, + "grad_norm": 0.6249874372362013, + "learning_rate": 4.910546079846418e-06, + "loss": 0.3153, + "step": 4818 + }, + { + "epoch": 0.23, + "grad_norm": 0.6983392550248901, + "learning_rate": 4.910495794936957e-06, + "loss": 0.2999, + "step": 4819 + }, + { + "epoch": 0.23, + "grad_norm": 0.6439048302259689, + "learning_rate": 4.9104454961556985e-06, + "loss": 0.3075, + "step": 4820 + }, + { + "epoch": 0.23, + "grad_norm": 0.6574370361340581, + "learning_rate": 4.910395183502932e-06, + "loss": 0.2971, + "step": 4821 + }, + { + "epoch": 0.23, + "grad_norm": 0.6497468661338608, + "learning_rate": 4.910344856978948e-06, + "loss": 0.2847, + "step": 4822 + }, + { + "epoch": 0.23, + "grad_norm": 0.6433533281438316, + "learning_rate": 4.910294516584034e-06, + "loss": 0.2844, + "step": 4823 + }, + { + "epoch": 0.23, + "grad_norm": 0.6161945443053013, + "learning_rate": 4.910244162318481e-06, + "loss": 0.2977, + "step": 4824 + }, + { + "epoch": 0.23, + "grad_norm": 0.6040710688377966, + "learning_rate": 4.910193794182578e-06, + "loss": 0.2841, + "step": 4825 + }, + { + "epoch": 0.23, + "grad_norm": 0.7110892350155563, + "learning_rate": 4.910143412176617e-06, + "loss": 0.3092, + "step": 4826 + }, + { + "epoch": 0.23, + "grad_norm": 0.652376180519213, + "learning_rate": 4.9100930163008855e-06, + "loss": 0.2959, + "step": 4827 + }, + { + "epoch": 0.23, + "grad_norm": 0.6614280822700732, + "learning_rate": 4.910042606555675e-06, + "loss": 0.3197, + "step": 4828 + }, + { + "epoch": 0.23, + "grad_norm": 0.7168524655156807, + "learning_rate": 4.909992182941274e-06, + "loss": 0.3105, + "step": 4829 + }, + { + "epoch": 0.23, + "grad_norm": 0.6512715409899743, + "learning_rate": 4.909941745457975e-06, + "loss": 0.3114, + "step": 4830 + }, + { + "epoch": 0.23, + "grad_norm": 0.5949673012338795, + "learning_rate": 4.9098912941060665e-06, + "loss": 0.2887, + "step": 4831 + }, + { + "epoch": 0.23, + "grad_norm": 0.5780640144635302, + "learning_rate": 4.909840828885839e-06, + "loss": 0.2836, + "step": 4832 + }, + { + "epoch": 0.23, + "grad_norm": 0.6421750022771274, + "learning_rate": 4.9097903497975845e-06, + "loss": 0.3083, + "step": 4833 + }, + { + "epoch": 0.23, + "grad_norm": 0.6320726212915466, + "learning_rate": 4.90973985684159e-06, + "loss": 0.2815, + "step": 4834 + }, + { + "epoch": 0.23, + "grad_norm": 0.6644345881019117, + "learning_rate": 4.90968935001815e-06, + "loss": 0.2983, + "step": 4835 + }, + { + "epoch": 0.23, + "grad_norm": 0.6368023136094144, + "learning_rate": 4.909638829327552e-06, + "loss": 0.2934, + "step": 4836 + }, + { + "epoch": 0.23, + "grad_norm": 0.6750158463189109, + "learning_rate": 4.90958829477009e-06, + "loss": 0.3002, + "step": 4837 + }, + { + "epoch": 0.23, + "grad_norm": 0.6930472767261608, + "learning_rate": 4.909537746346052e-06, + "loss": 0.2888, + "step": 4838 + }, + { + "epoch": 0.23, + "grad_norm": 0.7136754738476725, + "learning_rate": 4.90948718405573e-06, + "loss": 0.2904, + "step": 4839 + }, + { + "epoch": 0.23, + "grad_norm": 0.6025820503901883, + "learning_rate": 4.909436607899415e-06, + "loss": 0.2862, + "step": 4840 + }, + { + "epoch": 0.23, + "grad_norm": 0.6109274112281793, + "learning_rate": 4.909386017877397e-06, + "loss": 0.292, + "step": 4841 + }, + { + "epoch": 0.23, + "grad_norm": 0.6755141566854054, + "learning_rate": 4.909335413989969e-06, + "loss": 0.2856, + "step": 4842 + }, + { + "epoch": 0.23, + "grad_norm": 0.6369691768331908, + "learning_rate": 4.909284796237421e-06, + "loss": 0.3217, + "step": 4843 + }, + { + "epoch": 0.23, + "grad_norm": 0.6679579443925197, + "learning_rate": 4.909234164620044e-06, + "loss": 0.3037, + "step": 4844 + }, + { + "epoch": 0.23, + "grad_norm": 0.6497818717781451, + "learning_rate": 4.909183519138131e-06, + "loss": 0.3102, + "step": 4845 + }, + { + "epoch": 0.23, + "grad_norm": 0.6512673209950056, + "learning_rate": 4.909132859791972e-06, + "loss": 0.3066, + "step": 4846 + }, + { + "epoch": 0.23, + "grad_norm": 0.6679247316362044, + "learning_rate": 4.909082186581859e-06, + "loss": 0.279, + "step": 4847 + }, + { + "epoch": 0.23, + "grad_norm": 0.6732151005084462, + "learning_rate": 4.909031499508083e-06, + "loss": 0.3047, + "step": 4848 + }, + { + "epoch": 0.23, + "grad_norm": 0.6267215666470008, + "learning_rate": 4.9089807985709366e-06, + "loss": 0.3088, + "step": 4849 + }, + { + "epoch": 0.23, + "grad_norm": 0.5863037035413868, + "learning_rate": 4.908930083770711e-06, + "loss": 0.2799, + "step": 4850 + }, + { + "epoch": 0.23, + "grad_norm": 0.6906973552404329, + "learning_rate": 4.908879355107699e-06, + "loss": 0.3077, + "step": 4851 + }, + { + "epoch": 0.23, + "grad_norm": 0.622261695709231, + "learning_rate": 4.908828612582191e-06, + "loss": 0.2885, + "step": 4852 + }, + { + "epoch": 0.23, + "grad_norm": 0.7595284884806521, + "learning_rate": 4.908777856194479e-06, + "loss": 0.2922, + "step": 4853 + }, + { + "epoch": 0.23, + "grad_norm": 0.6821054003896908, + "learning_rate": 4.908727085944857e-06, + "loss": 0.3027, + "step": 4854 + }, + { + "epoch": 0.23, + "grad_norm": 0.6604920314450847, + "learning_rate": 4.9086763018336155e-06, + "loss": 0.3266, + "step": 4855 + }, + { + "epoch": 0.23, + "grad_norm": 0.6609995687252148, + "learning_rate": 4.908625503861048e-06, + "loss": 0.2943, + "step": 4856 + }, + { + "epoch": 0.23, + "grad_norm": 0.65425731707093, + "learning_rate": 4.9085746920274456e-06, + "loss": 0.307, + "step": 4857 + }, + { + "epoch": 0.23, + "grad_norm": 0.6544080871762498, + "learning_rate": 4.9085238663331004e-06, + "loss": 0.3169, + "step": 4858 + }, + { + "epoch": 0.23, + "grad_norm": 0.6070773300364595, + "learning_rate": 4.908473026778307e-06, + "loss": 0.2863, + "step": 4859 + }, + { + "epoch": 0.23, + "grad_norm": 0.6501103135633721, + "learning_rate": 4.908422173363356e-06, + "loss": 0.283, + "step": 4860 + }, + { + "epoch": 0.23, + "grad_norm": 0.6315129111410207, + "learning_rate": 4.9083713060885405e-06, + "loss": 0.3052, + "step": 4861 + }, + { + "epoch": 0.23, + "grad_norm": 0.534609796315131, + "learning_rate": 4.908320424954155e-06, + "loss": 0.2698, + "step": 4862 + }, + { + "epoch": 0.23, + "grad_norm": 0.6469459348921285, + "learning_rate": 4.90826952996049e-06, + "loss": 0.2967, + "step": 4863 + }, + { + "epoch": 0.23, + "grad_norm": 0.663613253085424, + "learning_rate": 4.908218621107838e-06, + "loss": 0.3121, + "step": 4864 + }, + { + "epoch": 0.23, + "grad_norm": 0.67962055573337, + "learning_rate": 4.908167698396495e-06, + "loss": 0.3096, + "step": 4865 + }, + { + "epoch": 0.23, + "grad_norm": 0.7240851463311618, + "learning_rate": 4.90811676182675e-06, + "loss": 0.3026, + "step": 4866 + }, + { + "epoch": 0.23, + "grad_norm": 0.6778080831277881, + "learning_rate": 4.908065811398899e-06, + "loss": 0.2934, + "step": 4867 + }, + { + "epoch": 0.23, + "grad_norm": 0.6856196434213608, + "learning_rate": 4.908014847113235e-06, + "loss": 0.2902, + "step": 4868 + }, + { + "epoch": 0.23, + "grad_norm": 0.6747542271888527, + "learning_rate": 4.907963868970051e-06, + "loss": 0.2809, + "step": 4869 + }, + { + "epoch": 0.23, + "grad_norm": 0.6266583516857662, + "learning_rate": 4.90791287696964e-06, + "loss": 0.2951, + "step": 4870 + }, + { + "epoch": 0.23, + "grad_norm": 0.671345054746889, + "learning_rate": 4.907861871112295e-06, + "loss": 0.3197, + "step": 4871 + }, + { + "epoch": 0.23, + "grad_norm": 0.7035757608429964, + "learning_rate": 4.907810851398311e-06, + "loss": 0.3135, + "step": 4872 + }, + { + "epoch": 0.23, + "grad_norm": 0.6779459580902061, + "learning_rate": 4.90775981782798e-06, + "loss": 0.3243, + "step": 4873 + }, + { + "epoch": 0.23, + "grad_norm": 0.6890178834972497, + "learning_rate": 4.907708770401597e-06, + "loss": 0.2996, + "step": 4874 + }, + { + "epoch": 0.23, + "grad_norm": 0.6118210917618375, + "learning_rate": 4.907657709119455e-06, + "loss": 0.2918, + "step": 4875 + }, + { + "epoch": 0.23, + "grad_norm": 0.616453334551133, + "learning_rate": 4.907606633981848e-06, + "loss": 0.2897, + "step": 4876 + }, + { + "epoch": 0.23, + "grad_norm": 0.7054927348193678, + "learning_rate": 4.907555544989069e-06, + "loss": 0.319, + "step": 4877 + }, + { + "epoch": 0.23, + "grad_norm": 0.5996712216829164, + "learning_rate": 4.9075044421414145e-06, + "loss": 0.2902, + "step": 4878 + }, + { + "epoch": 0.23, + "grad_norm": 0.6543873590107286, + "learning_rate": 4.9074533254391764e-06, + "loss": 0.3246, + "step": 4879 + }, + { + "epoch": 0.23, + "grad_norm": 0.6828527830777072, + "learning_rate": 4.907402194882649e-06, + "loss": 0.3078, + "step": 4880 + }, + { + "epoch": 0.23, + "grad_norm": 0.6533621658017833, + "learning_rate": 4.907351050472128e-06, + "loss": 0.3011, + "step": 4881 + }, + { + "epoch": 0.23, + "grad_norm": 0.5982085851090544, + "learning_rate": 4.907299892207906e-06, + "loss": 0.2942, + "step": 4882 + }, + { + "epoch": 0.23, + "grad_norm": 0.6265723766748602, + "learning_rate": 4.907248720090278e-06, + "loss": 0.314, + "step": 4883 + }, + { + "epoch": 0.23, + "grad_norm": 0.574609694091718, + "learning_rate": 4.907197534119539e-06, + "loss": 0.2818, + "step": 4884 + }, + { + "epoch": 0.23, + "grad_norm": 0.617863682248405, + "learning_rate": 4.9071463342959835e-06, + "loss": 0.2891, + "step": 4885 + }, + { + "epoch": 0.23, + "grad_norm": 0.5950476453847877, + "learning_rate": 4.907095120619905e-06, + "loss": 0.2871, + "step": 4886 + }, + { + "epoch": 0.23, + "grad_norm": 0.627277419835945, + "learning_rate": 4.907043893091601e-06, + "loss": 0.2963, + "step": 4887 + }, + { + "epoch": 0.23, + "grad_norm": 0.6550714264872411, + "learning_rate": 4.906992651711363e-06, + "loss": 0.283, + "step": 4888 + }, + { + "epoch": 0.23, + "grad_norm": 0.6635935802467845, + "learning_rate": 4.906941396479488e-06, + "loss": 0.3161, + "step": 4889 + }, + { + "epoch": 0.23, + "grad_norm": 0.6271205334477905, + "learning_rate": 4.906890127396269e-06, + "loss": 0.314, + "step": 4890 + }, + { + "epoch": 0.23, + "grad_norm": 0.6195452854549665, + "learning_rate": 4.906838844462003e-06, + "loss": 0.2773, + "step": 4891 + }, + { + "epoch": 0.23, + "grad_norm": 0.6345104815552405, + "learning_rate": 4.906787547676984e-06, + "loss": 0.2781, + "step": 4892 + }, + { + "epoch": 0.23, + "grad_norm": 0.6907474517794432, + "learning_rate": 4.906736237041508e-06, + "loss": 0.2924, + "step": 4893 + }, + { + "epoch": 0.23, + "grad_norm": 0.6654935894967462, + "learning_rate": 4.90668491255587e-06, + "loss": 0.3131, + "step": 4894 + }, + { + "epoch": 0.23, + "grad_norm": 0.6334102843700652, + "learning_rate": 4.906633574220365e-06, + "loss": 0.2855, + "step": 4895 + }, + { + "epoch": 0.23, + "grad_norm": 0.61775119995487, + "learning_rate": 4.906582222035288e-06, + "loss": 0.304, + "step": 4896 + }, + { + "epoch": 0.23, + "grad_norm": 0.6503583097836383, + "learning_rate": 4.9065308560009365e-06, + "loss": 0.294, + "step": 4897 + }, + { + "epoch": 0.23, + "grad_norm": 0.657156643323677, + "learning_rate": 4.906479476117604e-06, + "loss": 0.2975, + "step": 4898 + }, + { + "epoch": 0.23, + "grad_norm": 0.6199280428136705, + "learning_rate": 4.906428082385587e-06, + "loss": 0.2884, + "step": 4899 + }, + { + "epoch": 0.23, + "grad_norm": 0.6434841996455111, + "learning_rate": 4.906376674805181e-06, + "loss": 0.2838, + "step": 4900 + }, + { + "epoch": 0.23, + "grad_norm": 0.6407687282176534, + "learning_rate": 4.906325253376682e-06, + "loss": 0.2999, + "step": 4901 + }, + { + "epoch": 0.23, + "grad_norm": 0.66879751156267, + "learning_rate": 4.9062738181003866e-06, + "loss": 0.3086, + "step": 4902 + }, + { + "epoch": 0.23, + "grad_norm": 0.614540368198526, + "learning_rate": 4.9062223689765896e-06, + "loss": 0.3021, + "step": 4903 + }, + { + "epoch": 0.23, + "grad_norm": 0.6720625186961431, + "learning_rate": 4.9061709060055886e-06, + "loss": 0.3118, + "step": 4904 + }, + { + "epoch": 0.23, + "grad_norm": 0.5978687375504986, + "learning_rate": 4.9061194291876775e-06, + "loss": 0.2881, + "step": 4905 + }, + { + "epoch": 0.23, + "grad_norm": 0.6498978079042762, + "learning_rate": 4.9060679385231545e-06, + "loss": 0.2923, + "step": 4906 + }, + { + "epoch": 0.23, + "grad_norm": 0.6796366754956933, + "learning_rate": 4.906016434012315e-06, + "loss": 0.2941, + "step": 4907 + }, + { + "epoch": 0.23, + "grad_norm": 0.6183174637014014, + "learning_rate": 4.905964915655456e-06, + "loss": 0.2732, + "step": 4908 + }, + { + "epoch": 0.23, + "grad_norm": 0.6160357316656014, + "learning_rate": 4.905913383452874e-06, + "loss": 0.3053, + "step": 4909 + }, + { + "epoch": 0.23, + "grad_norm": 0.6523158377167521, + "learning_rate": 4.905861837404864e-06, + "loss": 0.3297, + "step": 4910 + }, + { + "epoch": 0.23, + "grad_norm": 0.6682087776728468, + "learning_rate": 4.905810277511725e-06, + "loss": 0.3152, + "step": 4911 + }, + { + "epoch": 0.23, + "grad_norm": 0.7331986853017163, + "learning_rate": 4.905758703773752e-06, + "loss": 0.282, + "step": 4912 + }, + { + "epoch": 0.23, + "grad_norm": 0.6765746790390098, + "learning_rate": 4.9057071161912425e-06, + "loss": 0.3126, + "step": 4913 + }, + { + "epoch": 0.23, + "grad_norm": 0.6443584350112708, + "learning_rate": 4.905655514764493e-06, + "loss": 0.2991, + "step": 4914 + }, + { + "epoch": 0.23, + "grad_norm": 0.6823666450111981, + "learning_rate": 4.905603899493801e-06, + "loss": 0.3019, + "step": 4915 + }, + { + "epoch": 0.23, + "grad_norm": 0.6153116198798328, + "learning_rate": 4.905552270379462e-06, + "loss": 0.2947, + "step": 4916 + }, + { + "epoch": 0.23, + "grad_norm": 0.6329226678937637, + "learning_rate": 4.9055006274217755e-06, + "loss": 0.3019, + "step": 4917 + }, + { + "epoch": 0.23, + "grad_norm": 0.6467214247162777, + "learning_rate": 4.905448970621037e-06, + "loss": 0.3085, + "step": 4918 + }, + { + "epoch": 0.23, + "grad_norm": 0.5946386707298917, + "learning_rate": 4.905397299977545e-06, + "loss": 0.3005, + "step": 4919 + }, + { + "epoch": 0.23, + "grad_norm": 0.6699898316854893, + "learning_rate": 4.905345615491595e-06, + "loss": 0.3155, + "step": 4920 + }, + { + "epoch": 0.23, + "grad_norm": 0.6286380594403421, + "learning_rate": 4.905293917163486e-06, + "loss": 0.3043, + "step": 4921 + }, + { + "epoch": 0.23, + "grad_norm": 0.6018419748041278, + "learning_rate": 4.905242204993516e-06, + "loss": 0.3042, + "step": 4922 + }, + { + "epoch": 0.23, + "grad_norm": 0.6034428759661863, + "learning_rate": 4.90519047898198e-06, + "loss": 0.295, + "step": 4923 + }, + { + "epoch": 0.23, + "grad_norm": 0.635653311080953, + "learning_rate": 4.905138739129178e-06, + "loss": 0.287, + "step": 4924 + }, + { + "epoch": 0.23, + "grad_norm": 0.6737531008454107, + "learning_rate": 4.9050869854354075e-06, + "loss": 0.2921, + "step": 4925 + }, + { + "epoch": 0.23, + "grad_norm": 0.6876826104438359, + "learning_rate": 4.905035217900965e-06, + "loss": 0.3085, + "step": 4926 + }, + { + "epoch": 0.23, + "grad_norm": 0.6847280395128166, + "learning_rate": 4.904983436526151e-06, + "loss": 0.2888, + "step": 4927 + }, + { + "epoch": 0.23, + "grad_norm": 0.6315752840420158, + "learning_rate": 4.90493164131126e-06, + "loss": 0.324, + "step": 4928 + }, + { + "epoch": 0.23, + "grad_norm": 0.6532502408405532, + "learning_rate": 4.9048798322565925e-06, + "loss": 0.2957, + "step": 4929 + }, + { + "epoch": 0.23, + "grad_norm": 0.6239013952141966, + "learning_rate": 4.9048280093624466e-06, + "loss": 0.3007, + "step": 4930 + }, + { + "epoch": 0.23, + "grad_norm": 0.7165808716882065, + "learning_rate": 4.904776172629119e-06, + "loss": 0.3369, + "step": 4931 + }, + { + "epoch": 0.23, + "grad_norm": 0.6366320356528586, + "learning_rate": 4.904724322056909e-06, + "loss": 0.2945, + "step": 4932 + }, + { + "epoch": 0.23, + "grad_norm": 0.6631681269187037, + "learning_rate": 4.904672457646116e-06, + "loss": 0.3221, + "step": 4933 + }, + { + "epoch": 0.23, + "grad_norm": 0.6787864552412481, + "learning_rate": 4.9046205793970355e-06, + "loss": 0.3157, + "step": 4934 + }, + { + "epoch": 0.23, + "grad_norm": 0.7208554699287946, + "learning_rate": 4.904568687309969e-06, + "loss": 0.2954, + "step": 4935 + }, + { + "epoch": 0.23, + "grad_norm": 0.682701514896217, + "learning_rate": 4.9045167813852145e-06, + "loss": 0.304, + "step": 4936 + }, + { + "epoch": 0.23, + "grad_norm": 0.602419484051279, + "learning_rate": 4.90446486162307e-06, + "loss": 0.2878, + "step": 4937 + }, + { + "epoch": 0.23, + "grad_norm": 0.6825477132903969, + "learning_rate": 4.904412928023835e-06, + "loss": 0.3334, + "step": 4938 + }, + { + "epoch": 0.23, + "grad_norm": 0.6365745382722405, + "learning_rate": 4.904360980587807e-06, + "loss": 0.2971, + "step": 4939 + }, + { + "epoch": 0.23, + "grad_norm": 0.6506271447643448, + "learning_rate": 4.904309019315286e-06, + "loss": 0.3082, + "step": 4940 + }, + { + "epoch": 0.23, + "grad_norm": 0.6947917779895205, + "learning_rate": 4.904257044206572e-06, + "loss": 0.3116, + "step": 4941 + }, + { + "epoch": 0.23, + "grad_norm": 0.6807123099394795, + "learning_rate": 4.904205055261962e-06, + "loss": 0.3019, + "step": 4942 + }, + { + "epoch": 0.23, + "grad_norm": 0.6411209733894886, + "learning_rate": 4.904153052481756e-06, + "loss": 0.3028, + "step": 4943 + }, + { + "epoch": 0.23, + "grad_norm": 0.6115265553967283, + "learning_rate": 4.9041010358662545e-06, + "loss": 0.2928, + "step": 4944 + }, + { + "epoch": 0.23, + "grad_norm": 0.6698581242016535, + "learning_rate": 4.904049005415755e-06, + "loss": 0.2873, + "step": 4945 + }, + { + "epoch": 0.23, + "grad_norm": 0.6684272024942013, + "learning_rate": 4.903996961130557e-06, + "loss": 0.2996, + "step": 4946 + }, + { + "epoch": 0.23, + "grad_norm": 0.6840028480986319, + "learning_rate": 4.903944903010962e-06, + "loss": 0.294, + "step": 4947 + }, + { + "epoch": 0.23, + "grad_norm": 0.6430540160946681, + "learning_rate": 4.903892831057268e-06, + "loss": 0.3088, + "step": 4948 + }, + { + "epoch": 0.23, + "grad_norm": 0.641759232360012, + "learning_rate": 4.903840745269774e-06, + "loss": 0.2956, + "step": 4949 + }, + { + "epoch": 0.23, + "grad_norm": 0.632233711355445, + "learning_rate": 4.903788645648782e-06, + "loss": 0.3197, + "step": 4950 + }, + { + "epoch": 0.23, + "grad_norm": 0.6118032670296409, + "learning_rate": 4.90373653219459e-06, + "loss": 0.2959, + "step": 4951 + }, + { + "epoch": 0.23, + "grad_norm": 0.6305667105262824, + "learning_rate": 4.903684404907498e-06, + "loss": 0.3032, + "step": 4952 + }, + { + "epoch": 0.23, + "grad_norm": 0.6803193001572024, + "learning_rate": 4.903632263787807e-06, + "loss": 0.3243, + "step": 4953 + }, + { + "epoch": 0.23, + "grad_norm": 0.6576279667458154, + "learning_rate": 4.903580108835817e-06, + "loss": 0.3013, + "step": 4954 + }, + { + "epoch": 0.23, + "grad_norm": 0.6705879696065558, + "learning_rate": 4.903527940051826e-06, + "loss": 0.2948, + "step": 4955 + }, + { + "epoch": 0.23, + "grad_norm": 0.6319978909154639, + "learning_rate": 4.903475757436137e-06, + "loss": 0.2876, + "step": 4956 + }, + { + "epoch": 0.23, + "grad_norm": 0.7106066826496841, + "learning_rate": 4.9034235609890485e-06, + "loss": 0.2943, + "step": 4957 + }, + { + "epoch": 0.23, + "grad_norm": 0.6730321934778193, + "learning_rate": 4.903371350710861e-06, + "loss": 0.2897, + "step": 4958 + }, + { + "epoch": 0.23, + "grad_norm": 0.604610458467186, + "learning_rate": 4.903319126601877e-06, + "loss": 0.2734, + "step": 4959 + }, + { + "epoch": 0.23, + "grad_norm": 0.6071265484995634, + "learning_rate": 4.9032668886623945e-06, + "loss": 0.2898, + "step": 4960 + }, + { + "epoch": 0.23, + "grad_norm": 0.6557748567721903, + "learning_rate": 4.903214636892715e-06, + "loss": 0.2754, + "step": 4961 + }, + { + "epoch": 0.23, + "grad_norm": 0.6492404998562967, + "learning_rate": 4.903162371293139e-06, + "loss": 0.2876, + "step": 4962 + }, + { + "epoch": 0.23, + "grad_norm": 0.691500734399718, + "learning_rate": 4.903110091863969e-06, + "loss": 0.3072, + "step": 4963 + }, + { + "epoch": 0.23, + "grad_norm": 0.6364267864879378, + "learning_rate": 4.903057798605503e-06, + "loss": 0.288, + "step": 4964 + }, + { + "epoch": 0.23, + "grad_norm": 0.6936809480227777, + "learning_rate": 4.9030054915180445e-06, + "loss": 0.329, + "step": 4965 + }, + { + "epoch": 0.23, + "grad_norm": 0.6550619927769101, + "learning_rate": 4.902953170601892e-06, + "loss": 0.3045, + "step": 4966 + }, + { + "epoch": 0.23, + "grad_norm": 0.6546523207334249, + "learning_rate": 4.90290083585735e-06, + "loss": 0.3103, + "step": 4967 + }, + { + "epoch": 0.23, + "grad_norm": 0.6482961424213652, + "learning_rate": 4.902848487284715e-06, + "loss": 0.2964, + "step": 4968 + }, + { + "epoch": 0.23, + "grad_norm": 0.6945049921682679, + "learning_rate": 4.902796124884293e-06, + "loss": 0.283, + "step": 4969 + }, + { + "epoch": 0.23, + "grad_norm": 0.6110971295060152, + "learning_rate": 4.902743748656382e-06, + "loss": 0.2865, + "step": 4970 + }, + { + "epoch": 0.23, + "grad_norm": 0.6550979517535978, + "learning_rate": 4.902691358601286e-06, + "loss": 0.3147, + "step": 4971 + }, + { + "epoch": 0.23, + "grad_norm": 0.6553435506023665, + "learning_rate": 4.902638954719303e-06, + "loss": 0.3014, + "step": 4972 + }, + { + "epoch": 0.23, + "grad_norm": 0.6833079265416042, + "learning_rate": 4.902586537010739e-06, + "loss": 0.3014, + "step": 4973 + }, + { + "epoch": 0.23, + "grad_norm": 0.645348717933374, + "learning_rate": 4.9025341054758915e-06, + "loss": 0.3023, + "step": 4974 + }, + { + "epoch": 0.23, + "grad_norm": 0.6363469114675112, + "learning_rate": 4.902481660115065e-06, + "loss": 0.2999, + "step": 4975 + }, + { + "epoch": 0.23, + "grad_norm": 0.6533553175453575, + "learning_rate": 4.90242920092856e-06, + "loss": 0.2786, + "step": 4976 + }, + { + "epoch": 0.23, + "grad_norm": 0.7670751450995019, + "learning_rate": 4.902376727916679e-06, + "loss": 0.3299, + "step": 4977 + }, + { + "epoch": 0.23, + "grad_norm": 0.5952736142517793, + "learning_rate": 4.902324241079723e-06, + "loss": 0.27, + "step": 4978 + }, + { + "epoch": 0.23, + "grad_norm": 0.6301599718253795, + "learning_rate": 4.902271740417996e-06, + "loss": 0.3065, + "step": 4979 + }, + { + "epoch": 0.23, + "grad_norm": 0.6825556815549928, + "learning_rate": 4.902219225931799e-06, + "loss": 0.3182, + "step": 4980 + }, + { + "epoch": 0.23, + "grad_norm": 0.6376179609658398, + "learning_rate": 4.902166697621433e-06, + "loss": 0.2941, + "step": 4981 + }, + { + "epoch": 0.23, + "grad_norm": 0.6099538745436441, + "learning_rate": 4.902114155487202e-06, + "loss": 0.2867, + "step": 4982 + }, + { + "epoch": 0.23, + "grad_norm": 0.7110249674427886, + "learning_rate": 4.902061599529408e-06, + "loss": 0.2961, + "step": 4983 + }, + { + "epoch": 0.23, + "grad_norm": 0.7373717537059779, + "learning_rate": 4.902009029748353e-06, + "loss": 0.3092, + "step": 4984 + }, + { + "epoch": 0.23, + "grad_norm": 0.6617767417897664, + "learning_rate": 4.90195644614434e-06, + "loss": 0.2967, + "step": 4985 + }, + { + "epoch": 0.23, + "grad_norm": 0.6383853917764183, + "learning_rate": 4.901903848717671e-06, + "loss": 0.2945, + "step": 4986 + }, + { + "epoch": 0.23, + "grad_norm": 0.6506638957988167, + "learning_rate": 4.9018512374686486e-06, + "loss": 0.2948, + "step": 4987 + }, + { + "epoch": 0.23, + "grad_norm": 0.6728743102275163, + "learning_rate": 4.901798612397577e-06, + "loss": 0.3129, + "step": 4988 + }, + { + "epoch": 0.23, + "grad_norm": 0.6007081501152104, + "learning_rate": 4.901745973504758e-06, + "loss": 0.2897, + "step": 4989 + }, + { + "epoch": 0.23, + "grad_norm": 0.6838524114058869, + "learning_rate": 4.901693320790494e-06, + "loss": 0.3161, + "step": 4990 + }, + { + "epoch": 0.23, + "grad_norm": 0.6342481171513263, + "learning_rate": 4.901640654255089e-06, + "loss": 0.3056, + "step": 4991 + }, + { + "epoch": 0.23, + "grad_norm": 0.7005797443223993, + "learning_rate": 4.901587973898844e-06, + "loss": 0.306, + "step": 4992 + }, + { + "epoch": 0.23, + "grad_norm": 0.6430557473331605, + "learning_rate": 4.901535279722066e-06, + "loss": 0.2981, + "step": 4993 + }, + { + "epoch": 0.23, + "grad_norm": 0.6559113253359407, + "learning_rate": 4.901482571725056e-06, + "loss": 0.2867, + "step": 4994 + }, + { + "epoch": 0.23, + "grad_norm": 0.6954748326733052, + "learning_rate": 4.901429849908116e-06, + "loss": 0.3126, + "step": 4995 + }, + { + "epoch": 0.23, + "grad_norm": 0.6704939468084763, + "learning_rate": 4.901377114271552e-06, + "loss": 0.3097, + "step": 4996 + }, + { + "epoch": 0.23, + "grad_norm": 0.6419537643245828, + "learning_rate": 4.901324364815666e-06, + "loss": 0.2841, + "step": 4997 + }, + { + "epoch": 0.23, + "grad_norm": 0.6288262678339315, + "learning_rate": 4.901271601540762e-06, + "loss": 0.3132, + "step": 4998 + }, + { + "epoch": 0.23, + "grad_norm": 0.6808341824161397, + "learning_rate": 4.901218824447142e-06, + "loss": 0.2999, + "step": 4999 + }, + { + "epoch": 0.23, + "grad_norm": 0.6145904174504441, + "learning_rate": 4.901166033535113e-06, + "loss": 0.2985, + "step": 5000 + }, + { + "epoch": 0.23, + "grad_norm": 0.6293468999609461, + "learning_rate": 4.901113228804977e-06, + "loss": 0.3097, + "step": 5001 + }, + { + "epoch": 0.23, + "grad_norm": 0.6389519062263136, + "learning_rate": 4.901060410257036e-06, + "loss": 0.3094, + "step": 5002 + }, + { + "epoch": 0.23, + "grad_norm": 0.6157233979751966, + "learning_rate": 4.901007577891597e-06, + "loss": 0.3094, + "step": 5003 + }, + { + "epoch": 0.23, + "grad_norm": 0.6628503116883913, + "learning_rate": 4.900954731708964e-06, + "loss": 0.3343, + "step": 5004 + }, + { + "epoch": 0.23, + "grad_norm": 0.6634512922969606, + "learning_rate": 4.900901871709438e-06, + "loss": 0.3029, + "step": 5005 + }, + { + "epoch": 0.23, + "grad_norm": 0.609997523114147, + "learning_rate": 4.900848997893326e-06, + "loss": 0.3109, + "step": 5006 + }, + { + "epoch": 0.23, + "grad_norm": 0.6243270486227518, + "learning_rate": 4.900796110260931e-06, + "loss": 0.3142, + "step": 5007 + }, + { + "epoch": 0.23, + "grad_norm": 0.6451699321834804, + "learning_rate": 4.900743208812558e-06, + "loss": 0.3065, + "step": 5008 + }, + { + "epoch": 0.23, + "grad_norm": 0.6209424146721033, + "learning_rate": 4.900690293548512e-06, + "loss": 0.2829, + "step": 5009 + }, + { + "epoch": 0.23, + "grad_norm": 0.6121348114867212, + "learning_rate": 4.900637364469097e-06, + "loss": 0.282, + "step": 5010 + }, + { + "epoch": 0.23, + "grad_norm": 0.6875729053306006, + "learning_rate": 4.9005844215746156e-06, + "loss": 0.3345, + "step": 5011 + }, + { + "epoch": 0.23, + "grad_norm": 0.6611482077531915, + "learning_rate": 4.900531464865376e-06, + "loss": 0.3008, + "step": 5012 + }, + { + "epoch": 0.23, + "grad_norm": 0.5899454798307215, + "learning_rate": 4.90047849434168e-06, + "loss": 0.3004, + "step": 5013 + }, + { + "epoch": 0.23, + "grad_norm": 0.6753380310678905, + "learning_rate": 4.900425510003834e-06, + "loss": 0.3172, + "step": 5014 + }, + { + "epoch": 0.23, + "grad_norm": 0.6565254704037873, + "learning_rate": 4.900372511852142e-06, + "loss": 0.3057, + "step": 5015 + }, + { + "epoch": 0.23, + "grad_norm": 0.6585116275852085, + "learning_rate": 4.9003194998869104e-06, + "loss": 0.3068, + "step": 5016 + }, + { + "epoch": 0.24, + "grad_norm": 0.5903584230611052, + "learning_rate": 4.900266474108443e-06, + "loss": 0.3095, + "step": 5017 + }, + { + "epoch": 0.24, + "grad_norm": 0.626977819829023, + "learning_rate": 4.9002134345170445e-06, + "loss": 0.3127, + "step": 5018 + }, + { + "epoch": 0.24, + "grad_norm": 0.6529848903675087, + "learning_rate": 4.9001603811130224e-06, + "loss": 0.3034, + "step": 5019 + }, + { + "epoch": 0.24, + "grad_norm": 0.6600931826751415, + "learning_rate": 4.90010731389668e-06, + "loss": 0.3112, + "step": 5020 + }, + { + "epoch": 0.24, + "grad_norm": 0.6411039340008952, + "learning_rate": 4.900054232868323e-06, + "loss": 0.3091, + "step": 5021 + }, + { + "epoch": 0.24, + "grad_norm": 0.6533594051661354, + "learning_rate": 4.900001138028257e-06, + "loss": 0.3126, + "step": 5022 + }, + { + "epoch": 0.24, + "grad_norm": 0.6978846165983246, + "learning_rate": 4.899948029376788e-06, + "loss": 0.324, + "step": 5023 + }, + { + "epoch": 0.24, + "grad_norm": 0.6283365975005338, + "learning_rate": 4.899894906914221e-06, + "loss": 0.3041, + "step": 5024 + }, + { + "epoch": 0.24, + "grad_norm": 0.5935587413171053, + "learning_rate": 4.899841770640862e-06, + "loss": 0.2945, + "step": 5025 + }, + { + "epoch": 0.24, + "grad_norm": 0.5798825021033409, + "learning_rate": 4.899788620557018e-06, + "loss": 0.292, + "step": 5026 + }, + { + "epoch": 0.24, + "grad_norm": 0.7172553208992074, + "learning_rate": 4.899735456662993e-06, + "loss": 0.301, + "step": 5027 + }, + { + "epoch": 0.24, + "grad_norm": 0.6174985065177876, + "learning_rate": 4.899682278959092e-06, + "loss": 0.2903, + "step": 5028 + }, + { + "epoch": 0.24, + "grad_norm": 0.5842424920886079, + "learning_rate": 4.899629087445625e-06, + "loss": 0.2753, + "step": 5029 + }, + { + "epoch": 0.24, + "grad_norm": 0.610890611841172, + "learning_rate": 4.8995758821228935e-06, + "loss": 0.2845, + "step": 5030 + }, + { + "epoch": 0.24, + "grad_norm": 1.056201648715382, + "learning_rate": 4.899522662991208e-06, + "loss": 0.3353, + "step": 5031 + }, + { + "epoch": 0.24, + "grad_norm": 0.6212476288687496, + "learning_rate": 4.899469430050872e-06, + "loss": 0.2881, + "step": 5032 + }, + { + "epoch": 0.24, + "grad_norm": 0.5912445196343996, + "learning_rate": 4.899416183302192e-06, + "loss": 0.2913, + "step": 5033 + }, + { + "epoch": 0.24, + "grad_norm": 0.6332028487584183, + "learning_rate": 4.8993629227454746e-06, + "loss": 0.3057, + "step": 5034 + }, + { + "epoch": 0.24, + "grad_norm": 0.6952294344699778, + "learning_rate": 4.899309648381027e-06, + "loss": 0.3153, + "step": 5035 + }, + { + "epoch": 0.24, + "grad_norm": 0.6189002749467335, + "learning_rate": 4.899256360209156e-06, + "loss": 0.3067, + "step": 5036 + }, + { + "epoch": 0.24, + "grad_norm": 0.6032352696624658, + "learning_rate": 4.899203058230167e-06, + "loss": 0.2873, + "step": 5037 + }, + { + "epoch": 0.24, + "grad_norm": 0.6294222831470365, + "learning_rate": 4.8991497424443675e-06, + "loss": 0.3122, + "step": 5038 + }, + { + "epoch": 0.24, + "grad_norm": 0.6216140638159572, + "learning_rate": 4.899096412852065e-06, + "loss": 0.2929, + "step": 5039 + }, + { + "epoch": 0.24, + "grad_norm": 0.5963809413950203, + "learning_rate": 4.899043069453565e-06, + "loss": 0.2959, + "step": 5040 + }, + { + "epoch": 0.24, + "grad_norm": 0.6156776772382877, + "learning_rate": 4.898989712249175e-06, + "loss": 0.2976, + "step": 5041 + }, + { + "epoch": 0.24, + "grad_norm": 0.6209268844449913, + "learning_rate": 4.898936341239202e-06, + "loss": 0.3063, + "step": 5042 + }, + { + "epoch": 0.24, + "grad_norm": 0.6271656596031232, + "learning_rate": 4.898882956423954e-06, + "loss": 0.2915, + "step": 5043 + }, + { + "epoch": 0.24, + "grad_norm": 0.6313451639721498, + "learning_rate": 4.898829557803737e-06, + "loss": 0.2966, + "step": 5044 + }, + { + "epoch": 0.24, + "grad_norm": 0.6409121664081797, + "learning_rate": 4.898776145378859e-06, + "loss": 0.3215, + "step": 5045 + }, + { + "epoch": 0.24, + "grad_norm": 0.6107663973264055, + "learning_rate": 4.898722719149628e-06, + "loss": 0.2875, + "step": 5046 + }, + { + "epoch": 0.24, + "grad_norm": 0.6513899425599715, + "learning_rate": 4.8986692791163496e-06, + "loss": 0.2883, + "step": 5047 + }, + { + "epoch": 0.24, + "grad_norm": 0.6753283388247416, + "learning_rate": 4.898615825279333e-06, + "loss": 0.2954, + "step": 5048 + }, + { + "epoch": 0.24, + "grad_norm": 0.7175896333677413, + "learning_rate": 4.8985623576388845e-06, + "loss": 0.2763, + "step": 5049 + }, + { + "epoch": 0.24, + "grad_norm": 0.7035515144489924, + "learning_rate": 4.8985088761953125e-06, + "loss": 0.2982, + "step": 5050 + }, + { + "epoch": 0.24, + "grad_norm": 0.5912188436815574, + "learning_rate": 4.898455380948925e-06, + "loss": 0.2935, + "step": 5051 + }, + { + "epoch": 0.24, + "grad_norm": 0.6309470618689158, + "learning_rate": 4.898401871900029e-06, + "loss": 0.312, + "step": 5052 + }, + { + "epoch": 0.24, + "grad_norm": 0.677097514735883, + "learning_rate": 4.898348349048934e-06, + "loss": 0.3092, + "step": 5053 + }, + { + "epoch": 0.24, + "grad_norm": 0.6368769503177941, + "learning_rate": 4.898294812395948e-06, + "loss": 0.3105, + "step": 5054 + }, + { + "epoch": 0.24, + "grad_norm": 0.6603168148127505, + "learning_rate": 4.898241261941375e-06, + "loss": 0.2824, + "step": 5055 + }, + { + "epoch": 0.24, + "grad_norm": 0.6595891631188412, + "learning_rate": 4.898187697685529e-06, + "loss": 0.3231, + "step": 5056 + }, + { + "epoch": 0.24, + "grad_norm": 0.6345040134248771, + "learning_rate": 4.898134119628715e-06, + "loss": 0.2867, + "step": 5057 + }, + { + "epoch": 0.24, + "grad_norm": 0.6189662937414572, + "learning_rate": 4.898080527771242e-06, + "loss": 0.2932, + "step": 5058 + }, + { + "epoch": 0.24, + "grad_norm": 0.6511008141253832, + "learning_rate": 4.898026922113417e-06, + "loss": 0.2866, + "step": 5059 + }, + { + "epoch": 0.24, + "grad_norm": 0.653905534332155, + "learning_rate": 4.897973302655551e-06, + "loss": 0.3027, + "step": 5060 + }, + { + "epoch": 0.24, + "grad_norm": 0.6401300604207992, + "learning_rate": 4.8979196693979516e-06, + "loss": 0.323, + "step": 5061 + }, + { + "epoch": 0.24, + "grad_norm": 0.6008358003560047, + "learning_rate": 4.897866022340927e-06, + "loss": 0.2819, + "step": 5062 + }, + { + "epoch": 0.24, + "grad_norm": 0.6433789032981104, + "learning_rate": 4.897812361484785e-06, + "loss": 0.3142, + "step": 5063 + }, + { + "epoch": 0.24, + "grad_norm": 0.6973915632070986, + "learning_rate": 4.897758686829837e-06, + "loss": 0.3275, + "step": 5064 + }, + { + "epoch": 0.24, + "grad_norm": 0.617742613858665, + "learning_rate": 4.8977049983763895e-06, + "loss": 0.3057, + "step": 5065 + }, + { + "epoch": 0.24, + "grad_norm": 0.6622018311798163, + "learning_rate": 4.897651296124753e-06, + "loss": 0.2871, + "step": 5066 + }, + { + "epoch": 0.24, + "grad_norm": 0.6652622238184539, + "learning_rate": 4.897597580075235e-06, + "loss": 0.2868, + "step": 5067 + }, + { + "epoch": 0.24, + "grad_norm": 0.662603781450254, + "learning_rate": 4.897543850228146e-06, + "loss": 0.3032, + "step": 5068 + }, + { + "epoch": 0.24, + "grad_norm": 0.6210686640257527, + "learning_rate": 4.897490106583795e-06, + "loss": 0.2954, + "step": 5069 + }, + { + "epoch": 0.24, + "grad_norm": 0.6486448718651182, + "learning_rate": 4.897436349142491e-06, + "loss": 0.2969, + "step": 5070 + }, + { + "epoch": 0.24, + "grad_norm": 0.640897905210256, + "learning_rate": 4.897382577904544e-06, + "loss": 0.2859, + "step": 5071 + }, + { + "epoch": 0.24, + "grad_norm": 0.7584300601717701, + "learning_rate": 4.897328792870261e-06, + "loss": 0.3152, + "step": 5072 + }, + { + "epoch": 0.24, + "grad_norm": 0.6813967595585443, + "learning_rate": 4.897274994039955e-06, + "loss": 0.3102, + "step": 5073 + }, + { + "epoch": 0.24, + "grad_norm": 0.6242204379919696, + "learning_rate": 4.897221181413933e-06, + "loss": 0.3021, + "step": 5074 + }, + { + "epoch": 0.24, + "grad_norm": 0.6785368830888221, + "learning_rate": 4.897167354992506e-06, + "loss": 0.2909, + "step": 5075 + }, + { + "epoch": 0.24, + "grad_norm": 0.6555137677173287, + "learning_rate": 4.897113514775984e-06, + "loss": 0.3163, + "step": 5076 + }, + { + "epoch": 0.24, + "grad_norm": 0.6570980650289568, + "learning_rate": 4.897059660764675e-06, + "loss": 0.2948, + "step": 5077 + }, + { + "epoch": 0.24, + "grad_norm": 0.6339619680291116, + "learning_rate": 4.897005792958891e-06, + "loss": 0.3048, + "step": 5078 + }, + { + "epoch": 0.24, + "grad_norm": 0.6479833814513009, + "learning_rate": 4.89695191135894e-06, + "loss": 0.289, + "step": 5079 + }, + { + "epoch": 0.24, + "grad_norm": 0.6680960886843692, + "learning_rate": 4.8968980159651345e-06, + "loss": 0.3064, + "step": 5080 + }, + { + "epoch": 0.24, + "grad_norm": 0.6188987829331495, + "learning_rate": 4.896844106777783e-06, + "loss": 0.2893, + "step": 5081 + }, + { + "epoch": 0.24, + "grad_norm": 0.7000668073661758, + "learning_rate": 4.896790183797196e-06, + "loss": 0.3028, + "step": 5082 + }, + { + "epoch": 0.24, + "grad_norm": 0.6830898333725892, + "learning_rate": 4.896736247023684e-06, + "loss": 0.2917, + "step": 5083 + }, + { + "epoch": 0.24, + "grad_norm": 0.6143374874280696, + "learning_rate": 4.896682296457556e-06, + "loss": 0.2957, + "step": 5084 + }, + { + "epoch": 0.24, + "grad_norm": 0.6417410729411349, + "learning_rate": 4.896628332099126e-06, + "loss": 0.287, + "step": 5085 + }, + { + "epoch": 0.24, + "grad_norm": 0.6586577962995818, + "learning_rate": 4.896574353948701e-06, + "loss": 0.3094, + "step": 5086 + }, + { + "epoch": 0.24, + "grad_norm": 0.6972699846971214, + "learning_rate": 4.896520362006593e-06, + "loss": 0.3121, + "step": 5087 + }, + { + "epoch": 0.24, + "grad_norm": 0.6442602917631615, + "learning_rate": 4.896466356273113e-06, + "loss": 0.2852, + "step": 5088 + }, + { + "epoch": 0.24, + "grad_norm": 0.6276814004169056, + "learning_rate": 4.896412336748571e-06, + "loss": 0.3071, + "step": 5089 + }, + { + "epoch": 0.24, + "grad_norm": 0.6663995435918733, + "learning_rate": 4.89635830343328e-06, + "loss": 0.3187, + "step": 5090 + }, + { + "epoch": 0.24, + "grad_norm": 0.6536103027357174, + "learning_rate": 4.896304256327547e-06, + "loss": 0.3133, + "step": 5091 + }, + { + "epoch": 0.24, + "grad_norm": 0.6401392248647356, + "learning_rate": 4.896250195431687e-06, + "loss": 0.3014, + "step": 5092 + }, + { + "epoch": 0.24, + "grad_norm": 0.6797762432694613, + "learning_rate": 4.896196120746008e-06, + "loss": 0.3258, + "step": 5093 + }, + { + "epoch": 0.24, + "grad_norm": 0.6875681118065124, + "learning_rate": 4.896142032270823e-06, + "loss": 0.3162, + "step": 5094 + }, + { + "epoch": 0.24, + "grad_norm": 0.6854812814927574, + "learning_rate": 4.896087930006444e-06, + "loss": 0.3033, + "step": 5095 + }, + { + "epoch": 0.24, + "grad_norm": 0.6953842647435576, + "learning_rate": 4.89603381395318e-06, + "loss": 0.3152, + "step": 5096 + }, + { + "epoch": 0.24, + "grad_norm": 0.705910224727134, + "learning_rate": 4.895979684111343e-06, + "loss": 0.3101, + "step": 5097 + }, + { + "epoch": 0.24, + "grad_norm": 0.6606591755971951, + "learning_rate": 4.895925540481246e-06, + "loss": 0.3038, + "step": 5098 + }, + { + "epoch": 0.24, + "grad_norm": 0.6595134309465683, + "learning_rate": 4.8958713830632e-06, + "loss": 0.3117, + "step": 5099 + }, + { + "epoch": 0.24, + "grad_norm": 0.6389769203013871, + "learning_rate": 4.895817211857516e-06, + "loss": 0.2816, + "step": 5100 + }, + { + "epoch": 0.24, + "grad_norm": 0.64211785302623, + "learning_rate": 4.8957630268645065e-06, + "loss": 0.3115, + "step": 5101 + }, + { + "epoch": 0.24, + "grad_norm": 0.697420598792953, + "learning_rate": 4.895708828084482e-06, + "loss": 0.2881, + "step": 5102 + }, + { + "epoch": 0.24, + "grad_norm": 0.6222271371457464, + "learning_rate": 4.895654615517756e-06, + "loss": 0.2841, + "step": 5103 + }, + { + "epoch": 0.24, + "grad_norm": 0.641484685903683, + "learning_rate": 4.89560038916464e-06, + "loss": 0.3098, + "step": 5104 + }, + { + "epoch": 0.24, + "grad_norm": 0.6554669889172927, + "learning_rate": 4.895546149025445e-06, + "loss": 0.2992, + "step": 5105 + }, + { + "epoch": 0.24, + "grad_norm": 0.6748180724818486, + "learning_rate": 4.895491895100485e-06, + "loss": 0.3018, + "step": 5106 + }, + { + "epoch": 0.24, + "grad_norm": 0.5965964914341664, + "learning_rate": 4.8954376273900705e-06, + "loss": 0.2943, + "step": 5107 + }, + { + "epoch": 0.24, + "grad_norm": 0.6385450806258893, + "learning_rate": 4.895383345894516e-06, + "loss": 0.3019, + "step": 5108 + }, + { + "epoch": 0.24, + "grad_norm": 0.6413599108263112, + "learning_rate": 4.895329050614131e-06, + "loss": 0.2901, + "step": 5109 + }, + { + "epoch": 0.24, + "grad_norm": 0.6444634566107219, + "learning_rate": 4.895274741549229e-06, + "loss": 0.3166, + "step": 5110 + }, + { + "epoch": 0.24, + "grad_norm": 0.6291651549917138, + "learning_rate": 4.895220418700124e-06, + "loss": 0.3029, + "step": 5111 + }, + { + "epoch": 0.24, + "grad_norm": 0.6586349362773792, + "learning_rate": 4.895166082067126e-06, + "loss": 0.3166, + "step": 5112 + }, + { + "epoch": 0.24, + "grad_norm": 0.6904606813745607, + "learning_rate": 4.895111731650551e-06, + "loss": 0.3237, + "step": 5113 + }, + { + "epoch": 0.24, + "grad_norm": 0.7036319932909176, + "learning_rate": 4.895057367450709e-06, + "loss": 0.3259, + "step": 5114 + }, + { + "epoch": 0.24, + "grad_norm": 0.728216992328819, + "learning_rate": 4.895002989467914e-06, + "loss": 0.3034, + "step": 5115 + }, + { + "epoch": 0.24, + "grad_norm": 0.6167570258375489, + "learning_rate": 4.8949485977024795e-06, + "loss": 0.2958, + "step": 5116 + }, + { + "epoch": 0.24, + "grad_norm": 0.681800630928544, + "learning_rate": 4.894894192154717e-06, + "loss": 0.3007, + "step": 5117 + }, + { + "epoch": 0.24, + "grad_norm": 0.5745245752153533, + "learning_rate": 4.89483977282494e-06, + "loss": 0.2876, + "step": 5118 + }, + { + "epoch": 0.24, + "grad_norm": 0.6402130086786343, + "learning_rate": 4.894785339713462e-06, + "loss": 0.3212, + "step": 5119 + }, + { + "epoch": 0.24, + "grad_norm": 0.6152997286819488, + "learning_rate": 4.894730892820598e-06, + "loss": 0.3152, + "step": 5120 + }, + { + "epoch": 0.24, + "grad_norm": 0.6240856471736022, + "learning_rate": 4.894676432146658e-06, + "loss": 0.284, + "step": 5121 + }, + { + "epoch": 0.24, + "grad_norm": 0.6073781365456631, + "learning_rate": 4.894621957691957e-06, + "loss": 0.3002, + "step": 5122 + }, + { + "epoch": 0.24, + "grad_norm": 0.6403564755760239, + "learning_rate": 4.894567469456808e-06, + "loss": 0.3194, + "step": 5123 + }, + { + "epoch": 0.24, + "grad_norm": 0.6198252121375961, + "learning_rate": 4.8945129674415265e-06, + "loss": 0.2938, + "step": 5124 + }, + { + "epoch": 0.24, + "grad_norm": 0.6414121668757555, + "learning_rate": 4.894458451646425e-06, + "loss": 0.3068, + "step": 5125 + }, + { + "epoch": 0.24, + "grad_norm": 0.6453894347971341, + "learning_rate": 4.894403922071815e-06, + "loss": 0.3145, + "step": 5126 + }, + { + "epoch": 0.24, + "grad_norm": 0.6161989806369667, + "learning_rate": 4.894349378718014e-06, + "loss": 0.2801, + "step": 5127 + }, + { + "epoch": 0.24, + "grad_norm": 0.6664894846642954, + "learning_rate": 4.894294821585332e-06, + "loss": 0.3057, + "step": 5128 + }, + { + "epoch": 0.24, + "grad_norm": 0.6130143585968736, + "learning_rate": 4.894240250674087e-06, + "loss": 0.2986, + "step": 5129 + }, + { + "epoch": 0.24, + "grad_norm": 0.647745629566583, + "learning_rate": 4.894185665984591e-06, + "loss": 0.2836, + "step": 5130 + }, + { + "epoch": 0.24, + "grad_norm": 0.6112524840342127, + "learning_rate": 4.894131067517158e-06, + "loss": 0.3064, + "step": 5131 + }, + { + "epoch": 0.24, + "grad_norm": 0.6711784442228633, + "learning_rate": 4.894076455272102e-06, + "loss": 0.3073, + "step": 5132 + }, + { + "epoch": 0.24, + "grad_norm": 0.6118411996228785, + "learning_rate": 4.894021829249738e-06, + "loss": 0.3083, + "step": 5133 + }, + { + "epoch": 0.24, + "grad_norm": 0.6147087193432235, + "learning_rate": 4.8939671894503805e-06, + "loss": 0.2786, + "step": 5134 + }, + { + "epoch": 0.24, + "grad_norm": 0.5827966224275695, + "learning_rate": 4.893912535874343e-06, + "loss": 0.2841, + "step": 5135 + }, + { + "epoch": 0.24, + "grad_norm": 0.5950949299634859, + "learning_rate": 4.893857868521941e-06, + "loss": 0.2804, + "step": 5136 + }, + { + "epoch": 0.24, + "grad_norm": 0.6186959003163575, + "learning_rate": 4.893803187393488e-06, + "loss": 0.2951, + "step": 5137 + }, + { + "epoch": 0.24, + "grad_norm": 0.6426675747399225, + "learning_rate": 4.8937484924893e-06, + "loss": 0.3331, + "step": 5138 + }, + { + "epoch": 0.24, + "grad_norm": 0.6436393907130512, + "learning_rate": 4.89369378380969e-06, + "loss": 0.2947, + "step": 5139 + }, + { + "epoch": 0.24, + "grad_norm": 0.5945389920347681, + "learning_rate": 4.893639061354975e-06, + "loss": 0.2961, + "step": 5140 + }, + { + "epoch": 0.24, + "grad_norm": 0.6434502227240558, + "learning_rate": 4.893584325125468e-06, + "loss": 0.313, + "step": 5141 + }, + { + "epoch": 0.24, + "grad_norm": 0.6162757005088488, + "learning_rate": 4.893529575121486e-06, + "loss": 0.2886, + "step": 5142 + }, + { + "epoch": 0.24, + "grad_norm": 0.6231214547095865, + "learning_rate": 4.8934748113433414e-06, + "loss": 0.2935, + "step": 5143 + }, + { + "epoch": 0.24, + "grad_norm": 0.6229151735926917, + "learning_rate": 4.893420033791352e-06, + "loss": 0.3006, + "step": 5144 + }, + { + "epoch": 0.24, + "grad_norm": 0.6958062784679158, + "learning_rate": 4.893365242465832e-06, + "loss": 0.3084, + "step": 5145 + }, + { + "epoch": 0.24, + "grad_norm": 0.6508933110939545, + "learning_rate": 4.8933104373670955e-06, + "loss": 0.3112, + "step": 5146 + }, + { + "epoch": 0.24, + "grad_norm": 0.6090311322164359, + "learning_rate": 4.893255618495459e-06, + "loss": 0.2878, + "step": 5147 + }, + { + "epoch": 0.24, + "grad_norm": 0.6086594655293158, + "learning_rate": 4.893200785851239e-06, + "loss": 0.3037, + "step": 5148 + }, + { + "epoch": 0.24, + "grad_norm": 0.6158079345780221, + "learning_rate": 4.8931459394347495e-06, + "loss": 0.2955, + "step": 5149 + }, + { + "epoch": 0.24, + "grad_norm": 0.6749731783040199, + "learning_rate": 4.893091079246306e-06, + "loss": 0.3023, + "step": 5150 + }, + { + "epoch": 0.24, + "grad_norm": 0.6166394134748452, + "learning_rate": 4.8930362052862255e-06, + "loss": 0.2841, + "step": 5151 + }, + { + "epoch": 0.24, + "grad_norm": 0.6655417054818155, + "learning_rate": 4.892981317554824e-06, + "loss": 0.3037, + "step": 5152 + }, + { + "epoch": 0.24, + "grad_norm": 0.6383945343518598, + "learning_rate": 4.892926416052415e-06, + "loss": 0.3031, + "step": 5153 + }, + { + "epoch": 0.24, + "grad_norm": 0.6184040929894001, + "learning_rate": 4.892871500779316e-06, + "loss": 0.2943, + "step": 5154 + }, + { + "epoch": 0.24, + "grad_norm": 0.6198516708048311, + "learning_rate": 4.892816571735843e-06, + "loss": 0.2957, + "step": 5155 + }, + { + "epoch": 0.24, + "grad_norm": 0.6338569360051909, + "learning_rate": 4.892761628922313e-06, + "loss": 0.2957, + "step": 5156 + }, + { + "epoch": 0.24, + "grad_norm": 0.6348738356990268, + "learning_rate": 4.8927066723390404e-06, + "loss": 0.301, + "step": 5157 + }, + { + "epoch": 0.24, + "grad_norm": 0.5875314179739225, + "learning_rate": 4.8926517019863425e-06, + "loss": 0.2839, + "step": 5158 + }, + { + "epoch": 0.24, + "grad_norm": 0.6452005341122407, + "learning_rate": 4.892596717864535e-06, + "loss": 0.3249, + "step": 5159 + }, + { + "epoch": 0.24, + "grad_norm": 0.6825236817305996, + "learning_rate": 4.892541719973936e-06, + "loss": 0.3154, + "step": 5160 + }, + { + "epoch": 0.24, + "grad_norm": 0.6492412226822253, + "learning_rate": 4.89248670831486e-06, + "loss": 0.3072, + "step": 5161 + }, + { + "epoch": 0.24, + "grad_norm": 0.6272659917703546, + "learning_rate": 4.892431682887623e-06, + "loss": 0.3107, + "step": 5162 + }, + { + "epoch": 0.24, + "grad_norm": 0.6718179709256632, + "learning_rate": 4.892376643692544e-06, + "loss": 0.315, + "step": 5163 + }, + { + "epoch": 0.24, + "grad_norm": 0.6149836751939087, + "learning_rate": 4.8923215907299394e-06, + "loss": 0.2912, + "step": 5164 + }, + { + "epoch": 0.24, + "grad_norm": 0.5840121639603745, + "learning_rate": 4.892266524000125e-06, + "loss": 0.2981, + "step": 5165 + }, + { + "epoch": 0.24, + "grad_norm": 0.6874572265965615, + "learning_rate": 4.8922114435034176e-06, + "loss": 0.314, + "step": 5166 + }, + { + "epoch": 0.24, + "grad_norm": 0.6434328509510293, + "learning_rate": 4.892156349240135e-06, + "loss": 0.2881, + "step": 5167 + }, + { + "epoch": 0.24, + "grad_norm": 0.6513267615638119, + "learning_rate": 4.892101241210594e-06, + "loss": 0.2834, + "step": 5168 + }, + { + "epoch": 0.24, + "grad_norm": 0.6701606953433286, + "learning_rate": 4.892046119415111e-06, + "loss": 0.3002, + "step": 5169 + }, + { + "epoch": 0.24, + "grad_norm": 0.6050093867821545, + "learning_rate": 4.891990983854004e-06, + "loss": 0.2866, + "step": 5170 + }, + { + "epoch": 0.24, + "grad_norm": 0.643949809900528, + "learning_rate": 4.8919358345275904e-06, + "loss": 0.3082, + "step": 5171 + }, + { + "epoch": 0.24, + "grad_norm": 0.5662862388569373, + "learning_rate": 4.891880671436187e-06, + "loss": 0.2839, + "step": 5172 + }, + { + "epoch": 0.24, + "grad_norm": 0.6169224021716748, + "learning_rate": 4.8918254945801115e-06, + "loss": 0.2891, + "step": 5173 + }, + { + "epoch": 0.24, + "grad_norm": 0.6837994786713489, + "learning_rate": 4.891770303959681e-06, + "loss": 0.3019, + "step": 5174 + }, + { + "epoch": 0.24, + "grad_norm": 0.5736452495853722, + "learning_rate": 4.891715099575215e-06, + "loss": 0.265, + "step": 5175 + }, + { + "epoch": 0.24, + "grad_norm": 0.620283345887727, + "learning_rate": 4.891659881427028e-06, + "loss": 0.2872, + "step": 5176 + }, + { + "epoch": 0.24, + "grad_norm": 0.6597608762003112, + "learning_rate": 4.891604649515441e-06, + "loss": 0.3035, + "step": 5177 + }, + { + "epoch": 0.24, + "grad_norm": 0.635700588354394, + "learning_rate": 4.891549403840769e-06, + "loss": 0.2781, + "step": 5178 + }, + { + "epoch": 0.24, + "grad_norm": 0.5958686572091743, + "learning_rate": 4.891494144403333e-06, + "loss": 0.2858, + "step": 5179 + }, + { + "epoch": 0.24, + "grad_norm": 0.623559168860812, + "learning_rate": 4.8914388712034475e-06, + "loss": 0.2926, + "step": 5180 + }, + { + "epoch": 0.24, + "grad_norm": 0.7036834816619151, + "learning_rate": 4.891383584241433e-06, + "loss": 0.3208, + "step": 5181 + }, + { + "epoch": 0.24, + "grad_norm": 0.6577748096273982, + "learning_rate": 4.891328283517607e-06, + "loss": 0.2919, + "step": 5182 + }, + { + "epoch": 0.24, + "grad_norm": 0.6686081430065548, + "learning_rate": 4.891272969032288e-06, + "loss": 0.3062, + "step": 5183 + }, + { + "epoch": 0.24, + "grad_norm": 0.6789512008074047, + "learning_rate": 4.891217640785794e-06, + "loss": 0.3339, + "step": 5184 + }, + { + "epoch": 0.24, + "grad_norm": 0.6174946193730139, + "learning_rate": 4.891162298778444e-06, + "loss": 0.2927, + "step": 5185 + }, + { + "epoch": 0.24, + "grad_norm": 0.6647465844595235, + "learning_rate": 4.891106943010555e-06, + "loss": 0.3056, + "step": 5186 + }, + { + "epoch": 0.24, + "grad_norm": 0.6274109789726052, + "learning_rate": 4.891051573482446e-06, + "loss": 0.2834, + "step": 5187 + }, + { + "epoch": 0.24, + "grad_norm": 0.6359566988437559, + "learning_rate": 4.8909961901944375e-06, + "loss": 0.2928, + "step": 5188 + }, + { + "epoch": 0.24, + "grad_norm": 0.5975628766504468, + "learning_rate": 4.890940793146847e-06, + "loss": 0.3053, + "step": 5189 + }, + { + "epoch": 0.24, + "grad_norm": 0.6249447777471269, + "learning_rate": 4.890885382339992e-06, + "loss": 0.272, + "step": 5190 + }, + { + "epoch": 0.24, + "grad_norm": 0.6573086309737979, + "learning_rate": 4.890829957774193e-06, + "loss": 0.324, + "step": 5191 + }, + { + "epoch": 0.24, + "grad_norm": 0.6859583697943874, + "learning_rate": 4.890774519449769e-06, + "loss": 0.3124, + "step": 5192 + }, + { + "epoch": 0.24, + "grad_norm": 0.65674448319712, + "learning_rate": 4.890719067367038e-06, + "loss": 0.3157, + "step": 5193 + }, + { + "epoch": 0.24, + "grad_norm": 0.6076501074392219, + "learning_rate": 4.89066360152632e-06, + "loss": 0.2954, + "step": 5194 + }, + { + "epoch": 0.24, + "grad_norm": 0.6159284477790362, + "learning_rate": 4.890608121927934e-06, + "loss": 0.2894, + "step": 5195 + }, + { + "epoch": 0.24, + "grad_norm": 0.6858716533885446, + "learning_rate": 4.890552628572199e-06, + "loss": 0.3306, + "step": 5196 + }, + { + "epoch": 0.24, + "grad_norm": 0.6981071652972534, + "learning_rate": 4.890497121459434e-06, + "loss": 0.299, + "step": 5197 + }, + { + "epoch": 0.24, + "grad_norm": 0.6612112874530105, + "learning_rate": 4.890441600589959e-06, + "loss": 0.2918, + "step": 5198 + }, + { + "epoch": 0.24, + "grad_norm": 0.6274839715689987, + "learning_rate": 4.890386065964094e-06, + "loss": 0.2925, + "step": 5199 + }, + { + "epoch": 0.24, + "grad_norm": 0.6285452747146265, + "learning_rate": 4.890330517582157e-06, + "loss": 0.2991, + "step": 5200 + }, + { + "epoch": 0.24, + "grad_norm": 0.6276451686951531, + "learning_rate": 4.89027495544447e-06, + "loss": 0.2643, + "step": 5201 + }, + { + "epoch": 0.24, + "grad_norm": 0.6016138360047912, + "learning_rate": 4.89021937955135e-06, + "loss": 0.3017, + "step": 5202 + }, + { + "epoch": 0.24, + "grad_norm": 0.6412917803010806, + "learning_rate": 4.890163789903119e-06, + "loss": 0.3153, + "step": 5203 + }, + { + "epoch": 0.24, + "grad_norm": 0.6350377171921265, + "learning_rate": 4.8901081865000965e-06, + "loss": 0.3067, + "step": 5204 + }, + { + "epoch": 0.24, + "grad_norm": 0.6503203962168622, + "learning_rate": 4.890052569342601e-06, + "loss": 0.3167, + "step": 5205 + }, + { + "epoch": 0.24, + "grad_norm": 0.6337113407239329, + "learning_rate": 4.889996938430955e-06, + "loss": 0.2999, + "step": 5206 + }, + { + "epoch": 0.24, + "grad_norm": 0.6108623088253532, + "learning_rate": 4.8899412937654765e-06, + "loss": 0.3122, + "step": 5207 + }, + { + "epoch": 0.24, + "grad_norm": 0.6545560796700736, + "learning_rate": 4.8898856353464865e-06, + "loss": 0.2874, + "step": 5208 + }, + { + "epoch": 0.24, + "grad_norm": 0.6151821444131818, + "learning_rate": 4.8898299631743055e-06, + "loss": 0.3156, + "step": 5209 + }, + { + "epoch": 0.24, + "grad_norm": 0.6704974877788645, + "learning_rate": 4.889774277249254e-06, + "loss": 0.3125, + "step": 5210 + }, + { + "epoch": 0.24, + "grad_norm": 0.6262538174531066, + "learning_rate": 4.8897185775716514e-06, + "loss": 0.3152, + "step": 5211 + }, + { + "epoch": 0.24, + "grad_norm": 0.5841833507179075, + "learning_rate": 4.8896628641418195e-06, + "loss": 0.2856, + "step": 5212 + }, + { + "epoch": 0.24, + "grad_norm": 0.6187609480734129, + "learning_rate": 4.889607136960079e-06, + "loss": 0.2994, + "step": 5213 + }, + { + "epoch": 0.24, + "grad_norm": 0.669415339517152, + "learning_rate": 4.889551396026749e-06, + "loss": 0.2954, + "step": 5214 + }, + { + "epoch": 0.24, + "grad_norm": 0.6473317685627805, + "learning_rate": 4.889495641342152e-06, + "loss": 0.3173, + "step": 5215 + }, + { + "epoch": 0.24, + "grad_norm": 0.6381391843445409, + "learning_rate": 4.889439872906608e-06, + "loss": 0.3241, + "step": 5216 + }, + { + "epoch": 0.24, + "grad_norm": 0.6239271153187887, + "learning_rate": 4.889384090720438e-06, + "loss": 0.2891, + "step": 5217 + }, + { + "epoch": 0.24, + "grad_norm": 0.6769334339098761, + "learning_rate": 4.889328294783964e-06, + "loss": 0.3067, + "step": 5218 + }, + { + "epoch": 0.24, + "grad_norm": 0.6181728119556958, + "learning_rate": 4.8892724850975045e-06, + "loss": 0.3098, + "step": 5219 + }, + { + "epoch": 0.24, + "grad_norm": 0.6602706240393948, + "learning_rate": 4.8892166616613836e-06, + "loss": 0.3135, + "step": 5220 + }, + { + "epoch": 0.24, + "grad_norm": 0.6386969755551418, + "learning_rate": 4.889160824475921e-06, + "loss": 0.2843, + "step": 5221 + }, + { + "epoch": 0.24, + "grad_norm": 0.6598875842131721, + "learning_rate": 4.889104973541438e-06, + "loss": 0.2869, + "step": 5222 + }, + { + "epoch": 0.24, + "grad_norm": 0.6992082215850818, + "learning_rate": 4.889049108858257e-06, + "loss": 0.3178, + "step": 5223 + }, + { + "epoch": 0.24, + "grad_norm": 0.703927427371972, + "learning_rate": 4.888993230426698e-06, + "loss": 0.3003, + "step": 5224 + }, + { + "epoch": 0.24, + "grad_norm": 0.6334654281696623, + "learning_rate": 4.888937338247084e-06, + "loss": 0.2725, + "step": 5225 + }, + { + "epoch": 0.24, + "grad_norm": 0.6065220222016624, + "learning_rate": 4.888881432319737e-06, + "loss": 0.2801, + "step": 5226 + }, + { + "epoch": 0.24, + "grad_norm": 0.6039612118231689, + "learning_rate": 4.8888255126449765e-06, + "loss": 0.2953, + "step": 5227 + }, + { + "epoch": 0.24, + "grad_norm": 0.5675205500148867, + "learning_rate": 4.888769579223126e-06, + "loss": 0.2776, + "step": 5228 + }, + { + "epoch": 0.24, + "grad_norm": 0.6687707211770961, + "learning_rate": 4.888713632054506e-06, + "loss": 0.2938, + "step": 5229 + }, + { + "epoch": 0.24, + "grad_norm": 0.6412091521694604, + "learning_rate": 4.888657671139441e-06, + "loss": 0.2985, + "step": 5230 + }, + { + "epoch": 0.25, + "grad_norm": 0.5875540145944776, + "learning_rate": 4.88860169647825e-06, + "loss": 0.2962, + "step": 5231 + }, + { + "epoch": 0.25, + "grad_norm": 0.6295363426830813, + "learning_rate": 4.8885457080712576e-06, + "loss": 0.3125, + "step": 5232 + }, + { + "epoch": 0.25, + "grad_norm": 0.6251185574448718, + "learning_rate": 4.888489705918784e-06, + "loss": 0.3142, + "step": 5233 + }, + { + "epoch": 0.25, + "grad_norm": 0.6051264958411696, + "learning_rate": 4.8884336900211535e-06, + "loss": 0.2764, + "step": 5234 + }, + { + "epoch": 0.25, + "grad_norm": 0.6549589022979034, + "learning_rate": 4.888377660378688e-06, + "loss": 0.3006, + "step": 5235 + }, + { + "epoch": 0.25, + "grad_norm": 0.6509933571388234, + "learning_rate": 4.888321616991708e-06, + "loss": 0.3273, + "step": 5236 + }, + { + "epoch": 0.25, + "grad_norm": 0.6199960632884824, + "learning_rate": 4.888265559860538e-06, + "loss": 0.3022, + "step": 5237 + }, + { + "epoch": 0.25, + "grad_norm": 0.6318904625588456, + "learning_rate": 4.888209488985499e-06, + "loss": 0.3235, + "step": 5238 + }, + { + "epoch": 0.25, + "grad_norm": 0.613043601148947, + "learning_rate": 4.888153404366916e-06, + "loss": 0.3057, + "step": 5239 + }, + { + "epoch": 0.25, + "grad_norm": 0.6285381997512166, + "learning_rate": 4.888097306005109e-06, + "loss": 0.3008, + "step": 5240 + }, + { + "epoch": 0.25, + "grad_norm": 0.6429958539000407, + "learning_rate": 4.888041193900404e-06, + "loss": 0.2898, + "step": 5241 + }, + { + "epoch": 0.25, + "grad_norm": 0.6875889901826256, + "learning_rate": 4.887985068053121e-06, + "loss": 0.303, + "step": 5242 + }, + { + "epoch": 0.25, + "grad_norm": 0.6502945532718232, + "learning_rate": 4.887928928463585e-06, + "loss": 0.3116, + "step": 5243 + }, + { + "epoch": 0.25, + "grad_norm": 0.6256617347326331, + "learning_rate": 4.887872775132117e-06, + "loss": 0.3021, + "step": 5244 + }, + { + "epoch": 0.25, + "grad_norm": 0.6242550065365551, + "learning_rate": 4.887816608059042e-06, + "loss": 0.2945, + "step": 5245 + }, + { + "epoch": 0.25, + "grad_norm": 0.6068100260562845, + "learning_rate": 4.887760427244682e-06, + "loss": 0.2861, + "step": 5246 + }, + { + "epoch": 0.25, + "grad_norm": 0.6486251738807932, + "learning_rate": 4.887704232689362e-06, + "loss": 0.3002, + "step": 5247 + }, + { + "epoch": 0.25, + "grad_norm": 0.6833190584983988, + "learning_rate": 4.887648024393403e-06, + "loss": 0.3042, + "step": 5248 + }, + { + "epoch": 0.25, + "grad_norm": 0.6579209693809229, + "learning_rate": 4.88759180235713e-06, + "loss": 0.3058, + "step": 5249 + }, + { + "epoch": 0.25, + "grad_norm": 0.7026482161759655, + "learning_rate": 4.887535566580867e-06, + "loss": 0.3354, + "step": 5250 + }, + { + "epoch": 0.25, + "grad_norm": 0.6131119530105485, + "learning_rate": 4.887479317064937e-06, + "loss": 0.301, + "step": 5251 + }, + { + "epoch": 0.25, + "grad_norm": 0.5989683241676299, + "learning_rate": 4.887423053809663e-06, + "loss": 0.2997, + "step": 5252 + }, + { + "epoch": 0.25, + "grad_norm": 0.6599198292357555, + "learning_rate": 4.887366776815369e-06, + "loss": 0.3089, + "step": 5253 + }, + { + "epoch": 0.25, + "grad_norm": 0.6393456101852203, + "learning_rate": 4.88731048608238e-06, + "loss": 0.301, + "step": 5254 + }, + { + "epoch": 0.25, + "grad_norm": 0.6580373191848946, + "learning_rate": 4.8872541816110186e-06, + "loss": 0.2845, + "step": 5255 + }, + { + "epoch": 0.25, + "grad_norm": 0.6582772892483606, + "learning_rate": 4.8871978634016105e-06, + "loss": 0.3075, + "step": 5256 + }, + { + "epoch": 0.25, + "grad_norm": 0.6091510259404116, + "learning_rate": 4.887141531454478e-06, + "loss": 0.3083, + "step": 5257 + }, + { + "epoch": 0.25, + "grad_norm": 0.6309187943417661, + "learning_rate": 4.8870851857699455e-06, + "loss": 0.31, + "step": 5258 + }, + { + "epoch": 0.25, + "grad_norm": 0.6166836675348699, + "learning_rate": 4.887028826348338e-06, + "loss": 0.3002, + "step": 5259 + }, + { + "epoch": 0.25, + "grad_norm": 0.678738299044307, + "learning_rate": 4.88697245318998e-06, + "loss": 0.3107, + "step": 5260 + }, + { + "epoch": 0.25, + "grad_norm": 0.6940895156829381, + "learning_rate": 4.886916066295195e-06, + "loss": 0.315, + "step": 5261 + }, + { + "epoch": 0.25, + "grad_norm": 0.6483045202605507, + "learning_rate": 4.886859665664308e-06, + "loss": 0.3066, + "step": 5262 + }, + { + "epoch": 0.25, + "grad_norm": 0.6796791561243735, + "learning_rate": 4.8868032512976436e-06, + "loss": 0.3152, + "step": 5263 + }, + { + "epoch": 0.25, + "grad_norm": 0.6157331332165094, + "learning_rate": 4.886746823195526e-06, + "loss": 0.3065, + "step": 5264 + }, + { + "epoch": 0.25, + "grad_norm": 0.5996085420748275, + "learning_rate": 4.886690381358281e-06, + "loss": 0.2906, + "step": 5265 + }, + { + "epoch": 0.25, + "grad_norm": 0.6685346938785186, + "learning_rate": 4.886633925786233e-06, + "loss": 0.3113, + "step": 5266 + }, + { + "epoch": 0.25, + "grad_norm": 0.6441215452641088, + "learning_rate": 4.8865774564797056e-06, + "loss": 0.287, + "step": 5267 + }, + { + "epoch": 0.25, + "grad_norm": 0.655750808945052, + "learning_rate": 4.886520973439026e-06, + "loss": 0.3081, + "step": 5268 + }, + { + "epoch": 0.25, + "grad_norm": 0.6456927645863532, + "learning_rate": 4.886464476664517e-06, + "loss": 0.3052, + "step": 5269 + }, + { + "epoch": 0.25, + "grad_norm": 0.6064782565940878, + "learning_rate": 4.886407966156505e-06, + "loss": 0.2922, + "step": 5270 + }, + { + "epoch": 0.25, + "grad_norm": 0.6056188836621018, + "learning_rate": 4.886351441915315e-06, + "loss": 0.2784, + "step": 5271 + }, + { + "epoch": 0.25, + "grad_norm": 0.6698074036966454, + "learning_rate": 4.886294903941272e-06, + "loss": 0.3313, + "step": 5272 + }, + { + "epoch": 0.25, + "grad_norm": 0.6221551738880189, + "learning_rate": 4.886238352234702e-06, + "loss": 0.2793, + "step": 5273 + }, + { + "epoch": 0.25, + "grad_norm": 0.6312730802322105, + "learning_rate": 4.88618178679593e-06, + "loss": 0.2905, + "step": 5274 + }, + { + "epoch": 0.25, + "grad_norm": 0.6351562010324319, + "learning_rate": 4.886125207625282e-06, + "loss": 0.3113, + "step": 5275 + }, + { + "epoch": 0.25, + "grad_norm": 0.5818939872478521, + "learning_rate": 4.886068614723082e-06, + "loss": 0.287, + "step": 5276 + }, + { + "epoch": 0.25, + "grad_norm": 0.6011985683382307, + "learning_rate": 4.886012008089657e-06, + "loss": 0.2734, + "step": 5277 + }, + { + "epoch": 0.25, + "grad_norm": 0.65688309568886, + "learning_rate": 4.885955387725333e-06, + "loss": 0.3026, + "step": 5278 + }, + { + "epoch": 0.25, + "grad_norm": 0.6671735551105278, + "learning_rate": 4.885898753630436e-06, + "loss": 0.3098, + "step": 5279 + }, + { + "epoch": 0.25, + "grad_norm": 0.6425018045054312, + "learning_rate": 4.885842105805291e-06, + "loss": 0.2867, + "step": 5280 + }, + { + "epoch": 0.25, + "grad_norm": 0.6429122844665603, + "learning_rate": 4.885785444250224e-06, + "loss": 0.3075, + "step": 5281 + }, + { + "epoch": 0.25, + "grad_norm": 0.636492683046552, + "learning_rate": 4.885728768965562e-06, + "loss": 0.294, + "step": 5282 + }, + { + "epoch": 0.25, + "grad_norm": 0.6109221330102181, + "learning_rate": 4.88567207995163e-06, + "loss": 0.3122, + "step": 5283 + }, + { + "epoch": 0.25, + "grad_norm": 0.6770918338385654, + "learning_rate": 4.885615377208754e-06, + "loss": 0.296, + "step": 5284 + }, + { + "epoch": 0.25, + "grad_norm": 0.6467448938621218, + "learning_rate": 4.885558660737263e-06, + "loss": 0.2948, + "step": 5285 + }, + { + "epoch": 0.25, + "grad_norm": 0.6073956466814938, + "learning_rate": 4.885501930537481e-06, + "loss": 0.2756, + "step": 5286 + }, + { + "epoch": 0.25, + "grad_norm": 0.6135494266081651, + "learning_rate": 4.885445186609734e-06, + "loss": 0.2975, + "step": 5287 + }, + { + "epoch": 0.25, + "grad_norm": 0.5814888309897918, + "learning_rate": 4.885388428954349e-06, + "loss": 0.2767, + "step": 5288 + }, + { + "epoch": 0.25, + "grad_norm": 0.6447873947824557, + "learning_rate": 4.8853316575716545e-06, + "loss": 0.3015, + "step": 5289 + }, + { + "epoch": 0.25, + "grad_norm": 0.6479450247731127, + "learning_rate": 4.885274872461976e-06, + "loss": 0.3215, + "step": 5290 + }, + { + "epoch": 0.25, + "grad_norm": 0.6147573645172083, + "learning_rate": 4.88521807362564e-06, + "loss": 0.3013, + "step": 5291 + }, + { + "epoch": 0.25, + "grad_norm": 0.6805380435330799, + "learning_rate": 4.885161261062973e-06, + "loss": 0.3052, + "step": 5292 + }, + { + "epoch": 0.25, + "grad_norm": 0.6160523758056062, + "learning_rate": 4.885104434774302e-06, + "loss": 0.291, + "step": 5293 + }, + { + "epoch": 0.25, + "grad_norm": 0.6706590985480252, + "learning_rate": 4.885047594759955e-06, + "loss": 0.3023, + "step": 5294 + }, + { + "epoch": 0.25, + "grad_norm": 0.6262465999275699, + "learning_rate": 4.884990741020259e-06, + "loss": 0.2967, + "step": 5295 + }, + { + "epoch": 0.25, + "grad_norm": 0.6145977350166221, + "learning_rate": 4.88493387355554e-06, + "loss": 0.2829, + "step": 5296 + }, + { + "epoch": 0.25, + "grad_norm": 0.6785701322145553, + "learning_rate": 4.884876992366127e-06, + "loss": 0.3018, + "step": 5297 + }, + { + "epoch": 0.25, + "grad_norm": 0.636353476588475, + "learning_rate": 4.884820097452345e-06, + "loss": 0.2805, + "step": 5298 + }, + { + "epoch": 0.25, + "grad_norm": 0.6191327684860397, + "learning_rate": 4.884763188814523e-06, + "loss": 0.2836, + "step": 5299 + }, + { + "epoch": 0.25, + "grad_norm": 0.6115692107214359, + "learning_rate": 4.884706266452989e-06, + "loss": 0.2737, + "step": 5300 + }, + { + "epoch": 0.25, + "grad_norm": 0.6714558854724313, + "learning_rate": 4.8846493303680695e-06, + "loss": 0.3288, + "step": 5301 + }, + { + "epoch": 0.25, + "grad_norm": 0.637147308574395, + "learning_rate": 4.884592380560093e-06, + "loss": 0.3164, + "step": 5302 + }, + { + "epoch": 0.25, + "grad_norm": 0.6191618170807413, + "learning_rate": 4.884535417029385e-06, + "loss": 0.288, + "step": 5303 + }, + { + "epoch": 0.25, + "grad_norm": 0.6230905976517038, + "learning_rate": 4.884478439776276e-06, + "loss": 0.2869, + "step": 5304 + }, + { + "epoch": 0.25, + "grad_norm": 0.6580709126462738, + "learning_rate": 4.884421448801093e-06, + "loss": 0.2969, + "step": 5305 + }, + { + "epoch": 0.25, + "grad_norm": 0.602372907924338, + "learning_rate": 4.884364444104163e-06, + "loss": 0.2803, + "step": 5306 + }, + { + "epoch": 0.25, + "grad_norm": 0.6266681906448633, + "learning_rate": 4.884307425685817e-06, + "loss": 0.2995, + "step": 5307 + }, + { + "epoch": 0.25, + "grad_norm": 0.6646680584561576, + "learning_rate": 4.884250393546379e-06, + "loss": 0.294, + "step": 5308 + }, + { + "epoch": 0.25, + "grad_norm": 0.5956256169655395, + "learning_rate": 4.88419334768618e-06, + "loss": 0.2961, + "step": 5309 + }, + { + "epoch": 0.25, + "grad_norm": 0.6176442644846631, + "learning_rate": 4.884136288105548e-06, + "loss": 0.3045, + "step": 5310 + }, + { + "epoch": 0.25, + "grad_norm": 0.6573641654618249, + "learning_rate": 4.884079214804811e-06, + "loss": 0.2747, + "step": 5311 + }, + { + "epoch": 0.25, + "grad_norm": 0.6656696442490575, + "learning_rate": 4.884022127784296e-06, + "loss": 0.3216, + "step": 5312 + }, + { + "epoch": 0.25, + "grad_norm": 0.6415780250285189, + "learning_rate": 4.883965027044334e-06, + "loss": 0.2919, + "step": 5313 + }, + { + "epoch": 0.25, + "grad_norm": 0.5947516911952767, + "learning_rate": 4.883907912585252e-06, + "loss": 0.2888, + "step": 5314 + }, + { + "epoch": 0.25, + "grad_norm": 0.6316358912850435, + "learning_rate": 4.883850784407379e-06, + "loss": 0.2994, + "step": 5315 + }, + { + "epoch": 0.25, + "grad_norm": 0.6402680620396806, + "learning_rate": 4.883793642511045e-06, + "loss": 0.304, + "step": 5316 + }, + { + "epoch": 0.25, + "grad_norm": 0.6119352669147557, + "learning_rate": 4.883736486896576e-06, + "loss": 0.278, + "step": 5317 + }, + { + "epoch": 0.25, + "grad_norm": 0.6487446281662871, + "learning_rate": 4.883679317564304e-06, + "loss": 0.3248, + "step": 5318 + }, + { + "epoch": 0.25, + "grad_norm": 0.6208988965524863, + "learning_rate": 4.883622134514556e-06, + "loss": 0.2824, + "step": 5319 + }, + { + "epoch": 0.25, + "grad_norm": 0.6581250769222275, + "learning_rate": 4.883564937747661e-06, + "loss": 0.3313, + "step": 5320 + }, + { + "epoch": 0.25, + "grad_norm": 0.6764493754992806, + "learning_rate": 4.8835077272639495e-06, + "loss": 0.3101, + "step": 5321 + }, + { + "epoch": 0.25, + "grad_norm": 0.6190533055414508, + "learning_rate": 4.883450503063751e-06, + "loss": 0.296, + "step": 5322 + }, + { + "epoch": 0.25, + "grad_norm": 0.6336743072776989, + "learning_rate": 4.883393265147393e-06, + "loss": 0.2933, + "step": 5323 + }, + { + "epoch": 0.25, + "grad_norm": 0.6251373323528577, + "learning_rate": 4.8833360135152065e-06, + "loss": 0.2996, + "step": 5324 + }, + { + "epoch": 0.25, + "grad_norm": 0.656540867914268, + "learning_rate": 4.883278748167519e-06, + "loss": 0.3027, + "step": 5325 + }, + { + "epoch": 0.25, + "grad_norm": 0.6155764457373823, + "learning_rate": 4.883221469104663e-06, + "loss": 0.2951, + "step": 5326 + }, + { + "epoch": 0.25, + "grad_norm": 0.6335328030193575, + "learning_rate": 4.8831641763269655e-06, + "loss": 0.2899, + "step": 5327 + }, + { + "epoch": 0.25, + "grad_norm": 0.6964914385543854, + "learning_rate": 4.883106869834757e-06, + "loss": 0.2991, + "step": 5328 + }, + { + "epoch": 0.25, + "grad_norm": 0.5885984666412658, + "learning_rate": 4.883049549628368e-06, + "loss": 0.278, + "step": 5329 + }, + { + "epoch": 0.25, + "grad_norm": 0.6801394954329106, + "learning_rate": 4.882992215708126e-06, + "loss": 0.3033, + "step": 5330 + }, + { + "epoch": 0.25, + "grad_norm": 0.751632568889787, + "learning_rate": 4.8829348680743646e-06, + "loss": 0.3176, + "step": 5331 + }, + { + "epoch": 0.25, + "grad_norm": 0.7202380245798502, + "learning_rate": 4.882877506727412e-06, + "loss": 0.2878, + "step": 5332 + }, + { + "epoch": 0.25, + "grad_norm": 0.6385300874047006, + "learning_rate": 4.882820131667598e-06, + "loss": 0.286, + "step": 5333 + }, + { + "epoch": 0.25, + "grad_norm": 0.6245977018765678, + "learning_rate": 4.882762742895253e-06, + "loss": 0.3156, + "step": 5334 + }, + { + "epoch": 0.25, + "grad_norm": 0.6506711510176735, + "learning_rate": 4.882705340410707e-06, + "loss": 0.3092, + "step": 5335 + }, + { + "epoch": 0.25, + "grad_norm": 0.6601509990072474, + "learning_rate": 4.88264792421429e-06, + "loss": 0.2912, + "step": 5336 + }, + { + "epoch": 0.25, + "grad_norm": 0.7123595456848982, + "learning_rate": 4.882590494306334e-06, + "loss": 0.3119, + "step": 5337 + }, + { + "epoch": 0.25, + "grad_norm": 0.6802503816914424, + "learning_rate": 4.882533050687169e-06, + "loss": 0.2841, + "step": 5338 + }, + { + "epoch": 0.25, + "grad_norm": 0.6676760020968481, + "learning_rate": 4.8824755933571245e-06, + "loss": 0.3159, + "step": 5339 + }, + { + "epoch": 0.25, + "grad_norm": 0.5793452056982231, + "learning_rate": 4.8824181223165315e-06, + "loss": 0.2863, + "step": 5340 + }, + { + "epoch": 0.25, + "grad_norm": 0.6523599974808035, + "learning_rate": 4.882360637565722e-06, + "loss": 0.2847, + "step": 5341 + }, + { + "epoch": 0.25, + "grad_norm": 0.6530847180945774, + "learning_rate": 4.882303139105025e-06, + "loss": 0.2995, + "step": 5342 + }, + { + "epoch": 0.25, + "grad_norm": 0.6611390678448507, + "learning_rate": 4.882245626934772e-06, + "loss": 0.3145, + "step": 5343 + }, + { + "epoch": 0.25, + "grad_norm": 0.6041994195204708, + "learning_rate": 4.882188101055294e-06, + "loss": 0.2971, + "step": 5344 + }, + { + "epoch": 0.25, + "grad_norm": 0.6273784140265756, + "learning_rate": 4.882130561466923e-06, + "loss": 0.2798, + "step": 5345 + }, + { + "epoch": 0.25, + "grad_norm": 0.6929706782931321, + "learning_rate": 4.882073008169988e-06, + "loss": 0.312, + "step": 5346 + }, + { + "epoch": 0.25, + "grad_norm": 0.6351430131732594, + "learning_rate": 4.882015441164824e-06, + "loss": 0.2951, + "step": 5347 + }, + { + "epoch": 0.25, + "grad_norm": 0.615931745347138, + "learning_rate": 4.881957860451758e-06, + "loss": 0.2976, + "step": 5348 + }, + { + "epoch": 0.25, + "grad_norm": 0.6303805940425748, + "learning_rate": 4.881900266031123e-06, + "loss": 0.2959, + "step": 5349 + }, + { + "epoch": 0.25, + "grad_norm": 0.6744877076089193, + "learning_rate": 4.881842657903252e-06, + "loss": 0.307, + "step": 5350 + }, + { + "epoch": 0.25, + "grad_norm": 0.6597655461028271, + "learning_rate": 4.881785036068474e-06, + "loss": 0.2931, + "step": 5351 + }, + { + "epoch": 0.25, + "grad_norm": 0.6040964847792819, + "learning_rate": 4.881727400527122e-06, + "loss": 0.2842, + "step": 5352 + }, + { + "epoch": 0.25, + "grad_norm": 0.6450471071945794, + "learning_rate": 4.881669751279528e-06, + "loss": 0.2984, + "step": 5353 + }, + { + "epoch": 0.25, + "grad_norm": 0.5669965140183295, + "learning_rate": 4.881612088326023e-06, + "loss": 0.2707, + "step": 5354 + }, + { + "epoch": 0.25, + "grad_norm": 0.6253841505245376, + "learning_rate": 4.881554411666939e-06, + "loss": 0.318, + "step": 5355 + }, + { + "epoch": 0.25, + "grad_norm": 0.6666246441489277, + "learning_rate": 4.881496721302608e-06, + "loss": 0.3125, + "step": 5356 + }, + { + "epoch": 0.25, + "grad_norm": 0.7497169682689956, + "learning_rate": 4.881439017233362e-06, + "loss": 0.3239, + "step": 5357 + }, + { + "epoch": 0.25, + "grad_norm": 0.6292632007738315, + "learning_rate": 4.881381299459532e-06, + "loss": 0.2864, + "step": 5358 + }, + { + "epoch": 0.25, + "grad_norm": 0.600569547835337, + "learning_rate": 4.881323567981452e-06, + "loss": 0.2802, + "step": 5359 + }, + { + "epoch": 0.25, + "grad_norm": 0.6646873188329938, + "learning_rate": 4.881265822799453e-06, + "loss": 0.3104, + "step": 5360 + }, + { + "epoch": 0.25, + "grad_norm": 0.6904459108310202, + "learning_rate": 4.881208063913868e-06, + "loss": 0.3138, + "step": 5361 + }, + { + "epoch": 0.25, + "grad_norm": 0.6398285457460082, + "learning_rate": 4.881150291325029e-06, + "loss": 0.2831, + "step": 5362 + }, + { + "epoch": 0.25, + "grad_norm": 0.7222022091946274, + "learning_rate": 4.881092505033268e-06, + "loss": 0.3189, + "step": 5363 + }, + { + "epoch": 0.25, + "grad_norm": 0.6813360167890108, + "learning_rate": 4.88103470503892e-06, + "loss": 0.316, + "step": 5364 + }, + { + "epoch": 0.25, + "grad_norm": 0.6200508227355667, + "learning_rate": 4.880976891342314e-06, + "loss": 0.2754, + "step": 5365 + }, + { + "epoch": 0.25, + "grad_norm": 0.5730716786354303, + "learning_rate": 4.880919063943784e-06, + "loss": 0.269, + "step": 5366 + }, + { + "epoch": 0.25, + "grad_norm": 0.6870096233766069, + "learning_rate": 4.880861222843665e-06, + "loss": 0.312, + "step": 5367 + }, + { + "epoch": 0.25, + "grad_norm": 0.6991962473061362, + "learning_rate": 4.880803368042286e-06, + "loss": 0.2834, + "step": 5368 + }, + { + "epoch": 0.25, + "grad_norm": 0.7397000083236708, + "learning_rate": 4.880745499539984e-06, + "loss": 0.3103, + "step": 5369 + }, + { + "epoch": 0.25, + "grad_norm": 0.7018616838054869, + "learning_rate": 4.880687617337089e-06, + "loss": 0.2862, + "step": 5370 + }, + { + "epoch": 0.25, + "grad_norm": 0.641858620608206, + "learning_rate": 4.880629721433935e-06, + "loss": 0.2991, + "step": 5371 + }, + { + "epoch": 0.25, + "grad_norm": 0.7185555795772554, + "learning_rate": 4.880571811830855e-06, + "loss": 0.316, + "step": 5372 + }, + { + "epoch": 0.25, + "grad_norm": 0.6644213196544215, + "learning_rate": 4.880513888528184e-06, + "loss": 0.2852, + "step": 5373 + }, + { + "epoch": 0.25, + "grad_norm": 0.6689240372855696, + "learning_rate": 4.880455951526253e-06, + "loss": 0.2961, + "step": 5374 + }, + { + "epoch": 0.25, + "grad_norm": 0.684090071401504, + "learning_rate": 4.880398000825396e-06, + "loss": 0.3197, + "step": 5375 + }, + { + "epoch": 0.25, + "grad_norm": 0.6614664340715655, + "learning_rate": 4.880340036425947e-06, + "loss": 0.2917, + "step": 5376 + }, + { + "epoch": 0.25, + "grad_norm": 0.6073679691903908, + "learning_rate": 4.880282058328239e-06, + "loss": 0.3017, + "step": 5377 + }, + { + "epoch": 0.25, + "grad_norm": 0.6383062647777124, + "learning_rate": 4.880224066532608e-06, + "loss": 0.3154, + "step": 5378 + }, + { + "epoch": 0.25, + "grad_norm": 0.6160292866678484, + "learning_rate": 4.8801660610393835e-06, + "loss": 0.2935, + "step": 5379 + }, + { + "epoch": 0.25, + "grad_norm": 0.682178296146495, + "learning_rate": 4.880108041848902e-06, + "loss": 0.2962, + "step": 5380 + }, + { + "epoch": 0.25, + "grad_norm": 0.6896805872521841, + "learning_rate": 4.880050008961498e-06, + "loss": 0.3147, + "step": 5381 + }, + { + "epoch": 0.25, + "grad_norm": 0.652713441426731, + "learning_rate": 4.879991962377503e-06, + "loss": 0.3013, + "step": 5382 + }, + { + "epoch": 0.25, + "grad_norm": 0.6526297820184668, + "learning_rate": 4.879933902097254e-06, + "loss": 0.3, + "step": 5383 + }, + { + "epoch": 0.25, + "grad_norm": 0.6865510736237388, + "learning_rate": 4.879875828121082e-06, + "loss": 0.3107, + "step": 5384 + }, + { + "epoch": 0.25, + "grad_norm": 0.6243362803674456, + "learning_rate": 4.879817740449324e-06, + "loss": 0.2922, + "step": 5385 + }, + { + "epoch": 0.25, + "grad_norm": 0.6531656010669502, + "learning_rate": 4.879759639082312e-06, + "loss": 0.2952, + "step": 5386 + }, + { + "epoch": 0.25, + "grad_norm": 0.7347146687337638, + "learning_rate": 4.879701524020381e-06, + "loss": 0.3242, + "step": 5387 + }, + { + "epoch": 0.25, + "grad_norm": 0.6494395060067831, + "learning_rate": 4.8796433952638675e-06, + "loss": 0.3014, + "step": 5388 + }, + { + "epoch": 0.25, + "grad_norm": 0.6662312509995179, + "learning_rate": 4.8795852528131035e-06, + "loss": 0.3005, + "step": 5389 + }, + { + "epoch": 0.25, + "grad_norm": 0.6018480054310483, + "learning_rate": 4.879527096668425e-06, + "loss": 0.2891, + "step": 5390 + }, + { + "epoch": 0.25, + "grad_norm": 0.6455740357510124, + "learning_rate": 4.879468926830166e-06, + "loss": 0.2982, + "step": 5391 + }, + { + "epoch": 0.25, + "grad_norm": 0.613654190867462, + "learning_rate": 4.87941074329866e-06, + "loss": 0.303, + "step": 5392 + }, + { + "epoch": 0.25, + "grad_norm": 0.6659548343037033, + "learning_rate": 4.879352546074245e-06, + "loss": 0.3041, + "step": 5393 + }, + { + "epoch": 0.25, + "grad_norm": 0.6516343320971907, + "learning_rate": 4.879294335157253e-06, + "loss": 0.297, + "step": 5394 + }, + { + "epoch": 0.25, + "grad_norm": 0.6734032997797581, + "learning_rate": 4.87923611054802e-06, + "loss": 0.2826, + "step": 5395 + }, + { + "epoch": 0.25, + "grad_norm": 0.5830542665504428, + "learning_rate": 4.879177872246882e-06, + "loss": 0.2847, + "step": 5396 + }, + { + "epoch": 0.25, + "grad_norm": 0.696255009741275, + "learning_rate": 4.8791196202541734e-06, + "loss": 0.3144, + "step": 5397 + }, + { + "epoch": 0.25, + "grad_norm": 0.6030249921816235, + "learning_rate": 4.879061354570229e-06, + "loss": 0.2943, + "step": 5398 + }, + { + "epoch": 0.25, + "grad_norm": 0.7111731192760863, + "learning_rate": 4.879003075195385e-06, + "loss": 0.3234, + "step": 5399 + }, + { + "epoch": 0.25, + "grad_norm": 0.6511049181851356, + "learning_rate": 4.8789447821299755e-06, + "loss": 0.3164, + "step": 5400 + }, + { + "epoch": 0.25, + "grad_norm": 0.6358417031831467, + "learning_rate": 4.878886475374336e-06, + "loss": 0.293, + "step": 5401 + }, + { + "epoch": 0.25, + "grad_norm": 0.6525781823378872, + "learning_rate": 4.878828154928804e-06, + "loss": 0.3022, + "step": 5402 + }, + { + "epoch": 0.25, + "grad_norm": 0.630813315568195, + "learning_rate": 4.878769820793713e-06, + "loss": 0.3263, + "step": 5403 + }, + { + "epoch": 0.25, + "grad_norm": 0.603694751124343, + "learning_rate": 4.878711472969401e-06, + "loss": 0.3028, + "step": 5404 + }, + { + "epoch": 0.25, + "grad_norm": 0.6652780346565992, + "learning_rate": 4.878653111456201e-06, + "loss": 0.3006, + "step": 5405 + }, + { + "epoch": 0.25, + "grad_norm": 0.6078488456120821, + "learning_rate": 4.878594736254451e-06, + "loss": 0.2821, + "step": 5406 + }, + { + "epoch": 0.25, + "grad_norm": 0.6303028366398514, + "learning_rate": 4.878536347364486e-06, + "loss": 0.2974, + "step": 5407 + }, + { + "epoch": 0.25, + "grad_norm": 0.6099722855637346, + "learning_rate": 4.878477944786641e-06, + "loss": 0.259, + "step": 5408 + }, + { + "epoch": 0.25, + "grad_norm": 0.6290885300173407, + "learning_rate": 4.878419528521254e-06, + "loss": 0.2907, + "step": 5409 + }, + { + "epoch": 0.25, + "grad_norm": 0.6386543649626717, + "learning_rate": 4.8783610985686605e-06, + "loss": 0.3148, + "step": 5410 + }, + { + "epoch": 0.25, + "grad_norm": 0.5809114111412179, + "learning_rate": 4.878302654929197e-06, + "loss": 0.2949, + "step": 5411 + }, + { + "epoch": 0.25, + "grad_norm": 0.6362736398492193, + "learning_rate": 4.878244197603199e-06, + "loss": 0.3068, + "step": 5412 + }, + { + "epoch": 0.25, + "grad_norm": 0.6877009989755354, + "learning_rate": 4.878185726591004e-06, + "loss": 0.3293, + "step": 5413 + }, + { + "epoch": 0.25, + "grad_norm": 0.6823498341747958, + "learning_rate": 4.878127241892947e-06, + "loss": 0.2951, + "step": 5414 + }, + { + "epoch": 0.25, + "grad_norm": 0.6427874251611962, + "learning_rate": 4.8780687435093655e-06, + "loss": 0.2817, + "step": 5415 + }, + { + "epoch": 0.25, + "grad_norm": 0.7249973688138142, + "learning_rate": 4.878010231440596e-06, + "loss": 0.3001, + "step": 5416 + }, + { + "epoch": 0.25, + "grad_norm": 0.6804582851287344, + "learning_rate": 4.877951705686976e-06, + "loss": 0.3108, + "step": 5417 + }, + { + "epoch": 0.25, + "grad_norm": 0.685579206473282, + "learning_rate": 4.877893166248842e-06, + "loss": 0.306, + "step": 5418 + }, + { + "epoch": 0.25, + "grad_norm": 0.6880004165605332, + "learning_rate": 4.87783461312653e-06, + "loss": 0.3223, + "step": 5419 + }, + { + "epoch": 0.25, + "grad_norm": 0.6582125326930732, + "learning_rate": 4.877776046320377e-06, + "loss": 0.3053, + "step": 5420 + }, + { + "epoch": 0.25, + "grad_norm": 0.604052370122469, + "learning_rate": 4.877717465830721e-06, + "loss": 0.3042, + "step": 5421 + }, + { + "epoch": 0.25, + "grad_norm": 0.6076375902544965, + "learning_rate": 4.8776588716578984e-06, + "loss": 0.2809, + "step": 5422 + }, + { + "epoch": 0.25, + "grad_norm": 0.7206180908696247, + "learning_rate": 4.877600263802246e-06, + "loss": 0.3127, + "step": 5423 + }, + { + "epoch": 0.25, + "grad_norm": 0.7202711203348171, + "learning_rate": 4.877541642264103e-06, + "loss": 0.3136, + "step": 5424 + }, + { + "epoch": 0.25, + "grad_norm": 0.6042036075892007, + "learning_rate": 4.877483007043804e-06, + "loss": 0.3013, + "step": 5425 + }, + { + "epoch": 0.25, + "grad_norm": 0.6632807696016199, + "learning_rate": 4.877424358141689e-06, + "loss": 0.2893, + "step": 5426 + }, + { + "epoch": 0.25, + "grad_norm": 0.6307108059369505, + "learning_rate": 4.877365695558094e-06, + "loss": 0.2894, + "step": 5427 + }, + { + "epoch": 0.25, + "grad_norm": 0.6823438608492999, + "learning_rate": 4.877307019293356e-06, + "loss": 0.2984, + "step": 5428 + }, + { + "epoch": 0.25, + "grad_norm": 0.6648576516915208, + "learning_rate": 4.877248329347815e-06, + "loss": 0.3024, + "step": 5429 + }, + { + "epoch": 0.25, + "grad_norm": 0.695268147067455, + "learning_rate": 4.877189625721806e-06, + "loss": 0.3249, + "step": 5430 + }, + { + "epoch": 0.25, + "grad_norm": 0.6535111438009465, + "learning_rate": 4.87713090841567e-06, + "loss": 0.3086, + "step": 5431 + }, + { + "epoch": 0.25, + "grad_norm": 0.6506234379060687, + "learning_rate": 4.877072177429742e-06, + "loss": 0.3185, + "step": 5432 + }, + { + "epoch": 0.25, + "grad_norm": 0.6307723067488354, + "learning_rate": 4.877013432764361e-06, + "loss": 0.2909, + "step": 5433 + }, + { + "epoch": 0.25, + "grad_norm": 0.609097424961254, + "learning_rate": 4.8769546744198655e-06, + "loss": 0.3021, + "step": 5434 + }, + { + "epoch": 0.25, + "grad_norm": 0.6110833068102284, + "learning_rate": 4.876895902396593e-06, + "loss": 0.2862, + "step": 5435 + }, + { + "epoch": 0.25, + "grad_norm": 0.6690522070357867, + "learning_rate": 4.876837116694882e-06, + "loss": 0.2939, + "step": 5436 + }, + { + "epoch": 0.25, + "grad_norm": 0.6556181438924621, + "learning_rate": 4.8767783173150705e-06, + "loss": 0.3048, + "step": 5437 + }, + { + "epoch": 0.25, + "grad_norm": 0.6402125035986953, + "learning_rate": 4.876719504257498e-06, + "loss": 0.2811, + "step": 5438 + }, + { + "epoch": 0.25, + "grad_norm": 0.6330584592173703, + "learning_rate": 4.876660677522501e-06, + "loss": 0.2947, + "step": 5439 + }, + { + "epoch": 0.25, + "grad_norm": 0.6343141263078419, + "learning_rate": 4.876601837110421e-06, + "loss": 0.3252, + "step": 5440 + }, + { + "epoch": 0.25, + "grad_norm": 0.6312949494391614, + "learning_rate": 4.876542983021593e-06, + "loss": 0.2847, + "step": 5441 + }, + { + "epoch": 0.25, + "grad_norm": 0.698920065098512, + "learning_rate": 4.876484115256358e-06, + "loss": 0.3057, + "step": 5442 + }, + { + "epoch": 0.25, + "grad_norm": 0.5686288205101425, + "learning_rate": 4.876425233815054e-06, + "loss": 0.2631, + "step": 5443 + }, + { + "epoch": 0.26, + "grad_norm": 0.6413837866212989, + "learning_rate": 4.8763663386980205e-06, + "loss": 0.3238, + "step": 5444 + }, + { + "epoch": 0.26, + "grad_norm": 0.6370916495835779, + "learning_rate": 4.876307429905596e-06, + "loss": 0.308, + "step": 5445 + }, + { + "epoch": 0.26, + "grad_norm": 0.6294877431508796, + "learning_rate": 4.876248507438119e-06, + "loss": 0.2921, + "step": 5446 + }, + { + "epoch": 0.26, + "grad_norm": 0.6529558058379951, + "learning_rate": 4.876189571295929e-06, + "loss": 0.3066, + "step": 5447 + }, + { + "epoch": 0.26, + "grad_norm": 0.6097933029952716, + "learning_rate": 4.876130621479365e-06, + "loss": 0.2993, + "step": 5448 + }, + { + "epoch": 0.26, + "grad_norm": 0.6094841105652009, + "learning_rate": 4.876071657988768e-06, + "loss": 0.2928, + "step": 5449 + }, + { + "epoch": 0.26, + "grad_norm": 0.61358834398206, + "learning_rate": 4.8760126808244754e-06, + "loss": 0.3043, + "step": 5450 + }, + { + "epoch": 0.26, + "grad_norm": 0.6162648705526884, + "learning_rate": 4.875953689986827e-06, + "loss": 0.3044, + "step": 5451 + }, + { + "epoch": 0.26, + "grad_norm": 0.6321703951217535, + "learning_rate": 4.8758946854761616e-06, + "loss": 0.3142, + "step": 5452 + }, + { + "epoch": 0.26, + "grad_norm": 0.6002224111906579, + "learning_rate": 4.875835667292819e-06, + "loss": 0.2794, + "step": 5453 + }, + { + "epoch": 0.26, + "grad_norm": 0.631950305737267, + "learning_rate": 4.87577663543714e-06, + "loss": 0.288, + "step": 5454 + }, + { + "epoch": 0.26, + "grad_norm": 0.5808630174164485, + "learning_rate": 4.875717589909464e-06, + "loss": 0.2743, + "step": 5455 + }, + { + "epoch": 0.26, + "grad_norm": 0.6526910499557632, + "learning_rate": 4.87565853071013e-06, + "loss": 0.2938, + "step": 5456 + }, + { + "epoch": 0.26, + "grad_norm": 0.6609335167014255, + "learning_rate": 4.875599457839479e-06, + "loss": 0.3118, + "step": 5457 + }, + { + "epoch": 0.26, + "grad_norm": 0.5969801277769665, + "learning_rate": 4.875540371297848e-06, + "loss": 0.2885, + "step": 5458 + }, + { + "epoch": 0.26, + "grad_norm": 0.6299313640461759, + "learning_rate": 4.875481271085581e-06, + "loss": 0.3101, + "step": 5459 + }, + { + "epoch": 0.26, + "grad_norm": 0.6001954793876108, + "learning_rate": 4.875422157203016e-06, + "loss": 0.2807, + "step": 5460 + }, + { + "epoch": 0.26, + "grad_norm": 0.6733646077269637, + "learning_rate": 4.875363029650494e-06, + "loss": 0.2959, + "step": 5461 + }, + { + "epoch": 0.26, + "grad_norm": 0.6015113620938866, + "learning_rate": 4.875303888428354e-06, + "loss": 0.2936, + "step": 5462 + }, + { + "epoch": 0.26, + "grad_norm": 0.6243177632161654, + "learning_rate": 4.875244733536937e-06, + "loss": 0.3032, + "step": 5463 + }, + { + "epoch": 0.26, + "grad_norm": 0.6280284362160226, + "learning_rate": 4.875185564976584e-06, + "loss": 0.2977, + "step": 5464 + }, + { + "epoch": 0.26, + "grad_norm": 0.6370598795309392, + "learning_rate": 4.875126382747636e-06, + "loss": 0.3172, + "step": 5465 + }, + { + "epoch": 0.26, + "grad_norm": 0.6599173543062189, + "learning_rate": 4.875067186850431e-06, + "loss": 0.3157, + "step": 5466 + }, + { + "epoch": 0.26, + "grad_norm": 0.6403471447984175, + "learning_rate": 4.875007977285312e-06, + "loss": 0.2918, + "step": 5467 + }, + { + "epoch": 0.26, + "grad_norm": 0.6640829222949213, + "learning_rate": 4.874948754052619e-06, + "loss": 0.3028, + "step": 5468 + }, + { + "epoch": 0.26, + "grad_norm": 0.5963614578944388, + "learning_rate": 4.874889517152693e-06, + "loss": 0.2982, + "step": 5469 + }, + { + "epoch": 0.26, + "grad_norm": 0.6706388096733958, + "learning_rate": 4.874830266585875e-06, + "loss": 0.3094, + "step": 5470 + }, + { + "epoch": 0.26, + "grad_norm": 0.603775169096324, + "learning_rate": 4.8747710023525046e-06, + "loss": 0.2995, + "step": 5471 + }, + { + "epoch": 0.26, + "grad_norm": 0.6399394552629114, + "learning_rate": 4.8747117244529245e-06, + "loss": 0.3054, + "step": 5472 + }, + { + "epoch": 0.26, + "grad_norm": 0.6292620648564033, + "learning_rate": 4.874652432887476e-06, + "loss": 0.3027, + "step": 5473 + }, + { + "epoch": 0.26, + "grad_norm": 0.6083472080148612, + "learning_rate": 4.874593127656498e-06, + "loss": 0.2892, + "step": 5474 + }, + { + "epoch": 0.26, + "grad_norm": 0.6419606233320876, + "learning_rate": 4.874533808760335e-06, + "loss": 0.2994, + "step": 5475 + }, + { + "epoch": 0.26, + "grad_norm": 0.6872228908690925, + "learning_rate": 4.874474476199325e-06, + "loss": 0.3153, + "step": 5476 + }, + { + "epoch": 0.26, + "grad_norm": 0.6493392576103416, + "learning_rate": 4.874415129973813e-06, + "loss": 0.3068, + "step": 5477 + }, + { + "epoch": 0.26, + "grad_norm": 0.6101056374466358, + "learning_rate": 4.874355770084138e-06, + "loss": 0.3015, + "step": 5478 + }, + { + "epoch": 0.26, + "grad_norm": 0.6551991143325158, + "learning_rate": 4.8742963965306426e-06, + "loss": 0.2957, + "step": 5479 + }, + { + "epoch": 0.26, + "grad_norm": 0.5803099986022079, + "learning_rate": 4.874237009313668e-06, + "loss": 0.2885, + "step": 5480 + }, + { + "epoch": 0.26, + "grad_norm": 0.6467640325415318, + "learning_rate": 4.874177608433556e-06, + "loss": 0.2994, + "step": 5481 + }, + { + "epoch": 0.26, + "grad_norm": 0.6253849125792986, + "learning_rate": 4.874118193890649e-06, + "loss": 0.3076, + "step": 5482 + }, + { + "epoch": 0.26, + "grad_norm": 0.609781070525776, + "learning_rate": 4.874058765685288e-06, + "loss": 0.2801, + "step": 5483 + }, + { + "epoch": 0.26, + "grad_norm": 0.6284781867900737, + "learning_rate": 4.873999323817816e-06, + "loss": 0.3077, + "step": 5484 + }, + { + "epoch": 0.26, + "grad_norm": 0.628800735683364, + "learning_rate": 4.873939868288574e-06, + "loss": 0.2897, + "step": 5485 + }, + { + "epoch": 0.26, + "grad_norm": 0.6787889061767544, + "learning_rate": 4.873880399097905e-06, + "loss": 0.2962, + "step": 5486 + }, + { + "epoch": 0.26, + "grad_norm": 0.6459128474869755, + "learning_rate": 4.873820916246151e-06, + "loss": 0.3186, + "step": 5487 + }, + { + "epoch": 0.26, + "grad_norm": 0.5965923847447279, + "learning_rate": 4.873761419733654e-06, + "loss": 0.2761, + "step": 5488 + }, + { + "epoch": 0.26, + "grad_norm": 0.6641256332456922, + "learning_rate": 4.8737019095607575e-06, + "loss": 0.3089, + "step": 5489 + }, + { + "epoch": 0.26, + "grad_norm": 0.586565849811383, + "learning_rate": 4.873642385727802e-06, + "loss": 0.2889, + "step": 5490 + }, + { + "epoch": 0.26, + "grad_norm": 0.6320893159724281, + "learning_rate": 4.873582848235132e-06, + "loss": 0.2629, + "step": 5491 + }, + { + "epoch": 0.26, + "grad_norm": 0.6420192966281518, + "learning_rate": 4.873523297083089e-06, + "loss": 0.2803, + "step": 5492 + }, + { + "epoch": 0.26, + "grad_norm": 0.6233607906983637, + "learning_rate": 4.8734637322720155e-06, + "loss": 0.2958, + "step": 5493 + }, + { + "epoch": 0.26, + "grad_norm": 0.616508309455779, + "learning_rate": 4.873404153802255e-06, + "loss": 0.3005, + "step": 5494 + }, + { + "epoch": 0.26, + "grad_norm": 0.6169112541776156, + "learning_rate": 4.873344561674151e-06, + "loss": 0.3026, + "step": 5495 + }, + { + "epoch": 0.26, + "grad_norm": 0.6332342564802367, + "learning_rate": 4.873284955888045e-06, + "loss": 0.2912, + "step": 5496 + }, + { + "epoch": 0.26, + "grad_norm": 0.6336853046660206, + "learning_rate": 4.8732253364442796e-06, + "loss": 0.3011, + "step": 5497 + }, + { + "epoch": 0.26, + "grad_norm": 0.6419490587362787, + "learning_rate": 4.8731657033432e-06, + "loss": 0.3144, + "step": 5498 + }, + { + "epoch": 0.26, + "grad_norm": 0.6481239651519504, + "learning_rate": 4.873106056585148e-06, + "loss": 0.3084, + "step": 5499 + }, + { + "epoch": 0.26, + "grad_norm": 0.6218098489321372, + "learning_rate": 4.873046396170467e-06, + "loss": 0.2766, + "step": 5500 + }, + { + "epoch": 0.26, + "grad_norm": 0.6926512370936382, + "learning_rate": 4.872986722099501e-06, + "loss": 0.2957, + "step": 5501 + }, + { + "epoch": 0.26, + "grad_norm": 0.6270337697455974, + "learning_rate": 4.872927034372592e-06, + "loss": 0.2957, + "step": 5502 + }, + { + "epoch": 0.26, + "grad_norm": 0.6788969179817623, + "learning_rate": 4.872867332990085e-06, + "loss": 0.3078, + "step": 5503 + }, + { + "epoch": 0.26, + "grad_norm": 0.5829582940646449, + "learning_rate": 4.872807617952323e-06, + "loss": 0.2785, + "step": 5504 + }, + { + "epoch": 0.26, + "grad_norm": 0.6190183451435706, + "learning_rate": 4.872747889259649e-06, + "loss": 0.2874, + "step": 5505 + }, + { + "epoch": 0.26, + "grad_norm": 0.649338327793758, + "learning_rate": 4.872688146912407e-06, + "loss": 0.2816, + "step": 5506 + }, + { + "epoch": 0.26, + "grad_norm": 0.6615760397533403, + "learning_rate": 4.872628390910942e-06, + "loss": 0.3038, + "step": 5507 + }, + { + "epoch": 0.26, + "grad_norm": 0.6388625491962739, + "learning_rate": 4.872568621255596e-06, + "loss": 0.3009, + "step": 5508 + }, + { + "epoch": 0.26, + "grad_norm": 0.6562057129341733, + "learning_rate": 4.8725088379467144e-06, + "loss": 0.2989, + "step": 5509 + }, + { + "epoch": 0.26, + "grad_norm": 0.6762868070539108, + "learning_rate": 4.87244904098464e-06, + "loss": 0.3073, + "step": 5510 + }, + { + "epoch": 0.26, + "grad_norm": 0.605503946553365, + "learning_rate": 4.872389230369718e-06, + "loss": 0.291, + "step": 5511 + }, + { + "epoch": 0.26, + "grad_norm": 0.6758534964377807, + "learning_rate": 4.872329406102294e-06, + "loss": 0.3003, + "step": 5512 + }, + { + "epoch": 0.26, + "grad_norm": 0.6772201076926235, + "learning_rate": 4.872269568182709e-06, + "loss": 0.316, + "step": 5513 + }, + { + "epoch": 0.26, + "grad_norm": 0.6330167767476145, + "learning_rate": 4.8722097166113086e-06, + "loss": 0.2866, + "step": 5514 + }, + { + "epoch": 0.26, + "grad_norm": 0.669055116480727, + "learning_rate": 4.872149851388438e-06, + "loss": 0.3102, + "step": 5515 + }, + { + "epoch": 0.26, + "grad_norm": 0.6679768126394003, + "learning_rate": 4.872089972514441e-06, + "loss": 0.2943, + "step": 5516 + }, + { + "epoch": 0.26, + "grad_norm": 0.7085625989839022, + "learning_rate": 4.872030079989663e-06, + "loss": 0.3175, + "step": 5517 + }, + { + "epoch": 0.26, + "grad_norm": 0.6653350374194613, + "learning_rate": 4.871970173814448e-06, + "loss": 0.3125, + "step": 5518 + }, + { + "epoch": 0.26, + "grad_norm": 0.690445055212461, + "learning_rate": 4.871910253989139e-06, + "loss": 0.3041, + "step": 5519 + }, + { + "epoch": 0.26, + "grad_norm": 0.6346021006740935, + "learning_rate": 4.871850320514085e-06, + "loss": 0.286, + "step": 5520 + }, + { + "epoch": 0.26, + "grad_norm": 0.6419131932931257, + "learning_rate": 4.871790373389628e-06, + "loss": 0.2952, + "step": 5521 + }, + { + "epoch": 0.26, + "grad_norm": 0.6359181839205211, + "learning_rate": 4.871730412616112e-06, + "loss": 0.298, + "step": 5522 + }, + { + "epoch": 0.26, + "grad_norm": 0.6071503023477985, + "learning_rate": 4.871670438193885e-06, + "loss": 0.284, + "step": 5523 + }, + { + "epoch": 0.26, + "grad_norm": 0.6477801701274801, + "learning_rate": 4.871610450123291e-06, + "loss": 0.2915, + "step": 5524 + }, + { + "epoch": 0.26, + "grad_norm": 0.5938788314980942, + "learning_rate": 4.871550448404674e-06, + "loss": 0.292, + "step": 5525 + }, + { + "epoch": 0.26, + "grad_norm": 0.639076011398981, + "learning_rate": 4.871490433038381e-06, + "loss": 0.3176, + "step": 5526 + }, + { + "epoch": 0.26, + "grad_norm": 0.6258181790468927, + "learning_rate": 4.8714304040247565e-06, + "loss": 0.2996, + "step": 5527 + }, + { + "epoch": 0.26, + "grad_norm": 0.6547755864107838, + "learning_rate": 4.871370361364145e-06, + "loss": 0.3168, + "step": 5528 + }, + { + "epoch": 0.26, + "grad_norm": 0.680265296899351, + "learning_rate": 4.871310305056894e-06, + "loss": 0.3049, + "step": 5529 + }, + { + "epoch": 0.26, + "grad_norm": 0.6193942355548423, + "learning_rate": 4.8712502351033485e-06, + "loss": 0.2929, + "step": 5530 + }, + { + "epoch": 0.26, + "grad_norm": 0.6273104166138279, + "learning_rate": 4.871190151503854e-06, + "loss": 0.3081, + "step": 5531 + }, + { + "epoch": 0.26, + "grad_norm": 0.6455214572993384, + "learning_rate": 4.871130054258756e-06, + "loss": 0.2943, + "step": 5532 + }, + { + "epoch": 0.26, + "grad_norm": 0.6251117570517374, + "learning_rate": 4.8710699433684e-06, + "loss": 0.3121, + "step": 5533 + }, + { + "epoch": 0.26, + "grad_norm": 0.6231678571288064, + "learning_rate": 4.8710098188331326e-06, + "loss": 0.2987, + "step": 5534 + }, + { + "epoch": 0.26, + "grad_norm": 0.6685615638543873, + "learning_rate": 4.870949680653299e-06, + "loss": 0.3145, + "step": 5535 + }, + { + "epoch": 0.26, + "grad_norm": 0.6097711340144087, + "learning_rate": 4.8708895288292465e-06, + "loss": 0.3132, + "step": 5536 + }, + { + "epoch": 0.26, + "grad_norm": 0.6015290601536549, + "learning_rate": 4.8708293633613205e-06, + "loss": 0.3028, + "step": 5537 + }, + { + "epoch": 0.26, + "grad_norm": 0.6195926503788461, + "learning_rate": 4.870769184249868e-06, + "loss": 0.288, + "step": 5538 + }, + { + "epoch": 0.26, + "grad_norm": 0.6176723940223823, + "learning_rate": 4.870708991495235e-06, + "loss": 0.2965, + "step": 5539 + }, + { + "epoch": 0.26, + "grad_norm": 0.7223938954409113, + "learning_rate": 4.870648785097766e-06, + "loss": 0.3134, + "step": 5540 + }, + { + "epoch": 0.26, + "grad_norm": 0.6218003028484267, + "learning_rate": 4.87058856505781e-06, + "loss": 0.2805, + "step": 5541 + }, + { + "epoch": 0.26, + "grad_norm": 0.5991157406729479, + "learning_rate": 4.870528331375712e-06, + "loss": 0.2842, + "step": 5542 + }, + { + "epoch": 0.26, + "grad_norm": 0.6306487397995817, + "learning_rate": 4.870468084051821e-06, + "loss": 0.2886, + "step": 5543 + }, + { + "epoch": 0.26, + "grad_norm": 0.6102227729769676, + "learning_rate": 4.8704078230864805e-06, + "loss": 0.3037, + "step": 5544 + }, + { + "epoch": 0.26, + "grad_norm": 0.5954305539057411, + "learning_rate": 4.870347548480039e-06, + "loss": 0.3106, + "step": 5545 + }, + { + "epoch": 0.26, + "grad_norm": 0.6201076631079506, + "learning_rate": 4.870287260232843e-06, + "loss": 0.3013, + "step": 5546 + }, + { + "epoch": 0.26, + "grad_norm": 0.5999232910685395, + "learning_rate": 4.87022695834524e-06, + "loss": 0.2921, + "step": 5547 + }, + { + "epoch": 0.26, + "grad_norm": 0.5986631057158287, + "learning_rate": 4.870166642817577e-06, + "loss": 0.3061, + "step": 5548 + }, + { + "epoch": 0.26, + "grad_norm": 0.5897401954666623, + "learning_rate": 4.870106313650199e-06, + "loss": 0.2703, + "step": 5549 + }, + { + "epoch": 0.26, + "grad_norm": 0.5983582149959937, + "learning_rate": 4.870045970843456e-06, + "loss": 0.2784, + "step": 5550 + }, + { + "epoch": 0.26, + "grad_norm": 0.6085293882659889, + "learning_rate": 4.869985614397694e-06, + "loss": 0.2994, + "step": 5551 + }, + { + "epoch": 0.26, + "grad_norm": 0.6276306429015254, + "learning_rate": 4.869925244313261e-06, + "loss": 0.3218, + "step": 5552 + }, + { + "epoch": 0.26, + "grad_norm": 0.5644202928861917, + "learning_rate": 4.869864860590504e-06, + "loss": 0.2911, + "step": 5553 + }, + { + "epoch": 0.26, + "grad_norm": 0.6702681786708262, + "learning_rate": 4.869804463229769e-06, + "loss": 0.3058, + "step": 5554 + }, + { + "epoch": 0.26, + "grad_norm": 0.609952151343464, + "learning_rate": 4.869744052231406e-06, + "loss": 0.2948, + "step": 5555 + }, + { + "epoch": 0.26, + "grad_norm": 0.6196779498622399, + "learning_rate": 4.869683627595762e-06, + "loss": 0.3047, + "step": 5556 + }, + { + "epoch": 0.26, + "grad_norm": 0.6107545913499757, + "learning_rate": 4.869623189323183e-06, + "loss": 0.274, + "step": 5557 + }, + { + "epoch": 0.26, + "grad_norm": 0.6229171906974388, + "learning_rate": 4.8695627374140184e-06, + "loss": 0.2956, + "step": 5558 + }, + { + "epoch": 0.26, + "grad_norm": 0.6081209838075439, + "learning_rate": 4.869502271868617e-06, + "loss": 0.2925, + "step": 5559 + }, + { + "epoch": 0.26, + "grad_norm": 0.5948772869132961, + "learning_rate": 4.869441792687324e-06, + "loss": 0.3015, + "step": 5560 + }, + { + "epoch": 0.26, + "grad_norm": 0.6825785583314096, + "learning_rate": 4.86938129987049e-06, + "loss": 0.3099, + "step": 5561 + }, + { + "epoch": 0.26, + "grad_norm": 0.6432629284019618, + "learning_rate": 4.869320793418462e-06, + "loss": 0.2983, + "step": 5562 + }, + { + "epoch": 0.26, + "grad_norm": 0.5772858477198827, + "learning_rate": 4.869260273331588e-06, + "loss": 0.2934, + "step": 5563 + }, + { + "epoch": 0.26, + "grad_norm": 0.6229193224474038, + "learning_rate": 4.869199739610218e-06, + "loss": 0.3034, + "step": 5564 + }, + { + "epoch": 0.26, + "grad_norm": 0.6406907277945431, + "learning_rate": 4.869139192254697e-06, + "loss": 0.3076, + "step": 5565 + }, + { + "epoch": 0.26, + "grad_norm": 0.6440147880325805, + "learning_rate": 4.869078631265376e-06, + "loss": 0.3199, + "step": 5566 + }, + { + "epoch": 0.26, + "grad_norm": 0.6199684001403363, + "learning_rate": 4.869018056642604e-06, + "loss": 0.307, + "step": 5567 + }, + { + "epoch": 0.26, + "grad_norm": 0.6414696304477919, + "learning_rate": 4.868957468386727e-06, + "loss": 0.2867, + "step": 5568 + }, + { + "epoch": 0.26, + "grad_norm": 0.6680808057897228, + "learning_rate": 4.868896866498096e-06, + "loss": 0.3, + "step": 5569 + }, + { + "epoch": 0.26, + "grad_norm": 0.6034837734837039, + "learning_rate": 4.868836250977058e-06, + "loss": 0.3089, + "step": 5570 + }, + { + "epoch": 0.26, + "grad_norm": 0.6426036130444991, + "learning_rate": 4.868775621823964e-06, + "loss": 0.3229, + "step": 5571 + }, + { + "epoch": 0.26, + "grad_norm": 0.6537738526443143, + "learning_rate": 4.868714979039162e-06, + "loss": 0.2977, + "step": 5572 + }, + { + "epoch": 0.26, + "grad_norm": 0.6416243204921291, + "learning_rate": 4.868654322623e-06, + "loss": 0.3009, + "step": 5573 + }, + { + "epoch": 0.26, + "grad_norm": 0.6160012359452957, + "learning_rate": 4.868593652575827e-06, + "loss": 0.2947, + "step": 5574 + }, + { + "epoch": 0.26, + "grad_norm": 0.6635506842784712, + "learning_rate": 4.868532968897993e-06, + "loss": 0.2947, + "step": 5575 + }, + { + "epoch": 0.26, + "grad_norm": 0.6177955376953078, + "learning_rate": 4.868472271589848e-06, + "loss": 0.2935, + "step": 5576 + }, + { + "epoch": 0.26, + "grad_norm": 0.5837793008422817, + "learning_rate": 4.8684115606517405e-06, + "loss": 0.2754, + "step": 5577 + }, + { + "epoch": 0.26, + "grad_norm": 0.6474397491980489, + "learning_rate": 4.868350836084019e-06, + "loss": 0.3074, + "step": 5578 + }, + { + "epoch": 0.26, + "grad_norm": 0.618898323734983, + "learning_rate": 4.868290097887034e-06, + "loss": 0.2621, + "step": 5579 + }, + { + "epoch": 0.26, + "grad_norm": 0.6213106294557246, + "learning_rate": 4.868229346061134e-06, + "loss": 0.3031, + "step": 5580 + }, + { + "epoch": 0.26, + "grad_norm": 0.5962724740784611, + "learning_rate": 4.868168580606671e-06, + "loss": 0.2954, + "step": 5581 + }, + { + "epoch": 0.26, + "grad_norm": 0.6658056607205938, + "learning_rate": 4.868107801523991e-06, + "loss": 0.2886, + "step": 5582 + }, + { + "epoch": 0.26, + "grad_norm": 0.6410772727933716, + "learning_rate": 4.8680470088134465e-06, + "loss": 0.2882, + "step": 5583 + }, + { + "epoch": 0.26, + "grad_norm": 0.6514897315781664, + "learning_rate": 4.8679862024753874e-06, + "loss": 0.2936, + "step": 5584 + }, + { + "epoch": 0.26, + "grad_norm": 0.6920407246698469, + "learning_rate": 4.867925382510162e-06, + "loss": 0.3184, + "step": 5585 + }, + { + "epoch": 0.26, + "grad_norm": 0.6132581463493805, + "learning_rate": 4.867864548918122e-06, + "loss": 0.3085, + "step": 5586 + }, + { + "epoch": 0.26, + "grad_norm": 0.6620486455070685, + "learning_rate": 4.867803701699616e-06, + "loss": 0.3194, + "step": 5587 + }, + { + "epoch": 0.26, + "grad_norm": 0.6324377930450368, + "learning_rate": 4.8677428408549955e-06, + "loss": 0.2972, + "step": 5588 + }, + { + "epoch": 0.26, + "grad_norm": 0.6345117879483085, + "learning_rate": 4.867681966384609e-06, + "loss": 0.2993, + "step": 5589 + }, + { + "epoch": 0.26, + "grad_norm": 0.6258047200764223, + "learning_rate": 4.867621078288809e-06, + "loss": 0.2908, + "step": 5590 + }, + { + "epoch": 0.26, + "grad_norm": 0.6766838348007644, + "learning_rate": 4.867560176567943e-06, + "loss": 0.3001, + "step": 5591 + }, + { + "epoch": 0.26, + "grad_norm": 0.6360190040600452, + "learning_rate": 4.867499261222366e-06, + "loss": 0.2978, + "step": 5592 + }, + { + "epoch": 0.26, + "grad_norm": 0.6212437493482201, + "learning_rate": 4.867438332252424e-06, + "loss": 0.2849, + "step": 5593 + }, + { + "epoch": 0.26, + "grad_norm": 0.5734963741304379, + "learning_rate": 4.86737738965847e-06, + "loss": 0.2975, + "step": 5594 + }, + { + "epoch": 0.26, + "grad_norm": 0.6237467731308053, + "learning_rate": 4.867316433440853e-06, + "loss": 0.3186, + "step": 5595 + }, + { + "epoch": 0.26, + "grad_norm": 0.6313301908289631, + "learning_rate": 4.867255463599926e-06, + "loss": 0.283, + "step": 5596 + }, + { + "epoch": 0.26, + "grad_norm": 0.6523707933926233, + "learning_rate": 4.867194480136039e-06, + "loss": 0.3018, + "step": 5597 + }, + { + "epoch": 0.26, + "grad_norm": 0.6080162631740829, + "learning_rate": 4.867133483049542e-06, + "loss": 0.2679, + "step": 5598 + }, + { + "epoch": 0.26, + "grad_norm": 0.568857451136617, + "learning_rate": 4.867072472340787e-06, + "loss": 0.2901, + "step": 5599 + }, + { + "epoch": 0.26, + "grad_norm": 0.6260331346412203, + "learning_rate": 4.867011448010125e-06, + "loss": 0.3023, + "step": 5600 + }, + { + "epoch": 0.26, + "grad_norm": 0.6258974594664936, + "learning_rate": 4.866950410057907e-06, + "loss": 0.2722, + "step": 5601 + }, + { + "epoch": 0.26, + "grad_norm": 0.6864903459948013, + "learning_rate": 4.866889358484485e-06, + "loss": 0.3256, + "step": 5602 + }, + { + "epoch": 0.26, + "grad_norm": 0.6061678615103058, + "learning_rate": 4.8668282932902085e-06, + "loss": 0.2882, + "step": 5603 + }, + { + "epoch": 0.26, + "grad_norm": 0.6156425758411151, + "learning_rate": 4.866767214475431e-06, + "loss": 0.2848, + "step": 5604 + }, + { + "epoch": 0.26, + "grad_norm": 0.587673100503274, + "learning_rate": 4.866706122040502e-06, + "loss": 0.276, + "step": 5605 + }, + { + "epoch": 0.26, + "grad_norm": 0.6140328156431575, + "learning_rate": 4.866645015985775e-06, + "loss": 0.2862, + "step": 5606 + }, + { + "epoch": 0.26, + "grad_norm": 0.6478679748656404, + "learning_rate": 4.8665838963116e-06, + "loss": 0.288, + "step": 5607 + }, + { + "epoch": 0.26, + "grad_norm": 0.6467711619857247, + "learning_rate": 4.86652276301833e-06, + "loss": 0.295, + "step": 5608 + }, + { + "epoch": 0.26, + "grad_norm": 0.6291520924975681, + "learning_rate": 4.866461616106316e-06, + "loss": 0.3032, + "step": 5609 + }, + { + "epoch": 0.26, + "grad_norm": 0.6676257838786461, + "learning_rate": 4.866400455575911e-06, + "loss": 0.323, + "step": 5610 + }, + { + "epoch": 0.26, + "grad_norm": 0.5846013296268753, + "learning_rate": 4.866339281427465e-06, + "loss": 0.2763, + "step": 5611 + }, + { + "epoch": 0.26, + "grad_norm": 0.6203054876279339, + "learning_rate": 4.8662780936613315e-06, + "loss": 0.2874, + "step": 5612 + }, + { + "epoch": 0.26, + "grad_norm": 0.60870758578037, + "learning_rate": 4.866216892277863e-06, + "loss": 0.2999, + "step": 5613 + }, + { + "epoch": 0.26, + "grad_norm": 0.6720994888711843, + "learning_rate": 4.866155677277411e-06, + "loss": 0.2948, + "step": 5614 + }, + { + "epoch": 0.26, + "grad_norm": 0.6844185470388031, + "learning_rate": 4.866094448660327e-06, + "loss": 0.3077, + "step": 5615 + }, + { + "epoch": 0.26, + "grad_norm": 0.6174049161346777, + "learning_rate": 4.866033206426965e-06, + "loss": 0.315, + "step": 5616 + }, + { + "epoch": 0.26, + "grad_norm": 0.5851921536531463, + "learning_rate": 4.865971950577676e-06, + "loss": 0.2881, + "step": 5617 + }, + { + "epoch": 0.26, + "grad_norm": 0.6657462160047708, + "learning_rate": 4.865910681112813e-06, + "loss": 0.3229, + "step": 5618 + }, + { + "epoch": 0.26, + "grad_norm": 0.6259857049159009, + "learning_rate": 4.865849398032728e-06, + "loss": 0.3113, + "step": 5619 + }, + { + "epoch": 0.26, + "grad_norm": 0.6028837623502489, + "learning_rate": 4.865788101337776e-06, + "loss": 0.2968, + "step": 5620 + }, + { + "epoch": 0.26, + "grad_norm": 0.6613134856307777, + "learning_rate": 4.865726791028307e-06, + "loss": 0.3334, + "step": 5621 + }, + { + "epoch": 0.26, + "grad_norm": 0.5714656350450178, + "learning_rate": 4.8656654671046755e-06, + "loss": 0.3038, + "step": 5622 + }, + { + "epoch": 0.26, + "grad_norm": 0.6258851033194253, + "learning_rate": 4.865604129567233e-06, + "loss": 0.3098, + "step": 5623 + }, + { + "epoch": 0.26, + "grad_norm": 0.6220959779621282, + "learning_rate": 4.865542778416335e-06, + "loss": 0.298, + "step": 5624 + }, + { + "epoch": 0.26, + "grad_norm": 0.6650737079154327, + "learning_rate": 4.865481413652331e-06, + "loss": 0.2781, + "step": 5625 + }, + { + "epoch": 0.26, + "grad_norm": 0.6098947479449187, + "learning_rate": 4.865420035275576e-06, + "loss": 0.3009, + "step": 5626 + }, + { + "epoch": 0.26, + "grad_norm": 0.6472862095202022, + "learning_rate": 4.865358643286425e-06, + "loss": 0.3276, + "step": 5627 + }, + { + "epoch": 0.26, + "grad_norm": 0.6111498716195589, + "learning_rate": 4.865297237685228e-06, + "loss": 0.2869, + "step": 5628 + }, + { + "epoch": 0.26, + "grad_norm": 0.6297264015590932, + "learning_rate": 4.86523581847234e-06, + "loss": 0.3148, + "step": 5629 + }, + { + "epoch": 0.26, + "grad_norm": 0.5936215776185949, + "learning_rate": 4.865174385648115e-06, + "loss": 0.3047, + "step": 5630 + }, + { + "epoch": 0.26, + "grad_norm": 0.6341147328043187, + "learning_rate": 4.865112939212906e-06, + "loss": 0.2984, + "step": 5631 + }, + { + "epoch": 0.26, + "grad_norm": 0.6568388357514422, + "learning_rate": 4.865051479167066e-06, + "loss": 0.3071, + "step": 5632 + }, + { + "epoch": 0.26, + "grad_norm": 0.6204303385165802, + "learning_rate": 4.864990005510949e-06, + "loss": 0.3005, + "step": 5633 + }, + { + "epoch": 0.26, + "grad_norm": 0.6112547160833431, + "learning_rate": 4.864928518244909e-06, + "loss": 0.3012, + "step": 5634 + }, + { + "epoch": 0.26, + "grad_norm": 0.6677567659204444, + "learning_rate": 4.8648670173693e-06, + "loss": 0.2953, + "step": 5635 + }, + { + "epoch": 0.26, + "grad_norm": 0.6393532107198567, + "learning_rate": 4.864805502884476e-06, + "loss": 0.3044, + "step": 5636 + }, + { + "epoch": 0.26, + "grad_norm": 0.629690378629245, + "learning_rate": 4.864743974790791e-06, + "loss": 0.2989, + "step": 5637 + }, + { + "epoch": 0.26, + "grad_norm": 0.6380897457743413, + "learning_rate": 4.864682433088597e-06, + "loss": 0.3252, + "step": 5638 + }, + { + "epoch": 0.26, + "grad_norm": 0.6144114511539374, + "learning_rate": 4.8646208777782515e-06, + "loss": 0.3024, + "step": 5639 + }, + { + "epoch": 0.26, + "grad_norm": 0.6683888110337018, + "learning_rate": 4.864559308860107e-06, + "loss": 0.3146, + "step": 5640 + }, + { + "epoch": 0.26, + "grad_norm": 0.6795694986782569, + "learning_rate": 4.864497726334518e-06, + "loss": 0.3098, + "step": 5641 + }, + { + "epoch": 0.26, + "grad_norm": 0.6310221045080122, + "learning_rate": 4.864436130201839e-06, + "loss": 0.3129, + "step": 5642 + }, + { + "epoch": 0.26, + "grad_norm": 0.6326839338686994, + "learning_rate": 4.8643745204624235e-06, + "loss": 0.2931, + "step": 5643 + }, + { + "epoch": 0.26, + "grad_norm": 0.6432587482134959, + "learning_rate": 4.864312897116627e-06, + "loss": 0.3323, + "step": 5644 + }, + { + "epoch": 0.26, + "grad_norm": 0.5969074297900521, + "learning_rate": 4.864251260164805e-06, + "loss": 0.2904, + "step": 5645 + }, + { + "epoch": 0.26, + "grad_norm": 0.6001552003368578, + "learning_rate": 4.864189609607311e-06, + "loss": 0.2935, + "step": 5646 + }, + { + "epoch": 0.26, + "grad_norm": 0.6280020332022784, + "learning_rate": 4.864127945444499e-06, + "loss": 0.2996, + "step": 5647 + }, + { + "epoch": 0.26, + "grad_norm": 0.5789812046006781, + "learning_rate": 4.864066267676725e-06, + "loss": 0.2961, + "step": 5648 + }, + { + "epoch": 0.26, + "grad_norm": 0.6366122130823676, + "learning_rate": 4.864004576304344e-06, + "loss": 0.2774, + "step": 5649 + }, + { + "epoch": 0.26, + "grad_norm": 0.6361032128160325, + "learning_rate": 4.8639428713277114e-06, + "loss": 0.3048, + "step": 5650 + }, + { + "epoch": 0.26, + "grad_norm": 0.6784319736340535, + "learning_rate": 4.863881152747182e-06, + "loss": 0.3225, + "step": 5651 + }, + { + "epoch": 0.26, + "grad_norm": 0.6278982537017205, + "learning_rate": 4.86381942056311e-06, + "loss": 0.3024, + "step": 5652 + }, + { + "epoch": 0.26, + "grad_norm": 0.6770081642651079, + "learning_rate": 4.863757674775852e-06, + "loss": 0.3253, + "step": 5653 + }, + { + "epoch": 0.26, + "grad_norm": 0.6514485134964588, + "learning_rate": 4.863695915385762e-06, + "loss": 0.3032, + "step": 5654 + }, + { + "epoch": 0.26, + "grad_norm": 0.6117533840799099, + "learning_rate": 4.863634142393197e-06, + "loss": 0.2961, + "step": 5655 + }, + { + "epoch": 0.26, + "grad_norm": 0.6488491779877001, + "learning_rate": 4.863572355798511e-06, + "loss": 0.301, + "step": 5656 + }, + { + "epoch": 0.27, + "grad_norm": 0.6609818143663311, + "learning_rate": 4.863510555602059e-06, + "loss": 0.3078, + "step": 5657 + }, + { + "epoch": 0.27, + "grad_norm": 0.6324844052359626, + "learning_rate": 4.863448741804199e-06, + "loss": 0.3047, + "step": 5658 + }, + { + "epoch": 0.27, + "grad_norm": 0.6404325811178234, + "learning_rate": 4.863386914405285e-06, + "loss": 0.2773, + "step": 5659 + }, + { + "epoch": 0.27, + "grad_norm": 0.622255471742013, + "learning_rate": 4.863325073405674e-06, + "loss": 0.296, + "step": 5660 + }, + { + "epoch": 0.27, + "grad_norm": 0.6052828523352918, + "learning_rate": 4.863263218805721e-06, + "loss": 0.3111, + "step": 5661 + }, + { + "epoch": 0.27, + "grad_norm": 0.6569998654619116, + "learning_rate": 4.863201350605782e-06, + "loss": 0.2998, + "step": 5662 + }, + { + "epoch": 0.27, + "grad_norm": 0.6540104870105156, + "learning_rate": 4.863139468806213e-06, + "loss": 0.2969, + "step": 5663 + }, + { + "epoch": 0.27, + "grad_norm": 0.6424111210317865, + "learning_rate": 4.863077573407371e-06, + "loss": 0.2962, + "step": 5664 + }, + { + "epoch": 0.27, + "grad_norm": 0.6745726812401147, + "learning_rate": 4.863015664409611e-06, + "loss": 0.3011, + "step": 5665 + }, + { + "epoch": 0.27, + "grad_norm": 0.6172091086730888, + "learning_rate": 4.86295374181329e-06, + "loss": 0.2906, + "step": 5666 + }, + { + "epoch": 0.27, + "grad_norm": 0.6260738035884014, + "learning_rate": 4.862891805618764e-06, + "loss": 0.3042, + "step": 5667 + }, + { + "epoch": 0.27, + "grad_norm": 0.6960388943889589, + "learning_rate": 4.86282985582639e-06, + "loss": 0.3052, + "step": 5668 + }, + { + "epoch": 0.27, + "grad_norm": 0.6626039191956902, + "learning_rate": 4.862767892436523e-06, + "loss": 0.3107, + "step": 5669 + }, + { + "epoch": 0.27, + "grad_norm": 0.6229167415920124, + "learning_rate": 4.862705915449522e-06, + "loss": 0.2918, + "step": 5670 + }, + { + "epoch": 0.27, + "grad_norm": 0.63656125516992, + "learning_rate": 4.862643924865741e-06, + "loss": 0.2921, + "step": 5671 + }, + { + "epoch": 0.27, + "grad_norm": 0.6048725669484984, + "learning_rate": 4.862581920685539e-06, + "loss": 0.2877, + "step": 5672 + }, + { + "epoch": 0.27, + "grad_norm": 0.5533869612763046, + "learning_rate": 4.86251990290927e-06, + "loss": 0.2777, + "step": 5673 + }, + { + "epoch": 0.27, + "grad_norm": 0.6287138796456239, + "learning_rate": 4.862457871537295e-06, + "loss": 0.2988, + "step": 5674 + }, + { + "epoch": 0.27, + "grad_norm": 0.6580879719152559, + "learning_rate": 4.862395826569968e-06, + "loss": 0.3161, + "step": 5675 + }, + { + "epoch": 0.27, + "grad_norm": 0.6765270553092236, + "learning_rate": 4.862333768007647e-06, + "loss": 0.3073, + "step": 5676 + }, + { + "epoch": 0.27, + "grad_norm": 0.6007968371582593, + "learning_rate": 4.862271695850688e-06, + "loss": 0.2839, + "step": 5677 + }, + { + "epoch": 0.27, + "grad_norm": 0.6394944434652058, + "learning_rate": 4.86220961009945e-06, + "loss": 0.2824, + "step": 5678 + }, + { + "epoch": 0.27, + "grad_norm": 0.6025725662421099, + "learning_rate": 4.862147510754289e-06, + "loss": 0.2886, + "step": 5679 + }, + { + "epoch": 0.27, + "grad_norm": 0.6150796847717903, + "learning_rate": 4.862085397815563e-06, + "loss": 0.2935, + "step": 5680 + }, + { + "epoch": 0.27, + "grad_norm": 0.6359034560175085, + "learning_rate": 4.8620232712836294e-06, + "loss": 0.2899, + "step": 5681 + }, + { + "epoch": 0.27, + "grad_norm": 0.6264786599857474, + "learning_rate": 4.861961131158846e-06, + "loss": 0.2981, + "step": 5682 + }, + { + "epoch": 0.27, + "grad_norm": 0.6254284797651553, + "learning_rate": 4.861898977441568e-06, + "loss": 0.3037, + "step": 5683 + }, + { + "epoch": 0.27, + "grad_norm": 0.6497646426975437, + "learning_rate": 4.861836810132157e-06, + "loss": 0.3048, + "step": 5684 + }, + { + "epoch": 0.27, + "grad_norm": 0.6560214896162078, + "learning_rate": 4.861774629230967e-06, + "loss": 0.3123, + "step": 5685 + }, + { + "epoch": 0.27, + "grad_norm": 0.6655393379496667, + "learning_rate": 4.861712434738359e-06, + "loss": 0.3111, + "step": 5686 + }, + { + "epoch": 0.27, + "grad_norm": 0.5912394869152607, + "learning_rate": 4.86165022665469e-06, + "loss": 0.2895, + "step": 5687 + }, + { + "epoch": 0.27, + "grad_norm": 0.6173312675765351, + "learning_rate": 4.861588004980316e-06, + "loss": 0.2889, + "step": 5688 + }, + { + "epoch": 0.27, + "grad_norm": 0.6066643687842386, + "learning_rate": 4.861525769715598e-06, + "loss": 0.3169, + "step": 5689 + }, + { + "epoch": 0.27, + "grad_norm": 0.7028522466922931, + "learning_rate": 4.861463520860892e-06, + "loss": 0.3114, + "step": 5690 + }, + { + "epoch": 0.27, + "grad_norm": 0.6424453279063231, + "learning_rate": 4.861401258416557e-06, + "loss": 0.2916, + "step": 5691 + }, + { + "epoch": 0.27, + "grad_norm": 0.6404276799559668, + "learning_rate": 4.861338982382952e-06, + "loss": 0.3102, + "step": 5692 + }, + { + "epoch": 0.27, + "grad_norm": 0.6386579551104258, + "learning_rate": 4.861276692760433e-06, + "loss": 0.2979, + "step": 5693 + }, + { + "epoch": 0.27, + "grad_norm": 0.6083561176121749, + "learning_rate": 4.8612143895493616e-06, + "loss": 0.299, + "step": 5694 + }, + { + "epoch": 0.27, + "grad_norm": 0.6304776688250935, + "learning_rate": 4.861152072750094e-06, + "loss": 0.3208, + "step": 5695 + }, + { + "epoch": 0.27, + "grad_norm": 0.6872161001705999, + "learning_rate": 4.86108974236299e-06, + "loss": 0.2897, + "step": 5696 + }, + { + "epoch": 0.27, + "grad_norm": 0.6009197588579976, + "learning_rate": 4.861027398388408e-06, + "loss": 0.2867, + "step": 5697 + }, + { + "epoch": 0.27, + "grad_norm": 0.6088051777875035, + "learning_rate": 4.860965040826707e-06, + "loss": 0.2901, + "step": 5698 + }, + { + "epoch": 0.27, + "grad_norm": 0.6241972271545848, + "learning_rate": 4.8609026696782444e-06, + "loss": 0.2946, + "step": 5699 + }, + { + "epoch": 0.27, + "grad_norm": 0.6118763436286392, + "learning_rate": 4.860840284943382e-06, + "loss": 0.3056, + "step": 5700 + }, + { + "epoch": 0.27, + "grad_norm": 0.6132569371335402, + "learning_rate": 4.860777886622477e-06, + "loss": 0.2852, + "step": 5701 + }, + { + "epoch": 0.27, + "grad_norm": 0.6454487458788597, + "learning_rate": 4.860715474715887e-06, + "loss": 0.3231, + "step": 5702 + }, + { + "epoch": 0.27, + "grad_norm": 0.5700756219332328, + "learning_rate": 4.860653049223974e-06, + "loss": 0.2899, + "step": 5703 + }, + { + "epoch": 0.27, + "grad_norm": 0.6513170068090945, + "learning_rate": 4.860590610147096e-06, + "loss": 0.3053, + "step": 5704 + }, + { + "epoch": 0.27, + "grad_norm": 0.6721537294751206, + "learning_rate": 4.860528157485612e-06, + "loss": 0.2894, + "step": 5705 + }, + { + "epoch": 0.27, + "grad_norm": 0.6215148099777349, + "learning_rate": 4.860465691239882e-06, + "loss": 0.2882, + "step": 5706 + }, + { + "epoch": 0.27, + "grad_norm": 0.57965808026227, + "learning_rate": 4.8604032114102655e-06, + "loss": 0.2834, + "step": 5707 + }, + { + "epoch": 0.27, + "grad_norm": 0.6508347744629961, + "learning_rate": 4.860340717997121e-06, + "loss": 0.3088, + "step": 5708 + }, + { + "epoch": 0.27, + "grad_norm": 0.6376078172077033, + "learning_rate": 4.8602782110008095e-06, + "loss": 0.3012, + "step": 5709 + }, + { + "epoch": 0.27, + "grad_norm": 0.6597670850488246, + "learning_rate": 4.86021569042169e-06, + "loss": 0.3142, + "step": 5710 + }, + { + "epoch": 0.27, + "grad_norm": 0.6226308440745011, + "learning_rate": 4.8601531562601225e-06, + "loss": 0.2998, + "step": 5711 + }, + { + "epoch": 0.27, + "grad_norm": 0.6071209326711048, + "learning_rate": 4.8600906085164666e-06, + "loss": 0.299, + "step": 5712 + }, + { + "epoch": 0.27, + "grad_norm": 0.6464824543295244, + "learning_rate": 4.860028047191083e-06, + "loss": 0.3192, + "step": 5713 + }, + { + "epoch": 0.27, + "grad_norm": 0.5863099341677137, + "learning_rate": 4.8599654722843305e-06, + "loss": 0.2889, + "step": 5714 + }, + { + "epoch": 0.27, + "grad_norm": 0.6587756120682531, + "learning_rate": 4.85990288379657e-06, + "loss": 0.2958, + "step": 5715 + }, + { + "epoch": 0.27, + "grad_norm": 0.6537443223065056, + "learning_rate": 4.8598402817281616e-06, + "loss": 0.2919, + "step": 5716 + }, + { + "epoch": 0.27, + "grad_norm": 0.6350523978180356, + "learning_rate": 4.859777666079465e-06, + "loss": 0.3087, + "step": 5717 + }, + { + "epoch": 0.27, + "grad_norm": 0.6634460651419954, + "learning_rate": 4.859715036850841e-06, + "loss": 0.291, + "step": 5718 + }, + { + "epoch": 0.27, + "grad_norm": 0.5826953651663683, + "learning_rate": 4.859652394042652e-06, + "loss": 0.2913, + "step": 5719 + }, + { + "epoch": 0.27, + "grad_norm": 0.6722687145523714, + "learning_rate": 4.859589737655255e-06, + "loss": 0.3109, + "step": 5720 + }, + { + "epoch": 0.27, + "grad_norm": 0.6076754876934568, + "learning_rate": 4.859527067689012e-06, + "loss": 0.3106, + "step": 5721 + }, + { + "epoch": 0.27, + "grad_norm": 0.695852781107032, + "learning_rate": 4.859464384144284e-06, + "loss": 0.3156, + "step": 5722 + }, + { + "epoch": 0.27, + "grad_norm": 0.7007449996585747, + "learning_rate": 4.859401687021431e-06, + "loss": 0.3025, + "step": 5723 + }, + { + "epoch": 0.27, + "grad_norm": 0.7554107677900934, + "learning_rate": 4.8593389763208156e-06, + "loss": 0.3361, + "step": 5724 + }, + { + "epoch": 0.27, + "grad_norm": 0.5888083559303612, + "learning_rate": 4.859276252042796e-06, + "loss": 0.2994, + "step": 5725 + }, + { + "epoch": 0.27, + "grad_norm": 0.5872021220709797, + "learning_rate": 4.859213514187735e-06, + "loss": 0.2828, + "step": 5726 + }, + { + "epoch": 0.27, + "grad_norm": 0.7181925555639597, + "learning_rate": 4.859150762755994e-06, + "loss": 0.312, + "step": 5727 + }, + { + "epoch": 0.27, + "grad_norm": 0.6544912818227964, + "learning_rate": 4.859087997747932e-06, + "loss": 0.3161, + "step": 5728 + }, + { + "epoch": 0.27, + "grad_norm": 0.6404589382442597, + "learning_rate": 4.859025219163912e-06, + "loss": 0.2915, + "step": 5729 + }, + { + "epoch": 0.27, + "grad_norm": 0.6784242873130013, + "learning_rate": 4.858962427004296e-06, + "loss": 0.3224, + "step": 5730 + }, + { + "epoch": 0.27, + "grad_norm": 0.677190320684809, + "learning_rate": 4.8588996212694424e-06, + "loss": 0.3024, + "step": 5731 + }, + { + "epoch": 0.27, + "grad_norm": 0.6988147320493001, + "learning_rate": 4.858836801959715e-06, + "loss": 0.3305, + "step": 5732 + }, + { + "epoch": 0.27, + "grad_norm": 0.6524536089966898, + "learning_rate": 4.858773969075476e-06, + "loss": 0.2821, + "step": 5733 + }, + { + "epoch": 0.27, + "grad_norm": 0.5744538897519142, + "learning_rate": 4.858711122617084e-06, + "loss": 0.278, + "step": 5734 + }, + { + "epoch": 0.27, + "grad_norm": 0.6687729004363799, + "learning_rate": 4.858648262584902e-06, + "loss": 0.3189, + "step": 5735 + }, + { + "epoch": 0.27, + "grad_norm": 0.6289876263987286, + "learning_rate": 4.858585388979293e-06, + "loss": 0.2986, + "step": 5736 + }, + { + "epoch": 0.27, + "grad_norm": 0.6107666738550822, + "learning_rate": 4.858522501800618e-06, + "loss": 0.2903, + "step": 5737 + }, + { + "epoch": 0.27, + "grad_norm": 0.6574944616233749, + "learning_rate": 4.858459601049238e-06, + "loss": 0.297, + "step": 5738 + }, + { + "epoch": 0.27, + "grad_norm": 0.5947047572670836, + "learning_rate": 4.858396686725516e-06, + "loss": 0.3057, + "step": 5739 + }, + { + "epoch": 0.27, + "grad_norm": 0.6750385010817668, + "learning_rate": 4.858333758829815e-06, + "loss": 0.2934, + "step": 5740 + }, + { + "epoch": 0.27, + "grad_norm": 0.6094401022943908, + "learning_rate": 4.858270817362495e-06, + "loss": 0.2703, + "step": 5741 + }, + { + "epoch": 0.27, + "grad_norm": 0.6222618487269143, + "learning_rate": 4.858207862323919e-06, + "loss": 0.3091, + "step": 5742 + }, + { + "epoch": 0.27, + "grad_norm": 0.6315129369010182, + "learning_rate": 4.85814489371445e-06, + "loss": 0.299, + "step": 5743 + }, + { + "epoch": 0.27, + "grad_norm": 0.6532368606266921, + "learning_rate": 4.858081911534449e-06, + "loss": 0.2933, + "step": 5744 + }, + { + "epoch": 0.27, + "grad_norm": 0.6085304953443407, + "learning_rate": 4.85801891578428e-06, + "loss": 0.2919, + "step": 5745 + }, + { + "epoch": 0.27, + "grad_norm": 0.6338764875554642, + "learning_rate": 4.857955906464304e-06, + "loss": 0.2985, + "step": 5746 + }, + { + "epoch": 0.27, + "grad_norm": 0.6608631255049184, + "learning_rate": 4.857892883574886e-06, + "loss": 0.2929, + "step": 5747 + }, + { + "epoch": 0.27, + "grad_norm": 0.6292529934108186, + "learning_rate": 4.8578298471163856e-06, + "loss": 0.2943, + "step": 5748 + }, + { + "epoch": 0.27, + "grad_norm": 0.6767439972899137, + "learning_rate": 4.857766797089167e-06, + "loss": 0.3137, + "step": 5749 + }, + { + "epoch": 0.27, + "grad_norm": 0.6136188225106958, + "learning_rate": 4.857703733493595e-06, + "loss": 0.2931, + "step": 5750 + }, + { + "epoch": 0.27, + "grad_norm": 0.6675454410537499, + "learning_rate": 4.8576406563300286e-06, + "loss": 0.2975, + "step": 5751 + }, + { + "epoch": 0.27, + "grad_norm": 0.6579596765022365, + "learning_rate": 4.857577565598833e-06, + "loss": 0.2899, + "step": 5752 + }, + { + "epoch": 0.27, + "grad_norm": 0.6892268739315478, + "learning_rate": 4.857514461300372e-06, + "loss": 0.3067, + "step": 5753 + }, + { + "epoch": 0.27, + "grad_norm": 0.6284870190170241, + "learning_rate": 4.857451343435007e-06, + "loss": 0.2968, + "step": 5754 + }, + { + "epoch": 0.27, + "grad_norm": 0.6739230735922291, + "learning_rate": 4.857388212003102e-06, + "loss": 0.3157, + "step": 5755 + }, + { + "epoch": 0.27, + "grad_norm": 0.6089838136120663, + "learning_rate": 4.857325067005022e-06, + "loss": 0.2853, + "step": 5756 + }, + { + "epoch": 0.27, + "grad_norm": 0.6179447796296674, + "learning_rate": 4.857261908441127e-06, + "loss": 0.292, + "step": 5757 + }, + { + "epoch": 0.27, + "grad_norm": 0.6468542770456118, + "learning_rate": 4.857198736311782e-06, + "loss": 0.3176, + "step": 5758 + }, + { + "epoch": 0.27, + "grad_norm": 0.6431219493976394, + "learning_rate": 4.8571355506173515e-06, + "loss": 0.2924, + "step": 5759 + }, + { + "epoch": 0.27, + "grad_norm": 0.6080027795434348, + "learning_rate": 4.857072351358198e-06, + "loss": 0.3035, + "step": 5760 + }, + { + "epoch": 0.27, + "grad_norm": 0.6270607521129332, + "learning_rate": 4.857009138534686e-06, + "loss": 0.2817, + "step": 5761 + }, + { + "epoch": 0.27, + "grad_norm": 0.6650815796210631, + "learning_rate": 4.856945912147179e-06, + "loss": 0.3243, + "step": 5762 + }, + { + "epoch": 0.27, + "grad_norm": 0.5924873917058061, + "learning_rate": 4.8568826721960406e-06, + "loss": 0.291, + "step": 5763 + }, + { + "epoch": 0.27, + "grad_norm": 0.6244163097997162, + "learning_rate": 4.856819418681634e-06, + "loss": 0.3078, + "step": 5764 + }, + { + "epoch": 0.27, + "grad_norm": 0.6305711087913461, + "learning_rate": 4.856756151604325e-06, + "loss": 0.2828, + "step": 5765 + }, + { + "epoch": 0.27, + "grad_norm": 0.6166955469515798, + "learning_rate": 4.856692870964476e-06, + "loss": 0.2851, + "step": 5766 + }, + { + "epoch": 0.27, + "grad_norm": 0.6100095505749571, + "learning_rate": 4.8566295767624516e-06, + "loss": 0.3059, + "step": 5767 + }, + { + "epoch": 0.27, + "grad_norm": 0.5633605778729681, + "learning_rate": 4.856566268998617e-06, + "loss": 0.2979, + "step": 5768 + }, + { + "epoch": 0.27, + "grad_norm": 0.6061348352511793, + "learning_rate": 4.856502947673335e-06, + "loss": 0.2801, + "step": 5769 + }, + { + "epoch": 0.27, + "grad_norm": 0.7509358901192117, + "learning_rate": 4.8564396127869714e-06, + "loss": 0.3152, + "step": 5770 + }, + { + "epoch": 0.27, + "grad_norm": 0.617674214911693, + "learning_rate": 4.856376264339891e-06, + "loss": 0.295, + "step": 5771 + }, + { + "epoch": 0.27, + "grad_norm": 0.6037976383787206, + "learning_rate": 4.856312902332456e-06, + "loss": 0.295, + "step": 5772 + }, + { + "epoch": 0.27, + "grad_norm": 0.6314304072519684, + "learning_rate": 4.856249526765033e-06, + "loss": 0.3089, + "step": 5773 + }, + { + "epoch": 0.27, + "grad_norm": 0.6173613082741368, + "learning_rate": 4.856186137637986e-06, + "loss": 0.3, + "step": 5774 + }, + { + "epoch": 0.27, + "grad_norm": 0.6814274791646417, + "learning_rate": 4.85612273495168e-06, + "loss": 0.3266, + "step": 5775 + }, + { + "epoch": 0.27, + "grad_norm": 0.6895019741552595, + "learning_rate": 4.8560593187064805e-06, + "loss": 0.3091, + "step": 5776 + }, + { + "epoch": 0.27, + "grad_norm": 0.6458984279341645, + "learning_rate": 4.855995888902752e-06, + "loss": 0.2786, + "step": 5777 + }, + { + "epoch": 0.27, + "grad_norm": 0.6177518685189285, + "learning_rate": 4.855932445540857e-06, + "loss": 0.2905, + "step": 5778 + }, + { + "epoch": 0.27, + "grad_norm": 0.5475410125294297, + "learning_rate": 4.855868988621165e-06, + "loss": 0.2587, + "step": 5779 + }, + { + "epoch": 0.27, + "grad_norm": 0.6460383820111196, + "learning_rate": 4.855805518144039e-06, + "loss": 0.3011, + "step": 5780 + }, + { + "epoch": 0.27, + "grad_norm": 0.6690534976008322, + "learning_rate": 4.855742034109844e-06, + "loss": 0.2972, + "step": 5781 + }, + { + "epoch": 0.27, + "grad_norm": 0.6136544079502261, + "learning_rate": 4.855678536518946e-06, + "loss": 0.3022, + "step": 5782 + }, + { + "epoch": 0.27, + "grad_norm": 0.6093455100178301, + "learning_rate": 4.85561502537171e-06, + "loss": 0.3013, + "step": 5783 + }, + { + "epoch": 0.27, + "grad_norm": 0.6348469744104498, + "learning_rate": 4.855551500668501e-06, + "loss": 0.2704, + "step": 5784 + }, + { + "epoch": 0.27, + "grad_norm": 0.6074464319899573, + "learning_rate": 4.855487962409685e-06, + "loss": 0.2942, + "step": 5785 + }, + { + "epoch": 0.27, + "grad_norm": 0.6227751736556532, + "learning_rate": 4.855424410595629e-06, + "loss": 0.2812, + "step": 5786 + }, + { + "epoch": 0.27, + "grad_norm": 0.6133003083017416, + "learning_rate": 4.855360845226697e-06, + "loss": 0.2869, + "step": 5787 + }, + { + "epoch": 0.27, + "grad_norm": 0.698346179152879, + "learning_rate": 4.855297266303255e-06, + "loss": 0.3236, + "step": 5788 + }, + { + "epoch": 0.27, + "grad_norm": 0.6650785803054893, + "learning_rate": 4.855233673825669e-06, + "loss": 0.3362, + "step": 5789 + }, + { + "epoch": 0.27, + "grad_norm": 0.6001206815147938, + "learning_rate": 4.855170067794306e-06, + "loss": 0.2884, + "step": 5790 + }, + { + "epoch": 0.27, + "grad_norm": 0.6187939840472475, + "learning_rate": 4.855106448209531e-06, + "loss": 0.2944, + "step": 5791 + }, + { + "epoch": 0.27, + "grad_norm": 0.5862125855669837, + "learning_rate": 4.85504281507171e-06, + "loss": 0.2782, + "step": 5792 + }, + { + "epoch": 0.27, + "grad_norm": 0.6260557564328433, + "learning_rate": 4.85497916838121e-06, + "loss": 0.2898, + "step": 5793 + }, + { + "epoch": 0.27, + "grad_norm": 0.6325124747498747, + "learning_rate": 4.854915508138396e-06, + "loss": 0.3029, + "step": 5794 + }, + { + "epoch": 0.27, + "grad_norm": 0.6448222250410853, + "learning_rate": 4.854851834343636e-06, + "loss": 0.3161, + "step": 5795 + }, + { + "epoch": 0.27, + "grad_norm": 0.6135731039170583, + "learning_rate": 4.8547881469972945e-06, + "loss": 0.2801, + "step": 5796 + }, + { + "epoch": 0.27, + "grad_norm": 0.6488779805579212, + "learning_rate": 4.85472444609974e-06, + "loss": 0.2982, + "step": 5797 + }, + { + "epoch": 0.27, + "grad_norm": 0.5880940033611937, + "learning_rate": 4.854660731651338e-06, + "loss": 0.2696, + "step": 5798 + }, + { + "epoch": 0.27, + "grad_norm": 0.6176965789759894, + "learning_rate": 4.8545970036524546e-06, + "loss": 0.289, + "step": 5799 + }, + { + "epoch": 0.27, + "grad_norm": 0.6675968372692602, + "learning_rate": 4.854533262103458e-06, + "loss": 0.3238, + "step": 5800 + }, + { + "epoch": 0.27, + "grad_norm": 0.66998009238095, + "learning_rate": 4.854469507004714e-06, + "loss": 0.326, + "step": 5801 + }, + { + "epoch": 0.27, + "grad_norm": 0.6204021682776897, + "learning_rate": 4.85440573835659e-06, + "loss": 0.3183, + "step": 5802 + }, + { + "epoch": 0.27, + "grad_norm": 0.694906769509233, + "learning_rate": 4.854341956159453e-06, + "loss": 0.2936, + "step": 5803 + }, + { + "epoch": 0.27, + "grad_norm": 0.6733156660265672, + "learning_rate": 4.854278160413669e-06, + "loss": 0.3051, + "step": 5804 + }, + { + "epoch": 0.27, + "grad_norm": 0.6363149609894377, + "learning_rate": 4.854214351119606e-06, + "loss": 0.2963, + "step": 5805 + }, + { + "epoch": 0.27, + "grad_norm": 0.5977539113829826, + "learning_rate": 4.854150528277631e-06, + "loss": 0.2734, + "step": 5806 + }, + { + "epoch": 0.27, + "grad_norm": 0.6913076769615131, + "learning_rate": 4.854086691888112e-06, + "loss": 0.3134, + "step": 5807 + }, + { + "epoch": 0.27, + "grad_norm": 0.5925711056015109, + "learning_rate": 4.854022841951416e-06, + "loss": 0.2898, + "step": 5808 + }, + { + "epoch": 0.27, + "grad_norm": 0.6246486652878591, + "learning_rate": 4.85395897846791e-06, + "loss": 0.283, + "step": 5809 + }, + { + "epoch": 0.27, + "grad_norm": 0.6372090014744488, + "learning_rate": 4.853895101437961e-06, + "loss": 0.2935, + "step": 5810 + }, + { + "epoch": 0.27, + "grad_norm": 0.621120064515527, + "learning_rate": 4.853831210861938e-06, + "loss": 0.3088, + "step": 5811 + }, + { + "epoch": 0.27, + "grad_norm": 0.6182161134673724, + "learning_rate": 4.853767306740208e-06, + "loss": 0.2992, + "step": 5812 + }, + { + "epoch": 0.27, + "grad_norm": 0.6324182388234784, + "learning_rate": 4.853703389073139e-06, + "loss": 0.2965, + "step": 5813 + }, + { + "epoch": 0.27, + "grad_norm": 0.5895923286283091, + "learning_rate": 4.853639457861098e-06, + "loss": 0.2833, + "step": 5814 + }, + { + "epoch": 0.27, + "grad_norm": 0.5929819615300933, + "learning_rate": 4.8535755131044536e-06, + "loss": 0.294, + "step": 5815 + }, + { + "epoch": 0.27, + "grad_norm": 0.6082356835887578, + "learning_rate": 4.853511554803573e-06, + "loss": 0.2786, + "step": 5816 + }, + { + "epoch": 0.27, + "grad_norm": 0.6416568783352559, + "learning_rate": 4.853447582958826e-06, + "loss": 0.2846, + "step": 5817 + }, + { + "epoch": 0.27, + "grad_norm": 0.594386213779637, + "learning_rate": 4.853383597570579e-06, + "loss": 0.2693, + "step": 5818 + }, + { + "epoch": 0.27, + "grad_norm": 0.6831387233972898, + "learning_rate": 4.8533195986392015e-06, + "loss": 0.2849, + "step": 5819 + }, + { + "epoch": 0.27, + "grad_norm": 0.6144064073312462, + "learning_rate": 4.853255586165061e-06, + "loss": 0.2766, + "step": 5820 + }, + { + "epoch": 0.27, + "grad_norm": 0.628008881882958, + "learning_rate": 4.853191560148526e-06, + "loss": 0.2975, + "step": 5821 + }, + { + "epoch": 0.27, + "grad_norm": 0.7051758188144693, + "learning_rate": 4.8531275205899644e-06, + "loss": 0.2845, + "step": 5822 + }, + { + "epoch": 0.27, + "grad_norm": 0.7042663074860837, + "learning_rate": 4.853063467489747e-06, + "loss": 0.2752, + "step": 5823 + }, + { + "epoch": 0.27, + "grad_norm": 0.6039300967897374, + "learning_rate": 4.8529994008482394e-06, + "loss": 0.3037, + "step": 5824 + }, + { + "epoch": 0.27, + "grad_norm": 0.6856267994754284, + "learning_rate": 4.852935320665811e-06, + "loss": 0.3083, + "step": 5825 + }, + { + "epoch": 0.27, + "grad_norm": 0.5793282251850433, + "learning_rate": 4.8528712269428326e-06, + "loss": 0.2879, + "step": 5826 + }, + { + "epoch": 0.27, + "grad_norm": 0.634898837696702, + "learning_rate": 4.852807119679671e-06, + "loss": 0.2917, + "step": 5827 + }, + { + "epoch": 0.27, + "grad_norm": 0.6894999375677663, + "learning_rate": 4.852742998876696e-06, + "loss": 0.305, + "step": 5828 + }, + { + "epoch": 0.27, + "grad_norm": 0.6461931051655723, + "learning_rate": 4.852678864534277e-06, + "loss": 0.3113, + "step": 5829 + }, + { + "epoch": 0.27, + "grad_norm": 0.6978249948601705, + "learning_rate": 4.8526147166527816e-06, + "loss": 0.2932, + "step": 5830 + }, + { + "epoch": 0.27, + "grad_norm": 0.7054346414025633, + "learning_rate": 4.85255055523258e-06, + "loss": 0.3045, + "step": 5831 + }, + { + "epoch": 0.27, + "grad_norm": 0.6628561387658907, + "learning_rate": 4.852486380274042e-06, + "loss": 0.291, + "step": 5832 + }, + { + "epoch": 0.27, + "grad_norm": 0.6406560217160556, + "learning_rate": 4.852422191777535e-06, + "loss": 0.2884, + "step": 5833 + }, + { + "epoch": 0.27, + "grad_norm": 0.638903166603644, + "learning_rate": 4.85235798974343e-06, + "loss": 0.2964, + "step": 5834 + }, + { + "epoch": 0.27, + "grad_norm": 0.6032785229199948, + "learning_rate": 4.852293774172096e-06, + "loss": 0.2716, + "step": 5835 + }, + { + "epoch": 0.27, + "grad_norm": 0.7062279819599317, + "learning_rate": 4.8522295450639025e-06, + "loss": 0.3088, + "step": 5836 + }, + { + "epoch": 0.27, + "grad_norm": 0.632586098970034, + "learning_rate": 4.852165302419219e-06, + "loss": 0.2883, + "step": 5837 + }, + { + "epoch": 0.27, + "grad_norm": 0.5877975799830472, + "learning_rate": 4.852101046238416e-06, + "loss": 0.2826, + "step": 5838 + }, + { + "epoch": 0.27, + "grad_norm": 0.6370965271678429, + "learning_rate": 4.852036776521862e-06, + "loss": 0.2906, + "step": 5839 + }, + { + "epoch": 0.27, + "grad_norm": 0.620379588558801, + "learning_rate": 4.851972493269929e-06, + "loss": 0.2859, + "step": 5840 + }, + { + "epoch": 0.27, + "grad_norm": 0.6313282261988249, + "learning_rate": 4.851908196482983e-06, + "loss": 0.2793, + "step": 5841 + }, + { + "epoch": 0.27, + "grad_norm": 0.6793021781812733, + "learning_rate": 4.851843886161398e-06, + "loss": 0.3097, + "step": 5842 + }, + { + "epoch": 0.27, + "grad_norm": 0.6500384089182413, + "learning_rate": 4.851779562305543e-06, + "loss": 0.3129, + "step": 5843 + }, + { + "epoch": 0.27, + "grad_norm": 0.6687044758413895, + "learning_rate": 4.851715224915786e-06, + "loss": 0.2991, + "step": 5844 + }, + { + "epoch": 0.27, + "grad_norm": 0.5911762724073949, + "learning_rate": 4.8516508739925e-06, + "loss": 0.2759, + "step": 5845 + }, + { + "epoch": 0.27, + "grad_norm": 0.6316507573027358, + "learning_rate": 4.8515865095360545e-06, + "loss": 0.2694, + "step": 5846 + }, + { + "epoch": 0.27, + "grad_norm": 0.618905481975788, + "learning_rate": 4.85152213154682e-06, + "loss": 0.29, + "step": 5847 + }, + { + "epoch": 0.27, + "grad_norm": 0.6173212633890243, + "learning_rate": 4.851457740025166e-06, + "loss": 0.2762, + "step": 5848 + }, + { + "epoch": 0.27, + "grad_norm": 0.5932773222503025, + "learning_rate": 4.851393334971463e-06, + "loss": 0.2721, + "step": 5849 + }, + { + "epoch": 0.27, + "grad_norm": 0.6197877033662196, + "learning_rate": 4.8513289163860834e-06, + "loss": 0.2961, + "step": 5850 + }, + { + "epoch": 0.27, + "grad_norm": 0.6427857617945204, + "learning_rate": 4.851264484269397e-06, + "loss": 0.2953, + "step": 5851 + }, + { + "epoch": 0.27, + "grad_norm": 0.610570963602475, + "learning_rate": 4.851200038621774e-06, + "loss": 0.2938, + "step": 5852 + }, + { + "epoch": 0.27, + "grad_norm": 0.605532281530976, + "learning_rate": 4.851135579443586e-06, + "loss": 0.3079, + "step": 5853 + }, + { + "epoch": 0.27, + "grad_norm": 0.7044554080188318, + "learning_rate": 4.8510711067352035e-06, + "loss": 0.3199, + "step": 5854 + }, + { + "epoch": 0.27, + "grad_norm": 0.6403875068606936, + "learning_rate": 4.851006620496997e-06, + "loss": 0.3025, + "step": 5855 + }, + { + "epoch": 0.27, + "grad_norm": 0.6343243814149251, + "learning_rate": 4.850942120729339e-06, + "loss": 0.3071, + "step": 5856 + }, + { + "epoch": 0.27, + "grad_norm": 0.6290649642846211, + "learning_rate": 4.8508776074326e-06, + "loss": 0.3044, + "step": 5857 + }, + { + "epoch": 0.27, + "grad_norm": 0.6115364371393391, + "learning_rate": 4.850813080607152e-06, + "loss": 0.279, + "step": 5858 + }, + { + "epoch": 0.27, + "grad_norm": 0.6268258517916696, + "learning_rate": 4.850748540253365e-06, + "loss": 0.3131, + "step": 5859 + }, + { + "epoch": 0.27, + "grad_norm": 0.6496006472110328, + "learning_rate": 4.8506839863716104e-06, + "loss": 0.3004, + "step": 5860 + }, + { + "epoch": 0.27, + "grad_norm": 0.6116349222516312, + "learning_rate": 4.8506194189622605e-06, + "loss": 0.292, + "step": 5861 + }, + { + "epoch": 0.27, + "grad_norm": 0.6439521586585903, + "learning_rate": 4.850554838025688e-06, + "loss": 0.3082, + "step": 5862 + }, + { + "epoch": 0.27, + "grad_norm": 0.6418928220898055, + "learning_rate": 4.850490243562261e-06, + "loss": 0.3003, + "step": 5863 + }, + { + "epoch": 0.27, + "grad_norm": 0.6544842994944913, + "learning_rate": 4.850425635572355e-06, + "loss": 0.3038, + "step": 5864 + }, + { + "epoch": 0.27, + "grad_norm": 0.6056185693902946, + "learning_rate": 4.850361014056339e-06, + "loss": 0.2758, + "step": 5865 + }, + { + "epoch": 0.27, + "grad_norm": 0.6098071592499972, + "learning_rate": 4.850296379014587e-06, + "loss": 0.3035, + "step": 5866 + }, + { + "epoch": 0.27, + "grad_norm": 0.6035518330459689, + "learning_rate": 4.85023173044747e-06, + "loss": 0.293, + "step": 5867 + }, + { + "epoch": 0.27, + "grad_norm": 0.5972453649443912, + "learning_rate": 4.850167068355359e-06, + "loss": 0.309, + "step": 5868 + }, + { + "epoch": 0.27, + "grad_norm": 0.6050425155689564, + "learning_rate": 4.8501023927386286e-06, + "loss": 0.2982, + "step": 5869 + }, + { + "epoch": 0.27, + "grad_norm": 0.6242664042109345, + "learning_rate": 4.850037703597649e-06, + "loss": 0.2954, + "step": 5870 + }, + { + "epoch": 0.28, + "grad_norm": 0.6693099522143127, + "learning_rate": 4.849973000932794e-06, + "loss": 0.3163, + "step": 5871 + }, + { + "epoch": 0.28, + "grad_norm": 0.6096340903140555, + "learning_rate": 4.849908284744434e-06, + "loss": 0.2982, + "step": 5872 + }, + { + "epoch": 0.28, + "grad_norm": 0.5606104137552246, + "learning_rate": 4.849843555032942e-06, + "loss": 0.2928, + "step": 5873 + }, + { + "epoch": 0.28, + "grad_norm": 0.6263681090965362, + "learning_rate": 4.849778811798692e-06, + "loss": 0.3067, + "step": 5874 + }, + { + "epoch": 0.28, + "grad_norm": 0.594963296253528, + "learning_rate": 4.849714055042055e-06, + "loss": 0.2779, + "step": 5875 + }, + { + "epoch": 0.28, + "grad_norm": 0.6102791873041256, + "learning_rate": 4.849649284763404e-06, + "loss": 0.3066, + "step": 5876 + }, + { + "epoch": 0.28, + "grad_norm": 0.6176621813544668, + "learning_rate": 4.849584500963113e-06, + "loss": 0.293, + "step": 5877 + }, + { + "epoch": 0.28, + "grad_norm": 0.6497742240914988, + "learning_rate": 4.849519703641553e-06, + "loss": 0.2716, + "step": 5878 + }, + { + "epoch": 0.28, + "grad_norm": 0.6371694557510448, + "learning_rate": 4.849454892799098e-06, + "loss": 0.3062, + "step": 5879 + }, + { + "epoch": 0.28, + "grad_norm": 0.6208289094386819, + "learning_rate": 4.84939006843612e-06, + "loss": 0.2997, + "step": 5880 + }, + { + "epoch": 0.28, + "grad_norm": 0.6085426802534977, + "learning_rate": 4.849325230552993e-06, + "loss": 0.3031, + "step": 5881 + }, + { + "epoch": 0.28, + "grad_norm": 0.638562803317257, + "learning_rate": 4.84926037915009e-06, + "loss": 0.3074, + "step": 5882 + }, + { + "epoch": 0.28, + "grad_norm": 0.6620331908082419, + "learning_rate": 4.849195514227784e-06, + "loss": 0.3036, + "step": 5883 + }, + { + "epoch": 0.28, + "grad_norm": 0.6448685155976454, + "learning_rate": 4.849130635786447e-06, + "loss": 0.2996, + "step": 5884 + }, + { + "epoch": 0.28, + "grad_norm": 0.5747798768744754, + "learning_rate": 4.849065743826456e-06, + "loss": 0.291, + "step": 5885 + }, + { + "epoch": 0.28, + "grad_norm": 0.6333961062822125, + "learning_rate": 4.84900083834818e-06, + "loss": 0.3107, + "step": 5886 + }, + { + "epoch": 0.28, + "grad_norm": 0.6312741454426328, + "learning_rate": 4.8489359193519945e-06, + "loss": 0.2901, + "step": 5887 + }, + { + "epoch": 0.28, + "grad_norm": 0.621627608650699, + "learning_rate": 4.8488709868382745e-06, + "loss": 0.3225, + "step": 5888 + }, + { + "epoch": 0.28, + "grad_norm": 0.6159992229570009, + "learning_rate": 4.848806040807392e-06, + "loss": 0.2874, + "step": 5889 + }, + { + "epoch": 0.28, + "grad_norm": 0.6380281211453016, + "learning_rate": 4.84874108125972e-06, + "loss": 0.3003, + "step": 5890 + }, + { + "epoch": 0.28, + "grad_norm": 0.6183726357190389, + "learning_rate": 4.848676108195635e-06, + "loss": 0.284, + "step": 5891 + }, + { + "epoch": 0.28, + "grad_norm": 0.6951219980572763, + "learning_rate": 4.848611121615509e-06, + "loss": 0.3183, + "step": 5892 + }, + { + "epoch": 0.28, + "grad_norm": 0.6958615683016878, + "learning_rate": 4.848546121519715e-06, + "loss": 0.3201, + "step": 5893 + }, + { + "epoch": 0.28, + "grad_norm": 0.6081209006957491, + "learning_rate": 4.848481107908629e-06, + "loss": 0.2858, + "step": 5894 + }, + { + "epoch": 0.28, + "grad_norm": 0.6076769412318445, + "learning_rate": 4.848416080782625e-06, + "loss": 0.288, + "step": 5895 + }, + { + "epoch": 0.28, + "grad_norm": 0.6799500258313079, + "learning_rate": 4.8483510401420765e-06, + "loss": 0.2999, + "step": 5896 + }, + { + "epoch": 0.28, + "grad_norm": 0.6633481890853292, + "learning_rate": 4.848285985987358e-06, + "loss": 0.3018, + "step": 5897 + }, + { + "epoch": 0.28, + "grad_norm": 0.6354521014889968, + "learning_rate": 4.848220918318843e-06, + "loss": 0.2833, + "step": 5898 + }, + { + "epoch": 0.28, + "grad_norm": 0.6139128039356401, + "learning_rate": 4.848155837136908e-06, + "loss": 0.2765, + "step": 5899 + }, + { + "epoch": 0.28, + "grad_norm": 0.5791987978929211, + "learning_rate": 4.848090742441926e-06, + "loss": 0.2889, + "step": 5900 + }, + { + "epoch": 0.28, + "grad_norm": 0.678685228087802, + "learning_rate": 4.848025634234272e-06, + "loss": 0.3133, + "step": 5901 + }, + { + "epoch": 0.28, + "grad_norm": 0.5966013942749029, + "learning_rate": 4.84796051251432e-06, + "loss": 0.287, + "step": 5902 + }, + { + "epoch": 0.28, + "grad_norm": 0.6650508893459215, + "learning_rate": 4.847895377282447e-06, + "loss": 0.3021, + "step": 5903 + }, + { + "epoch": 0.28, + "grad_norm": 0.6251614526092083, + "learning_rate": 4.847830228539024e-06, + "loss": 0.3056, + "step": 5904 + }, + { + "epoch": 0.28, + "grad_norm": 0.6273426166671646, + "learning_rate": 4.84776506628443e-06, + "loss": 0.3185, + "step": 5905 + }, + { + "epoch": 0.28, + "grad_norm": 0.6377784734167955, + "learning_rate": 4.847699890519037e-06, + "loss": 0.3076, + "step": 5906 + }, + { + "epoch": 0.28, + "grad_norm": 0.6565241379028509, + "learning_rate": 4.847634701243222e-06, + "loss": 0.3098, + "step": 5907 + }, + { + "epoch": 0.28, + "grad_norm": 0.6151020839682104, + "learning_rate": 4.847569498457358e-06, + "loss": 0.306, + "step": 5908 + }, + { + "epoch": 0.28, + "grad_norm": 0.6167299334757728, + "learning_rate": 4.847504282161824e-06, + "loss": 0.3053, + "step": 5909 + }, + { + "epoch": 0.28, + "grad_norm": 0.6454190510609554, + "learning_rate": 4.847439052356991e-06, + "loss": 0.2967, + "step": 5910 + }, + { + "epoch": 0.28, + "grad_norm": 0.6440534166608831, + "learning_rate": 4.8473738090432365e-06, + "loss": 0.2972, + "step": 5911 + }, + { + "epoch": 0.28, + "grad_norm": 0.7068583987729024, + "learning_rate": 4.847308552220935e-06, + "loss": 0.319, + "step": 5912 + }, + { + "epoch": 0.28, + "grad_norm": 0.6530281892376595, + "learning_rate": 4.847243281890464e-06, + "loss": 0.3091, + "step": 5913 + }, + { + "epoch": 0.28, + "grad_norm": 0.7260545248528352, + "learning_rate": 4.8471779980521975e-06, + "loss": 0.3003, + "step": 5914 + }, + { + "epoch": 0.28, + "grad_norm": 0.6349126597657393, + "learning_rate": 4.8471127007065115e-06, + "loss": 0.3024, + "step": 5915 + }, + { + "epoch": 0.28, + "grad_norm": 0.6373691519885881, + "learning_rate": 4.8470473898537815e-06, + "loss": 0.3204, + "step": 5916 + }, + { + "epoch": 0.28, + "grad_norm": 0.6254245858436283, + "learning_rate": 4.846982065494384e-06, + "loss": 0.2985, + "step": 5917 + }, + { + "epoch": 0.28, + "grad_norm": 0.6061617149422404, + "learning_rate": 4.846916727628695e-06, + "loss": 0.2976, + "step": 5918 + }, + { + "epoch": 0.28, + "grad_norm": 0.6489866453512925, + "learning_rate": 4.8468513762570905e-06, + "loss": 0.3029, + "step": 5919 + }, + { + "epoch": 0.28, + "grad_norm": 0.6860126525667131, + "learning_rate": 4.846786011379944e-06, + "loss": 0.317, + "step": 5920 + }, + { + "epoch": 0.28, + "grad_norm": 0.6118825880053985, + "learning_rate": 4.846720632997637e-06, + "loss": 0.3027, + "step": 5921 + }, + { + "epoch": 0.28, + "grad_norm": 0.5914275707976868, + "learning_rate": 4.846655241110541e-06, + "loss": 0.2697, + "step": 5922 + }, + { + "epoch": 0.28, + "grad_norm": 0.6311641048435361, + "learning_rate": 4.8465898357190335e-06, + "loss": 0.2874, + "step": 5923 + }, + { + "epoch": 0.28, + "grad_norm": 0.6048375200876132, + "learning_rate": 4.846524416823491e-06, + "loss": 0.2948, + "step": 5924 + }, + { + "epoch": 0.28, + "grad_norm": 0.6161870496382548, + "learning_rate": 4.846458984424292e-06, + "loss": 0.3018, + "step": 5925 + }, + { + "epoch": 0.28, + "grad_norm": 0.6443447119839462, + "learning_rate": 4.8463935385218095e-06, + "loss": 0.2865, + "step": 5926 + }, + { + "epoch": 0.28, + "grad_norm": 0.6710353137588849, + "learning_rate": 4.8463280791164225e-06, + "loss": 0.2994, + "step": 5927 + }, + { + "epoch": 0.28, + "grad_norm": 0.6276940469333931, + "learning_rate": 4.846262606208507e-06, + "loss": 0.3114, + "step": 5928 + }, + { + "epoch": 0.28, + "grad_norm": 0.7024162231853419, + "learning_rate": 4.84619711979844e-06, + "loss": 0.3109, + "step": 5929 + }, + { + "epoch": 0.28, + "grad_norm": 0.5841348807591599, + "learning_rate": 4.846131619886598e-06, + "loss": 0.3032, + "step": 5930 + }, + { + "epoch": 0.28, + "grad_norm": 0.6534226747158569, + "learning_rate": 4.846066106473359e-06, + "loss": 0.291, + "step": 5931 + }, + { + "epoch": 0.28, + "grad_norm": 0.6296017561828468, + "learning_rate": 4.846000579559099e-06, + "loss": 0.2887, + "step": 5932 + }, + { + "epoch": 0.28, + "grad_norm": 0.6428616157651743, + "learning_rate": 4.845935039144195e-06, + "loss": 0.2824, + "step": 5933 + }, + { + "epoch": 0.28, + "grad_norm": 0.6108766280129645, + "learning_rate": 4.845869485229025e-06, + "loss": 0.2967, + "step": 5934 + }, + { + "epoch": 0.28, + "grad_norm": 0.6429252319502311, + "learning_rate": 4.845803917813965e-06, + "loss": 0.331, + "step": 5935 + }, + { + "epoch": 0.28, + "grad_norm": 0.6183421984716312, + "learning_rate": 4.845738336899394e-06, + "loss": 0.2887, + "step": 5936 + }, + { + "epoch": 0.28, + "grad_norm": 0.5840255958631215, + "learning_rate": 4.8456727424856875e-06, + "loss": 0.2919, + "step": 5937 + }, + { + "epoch": 0.28, + "grad_norm": 0.648885359014251, + "learning_rate": 4.845607134573224e-06, + "loss": 0.2907, + "step": 5938 + }, + { + "epoch": 0.28, + "grad_norm": 0.6029531549161292, + "learning_rate": 4.845541513162382e-06, + "loss": 0.3214, + "step": 5939 + }, + { + "epoch": 0.28, + "grad_norm": 0.6298162700000636, + "learning_rate": 4.845475878253537e-06, + "loss": 0.3206, + "step": 5940 + }, + { + "epoch": 0.28, + "grad_norm": 0.6299881425469819, + "learning_rate": 4.845410229847068e-06, + "loss": 0.3042, + "step": 5941 + }, + { + "epoch": 0.28, + "grad_norm": 0.615959553777756, + "learning_rate": 4.845344567943353e-06, + "loss": 0.3159, + "step": 5942 + }, + { + "epoch": 0.28, + "grad_norm": 0.6184852570022595, + "learning_rate": 4.845278892542769e-06, + "loss": 0.289, + "step": 5943 + }, + { + "epoch": 0.28, + "grad_norm": 0.5925212684799458, + "learning_rate": 4.845213203645695e-06, + "loss": 0.2851, + "step": 5944 + }, + { + "epoch": 0.28, + "grad_norm": 0.6753765888952429, + "learning_rate": 4.845147501252509e-06, + "loss": 0.2846, + "step": 5945 + }, + { + "epoch": 0.28, + "grad_norm": 0.6442704659932181, + "learning_rate": 4.845081785363587e-06, + "loss": 0.2997, + "step": 5946 + }, + { + "epoch": 0.28, + "grad_norm": 0.6236244274867494, + "learning_rate": 4.8450160559793095e-06, + "loss": 0.3096, + "step": 5947 + }, + { + "epoch": 0.28, + "grad_norm": 0.621666997266756, + "learning_rate": 4.844950313100054e-06, + "loss": 0.2919, + "step": 5948 + }, + { + "epoch": 0.28, + "grad_norm": 0.6437807857714328, + "learning_rate": 4.844884556726199e-06, + "loss": 0.3096, + "step": 5949 + }, + { + "epoch": 0.28, + "grad_norm": 0.6488932464428778, + "learning_rate": 4.844818786858122e-06, + "loss": 0.2835, + "step": 5950 + }, + { + "epoch": 0.28, + "grad_norm": 0.7147478356101504, + "learning_rate": 4.844753003496203e-06, + "loss": 0.313, + "step": 5951 + }, + { + "epoch": 0.28, + "grad_norm": 0.6135327988615731, + "learning_rate": 4.84468720664082e-06, + "loss": 0.2947, + "step": 5952 + }, + { + "epoch": 0.28, + "grad_norm": 0.6043083853414978, + "learning_rate": 4.84462139629235e-06, + "loss": 0.3089, + "step": 5953 + }, + { + "epoch": 0.28, + "grad_norm": 0.6343764797353192, + "learning_rate": 4.844555572451175e-06, + "loss": 0.3028, + "step": 5954 + }, + { + "epoch": 0.28, + "grad_norm": 0.7282141008965419, + "learning_rate": 4.844489735117671e-06, + "loss": 0.3152, + "step": 5955 + }, + { + "epoch": 0.28, + "grad_norm": 0.6496386145510998, + "learning_rate": 4.844423884292218e-06, + "loss": 0.3157, + "step": 5956 + }, + { + "epoch": 0.28, + "grad_norm": 0.7009042985606619, + "learning_rate": 4.844358019975195e-06, + "loss": 0.3224, + "step": 5957 + }, + { + "epoch": 0.28, + "grad_norm": 0.7297610091659886, + "learning_rate": 4.844292142166981e-06, + "loss": 0.3308, + "step": 5958 + }, + { + "epoch": 0.28, + "grad_norm": 0.5992932417287048, + "learning_rate": 4.844226250867954e-06, + "loss": 0.2911, + "step": 5959 + }, + { + "epoch": 0.28, + "grad_norm": 0.6660495117129963, + "learning_rate": 4.844160346078495e-06, + "loss": 0.3154, + "step": 5960 + }, + { + "epoch": 0.28, + "grad_norm": 0.5978414514688847, + "learning_rate": 4.844094427798982e-06, + "loss": 0.2933, + "step": 5961 + }, + { + "epoch": 0.28, + "grad_norm": 0.6449413389470685, + "learning_rate": 4.844028496029794e-06, + "loss": 0.2984, + "step": 5962 + }, + { + "epoch": 0.28, + "grad_norm": 0.6527732327039397, + "learning_rate": 4.843962550771313e-06, + "loss": 0.302, + "step": 5963 + }, + { + "epoch": 0.28, + "grad_norm": 0.7074628963529147, + "learning_rate": 4.843896592023916e-06, + "loss": 0.3272, + "step": 5964 + }, + { + "epoch": 0.28, + "grad_norm": 0.6407628107564335, + "learning_rate": 4.843830619787982e-06, + "loss": 0.3156, + "step": 5965 + }, + { + "epoch": 0.28, + "grad_norm": 0.573015389404475, + "learning_rate": 4.843764634063893e-06, + "loss": 0.2892, + "step": 5966 + }, + { + "epoch": 0.28, + "grad_norm": 0.6321261285959616, + "learning_rate": 4.843698634852028e-06, + "loss": 0.3247, + "step": 5967 + }, + { + "epoch": 0.28, + "grad_norm": 0.6702751216847195, + "learning_rate": 4.843632622152765e-06, + "loss": 0.3047, + "step": 5968 + }, + { + "epoch": 0.28, + "grad_norm": 0.5834801703686463, + "learning_rate": 4.843566595966487e-06, + "loss": 0.2836, + "step": 5969 + }, + { + "epoch": 0.28, + "grad_norm": 0.6722072930320362, + "learning_rate": 4.843500556293571e-06, + "loss": 0.2825, + "step": 5970 + }, + { + "epoch": 0.28, + "grad_norm": 0.6281727559992022, + "learning_rate": 4.843434503134399e-06, + "loss": 0.2994, + "step": 5971 + }, + { + "epoch": 0.28, + "grad_norm": 0.5570192607239071, + "learning_rate": 4.84336843648935e-06, + "loss": 0.2815, + "step": 5972 + }, + { + "epoch": 0.28, + "grad_norm": 0.5940634351938798, + "learning_rate": 4.843302356358804e-06, + "loss": 0.2892, + "step": 5973 + }, + { + "epoch": 0.28, + "grad_norm": 0.598629300970294, + "learning_rate": 4.843236262743143e-06, + "loss": 0.2999, + "step": 5974 + }, + { + "epoch": 0.28, + "grad_norm": 0.676091004075311, + "learning_rate": 4.8431701556427454e-06, + "loss": 0.2905, + "step": 5975 + }, + { + "epoch": 0.28, + "grad_norm": 0.6777386095081095, + "learning_rate": 4.8431040350579936e-06, + "loss": 0.2918, + "step": 5976 + }, + { + "epoch": 0.28, + "grad_norm": 0.5958910839647249, + "learning_rate": 4.843037900989267e-06, + "loss": 0.2861, + "step": 5977 + }, + { + "epoch": 0.28, + "grad_norm": 0.63366033663219, + "learning_rate": 4.842971753436945e-06, + "loss": 0.3071, + "step": 5978 + }, + { + "epoch": 0.28, + "grad_norm": 0.6052643315613083, + "learning_rate": 4.84290559240141e-06, + "loss": 0.2616, + "step": 5979 + }, + { + "epoch": 0.28, + "grad_norm": 0.6187568763258997, + "learning_rate": 4.842839417883042e-06, + "loss": 0.3049, + "step": 5980 + }, + { + "epoch": 0.28, + "grad_norm": 0.6627525371555457, + "learning_rate": 4.842773229882222e-06, + "loss": 0.2957, + "step": 5981 + }, + { + "epoch": 0.28, + "grad_norm": 0.6407090003950746, + "learning_rate": 4.842707028399332e-06, + "loss": 0.2911, + "step": 5982 + }, + { + "epoch": 0.28, + "grad_norm": 0.6492213028953588, + "learning_rate": 4.84264081343475e-06, + "loss": 0.2818, + "step": 5983 + }, + { + "epoch": 0.28, + "grad_norm": 0.7075078234778968, + "learning_rate": 4.84257458498886e-06, + "loss": 0.3234, + "step": 5984 + }, + { + "epoch": 0.28, + "grad_norm": 0.6932538218382338, + "learning_rate": 4.842508343062043e-06, + "loss": 0.2926, + "step": 5985 + }, + { + "epoch": 0.28, + "grad_norm": 0.6406411936179812, + "learning_rate": 4.842442087654677e-06, + "loss": 0.2905, + "step": 5986 + }, + { + "epoch": 0.28, + "grad_norm": 0.6632189519062031, + "learning_rate": 4.842375818767147e-06, + "loss": 0.2846, + "step": 5987 + }, + { + "epoch": 0.28, + "grad_norm": 0.6257200988872497, + "learning_rate": 4.842309536399833e-06, + "loss": 0.2967, + "step": 5988 + }, + { + "epoch": 0.28, + "grad_norm": 0.6498463538201537, + "learning_rate": 4.842243240553115e-06, + "loss": 0.3026, + "step": 5989 + }, + { + "epoch": 0.28, + "grad_norm": 0.6349059446433909, + "learning_rate": 4.842176931227378e-06, + "loss": 0.315, + "step": 5990 + }, + { + "epoch": 0.28, + "grad_norm": 0.6393397482455201, + "learning_rate": 4.842110608423e-06, + "loss": 0.3111, + "step": 5991 + }, + { + "epoch": 0.28, + "grad_norm": 0.6087994312109701, + "learning_rate": 4.842044272140364e-06, + "loss": 0.2903, + "step": 5992 + }, + { + "epoch": 0.28, + "grad_norm": 0.5884212507921193, + "learning_rate": 4.8419779223798525e-06, + "loss": 0.2862, + "step": 5993 + }, + { + "epoch": 0.28, + "grad_norm": 0.6005678718692495, + "learning_rate": 4.841911559141846e-06, + "loss": 0.3063, + "step": 5994 + }, + { + "epoch": 0.28, + "grad_norm": 0.5919188893564139, + "learning_rate": 4.841845182426727e-06, + "loss": 0.2862, + "step": 5995 + }, + { + "epoch": 0.28, + "grad_norm": 0.5704842534528899, + "learning_rate": 4.841778792234879e-06, + "loss": 0.2938, + "step": 5996 + }, + { + "epoch": 0.28, + "grad_norm": 0.6468889948709475, + "learning_rate": 4.841712388566681e-06, + "loss": 0.3278, + "step": 5997 + }, + { + "epoch": 0.28, + "grad_norm": 0.665208112223028, + "learning_rate": 4.841645971422518e-06, + "loss": 0.311, + "step": 5998 + }, + { + "epoch": 0.28, + "grad_norm": 0.681009090506375, + "learning_rate": 4.841579540802771e-06, + "loss": 0.301, + "step": 5999 + }, + { + "epoch": 0.28, + "grad_norm": 0.6594225981331265, + "learning_rate": 4.841513096707822e-06, + "loss": 0.3021, + "step": 6000 + }, + { + "epoch": 0.28, + "grad_norm": 0.6415210380952129, + "learning_rate": 4.841446639138053e-06, + "loss": 0.2888, + "step": 6001 + }, + { + "epoch": 0.28, + "grad_norm": 0.647564768831147, + "learning_rate": 4.841380168093848e-06, + "loss": 0.3164, + "step": 6002 + }, + { + "epoch": 0.28, + "grad_norm": 0.6494893897783077, + "learning_rate": 4.841313683575587e-06, + "loss": 0.3087, + "step": 6003 + }, + { + "epoch": 0.28, + "grad_norm": 0.6133433892760255, + "learning_rate": 4.8412471855836555e-06, + "loss": 0.286, + "step": 6004 + }, + { + "epoch": 0.28, + "grad_norm": 0.6254624572566048, + "learning_rate": 4.841180674118435e-06, + "loss": 0.3152, + "step": 6005 + }, + { + "epoch": 0.28, + "grad_norm": 0.5877895194608139, + "learning_rate": 4.841114149180308e-06, + "loss": 0.286, + "step": 6006 + }, + { + "epoch": 0.28, + "grad_norm": 0.6166443065200344, + "learning_rate": 4.841047610769657e-06, + "loss": 0.2848, + "step": 6007 + }, + { + "epoch": 0.28, + "grad_norm": 0.7108079930860258, + "learning_rate": 4.840981058886866e-06, + "loss": 0.3176, + "step": 6008 + }, + { + "epoch": 0.28, + "grad_norm": 0.589781330953379, + "learning_rate": 4.8409144935323175e-06, + "loss": 0.2952, + "step": 6009 + }, + { + "epoch": 0.28, + "grad_norm": 0.6492457250228623, + "learning_rate": 4.8408479147063936e-06, + "loss": 0.2867, + "step": 6010 + }, + { + "epoch": 0.28, + "grad_norm": 0.668868037633402, + "learning_rate": 4.840781322409479e-06, + "loss": 0.2998, + "step": 6011 + }, + { + "epoch": 0.28, + "grad_norm": 0.6543681766228843, + "learning_rate": 4.840714716641956e-06, + "loss": 0.3138, + "step": 6012 + }, + { + "epoch": 0.28, + "grad_norm": 0.5733036875877406, + "learning_rate": 4.840648097404208e-06, + "loss": 0.2763, + "step": 6013 + }, + { + "epoch": 0.28, + "grad_norm": 0.6522553630932679, + "learning_rate": 4.840581464696619e-06, + "loss": 0.2768, + "step": 6014 + }, + { + "epoch": 0.28, + "grad_norm": 0.6620364403205398, + "learning_rate": 4.8405148185195715e-06, + "loss": 0.3192, + "step": 6015 + }, + { + "epoch": 0.28, + "grad_norm": 0.6129089968965832, + "learning_rate": 4.84044815887345e-06, + "loss": 0.2896, + "step": 6016 + }, + { + "epoch": 0.28, + "grad_norm": 0.6451039776328191, + "learning_rate": 4.8403814857586365e-06, + "loss": 0.2776, + "step": 6017 + }, + { + "epoch": 0.28, + "grad_norm": 0.6480821462219595, + "learning_rate": 4.840314799175516e-06, + "loss": 0.289, + "step": 6018 + }, + { + "epoch": 0.28, + "grad_norm": 0.6346161225677094, + "learning_rate": 4.840248099124474e-06, + "loss": 0.2768, + "step": 6019 + }, + { + "epoch": 0.28, + "grad_norm": 0.6483488194449233, + "learning_rate": 4.84018138560589e-06, + "loss": 0.2962, + "step": 6020 + }, + { + "epoch": 0.28, + "grad_norm": 0.6885072716438164, + "learning_rate": 4.840114658620151e-06, + "loss": 0.3118, + "step": 6021 + }, + { + "epoch": 0.28, + "grad_norm": 0.6011158148906872, + "learning_rate": 4.840047918167641e-06, + "loss": 0.2837, + "step": 6022 + }, + { + "epoch": 0.28, + "grad_norm": 0.6985708252229311, + "learning_rate": 4.839981164248742e-06, + "loss": 0.2979, + "step": 6023 + }, + { + "epoch": 0.28, + "grad_norm": 0.6270059424726496, + "learning_rate": 4.839914396863842e-06, + "loss": 0.2888, + "step": 6024 + }, + { + "epoch": 0.28, + "grad_norm": 0.6087609611663648, + "learning_rate": 4.839847616013321e-06, + "loss": 0.2868, + "step": 6025 + }, + { + "epoch": 0.28, + "grad_norm": 0.6314168992550299, + "learning_rate": 4.839780821697565e-06, + "loss": 0.2924, + "step": 6026 + }, + { + "epoch": 0.28, + "grad_norm": 0.6594596874420995, + "learning_rate": 4.839714013916958e-06, + "loss": 0.3135, + "step": 6027 + }, + { + "epoch": 0.28, + "grad_norm": 0.597266336465702, + "learning_rate": 4.839647192671886e-06, + "loss": 0.2931, + "step": 6028 + }, + { + "epoch": 0.28, + "grad_norm": 0.6248692643681809, + "learning_rate": 4.839580357962732e-06, + "loss": 0.3043, + "step": 6029 + }, + { + "epoch": 0.28, + "grad_norm": 0.6254921285047045, + "learning_rate": 4.839513509789882e-06, + "loss": 0.2966, + "step": 6030 + }, + { + "epoch": 0.28, + "grad_norm": 0.6233430227291853, + "learning_rate": 4.839446648153718e-06, + "loss": 0.2984, + "step": 6031 + }, + { + "epoch": 0.28, + "grad_norm": 0.6421253951698503, + "learning_rate": 4.839379773054628e-06, + "loss": 0.3146, + "step": 6032 + }, + { + "epoch": 0.28, + "grad_norm": 0.6425075285075394, + "learning_rate": 4.8393128844929945e-06, + "loss": 0.3056, + "step": 6033 + }, + { + "epoch": 0.28, + "grad_norm": 0.5942111337164186, + "learning_rate": 4.839245982469204e-06, + "loss": 0.305, + "step": 6034 + }, + { + "epoch": 0.28, + "grad_norm": 0.6429502914578735, + "learning_rate": 4.83917906698364e-06, + "loss": 0.3025, + "step": 6035 + }, + { + "epoch": 0.28, + "grad_norm": 0.6715303495527629, + "learning_rate": 4.839112138036689e-06, + "loss": 0.3062, + "step": 6036 + }, + { + "epoch": 0.28, + "grad_norm": 0.6717183658414492, + "learning_rate": 4.839045195628735e-06, + "loss": 0.2842, + "step": 6037 + }, + { + "epoch": 0.28, + "grad_norm": 0.6662632180020349, + "learning_rate": 4.838978239760165e-06, + "loss": 0.2953, + "step": 6038 + }, + { + "epoch": 0.28, + "grad_norm": 0.6734085100650231, + "learning_rate": 4.8389112704313625e-06, + "loss": 0.3027, + "step": 6039 + }, + { + "epoch": 0.28, + "grad_norm": 0.6183295724629325, + "learning_rate": 4.838844287642713e-06, + "loss": 0.3226, + "step": 6040 + }, + { + "epoch": 0.28, + "grad_norm": 0.6411325665901693, + "learning_rate": 4.838777291394603e-06, + "loss": 0.3001, + "step": 6041 + }, + { + "epoch": 0.28, + "grad_norm": 0.6112278347531354, + "learning_rate": 4.8387102816874175e-06, + "loss": 0.3014, + "step": 6042 + }, + { + "epoch": 0.28, + "grad_norm": 0.6611220850655, + "learning_rate": 4.838643258521542e-06, + "loss": 0.3091, + "step": 6043 + }, + { + "epoch": 0.28, + "grad_norm": 0.5936041703050642, + "learning_rate": 4.838576221897362e-06, + "loss": 0.2923, + "step": 6044 + }, + { + "epoch": 0.28, + "grad_norm": 0.5805467938838257, + "learning_rate": 4.838509171815264e-06, + "loss": 0.2925, + "step": 6045 + }, + { + "epoch": 0.28, + "grad_norm": 0.638496511588802, + "learning_rate": 4.838442108275634e-06, + "loss": 0.2897, + "step": 6046 + }, + { + "epoch": 0.28, + "grad_norm": 0.6583981333077271, + "learning_rate": 4.838375031278857e-06, + "loss": 0.3109, + "step": 6047 + }, + { + "epoch": 0.28, + "grad_norm": 0.672307871166089, + "learning_rate": 4.838307940825319e-06, + "loss": 0.3298, + "step": 6048 + }, + { + "epoch": 0.28, + "grad_norm": 0.6286993189607331, + "learning_rate": 4.838240836915406e-06, + "loss": 0.2862, + "step": 6049 + }, + { + "epoch": 0.28, + "grad_norm": 0.6835105995084039, + "learning_rate": 4.838173719549506e-06, + "loss": 0.3353, + "step": 6050 + }, + { + "epoch": 0.28, + "grad_norm": 0.6714711075136224, + "learning_rate": 4.838106588728003e-06, + "loss": 0.3104, + "step": 6051 + }, + { + "epoch": 0.28, + "grad_norm": 0.5685838840548277, + "learning_rate": 4.838039444451284e-06, + "loss": 0.2757, + "step": 6052 + }, + { + "epoch": 0.28, + "grad_norm": 0.6623029517172029, + "learning_rate": 4.837972286719738e-06, + "loss": 0.2758, + "step": 6053 + }, + { + "epoch": 0.28, + "grad_norm": 0.6440980345704533, + "learning_rate": 4.837905115533747e-06, + "loss": 0.3099, + "step": 6054 + }, + { + "epoch": 0.28, + "grad_norm": 0.7067552725960652, + "learning_rate": 4.837837930893699e-06, + "loss": 0.3192, + "step": 6055 + }, + { + "epoch": 0.28, + "grad_norm": 0.6411045753223592, + "learning_rate": 4.837770732799983e-06, + "loss": 0.2984, + "step": 6056 + }, + { + "epoch": 0.28, + "grad_norm": 0.6308507393043443, + "learning_rate": 4.837703521252983e-06, + "loss": 0.3114, + "step": 6057 + }, + { + "epoch": 0.28, + "grad_norm": 0.5915193399436118, + "learning_rate": 4.837636296253088e-06, + "loss": 0.288, + "step": 6058 + }, + { + "epoch": 0.28, + "grad_norm": 0.6398371406008709, + "learning_rate": 4.837569057800682e-06, + "loss": 0.305, + "step": 6059 + }, + { + "epoch": 0.28, + "grad_norm": 0.6896049416619332, + "learning_rate": 4.8375018058961544e-06, + "loss": 0.3156, + "step": 6060 + }, + { + "epoch": 0.28, + "grad_norm": 0.6354591332295186, + "learning_rate": 4.837434540539891e-06, + "loss": 0.3267, + "step": 6061 + }, + { + "epoch": 0.28, + "grad_norm": 0.5986843754799889, + "learning_rate": 4.8373672617322805e-06, + "loss": 0.3009, + "step": 6062 + }, + { + "epoch": 0.28, + "grad_norm": 0.621101368708306, + "learning_rate": 4.837299969473708e-06, + "loss": 0.3114, + "step": 6063 + }, + { + "epoch": 0.28, + "grad_norm": 0.6182569022300022, + "learning_rate": 4.837232663764562e-06, + "loss": 0.3035, + "step": 6064 + }, + { + "epoch": 0.28, + "grad_norm": 0.5841591890598196, + "learning_rate": 4.837165344605229e-06, + "loss": 0.287, + "step": 6065 + }, + { + "epoch": 0.28, + "grad_norm": 0.5927110735075333, + "learning_rate": 4.8370980119960975e-06, + "loss": 0.3047, + "step": 6066 + }, + { + "epoch": 0.28, + "grad_norm": 0.5845960920386993, + "learning_rate": 4.837030665937554e-06, + "loss": 0.2799, + "step": 6067 + }, + { + "epoch": 0.28, + "grad_norm": 0.6407592130989415, + "learning_rate": 4.836963306429986e-06, + "loss": 0.3246, + "step": 6068 + }, + { + "epoch": 0.28, + "grad_norm": 0.6522987955020332, + "learning_rate": 4.836895933473782e-06, + "loss": 0.3156, + "step": 6069 + }, + { + "epoch": 0.28, + "grad_norm": 0.6612106040304513, + "learning_rate": 4.836828547069329e-06, + "loss": 0.2864, + "step": 6070 + }, + { + "epoch": 0.28, + "grad_norm": 0.6403054229247401, + "learning_rate": 4.836761147217015e-06, + "loss": 0.2727, + "step": 6071 + }, + { + "epoch": 0.28, + "grad_norm": 0.6514679582182634, + "learning_rate": 4.836693733917228e-06, + "loss": 0.3063, + "step": 6072 + }, + { + "epoch": 0.28, + "grad_norm": 0.6668351612818895, + "learning_rate": 4.836626307170356e-06, + "loss": 0.3133, + "step": 6073 + }, + { + "epoch": 0.28, + "grad_norm": 0.589988857709807, + "learning_rate": 4.8365588669767875e-06, + "loss": 0.2933, + "step": 6074 + }, + { + "epoch": 0.28, + "grad_norm": 0.6015470009211403, + "learning_rate": 4.836491413336909e-06, + "loss": 0.2839, + "step": 6075 + }, + { + "epoch": 0.28, + "grad_norm": 0.6071497010615169, + "learning_rate": 4.83642394625111e-06, + "loss": 0.2949, + "step": 6076 + }, + { + "epoch": 0.28, + "grad_norm": 0.6875401359293061, + "learning_rate": 4.836356465719779e-06, + "loss": 0.3078, + "step": 6077 + }, + { + "epoch": 0.28, + "grad_norm": 0.6936024438191227, + "learning_rate": 4.836288971743302e-06, + "loss": 0.3198, + "step": 6078 + }, + { + "epoch": 0.28, + "grad_norm": 0.5820892322199928, + "learning_rate": 4.836221464322071e-06, + "loss": 0.2805, + "step": 6079 + }, + { + "epoch": 0.28, + "grad_norm": 0.611905587201151, + "learning_rate": 4.836153943456472e-06, + "loss": 0.3039, + "step": 6080 + }, + { + "epoch": 0.28, + "grad_norm": 0.6312654863658659, + "learning_rate": 4.8360864091468945e-06, + "loss": 0.3093, + "step": 6081 + }, + { + "epoch": 0.28, + "grad_norm": 0.6593349747280555, + "learning_rate": 4.836018861393727e-06, + "loss": 0.2835, + "step": 6082 + }, + { + "epoch": 0.28, + "grad_norm": 0.5917654356346687, + "learning_rate": 4.835951300197358e-06, + "loss": 0.2992, + "step": 6083 + }, + { + "epoch": 0.29, + "grad_norm": 0.5822493744426046, + "learning_rate": 4.835883725558176e-06, + "loss": 0.301, + "step": 6084 + }, + { + "epoch": 0.29, + "grad_norm": 0.6366000970896043, + "learning_rate": 4.83581613747657e-06, + "loss": 0.2779, + "step": 6085 + }, + { + "epoch": 0.29, + "grad_norm": 0.6055288708576175, + "learning_rate": 4.8357485359529295e-06, + "loss": 0.2708, + "step": 6086 + }, + { + "epoch": 0.29, + "grad_norm": 0.6359981855450109, + "learning_rate": 4.835680920987643e-06, + "loss": 0.2886, + "step": 6087 + }, + { + "epoch": 0.29, + "grad_norm": 0.6021867205938356, + "learning_rate": 4.8356132925811005e-06, + "loss": 0.3128, + "step": 6088 + }, + { + "epoch": 0.29, + "grad_norm": 0.5623478180645314, + "learning_rate": 4.8355456507336905e-06, + "loss": 0.2725, + "step": 6089 + }, + { + "epoch": 0.29, + "grad_norm": 0.5933588984513485, + "learning_rate": 4.835477995445802e-06, + "loss": 0.3014, + "step": 6090 + }, + { + "epoch": 0.29, + "grad_norm": 0.626574338029129, + "learning_rate": 4.835410326717824e-06, + "loss": 0.29, + "step": 6091 + }, + { + "epoch": 0.29, + "grad_norm": 0.6427367922510132, + "learning_rate": 4.835342644550148e-06, + "loss": 0.2821, + "step": 6092 + }, + { + "epoch": 0.29, + "grad_norm": 0.6844135188127077, + "learning_rate": 4.835274948943161e-06, + "loss": 0.317, + "step": 6093 + }, + { + "epoch": 0.29, + "grad_norm": 0.6248553683952823, + "learning_rate": 4.835207239897254e-06, + "loss": 0.312, + "step": 6094 + }, + { + "epoch": 0.29, + "grad_norm": 0.6482519186252742, + "learning_rate": 4.835139517412816e-06, + "loss": 0.3015, + "step": 6095 + }, + { + "epoch": 0.29, + "grad_norm": 0.6555595322431483, + "learning_rate": 4.835071781490237e-06, + "loss": 0.2881, + "step": 6096 + }, + { + "epoch": 0.29, + "grad_norm": 0.6161368073613261, + "learning_rate": 4.835004032129907e-06, + "loss": 0.2656, + "step": 6097 + }, + { + "epoch": 0.29, + "grad_norm": 0.6489072937901694, + "learning_rate": 4.834936269332216e-06, + "loss": 0.3217, + "step": 6098 + }, + { + "epoch": 0.29, + "grad_norm": 0.6340565652621348, + "learning_rate": 4.834868493097553e-06, + "loss": 0.2963, + "step": 6099 + }, + { + "epoch": 0.29, + "grad_norm": 0.6620323586581536, + "learning_rate": 4.834800703426309e-06, + "loss": 0.3138, + "step": 6100 + }, + { + "epoch": 0.29, + "grad_norm": 0.669412107084942, + "learning_rate": 4.834732900318874e-06, + "loss": 0.3032, + "step": 6101 + }, + { + "epoch": 0.29, + "grad_norm": 0.6172699911233478, + "learning_rate": 4.834665083775637e-06, + "loss": 0.3069, + "step": 6102 + }, + { + "epoch": 0.29, + "grad_norm": 0.6402761899613656, + "learning_rate": 4.834597253796991e-06, + "loss": 0.2873, + "step": 6103 + }, + { + "epoch": 0.29, + "grad_norm": 0.6211924729806145, + "learning_rate": 4.834529410383323e-06, + "loss": 0.3056, + "step": 6104 + }, + { + "epoch": 0.29, + "grad_norm": 0.5994686166558912, + "learning_rate": 4.834461553535026e-06, + "loss": 0.2854, + "step": 6105 + }, + { + "epoch": 0.29, + "grad_norm": 0.614437868838758, + "learning_rate": 4.8343936832524886e-06, + "loss": 0.2989, + "step": 6106 + }, + { + "epoch": 0.29, + "grad_norm": 0.6837533211492401, + "learning_rate": 4.834325799536103e-06, + "loss": 0.3119, + "step": 6107 + }, + { + "epoch": 0.29, + "grad_norm": 0.6528006121795278, + "learning_rate": 4.834257902386258e-06, + "loss": 0.3011, + "step": 6108 + }, + { + "epoch": 0.29, + "grad_norm": 0.6570276846179137, + "learning_rate": 4.834189991803346e-06, + "loss": 0.304, + "step": 6109 + }, + { + "epoch": 0.29, + "grad_norm": 0.5534265579431968, + "learning_rate": 4.834122067787758e-06, + "loss": 0.2681, + "step": 6110 + }, + { + "epoch": 0.29, + "grad_norm": 0.6370350337305527, + "learning_rate": 4.834054130339883e-06, + "loss": 0.2983, + "step": 6111 + }, + { + "epoch": 0.29, + "grad_norm": 0.572656736708997, + "learning_rate": 4.833986179460115e-06, + "loss": 0.2657, + "step": 6112 + }, + { + "epoch": 0.29, + "grad_norm": 0.6514759959371614, + "learning_rate": 4.833918215148842e-06, + "loss": 0.2906, + "step": 6113 + }, + { + "epoch": 0.29, + "grad_norm": 0.6227198870633748, + "learning_rate": 4.833850237406456e-06, + "loss": 0.3006, + "step": 6114 + }, + { + "epoch": 0.29, + "grad_norm": 0.6644774485763411, + "learning_rate": 4.833782246233349e-06, + "loss": 0.3194, + "step": 6115 + }, + { + "epoch": 0.29, + "grad_norm": 0.6115921356479154, + "learning_rate": 4.833714241629911e-06, + "loss": 0.2932, + "step": 6116 + }, + { + "epoch": 0.29, + "grad_norm": 0.5945341376399713, + "learning_rate": 4.833646223596535e-06, + "loss": 0.2838, + "step": 6117 + }, + { + "epoch": 0.29, + "grad_norm": 0.637783777465528, + "learning_rate": 4.833578192133611e-06, + "loss": 0.3122, + "step": 6118 + }, + { + "epoch": 0.29, + "grad_norm": 0.5992910703038457, + "learning_rate": 4.8335101472415315e-06, + "loss": 0.3107, + "step": 6119 + }, + { + "epoch": 0.29, + "grad_norm": 0.6139078417388202, + "learning_rate": 4.833442088920687e-06, + "loss": 0.3051, + "step": 6120 + }, + { + "epoch": 0.29, + "grad_norm": 0.6650532384465274, + "learning_rate": 4.83337401717147e-06, + "loss": 0.3013, + "step": 6121 + }, + { + "epoch": 0.29, + "grad_norm": 0.6775317467842543, + "learning_rate": 4.833305931994272e-06, + "loss": 0.2948, + "step": 6122 + }, + { + "epoch": 0.29, + "grad_norm": 0.6728369278225129, + "learning_rate": 4.8332378333894845e-06, + "loss": 0.3196, + "step": 6123 + }, + { + "epoch": 0.29, + "grad_norm": 0.5967386511142584, + "learning_rate": 4.8331697213575e-06, + "loss": 0.2869, + "step": 6124 + }, + { + "epoch": 0.29, + "grad_norm": 0.6142876650372826, + "learning_rate": 4.833101595898711e-06, + "loss": 0.2878, + "step": 6125 + }, + { + "epoch": 0.29, + "grad_norm": 0.6022768204600467, + "learning_rate": 4.8330334570135075e-06, + "loss": 0.2912, + "step": 6126 + }, + { + "epoch": 0.29, + "grad_norm": 0.6368535352624674, + "learning_rate": 4.832965304702283e-06, + "loss": 0.2973, + "step": 6127 + }, + { + "epoch": 0.29, + "grad_norm": 0.6467868380238526, + "learning_rate": 4.83289713896543e-06, + "loss": 0.3314, + "step": 6128 + }, + { + "epoch": 0.29, + "grad_norm": 0.7090568208728026, + "learning_rate": 4.832828959803341e-06, + "loss": 0.3092, + "step": 6129 + }, + { + "epoch": 0.29, + "grad_norm": 0.6079219067863283, + "learning_rate": 4.832760767216406e-06, + "loss": 0.2784, + "step": 6130 + }, + { + "epoch": 0.29, + "grad_norm": 0.63223182564173, + "learning_rate": 4.83269256120502e-06, + "loss": 0.3038, + "step": 6131 + }, + { + "epoch": 0.29, + "grad_norm": 0.6434725459124763, + "learning_rate": 4.8326243417695745e-06, + "loss": 0.296, + "step": 6132 + }, + { + "epoch": 0.29, + "grad_norm": 0.6820529138655161, + "learning_rate": 4.832556108910462e-06, + "loss": 0.3137, + "step": 6133 + }, + { + "epoch": 0.29, + "grad_norm": 0.6057806537064832, + "learning_rate": 4.832487862628076e-06, + "loss": 0.293, + "step": 6134 + }, + { + "epoch": 0.29, + "grad_norm": 0.6645310157882716, + "learning_rate": 4.832419602922808e-06, + "loss": 0.3294, + "step": 6135 + }, + { + "epoch": 0.29, + "grad_norm": 0.6466489702068363, + "learning_rate": 4.832351329795052e-06, + "loss": 0.2886, + "step": 6136 + }, + { + "epoch": 0.29, + "grad_norm": 0.6576037291064445, + "learning_rate": 4.8322830432452e-06, + "loss": 0.2898, + "step": 6137 + }, + { + "epoch": 0.29, + "grad_norm": 0.6096492616969632, + "learning_rate": 4.832214743273645e-06, + "loss": 0.2928, + "step": 6138 + }, + { + "epoch": 0.29, + "grad_norm": 0.6201326708403206, + "learning_rate": 4.832146429880781e-06, + "loss": 0.2853, + "step": 6139 + }, + { + "epoch": 0.29, + "grad_norm": 0.5785325167318988, + "learning_rate": 4.832078103066999e-06, + "loss": 0.2872, + "step": 6140 + }, + { + "epoch": 0.29, + "grad_norm": 0.6498795478121165, + "learning_rate": 4.832009762832695e-06, + "loss": 0.295, + "step": 6141 + }, + { + "epoch": 0.29, + "grad_norm": 0.5982552464424951, + "learning_rate": 4.83194140917826e-06, + "loss": 0.2916, + "step": 6142 + }, + { + "epoch": 0.29, + "grad_norm": 0.6422558920275796, + "learning_rate": 4.831873042104089e-06, + "loss": 0.2983, + "step": 6143 + }, + { + "epoch": 0.29, + "grad_norm": 0.5882375907541039, + "learning_rate": 4.831804661610575e-06, + "loss": 0.2945, + "step": 6144 + }, + { + "epoch": 0.29, + "grad_norm": 0.6096071254124906, + "learning_rate": 4.831736267698111e-06, + "loss": 0.2913, + "step": 6145 + }, + { + "epoch": 0.29, + "grad_norm": 0.6049377784985884, + "learning_rate": 4.831667860367091e-06, + "loss": 0.2903, + "step": 6146 + }, + { + "epoch": 0.29, + "grad_norm": 0.5886565624064719, + "learning_rate": 4.831599439617908e-06, + "loss": 0.2903, + "step": 6147 + }, + { + "epoch": 0.29, + "grad_norm": 0.6398377444155218, + "learning_rate": 4.831531005450957e-06, + "loss": 0.2891, + "step": 6148 + }, + { + "epoch": 0.29, + "grad_norm": 0.6181813686115628, + "learning_rate": 4.83146255786663e-06, + "loss": 0.281, + "step": 6149 + }, + { + "epoch": 0.29, + "grad_norm": 0.6678307382050109, + "learning_rate": 4.831394096865323e-06, + "loss": 0.2909, + "step": 6150 + }, + { + "epoch": 0.29, + "grad_norm": 0.597301641306217, + "learning_rate": 4.831325622447428e-06, + "loss": 0.2697, + "step": 6151 + }, + { + "epoch": 0.29, + "grad_norm": 0.6529362896774769, + "learning_rate": 4.831257134613341e-06, + "loss": 0.3074, + "step": 6152 + }, + { + "epoch": 0.29, + "grad_norm": 0.6521808785017011, + "learning_rate": 4.8311886333634535e-06, + "loss": 0.3236, + "step": 6153 + }, + { + "epoch": 0.29, + "grad_norm": 0.6460471686357455, + "learning_rate": 4.831120118698162e-06, + "loss": 0.2916, + "step": 6154 + }, + { + "epoch": 0.29, + "grad_norm": 0.5891747211924871, + "learning_rate": 4.8310515906178616e-06, + "loss": 0.278, + "step": 6155 + }, + { + "epoch": 0.29, + "grad_norm": 0.5976530378727991, + "learning_rate": 4.830983049122944e-06, + "loss": 0.2861, + "step": 6156 + }, + { + "epoch": 0.29, + "grad_norm": 0.6279070832339828, + "learning_rate": 4.8309144942138056e-06, + "loss": 0.2875, + "step": 6157 + }, + { + "epoch": 0.29, + "grad_norm": 0.6598691784122568, + "learning_rate": 4.830845925890839e-06, + "loss": 0.2943, + "step": 6158 + }, + { + "epoch": 0.29, + "grad_norm": 0.676235546638165, + "learning_rate": 4.830777344154441e-06, + "loss": 0.3123, + "step": 6159 + }, + { + "epoch": 0.29, + "grad_norm": 0.6314937752879032, + "learning_rate": 4.830708749005004e-06, + "loss": 0.2861, + "step": 6160 + }, + { + "epoch": 0.29, + "grad_norm": 0.6894203032175898, + "learning_rate": 4.830640140442925e-06, + "loss": 0.2973, + "step": 6161 + }, + { + "epoch": 0.29, + "grad_norm": 0.5973585543599856, + "learning_rate": 4.830571518468597e-06, + "loss": 0.3008, + "step": 6162 + }, + { + "epoch": 0.29, + "grad_norm": 0.6413765248096862, + "learning_rate": 4.8305028830824165e-06, + "loss": 0.325, + "step": 6163 + }, + { + "epoch": 0.29, + "grad_norm": 0.6286168214435779, + "learning_rate": 4.8304342342847765e-06, + "loss": 0.3248, + "step": 6164 + }, + { + "epoch": 0.29, + "grad_norm": 0.6954494914628044, + "learning_rate": 4.830365572076074e-06, + "loss": 0.3305, + "step": 6165 + }, + { + "epoch": 0.29, + "grad_norm": 0.6763239059768207, + "learning_rate": 4.830296896456703e-06, + "loss": 0.3182, + "step": 6166 + }, + { + "epoch": 0.29, + "grad_norm": 0.6789381225887489, + "learning_rate": 4.83022820742706e-06, + "loss": 0.3037, + "step": 6167 + }, + { + "epoch": 0.29, + "grad_norm": 0.6051282701072567, + "learning_rate": 4.830159504987538e-06, + "loss": 0.283, + "step": 6168 + }, + { + "epoch": 0.29, + "grad_norm": 0.6048225510758175, + "learning_rate": 4.830090789138535e-06, + "loss": 0.2955, + "step": 6169 + }, + { + "epoch": 0.29, + "grad_norm": 0.61838660935659, + "learning_rate": 4.830022059880444e-06, + "loss": 0.3049, + "step": 6170 + }, + { + "epoch": 0.29, + "grad_norm": 0.6170324376194507, + "learning_rate": 4.829953317213663e-06, + "loss": 0.2873, + "step": 6171 + }, + { + "epoch": 0.29, + "grad_norm": 0.7009121609658738, + "learning_rate": 4.829884561138585e-06, + "loss": 0.3057, + "step": 6172 + }, + { + "epoch": 0.29, + "grad_norm": 0.6706309967351847, + "learning_rate": 4.829815791655608e-06, + "loss": 0.3121, + "step": 6173 + }, + { + "epoch": 0.29, + "grad_norm": 0.60741202655159, + "learning_rate": 4.829747008765126e-06, + "loss": 0.2896, + "step": 6174 + }, + { + "epoch": 0.29, + "grad_norm": 0.6254078222265819, + "learning_rate": 4.829678212467535e-06, + "loss": 0.3003, + "step": 6175 + }, + { + "epoch": 0.29, + "grad_norm": 0.5771814705796915, + "learning_rate": 4.829609402763232e-06, + "loss": 0.2958, + "step": 6176 + }, + { + "epoch": 0.29, + "grad_norm": 0.6706467295394561, + "learning_rate": 4.829540579652613e-06, + "loss": 0.308, + "step": 6177 + }, + { + "epoch": 0.29, + "grad_norm": 0.650753194087434, + "learning_rate": 4.829471743136073e-06, + "loss": 0.2982, + "step": 6178 + }, + { + "epoch": 0.29, + "grad_norm": 0.6080503665808172, + "learning_rate": 4.829402893214008e-06, + "loss": 0.3045, + "step": 6179 + }, + { + "epoch": 0.29, + "grad_norm": 0.6296529914749694, + "learning_rate": 4.829334029886816e-06, + "loss": 0.3237, + "step": 6180 + }, + { + "epoch": 0.29, + "grad_norm": 0.6033902460577066, + "learning_rate": 4.829265153154892e-06, + "loss": 0.2864, + "step": 6181 + }, + { + "epoch": 0.29, + "grad_norm": 0.5912592308095502, + "learning_rate": 4.829196263018631e-06, + "loss": 0.2908, + "step": 6182 + }, + { + "epoch": 0.29, + "grad_norm": 0.6064464711070964, + "learning_rate": 4.829127359478432e-06, + "loss": 0.3053, + "step": 6183 + }, + { + "epoch": 0.29, + "grad_norm": 0.6412770277734395, + "learning_rate": 4.82905844253469e-06, + "loss": 0.2997, + "step": 6184 + }, + { + "epoch": 0.29, + "grad_norm": 0.6159717403169443, + "learning_rate": 4.828989512187802e-06, + "loss": 0.3046, + "step": 6185 + }, + { + "epoch": 0.29, + "grad_norm": 0.6792644277316943, + "learning_rate": 4.828920568438166e-06, + "loss": 0.2938, + "step": 6186 + }, + { + "epoch": 0.29, + "grad_norm": 0.704502102991183, + "learning_rate": 4.828851611286176e-06, + "loss": 0.3359, + "step": 6187 + }, + { + "epoch": 0.29, + "grad_norm": 0.6454318771975012, + "learning_rate": 4.828782640732231e-06, + "loss": 0.2948, + "step": 6188 + }, + { + "epoch": 0.29, + "grad_norm": 0.5825006833439049, + "learning_rate": 4.828713656776728e-06, + "loss": 0.2785, + "step": 6189 + }, + { + "epoch": 0.29, + "grad_norm": 0.7236492350002763, + "learning_rate": 4.828644659420062e-06, + "loss": 0.3131, + "step": 6190 + }, + { + "epoch": 0.29, + "grad_norm": 0.6697252498241998, + "learning_rate": 4.828575648662632e-06, + "loss": 0.2845, + "step": 6191 + }, + { + "epoch": 0.29, + "grad_norm": 0.6208464829709375, + "learning_rate": 4.828506624504834e-06, + "loss": 0.2828, + "step": 6192 + }, + { + "epoch": 0.29, + "grad_norm": 0.5916982339616055, + "learning_rate": 4.828437586947066e-06, + "loss": 0.2919, + "step": 6193 + }, + { + "epoch": 0.29, + "grad_norm": 0.5901090355619375, + "learning_rate": 4.828368535989724e-06, + "loss": 0.2934, + "step": 6194 + }, + { + "epoch": 0.29, + "grad_norm": 0.6269521630022687, + "learning_rate": 4.8282994716332066e-06, + "loss": 0.3163, + "step": 6195 + }, + { + "epoch": 0.29, + "grad_norm": 0.6168646626339218, + "learning_rate": 4.828230393877912e-06, + "loss": 0.296, + "step": 6196 + }, + { + "epoch": 0.29, + "grad_norm": 0.6474631635649819, + "learning_rate": 4.828161302724235e-06, + "loss": 0.2977, + "step": 6197 + }, + { + "epoch": 0.29, + "grad_norm": 0.633074779458406, + "learning_rate": 4.828092198172576e-06, + "loss": 0.3125, + "step": 6198 + }, + { + "epoch": 0.29, + "grad_norm": 0.6734513639230094, + "learning_rate": 4.828023080223331e-06, + "loss": 0.3119, + "step": 6199 + }, + { + "epoch": 0.29, + "grad_norm": 0.641222043044516, + "learning_rate": 4.827953948876899e-06, + "loss": 0.3054, + "step": 6200 + }, + { + "epoch": 0.29, + "grad_norm": 0.6120933927239239, + "learning_rate": 4.8278848041336765e-06, + "loss": 0.2947, + "step": 6201 + }, + { + "epoch": 0.29, + "grad_norm": 0.651858636680289, + "learning_rate": 4.827815645994063e-06, + "loss": 0.3184, + "step": 6202 + }, + { + "epoch": 0.29, + "grad_norm": 0.6541921478678899, + "learning_rate": 4.827746474458454e-06, + "loss": 0.3141, + "step": 6203 + }, + { + "epoch": 0.29, + "grad_norm": 0.6362140138901661, + "learning_rate": 4.82767728952725e-06, + "loss": 0.2816, + "step": 6204 + }, + { + "epoch": 0.29, + "grad_norm": 0.5612376954588207, + "learning_rate": 4.8276080912008484e-06, + "loss": 0.2919, + "step": 6205 + }, + { + "epoch": 0.29, + "grad_norm": 0.6396547392887985, + "learning_rate": 4.827538879479647e-06, + "loss": 0.3158, + "step": 6206 + }, + { + "epoch": 0.29, + "grad_norm": 0.5865604286023868, + "learning_rate": 4.827469654364044e-06, + "loss": 0.2854, + "step": 6207 + }, + { + "epoch": 0.29, + "grad_norm": 0.5938362860402169, + "learning_rate": 4.827400415854439e-06, + "loss": 0.2924, + "step": 6208 + }, + { + "epoch": 0.29, + "grad_norm": 0.6134516504065471, + "learning_rate": 4.827331163951229e-06, + "loss": 0.2847, + "step": 6209 + }, + { + "epoch": 0.29, + "grad_norm": 0.6081738293021037, + "learning_rate": 4.827261898654812e-06, + "loss": 0.2759, + "step": 6210 + }, + { + "epoch": 0.29, + "grad_norm": 0.6027121772861329, + "learning_rate": 4.827192619965589e-06, + "loss": 0.3017, + "step": 6211 + }, + { + "epoch": 0.29, + "grad_norm": 0.630974561705865, + "learning_rate": 4.827123327883958e-06, + "loss": 0.3135, + "step": 6212 + }, + { + "epoch": 0.29, + "grad_norm": 0.6326911609121806, + "learning_rate": 4.8270540224103165e-06, + "loss": 0.3011, + "step": 6213 + }, + { + "epoch": 0.29, + "grad_norm": 0.6590548382741164, + "learning_rate": 4.826984703545064e-06, + "loss": 0.3417, + "step": 6214 + }, + { + "epoch": 0.29, + "grad_norm": 0.5728792832178264, + "learning_rate": 4.826915371288599e-06, + "loss": 0.2858, + "step": 6215 + }, + { + "epoch": 0.29, + "grad_norm": 0.6404974736456153, + "learning_rate": 4.8268460256413205e-06, + "loss": 0.3137, + "step": 6216 + }, + { + "epoch": 0.29, + "grad_norm": 0.6503251717981495, + "learning_rate": 4.8267766666036295e-06, + "loss": 0.3004, + "step": 6217 + }, + { + "epoch": 0.29, + "grad_norm": 0.6524337358572297, + "learning_rate": 4.826707294175922e-06, + "loss": 0.3224, + "step": 6218 + }, + { + "epoch": 0.29, + "grad_norm": 0.7004827445272358, + "learning_rate": 4.8266379083586e-06, + "loss": 0.3418, + "step": 6219 + }, + { + "epoch": 0.29, + "grad_norm": 0.5908097073587754, + "learning_rate": 4.826568509152061e-06, + "loss": 0.2831, + "step": 6220 + }, + { + "epoch": 0.29, + "grad_norm": 0.5982626412902401, + "learning_rate": 4.826499096556705e-06, + "loss": 0.2822, + "step": 6221 + }, + { + "epoch": 0.29, + "grad_norm": 0.6187232839989909, + "learning_rate": 4.826429670572932e-06, + "loss": 0.3103, + "step": 6222 + }, + { + "epoch": 0.29, + "grad_norm": 0.6242442852854508, + "learning_rate": 4.82636023120114e-06, + "loss": 0.3018, + "step": 6223 + }, + { + "epoch": 0.29, + "grad_norm": 0.6086009876073766, + "learning_rate": 4.82629077844173e-06, + "loss": 0.2901, + "step": 6224 + }, + { + "epoch": 0.29, + "grad_norm": 0.6170700033010145, + "learning_rate": 4.826221312295102e-06, + "loss": 0.3147, + "step": 6225 + }, + { + "epoch": 0.29, + "grad_norm": 0.5820348572619357, + "learning_rate": 4.826151832761654e-06, + "loss": 0.3059, + "step": 6226 + }, + { + "epoch": 0.29, + "grad_norm": 0.6331252224948062, + "learning_rate": 4.826082339841788e-06, + "loss": 0.2825, + "step": 6227 + }, + { + "epoch": 0.29, + "grad_norm": 0.6204015862275749, + "learning_rate": 4.826012833535901e-06, + "loss": 0.2954, + "step": 6228 + }, + { + "epoch": 0.29, + "grad_norm": 0.6991658640526733, + "learning_rate": 4.825943313844396e-06, + "loss": 0.331, + "step": 6229 + }, + { + "epoch": 0.29, + "grad_norm": 0.6348837396177787, + "learning_rate": 4.825873780767672e-06, + "loss": 0.2845, + "step": 6230 + }, + { + "epoch": 0.29, + "grad_norm": 0.6496128958363722, + "learning_rate": 4.825804234306128e-06, + "loss": 0.2999, + "step": 6231 + }, + { + "epoch": 0.29, + "grad_norm": 0.583214809642661, + "learning_rate": 4.825734674460166e-06, + "loss": 0.2814, + "step": 6232 + }, + { + "epoch": 0.29, + "grad_norm": 0.6716636893626531, + "learning_rate": 4.825665101230186e-06, + "loss": 0.2969, + "step": 6233 + }, + { + "epoch": 0.29, + "grad_norm": 0.6073187392325825, + "learning_rate": 4.825595514616587e-06, + "loss": 0.2864, + "step": 6234 + }, + { + "epoch": 0.29, + "grad_norm": 0.6050366744698725, + "learning_rate": 4.82552591461977e-06, + "loss": 0.2759, + "step": 6235 + }, + { + "epoch": 0.29, + "grad_norm": 0.6166834005617311, + "learning_rate": 4.825456301240137e-06, + "loss": 0.3055, + "step": 6236 + }, + { + "epoch": 0.29, + "grad_norm": 0.6448745295715907, + "learning_rate": 4.825386674478087e-06, + "loss": 0.3155, + "step": 6237 + }, + { + "epoch": 0.29, + "grad_norm": 0.6333298280665817, + "learning_rate": 4.825317034334021e-06, + "loss": 0.3019, + "step": 6238 + }, + { + "epoch": 0.29, + "grad_norm": 0.6128801458695848, + "learning_rate": 4.825247380808339e-06, + "loss": 0.284, + "step": 6239 + }, + { + "epoch": 0.29, + "grad_norm": 0.6018249359682017, + "learning_rate": 4.8251777139014446e-06, + "loss": 0.2953, + "step": 6240 + }, + { + "epoch": 0.29, + "grad_norm": 0.6097062938721395, + "learning_rate": 4.825108033613736e-06, + "loss": 0.3029, + "step": 6241 + }, + { + "epoch": 0.29, + "grad_norm": 0.6480676947375177, + "learning_rate": 4.825038339945615e-06, + "loss": 0.3012, + "step": 6242 + }, + { + "epoch": 0.29, + "grad_norm": 0.6080002268223272, + "learning_rate": 4.824968632897482e-06, + "loss": 0.3053, + "step": 6243 + }, + { + "epoch": 0.29, + "grad_norm": 0.601383805836042, + "learning_rate": 4.8248989124697395e-06, + "loss": 0.2919, + "step": 6244 + }, + { + "epoch": 0.29, + "grad_norm": 0.6388275344916537, + "learning_rate": 4.824829178662789e-06, + "loss": 0.2953, + "step": 6245 + }, + { + "epoch": 0.29, + "grad_norm": 0.6089038689160932, + "learning_rate": 4.824759431477029e-06, + "loss": 0.2815, + "step": 6246 + }, + { + "epoch": 0.29, + "grad_norm": 0.593185595824597, + "learning_rate": 4.824689670912864e-06, + "loss": 0.2925, + "step": 6247 + }, + { + "epoch": 0.29, + "grad_norm": 0.6416295180858544, + "learning_rate": 4.824619896970694e-06, + "loss": 0.3101, + "step": 6248 + }, + { + "epoch": 0.29, + "grad_norm": 0.6216879117077058, + "learning_rate": 4.824550109650922e-06, + "loss": 0.3115, + "step": 6249 + }, + { + "epoch": 0.29, + "grad_norm": 0.6068488287014918, + "learning_rate": 4.824480308953947e-06, + "loss": 0.2865, + "step": 6250 + }, + { + "epoch": 0.29, + "grad_norm": 0.6283863450383598, + "learning_rate": 4.8244104948801715e-06, + "loss": 0.2848, + "step": 6251 + }, + { + "epoch": 0.29, + "grad_norm": 0.6222193812955549, + "learning_rate": 4.824340667429999e-06, + "loss": 0.2922, + "step": 6252 + }, + { + "epoch": 0.29, + "grad_norm": 0.667240762147269, + "learning_rate": 4.82427082660383e-06, + "loss": 0.3245, + "step": 6253 + }, + { + "epoch": 0.29, + "grad_norm": 0.6989825463117072, + "learning_rate": 4.824200972402066e-06, + "loss": 0.2998, + "step": 6254 + }, + { + "epoch": 0.29, + "grad_norm": 0.6125829307319174, + "learning_rate": 4.82413110482511e-06, + "loss": 0.3021, + "step": 6255 + }, + { + "epoch": 0.29, + "grad_norm": 0.6541049535929004, + "learning_rate": 4.824061223873364e-06, + "loss": 0.3063, + "step": 6256 + }, + { + "epoch": 0.29, + "grad_norm": 0.5912084419052653, + "learning_rate": 4.823991329547229e-06, + "loss": 0.2825, + "step": 6257 + }, + { + "epoch": 0.29, + "grad_norm": 0.5940284268866959, + "learning_rate": 4.8239214218471085e-06, + "loss": 0.2997, + "step": 6258 + }, + { + "epoch": 0.29, + "grad_norm": 0.6377009525479451, + "learning_rate": 4.823851500773404e-06, + "loss": 0.2983, + "step": 6259 + }, + { + "epoch": 0.29, + "grad_norm": 0.622773327934764, + "learning_rate": 4.823781566326518e-06, + "loss": 0.2978, + "step": 6260 + }, + { + "epoch": 0.29, + "grad_norm": 0.6059864876233799, + "learning_rate": 4.823711618506854e-06, + "loss": 0.2854, + "step": 6261 + }, + { + "epoch": 0.29, + "grad_norm": 0.640723701040838, + "learning_rate": 4.823641657314814e-06, + "loss": 0.2965, + "step": 6262 + }, + { + "epoch": 0.29, + "grad_norm": 0.6753156265423013, + "learning_rate": 4.823571682750799e-06, + "loss": 0.2898, + "step": 6263 + }, + { + "epoch": 0.29, + "grad_norm": 0.6161735674979459, + "learning_rate": 4.823501694815213e-06, + "loss": 0.2901, + "step": 6264 + }, + { + "epoch": 0.29, + "grad_norm": 0.6125230546562012, + "learning_rate": 4.823431693508459e-06, + "loss": 0.3054, + "step": 6265 + }, + { + "epoch": 0.29, + "grad_norm": 0.6190175007193714, + "learning_rate": 4.82336167883094e-06, + "loss": 0.293, + "step": 6266 + }, + { + "epoch": 0.29, + "grad_norm": 0.6622429062263344, + "learning_rate": 4.823291650783058e-06, + "loss": 0.313, + "step": 6267 + }, + { + "epoch": 0.29, + "grad_norm": 0.6551202127690198, + "learning_rate": 4.8232216093652175e-06, + "loss": 0.2842, + "step": 6268 + }, + { + "epoch": 0.29, + "grad_norm": 0.6479131190333739, + "learning_rate": 4.82315155457782e-06, + "loss": 0.3046, + "step": 6269 + }, + { + "epoch": 0.29, + "grad_norm": 0.6501957695487285, + "learning_rate": 4.823081486421268e-06, + "loss": 0.2915, + "step": 6270 + }, + { + "epoch": 0.29, + "grad_norm": 0.5894734634415305, + "learning_rate": 4.823011404895967e-06, + "loss": 0.2701, + "step": 6271 + }, + { + "epoch": 0.29, + "grad_norm": 0.6872826614220638, + "learning_rate": 4.82294131000232e-06, + "loss": 0.2861, + "step": 6272 + }, + { + "epoch": 0.29, + "grad_norm": 0.6042059707282164, + "learning_rate": 4.822871201740729e-06, + "loss": 0.2892, + "step": 6273 + }, + { + "epoch": 0.29, + "grad_norm": 0.6458100040970107, + "learning_rate": 4.822801080111598e-06, + "loss": 0.2831, + "step": 6274 + }, + { + "epoch": 0.29, + "grad_norm": 0.6368445320513748, + "learning_rate": 4.82273094511533e-06, + "loss": 0.2994, + "step": 6275 + }, + { + "epoch": 0.29, + "grad_norm": 0.6827989749286195, + "learning_rate": 4.82266079675233e-06, + "loss": 0.3332, + "step": 6276 + }, + { + "epoch": 0.29, + "grad_norm": 0.5863885666635745, + "learning_rate": 4.822590635023e-06, + "loss": 0.2915, + "step": 6277 + }, + { + "epoch": 0.29, + "grad_norm": 0.6465857853220457, + "learning_rate": 4.822520459927746e-06, + "loss": 0.2908, + "step": 6278 + }, + { + "epoch": 0.29, + "grad_norm": 0.7982706344112456, + "learning_rate": 4.822450271466969e-06, + "loss": 0.3014, + "step": 6279 + }, + { + "epoch": 0.29, + "grad_norm": 0.6626623155642585, + "learning_rate": 4.8223800696410746e-06, + "loss": 0.282, + "step": 6280 + }, + { + "epoch": 0.29, + "grad_norm": 0.7242156119607864, + "learning_rate": 4.822309854450467e-06, + "loss": 0.3153, + "step": 6281 + }, + { + "epoch": 0.29, + "grad_norm": 0.5886483399012389, + "learning_rate": 4.82223962589555e-06, + "loss": 0.2958, + "step": 6282 + }, + { + "epoch": 0.29, + "grad_norm": 0.6082227233721815, + "learning_rate": 4.822169383976728e-06, + "loss": 0.2988, + "step": 6283 + }, + { + "epoch": 0.29, + "grad_norm": 0.6232157721654364, + "learning_rate": 4.822099128694405e-06, + "loss": 0.3083, + "step": 6284 + }, + { + "epoch": 0.29, + "grad_norm": 0.5829230382358075, + "learning_rate": 4.822028860048985e-06, + "loss": 0.2769, + "step": 6285 + }, + { + "epoch": 0.29, + "grad_norm": 0.6816487252675236, + "learning_rate": 4.821958578040872e-06, + "loss": 0.3122, + "step": 6286 + }, + { + "epoch": 0.29, + "grad_norm": 0.750931258675068, + "learning_rate": 4.821888282670471e-06, + "loss": 0.3215, + "step": 6287 + }, + { + "epoch": 0.29, + "grad_norm": 0.6835623464081387, + "learning_rate": 4.821817973938186e-06, + "loss": 0.3148, + "step": 6288 + }, + { + "epoch": 0.29, + "grad_norm": 0.5763163490588055, + "learning_rate": 4.8217476518444225e-06, + "loss": 0.2769, + "step": 6289 + }, + { + "epoch": 0.29, + "grad_norm": 0.6086600061804546, + "learning_rate": 4.821677316389585e-06, + "loss": 0.2879, + "step": 6290 + }, + { + "epoch": 0.29, + "grad_norm": 0.5981848882621146, + "learning_rate": 4.821606967574079e-06, + "loss": 0.2974, + "step": 6291 + }, + { + "epoch": 0.29, + "grad_norm": 0.6169587255884997, + "learning_rate": 4.821536605398308e-06, + "loss": 0.293, + "step": 6292 + }, + { + "epoch": 0.29, + "grad_norm": 0.6216270350604298, + "learning_rate": 4.8214662298626765e-06, + "loss": 0.2998, + "step": 6293 + }, + { + "epoch": 0.29, + "grad_norm": 0.6246563521904208, + "learning_rate": 4.8213958409675906e-06, + "loss": 0.2925, + "step": 6294 + }, + { + "epoch": 0.29, + "grad_norm": 0.6579877408952429, + "learning_rate": 4.821325438713456e-06, + "loss": 0.2855, + "step": 6295 + }, + { + "epoch": 0.29, + "grad_norm": 0.587035946586122, + "learning_rate": 4.821255023100676e-06, + "loss": 0.2721, + "step": 6296 + }, + { + "epoch": 0.29, + "grad_norm": 0.6505611089431194, + "learning_rate": 4.8211845941296565e-06, + "loss": 0.2979, + "step": 6297 + }, + { + "epoch": 0.3, + "grad_norm": 0.6438933185777362, + "learning_rate": 4.821114151800804e-06, + "loss": 0.3057, + "step": 6298 + }, + { + "epoch": 0.3, + "grad_norm": 0.5812430208732796, + "learning_rate": 4.8210436961145224e-06, + "loss": 0.2804, + "step": 6299 + }, + { + "epoch": 0.3, + "grad_norm": 0.6031075333795619, + "learning_rate": 4.820973227071217e-06, + "loss": 0.3046, + "step": 6300 + }, + { + "epoch": 0.3, + "grad_norm": 0.6178707812542462, + "learning_rate": 4.820902744671295e-06, + "loss": 0.2949, + "step": 6301 + }, + { + "epoch": 0.3, + "grad_norm": 0.5920259938798449, + "learning_rate": 4.820832248915161e-06, + "loss": 0.2697, + "step": 6302 + }, + { + "epoch": 0.3, + "grad_norm": 0.5760494635213464, + "learning_rate": 4.820761739803221e-06, + "loss": 0.285, + "step": 6303 + }, + { + "epoch": 0.3, + "grad_norm": 0.5979070079412995, + "learning_rate": 4.82069121733588e-06, + "loss": 0.303, + "step": 6304 + }, + { + "epoch": 0.3, + "grad_norm": 0.5539268686764561, + "learning_rate": 4.820620681513544e-06, + "loss": 0.2623, + "step": 6305 + }, + { + "epoch": 0.3, + "grad_norm": 0.6519233307814334, + "learning_rate": 4.82055013233662e-06, + "loss": 0.3128, + "step": 6306 + }, + { + "epoch": 0.3, + "grad_norm": 0.6471732910565137, + "learning_rate": 4.820479569805513e-06, + "loss": 0.3056, + "step": 6307 + }, + { + "epoch": 0.3, + "grad_norm": 0.62758591815652, + "learning_rate": 4.820408993920629e-06, + "loss": 0.3004, + "step": 6308 + }, + { + "epoch": 0.3, + "grad_norm": 0.627947736618736, + "learning_rate": 4.820338404682375e-06, + "loss": 0.2986, + "step": 6309 + }, + { + "epoch": 0.3, + "grad_norm": 0.6611571179163412, + "learning_rate": 4.820267802091156e-06, + "loss": 0.3183, + "step": 6310 + }, + { + "epoch": 0.3, + "grad_norm": 0.6165864063792296, + "learning_rate": 4.820197186147379e-06, + "loss": 0.2988, + "step": 6311 + }, + { + "epoch": 0.3, + "grad_norm": 0.5994265720705076, + "learning_rate": 4.820126556851451e-06, + "loss": 0.2724, + "step": 6312 + }, + { + "epoch": 0.3, + "grad_norm": 0.6204074898483616, + "learning_rate": 4.820055914203777e-06, + "loss": 0.2846, + "step": 6313 + }, + { + "epoch": 0.3, + "grad_norm": 0.6388746296685581, + "learning_rate": 4.819985258204765e-06, + "loss": 0.3078, + "step": 6314 + }, + { + "epoch": 0.3, + "grad_norm": 0.6539440410111488, + "learning_rate": 4.819914588854821e-06, + "loss": 0.3081, + "step": 6315 + }, + { + "epoch": 0.3, + "grad_norm": 0.6186710528026385, + "learning_rate": 4.819843906154351e-06, + "loss": 0.2929, + "step": 6316 + }, + { + "epoch": 0.3, + "grad_norm": 0.6108606320922768, + "learning_rate": 4.819773210103763e-06, + "loss": 0.3073, + "step": 6317 + }, + { + "epoch": 0.3, + "grad_norm": 0.6356061904770577, + "learning_rate": 4.819702500703463e-06, + "loss": 0.2705, + "step": 6318 + }, + { + "epoch": 0.3, + "grad_norm": 0.609572435083713, + "learning_rate": 4.819631777953858e-06, + "loss": 0.313, + "step": 6319 + }, + { + "epoch": 0.3, + "grad_norm": 0.5961475313247442, + "learning_rate": 4.819561041855355e-06, + "loss": 0.2969, + "step": 6320 + }, + { + "epoch": 0.3, + "grad_norm": 0.5963696167051685, + "learning_rate": 4.819490292408362e-06, + "loss": 0.2989, + "step": 6321 + }, + { + "epoch": 0.3, + "grad_norm": 0.6106750857664064, + "learning_rate": 4.819419529613285e-06, + "loss": 0.2699, + "step": 6322 + }, + { + "epoch": 0.3, + "grad_norm": 0.5971103064735996, + "learning_rate": 4.819348753470531e-06, + "loss": 0.2845, + "step": 6323 + }, + { + "epoch": 0.3, + "grad_norm": 0.5917183999655773, + "learning_rate": 4.819277963980509e-06, + "loss": 0.2742, + "step": 6324 + }, + { + "epoch": 0.3, + "grad_norm": 0.5969677410471294, + "learning_rate": 4.819207161143624e-06, + "loss": 0.2892, + "step": 6325 + }, + { + "epoch": 0.3, + "grad_norm": 0.6827897923135162, + "learning_rate": 4.819136344960285e-06, + "loss": 0.3107, + "step": 6326 + }, + { + "epoch": 0.3, + "grad_norm": 0.5809833434576422, + "learning_rate": 4.8190655154309e-06, + "loss": 0.2897, + "step": 6327 + }, + { + "epoch": 0.3, + "grad_norm": 0.5968272258114613, + "learning_rate": 4.818994672555875e-06, + "loss": 0.2897, + "step": 6328 + }, + { + "epoch": 0.3, + "grad_norm": 0.6288655330939714, + "learning_rate": 4.818923816335619e-06, + "loss": 0.3025, + "step": 6329 + }, + { + "epoch": 0.3, + "grad_norm": 0.648185438005464, + "learning_rate": 4.818852946770539e-06, + "loss": 0.329, + "step": 6330 + }, + { + "epoch": 0.3, + "grad_norm": 0.5985358080700368, + "learning_rate": 4.818782063861043e-06, + "loss": 0.2804, + "step": 6331 + }, + { + "epoch": 0.3, + "grad_norm": 0.5912461788932037, + "learning_rate": 4.818711167607539e-06, + "loss": 0.2926, + "step": 6332 + }, + { + "epoch": 0.3, + "grad_norm": 0.663832054646061, + "learning_rate": 4.8186402580104355e-06, + "loss": 0.3138, + "step": 6333 + }, + { + "epoch": 0.3, + "grad_norm": 0.6045605637277787, + "learning_rate": 4.8185693350701396e-06, + "loss": 0.299, + "step": 6334 + }, + { + "epoch": 0.3, + "grad_norm": 0.5857215052808619, + "learning_rate": 4.8184983987870605e-06, + "loss": 0.2869, + "step": 6335 + }, + { + "epoch": 0.3, + "grad_norm": 0.6267587365356722, + "learning_rate": 4.818427449161605e-06, + "loss": 0.2855, + "step": 6336 + }, + { + "epoch": 0.3, + "grad_norm": 0.5967560493004298, + "learning_rate": 4.8183564861941825e-06, + "loss": 0.2893, + "step": 6337 + }, + { + "epoch": 0.3, + "grad_norm": 0.6086176953248059, + "learning_rate": 4.818285509885202e-06, + "loss": 0.3018, + "step": 6338 + }, + { + "epoch": 0.3, + "grad_norm": 0.6409207853994683, + "learning_rate": 4.81821452023507e-06, + "loss": 0.3047, + "step": 6339 + }, + { + "epoch": 0.3, + "grad_norm": 0.6392105616347321, + "learning_rate": 4.818143517244196e-06, + "loss": 0.3012, + "step": 6340 + }, + { + "epoch": 0.3, + "grad_norm": 0.7304961244534058, + "learning_rate": 4.81807250091299e-06, + "loss": 0.3062, + "step": 6341 + }, + { + "epoch": 0.3, + "grad_norm": 0.6441649775753865, + "learning_rate": 4.818001471241859e-06, + "loss": 0.2931, + "step": 6342 + }, + { + "epoch": 0.3, + "grad_norm": 0.6056882514340324, + "learning_rate": 4.817930428231211e-06, + "loss": 0.2856, + "step": 6343 + }, + { + "epoch": 0.3, + "grad_norm": 0.5913566189648113, + "learning_rate": 4.817859371881457e-06, + "loss": 0.2818, + "step": 6344 + }, + { + "epoch": 0.3, + "grad_norm": 0.6213227473747966, + "learning_rate": 4.817788302193004e-06, + "loss": 0.2877, + "step": 6345 + }, + { + "epoch": 0.3, + "grad_norm": 0.6170797414231646, + "learning_rate": 4.817717219166263e-06, + "loss": 0.3147, + "step": 6346 + }, + { + "epoch": 0.3, + "grad_norm": 0.5880426736361009, + "learning_rate": 4.817646122801641e-06, + "loss": 0.2853, + "step": 6347 + }, + { + "epoch": 0.3, + "grad_norm": 0.6111334449275346, + "learning_rate": 4.817575013099549e-06, + "loss": 0.3076, + "step": 6348 + }, + { + "epoch": 0.3, + "grad_norm": 0.6344948585621523, + "learning_rate": 4.817503890060395e-06, + "loss": 0.3185, + "step": 6349 + }, + { + "epoch": 0.3, + "grad_norm": 0.6356013303636913, + "learning_rate": 4.817432753684588e-06, + "loss": 0.2941, + "step": 6350 + }, + { + "epoch": 0.3, + "grad_norm": 0.6175166875532276, + "learning_rate": 4.817361603972538e-06, + "loss": 0.2976, + "step": 6351 + }, + { + "epoch": 0.3, + "grad_norm": 0.590597464869481, + "learning_rate": 4.8172904409246555e-06, + "loss": 0.3041, + "step": 6352 + }, + { + "epoch": 0.3, + "grad_norm": 0.5723256144337745, + "learning_rate": 4.817219264541348e-06, + "loss": 0.2846, + "step": 6353 + }, + { + "epoch": 0.3, + "grad_norm": 0.5949285100837298, + "learning_rate": 4.817148074823026e-06, + "loss": 0.2688, + "step": 6354 + }, + { + "epoch": 0.3, + "grad_norm": 0.6352154640476165, + "learning_rate": 4.817076871770099e-06, + "loss": 0.2904, + "step": 6355 + }, + { + "epoch": 0.3, + "grad_norm": 0.6104435870124612, + "learning_rate": 4.817005655382978e-06, + "loss": 0.3099, + "step": 6356 + }, + { + "epoch": 0.3, + "grad_norm": 0.6099345103087882, + "learning_rate": 4.816934425662071e-06, + "loss": 0.2881, + "step": 6357 + }, + { + "epoch": 0.3, + "grad_norm": 0.6343563761838131, + "learning_rate": 4.816863182607789e-06, + "loss": 0.306, + "step": 6358 + }, + { + "epoch": 0.3, + "grad_norm": 0.6167417767150614, + "learning_rate": 4.816791926220542e-06, + "loss": 0.2779, + "step": 6359 + }, + { + "epoch": 0.3, + "grad_norm": 0.5773057888628518, + "learning_rate": 4.81672065650074e-06, + "loss": 0.2924, + "step": 6360 + }, + { + "epoch": 0.3, + "grad_norm": 0.6162397898423463, + "learning_rate": 4.816649373448792e-06, + "loss": 0.2945, + "step": 6361 + }, + { + "epoch": 0.3, + "grad_norm": 0.6272748298499355, + "learning_rate": 4.8165780770651095e-06, + "loss": 0.2897, + "step": 6362 + }, + { + "epoch": 0.3, + "grad_norm": 0.6546946148503158, + "learning_rate": 4.816506767350102e-06, + "loss": 0.2913, + "step": 6363 + }, + { + "epoch": 0.3, + "grad_norm": 0.6106342271389288, + "learning_rate": 4.81643544430418e-06, + "loss": 0.2962, + "step": 6364 + }, + { + "epoch": 0.3, + "grad_norm": 0.6079612509346274, + "learning_rate": 4.816364107927756e-06, + "loss": 0.2959, + "step": 6365 + }, + { + "epoch": 0.3, + "grad_norm": 0.6321869055360821, + "learning_rate": 4.816292758221237e-06, + "loss": 0.2943, + "step": 6366 + }, + { + "epoch": 0.3, + "grad_norm": 0.6261667561894849, + "learning_rate": 4.816221395185036e-06, + "loss": 0.2938, + "step": 6367 + }, + { + "epoch": 0.3, + "grad_norm": 0.6195850874488738, + "learning_rate": 4.816150018819564e-06, + "loss": 0.2959, + "step": 6368 + }, + { + "epoch": 0.3, + "grad_norm": 0.722763876297791, + "learning_rate": 4.816078629125229e-06, + "loss": 0.3063, + "step": 6369 + }, + { + "epoch": 0.3, + "grad_norm": 0.5808285395814748, + "learning_rate": 4.816007226102445e-06, + "loss": 0.2963, + "step": 6370 + }, + { + "epoch": 0.3, + "grad_norm": 0.6222818431202006, + "learning_rate": 4.81593580975162e-06, + "loss": 0.3284, + "step": 6371 + }, + { + "epoch": 0.3, + "grad_norm": 0.6030374243441949, + "learning_rate": 4.815864380073168e-06, + "loss": 0.2786, + "step": 6372 + }, + { + "epoch": 0.3, + "grad_norm": 0.5708308411593986, + "learning_rate": 4.815792937067498e-06, + "loss": 0.2694, + "step": 6373 + }, + { + "epoch": 0.3, + "grad_norm": 0.6158695235814774, + "learning_rate": 4.8157214807350225e-06, + "loss": 0.2715, + "step": 6374 + }, + { + "epoch": 0.3, + "grad_norm": 0.6240512210148889, + "learning_rate": 4.81565001107615e-06, + "loss": 0.2865, + "step": 6375 + }, + { + "epoch": 0.3, + "grad_norm": 0.6118311827830286, + "learning_rate": 4.815578528091296e-06, + "loss": 0.286, + "step": 6376 + }, + { + "epoch": 0.3, + "grad_norm": 0.6610445479413224, + "learning_rate": 4.815507031780868e-06, + "loss": 0.3005, + "step": 6377 + }, + { + "epoch": 0.3, + "grad_norm": 0.5855354217463637, + "learning_rate": 4.81543552214528e-06, + "loss": 0.2691, + "step": 6378 + }, + { + "epoch": 0.3, + "grad_norm": 0.6621557529623174, + "learning_rate": 4.815363999184941e-06, + "loss": 0.3029, + "step": 6379 + }, + { + "epoch": 0.3, + "grad_norm": 0.6562866931922059, + "learning_rate": 4.815292462900266e-06, + "loss": 0.2818, + "step": 6380 + }, + { + "epoch": 0.3, + "grad_norm": 0.6190382818577949, + "learning_rate": 4.815220913291664e-06, + "loss": 0.2898, + "step": 6381 + }, + { + "epoch": 0.3, + "grad_norm": 0.6280111942357042, + "learning_rate": 4.815149350359547e-06, + "loss": 0.2964, + "step": 6382 + }, + { + "epoch": 0.3, + "grad_norm": 0.6771049495735094, + "learning_rate": 4.815077774104328e-06, + "loss": 0.3041, + "step": 6383 + }, + { + "epoch": 0.3, + "grad_norm": 0.7225541557884111, + "learning_rate": 4.815006184526418e-06, + "loss": 0.314, + "step": 6384 + }, + { + "epoch": 0.3, + "grad_norm": 0.6556620252178403, + "learning_rate": 4.8149345816262295e-06, + "loss": 0.3025, + "step": 6385 + }, + { + "epoch": 0.3, + "grad_norm": 0.622969505052788, + "learning_rate": 4.814862965404174e-06, + "loss": 0.2804, + "step": 6386 + }, + { + "epoch": 0.3, + "grad_norm": 0.5985764049006888, + "learning_rate": 4.814791335860665e-06, + "loss": 0.299, + "step": 6387 + }, + { + "epoch": 0.3, + "grad_norm": 0.6430875985775593, + "learning_rate": 4.814719692996112e-06, + "loss": 0.2948, + "step": 6388 + }, + { + "epoch": 0.3, + "grad_norm": 0.6201936259036896, + "learning_rate": 4.81464803681093e-06, + "loss": 0.302, + "step": 6389 + }, + { + "epoch": 0.3, + "grad_norm": 0.6103685187327702, + "learning_rate": 4.81457636730553e-06, + "loss": 0.3041, + "step": 6390 + }, + { + "epoch": 0.3, + "grad_norm": 0.6210428512942501, + "learning_rate": 4.814504684480325e-06, + "loss": 0.2949, + "step": 6391 + }, + { + "epoch": 0.3, + "grad_norm": 0.700819643727102, + "learning_rate": 4.814432988335727e-06, + "loss": 0.2991, + "step": 6392 + }, + { + "epoch": 0.3, + "grad_norm": 0.7608995322394798, + "learning_rate": 4.814361278872149e-06, + "loss": 0.2812, + "step": 6393 + }, + { + "epoch": 0.3, + "grad_norm": 0.6201020257131432, + "learning_rate": 4.814289556090004e-06, + "loss": 0.2931, + "step": 6394 + }, + { + "epoch": 0.3, + "grad_norm": 0.6037201673739065, + "learning_rate": 4.814217819989703e-06, + "loss": 0.2678, + "step": 6395 + }, + { + "epoch": 0.3, + "grad_norm": 0.6205761054644319, + "learning_rate": 4.8141460705716615e-06, + "loss": 0.3039, + "step": 6396 + }, + { + "epoch": 0.3, + "grad_norm": 0.6329816181508058, + "learning_rate": 4.81407430783629e-06, + "loss": 0.2913, + "step": 6397 + }, + { + "epoch": 0.3, + "grad_norm": 0.6117809984799216, + "learning_rate": 4.8140025317840035e-06, + "loss": 0.2861, + "step": 6398 + }, + { + "epoch": 0.3, + "grad_norm": 0.6297320357799048, + "learning_rate": 4.813930742415213e-06, + "loss": 0.2981, + "step": 6399 + }, + { + "epoch": 0.3, + "grad_norm": 0.6225415465609447, + "learning_rate": 4.813858939730333e-06, + "loss": 0.2968, + "step": 6400 + }, + { + "epoch": 0.3, + "grad_norm": 0.6353694837768441, + "learning_rate": 4.813787123729776e-06, + "loss": 0.2825, + "step": 6401 + }, + { + "epoch": 0.3, + "grad_norm": 0.6414678855319967, + "learning_rate": 4.813715294413957e-06, + "loss": 0.3017, + "step": 6402 + }, + { + "epoch": 0.3, + "grad_norm": 0.6684713367316452, + "learning_rate": 4.813643451783286e-06, + "loss": 0.3222, + "step": 6403 + }, + { + "epoch": 0.3, + "grad_norm": 0.6882574412986505, + "learning_rate": 4.81357159583818e-06, + "loss": 0.3032, + "step": 6404 + }, + { + "epoch": 0.3, + "grad_norm": 0.6092845972739893, + "learning_rate": 4.813499726579051e-06, + "loss": 0.277, + "step": 6405 + }, + { + "epoch": 0.3, + "grad_norm": 0.6095180590819789, + "learning_rate": 4.813427844006312e-06, + "loss": 0.2941, + "step": 6406 + }, + { + "epoch": 0.3, + "grad_norm": 0.588486032852792, + "learning_rate": 4.813355948120377e-06, + "loss": 0.2828, + "step": 6407 + }, + { + "epoch": 0.3, + "grad_norm": 0.6730638836748449, + "learning_rate": 4.813284038921661e-06, + "loss": 0.304, + "step": 6408 + }, + { + "epoch": 0.3, + "grad_norm": 0.5600752128915978, + "learning_rate": 4.813212116410575e-06, + "loss": 0.2829, + "step": 6409 + }, + { + "epoch": 0.3, + "grad_norm": 0.624219892222901, + "learning_rate": 4.813140180587536e-06, + "loss": 0.3065, + "step": 6410 + }, + { + "epoch": 0.3, + "grad_norm": 0.6446660043434537, + "learning_rate": 4.813068231452956e-06, + "loss": 0.2897, + "step": 6411 + }, + { + "epoch": 0.3, + "grad_norm": 0.6148963260695935, + "learning_rate": 4.812996269007251e-06, + "loss": 0.3058, + "step": 6412 + }, + { + "epoch": 0.3, + "grad_norm": 0.6246086838554485, + "learning_rate": 4.812924293250833e-06, + "loss": 0.3141, + "step": 6413 + }, + { + "epoch": 0.3, + "grad_norm": 0.622416353900839, + "learning_rate": 4.812852304184116e-06, + "loss": 0.2941, + "step": 6414 + }, + { + "epoch": 0.3, + "grad_norm": 0.7284111655756387, + "learning_rate": 4.812780301807516e-06, + "loss": 0.325, + "step": 6415 + }, + { + "epoch": 0.3, + "grad_norm": 0.5920657883713981, + "learning_rate": 4.812708286121447e-06, + "loss": 0.2986, + "step": 6416 + }, + { + "epoch": 0.3, + "grad_norm": 0.6166957484495198, + "learning_rate": 4.812636257126323e-06, + "loss": 0.2777, + "step": 6417 + }, + { + "epoch": 0.3, + "grad_norm": 0.6261745414961125, + "learning_rate": 4.812564214822558e-06, + "loss": 0.2873, + "step": 6418 + }, + { + "epoch": 0.3, + "grad_norm": 0.6302808250048847, + "learning_rate": 4.812492159210568e-06, + "loss": 0.2833, + "step": 6419 + }, + { + "epoch": 0.3, + "grad_norm": 0.6307072442021344, + "learning_rate": 4.812420090290767e-06, + "loss": 0.3112, + "step": 6420 + }, + { + "epoch": 0.3, + "grad_norm": 0.6301700550277375, + "learning_rate": 4.812348008063569e-06, + "loss": 0.2963, + "step": 6421 + }, + { + "epoch": 0.3, + "grad_norm": 0.6007420139015295, + "learning_rate": 4.812275912529389e-06, + "loss": 0.3217, + "step": 6422 + }, + { + "epoch": 0.3, + "grad_norm": 0.6127970255175152, + "learning_rate": 4.8122038036886435e-06, + "loss": 0.2869, + "step": 6423 + }, + { + "epoch": 0.3, + "grad_norm": 0.5975960974127681, + "learning_rate": 4.8121316815417454e-06, + "loss": 0.2861, + "step": 6424 + }, + { + "epoch": 0.3, + "grad_norm": 0.6167037350228425, + "learning_rate": 4.812059546089111e-06, + "loss": 0.2845, + "step": 6425 + }, + { + "epoch": 0.3, + "grad_norm": 0.6591712816399506, + "learning_rate": 4.811987397331155e-06, + "loss": 0.2965, + "step": 6426 + }, + { + "epoch": 0.3, + "grad_norm": 0.6128541100314858, + "learning_rate": 4.811915235268292e-06, + "loss": 0.2875, + "step": 6427 + }, + { + "epoch": 0.3, + "grad_norm": 0.6246851493811437, + "learning_rate": 4.811843059900939e-06, + "loss": 0.3012, + "step": 6428 + }, + { + "epoch": 0.3, + "grad_norm": 0.6417661298502622, + "learning_rate": 4.811770871229509e-06, + "loss": 0.3229, + "step": 6429 + }, + { + "epoch": 0.3, + "grad_norm": 0.6246264861541114, + "learning_rate": 4.811698669254419e-06, + "loss": 0.2948, + "step": 6430 + }, + { + "epoch": 0.3, + "grad_norm": 0.5748540892061558, + "learning_rate": 4.811626453976085e-06, + "loss": 0.2812, + "step": 6431 + }, + { + "epoch": 0.3, + "grad_norm": 0.6382665423334578, + "learning_rate": 4.811554225394921e-06, + "loss": 0.2994, + "step": 6432 + }, + { + "epoch": 0.3, + "grad_norm": 0.6725340544611266, + "learning_rate": 4.811481983511344e-06, + "loss": 0.3125, + "step": 6433 + }, + { + "epoch": 0.3, + "grad_norm": 0.6393712380364579, + "learning_rate": 4.811409728325769e-06, + "loss": 0.3114, + "step": 6434 + }, + { + "epoch": 0.3, + "grad_norm": 0.5768839723766772, + "learning_rate": 4.8113374598386105e-06, + "loss": 0.3052, + "step": 6435 + }, + { + "epoch": 0.3, + "grad_norm": 0.7224761235150189, + "learning_rate": 4.811265178050288e-06, + "loss": 0.3054, + "step": 6436 + }, + { + "epoch": 0.3, + "grad_norm": 0.6266439347096646, + "learning_rate": 4.811192882961214e-06, + "loss": 0.292, + "step": 6437 + }, + { + "epoch": 0.3, + "grad_norm": 0.6170806104676169, + "learning_rate": 4.811120574571806e-06, + "loss": 0.3051, + "step": 6438 + }, + { + "epoch": 0.3, + "grad_norm": 0.646316333416476, + "learning_rate": 4.811048252882481e-06, + "loss": 0.3269, + "step": 6439 + }, + { + "epoch": 0.3, + "grad_norm": 0.6366213707715748, + "learning_rate": 4.810975917893654e-06, + "loss": 0.3038, + "step": 6440 + }, + { + "epoch": 0.3, + "grad_norm": 0.6401185791311386, + "learning_rate": 4.81090356960574e-06, + "loss": 0.292, + "step": 6441 + }, + { + "epoch": 0.3, + "grad_norm": 0.5813696649356882, + "learning_rate": 4.810831208019158e-06, + "loss": 0.2744, + "step": 6442 + }, + { + "epoch": 0.3, + "grad_norm": 0.6737471087427953, + "learning_rate": 4.810758833134322e-06, + "loss": 0.2924, + "step": 6443 + }, + { + "epoch": 0.3, + "grad_norm": 0.6572562049475186, + "learning_rate": 4.810686444951651e-06, + "loss": 0.3117, + "step": 6444 + }, + { + "epoch": 0.3, + "grad_norm": 0.6267209434519709, + "learning_rate": 4.81061404347156e-06, + "loss": 0.2912, + "step": 6445 + }, + { + "epoch": 0.3, + "grad_norm": 0.627615535696452, + "learning_rate": 4.810541628694466e-06, + "loss": 0.2897, + "step": 6446 + }, + { + "epoch": 0.3, + "grad_norm": 0.6435156853210229, + "learning_rate": 4.810469200620785e-06, + "loss": 0.3142, + "step": 6447 + }, + { + "epoch": 0.3, + "grad_norm": 0.6443176155037775, + "learning_rate": 4.810396759250935e-06, + "loss": 0.3288, + "step": 6448 + }, + { + "epoch": 0.3, + "grad_norm": 0.6178581713659251, + "learning_rate": 4.810324304585332e-06, + "loss": 0.2962, + "step": 6449 + }, + { + "epoch": 0.3, + "grad_norm": 0.6493621646001294, + "learning_rate": 4.810251836624394e-06, + "loss": 0.3053, + "step": 6450 + }, + { + "epoch": 0.3, + "grad_norm": 0.6526336551414509, + "learning_rate": 4.810179355368536e-06, + "loss": 0.3075, + "step": 6451 + }, + { + "epoch": 0.3, + "grad_norm": 0.5914525629236534, + "learning_rate": 4.810106860818178e-06, + "loss": 0.2867, + "step": 6452 + }, + { + "epoch": 0.3, + "grad_norm": 0.6455080376669529, + "learning_rate": 4.8100343529737345e-06, + "loss": 0.3104, + "step": 6453 + }, + { + "epoch": 0.3, + "grad_norm": 0.6571377485982773, + "learning_rate": 4.809961831835624e-06, + "loss": 0.2889, + "step": 6454 + }, + { + "epoch": 0.3, + "grad_norm": 0.624768001404484, + "learning_rate": 4.809889297404265e-06, + "loss": 0.2994, + "step": 6455 + }, + { + "epoch": 0.3, + "grad_norm": 0.5849137201154148, + "learning_rate": 4.809816749680073e-06, + "loss": 0.2809, + "step": 6456 + }, + { + "epoch": 0.3, + "grad_norm": 0.6392349253029025, + "learning_rate": 4.809744188663465e-06, + "loss": 0.2805, + "step": 6457 + }, + { + "epoch": 0.3, + "grad_norm": 0.6550707193595322, + "learning_rate": 4.8096716143548615e-06, + "loss": 0.2957, + "step": 6458 + }, + { + "epoch": 0.3, + "grad_norm": 0.6402508659767708, + "learning_rate": 4.809599026754677e-06, + "loss": 0.2742, + "step": 6459 + }, + { + "epoch": 0.3, + "grad_norm": 0.7121755108019602, + "learning_rate": 4.8095264258633315e-06, + "loss": 0.306, + "step": 6460 + }, + { + "epoch": 0.3, + "grad_norm": 0.5918562983608701, + "learning_rate": 4.809453811681242e-06, + "loss": 0.2808, + "step": 6461 + }, + { + "epoch": 0.3, + "grad_norm": 0.6383077379420466, + "learning_rate": 4.809381184208825e-06, + "loss": 0.3083, + "step": 6462 + }, + { + "epoch": 0.3, + "grad_norm": 0.572664315399171, + "learning_rate": 4.809308543446501e-06, + "loss": 0.2781, + "step": 6463 + }, + { + "epoch": 0.3, + "grad_norm": 0.6390357808564294, + "learning_rate": 4.809235889394686e-06, + "loss": 0.2957, + "step": 6464 + }, + { + "epoch": 0.3, + "grad_norm": 0.6129225894179203, + "learning_rate": 4.8091632220537996e-06, + "loss": 0.2891, + "step": 6465 + }, + { + "epoch": 0.3, + "grad_norm": 0.6241001037312398, + "learning_rate": 4.809090541424258e-06, + "loss": 0.3221, + "step": 6466 + }, + { + "epoch": 0.3, + "grad_norm": 0.6847994936636033, + "learning_rate": 4.809017847506482e-06, + "loss": 0.2988, + "step": 6467 + }, + { + "epoch": 0.3, + "grad_norm": 0.6376000029855408, + "learning_rate": 4.808945140300888e-06, + "loss": 0.303, + "step": 6468 + }, + { + "epoch": 0.3, + "grad_norm": 0.6382894691770082, + "learning_rate": 4.808872419807895e-06, + "loss": 0.2943, + "step": 6469 + }, + { + "epoch": 0.3, + "grad_norm": 0.5989442388908199, + "learning_rate": 4.808799686027922e-06, + "loss": 0.3077, + "step": 6470 + }, + { + "epoch": 0.3, + "grad_norm": 0.6130464788548811, + "learning_rate": 4.808726938961387e-06, + "loss": 0.3055, + "step": 6471 + }, + { + "epoch": 0.3, + "grad_norm": 0.6745267842600877, + "learning_rate": 4.808654178608708e-06, + "loss": 0.2971, + "step": 6472 + }, + { + "epoch": 0.3, + "grad_norm": 0.6803910461364004, + "learning_rate": 4.808581404970305e-06, + "loss": 0.308, + "step": 6473 + }, + { + "epoch": 0.3, + "grad_norm": 0.5894326509128436, + "learning_rate": 4.8085086180465965e-06, + "loss": 0.2975, + "step": 6474 + }, + { + "epoch": 0.3, + "grad_norm": 0.6016019147455104, + "learning_rate": 4.808435817838001e-06, + "loss": 0.3015, + "step": 6475 + }, + { + "epoch": 0.3, + "grad_norm": 0.620189861686934, + "learning_rate": 4.808363004344937e-06, + "loss": 0.317, + "step": 6476 + }, + { + "epoch": 0.3, + "grad_norm": 0.6486698098974619, + "learning_rate": 4.808290177567825e-06, + "loss": 0.3312, + "step": 6477 + }, + { + "epoch": 0.3, + "grad_norm": 0.6262559938244873, + "learning_rate": 4.808217337507083e-06, + "loss": 0.2807, + "step": 6478 + }, + { + "epoch": 0.3, + "grad_norm": 0.5747359469399156, + "learning_rate": 4.80814448416313e-06, + "loss": 0.2809, + "step": 6479 + }, + { + "epoch": 0.3, + "grad_norm": 0.6309126136832485, + "learning_rate": 4.808071617536386e-06, + "loss": 0.3031, + "step": 6480 + }, + { + "epoch": 0.3, + "grad_norm": 0.6389349167354913, + "learning_rate": 4.80799873762727e-06, + "loss": 0.3011, + "step": 6481 + }, + { + "epoch": 0.3, + "grad_norm": 0.6108958947710788, + "learning_rate": 4.8079258444362e-06, + "loss": 0.2793, + "step": 6482 + }, + { + "epoch": 0.3, + "grad_norm": 0.6738419122858498, + "learning_rate": 4.807852937963598e-06, + "loss": 0.3068, + "step": 6483 + }, + { + "epoch": 0.3, + "grad_norm": 0.6147211747523066, + "learning_rate": 4.807780018209882e-06, + "loss": 0.2897, + "step": 6484 + }, + { + "epoch": 0.3, + "grad_norm": 0.6405070183380235, + "learning_rate": 4.807707085175473e-06, + "loss": 0.3178, + "step": 6485 + }, + { + "epoch": 0.3, + "grad_norm": 0.6085285584759855, + "learning_rate": 4.807634138860789e-06, + "loss": 0.2944, + "step": 6486 + }, + { + "epoch": 0.3, + "grad_norm": 0.6374980779580163, + "learning_rate": 4.80756117926625e-06, + "loss": 0.2963, + "step": 6487 + }, + { + "epoch": 0.3, + "grad_norm": 0.6302689393726897, + "learning_rate": 4.807488206392278e-06, + "loss": 0.3073, + "step": 6488 + }, + { + "epoch": 0.3, + "grad_norm": 0.6246124331670857, + "learning_rate": 4.80741522023929e-06, + "loss": 0.2913, + "step": 6489 + }, + { + "epoch": 0.3, + "grad_norm": 0.6130412843848921, + "learning_rate": 4.807342220807708e-06, + "loss": 0.2994, + "step": 6490 + }, + { + "epoch": 0.3, + "grad_norm": 0.5820482813421187, + "learning_rate": 4.807269208097951e-06, + "loss": 0.2803, + "step": 6491 + }, + { + "epoch": 0.3, + "grad_norm": 0.6468755214674841, + "learning_rate": 4.80719618211044e-06, + "loss": 0.2933, + "step": 6492 + }, + { + "epoch": 0.3, + "grad_norm": 0.6065954967605646, + "learning_rate": 4.807123142845594e-06, + "loss": 0.303, + "step": 6493 + }, + { + "epoch": 0.3, + "grad_norm": 0.6236460548276084, + "learning_rate": 4.807050090303835e-06, + "loss": 0.2942, + "step": 6494 + }, + { + "epoch": 0.3, + "grad_norm": 0.5749239612680118, + "learning_rate": 4.806977024485582e-06, + "loss": 0.2944, + "step": 6495 + }, + { + "epoch": 0.3, + "grad_norm": 0.6449756413693455, + "learning_rate": 4.806903945391257e-06, + "loss": 0.3004, + "step": 6496 + }, + { + "epoch": 0.3, + "grad_norm": 0.5893072892141427, + "learning_rate": 4.806830853021279e-06, + "loss": 0.2824, + "step": 6497 + }, + { + "epoch": 0.3, + "grad_norm": 0.5786528647214324, + "learning_rate": 4.8067577473760695e-06, + "loss": 0.2896, + "step": 6498 + }, + { + "epoch": 0.3, + "grad_norm": 0.671734458091093, + "learning_rate": 4.806684628456048e-06, + "loss": 0.3053, + "step": 6499 + }, + { + "epoch": 0.3, + "grad_norm": 0.5990191772527703, + "learning_rate": 4.8066114962616375e-06, + "loss": 0.2845, + "step": 6500 + }, + { + "epoch": 0.3, + "grad_norm": 0.6399192145257537, + "learning_rate": 4.806538350793257e-06, + "loss": 0.2691, + "step": 6501 + }, + { + "epoch": 0.3, + "grad_norm": 0.6334864989984511, + "learning_rate": 4.806465192051328e-06, + "loss": 0.3055, + "step": 6502 + }, + { + "epoch": 0.3, + "grad_norm": 0.7094927566553819, + "learning_rate": 4.806392020036272e-06, + "loss": 0.3105, + "step": 6503 + }, + { + "epoch": 0.3, + "grad_norm": 0.6203260654697483, + "learning_rate": 4.806318834748509e-06, + "loss": 0.2909, + "step": 6504 + }, + { + "epoch": 0.3, + "grad_norm": 0.6166059363047092, + "learning_rate": 4.806245636188461e-06, + "loss": 0.3054, + "step": 6505 + }, + { + "epoch": 0.3, + "grad_norm": 0.6191022713388296, + "learning_rate": 4.806172424356549e-06, + "loss": 0.3026, + "step": 6506 + }, + { + "epoch": 0.3, + "grad_norm": 0.6542721153217897, + "learning_rate": 4.806099199253195e-06, + "loss": 0.2877, + "step": 6507 + }, + { + "epoch": 0.3, + "grad_norm": 0.6417005880224027, + "learning_rate": 4.806025960878818e-06, + "loss": 0.2996, + "step": 6508 + }, + { + "epoch": 0.3, + "grad_norm": 0.5832763155452052, + "learning_rate": 4.8059527092338435e-06, + "loss": 0.2799, + "step": 6509 + }, + { + "epoch": 0.3, + "grad_norm": 0.6075451502720174, + "learning_rate": 4.805879444318688e-06, + "loss": 0.2998, + "step": 6510 + }, + { + "epoch": 0.31, + "grad_norm": 0.6457267297344885, + "learning_rate": 4.805806166133778e-06, + "loss": 0.3072, + "step": 6511 + }, + { + "epoch": 0.31, + "grad_norm": 0.6187314240406843, + "learning_rate": 4.805732874679533e-06, + "loss": 0.3033, + "step": 6512 + }, + { + "epoch": 0.31, + "grad_norm": 0.5903937919013081, + "learning_rate": 4.805659569956374e-06, + "loss": 0.288, + "step": 6513 + }, + { + "epoch": 0.31, + "grad_norm": 0.6135563377921627, + "learning_rate": 4.805586251964724e-06, + "loss": 0.29, + "step": 6514 + }, + { + "epoch": 0.31, + "grad_norm": 0.6331425363836822, + "learning_rate": 4.805512920705004e-06, + "loss": 0.3096, + "step": 6515 + }, + { + "epoch": 0.31, + "grad_norm": 0.6084329386651036, + "learning_rate": 4.805439576177637e-06, + "loss": 0.2908, + "step": 6516 + }, + { + "epoch": 0.31, + "grad_norm": 0.6415808491224002, + "learning_rate": 4.805366218383045e-06, + "loss": 0.3048, + "step": 6517 + }, + { + "epoch": 0.31, + "grad_norm": 0.5701720898373069, + "learning_rate": 4.80529284732165e-06, + "loss": 0.2819, + "step": 6518 + }, + { + "epoch": 0.31, + "grad_norm": 0.5658624478029088, + "learning_rate": 4.805219462993874e-06, + "loss": 0.2748, + "step": 6519 + }, + { + "epoch": 0.31, + "grad_norm": 0.6956166150815454, + "learning_rate": 4.805146065400139e-06, + "loss": 0.312, + "step": 6520 + }, + { + "epoch": 0.31, + "grad_norm": 0.6198089317217974, + "learning_rate": 4.805072654540868e-06, + "loss": 0.2817, + "step": 6521 + }, + { + "epoch": 0.31, + "grad_norm": 0.6355267637224158, + "learning_rate": 4.804999230416482e-06, + "loss": 0.3033, + "step": 6522 + }, + { + "epoch": 0.31, + "grad_norm": 0.6367611650587656, + "learning_rate": 4.804925793027408e-06, + "loss": 0.2847, + "step": 6523 + }, + { + "epoch": 0.31, + "grad_norm": 0.663301738871258, + "learning_rate": 4.804852342374062e-06, + "loss": 0.3038, + "step": 6524 + }, + { + "epoch": 0.31, + "grad_norm": 0.6834817323753362, + "learning_rate": 4.804778878456872e-06, + "loss": 0.3093, + "step": 6525 + }, + { + "epoch": 0.31, + "grad_norm": 0.5840099176461229, + "learning_rate": 4.804705401276258e-06, + "loss": 0.2971, + "step": 6526 + }, + { + "epoch": 0.31, + "grad_norm": 0.5949594743232001, + "learning_rate": 4.804631910832644e-06, + "loss": 0.2841, + "step": 6527 + }, + { + "epoch": 0.31, + "grad_norm": 0.65395359383188, + "learning_rate": 4.804558407126452e-06, + "loss": 0.3127, + "step": 6528 + }, + { + "epoch": 0.31, + "grad_norm": 0.6588590376918197, + "learning_rate": 4.804484890158107e-06, + "loss": 0.3008, + "step": 6529 + }, + { + "epoch": 0.31, + "grad_norm": 0.5841838721618343, + "learning_rate": 4.804411359928029e-06, + "loss": 0.2936, + "step": 6530 + }, + { + "epoch": 0.31, + "grad_norm": 0.6108634817202248, + "learning_rate": 4.804337816436644e-06, + "loss": 0.3, + "step": 6531 + }, + { + "epoch": 0.31, + "grad_norm": 0.6286268180700968, + "learning_rate": 4.804264259684374e-06, + "loss": 0.2856, + "step": 6532 + }, + { + "epoch": 0.31, + "grad_norm": 0.6078917732309338, + "learning_rate": 4.804190689671641e-06, + "loss": 0.2815, + "step": 6533 + }, + { + "epoch": 0.31, + "grad_norm": 0.5634832065752128, + "learning_rate": 4.8041171063988715e-06, + "loss": 0.2816, + "step": 6534 + }, + { + "epoch": 0.31, + "grad_norm": 0.6410445398076255, + "learning_rate": 4.804043509866486e-06, + "loss": 0.3052, + "step": 6535 + }, + { + "epoch": 0.31, + "grad_norm": 0.6194473501247195, + "learning_rate": 4.80396990007491e-06, + "loss": 0.2903, + "step": 6536 + }, + { + "epoch": 0.31, + "grad_norm": 0.643311093545187, + "learning_rate": 4.8038962770245655e-06, + "loss": 0.3056, + "step": 6537 + }, + { + "epoch": 0.31, + "grad_norm": 0.6133287250965888, + "learning_rate": 4.803822640715877e-06, + "loss": 0.2857, + "step": 6538 + }, + { + "epoch": 0.31, + "grad_norm": 0.6680585518507811, + "learning_rate": 4.803748991149268e-06, + "loss": 0.3192, + "step": 6539 + }, + { + "epoch": 0.31, + "grad_norm": 0.6080445679122998, + "learning_rate": 4.803675328325164e-06, + "loss": 0.2989, + "step": 6540 + }, + { + "epoch": 0.31, + "grad_norm": 0.6072412865214252, + "learning_rate": 4.803601652243987e-06, + "loss": 0.2856, + "step": 6541 + }, + { + "epoch": 0.31, + "grad_norm": 0.6265345246644659, + "learning_rate": 4.80352796290616e-06, + "loss": 0.2901, + "step": 6542 + }, + { + "epoch": 0.31, + "grad_norm": 0.5782739637526352, + "learning_rate": 4.80345426031211e-06, + "loss": 0.2824, + "step": 6543 + }, + { + "epoch": 0.31, + "grad_norm": 0.6759411794340711, + "learning_rate": 4.803380544462259e-06, + "loss": 0.2935, + "step": 6544 + }, + { + "epoch": 0.31, + "grad_norm": 0.6521158845799649, + "learning_rate": 4.803306815357033e-06, + "loss": 0.3064, + "step": 6545 + }, + { + "epoch": 0.31, + "grad_norm": 0.641987882917084, + "learning_rate": 4.803233072996855e-06, + "loss": 0.2901, + "step": 6546 + }, + { + "epoch": 0.31, + "grad_norm": 0.6462769051127518, + "learning_rate": 4.803159317382149e-06, + "loss": 0.3061, + "step": 6547 + }, + { + "epoch": 0.31, + "grad_norm": 0.5797282255058513, + "learning_rate": 4.8030855485133396e-06, + "loss": 0.2612, + "step": 6548 + }, + { + "epoch": 0.31, + "grad_norm": 0.5868847590570614, + "learning_rate": 4.8030117663908525e-06, + "loss": 0.3, + "step": 6549 + }, + { + "epoch": 0.31, + "grad_norm": 0.6097508790505088, + "learning_rate": 4.802937971015111e-06, + "loss": 0.2993, + "step": 6550 + }, + { + "epoch": 0.31, + "grad_norm": 0.6195315693796465, + "learning_rate": 4.8028641623865405e-06, + "loss": 0.2783, + "step": 6551 + }, + { + "epoch": 0.31, + "grad_norm": 0.6731406245280781, + "learning_rate": 4.802790340505565e-06, + "loss": 0.2942, + "step": 6552 + }, + { + "epoch": 0.31, + "grad_norm": 0.6389087361476891, + "learning_rate": 4.802716505372611e-06, + "loss": 0.3158, + "step": 6553 + }, + { + "epoch": 0.31, + "grad_norm": 0.576947424586191, + "learning_rate": 4.802642656988101e-06, + "loss": 0.2801, + "step": 6554 + }, + { + "epoch": 0.31, + "grad_norm": 0.6118709183749439, + "learning_rate": 4.802568795352462e-06, + "loss": 0.2992, + "step": 6555 + }, + { + "epoch": 0.31, + "grad_norm": 0.6376459177834826, + "learning_rate": 4.802494920466118e-06, + "loss": 0.3026, + "step": 6556 + }, + { + "epoch": 0.31, + "grad_norm": 0.659826189696323, + "learning_rate": 4.802421032329495e-06, + "loss": 0.3029, + "step": 6557 + }, + { + "epoch": 0.31, + "grad_norm": 0.6466212524639132, + "learning_rate": 4.802347130943018e-06, + "loss": 0.2924, + "step": 6558 + }, + { + "epoch": 0.31, + "grad_norm": 0.6811639048324153, + "learning_rate": 4.8022732163071104e-06, + "loss": 0.3203, + "step": 6559 + }, + { + "epoch": 0.31, + "grad_norm": 0.6366532117447259, + "learning_rate": 4.8021992884222005e-06, + "loss": 0.2778, + "step": 6560 + }, + { + "epoch": 0.31, + "grad_norm": 0.6568987182274207, + "learning_rate": 4.802125347288712e-06, + "loss": 0.2949, + "step": 6561 + }, + { + "epoch": 0.31, + "grad_norm": 0.6541603905711327, + "learning_rate": 4.802051392907071e-06, + "loss": 0.291, + "step": 6562 + }, + { + "epoch": 0.31, + "grad_norm": 0.6249046643188265, + "learning_rate": 4.801977425277702e-06, + "loss": 0.3016, + "step": 6563 + }, + { + "epoch": 0.31, + "grad_norm": 0.6877936972503381, + "learning_rate": 4.801903444401032e-06, + "loss": 0.3199, + "step": 6564 + }, + { + "epoch": 0.31, + "grad_norm": 0.6836810468038866, + "learning_rate": 4.801829450277486e-06, + "loss": 0.2857, + "step": 6565 + }, + { + "epoch": 0.31, + "grad_norm": 0.5751963311902337, + "learning_rate": 4.801755442907489e-06, + "loss": 0.2862, + "step": 6566 + }, + { + "epoch": 0.31, + "grad_norm": 0.7054452852169807, + "learning_rate": 4.80168142229147e-06, + "loss": 0.2979, + "step": 6567 + }, + { + "epoch": 0.31, + "grad_norm": 0.6146910837851159, + "learning_rate": 4.801607388429852e-06, + "loss": 0.287, + "step": 6568 + }, + { + "epoch": 0.31, + "grad_norm": 0.6025778864675411, + "learning_rate": 4.801533341323063e-06, + "loss": 0.2946, + "step": 6569 + }, + { + "epoch": 0.31, + "grad_norm": 0.5987218994882247, + "learning_rate": 4.801459280971527e-06, + "loss": 0.2927, + "step": 6570 + }, + { + "epoch": 0.31, + "grad_norm": 0.6420377053659762, + "learning_rate": 4.801385207375671e-06, + "loss": 0.2964, + "step": 6571 + }, + { + "epoch": 0.31, + "grad_norm": 0.6038172583776691, + "learning_rate": 4.801311120535922e-06, + "loss": 0.2778, + "step": 6572 + }, + { + "epoch": 0.31, + "grad_norm": 0.6129574535153148, + "learning_rate": 4.801237020452706e-06, + "loss": 0.2887, + "step": 6573 + }, + { + "epoch": 0.31, + "grad_norm": 0.6098410078015327, + "learning_rate": 4.8011629071264486e-06, + "loss": 0.2824, + "step": 6574 + }, + { + "epoch": 0.31, + "grad_norm": 0.5533143007576484, + "learning_rate": 4.801088780557578e-06, + "loss": 0.2895, + "step": 6575 + }, + { + "epoch": 0.31, + "grad_norm": 0.6405879907226196, + "learning_rate": 4.801014640746519e-06, + "loss": 0.2909, + "step": 6576 + }, + { + "epoch": 0.31, + "grad_norm": 0.6361072674039717, + "learning_rate": 4.8009404876937e-06, + "loss": 0.2958, + "step": 6577 + }, + { + "epoch": 0.31, + "grad_norm": 0.5642640354540319, + "learning_rate": 4.8008663213995465e-06, + "loss": 0.2852, + "step": 6578 + }, + { + "epoch": 0.31, + "grad_norm": 0.625812147293599, + "learning_rate": 4.800792141864485e-06, + "loss": 0.2997, + "step": 6579 + }, + { + "epoch": 0.31, + "grad_norm": 0.6404500514717565, + "learning_rate": 4.800717949088944e-06, + "loss": 0.3233, + "step": 6580 + }, + { + "epoch": 0.31, + "grad_norm": 0.6143100780623463, + "learning_rate": 4.800643743073349e-06, + "loss": 0.3018, + "step": 6581 + }, + { + "epoch": 0.31, + "grad_norm": 0.6081778637819225, + "learning_rate": 4.800569523818127e-06, + "loss": 0.3087, + "step": 6582 + }, + { + "epoch": 0.31, + "grad_norm": 0.6424163590274653, + "learning_rate": 4.800495291323706e-06, + "loss": 0.2996, + "step": 6583 + }, + { + "epoch": 0.31, + "grad_norm": 0.6450175997142548, + "learning_rate": 4.800421045590513e-06, + "loss": 0.2886, + "step": 6584 + }, + { + "epoch": 0.31, + "grad_norm": 0.6863062514724276, + "learning_rate": 4.800346786618975e-06, + "loss": 0.2955, + "step": 6585 + }, + { + "epoch": 0.31, + "grad_norm": 0.6362325349961918, + "learning_rate": 4.80027251440952e-06, + "loss": 0.2957, + "step": 6586 + }, + { + "epoch": 0.31, + "grad_norm": 0.5767325855907771, + "learning_rate": 4.8001982289625735e-06, + "loss": 0.2767, + "step": 6587 + }, + { + "epoch": 0.31, + "grad_norm": 0.6178176211762827, + "learning_rate": 4.800123930278565e-06, + "loss": 0.2941, + "step": 6588 + }, + { + "epoch": 0.31, + "grad_norm": 0.5916952709350408, + "learning_rate": 4.800049618357921e-06, + "loss": 0.284, + "step": 6589 + }, + { + "epoch": 0.31, + "grad_norm": 0.6328067747835833, + "learning_rate": 4.799975293201071e-06, + "loss": 0.2898, + "step": 6590 + }, + { + "epoch": 0.31, + "grad_norm": 0.6147006386493905, + "learning_rate": 4.79990095480844e-06, + "loss": 0.3077, + "step": 6591 + }, + { + "epoch": 0.31, + "grad_norm": 0.6195132405117054, + "learning_rate": 4.799826603180457e-06, + "loss": 0.2993, + "step": 6592 + }, + { + "epoch": 0.31, + "grad_norm": 0.6370957039515163, + "learning_rate": 4.79975223831755e-06, + "loss": 0.2999, + "step": 6593 + }, + { + "epoch": 0.31, + "grad_norm": 0.5902115560315078, + "learning_rate": 4.799677860220148e-06, + "loss": 0.2943, + "step": 6594 + }, + { + "epoch": 0.31, + "grad_norm": 0.5835544224182313, + "learning_rate": 4.7996034688886765e-06, + "loss": 0.2933, + "step": 6595 + }, + { + "epoch": 0.31, + "grad_norm": 0.5957694952830577, + "learning_rate": 4.799529064323566e-06, + "loss": 0.2896, + "step": 6596 + }, + { + "epoch": 0.31, + "grad_norm": 0.6149179804873621, + "learning_rate": 4.799454646525243e-06, + "loss": 0.2993, + "step": 6597 + }, + { + "epoch": 0.31, + "grad_norm": 0.5886503714088258, + "learning_rate": 4.799380215494137e-06, + "loss": 0.2836, + "step": 6598 + }, + { + "epoch": 0.31, + "grad_norm": 0.6024844718527667, + "learning_rate": 4.799305771230675e-06, + "loss": 0.2937, + "step": 6599 + }, + { + "epoch": 0.31, + "grad_norm": 0.6449029501578568, + "learning_rate": 4.7992313137352866e-06, + "loss": 0.3055, + "step": 6600 + }, + { + "epoch": 0.31, + "grad_norm": 0.6820149952429254, + "learning_rate": 4.7991568430084e-06, + "loss": 0.3212, + "step": 6601 + }, + { + "epoch": 0.31, + "grad_norm": 0.6137704328795461, + "learning_rate": 4.799082359050445e-06, + "loss": 0.2899, + "step": 6602 + }, + { + "epoch": 0.31, + "grad_norm": 0.6109930833210385, + "learning_rate": 4.7990078618618464e-06, + "loss": 0.2846, + "step": 6603 + }, + { + "epoch": 0.31, + "grad_norm": 0.5825907093467428, + "learning_rate": 4.798933351443037e-06, + "loss": 0.2825, + "step": 6604 + }, + { + "epoch": 0.31, + "grad_norm": 0.6131612941251774, + "learning_rate": 4.798858827794443e-06, + "loss": 0.3093, + "step": 6605 + }, + { + "epoch": 0.31, + "grad_norm": 0.6000372948305601, + "learning_rate": 4.798784290916495e-06, + "loss": 0.3014, + "step": 6606 + }, + { + "epoch": 0.31, + "grad_norm": 0.6378393865748176, + "learning_rate": 4.798709740809621e-06, + "loss": 0.277, + "step": 6607 + }, + { + "epoch": 0.31, + "grad_norm": 0.6526871549516099, + "learning_rate": 4.79863517747425e-06, + "loss": 0.2934, + "step": 6608 + }, + { + "epoch": 0.31, + "grad_norm": 0.5894638489566595, + "learning_rate": 4.798560600910811e-06, + "loss": 0.2845, + "step": 6609 + }, + { + "epoch": 0.31, + "grad_norm": 0.6191671254326977, + "learning_rate": 4.798486011119734e-06, + "loss": 0.2775, + "step": 6610 + }, + { + "epoch": 0.31, + "grad_norm": 0.5973676958914304, + "learning_rate": 4.798411408101448e-06, + "loss": 0.2765, + "step": 6611 + }, + { + "epoch": 0.31, + "grad_norm": 0.5951338601337697, + "learning_rate": 4.798336791856381e-06, + "loss": 0.3061, + "step": 6612 + }, + { + "epoch": 0.31, + "grad_norm": 0.6412733831626449, + "learning_rate": 4.798262162384964e-06, + "loss": 0.2956, + "step": 6613 + }, + { + "epoch": 0.31, + "grad_norm": 0.6194380977700076, + "learning_rate": 4.798187519687626e-06, + "loss": 0.2863, + "step": 6614 + }, + { + "epoch": 0.31, + "grad_norm": 0.6277245890795148, + "learning_rate": 4.798112863764797e-06, + "loss": 0.2827, + "step": 6615 + }, + { + "epoch": 0.31, + "grad_norm": 0.5455141748231288, + "learning_rate": 4.798038194616905e-06, + "loss": 0.2815, + "step": 6616 + }, + { + "epoch": 0.31, + "grad_norm": 0.5858496722263657, + "learning_rate": 4.797963512244381e-06, + "loss": 0.2959, + "step": 6617 + }, + { + "epoch": 0.31, + "grad_norm": 0.5964263615214668, + "learning_rate": 4.7978888166476544e-06, + "loss": 0.2917, + "step": 6618 + }, + { + "epoch": 0.31, + "grad_norm": 0.5976644035144325, + "learning_rate": 4.797814107827156e-06, + "loss": 0.2873, + "step": 6619 + }, + { + "epoch": 0.31, + "grad_norm": 0.6627735000460602, + "learning_rate": 4.797739385783314e-06, + "loss": 0.2963, + "step": 6620 + }, + { + "epoch": 0.31, + "grad_norm": 0.6064239643775109, + "learning_rate": 4.797664650516561e-06, + "loss": 0.2958, + "step": 6621 + }, + { + "epoch": 0.31, + "grad_norm": 0.5877642827506953, + "learning_rate": 4.797589902027324e-06, + "loss": 0.2868, + "step": 6622 + }, + { + "epoch": 0.31, + "grad_norm": 0.6146017303146296, + "learning_rate": 4.7975151403160344e-06, + "loss": 0.2909, + "step": 6623 + }, + { + "epoch": 0.31, + "grad_norm": 0.6591928764514204, + "learning_rate": 4.797440365383124e-06, + "loss": 0.3067, + "step": 6624 + }, + { + "epoch": 0.31, + "grad_norm": 0.5882978253448711, + "learning_rate": 4.797365577229021e-06, + "loss": 0.2996, + "step": 6625 + }, + { + "epoch": 0.31, + "grad_norm": 0.6140878191830048, + "learning_rate": 4.7972907758541555e-06, + "loss": 0.3024, + "step": 6626 + }, + { + "epoch": 0.31, + "grad_norm": 0.6533384749330697, + "learning_rate": 4.79721596125896e-06, + "loss": 0.3034, + "step": 6627 + }, + { + "epoch": 0.31, + "grad_norm": 0.6639343987972169, + "learning_rate": 4.797141133443864e-06, + "loss": 0.3116, + "step": 6628 + }, + { + "epoch": 0.31, + "grad_norm": 0.6790637082836986, + "learning_rate": 4.7970662924092985e-06, + "loss": 0.3233, + "step": 6629 + }, + { + "epoch": 0.31, + "grad_norm": 0.615488789433508, + "learning_rate": 4.796991438155693e-06, + "loss": 0.3017, + "step": 6630 + }, + { + "epoch": 0.31, + "grad_norm": 0.5927135461788344, + "learning_rate": 4.79691657068348e-06, + "loss": 0.2733, + "step": 6631 + }, + { + "epoch": 0.31, + "grad_norm": 0.665492664354327, + "learning_rate": 4.796841689993089e-06, + "loss": 0.3151, + "step": 6632 + }, + { + "epoch": 0.31, + "grad_norm": 0.6370342427568181, + "learning_rate": 4.7967667960849504e-06, + "loss": 0.2937, + "step": 6633 + }, + { + "epoch": 0.31, + "grad_norm": 0.5935732237053637, + "learning_rate": 4.796691888959497e-06, + "loss": 0.2584, + "step": 6634 + }, + { + "epoch": 0.31, + "grad_norm": 0.5828079407223811, + "learning_rate": 4.796616968617159e-06, + "loss": 0.2627, + "step": 6635 + }, + { + "epoch": 0.31, + "grad_norm": 0.6050323626042696, + "learning_rate": 4.796542035058368e-06, + "loss": 0.3042, + "step": 6636 + }, + { + "epoch": 0.31, + "grad_norm": 0.5805932566780997, + "learning_rate": 4.796467088283555e-06, + "loss": 0.2842, + "step": 6637 + }, + { + "epoch": 0.31, + "grad_norm": 0.7347956572561547, + "learning_rate": 4.79639212829315e-06, + "loss": 0.3122, + "step": 6638 + }, + { + "epoch": 0.31, + "grad_norm": 0.6433448044985199, + "learning_rate": 4.796317155087586e-06, + "loss": 0.2789, + "step": 6639 + }, + { + "epoch": 0.31, + "grad_norm": 0.5989634090721363, + "learning_rate": 4.796242168667295e-06, + "loss": 0.2971, + "step": 6640 + }, + { + "epoch": 0.31, + "grad_norm": 0.6296040837749279, + "learning_rate": 4.796167169032706e-06, + "loss": 0.3004, + "step": 6641 + }, + { + "epoch": 0.31, + "grad_norm": 0.6326675329726228, + "learning_rate": 4.796092156184252e-06, + "loss": 0.3031, + "step": 6642 + }, + { + "epoch": 0.31, + "grad_norm": 0.6634038745963424, + "learning_rate": 4.796017130122365e-06, + "loss": 0.3056, + "step": 6643 + }, + { + "epoch": 0.31, + "grad_norm": 0.6770647404640497, + "learning_rate": 4.795942090847478e-06, + "loss": 0.32, + "step": 6644 + }, + { + "epoch": 0.31, + "grad_norm": 0.6706711749994808, + "learning_rate": 4.795867038360019e-06, + "loss": 0.298, + "step": 6645 + }, + { + "epoch": 0.31, + "grad_norm": 0.5880985630812148, + "learning_rate": 4.795791972660424e-06, + "loss": 0.2764, + "step": 6646 + }, + { + "epoch": 0.31, + "grad_norm": 0.6219002141490056, + "learning_rate": 4.7957168937491226e-06, + "loss": 0.3042, + "step": 6647 + }, + { + "epoch": 0.31, + "grad_norm": 0.639652122693227, + "learning_rate": 4.7956418016265475e-06, + "loss": 0.2896, + "step": 6648 + }, + { + "epoch": 0.31, + "grad_norm": 0.6260988215901895, + "learning_rate": 4.79556669629313e-06, + "loss": 0.3027, + "step": 6649 + }, + { + "epoch": 0.31, + "grad_norm": 0.617941509346728, + "learning_rate": 4.7954915777493035e-06, + "loss": 0.2802, + "step": 6650 + }, + { + "epoch": 0.31, + "grad_norm": 0.561340436822796, + "learning_rate": 4.795416445995501e-06, + "loss": 0.2729, + "step": 6651 + }, + { + "epoch": 0.31, + "grad_norm": 0.6148074760243509, + "learning_rate": 4.795341301032153e-06, + "loss": 0.2835, + "step": 6652 + }, + { + "epoch": 0.31, + "grad_norm": 0.692770100251545, + "learning_rate": 4.7952661428596926e-06, + "loss": 0.3161, + "step": 6653 + }, + { + "epoch": 0.31, + "grad_norm": 0.7126980473857287, + "learning_rate": 4.795190971478553e-06, + "loss": 0.2852, + "step": 6654 + }, + { + "epoch": 0.31, + "grad_norm": 0.6301328318728442, + "learning_rate": 4.7951157868891656e-06, + "loss": 0.2754, + "step": 6655 + }, + { + "epoch": 0.31, + "grad_norm": 0.5726366043469484, + "learning_rate": 4.795040589091964e-06, + "loss": 0.2885, + "step": 6656 + }, + { + "epoch": 0.31, + "grad_norm": 0.6240730520662572, + "learning_rate": 4.794965378087381e-06, + "loss": 0.2836, + "step": 6657 + }, + { + "epoch": 0.31, + "grad_norm": 0.6222533822226746, + "learning_rate": 4.794890153875849e-06, + "loss": 0.291, + "step": 6658 + }, + { + "epoch": 0.31, + "grad_norm": 0.640442329211622, + "learning_rate": 4.7948149164578e-06, + "loss": 0.3115, + "step": 6659 + }, + { + "epoch": 0.31, + "grad_norm": 0.6831851121508192, + "learning_rate": 4.794739665833669e-06, + "loss": 0.3306, + "step": 6660 + }, + { + "epoch": 0.31, + "grad_norm": 0.592965251231151, + "learning_rate": 4.794664402003887e-06, + "loss": 0.3015, + "step": 6661 + }, + { + "epoch": 0.31, + "grad_norm": 0.7254873960449615, + "learning_rate": 4.794589124968889e-06, + "loss": 0.3029, + "step": 6662 + }, + { + "epoch": 0.31, + "grad_norm": 0.5602976291405168, + "learning_rate": 4.794513834729107e-06, + "loss": 0.2644, + "step": 6663 + }, + { + "epoch": 0.31, + "grad_norm": 0.6338927777242748, + "learning_rate": 4.794438531284974e-06, + "loss": 0.2888, + "step": 6664 + }, + { + "epoch": 0.31, + "grad_norm": 0.6731663862700482, + "learning_rate": 4.794363214636925e-06, + "loss": 0.313, + "step": 6665 + }, + { + "epoch": 0.31, + "grad_norm": 0.6840316531301414, + "learning_rate": 4.7942878847853915e-06, + "loss": 0.2977, + "step": 6666 + }, + { + "epoch": 0.31, + "grad_norm": 0.645505778809843, + "learning_rate": 4.7942125417308084e-06, + "loss": 0.3052, + "step": 6667 + }, + { + "epoch": 0.31, + "grad_norm": 0.5938865889826843, + "learning_rate": 4.794137185473609e-06, + "loss": 0.2973, + "step": 6668 + }, + { + "epoch": 0.31, + "grad_norm": 0.7047186940203763, + "learning_rate": 4.794061816014226e-06, + "loss": 0.2946, + "step": 6669 + }, + { + "epoch": 0.31, + "grad_norm": 0.6191222357865546, + "learning_rate": 4.7939864333530946e-06, + "loss": 0.2994, + "step": 6670 + }, + { + "epoch": 0.31, + "grad_norm": 0.7636567356982716, + "learning_rate": 4.793911037490647e-06, + "loss": 0.312, + "step": 6671 + }, + { + "epoch": 0.31, + "grad_norm": 0.6326594578554282, + "learning_rate": 4.793835628427319e-06, + "loss": 0.2879, + "step": 6672 + }, + { + "epoch": 0.31, + "grad_norm": 0.6750034061379222, + "learning_rate": 4.793760206163542e-06, + "loss": 0.2915, + "step": 6673 + }, + { + "epoch": 0.31, + "grad_norm": 0.649350508536125, + "learning_rate": 4.7936847706997525e-06, + "loss": 0.3092, + "step": 6674 + }, + { + "epoch": 0.31, + "grad_norm": 0.622716440238515, + "learning_rate": 4.793609322036384e-06, + "loss": 0.3041, + "step": 6675 + }, + { + "epoch": 0.31, + "grad_norm": 0.599786176397505, + "learning_rate": 4.79353386017387e-06, + "loss": 0.2826, + "step": 6676 + }, + { + "epoch": 0.31, + "grad_norm": 0.6361712216892169, + "learning_rate": 4.7934583851126444e-06, + "loss": 0.2714, + "step": 6677 + }, + { + "epoch": 0.31, + "grad_norm": 0.6163925266254576, + "learning_rate": 4.793382896853143e-06, + "loss": 0.3202, + "step": 6678 + }, + { + "epoch": 0.31, + "grad_norm": 0.6288188081285712, + "learning_rate": 4.793307395395798e-06, + "loss": 0.2925, + "step": 6679 + }, + { + "epoch": 0.31, + "grad_norm": 0.606253653005436, + "learning_rate": 4.793231880741048e-06, + "loss": 0.2952, + "step": 6680 + }, + { + "epoch": 0.31, + "grad_norm": 0.6487424763013017, + "learning_rate": 4.793156352889323e-06, + "loss": 0.3022, + "step": 6681 + }, + { + "epoch": 0.31, + "grad_norm": 0.7052177169863901, + "learning_rate": 4.7930808118410595e-06, + "loss": 0.3201, + "step": 6682 + }, + { + "epoch": 0.31, + "grad_norm": 0.6296155390665952, + "learning_rate": 4.793005257596694e-06, + "loss": 0.298, + "step": 6683 + }, + { + "epoch": 0.31, + "grad_norm": 0.6643801924919703, + "learning_rate": 4.792929690156658e-06, + "loss": 0.3135, + "step": 6684 + }, + { + "epoch": 0.31, + "grad_norm": 0.590317934174981, + "learning_rate": 4.7928541095213875e-06, + "loss": 0.2875, + "step": 6685 + }, + { + "epoch": 0.31, + "grad_norm": 0.6435750115115787, + "learning_rate": 4.792778515691319e-06, + "loss": 0.2865, + "step": 6686 + }, + { + "epoch": 0.31, + "grad_norm": 0.7090336334555117, + "learning_rate": 4.792702908666887e-06, + "loss": 0.2971, + "step": 6687 + }, + { + "epoch": 0.31, + "grad_norm": 0.5966235143625357, + "learning_rate": 4.792627288448524e-06, + "loss": 0.2819, + "step": 6688 + }, + { + "epoch": 0.31, + "grad_norm": 0.670573516584797, + "learning_rate": 4.792551655036668e-06, + "loss": 0.3169, + "step": 6689 + }, + { + "epoch": 0.31, + "grad_norm": 0.6757905061308881, + "learning_rate": 4.792476008431754e-06, + "loss": 0.308, + "step": 6690 + }, + { + "epoch": 0.31, + "grad_norm": 0.6312286669035938, + "learning_rate": 4.792400348634216e-06, + "loss": 0.3163, + "step": 6691 + }, + { + "epoch": 0.31, + "grad_norm": 0.6145417416624962, + "learning_rate": 4.792324675644491e-06, + "loss": 0.2934, + "step": 6692 + }, + { + "epoch": 0.31, + "grad_norm": 0.6457028597708877, + "learning_rate": 4.792248989463014e-06, + "loss": 0.2887, + "step": 6693 + }, + { + "epoch": 0.31, + "grad_norm": 0.6188506195661526, + "learning_rate": 4.792173290090219e-06, + "loss": 0.2897, + "step": 6694 + }, + { + "epoch": 0.31, + "grad_norm": 0.6181428217394258, + "learning_rate": 4.792097577526543e-06, + "loss": 0.2823, + "step": 6695 + }, + { + "epoch": 0.31, + "grad_norm": 0.7456178214363496, + "learning_rate": 4.792021851772423e-06, + "loss": 0.3188, + "step": 6696 + }, + { + "epoch": 0.31, + "grad_norm": 0.6468176827607621, + "learning_rate": 4.791946112828292e-06, + "loss": 0.3098, + "step": 6697 + }, + { + "epoch": 0.31, + "grad_norm": 0.5850055121991444, + "learning_rate": 4.791870360694587e-06, + "loss": 0.2785, + "step": 6698 + }, + { + "epoch": 0.31, + "grad_norm": 0.626951965872251, + "learning_rate": 4.7917945953717445e-06, + "loss": 0.2992, + "step": 6699 + }, + { + "epoch": 0.31, + "grad_norm": 0.6263571845629468, + "learning_rate": 4.7917188168602e-06, + "loss": 0.2914, + "step": 6700 + }, + { + "epoch": 0.31, + "grad_norm": 0.6442215765003929, + "learning_rate": 4.79164302516039e-06, + "loss": 0.3013, + "step": 6701 + }, + { + "epoch": 0.31, + "grad_norm": 0.6471642858025701, + "learning_rate": 4.79156722027275e-06, + "loss": 0.3057, + "step": 6702 + }, + { + "epoch": 0.31, + "grad_norm": 0.6113110561010493, + "learning_rate": 4.791491402197717e-06, + "loss": 0.3119, + "step": 6703 + }, + { + "epoch": 0.31, + "grad_norm": 0.6492154125568546, + "learning_rate": 4.7914155709357265e-06, + "loss": 0.3111, + "step": 6704 + }, + { + "epoch": 0.31, + "grad_norm": 0.6148472322261296, + "learning_rate": 4.7913397264872156e-06, + "loss": 0.2762, + "step": 6705 + }, + { + "epoch": 0.31, + "grad_norm": 0.6290295236359765, + "learning_rate": 4.79126386885262e-06, + "loss": 0.2936, + "step": 6706 + }, + { + "epoch": 0.31, + "grad_norm": 0.6246486760697918, + "learning_rate": 4.791187998032377e-06, + "loss": 0.2897, + "step": 6707 + }, + { + "epoch": 0.31, + "grad_norm": 0.6078149633000904, + "learning_rate": 4.791112114026923e-06, + "loss": 0.2829, + "step": 6708 + }, + { + "epoch": 0.31, + "grad_norm": 0.5966608104265818, + "learning_rate": 4.791036216836695e-06, + "loss": 0.2745, + "step": 6709 + }, + { + "epoch": 0.31, + "grad_norm": 0.6099565135123983, + "learning_rate": 4.790960306462129e-06, + "loss": 0.293, + "step": 6710 + }, + { + "epoch": 0.31, + "grad_norm": 0.7501994783935046, + "learning_rate": 4.790884382903662e-06, + "loss": 0.3082, + "step": 6711 + }, + { + "epoch": 0.31, + "grad_norm": 0.7014386149646997, + "learning_rate": 4.790808446161732e-06, + "loss": 0.2935, + "step": 6712 + }, + { + "epoch": 0.31, + "grad_norm": 0.6769457018049438, + "learning_rate": 4.790732496236774e-06, + "loss": 0.3135, + "step": 6713 + }, + { + "epoch": 0.31, + "grad_norm": 0.6380809458960379, + "learning_rate": 4.790656533129228e-06, + "loss": 0.3157, + "step": 6714 + }, + { + "epoch": 0.31, + "grad_norm": 0.6099779918092356, + "learning_rate": 4.790580556839528e-06, + "loss": 0.2919, + "step": 6715 + }, + { + "epoch": 0.31, + "grad_norm": 0.6347304812905186, + "learning_rate": 4.790504567368113e-06, + "loss": 0.3311, + "step": 6716 + }, + { + "epoch": 0.31, + "grad_norm": 0.6247916632375933, + "learning_rate": 4.79042856471542e-06, + "loss": 0.3189, + "step": 6717 + }, + { + "epoch": 0.31, + "grad_norm": 0.664923934262849, + "learning_rate": 4.790352548881886e-06, + "loss": 0.3273, + "step": 6718 + }, + { + "epoch": 0.31, + "grad_norm": 0.6252472590699943, + "learning_rate": 4.790276519867949e-06, + "loss": 0.3133, + "step": 6719 + }, + { + "epoch": 0.31, + "grad_norm": 0.6855472675526801, + "learning_rate": 4.790200477674046e-06, + "loss": 0.2857, + "step": 6720 + }, + { + "epoch": 0.31, + "grad_norm": 0.6029401150361539, + "learning_rate": 4.7901244223006145e-06, + "loss": 0.2899, + "step": 6721 + }, + { + "epoch": 0.31, + "grad_norm": 0.6133143757620835, + "learning_rate": 4.790048353748094e-06, + "loss": 0.2922, + "step": 6722 + }, + { + "epoch": 0.31, + "grad_norm": 0.6318162007734442, + "learning_rate": 4.7899722720169196e-06, + "loss": 0.2866, + "step": 6723 + }, + { + "epoch": 0.31, + "grad_norm": 0.6223759182590276, + "learning_rate": 4.7898961771075305e-06, + "loss": 0.2874, + "step": 6724 + }, + { + "epoch": 0.32, + "grad_norm": 0.6224980366319585, + "learning_rate": 4.7898200690203645e-06, + "loss": 0.3132, + "step": 6725 + }, + { + "epoch": 0.32, + "grad_norm": 0.6109897212781759, + "learning_rate": 4.7897439477558595e-06, + "loss": 0.2983, + "step": 6726 + }, + { + "epoch": 0.32, + "grad_norm": 0.5697920692909192, + "learning_rate": 4.7896678133144535e-06, + "loss": 0.3053, + "step": 6727 + }, + { + "epoch": 0.32, + "grad_norm": 0.684233963301494, + "learning_rate": 4.789591665696584e-06, + "loss": 0.3065, + "step": 6728 + }, + { + "epoch": 0.32, + "grad_norm": 0.6072741429535305, + "learning_rate": 4.789515504902692e-06, + "loss": 0.2968, + "step": 6729 + }, + { + "epoch": 0.32, + "grad_norm": 0.6390916857327231, + "learning_rate": 4.789439330933212e-06, + "loss": 0.2882, + "step": 6730 + }, + { + "epoch": 0.32, + "grad_norm": 0.6129609736875438, + "learning_rate": 4.789363143788584e-06, + "loss": 0.2821, + "step": 6731 + }, + { + "epoch": 0.32, + "grad_norm": 0.6625924122098164, + "learning_rate": 4.7892869434692476e-06, + "loss": 0.3113, + "step": 6732 + }, + { + "epoch": 0.32, + "grad_norm": 0.5878670812373302, + "learning_rate": 4.789210729975641e-06, + "loss": 0.2896, + "step": 6733 + }, + { + "epoch": 0.32, + "grad_norm": 0.6574339929157921, + "learning_rate": 4.7891345033082e-06, + "loss": 0.3054, + "step": 6734 + }, + { + "epoch": 0.32, + "grad_norm": 0.623326047502507, + "learning_rate": 4.789058263467366e-06, + "loss": 0.2739, + "step": 6735 + }, + { + "epoch": 0.32, + "grad_norm": 0.6757525783755244, + "learning_rate": 4.788982010453577e-06, + "loss": 0.291, + "step": 6736 + }, + { + "epoch": 0.32, + "grad_norm": 0.6911255559800534, + "learning_rate": 4.7889057442672724e-06, + "loss": 0.319, + "step": 6737 + }, + { + "epoch": 0.32, + "grad_norm": 0.6299679778536131, + "learning_rate": 4.788829464908889e-06, + "loss": 0.2734, + "step": 6738 + }, + { + "epoch": 0.32, + "grad_norm": 0.6241080874558047, + "learning_rate": 4.788753172378869e-06, + "loss": 0.2875, + "step": 6739 + }, + { + "epoch": 0.32, + "grad_norm": 0.6455873562561697, + "learning_rate": 4.788676866677649e-06, + "loss": 0.2907, + "step": 6740 + }, + { + "epoch": 0.32, + "grad_norm": 0.6682973621926189, + "learning_rate": 4.788600547805669e-06, + "loss": 0.2946, + "step": 6741 + }, + { + "epoch": 0.32, + "grad_norm": 0.5931462191412469, + "learning_rate": 4.788524215763368e-06, + "loss": 0.2965, + "step": 6742 + }, + { + "epoch": 0.32, + "grad_norm": 0.6648557098311126, + "learning_rate": 4.7884478705511855e-06, + "loss": 0.2954, + "step": 6743 + }, + { + "epoch": 0.32, + "grad_norm": 0.6388154819469779, + "learning_rate": 4.7883715121695605e-06, + "loss": 0.2938, + "step": 6744 + }, + { + "epoch": 0.32, + "grad_norm": 0.558961828845344, + "learning_rate": 4.788295140618933e-06, + "loss": 0.2888, + "step": 6745 + }, + { + "epoch": 0.32, + "grad_norm": 0.6629056615736998, + "learning_rate": 4.788218755899742e-06, + "loss": 0.3032, + "step": 6746 + }, + { + "epoch": 0.32, + "grad_norm": 0.6695878639321495, + "learning_rate": 4.7881423580124265e-06, + "loss": 0.3151, + "step": 6747 + }, + { + "epoch": 0.32, + "grad_norm": 0.6575427836994759, + "learning_rate": 4.7880659469574275e-06, + "loss": 0.3082, + "step": 6748 + }, + { + "epoch": 0.32, + "grad_norm": 0.5852660603030773, + "learning_rate": 4.7879895227351836e-06, + "loss": 0.2845, + "step": 6749 + }, + { + "epoch": 0.32, + "grad_norm": 0.6518253289274784, + "learning_rate": 4.787913085346135e-06, + "loss": 0.304, + "step": 6750 + }, + { + "epoch": 0.32, + "grad_norm": 0.6148065539955525, + "learning_rate": 4.787836634790722e-06, + "loss": 0.307, + "step": 6751 + }, + { + "epoch": 0.32, + "grad_norm": 0.5351692415082352, + "learning_rate": 4.7877601710693845e-06, + "loss": 0.2795, + "step": 6752 + }, + { + "epoch": 0.32, + "grad_norm": 0.667388200448351, + "learning_rate": 4.787683694182562e-06, + "loss": 0.2918, + "step": 6753 + }, + { + "epoch": 0.32, + "grad_norm": 0.6023435623156916, + "learning_rate": 4.787607204130695e-06, + "loss": 0.2765, + "step": 6754 + }, + { + "epoch": 0.32, + "grad_norm": 0.6450969166164892, + "learning_rate": 4.787530700914223e-06, + "loss": 0.2993, + "step": 6755 + }, + { + "epoch": 0.32, + "grad_norm": 0.5834895216242275, + "learning_rate": 4.787454184533587e-06, + "loss": 0.2703, + "step": 6756 + }, + { + "epoch": 0.32, + "grad_norm": 0.6064004174333638, + "learning_rate": 4.787377654989227e-06, + "loss": 0.2794, + "step": 6757 + }, + { + "epoch": 0.32, + "grad_norm": 0.6969585252257512, + "learning_rate": 4.787301112281584e-06, + "loss": 0.2971, + "step": 6758 + }, + { + "epoch": 0.32, + "grad_norm": 0.6992205772874777, + "learning_rate": 4.7872245564110975e-06, + "loss": 0.3316, + "step": 6759 + }, + { + "epoch": 0.32, + "grad_norm": 0.6401331802430191, + "learning_rate": 4.787147987378209e-06, + "loss": 0.3179, + "step": 6760 + }, + { + "epoch": 0.32, + "grad_norm": 0.6211937766733787, + "learning_rate": 4.787071405183358e-06, + "loss": 0.299, + "step": 6761 + }, + { + "epoch": 0.32, + "grad_norm": 0.5824347457752344, + "learning_rate": 4.7869948098269856e-06, + "loss": 0.2783, + "step": 6762 + }, + { + "epoch": 0.32, + "grad_norm": 0.6195950381905977, + "learning_rate": 4.786918201309534e-06, + "loss": 0.2909, + "step": 6763 + }, + { + "epoch": 0.32, + "grad_norm": 0.6477807892730595, + "learning_rate": 4.7868415796314425e-06, + "loss": 0.2887, + "step": 6764 + }, + { + "epoch": 0.32, + "grad_norm": 0.6670174025589377, + "learning_rate": 4.786764944793152e-06, + "loss": 0.3356, + "step": 6765 + }, + { + "epoch": 0.32, + "grad_norm": 0.6188279223658039, + "learning_rate": 4.786688296795105e-06, + "loss": 0.3047, + "step": 6766 + }, + { + "epoch": 0.32, + "grad_norm": 0.6741267148210651, + "learning_rate": 4.7866116356377404e-06, + "loss": 0.3004, + "step": 6767 + }, + { + "epoch": 0.32, + "grad_norm": 0.5994487934448157, + "learning_rate": 4.7865349613215014e-06, + "loss": 0.3002, + "step": 6768 + }, + { + "epoch": 0.32, + "grad_norm": 0.6704127956627155, + "learning_rate": 4.7864582738468275e-06, + "loss": 0.2983, + "step": 6769 + }, + { + "epoch": 0.32, + "grad_norm": 0.6662407922567143, + "learning_rate": 4.7863815732141625e-06, + "loss": 0.2935, + "step": 6770 + }, + { + "epoch": 0.32, + "grad_norm": 0.6173779776452293, + "learning_rate": 4.786304859423944e-06, + "loss": 0.3273, + "step": 6771 + }, + { + "epoch": 0.32, + "grad_norm": 0.6301128113779848, + "learning_rate": 4.786228132476618e-06, + "loss": 0.2948, + "step": 6772 + }, + { + "epoch": 0.32, + "grad_norm": 0.5947473090481369, + "learning_rate": 4.786151392372622e-06, + "loss": 0.2893, + "step": 6773 + }, + { + "epoch": 0.32, + "grad_norm": 0.6101037281559053, + "learning_rate": 4.7860746391124e-06, + "loss": 0.3146, + "step": 6774 + }, + { + "epoch": 0.32, + "grad_norm": 0.6273713081466549, + "learning_rate": 4.785997872696394e-06, + "loss": 0.2863, + "step": 6775 + }, + { + "epoch": 0.32, + "grad_norm": 0.5748642267924943, + "learning_rate": 4.785921093125044e-06, + "loss": 0.2772, + "step": 6776 + }, + { + "epoch": 0.32, + "grad_norm": 0.6502310238182333, + "learning_rate": 4.785844300398792e-06, + "loss": 0.2861, + "step": 6777 + }, + { + "epoch": 0.32, + "grad_norm": 0.570011096977999, + "learning_rate": 4.785767494518081e-06, + "loss": 0.2944, + "step": 6778 + }, + { + "epoch": 0.32, + "grad_norm": 0.5689021417692347, + "learning_rate": 4.785690675483353e-06, + "loss": 0.275, + "step": 6779 + }, + { + "epoch": 0.32, + "grad_norm": 0.6712474267699603, + "learning_rate": 4.785613843295049e-06, + "loss": 0.2845, + "step": 6780 + }, + { + "epoch": 0.32, + "grad_norm": 0.591727580554672, + "learning_rate": 4.785536997953613e-06, + "loss": 0.2919, + "step": 6781 + }, + { + "epoch": 0.32, + "grad_norm": 0.5909458118353709, + "learning_rate": 4.7854601394594846e-06, + "loss": 0.3032, + "step": 6782 + }, + { + "epoch": 0.32, + "grad_norm": 0.5637877725991514, + "learning_rate": 4.785383267813108e-06, + "loss": 0.2737, + "step": 6783 + }, + { + "epoch": 0.32, + "grad_norm": 0.6155577960558054, + "learning_rate": 4.785306383014925e-06, + "loss": 0.2983, + "step": 6784 + }, + { + "epoch": 0.32, + "grad_norm": 0.6042147794478928, + "learning_rate": 4.785229485065378e-06, + "loss": 0.2831, + "step": 6785 + }, + { + "epoch": 0.32, + "grad_norm": 0.5887734778505844, + "learning_rate": 4.7851525739649105e-06, + "loss": 0.2909, + "step": 6786 + }, + { + "epoch": 0.32, + "grad_norm": 0.6461546542553256, + "learning_rate": 4.785075649713964e-06, + "loss": 0.301, + "step": 6787 + }, + { + "epoch": 0.32, + "grad_norm": 0.6139417171760021, + "learning_rate": 4.784998712312981e-06, + "loss": 0.3116, + "step": 6788 + }, + { + "epoch": 0.32, + "grad_norm": 0.6284704004926761, + "learning_rate": 4.784921761762405e-06, + "loss": 0.2988, + "step": 6789 + }, + { + "epoch": 0.32, + "grad_norm": 0.5917915263096103, + "learning_rate": 4.784844798062679e-06, + "loss": 0.2924, + "step": 6790 + }, + { + "epoch": 0.32, + "grad_norm": 0.6188436580567324, + "learning_rate": 4.784767821214245e-06, + "loss": 0.2962, + "step": 6791 + }, + { + "epoch": 0.32, + "grad_norm": 0.6113449721497909, + "learning_rate": 4.784690831217546e-06, + "loss": 0.3009, + "step": 6792 + }, + { + "epoch": 0.32, + "grad_norm": 0.583631993181195, + "learning_rate": 4.784613828073026e-06, + "loss": 0.2956, + "step": 6793 + }, + { + "epoch": 0.32, + "grad_norm": 0.6221818201698222, + "learning_rate": 4.784536811781127e-06, + "loss": 0.3108, + "step": 6794 + }, + { + "epoch": 0.32, + "grad_norm": 0.5725422472647465, + "learning_rate": 4.784459782342294e-06, + "loss": 0.2807, + "step": 6795 + }, + { + "epoch": 0.32, + "grad_norm": 0.7008295801822253, + "learning_rate": 4.784382739756968e-06, + "loss": 0.3206, + "step": 6796 + }, + { + "epoch": 0.32, + "grad_norm": 0.6538713999691355, + "learning_rate": 4.784305684025594e-06, + "loss": 0.3088, + "step": 6797 + }, + { + "epoch": 0.32, + "grad_norm": 0.6212150216608995, + "learning_rate": 4.784228615148615e-06, + "loss": 0.2906, + "step": 6798 + }, + { + "epoch": 0.32, + "grad_norm": 0.5639768447016785, + "learning_rate": 4.784151533126475e-06, + "loss": 0.2778, + "step": 6799 + }, + { + "epoch": 0.32, + "grad_norm": 0.6030192855797651, + "learning_rate": 4.784074437959616e-06, + "loss": 0.2986, + "step": 6800 + }, + { + "epoch": 0.32, + "grad_norm": 0.6350657816984873, + "learning_rate": 4.7839973296484836e-06, + "loss": 0.2827, + "step": 6801 + }, + { + "epoch": 0.32, + "grad_norm": 0.6364443889090742, + "learning_rate": 4.78392020819352e-06, + "loss": 0.2998, + "step": 6802 + }, + { + "epoch": 0.32, + "grad_norm": 0.7045160286300739, + "learning_rate": 4.78384307359517e-06, + "loss": 0.3018, + "step": 6803 + }, + { + "epoch": 0.32, + "grad_norm": 0.6815115230781608, + "learning_rate": 4.783765925853877e-06, + "loss": 0.3092, + "step": 6804 + }, + { + "epoch": 0.32, + "grad_norm": 0.6092682137549182, + "learning_rate": 4.783688764970085e-06, + "loss": 0.2827, + "step": 6805 + }, + { + "epoch": 0.32, + "grad_norm": 0.6680930208324195, + "learning_rate": 4.783611590944239e-06, + "loss": 0.3288, + "step": 6806 + }, + { + "epoch": 0.32, + "grad_norm": 0.6351258940397989, + "learning_rate": 4.7835344037767804e-06, + "loss": 0.3104, + "step": 6807 + }, + { + "epoch": 0.32, + "grad_norm": 0.6309457376466625, + "learning_rate": 4.783457203468157e-06, + "loss": 0.2931, + "step": 6808 + }, + { + "epoch": 0.32, + "grad_norm": 0.6020773426007577, + "learning_rate": 4.78337999001881e-06, + "loss": 0.3037, + "step": 6809 + }, + { + "epoch": 0.32, + "grad_norm": 0.6309135856639192, + "learning_rate": 4.783302763429186e-06, + "loss": 0.2856, + "step": 6810 + }, + { + "epoch": 0.32, + "grad_norm": 0.5831483316474185, + "learning_rate": 4.7832255236997286e-06, + "loss": 0.3, + "step": 6811 + }, + { + "epoch": 0.32, + "grad_norm": 0.609270118259804, + "learning_rate": 4.783148270830881e-06, + "loss": 0.2897, + "step": 6812 + }, + { + "epoch": 0.32, + "grad_norm": 0.6224170702397583, + "learning_rate": 4.78307100482309e-06, + "loss": 0.302, + "step": 6813 + }, + { + "epoch": 0.32, + "grad_norm": 0.6443703099207507, + "learning_rate": 4.782993725676799e-06, + "loss": 0.3047, + "step": 6814 + }, + { + "epoch": 0.32, + "grad_norm": 0.6092279435220377, + "learning_rate": 4.782916433392453e-06, + "loss": 0.306, + "step": 6815 + }, + { + "epoch": 0.32, + "grad_norm": 0.6127376993298204, + "learning_rate": 4.782839127970495e-06, + "loss": 0.2914, + "step": 6816 + }, + { + "epoch": 0.32, + "grad_norm": 0.649063912770158, + "learning_rate": 4.7827618094113734e-06, + "loss": 0.291, + "step": 6817 + }, + { + "epoch": 0.32, + "grad_norm": 0.6204403538097648, + "learning_rate": 4.782684477715531e-06, + "loss": 0.2976, + "step": 6818 + }, + { + "epoch": 0.32, + "grad_norm": 0.5888514833942345, + "learning_rate": 4.782607132883414e-06, + "loss": 0.2811, + "step": 6819 + }, + { + "epoch": 0.32, + "grad_norm": 0.6313710589567815, + "learning_rate": 4.782529774915465e-06, + "loss": 0.2914, + "step": 6820 + }, + { + "epoch": 0.32, + "grad_norm": 0.5932390718057042, + "learning_rate": 4.782452403812132e-06, + "loss": 0.289, + "step": 6821 + }, + { + "epoch": 0.32, + "grad_norm": 0.5618396015009922, + "learning_rate": 4.782375019573859e-06, + "loss": 0.2765, + "step": 6822 + }, + { + "epoch": 0.32, + "grad_norm": 0.6204794475531198, + "learning_rate": 4.7822976222010915e-06, + "loss": 0.2902, + "step": 6823 + }, + { + "epoch": 0.32, + "grad_norm": 0.6262322112419082, + "learning_rate": 4.782220211694274e-06, + "loss": 0.299, + "step": 6824 + }, + { + "epoch": 0.32, + "grad_norm": 0.6281256440368811, + "learning_rate": 4.782142788053854e-06, + "loss": 0.3035, + "step": 6825 + }, + { + "epoch": 0.32, + "grad_norm": 0.6383922273714296, + "learning_rate": 4.782065351280275e-06, + "loss": 0.2973, + "step": 6826 + }, + { + "epoch": 0.32, + "grad_norm": 0.5656904484296362, + "learning_rate": 4.781987901373983e-06, + "loss": 0.2881, + "step": 6827 + }, + { + "epoch": 0.32, + "grad_norm": 0.6715884724787493, + "learning_rate": 4.781910438335426e-06, + "loss": 0.2985, + "step": 6828 + }, + { + "epoch": 0.32, + "grad_norm": 0.6291035567595853, + "learning_rate": 4.7818329621650465e-06, + "loss": 0.2804, + "step": 6829 + }, + { + "epoch": 0.32, + "grad_norm": 0.620758583063766, + "learning_rate": 4.781755472863292e-06, + "loss": 0.2939, + "step": 6830 + }, + { + "epoch": 0.32, + "grad_norm": 0.650924754590003, + "learning_rate": 4.7816779704306085e-06, + "loss": 0.2986, + "step": 6831 + }, + { + "epoch": 0.32, + "grad_norm": 0.6085321084575459, + "learning_rate": 4.781600454867441e-06, + "loss": 0.2988, + "step": 6832 + }, + { + "epoch": 0.32, + "grad_norm": 0.5994576972819505, + "learning_rate": 4.781522926174237e-06, + "loss": 0.2938, + "step": 6833 + }, + { + "epoch": 0.32, + "grad_norm": 0.6758918399641763, + "learning_rate": 4.7814453843514416e-06, + "loss": 0.3038, + "step": 6834 + }, + { + "epoch": 0.32, + "grad_norm": 0.5937991193945171, + "learning_rate": 4.781367829399503e-06, + "loss": 0.2839, + "step": 6835 + }, + { + "epoch": 0.32, + "grad_norm": 0.6641726634887735, + "learning_rate": 4.781290261318864e-06, + "loss": 0.2914, + "step": 6836 + }, + { + "epoch": 0.32, + "grad_norm": 0.6384837249315415, + "learning_rate": 4.781212680109974e-06, + "loss": 0.3036, + "step": 6837 + }, + { + "epoch": 0.32, + "grad_norm": 0.5954759030320778, + "learning_rate": 4.781135085773278e-06, + "loss": 0.2988, + "step": 6838 + }, + { + "epoch": 0.32, + "grad_norm": 0.6220518496427733, + "learning_rate": 4.781057478309223e-06, + "loss": 0.2975, + "step": 6839 + }, + { + "epoch": 0.32, + "grad_norm": 0.5806033915142267, + "learning_rate": 4.780979857718256e-06, + "loss": 0.2836, + "step": 6840 + }, + { + "epoch": 0.32, + "grad_norm": 0.6253846226355699, + "learning_rate": 4.7809022240008215e-06, + "loss": 0.3071, + "step": 6841 + }, + { + "epoch": 0.32, + "grad_norm": 0.6603912190931942, + "learning_rate": 4.78082457715737e-06, + "loss": 0.293, + "step": 6842 + }, + { + "epoch": 0.32, + "grad_norm": 0.6400882675389638, + "learning_rate": 4.780746917188345e-06, + "loss": 0.3064, + "step": 6843 + }, + { + "epoch": 0.32, + "grad_norm": 0.622664185538138, + "learning_rate": 4.780669244094196e-06, + "loss": 0.2912, + "step": 6844 + }, + { + "epoch": 0.32, + "grad_norm": 0.6118787506645604, + "learning_rate": 4.780591557875368e-06, + "loss": 0.2939, + "step": 6845 + }, + { + "epoch": 0.32, + "grad_norm": 0.9743457454672286, + "learning_rate": 4.780513858532309e-06, + "loss": 0.2995, + "step": 6846 + }, + { + "epoch": 0.32, + "grad_norm": 0.6059604560769997, + "learning_rate": 4.780436146065465e-06, + "loss": 0.2686, + "step": 6847 + }, + { + "epoch": 0.32, + "grad_norm": 0.6139895860433021, + "learning_rate": 4.780358420475285e-06, + "loss": 0.2788, + "step": 6848 + }, + { + "epoch": 0.32, + "grad_norm": 0.6670356703390972, + "learning_rate": 4.780280681762216e-06, + "loss": 0.3223, + "step": 6849 + }, + { + "epoch": 0.32, + "grad_norm": 0.6605972631815106, + "learning_rate": 4.7802029299267035e-06, + "loss": 0.2863, + "step": 6850 + }, + { + "epoch": 0.32, + "grad_norm": 0.7331128327862084, + "learning_rate": 4.780125164969197e-06, + "loss": 0.3073, + "step": 6851 + }, + { + "epoch": 0.32, + "grad_norm": 0.6259920651525873, + "learning_rate": 4.780047386890143e-06, + "loss": 0.2922, + "step": 6852 + }, + { + "epoch": 0.32, + "grad_norm": 0.620904717971174, + "learning_rate": 4.7799695956899896e-06, + "loss": 0.2882, + "step": 6853 + }, + { + "epoch": 0.32, + "grad_norm": 0.6356842972001107, + "learning_rate": 4.779891791369184e-06, + "loss": 0.2963, + "step": 6854 + }, + { + "epoch": 0.32, + "grad_norm": 0.6237533951426569, + "learning_rate": 4.779813973928175e-06, + "loss": 0.3028, + "step": 6855 + }, + { + "epoch": 0.32, + "grad_norm": 0.5782287708169944, + "learning_rate": 4.779736143367409e-06, + "loss": 0.2766, + "step": 6856 + }, + { + "epoch": 0.32, + "grad_norm": 0.7079453739370345, + "learning_rate": 4.779658299687333e-06, + "loss": 0.325, + "step": 6857 + }, + { + "epoch": 0.32, + "grad_norm": 0.704575800524028, + "learning_rate": 4.779580442888399e-06, + "loss": 0.2976, + "step": 6858 + }, + { + "epoch": 0.32, + "grad_norm": 0.6571145612738398, + "learning_rate": 4.779502572971051e-06, + "loss": 0.3009, + "step": 6859 + }, + { + "epoch": 0.32, + "grad_norm": 0.565900801317214, + "learning_rate": 4.779424689935739e-06, + "loss": 0.2845, + "step": 6860 + }, + { + "epoch": 0.32, + "grad_norm": 0.6735709729251539, + "learning_rate": 4.77934679378291e-06, + "loss": 0.3171, + "step": 6861 + }, + { + "epoch": 0.32, + "grad_norm": 0.6311796225196313, + "learning_rate": 4.779268884513014e-06, + "loss": 0.317, + "step": 6862 + }, + { + "epoch": 0.32, + "grad_norm": 0.6636718053656234, + "learning_rate": 4.779190962126498e-06, + "loss": 0.3034, + "step": 6863 + }, + { + "epoch": 0.32, + "grad_norm": 0.63731013150209, + "learning_rate": 4.779113026623812e-06, + "loss": 0.2917, + "step": 6864 + }, + { + "epoch": 0.32, + "grad_norm": 0.5904525035346447, + "learning_rate": 4.779035078005401e-06, + "loss": 0.2987, + "step": 6865 + }, + { + "epoch": 0.32, + "grad_norm": 0.6158919631873302, + "learning_rate": 4.778957116271718e-06, + "loss": 0.3084, + "step": 6866 + }, + { + "epoch": 0.32, + "grad_norm": 0.7041754626773252, + "learning_rate": 4.7788791414232096e-06, + "loss": 0.2924, + "step": 6867 + }, + { + "epoch": 0.32, + "grad_norm": 0.6322086340769173, + "learning_rate": 4.7788011534603244e-06, + "loss": 0.2983, + "step": 6868 + }, + { + "epoch": 0.32, + "grad_norm": 0.6279541614720138, + "learning_rate": 4.77872315238351e-06, + "loss": 0.2944, + "step": 6869 + }, + { + "epoch": 0.32, + "grad_norm": 0.5928544613913441, + "learning_rate": 4.778645138193218e-06, + "loss": 0.2635, + "step": 6870 + }, + { + "epoch": 0.32, + "grad_norm": 0.6525388696479292, + "learning_rate": 4.778567110889895e-06, + "loss": 0.3064, + "step": 6871 + }, + { + "epoch": 0.32, + "grad_norm": 0.6277433671258675, + "learning_rate": 4.778489070473992e-06, + "loss": 0.3171, + "step": 6872 + }, + { + "epoch": 0.32, + "grad_norm": 0.6811379906196401, + "learning_rate": 4.778411016945956e-06, + "loss": 0.2993, + "step": 6873 + }, + { + "epoch": 0.32, + "grad_norm": 0.6145624104052538, + "learning_rate": 4.778332950306238e-06, + "loss": 0.3072, + "step": 6874 + }, + { + "epoch": 0.32, + "grad_norm": 0.6285731811832048, + "learning_rate": 4.7782548705552865e-06, + "loss": 0.2848, + "step": 6875 + }, + { + "epoch": 0.32, + "grad_norm": 0.6531905605339305, + "learning_rate": 4.77817677769355e-06, + "loss": 0.2848, + "step": 6876 + }, + { + "epoch": 0.32, + "grad_norm": 0.6310477424804443, + "learning_rate": 4.77809867172148e-06, + "loss": 0.288, + "step": 6877 + }, + { + "epoch": 0.32, + "grad_norm": 0.637799097474085, + "learning_rate": 4.778020552639523e-06, + "loss": 0.3053, + "step": 6878 + }, + { + "epoch": 0.32, + "grad_norm": 0.6569581661851782, + "learning_rate": 4.777942420448132e-06, + "loss": 0.3031, + "step": 6879 + }, + { + "epoch": 0.32, + "grad_norm": 0.6407593536890467, + "learning_rate": 4.777864275147754e-06, + "loss": 0.299, + "step": 6880 + }, + { + "epoch": 0.32, + "grad_norm": 0.6285656788411782, + "learning_rate": 4.777786116738839e-06, + "loss": 0.2972, + "step": 6881 + }, + { + "epoch": 0.32, + "grad_norm": 0.6043201643682852, + "learning_rate": 4.777707945221839e-06, + "loss": 0.2729, + "step": 6882 + }, + { + "epoch": 0.32, + "grad_norm": 0.619901347439824, + "learning_rate": 4.777629760597202e-06, + "loss": 0.3115, + "step": 6883 + }, + { + "epoch": 0.32, + "grad_norm": 0.6152715708123422, + "learning_rate": 4.777551562865377e-06, + "loss": 0.2911, + "step": 6884 + }, + { + "epoch": 0.32, + "grad_norm": 0.6467148258292518, + "learning_rate": 4.777473352026816e-06, + "loss": 0.2899, + "step": 6885 + }, + { + "epoch": 0.32, + "grad_norm": 0.6361383191414287, + "learning_rate": 4.7773951280819685e-06, + "loss": 0.321, + "step": 6886 + }, + { + "epoch": 0.32, + "grad_norm": 0.5505632951954131, + "learning_rate": 4.777316891031284e-06, + "loss": 0.2795, + "step": 6887 + }, + { + "epoch": 0.32, + "grad_norm": 0.6391451964099006, + "learning_rate": 4.777238640875213e-06, + "loss": 0.3061, + "step": 6888 + }, + { + "epoch": 0.32, + "grad_norm": 0.6078904886118004, + "learning_rate": 4.777160377614206e-06, + "loss": 0.2875, + "step": 6889 + }, + { + "epoch": 0.32, + "grad_norm": 0.6162767007101196, + "learning_rate": 4.777082101248714e-06, + "loss": 0.2911, + "step": 6890 + }, + { + "epoch": 0.32, + "grad_norm": 0.5779215106587001, + "learning_rate": 4.777003811779186e-06, + "loss": 0.2996, + "step": 6891 + }, + { + "epoch": 0.32, + "grad_norm": 0.6264385371847897, + "learning_rate": 4.776925509206074e-06, + "loss": 0.3058, + "step": 6892 + }, + { + "epoch": 0.32, + "grad_norm": 0.608093785313454, + "learning_rate": 4.776847193529828e-06, + "loss": 0.2893, + "step": 6893 + }, + { + "epoch": 0.32, + "grad_norm": 0.5542114547614223, + "learning_rate": 4.776768864750898e-06, + "loss": 0.2864, + "step": 6894 + }, + { + "epoch": 0.32, + "grad_norm": 0.6061762314918059, + "learning_rate": 4.7766905228697365e-06, + "loss": 0.2782, + "step": 6895 + }, + { + "epoch": 0.32, + "grad_norm": 0.6205152439645388, + "learning_rate": 4.776612167886793e-06, + "loss": 0.2944, + "step": 6896 + }, + { + "epoch": 0.32, + "grad_norm": 0.5983424462705927, + "learning_rate": 4.776533799802518e-06, + "loss": 0.2946, + "step": 6897 + }, + { + "epoch": 0.32, + "grad_norm": 0.6422914127873295, + "learning_rate": 4.776455418617363e-06, + "loss": 0.2864, + "step": 6898 + }, + { + "epoch": 0.32, + "grad_norm": 0.5907634534736821, + "learning_rate": 4.7763770243317805e-06, + "loss": 0.2979, + "step": 6899 + }, + { + "epoch": 0.32, + "grad_norm": 0.6070560161529902, + "learning_rate": 4.776298616946219e-06, + "loss": 0.2854, + "step": 6900 + }, + { + "epoch": 0.32, + "grad_norm": 0.5817809193081751, + "learning_rate": 4.7762201964611325e-06, + "loss": 0.2801, + "step": 6901 + }, + { + "epoch": 0.32, + "grad_norm": 0.6603072347985666, + "learning_rate": 4.77614176287697e-06, + "loss": 0.2897, + "step": 6902 + }, + { + "epoch": 0.32, + "grad_norm": 0.6584245181106828, + "learning_rate": 4.776063316194183e-06, + "loss": 0.3105, + "step": 6903 + }, + { + "epoch": 0.32, + "grad_norm": 0.6561539665677425, + "learning_rate": 4.775984856413225e-06, + "loss": 0.3229, + "step": 6904 + }, + { + "epoch": 0.32, + "grad_norm": 0.6321164135619515, + "learning_rate": 4.775906383534545e-06, + "loss": 0.3033, + "step": 6905 + }, + { + "epoch": 0.32, + "grad_norm": 0.6333743763636945, + "learning_rate": 4.775827897558597e-06, + "loss": 0.2998, + "step": 6906 + }, + { + "epoch": 0.32, + "grad_norm": 0.6493859072896295, + "learning_rate": 4.775749398485831e-06, + "loss": 0.298, + "step": 6907 + }, + { + "epoch": 0.32, + "grad_norm": 0.6814098663175101, + "learning_rate": 4.775670886316699e-06, + "loss": 0.3083, + "step": 6908 + }, + { + "epoch": 0.32, + "grad_norm": 0.647873421730969, + "learning_rate": 4.775592361051653e-06, + "loss": 0.3043, + "step": 6909 + }, + { + "epoch": 0.32, + "grad_norm": 0.6328999790137633, + "learning_rate": 4.775513822691146e-06, + "loss": 0.3006, + "step": 6910 + }, + { + "epoch": 0.32, + "grad_norm": 0.7329129211810387, + "learning_rate": 4.775435271235627e-06, + "loss": 0.3189, + "step": 6911 + }, + { + "epoch": 0.32, + "grad_norm": 0.6232436760873628, + "learning_rate": 4.775356706685551e-06, + "loss": 0.3084, + "step": 6912 + }, + { + "epoch": 0.32, + "grad_norm": 0.6931477265604515, + "learning_rate": 4.7752781290413695e-06, + "loss": 0.3075, + "step": 6913 + }, + { + "epoch": 0.32, + "grad_norm": 0.5966481330824122, + "learning_rate": 4.775199538303533e-06, + "loss": 0.2948, + "step": 6914 + }, + { + "epoch": 0.32, + "grad_norm": 0.6705587099620299, + "learning_rate": 4.7751209344724955e-06, + "loss": 0.3013, + "step": 6915 + }, + { + "epoch": 0.32, + "grad_norm": 0.6695072284883249, + "learning_rate": 4.775042317548709e-06, + "loss": 0.2839, + "step": 6916 + }, + { + "epoch": 0.32, + "grad_norm": 0.6531158081212579, + "learning_rate": 4.774963687532626e-06, + "loss": 0.2787, + "step": 6917 + }, + { + "epoch": 0.32, + "grad_norm": 0.7297831235384425, + "learning_rate": 4.774885044424698e-06, + "loss": 0.3125, + "step": 6918 + }, + { + "epoch": 0.32, + "grad_norm": 0.627417999704011, + "learning_rate": 4.774806388225379e-06, + "loss": 0.2893, + "step": 6919 + }, + { + "epoch": 0.32, + "grad_norm": 0.5874691761166103, + "learning_rate": 4.774727718935121e-06, + "loss": 0.2888, + "step": 6920 + }, + { + "epoch": 0.32, + "grad_norm": 0.7275796870045356, + "learning_rate": 4.7746490365543776e-06, + "loss": 0.3165, + "step": 6921 + }, + { + "epoch": 0.32, + "grad_norm": 0.6703825573659111, + "learning_rate": 4.7745703410835995e-06, + "loss": 0.2883, + "step": 6922 + }, + { + "epoch": 0.32, + "grad_norm": 0.6292320655562654, + "learning_rate": 4.77449163252324e-06, + "loss": 0.3219, + "step": 6923 + }, + { + "epoch": 0.32, + "grad_norm": 0.5806085421339808, + "learning_rate": 4.774412910873754e-06, + "loss": 0.2832, + "step": 6924 + }, + { + "epoch": 0.32, + "grad_norm": 0.6303914859735954, + "learning_rate": 4.7743341761355935e-06, + "loss": 0.3043, + "step": 6925 + }, + { + "epoch": 0.32, + "grad_norm": 0.6279011942444226, + "learning_rate": 4.774255428309211e-06, + "loss": 0.2827, + "step": 6926 + }, + { + "epoch": 0.32, + "grad_norm": 0.5705021919480717, + "learning_rate": 4.7741766673950605e-06, + "loss": 0.2904, + "step": 6927 + }, + { + "epoch": 0.32, + "grad_norm": 0.6394144880740725, + "learning_rate": 4.774097893393595e-06, + "loss": 0.2915, + "step": 6928 + }, + { + "epoch": 0.32, + "grad_norm": 0.6329032688867489, + "learning_rate": 4.7740191063052664e-06, + "loss": 0.3116, + "step": 6929 + }, + { + "epoch": 0.32, + "grad_norm": 0.6383253102850106, + "learning_rate": 4.773940306130531e-06, + "loss": 0.3151, + "step": 6930 + }, + { + "epoch": 0.32, + "grad_norm": 0.6335809914617321, + "learning_rate": 4.77386149286984e-06, + "loss": 0.2873, + "step": 6931 + }, + { + "epoch": 0.32, + "grad_norm": 0.7207473374775368, + "learning_rate": 4.773782666523647e-06, + "loss": 0.3037, + "step": 6932 + }, + { + "epoch": 0.32, + "grad_norm": 0.5601801987713836, + "learning_rate": 4.773703827092407e-06, + "loss": 0.2756, + "step": 6933 + }, + { + "epoch": 0.32, + "grad_norm": 0.5889195029285713, + "learning_rate": 4.7736249745765725e-06, + "loss": 0.2804, + "step": 6934 + }, + { + "epoch": 0.32, + "grad_norm": 0.6103029852845783, + "learning_rate": 4.773546108976599e-06, + "loss": 0.299, + "step": 6935 + }, + { + "epoch": 0.32, + "grad_norm": 0.6331213635367405, + "learning_rate": 4.773467230292937e-06, + "loss": 0.3007, + "step": 6936 + }, + { + "epoch": 0.32, + "grad_norm": 0.6333781729832997, + "learning_rate": 4.773388338526044e-06, + "loss": 0.3046, + "step": 6937 + }, + { + "epoch": 0.33, + "grad_norm": 0.689127575854476, + "learning_rate": 4.773309433676372e-06, + "loss": 0.3185, + "step": 6938 + }, + { + "epoch": 0.33, + "grad_norm": 0.6285071409110452, + "learning_rate": 4.773230515744376e-06, + "loss": 0.2891, + "step": 6939 + }, + { + "epoch": 0.33, + "grad_norm": 0.5430985266581835, + "learning_rate": 4.773151584730509e-06, + "loss": 0.2765, + "step": 6940 + }, + { + "epoch": 0.33, + "grad_norm": 0.6146998309589403, + "learning_rate": 4.773072640635226e-06, + "loss": 0.3027, + "step": 6941 + }, + { + "epoch": 0.33, + "grad_norm": 0.6547561497840626, + "learning_rate": 4.772993683458982e-06, + "loss": 0.3077, + "step": 6942 + }, + { + "epoch": 0.33, + "grad_norm": 0.6614887298609512, + "learning_rate": 4.77291471320223e-06, + "loss": 0.3233, + "step": 6943 + }, + { + "epoch": 0.33, + "grad_norm": 0.6349677893471269, + "learning_rate": 4.772835729865426e-06, + "loss": 0.3152, + "step": 6944 + }, + { + "epoch": 0.33, + "grad_norm": 0.6414363714539312, + "learning_rate": 4.772756733449023e-06, + "loss": 0.2815, + "step": 6945 + }, + { + "epoch": 0.33, + "grad_norm": 0.6393520009670259, + "learning_rate": 4.772677723953476e-06, + "loss": 0.2948, + "step": 6946 + }, + { + "epoch": 0.33, + "grad_norm": 0.6400241212965291, + "learning_rate": 4.7725987013792405e-06, + "loss": 0.2983, + "step": 6947 + }, + { + "epoch": 0.33, + "grad_norm": 0.617667135703893, + "learning_rate": 4.7725196657267705e-06, + "loss": 0.2918, + "step": 6948 + }, + { + "epoch": 0.33, + "grad_norm": 0.6106337430683656, + "learning_rate": 4.77244061699652e-06, + "loss": 0.278, + "step": 6949 + }, + { + "epoch": 0.33, + "grad_norm": 0.6179652106143176, + "learning_rate": 4.772361555188947e-06, + "loss": 0.3213, + "step": 6950 + }, + { + "epoch": 0.33, + "grad_norm": 0.628504829009979, + "learning_rate": 4.772282480304502e-06, + "loss": 0.2942, + "step": 6951 + }, + { + "epoch": 0.33, + "grad_norm": 0.6094614363109827, + "learning_rate": 4.772203392343644e-06, + "loss": 0.2898, + "step": 6952 + }, + { + "epoch": 0.33, + "grad_norm": 0.6165117983329105, + "learning_rate": 4.772124291306826e-06, + "loss": 0.2935, + "step": 6953 + }, + { + "epoch": 0.33, + "grad_norm": 0.5977317405396716, + "learning_rate": 4.772045177194504e-06, + "loss": 0.3003, + "step": 6954 + }, + { + "epoch": 0.33, + "grad_norm": 0.6206767751193423, + "learning_rate": 4.771966050007133e-06, + "loss": 0.296, + "step": 6955 + }, + { + "epoch": 0.33, + "grad_norm": 0.5829626062331945, + "learning_rate": 4.771886909745168e-06, + "loss": 0.2804, + "step": 6956 + }, + { + "epoch": 0.33, + "grad_norm": 0.626698729970327, + "learning_rate": 4.771807756409066e-06, + "loss": 0.2963, + "step": 6957 + }, + { + "epoch": 0.33, + "grad_norm": 0.6273496339539957, + "learning_rate": 4.77172858999928e-06, + "loss": 0.2951, + "step": 6958 + }, + { + "epoch": 0.33, + "grad_norm": 0.5584716502760156, + "learning_rate": 4.771649410516268e-06, + "loss": 0.2731, + "step": 6959 + }, + { + "epoch": 0.33, + "grad_norm": 0.5975724842683257, + "learning_rate": 4.771570217960484e-06, + "loss": 0.2839, + "step": 6960 + }, + { + "epoch": 0.33, + "grad_norm": 0.646160584812177, + "learning_rate": 4.771491012332384e-06, + "loss": 0.3042, + "step": 6961 + }, + { + "epoch": 0.33, + "grad_norm": 0.625503959381413, + "learning_rate": 4.771411793632425e-06, + "loss": 0.2954, + "step": 6962 + }, + { + "epoch": 0.33, + "grad_norm": 0.6620967667237448, + "learning_rate": 4.771332561861062e-06, + "loss": 0.2989, + "step": 6963 + }, + { + "epoch": 0.33, + "grad_norm": 0.6247014197879976, + "learning_rate": 4.77125331701875e-06, + "loss": 0.3011, + "step": 6964 + }, + { + "epoch": 0.33, + "grad_norm": 0.630884954104319, + "learning_rate": 4.771174059105947e-06, + "loss": 0.3058, + "step": 6965 + }, + { + "epoch": 0.33, + "grad_norm": 0.6639847541607893, + "learning_rate": 4.771094788123108e-06, + "loss": 0.3212, + "step": 6966 + }, + { + "epoch": 0.33, + "grad_norm": 0.5863160705535437, + "learning_rate": 4.771015504070689e-06, + "loss": 0.2874, + "step": 6967 + }, + { + "epoch": 0.33, + "grad_norm": 0.6517866284635684, + "learning_rate": 4.770936206949147e-06, + "loss": 0.3089, + "step": 6968 + }, + { + "epoch": 0.33, + "grad_norm": 0.5823336246306774, + "learning_rate": 4.770856896758937e-06, + "loss": 0.2988, + "step": 6969 + }, + { + "epoch": 0.33, + "grad_norm": 0.631190755788091, + "learning_rate": 4.770777573500517e-06, + "loss": 0.274, + "step": 6970 + }, + { + "epoch": 0.33, + "grad_norm": 0.6372886385746989, + "learning_rate": 4.7706982371743424e-06, + "loss": 0.29, + "step": 6971 + }, + { + "epoch": 0.33, + "grad_norm": 0.610338425093599, + "learning_rate": 4.770618887780871e-06, + "loss": 0.2991, + "step": 6972 + }, + { + "epoch": 0.33, + "grad_norm": 0.570774317194474, + "learning_rate": 4.7705395253205575e-06, + "loss": 0.2548, + "step": 6973 + }, + { + "epoch": 0.33, + "grad_norm": 0.6829429987651153, + "learning_rate": 4.770460149793861e-06, + "loss": 0.3034, + "step": 6974 + }, + { + "epoch": 0.33, + "grad_norm": 0.6350591559232837, + "learning_rate": 4.7703807612012365e-06, + "loss": 0.2844, + "step": 6975 + }, + { + "epoch": 0.33, + "grad_norm": 0.6375107914960665, + "learning_rate": 4.770301359543141e-06, + "loss": 0.2982, + "step": 6976 + }, + { + "epoch": 0.33, + "grad_norm": 0.6000690135121659, + "learning_rate": 4.770221944820032e-06, + "loss": 0.2791, + "step": 6977 + }, + { + "epoch": 0.33, + "grad_norm": 0.6110313827078676, + "learning_rate": 4.770142517032365e-06, + "loss": 0.3248, + "step": 6978 + }, + { + "epoch": 0.33, + "grad_norm": 0.5923127986847405, + "learning_rate": 4.770063076180601e-06, + "loss": 0.2777, + "step": 6979 + }, + { + "epoch": 0.33, + "grad_norm": 0.637572786575788, + "learning_rate": 4.769983622265193e-06, + "loss": 0.2966, + "step": 6980 + }, + { + "epoch": 0.33, + "grad_norm": 0.5628244087563559, + "learning_rate": 4.7699041552866e-06, + "loss": 0.2739, + "step": 6981 + }, + { + "epoch": 0.33, + "grad_norm": 0.6102094880790113, + "learning_rate": 4.769824675245279e-06, + "loss": 0.2847, + "step": 6982 + }, + { + "epoch": 0.33, + "grad_norm": 0.6267471792814153, + "learning_rate": 4.7697451821416875e-06, + "loss": 0.3052, + "step": 6983 + }, + { + "epoch": 0.33, + "grad_norm": 0.6639817912069355, + "learning_rate": 4.769665675976283e-06, + "loss": 0.3216, + "step": 6984 + }, + { + "epoch": 0.33, + "grad_norm": 0.631743765829883, + "learning_rate": 4.769586156749523e-06, + "loss": 0.2957, + "step": 6985 + }, + { + "epoch": 0.33, + "grad_norm": 0.5802457443106747, + "learning_rate": 4.769506624461865e-06, + "loss": 0.2862, + "step": 6986 + }, + { + "epoch": 0.33, + "grad_norm": 0.6292770534141418, + "learning_rate": 4.769427079113767e-06, + "loss": 0.2942, + "step": 6987 + }, + { + "epoch": 0.33, + "grad_norm": 0.6228761372200878, + "learning_rate": 4.769347520705687e-06, + "loss": 0.3051, + "step": 6988 + }, + { + "epoch": 0.33, + "grad_norm": 0.6858461165556816, + "learning_rate": 4.769267949238081e-06, + "loss": 0.3061, + "step": 6989 + }, + { + "epoch": 0.33, + "grad_norm": 0.64110446593106, + "learning_rate": 4.769188364711409e-06, + "loss": 0.2947, + "step": 6990 + }, + { + "epoch": 0.33, + "grad_norm": 0.6521783161525703, + "learning_rate": 4.769108767126129e-06, + "loss": 0.2964, + "step": 6991 + }, + { + "epoch": 0.33, + "grad_norm": 0.5711400321563834, + "learning_rate": 4.769029156482698e-06, + "loss": 0.2909, + "step": 6992 + }, + { + "epoch": 0.33, + "grad_norm": 0.624535116429599, + "learning_rate": 4.768949532781574e-06, + "loss": 0.3259, + "step": 6993 + }, + { + "epoch": 0.33, + "grad_norm": 0.6391456828455228, + "learning_rate": 4.768869896023217e-06, + "loss": 0.324, + "step": 6994 + }, + { + "epoch": 0.33, + "grad_norm": 0.7088403673153436, + "learning_rate": 4.7687902462080825e-06, + "loss": 0.3029, + "step": 6995 + }, + { + "epoch": 0.33, + "grad_norm": 0.5925343119926854, + "learning_rate": 4.768710583336631e-06, + "loss": 0.319, + "step": 6996 + }, + { + "epoch": 0.33, + "grad_norm": 0.5741227441861887, + "learning_rate": 4.768630907409321e-06, + "loss": 0.2783, + "step": 6997 + }, + { + "epoch": 0.33, + "grad_norm": 0.6211042834938812, + "learning_rate": 4.768551218426609e-06, + "loss": 0.3071, + "step": 6998 + }, + { + "epoch": 0.33, + "grad_norm": 0.5857725115224344, + "learning_rate": 4.768471516388955e-06, + "loss": 0.2897, + "step": 6999 + }, + { + "epoch": 0.33, + "grad_norm": 0.6507531252397484, + "learning_rate": 4.768391801296819e-06, + "loss": 0.295, + "step": 7000 + }, + { + "epoch": 0.33, + "grad_norm": 0.6486294894327876, + "learning_rate": 4.768312073150657e-06, + "loss": 0.3161, + "step": 7001 + }, + { + "epoch": 0.33, + "grad_norm": 0.5655086992257424, + "learning_rate": 4.768232331950929e-06, + "loss": 0.2799, + "step": 7002 + }, + { + "epoch": 0.33, + "grad_norm": 0.6138823370519436, + "learning_rate": 4.7681525776980955e-06, + "loss": 0.2979, + "step": 7003 + }, + { + "epoch": 0.33, + "grad_norm": 0.6248059422985991, + "learning_rate": 4.768072810392613e-06, + "loss": 0.2997, + "step": 7004 + }, + { + "epoch": 0.33, + "grad_norm": 0.5929748459874332, + "learning_rate": 4.767993030034941e-06, + "loss": 0.3053, + "step": 7005 + }, + { + "epoch": 0.33, + "grad_norm": 0.6144842918219847, + "learning_rate": 4.76791323662554e-06, + "loss": 0.2916, + "step": 7006 + }, + { + "epoch": 0.33, + "grad_norm": 0.5730485678665298, + "learning_rate": 4.767833430164868e-06, + "loss": 0.2793, + "step": 7007 + }, + { + "epoch": 0.33, + "grad_norm": 0.6006790526520538, + "learning_rate": 4.767753610653385e-06, + "loss": 0.2979, + "step": 7008 + }, + { + "epoch": 0.33, + "grad_norm": 0.5871091306347785, + "learning_rate": 4.7676737780915495e-06, + "loss": 0.2639, + "step": 7009 + }, + { + "epoch": 0.33, + "grad_norm": 0.6289315774664356, + "learning_rate": 4.767593932479822e-06, + "loss": 0.2678, + "step": 7010 + }, + { + "epoch": 0.33, + "grad_norm": 0.5911125269627426, + "learning_rate": 4.76751407381866e-06, + "loss": 0.2872, + "step": 7011 + }, + { + "epoch": 0.33, + "grad_norm": 0.5454318481432804, + "learning_rate": 4.767434202108527e-06, + "loss": 0.2828, + "step": 7012 + }, + { + "epoch": 0.33, + "grad_norm": 0.6251924110309678, + "learning_rate": 4.767354317349877e-06, + "loss": 0.2927, + "step": 7013 + }, + { + "epoch": 0.33, + "grad_norm": 0.6185054906863586, + "learning_rate": 4.767274419543174e-06, + "loss": 0.2877, + "step": 7014 + }, + { + "epoch": 0.33, + "grad_norm": 0.6310532806394057, + "learning_rate": 4.767194508688877e-06, + "loss": 0.308, + "step": 7015 + }, + { + "epoch": 0.33, + "grad_norm": 0.5983710625465795, + "learning_rate": 4.767114584787446e-06, + "loss": 0.2961, + "step": 7016 + }, + { + "epoch": 0.33, + "grad_norm": 0.5979639018269131, + "learning_rate": 4.767034647839339e-06, + "loss": 0.299, + "step": 7017 + }, + { + "epoch": 0.33, + "grad_norm": 0.642577902080583, + "learning_rate": 4.766954697845018e-06, + "loss": 0.2896, + "step": 7018 + }, + { + "epoch": 0.33, + "grad_norm": 0.587816445646305, + "learning_rate": 4.766874734804942e-06, + "loss": 0.3052, + "step": 7019 + }, + { + "epoch": 0.33, + "grad_norm": 0.578909654006269, + "learning_rate": 4.766794758719572e-06, + "loss": 0.277, + "step": 7020 + }, + { + "epoch": 0.33, + "grad_norm": 0.5638104704251016, + "learning_rate": 4.766714769589368e-06, + "loss": 0.2835, + "step": 7021 + }, + { + "epoch": 0.33, + "grad_norm": 0.6432167448510111, + "learning_rate": 4.76663476741479e-06, + "loss": 0.291, + "step": 7022 + }, + { + "epoch": 0.33, + "grad_norm": 0.6129462367405053, + "learning_rate": 4.7665547521962995e-06, + "loss": 0.3091, + "step": 7023 + }, + { + "epoch": 0.33, + "grad_norm": 0.5744364677373599, + "learning_rate": 4.766474723934356e-06, + "loss": 0.28, + "step": 7024 + }, + { + "epoch": 0.33, + "grad_norm": 0.6194023652930074, + "learning_rate": 4.766394682629419e-06, + "loss": 0.2869, + "step": 7025 + }, + { + "epoch": 0.33, + "grad_norm": 0.6599919618100214, + "learning_rate": 4.766314628281951e-06, + "loss": 0.3135, + "step": 7026 + }, + { + "epoch": 0.33, + "grad_norm": 0.7109787413686554, + "learning_rate": 4.766234560892411e-06, + "loss": 0.3167, + "step": 7027 + }, + { + "epoch": 0.33, + "grad_norm": 0.6156930881914601, + "learning_rate": 4.766154480461261e-06, + "loss": 0.2838, + "step": 7028 + }, + { + "epoch": 0.33, + "grad_norm": 0.6341648880815232, + "learning_rate": 4.766074386988963e-06, + "loss": 0.311, + "step": 7029 + }, + { + "epoch": 0.33, + "grad_norm": 0.5839632671065301, + "learning_rate": 4.765994280475975e-06, + "loss": 0.2805, + "step": 7030 + }, + { + "epoch": 0.33, + "grad_norm": 0.5748894857111683, + "learning_rate": 4.76591416092276e-06, + "loss": 0.2998, + "step": 7031 + }, + { + "epoch": 0.33, + "grad_norm": 0.6109018363901091, + "learning_rate": 4.765834028329778e-06, + "loss": 0.2777, + "step": 7032 + }, + { + "epoch": 0.33, + "grad_norm": 0.7030136764840285, + "learning_rate": 4.765753882697491e-06, + "loss": 0.2975, + "step": 7033 + }, + { + "epoch": 0.33, + "grad_norm": 0.6016574760807887, + "learning_rate": 4.7656737240263604e-06, + "loss": 0.2906, + "step": 7034 + }, + { + "epoch": 0.33, + "grad_norm": 0.5727721953281582, + "learning_rate": 4.765593552316846e-06, + "loss": 0.2931, + "step": 7035 + }, + { + "epoch": 0.33, + "grad_norm": 0.6503213741479242, + "learning_rate": 4.765513367569411e-06, + "loss": 0.2901, + "step": 7036 + }, + { + "epoch": 0.33, + "grad_norm": 0.6563638381041748, + "learning_rate": 4.765433169784516e-06, + "loss": 0.2963, + "step": 7037 + }, + { + "epoch": 0.33, + "grad_norm": 0.6142654420285537, + "learning_rate": 4.7653529589626216e-06, + "loss": 0.273, + "step": 7038 + }, + { + "epoch": 0.33, + "grad_norm": 0.6426377536804755, + "learning_rate": 4.765272735104191e-06, + "loss": 0.2791, + "step": 7039 + }, + { + "epoch": 0.33, + "grad_norm": 0.6158822635235748, + "learning_rate": 4.765192498209685e-06, + "loss": 0.2846, + "step": 7040 + }, + { + "epoch": 0.33, + "grad_norm": 0.6211366358409897, + "learning_rate": 4.765112248279566e-06, + "loss": 0.286, + "step": 7041 + }, + { + "epoch": 0.33, + "grad_norm": 0.5906989032423492, + "learning_rate": 4.7650319853142945e-06, + "loss": 0.2931, + "step": 7042 + }, + { + "epoch": 0.33, + "grad_norm": 0.6158503703020657, + "learning_rate": 4.764951709314333e-06, + "loss": 0.2974, + "step": 7043 + }, + { + "epoch": 0.33, + "grad_norm": 0.55617524643091, + "learning_rate": 4.764871420280145e-06, + "loss": 0.2808, + "step": 7044 + }, + { + "epoch": 0.33, + "grad_norm": 0.5861922644095505, + "learning_rate": 4.76479111821219e-06, + "loss": 0.2835, + "step": 7045 + }, + { + "epoch": 0.33, + "grad_norm": 0.6668030924333569, + "learning_rate": 4.7647108031109315e-06, + "loss": 0.3008, + "step": 7046 + }, + { + "epoch": 0.33, + "grad_norm": 0.6188127217332317, + "learning_rate": 4.764630474976833e-06, + "loss": 0.2805, + "step": 7047 + }, + { + "epoch": 0.33, + "grad_norm": 0.6243068378833373, + "learning_rate": 4.764550133810353e-06, + "loss": 0.2945, + "step": 7048 + }, + { + "epoch": 0.33, + "grad_norm": 0.6343223404640562, + "learning_rate": 4.764469779611958e-06, + "loss": 0.3059, + "step": 7049 + }, + { + "epoch": 0.33, + "grad_norm": 0.6018067375605, + "learning_rate": 4.764389412382107e-06, + "loss": 0.2851, + "step": 7050 + }, + { + "epoch": 0.33, + "grad_norm": 0.5614419054979537, + "learning_rate": 4.7643090321212655e-06, + "loss": 0.2743, + "step": 7051 + }, + { + "epoch": 0.33, + "grad_norm": 0.5832417097764869, + "learning_rate": 4.764228638829894e-06, + "loss": 0.2984, + "step": 7052 + }, + { + "epoch": 0.33, + "grad_norm": 0.6122644028428802, + "learning_rate": 4.7641482325084556e-06, + "loss": 0.3008, + "step": 7053 + }, + { + "epoch": 0.33, + "grad_norm": 0.5858916995376349, + "learning_rate": 4.764067813157413e-06, + "loss": 0.3002, + "step": 7054 + }, + { + "epoch": 0.33, + "grad_norm": 0.61990618770254, + "learning_rate": 4.76398738077723e-06, + "loss": 0.2848, + "step": 7055 + }, + { + "epoch": 0.33, + "grad_norm": 0.6697728170249563, + "learning_rate": 4.763906935368368e-06, + "loss": 0.3159, + "step": 7056 + }, + { + "epoch": 0.33, + "grad_norm": 0.6018178530278753, + "learning_rate": 4.7638264769312915e-06, + "loss": 0.2968, + "step": 7057 + }, + { + "epoch": 0.33, + "grad_norm": 0.6262265844622066, + "learning_rate": 4.763746005466462e-06, + "loss": 0.2981, + "step": 7058 + }, + { + "epoch": 0.33, + "grad_norm": 0.6111233825369398, + "learning_rate": 4.763665520974343e-06, + "loss": 0.2703, + "step": 7059 + }, + { + "epoch": 0.33, + "grad_norm": 0.6579616224506106, + "learning_rate": 4.763585023455398e-06, + "loss": 0.2874, + "step": 7060 + }, + { + "epoch": 0.33, + "grad_norm": 0.662005296592419, + "learning_rate": 4.763504512910091e-06, + "loss": 0.3076, + "step": 7061 + }, + { + "epoch": 0.33, + "grad_norm": 0.6322256836776048, + "learning_rate": 4.763423989338883e-06, + "loss": 0.3143, + "step": 7062 + }, + { + "epoch": 0.33, + "grad_norm": 0.5362249080614689, + "learning_rate": 4.763343452742239e-06, + "loss": 0.2672, + "step": 7063 + }, + { + "epoch": 0.33, + "grad_norm": 0.6448306375904549, + "learning_rate": 4.763262903120624e-06, + "loss": 0.3125, + "step": 7064 + }, + { + "epoch": 0.33, + "grad_norm": 0.5758701164409384, + "learning_rate": 4.763182340474498e-06, + "loss": 0.2769, + "step": 7065 + }, + { + "epoch": 0.33, + "grad_norm": 0.5672727706618069, + "learning_rate": 4.7631017648043275e-06, + "loss": 0.299, + "step": 7066 + }, + { + "epoch": 0.33, + "grad_norm": 0.6067629765679432, + "learning_rate": 4.763021176110575e-06, + "loss": 0.2867, + "step": 7067 + }, + { + "epoch": 0.33, + "grad_norm": 0.6161864975384377, + "learning_rate": 4.762940574393703e-06, + "loss": 0.2978, + "step": 7068 + }, + { + "epoch": 0.33, + "grad_norm": 0.610901313389123, + "learning_rate": 4.762859959654179e-06, + "loss": 0.2689, + "step": 7069 + }, + { + "epoch": 0.33, + "grad_norm": 0.7553162863863713, + "learning_rate": 4.762779331892463e-06, + "loss": 0.3071, + "step": 7070 + }, + { + "epoch": 0.33, + "grad_norm": 0.5885878935552987, + "learning_rate": 4.762698691109021e-06, + "loss": 0.2817, + "step": 7071 + }, + { + "epoch": 0.33, + "grad_norm": 0.6078252916828982, + "learning_rate": 4.762618037304317e-06, + "loss": 0.2979, + "step": 7072 + }, + { + "epoch": 0.33, + "grad_norm": 0.6153763724860114, + "learning_rate": 4.762537370478815e-06, + "loss": 0.3025, + "step": 7073 + }, + { + "epoch": 0.33, + "grad_norm": 0.6238580014374452, + "learning_rate": 4.762456690632979e-06, + "loss": 0.3015, + "step": 7074 + }, + { + "epoch": 0.33, + "grad_norm": 0.687016877673287, + "learning_rate": 4.762375997767273e-06, + "loss": 0.308, + "step": 7075 + }, + { + "epoch": 0.33, + "grad_norm": 0.6803545451005307, + "learning_rate": 4.762295291882163e-06, + "loss": 0.2978, + "step": 7076 + }, + { + "epoch": 0.33, + "grad_norm": 0.6476542692974647, + "learning_rate": 4.762214572978111e-06, + "loss": 0.2823, + "step": 7077 + }, + { + "epoch": 0.33, + "grad_norm": 0.6502421929911508, + "learning_rate": 4.762133841055583e-06, + "loss": 0.3002, + "step": 7078 + }, + { + "epoch": 0.33, + "grad_norm": 0.6795767773985185, + "learning_rate": 4.762053096115044e-06, + "loss": 0.3252, + "step": 7079 + }, + { + "epoch": 0.33, + "grad_norm": 0.6711201217165709, + "learning_rate": 4.7619723381569575e-06, + "loss": 0.3054, + "step": 7080 + }, + { + "epoch": 0.33, + "grad_norm": 0.6849784779701654, + "learning_rate": 4.761891567181788e-06, + "loss": 0.318, + "step": 7081 + }, + { + "epoch": 0.33, + "grad_norm": 0.6644916818637296, + "learning_rate": 4.761810783190002e-06, + "loss": 0.3126, + "step": 7082 + }, + { + "epoch": 0.33, + "grad_norm": 0.6290289381771458, + "learning_rate": 4.761729986182063e-06, + "loss": 0.2805, + "step": 7083 + }, + { + "epoch": 0.33, + "grad_norm": 0.6780456774596239, + "learning_rate": 4.761649176158436e-06, + "loss": 0.3273, + "step": 7084 + }, + { + "epoch": 0.33, + "grad_norm": 0.5840186386416225, + "learning_rate": 4.761568353119587e-06, + "loss": 0.2822, + "step": 7085 + }, + { + "epoch": 0.33, + "grad_norm": 0.5863102408046682, + "learning_rate": 4.76148751706598e-06, + "loss": 0.2846, + "step": 7086 + }, + { + "epoch": 0.33, + "grad_norm": 0.627777346174055, + "learning_rate": 4.761406667998082e-06, + "loss": 0.304, + "step": 7087 + }, + { + "epoch": 0.33, + "grad_norm": 0.60491242365142, + "learning_rate": 4.761325805916356e-06, + "loss": 0.2707, + "step": 7088 + }, + { + "epoch": 0.33, + "grad_norm": 0.5840242808683851, + "learning_rate": 4.761244930821268e-06, + "loss": 0.2873, + "step": 7089 + }, + { + "epoch": 0.33, + "grad_norm": 0.6546114673676194, + "learning_rate": 4.761164042713284e-06, + "loss": 0.3264, + "step": 7090 + }, + { + "epoch": 0.33, + "grad_norm": 0.599051051001699, + "learning_rate": 4.76108314159287e-06, + "loss": 0.3099, + "step": 7091 + }, + { + "epoch": 0.33, + "grad_norm": 0.5886445748978977, + "learning_rate": 4.761002227460491e-06, + "loss": 0.2875, + "step": 7092 + }, + { + "epoch": 0.33, + "grad_norm": 0.6305027965813983, + "learning_rate": 4.760921300316611e-06, + "loss": 0.2901, + "step": 7093 + }, + { + "epoch": 0.33, + "grad_norm": 0.6285455947455879, + "learning_rate": 4.760840360161698e-06, + "loss": 0.2763, + "step": 7094 + }, + { + "epoch": 0.33, + "grad_norm": 0.6129587651114137, + "learning_rate": 4.760759406996217e-06, + "loss": 0.2788, + "step": 7095 + }, + { + "epoch": 0.33, + "grad_norm": 0.5584969309480788, + "learning_rate": 4.760678440820634e-06, + "loss": 0.2725, + "step": 7096 + }, + { + "epoch": 0.33, + "grad_norm": 0.5800539720831643, + "learning_rate": 4.760597461635414e-06, + "loss": 0.2644, + "step": 7097 + }, + { + "epoch": 0.33, + "grad_norm": 0.6237134845370319, + "learning_rate": 4.760516469441025e-06, + "loss": 0.2868, + "step": 7098 + }, + { + "epoch": 0.33, + "grad_norm": 0.6773897850985493, + "learning_rate": 4.760435464237932e-06, + "loss": 0.2907, + "step": 7099 + }, + { + "epoch": 0.33, + "grad_norm": 0.5868304635383637, + "learning_rate": 4.7603544460266e-06, + "loss": 0.2873, + "step": 7100 + }, + { + "epoch": 0.33, + "grad_norm": 0.6124479613264049, + "learning_rate": 4.7602734148074955e-06, + "loss": 0.2814, + "step": 7101 + }, + { + "epoch": 0.33, + "grad_norm": 0.5999626190576688, + "learning_rate": 4.760192370581087e-06, + "loss": 0.3066, + "step": 7102 + }, + { + "epoch": 0.33, + "grad_norm": 1.1239927609798785, + "learning_rate": 4.760111313347839e-06, + "loss": 0.2916, + "step": 7103 + }, + { + "epoch": 0.33, + "grad_norm": 0.6014176527776756, + "learning_rate": 4.760030243108219e-06, + "loss": 0.2865, + "step": 7104 + }, + { + "epoch": 0.33, + "grad_norm": 0.6285056515279009, + "learning_rate": 4.759949159862693e-06, + "loss": 0.3044, + "step": 7105 + }, + { + "epoch": 0.33, + "grad_norm": 0.6201244189609648, + "learning_rate": 4.759868063611727e-06, + "loss": 0.2648, + "step": 7106 + }, + { + "epoch": 0.33, + "grad_norm": 0.6864507874251349, + "learning_rate": 4.759786954355788e-06, + "loss": 0.3174, + "step": 7107 + }, + { + "epoch": 0.33, + "grad_norm": 0.5998908108846531, + "learning_rate": 4.759705832095344e-06, + "loss": 0.2889, + "step": 7108 + }, + { + "epoch": 0.33, + "grad_norm": 0.6296268639018545, + "learning_rate": 4.7596246968308605e-06, + "loss": 0.2873, + "step": 7109 + }, + { + "epoch": 0.33, + "grad_norm": 0.6853723651532513, + "learning_rate": 4.759543548562805e-06, + "loss": 0.3075, + "step": 7110 + }, + { + "epoch": 0.33, + "grad_norm": 0.649138451319389, + "learning_rate": 4.7594623872916436e-06, + "loss": 0.2996, + "step": 7111 + }, + { + "epoch": 0.33, + "grad_norm": 0.595214217727851, + "learning_rate": 4.7593812130178445e-06, + "loss": 0.2857, + "step": 7112 + }, + { + "epoch": 0.33, + "grad_norm": 0.6815890342315153, + "learning_rate": 4.7593000257418745e-06, + "loss": 0.3168, + "step": 7113 + }, + { + "epoch": 0.33, + "grad_norm": 0.6441090180352892, + "learning_rate": 4.7592188254642e-06, + "loss": 0.2875, + "step": 7114 + }, + { + "epoch": 0.33, + "grad_norm": 0.6478353901669629, + "learning_rate": 4.75913761218529e-06, + "loss": 0.2762, + "step": 7115 + }, + { + "epoch": 0.33, + "grad_norm": 0.5794174267287522, + "learning_rate": 4.759056385905611e-06, + "loss": 0.2807, + "step": 7116 + }, + { + "epoch": 0.33, + "grad_norm": 0.6422204047817497, + "learning_rate": 4.7589751466256295e-06, + "loss": 0.2963, + "step": 7117 + }, + { + "epoch": 0.33, + "grad_norm": 0.5869346405260522, + "learning_rate": 4.758893894345814e-06, + "loss": 0.298, + "step": 7118 + }, + { + "epoch": 0.33, + "grad_norm": 0.6520441119038152, + "learning_rate": 4.758812629066631e-06, + "loss": 0.3044, + "step": 7119 + }, + { + "epoch": 0.33, + "grad_norm": 0.5590673698603426, + "learning_rate": 4.758731350788551e-06, + "loss": 0.281, + "step": 7120 + }, + { + "epoch": 0.33, + "grad_norm": 0.6592953679144379, + "learning_rate": 4.758650059512038e-06, + "loss": 0.2989, + "step": 7121 + }, + { + "epoch": 0.33, + "grad_norm": 0.6057467424398495, + "learning_rate": 4.758568755237562e-06, + "loss": 0.3233, + "step": 7122 + }, + { + "epoch": 0.33, + "grad_norm": 0.5733130789744583, + "learning_rate": 4.7584874379655925e-06, + "loss": 0.287, + "step": 7123 + }, + { + "epoch": 0.33, + "grad_norm": 0.6376742635815434, + "learning_rate": 4.7584061076965926e-06, + "loss": 0.3147, + "step": 7124 + }, + { + "epoch": 0.33, + "grad_norm": 0.5898143422396902, + "learning_rate": 4.758324764431035e-06, + "loss": 0.2991, + "step": 7125 + }, + { + "epoch": 0.33, + "grad_norm": 0.6240396045206196, + "learning_rate": 4.758243408169385e-06, + "loss": 0.2913, + "step": 7126 + }, + { + "epoch": 0.33, + "grad_norm": 0.5672514428013556, + "learning_rate": 4.7581620389121115e-06, + "loss": 0.269, + "step": 7127 + }, + { + "epoch": 0.33, + "grad_norm": 0.5991488735740395, + "learning_rate": 4.758080656659684e-06, + "loss": 0.3017, + "step": 7128 + }, + { + "epoch": 0.33, + "grad_norm": 0.6123608971168918, + "learning_rate": 4.75799926141257e-06, + "loss": 0.2883, + "step": 7129 + }, + { + "epoch": 0.33, + "grad_norm": 0.5919607754715226, + "learning_rate": 4.757917853171237e-06, + "loss": 0.2911, + "step": 7130 + }, + { + "epoch": 0.33, + "grad_norm": 0.6535304728547953, + "learning_rate": 4.7578364319361545e-06, + "loss": 0.306, + "step": 7131 + }, + { + "epoch": 0.33, + "grad_norm": 0.5999261838463317, + "learning_rate": 4.757754997707791e-06, + "loss": 0.2855, + "step": 7132 + }, + { + "epoch": 0.33, + "grad_norm": 0.6513761886079614, + "learning_rate": 4.757673550486615e-06, + "loss": 0.2963, + "step": 7133 + }, + { + "epoch": 0.33, + "grad_norm": 0.5968129367525258, + "learning_rate": 4.757592090273095e-06, + "loss": 0.2948, + "step": 7134 + }, + { + "epoch": 0.33, + "grad_norm": 0.6143774367846461, + "learning_rate": 4.7575106170677e-06, + "loss": 0.2822, + "step": 7135 + }, + { + "epoch": 0.33, + "grad_norm": 0.6183053844600567, + "learning_rate": 4.757429130870899e-06, + "loss": 0.2718, + "step": 7136 + }, + { + "epoch": 0.33, + "grad_norm": 0.6116022424362277, + "learning_rate": 4.75734763168316e-06, + "loss": 0.2825, + "step": 7137 + }, + { + "epoch": 0.33, + "grad_norm": 0.6698052039229963, + "learning_rate": 4.757266119504953e-06, + "loss": 0.2974, + "step": 7138 + }, + { + "epoch": 0.33, + "grad_norm": 0.6351806501995143, + "learning_rate": 4.757184594336747e-06, + "loss": 0.2871, + "step": 7139 + }, + { + "epoch": 0.33, + "grad_norm": 0.6544855267118413, + "learning_rate": 4.757103056179012e-06, + "loss": 0.292, + "step": 7140 + }, + { + "epoch": 0.33, + "grad_norm": 0.6039144638427417, + "learning_rate": 4.757021505032214e-06, + "loss": 0.2968, + "step": 7141 + }, + { + "epoch": 0.33, + "grad_norm": 0.6973730468712719, + "learning_rate": 4.756939940896826e-06, + "loss": 0.3176, + "step": 7142 + }, + { + "epoch": 0.33, + "grad_norm": 0.6379533935627639, + "learning_rate": 4.7568583637733165e-06, + "loss": 0.3126, + "step": 7143 + }, + { + "epoch": 0.33, + "grad_norm": 0.6184769182239683, + "learning_rate": 4.756776773662153e-06, + "loss": 0.2837, + "step": 7144 + }, + { + "epoch": 0.33, + "grad_norm": 0.6718833529217535, + "learning_rate": 4.756695170563807e-06, + "loss": 0.3178, + "step": 7145 + }, + { + "epoch": 0.33, + "grad_norm": 0.6291734176122026, + "learning_rate": 4.756613554478747e-06, + "loss": 0.2995, + "step": 7146 + }, + { + "epoch": 0.33, + "grad_norm": 0.6618080495411175, + "learning_rate": 4.756531925407444e-06, + "loss": 0.3046, + "step": 7147 + }, + { + "epoch": 0.33, + "grad_norm": 0.6815547370501628, + "learning_rate": 4.756450283350367e-06, + "loss": 0.2908, + "step": 7148 + }, + { + "epoch": 0.33, + "grad_norm": 0.612940795444423, + "learning_rate": 4.756368628307985e-06, + "loss": 0.282, + "step": 7149 + }, + { + "epoch": 0.33, + "grad_norm": 0.5574265508507142, + "learning_rate": 4.756286960280768e-06, + "loss": 0.2563, + "step": 7150 + }, + { + "epoch": 0.33, + "grad_norm": 0.6149979258408274, + "learning_rate": 4.756205279269188e-06, + "loss": 0.2849, + "step": 7151 + }, + { + "epoch": 0.34, + "grad_norm": 0.6664096281497885, + "learning_rate": 4.756123585273714e-06, + "loss": 0.3201, + "step": 7152 + }, + { + "epoch": 0.34, + "grad_norm": 0.6928019915590308, + "learning_rate": 4.756041878294814e-06, + "loss": 0.3321, + "step": 7153 + }, + { + "epoch": 0.34, + "grad_norm": 0.6493029801826877, + "learning_rate": 4.755960158332961e-06, + "loss": 0.2884, + "step": 7154 + }, + { + "epoch": 0.34, + "grad_norm": 0.6495034204744425, + "learning_rate": 4.755878425388625e-06, + "loss": 0.3054, + "step": 7155 + }, + { + "epoch": 0.34, + "grad_norm": 0.642903029429679, + "learning_rate": 4.755796679462275e-06, + "loss": 0.3101, + "step": 7156 + }, + { + "epoch": 0.34, + "grad_norm": 0.6640352843556018, + "learning_rate": 4.755714920554382e-06, + "loss": 0.3151, + "step": 7157 + }, + { + "epoch": 0.34, + "grad_norm": 0.7013541090844082, + "learning_rate": 4.7556331486654174e-06, + "loss": 0.3063, + "step": 7158 + }, + { + "epoch": 0.34, + "grad_norm": 0.57649035797841, + "learning_rate": 4.75555136379585e-06, + "loss": 0.2551, + "step": 7159 + }, + { + "epoch": 0.34, + "grad_norm": 0.6701092215947037, + "learning_rate": 4.755469565946151e-06, + "loss": 0.3034, + "step": 7160 + }, + { + "epoch": 0.34, + "grad_norm": 0.6362213242018914, + "learning_rate": 4.755387755116792e-06, + "loss": 0.2897, + "step": 7161 + }, + { + "epoch": 0.34, + "grad_norm": 0.6003342195316453, + "learning_rate": 4.755305931308244e-06, + "loss": 0.2966, + "step": 7162 + }, + { + "epoch": 0.34, + "grad_norm": 0.6511564571837568, + "learning_rate": 4.755224094520977e-06, + "loss": 0.2888, + "step": 7163 + }, + { + "epoch": 0.34, + "grad_norm": 0.6844268079345391, + "learning_rate": 4.755142244755462e-06, + "loss": 0.2881, + "step": 7164 + }, + { + "epoch": 0.34, + "grad_norm": 0.6242163341723366, + "learning_rate": 4.75506038201217e-06, + "loss": 0.2617, + "step": 7165 + }, + { + "epoch": 0.34, + "grad_norm": 0.5803786079067867, + "learning_rate": 4.754978506291572e-06, + "loss": 0.2828, + "step": 7166 + }, + { + "epoch": 0.34, + "grad_norm": 0.6499127725502019, + "learning_rate": 4.75489661759414e-06, + "loss": 0.3223, + "step": 7167 + }, + { + "epoch": 0.34, + "grad_norm": 0.5704410708115618, + "learning_rate": 4.754814715920345e-06, + "loss": 0.2731, + "step": 7168 + }, + { + "epoch": 0.34, + "grad_norm": 0.6215680515310124, + "learning_rate": 4.754732801270658e-06, + "loss": 0.2836, + "step": 7169 + }, + { + "epoch": 0.34, + "grad_norm": 0.7028119085972645, + "learning_rate": 4.75465087364555e-06, + "loss": 0.2949, + "step": 7170 + }, + { + "epoch": 0.34, + "grad_norm": 0.598436104604755, + "learning_rate": 4.754568933045493e-06, + "loss": 0.2773, + "step": 7171 + }, + { + "epoch": 0.34, + "grad_norm": 0.6212215532451201, + "learning_rate": 4.754486979470958e-06, + "loss": 0.2933, + "step": 7172 + }, + { + "epoch": 0.34, + "grad_norm": 0.6066040855129653, + "learning_rate": 4.754405012922418e-06, + "loss": 0.2595, + "step": 7173 + }, + { + "epoch": 0.34, + "grad_norm": 0.6520723340971312, + "learning_rate": 4.754323033400343e-06, + "loss": 0.2921, + "step": 7174 + }, + { + "epoch": 0.34, + "grad_norm": 0.9436646207160776, + "learning_rate": 4.754241040905206e-06, + "loss": 0.2701, + "step": 7175 + }, + { + "epoch": 0.34, + "grad_norm": 0.6724493974287007, + "learning_rate": 4.754159035437478e-06, + "loss": 0.314, + "step": 7176 + }, + { + "epoch": 0.34, + "grad_norm": 0.6925249721582137, + "learning_rate": 4.754077016997632e-06, + "loss": 0.3259, + "step": 7177 + }, + { + "epoch": 0.34, + "grad_norm": 0.5956027048879808, + "learning_rate": 4.753994985586139e-06, + "loss": 0.2848, + "step": 7178 + }, + { + "epoch": 0.34, + "grad_norm": 0.6144552683655146, + "learning_rate": 4.7539129412034715e-06, + "loss": 0.2868, + "step": 7179 + }, + { + "epoch": 0.34, + "grad_norm": 0.6063352447249405, + "learning_rate": 4.7538308838501005e-06, + "loss": 0.2947, + "step": 7180 + }, + { + "epoch": 0.34, + "grad_norm": 0.6282360180999127, + "learning_rate": 4.7537488135265e-06, + "loss": 0.2951, + "step": 7181 + }, + { + "epoch": 0.34, + "grad_norm": 0.6556366530914399, + "learning_rate": 4.753666730233142e-06, + "loss": 0.3088, + "step": 7182 + }, + { + "epoch": 0.34, + "grad_norm": 0.7000066455994356, + "learning_rate": 4.7535846339704975e-06, + "loss": 0.2951, + "step": 7183 + }, + { + "epoch": 0.34, + "grad_norm": 0.6661372763780156, + "learning_rate": 4.753502524739041e-06, + "loss": 0.3025, + "step": 7184 + }, + { + "epoch": 0.34, + "grad_norm": 0.6206338570694383, + "learning_rate": 4.7534204025392425e-06, + "loss": 0.29, + "step": 7185 + }, + { + "epoch": 0.34, + "grad_norm": 0.6519517848126124, + "learning_rate": 4.753338267371576e-06, + "loss": 0.2731, + "step": 7186 + }, + { + "epoch": 0.34, + "grad_norm": 0.6114125280146399, + "learning_rate": 4.753256119236516e-06, + "loss": 0.2821, + "step": 7187 + }, + { + "epoch": 0.34, + "grad_norm": 0.6716911641615539, + "learning_rate": 4.753173958134531e-06, + "loss": 0.2899, + "step": 7188 + }, + { + "epoch": 0.34, + "grad_norm": 0.6163981951262699, + "learning_rate": 4.7530917840660975e-06, + "loss": 0.2978, + "step": 7189 + }, + { + "epoch": 0.34, + "grad_norm": 0.6289637870857777, + "learning_rate": 4.753009597031687e-06, + "loss": 0.3005, + "step": 7190 + }, + { + "epoch": 0.34, + "grad_norm": 0.6265897709094497, + "learning_rate": 4.752927397031772e-06, + "loss": 0.2981, + "step": 7191 + }, + { + "epoch": 0.34, + "grad_norm": 0.6211587537157097, + "learning_rate": 4.7528451840668276e-06, + "loss": 0.2921, + "step": 7192 + }, + { + "epoch": 0.34, + "grad_norm": 0.6835521895294715, + "learning_rate": 4.752762958137324e-06, + "loss": 0.3025, + "step": 7193 + }, + { + "epoch": 0.34, + "grad_norm": 0.7018145112595019, + "learning_rate": 4.7526807192437366e-06, + "loss": 0.3158, + "step": 7194 + }, + { + "epoch": 0.34, + "grad_norm": 0.6459319459377256, + "learning_rate": 4.752598467386536e-06, + "loss": 0.2993, + "step": 7195 + }, + { + "epoch": 0.34, + "grad_norm": 0.6011960559177745, + "learning_rate": 4.7525162025662e-06, + "loss": 0.3045, + "step": 7196 + }, + { + "epoch": 0.34, + "grad_norm": 0.6478754058558188, + "learning_rate": 4.752433924783197e-06, + "loss": 0.2853, + "step": 7197 + }, + { + "epoch": 0.34, + "grad_norm": 0.5696136515436422, + "learning_rate": 4.752351634038005e-06, + "loss": 0.2883, + "step": 7198 + }, + { + "epoch": 0.34, + "grad_norm": 0.6559624220890086, + "learning_rate": 4.752269330331094e-06, + "loss": 0.2833, + "step": 7199 + }, + { + "epoch": 0.34, + "grad_norm": 0.6421252809314947, + "learning_rate": 4.75218701366294e-06, + "loss": 0.3032, + "step": 7200 + }, + { + "epoch": 0.34, + "grad_norm": 0.5741540694593871, + "learning_rate": 4.752104684034015e-06, + "loss": 0.292, + "step": 7201 + }, + { + "epoch": 0.34, + "grad_norm": 0.6151840367300169, + "learning_rate": 4.752022341444794e-06, + "loss": 0.2933, + "step": 7202 + }, + { + "epoch": 0.34, + "grad_norm": 0.584992616863506, + "learning_rate": 4.751939985895751e-06, + "loss": 0.2728, + "step": 7203 + }, + { + "epoch": 0.34, + "grad_norm": 0.6066708434766975, + "learning_rate": 4.751857617387358e-06, + "loss": 0.2899, + "step": 7204 + }, + { + "epoch": 0.34, + "grad_norm": 0.6183984545895418, + "learning_rate": 4.75177523592009e-06, + "loss": 0.2875, + "step": 7205 + }, + { + "epoch": 0.34, + "grad_norm": 0.6671255705247428, + "learning_rate": 4.7516928414944245e-06, + "loss": 0.2999, + "step": 7206 + }, + { + "epoch": 0.34, + "grad_norm": 0.6417201542744615, + "learning_rate": 4.751610434110831e-06, + "loss": 0.3127, + "step": 7207 + }, + { + "epoch": 0.34, + "grad_norm": 0.6157300606505982, + "learning_rate": 4.751528013769784e-06, + "loss": 0.285, + "step": 7208 + }, + { + "epoch": 0.34, + "grad_norm": 0.6240382363134814, + "learning_rate": 4.751445580471761e-06, + "loss": 0.3009, + "step": 7209 + }, + { + "epoch": 0.34, + "grad_norm": 0.7321035344611591, + "learning_rate": 4.751363134217234e-06, + "loss": 0.2829, + "step": 7210 + }, + { + "epoch": 0.34, + "grad_norm": 0.5790923856747446, + "learning_rate": 4.751280675006677e-06, + "loss": 0.2929, + "step": 7211 + }, + { + "epoch": 0.34, + "grad_norm": 0.6464711010787253, + "learning_rate": 4.751198202840567e-06, + "loss": 0.3137, + "step": 7212 + }, + { + "epoch": 0.34, + "grad_norm": 0.6034490448056324, + "learning_rate": 4.751115717719377e-06, + "loss": 0.2938, + "step": 7213 + }, + { + "epoch": 0.34, + "grad_norm": 0.6497595745281062, + "learning_rate": 4.751033219643582e-06, + "loss": 0.2878, + "step": 7214 + }, + { + "epoch": 0.34, + "grad_norm": 0.7058118644604822, + "learning_rate": 4.750950708613655e-06, + "loss": 0.3024, + "step": 7215 + }, + { + "epoch": 0.34, + "grad_norm": 0.6642341024960456, + "learning_rate": 4.750868184630074e-06, + "loss": 0.3167, + "step": 7216 + }, + { + "epoch": 0.34, + "grad_norm": 0.6011815557794722, + "learning_rate": 4.750785647693311e-06, + "loss": 0.2898, + "step": 7217 + }, + { + "epoch": 0.34, + "grad_norm": 0.5647072691772882, + "learning_rate": 4.750703097803843e-06, + "loss": 0.2861, + "step": 7218 + }, + { + "epoch": 0.34, + "grad_norm": 0.5669805642152219, + "learning_rate": 4.750620534962146e-06, + "loss": 0.2818, + "step": 7219 + }, + { + "epoch": 0.34, + "grad_norm": 0.7022284012736645, + "learning_rate": 4.7505379591686915e-06, + "loss": 0.3009, + "step": 7220 + }, + { + "epoch": 0.34, + "grad_norm": 0.6724331357004378, + "learning_rate": 4.750455370423958e-06, + "loss": 0.3055, + "step": 7221 + }, + { + "epoch": 0.34, + "grad_norm": 0.7630029830306165, + "learning_rate": 4.750372768728418e-06, + "loss": 0.2773, + "step": 7222 + }, + { + "epoch": 0.34, + "grad_norm": 0.6803883431745448, + "learning_rate": 4.750290154082548e-06, + "loss": 0.3175, + "step": 7223 + }, + { + "epoch": 0.34, + "grad_norm": 0.565525159233524, + "learning_rate": 4.750207526486826e-06, + "loss": 0.2968, + "step": 7224 + }, + { + "epoch": 0.34, + "grad_norm": 0.6817550338660445, + "learning_rate": 4.750124885941724e-06, + "loss": 0.2814, + "step": 7225 + }, + { + "epoch": 0.34, + "grad_norm": 0.6560683061070527, + "learning_rate": 4.750042232447719e-06, + "loss": 0.3171, + "step": 7226 + }, + { + "epoch": 0.34, + "grad_norm": 0.6411275834394, + "learning_rate": 4.749959566005286e-06, + "loss": 0.2739, + "step": 7227 + }, + { + "epoch": 0.34, + "grad_norm": 0.6088689138710041, + "learning_rate": 4.749876886614901e-06, + "loss": 0.2883, + "step": 7228 + }, + { + "epoch": 0.34, + "grad_norm": 0.620780929599305, + "learning_rate": 4.749794194277041e-06, + "loss": 0.3023, + "step": 7229 + }, + { + "epoch": 0.34, + "grad_norm": 0.6275011543426623, + "learning_rate": 4.74971148899218e-06, + "loss": 0.2919, + "step": 7230 + }, + { + "epoch": 0.34, + "grad_norm": 0.5980059753818008, + "learning_rate": 4.749628770760796e-06, + "loss": 0.307, + "step": 7231 + }, + { + "epoch": 0.34, + "grad_norm": 0.5873871123742035, + "learning_rate": 4.7495460395833624e-06, + "loss": 0.2808, + "step": 7232 + }, + { + "epoch": 0.34, + "grad_norm": 0.6010368825560967, + "learning_rate": 4.749463295460358e-06, + "loss": 0.2785, + "step": 7233 + }, + { + "epoch": 0.34, + "grad_norm": 0.5994259188840438, + "learning_rate": 4.749380538392257e-06, + "loss": 0.2742, + "step": 7234 + }, + { + "epoch": 0.34, + "grad_norm": 0.6563591848237115, + "learning_rate": 4.749297768379536e-06, + "loss": 0.2857, + "step": 7235 + }, + { + "epoch": 0.34, + "grad_norm": 0.6227398398514761, + "learning_rate": 4.749214985422672e-06, + "loss": 0.2999, + "step": 7236 + }, + { + "epoch": 0.34, + "grad_norm": 0.6587135998837244, + "learning_rate": 4.749132189522142e-06, + "loss": 0.2932, + "step": 7237 + }, + { + "epoch": 0.34, + "grad_norm": 0.6205032790420216, + "learning_rate": 4.749049380678421e-06, + "loss": 0.2857, + "step": 7238 + }, + { + "epoch": 0.34, + "grad_norm": 0.6430971854151044, + "learning_rate": 4.748966558891986e-06, + "loss": 0.3186, + "step": 7239 + }, + { + "epoch": 0.34, + "grad_norm": 1.037721280947973, + "learning_rate": 4.748883724163313e-06, + "loss": 0.288, + "step": 7240 + }, + { + "epoch": 0.34, + "grad_norm": 0.6531118764574526, + "learning_rate": 4.74880087649288e-06, + "loss": 0.3047, + "step": 7241 + }, + { + "epoch": 0.34, + "grad_norm": 0.5766651167927717, + "learning_rate": 4.748718015881163e-06, + "loss": 0.2883, + "step": 7242 + }, + { + "epoch": 0.34, + "grad_norm": 0.6229171022549717, + "learning_rate": 4.748635142328639e-06, + "loss": 0.2923, + "step": 7243 + }, + { + "epoch": 0.34, + "grad_norm": 0.6125456606219442, + "learning_rate": 4.748552255835786e-06, + "loss": 0.2801, + "step": 7244 + }, + { + "epoch": 0.34, + "grad_norm": 0.6067925197477961, + "learning_rate": 4.748469356403079e-06, + "loss": 0.2808, + "step": 7245 + }, + { + "epoch": 0.34, + "grad_norm": 0.6533486958804539, + "learning_rate": 4.748386444030996e-06, + "loss": 0.2746, + "step": 7246 + }, + { + "epoch": 0.34, + "grad_norm": 0.6164721217163126, + "learning_rate": 4.748303518720014e-06, + "loss": 0.2862, + "step": 7247 + }, + { + "epoch": 0.34, + "grad_norm": 0.6387748752707747, + "learning_rate": 4.748220580470611e-06, + "loss": 0.3091, + "step": 7248 + }, + { + "epoch": 0.34, + "grad_norm": 0.6193593572303298, + "learning_rate": 4.7481376292832626e-06, + "loss": 0.2861, + "step": 7249 + }, + { + "epoch": 0.34, + "grad_norm": 0.6345327636321084, + "learning_rate": 4.748054665158448e-06, + "loss": 0.2859, + "step": 7250 + }, + { + "epoch": 0.34, + "grad_norm": 0.6165447919929902, + "learning_rate": 4.747971688096643e-06, + "loss": 0.3005, + "step": 7251 + }, + { + "epoch": 0.34, + "grad_norm": 0.536563125887946, + "learning_rate": 4.747888698098326e-06, + "loss": 0.2732, + "step": 7252 + }, + { + "epoch": 0.34, + "grad_norm": 0.6108605044877217, + "learning_rate": 4.747805695163976e-06, + "loss": 0.3145, + "step": 7253 + }, + { + "epoch": 0.34, + "grad_norm": 0.6645285284732863, + "learning_rate": 4.747722679294068e-06, + "loss": 0.3006, + "step": 7254 + }, + { + "epoch": 0.34, + "grad_norm": 0.6753541037276439, + "learning_rate": 4.747639650489081e-06, + "loss": 0.3175, + "step": 7255 + }, + { + "epoch": 0.34, + "grad_norm": 0.5921240621141299, + "learning_rate": 4.747556608749493e-06, + "loss": 0.2834, + "step": 7256 + }, + { + "epoch": 0.34, + "grad_norm": 0.6354183641464077, + "learning_rate": 4.747473554075782e-06, + "loss": 0.3078, + "step": 7257 + }, + { + "epoch": 0.34, + "grad_norm": 0.5854180347063513, + "learning_rate": 4.7473904864684245e-06, + "loss": 0.2908, + "step": 7258 + }, + { + "epoch": 0.34, + "grad_norm": 0.577481352757495, + "learning_rate": 4.7473074059279e-06, + "loss": 0.2732, + "step": 7259 + }, + { + "epoch": 0.34, + "grad_norm": 0.6338031435227918, + "learning_rate": 4.747224312454687e-06, + "loss": 0.2908, + "step": 7260 + }, + { + "epoch": 0.34, + "grad_norm": 0.6823575777342201, + "learning_rate": 4.747141206049261e-06, + "loss": 0.2971, + "step": 7261 + }, + { + "epoch": 0.34, + "grad_norm": 0.6103492480374147, + "learning_rate": 4.7470580867121045e-06, + "loss": 0.311, + "step": 7262 + }, + { + "epoch": 0.34, + "grad_norm": 0.6144019581245761, + "learning_rate": 4.746974954443692e-06, + "loss": 0.3024, + "step": 7263 + }, + { + "epoch": 0.34, + "grad_norm": 0.6390827220942267, + "learning_rate": 4.746891809244504e-06, + "loss": 0.2795, + "step": 7264 + }, + { + "epoch": 0.34, + "grad_norm": 0.5759696294591825, + "learning_rate": 4.746808651115018e-06, + "loss": 0.2844, + "step": 7265 + }, + { + "epoch": 0.34, + "grad_norm": 0.6208031736955902, + "learning_rate": 4.7467254800557135e-06, + "loss": 0.2939, + "step": 7266 + }, + { + "epoch": 0.34, + "grad_norm": 0.6500562393228494, + "learning_rate": 4.746642296067068e-06, + "loss": 0.3027, + "step": 7267 + }, + { + "epoch": 0.34, + "grad_norm": 0.6514534898337022, + "learning_rate": 4.746559099149561e-06, + "loss": 0.321, + "step": 7268 + }, + { + "epoch": 0.34, + "grad_norm": 0.6119673334615678, + "learning_rate": 4.746475889303671e-06, + "loss": 0.2987, + "step": 7269 + }, + { + "epoch": 0.34, + "grad_norm": 0.6075930204295305, + "learning_rate": 4.746392666529876e-06, + "loss": 0.2857, + "step": 7270 + }, + { + "epoch": 0.34, + "grad_norm": 0.6428827554912468, + "learning_rate": 4.746309430828657e-06, + "loss": 0.3145, + "step": 7271 + }, + { + "epoch": 0.34, + "grad_norm": 0.6393512348089222, + "learning_rate": 4.746226182200492e-06, + "loss": 0.3014, + "step": 7272 + }, + { + "epoch": 0.34, + "grad_norm": 0.6486437558920397, + "learning_rate": 4.74614292064586e-06, + "loss": 0.3055, + "step": 7273 + }, + { + "epoch": 0.34, + "grad_norm": 0.6439634639385253, + "learning_rate": 4.746059646165239e-06, + "loss": 0.2965, + "step": 7274 + }, + { + "epoch": 0.34, + "grad_norm": 0.6346952117684235, + "learning_rate": 4.745976358759109e-06, + "loss": 0.2862, + "step": 7275 + }, + { + "epoch": 0.34, + "grad_norm": 0.6816563489945843, + "learning_rate": 4.7458930584279504e-06, + "loss": 0.3019, + "step": 7276 + }, + { + "epoch": 0.34, + "grad_norm": 0.5846872169921644, + "learning_rate": 4.745809745172242e-06, + "loss": 0.2947, + "step": 7277 + }, + { + "epoch": 0.34, + "grad_norm": 0.6603056737979823, + "learning_rate": 4.745726418992463e-06, + "loss": 0.2954, + "step": 7278 + }, + { + "epoch": 0.34, + "grad_norm": 0.6040385037589162, + "learning_rate": 4.7456430798890915e-06, + "loss": 0.2729, + "step": 7279 + }, + { + "epoch": 0.34, + "grad_norm": 0.586916076483442, + "learning_rate": 4.74555972786261e-06, + "loss": 0.2911, + "step": 7280 + }, + { + "epoch": 0.34, + "grad_norm": 0.592004202749219, + "learning_rate": 4.7454763629134955e-06, + "loss": 0.3107, + "step": 7281 + }, + { + "epoch": 0.34, + "grad_norm": 0.6756336056878958, + "learning_rate": 4.7453929850422294e-06, + "loss": 0.2947, + "step": 7282 + }, + { + "epoch": 0.34, + "grad_norm": 0.6452581113801746, + "learning_rate": 4.745309594249292e-06, + "loss": 0.3091, + "step": 7283 + }, + { + "epoch": 0.34, + "grad_norm": 0.6546642368696872, + "learning_rate": 4.745226190535162e-06, + "loss": 0.3096, + "step": 7284 + }, + { + "epoch": 0.34, + "grad_norm": 0.5952705508463214, + "learning_rate": 4.745142773900319e-06, + "loss": 0.2891, + "step": 7285 + }, + { + "epoch": 0.34, + "grad_norm": 0.6115257389775568, + "learning_rate": 4.7450593443452434e-06, + "loss": 0.2931, + "step": 7286 + }, + { + "epoch": 0.34, + "grad_norm": 0.6992856888908054, + "learning_rate": 4.744975901870415e-06, + "loss": 0.3039, + "step": 7287 + }, + { + "epoch": 0.34, + "grad_norm": 0.5989050792247038, + "learning_rate": 4.7448924464763154e-06, + "loss": 0.2656, + "step": 7288 + }, + { + "epoch": 0.34, + "grad_norm": 0.6326903950967547, + "learning_rate": 4.744808978163424e-06, + "loss": 0.2817, + "step": 7289 + }, + { + "epoch": 0.34, + "grad_norm": 0.6395124101818844, + "learning_rate": 4.7447254969322206e-06, + "loss": 0.3105, + "step": 7290 + }, + { + "epoch": 0.34, + "grad_norm": 0.5964310899220346, + "learning_rate": 4.744642002783188e-06, + "loss": 0.2925, + "step": 7291 + }, + { + "epoch": 0.34, + "grad_norm": 0.582901456469109, + "learning_rate": 4.744558495716803e-06, + "loss": 0.298, + "step": 7292 + }, + { + "epoch": 0.34, + "grad_norm": 0.6302688841412526, + "learning_rate": 4.744474975733548e-06, + "loss": 0.2901, + "step": 7293 + }, + { + "epoch": 0.34, + "grad_norm": 0.5744716183567613, + "learning_rate": 4.744391442833905e-06, + "loss": 0.2735, + "step": 7294 + }, + { + "epoch": 0.34, + "grad_norm": 0.6772165128398817, + "learning_rate": 4.744307897018352e-06, + "loss": 0.2811, + "step": 7295 + }, + { + "epoch": 0.34, + "grad_norm": 0.6964087099344451, + "learning_rate": 4.7442243382873706e-06, + "loss": 0.3188, + "step": 7296 + }, + { + "epoch": 0.34, + "grad_norm": 0.605181789833805, + "learning_rate": 4.7441407666414435e-06, + "loss": 0.2793, + "step": 7297 + }, + { + "epoch": 0.34, + "grad_norm": 0.6018178009101748, + "learning_rate": 4.74405718208105e-06, + "loss": 0.2911, + "step": 7298 + }, + { + "epoch": 0.34, + "grad_norm": 0.6659122227295353, + "learning_rate": 4.743973584606672e-06, + "loss": 0.2895, + "step": 7299 + }, + { + "epoch": 0.34, + "grad_norm": 0.6564832433080829, + "learning_rate": 4.743889974218789e-06, + "loss": 0.2902, + "step": 7300 + }, + { + "epoch": 0.34, + "grad_norm": 0.5941989772755056, + "learning_rate": 4.743806350917884e-06, + "loss": 0.309, + "step": 7301 + }, + { + "epoch": 0.34, + "grad_norm": 0.6058243476313172, + "learning_rate": 4.743722714704437e-06, + "loss": 0.2879, + "step": 7302 + }, + { + "epoch": 0.34, + "grad_norm": 0.671909462532767, + "learning_rate": 4.74363906557893e-06, + "loss": 0.3086, + "step": 7303 + }, + { + "epoch": 0.34, + "grad_norm": 0.6346524364097609, + "learning_rate": 4.743555403541843e-06, + "loss": 0.2903, + "step": 7304 + }, + { + "epoch": 0.34, + "grad_norm": 0.644375589367703, + "learning_rate": 4.7434717285936595e-06, + "loss": 0.309, + "step": 7305 + }, + { + "epoch": 0.34, + "grad_norm": 0.5851089747792914, + "learning_rate": 4.74338804073486e-06, + "loss": 0.2638, + "step": 7306 + }, + { + "epoch": 0.34, + "grad_norm": 0.7112938537736015, + "learning_rate": 4.7433043399659264e-06, + "loss": 0.2887, + "step": 7307 + }, + { + "epoch": 0.34, + "grad_norm": 0.692016985591383, + "learning_rate": 4.74322062628734e-06, + "loss": 0.297, + "step": 7308 + }, + { + "epoch": 0.34, + "grad_norm": 0.6741592246797493, + "learning_rate": 4.743136899699583e-06, + "loss": 0.3117, + "step": 7309 + }, + { + "epoch": 0.34, + "grad_norm": 0.5793987718866819, + "learning_rate": 4.743053160203136e-06, + "loss": 0.2741, + "step": 7310 + }, + { + "epoch": 0.34, + "grad_norm": 0.6473480876090695, + "learning_rate": 4.7429694077984825e-06, + "loss": 0.3281, + "step": 7311 + }, + { + "epoch": 0.34, + "grad_norm": 0.6029878278063002, + "learning_rate": 4.742885642486104e-06, + "loss": 0.2816, + "step": 7312 + }, + { + "epoch": 0.34, + "grad_norm": 0.6735315370079036, + "learning_rate": 4.742801864266482e-06, + "loss": 0.301, + "step": 7313 + }, + { + "epoch": 0.34, + "grad_norm": 0.7513493591627997, + "learning_rate": 4.742718073140099e-06, + "loss": 0.3146, + "step": 7314 + }, + { + "epoch": 0.34, + "grad_norm": 0.5925501855273473, + "learning_rate": 4.742634269107437e-06, + "loss": 0.2871, + "step": 7315 + }, + { + "epoch": 0.34, + "grad_norm": 0.6138075265372565, + "learning_rate": 4.742550452168979e-06, + "loss": 0.3187, + "step": 7316 + }, + { + "epoch": 0.34, + "grad_norm": 0.6584708680553479, + "learning_rate": 4.742466622325206e-06, + "loss": 0.2951, + "step": 7317 + }, + { + "epoch": 0.34, + "grad_norm": 0.6481195101772236, + "learning_rate": 4.742382779576602e-06, + "loss": 0.2777, + "step": 7318 + }, + { + "epoch": 0.34, + "grad_norm": 0.6374152730363315, + "learning_rate": 4.742298923923649e-06, + "loss": 0.2961, + "step": 7319 + }, + { + "epoch": 0.34, + "grad_norm": 0.591357548865933, + "learning_rate": 4.742215055366828e-06, + "loss": 0.2932, + "step": 7320 + }, + { + "epoch": 0.34, + "grad_norm": 0.6390704593521823, + "learning_rate": 4.742131173906624e-06, + "loss": 0.2883, + "step": 7321 + }, + { + "epoch": 0.34, + "grad_norm": 0.5705413200807159, + "learning_rate": 4.742047279543518e-06, + "loss": 0.2854, + "step": 7322 + }, + { + "epoch": 0.34, + "grad_norm": 0.5725275725908526, + "learning_rate": 4.741963372277993e-06, + "loss": 0.277, + "step": 7323 + }, + { + "epoch": 0.34, + "grad_norm": 0.5722491920345919, + "learning_rate": 4.7418794521105334e-06, + "loss": 0.27, + "step": 7324 + }, + { + "epoch": 0.34, + "grad_norm": 0.5930035205914467, + "learning_rate": 4.74179551904162e-06, + "loss": 0.2975, + "step": 7325 + }, + { + "epoch": 0.34, + "grad_norm": 0.6388100525548736, + "learning_rate": 4.741711573071738e-06, + "loss": 0.2697, + "step": 7326 + }, + { + "epoch": 0.34, + "grad_norm": 0.5912016103435015, + "learning_rate": 4.741627614201369e-06, + "loss": 0.288, + "step": 7327 + }, + { + "epoch": 0.34, + "grad_norm": 0.6299023025944666, + "learning_rate": 4.741543642430996e-06, + "loss": 0.2778, + "step": 7328 + }, + { + "epoch": 0.34, + "grad_norm": 0.6272349487779347, + "learning_rate": 4.741459657761103e-06, + "loss": 0.2902, + "step": 7329 + }, + { + "epoch": 0.34, + "grad_norm": 0.5930204947647809, + "learning_rate": 4.7413756601921726e-06, + "loss": 0.2952, + "step": 7330 + }, + { + "epoch": 0.34, + "grad_norm": 0.5840098390184031, + "learning_rate": 4.741291649724689e-06, + "loss": 0.2975, + "step": 7331 + }, + { + "epoch": 0.34, + "grad_norm": 0.621320391219713, + "learning_rate": 4.741207626359135e-06, + "loss": 0.3059, + "step": 7332 + }, + { + "epoch": 0.34, + "grad_norm": 0.6190211806248461, + "learning_rate": 4.741123590095995e-06, + "loss": 0.3166, + "step": 7333 + }, + { + "epoch": 0.34, + "grad_norm": 0.5858636904134076, + "learning_rate": 4.741039540935751e-06, + "loss": 0.2875, + "step": 7334 + }, + { + "epoch": 0.34, + "grad_norm": 0.6282168322032932, + "learning_rate": 4.7409554788788895e-06, + "loss": 0.3084, + "step": 7335 + }, + { + "epoch": 0.34, + "grad_norm": 0.6502642240435842, + "learning_rate": 4.74087140392589e-06, + "loss": 0.3081, + "step": 7336 + }, + { + "epoch": 0.34, + "grad_norm": 0.6312953675013228, + "learning_rate": 4.740787316077241e-06, + "loss": 0.2857, + "step": 7337 + }, + { + "epoch": 0.34, + "grad_norm": 0.6600686224691668, + "learning_rate": 4.740703215333423e-06, + "loss": 0.3078, + "step": 7338 + }, + { + "epoch": 0.34, + "grad_norm": 0.6244942102047918, + "learning_rate": 4.7406191016949225e-06, + "loss": 0.3028, + "step": 7339 + }, + { + "epoch": 0.34, + "grad_norm": 0.5738620547231426, + "learning_rate": 4.740534975162221e-06, + "loss": 0.2778, + "step": 7340 + }, + { + "epoch": 0.34, + "grad_norm": 0.6492707877616962, + "learning_rate": 4.740450835735803e-06, + "loss": 0.2846, + "step": 7341 + }, + { + "epoch": 0.34, + "grad_norm": 0.5950681994140015, + "learning_rate": 4.740366683416156e-06, + "loss": 0.2879, + "step": 7342 + }, + { + "epoch": 0.34, + "grad_norm": 0.5850684411135006, + "learning_rate": 4.74028251820376e-06, + "loss": 0.2795, + "step": 7343 + }, + { + "epoch": 0.34, + "grad_norm": 0.6436434335458912, + "learning_rate": 4.740198340099102e-06, + "loss": 0.2908, + "step": 7344 + }, + { + "epoch": 0.34, + "grad_norm": 0.631836985204286, + "learning_rate": 4.740114149102665e-06, + "loss": 0.3141, + "step": 7345 + }, + { + "epoch": 0.34, + "grad_norm": 0.6060944748557834, + "learning_rate": 4.740029945214935e-06, + "loss": 0.2925, + "step": 7346 + }, + { + "epoch": 0.34, + "grad_norm": 0.6394034279146307, + "learning_rate": 4.739945728436395e-06, + "loss": 0.2965, + "step": 7347 + }, + { + "epoch": 0.34, + "grad_norm": 0.6528097293370982, + "learning_rate": 4.7398614987675305e-06, + "loss": 0.2941, + "step": 7348 + }, + { + "epoch": 0.34, + "grad_norm": 0.5896056261260759, + "learning_rate": 4.739777256208825e-06, + "loss": 0.2869, + "step": 7349 + }, + { + "epoch": 0.34, + "grad_norm": 0.6174283851300725, + "learning_rate": 4.739693000760766e-06, + "loss": 0.2901, + "step": 7350 + }, + { + "epoch": 0.34, + "grad_norm": 0.6166363651363348, + "learning_rate": 4.739608732423836e-06, + "loss": 0.2945, + "step": 7351 + }, + { + "epoch": 0.34, + "grad_norm": 0.5952609453493468, + "learning_rate": 4.739524451198521e-06, + "loss": 0.2777, + "step": 7352 + }, + { + "epoch": 0.34, + "grad_norm": 0.650210194146987, + "learning_rate": 4.739440157085305e-06, + "loss": 0.3284, + "step": 7353 + }, + { + "epoch": 0.34, + "grad_norm": 0.6215740334366563, + "learning_rate": 4.739355850084674e-06, + "loss": 0.2911, + "step": 7354 + }, + { + "epoch": 0.34, + "grad_norm": 0.6154241092777475, + "learning_rate": 4.7392715301971126e-06, + "loss": 0.2849, + "step": 7355 + }, + { + "epoch": 0.34, + "grad_norm": 0.6176849627648645, + "learning_rate": 4.739187197423108e-06, + "loss": 0.2985, + "step": 7356 + }, + { + "epoch": 0.34, + "grad_norm": 0.6621833506542315, + "learning_rate": 4.7391028517631415e-06, + "loss": 0.3009, + "step": 7357 + }, + { + "epoch": 0.34, + "grad_norm": 0.5804767651161813, + "learning_rate": 4.739018493217702e-06, + "loss": 0.2883, + "step": 7358 + }, + { + "epoch": 0.34, + "grad_norm": 0.603801173512278, + "learning_rate": 4.738934121787274e-06, + "loss": 0.2817, + "step": 7359 + }, + { + "epoch": 0.34, + "grad_norm": 0.590537351883969, + "learning_rate": 4.738849737472343e-06, + "loss": 0.296, + "step": 7360 + }, + { + "epoch": 0.34, + "grad_norm": 0.6258293721207157, + "learning_rate": 4.738765340273394e-06, + "loss": 0.2951, + "step": 7361 + }, + { + "epoch": 0.34, + "grad_norm": 0.5868596098093799, + "learning_rate": 4.738680930190913e-06, + "loss": 0.2928, + "step": 7362 + }, + { + "epoch": 0.34, + "grad_norm": 0.6260452178874372, + "learning_rate": 4.738596507225386e-06, + "loss": 0.2861, + "step": 7363 + }, + { + "epoch": 0.34, + "grad_norm": 0.6649140824525693, + "learning_rate": 4.738512071377299e-06, + "loss": 0.325, + "step": 7364 + }, + { + "epoch": 0.35, + "grad_norm": 0.6266757287709587, + "learning_rate": 4.738427622647138e-06, + "loss": 0.3031, + "step": 7365 + }, + { + "epoch": 0.35, + "grad_norm": 0.5345159366453675, + "learning_rate": 4.738343161035388e-06, + "loss": 0.282, + "step": 7366 + }, + { + "epoch": 0.35, + "grad_norm": 0.5481607497363084, + "learning_rate": 4.738258686542536e-06, + "loss": 0.2708, + "step": 7367 + }, + { + "epoch": 0.35, + "grad_norm": 0.6360087467604868, + "learning_rate": 4.7381741991690685e-06, + "loss": 0.3035, + "step": 7368 + }, + { + "epoch": 0.35, + "grad_norm": 0.6324355259218096, + "learning_rate": 4.73808969891547e-06, + "loss": 0.3125, + "step": 7369 + }, + { + "epoch": 0.35, + "grad_norm": 0.6321335008061877, + "learning_rate": 4.738005185782229e-06, + "loss": 0.2779, + "step": 7370 + }, + { + "epoch": 0.35, + "grad_norm": 0.6251594750474004, + "learning_rate": 4.737920659769829e-06, + "loss": 0.3048, + "step": 7371 + }, + { + "epoch": 0.35, + "grad_norm": 0.6140199173014192, + "learning_rate": 4.737836120878759e-06, + "loss": 0.2972, + "step": 7372 + }, + { + "epoch": 0.35, + "grad_norm": 0.5971546801333115, + "learning_rate": 4.737751569109504e-06, + "loss": 0.2974, + "step": 7373 + }, + { + "epoch": 0.35, + "grad_norm": 0.6388456609927984, + "learning_rate": 4.737667004462552e-06, + "loss": 0.3025, + "step": 7374 + }, + { + "epoch": 0.35, + "grad_norm": 0.6935715012262686, + "learning_rate": 4.737582426938389e-06, + "loss": 0.3172, + "step": 7375 + }, + { + "epoch": 0.35, + "grad_norm": 0.5956967082467002, + "learning_rate": 4.7374978365375e-06, + "loss": 0.2783, + "step": 7376 + }, + { + "epoch": 0.35, + "grad_norm": 0.6081299390098551, + "learning_rate": 4.737413233260374e-06, + "loss": 0.2819, + "step": 7377 + }, + { + "epoch": 0.35, + "grad_norm": 0.6348300303470137, + "learning_rate": 4.737328617107498e-06, + "loss": 0.3079, + "step": 7378 + }, + { + "epoch": 0.35, + "grad_norm": 0.6477136287287921, + "learning_rate": 4.737243988079358e-06, + "loss": 0.2895, + "step": 7379 + }, + { + "epoch": 0.35, + "grad_norm": 0.6169424809116343, + "learning_rate": 4.737159346176441e-06, + "loss": 0.2966, + "step": 7380 + }, + { + "epoch": 0.35, + "grad_norm": 0.6165398354415678, + "learning_rate": 4.7370746913992334e-06, + "loss": 0.2989, + "step": 7381 + }, + { + "epoch": 0.35, + "grad_norm": 0.5701986551524167, + "learning_rate": 4.7369900237482245e-06, + "loss": 0.289, + "step": 7382 + }, + { + "epoch": 0.35, + "grad_norm": 0.6491765472909399, + "learning_rate": 4.736905343223899e-06, + "loss": 0.2965, + "step": 7383 + }, + { + "epoch": 0.35, + "grad_norm": 0.6005806478073736, + "learning_rate": 4.7368206498267465e-06, + "loss": 0.2949, + "step": 7384 + }, + { + "epoch": 0.35, + "grad_norm": 0.7159725170550657, + "learning_rate": 4.736735943557252e-06, + "loss": 0.3336, + "step": 7385 + }, + { + "epoch": 0.35, + "grad_norm": 0.6522987900860786, + "learning_rate": 4.736651224415906e-06, + "loss": 0.2987, + "step": 7386 + }, + { + "epoch": 0.35, + "grad_norm": 0.6628886753835984, + "learning_rate": 4.736566492403194e-06, + "loss": 0.311, + "step": 7387 + }, + { + "epoch": 0.35, + "grad_norm": 0.7321082161546635, + "learning_rate": 4.736481747519603e-06, + "loss": 0.3126, + "step": 7388 + }, + { + "epoch": 0.35, + "grad_norm": 0.6397600962396963, + "learning_rate": 4.736396989765623e-06, + "loss": 0.3062, + "step": 7389 + }, + { + "epoch": 0.35, + "grad_norm": 0.631157227208804, + "learning_rate": 4.736312219141739e-06, + "loss": 0.2908, + "step": 7390 + }, + { + "epoch": 0.35, + "grad_norm": 0.6736829220157787, + "learning_rate": 4.736227435648441e-06, + "loss": 0.2949, + "step": 7391 + }, + { + "epoch": 0.35, + "grad_norm": 0.6263150095181184, + "learning_rate": 4.736142639286216e-06, + "loss": 0.2901, + "step": 7392 + }, + { + "epoch": 0.35, + "grad_norm": 0.6659169256377541, + "learning_rate": 4.736057830055553e-06, + "loss": 0.3172, + "step": 7393 + }, + { + "epoch": 0.35, + "grad_norm": 0.5988931080348462, + "learning_rate": 4.735973007956938e-06, + "loss": 0.2881, + "step": 7394 + }, + { + "epoch": 0.35, + "grad_norm": 0.6262826443254675, + "learning_rate": 4.7358881729908605e-06, + "loss": 0.293, + "step": 7395 + }, + { + "epoch": 0.35, + "grad_norm": 0.6078260190667134, + "learning_rate": 4.73580332515781e-06, + "loss": 0.2786, + "step": 7396 + }, + { + "epoch": 0.35, + "grad_norm": 0.6797955791828301, + "learning_rate": 4.735718464458271e-06, + "loss": 0.3006, + "step": 7397 + }, + { + "epoch": 0.35, + "grad_norm": 0.5794601435475575, + "learning_rate": 4.7356335908927356e-06, + "loss": 0.2882, + "step": 7398 + }, + { + "epoch": 0.35, + "grad_norm": 0.6503263414943407, + "learning_rate": 4.735548704461691e-06, + "loss": 0.2841, + "step": 7399 + }, + { + "epoch": 0.35, + "grad_norm": 0.655828408580984, + "learning_rate": 4.735463805165624e-06, + "loss": 0.3111, + "step": 7400 + }, + { + "epoch": 0.35, + "grad_norm": 0.6627015518892322, + "learning_rate": 4.735378893005026e-06, + "loss": 0.2946, + "step": 7401 + }, + { + "epoch": 0.35, + "grad_norm": 0.6073223709838094, + "learning_rate": 4.735293967980384e-06, + "loss": 0.2746, + "step": 7402 + }, + { + "epoch": 0.35, + "grad_norm": 0.6326471708168707, + "learning_rate": 4.735209030092187e-06, + "loss": 0.3032, + "step": 7403 + }, + { + "epoch": 0.35, + "grad_norm": 0.5985955881145648, + "learning_rate": 4.7351240793409235e-06, + "loss": 0.2933, + "step": 7404 + }, + { + "epoch": 0.35, + "grad_norm": 0.5989023406066852, + "learning_rate": 4.735039115727084e-06, + "loss": 0.2881, + "step": 7405 + }, + { + "epoch": 0.35, + "grad_norm": 0.5818935896832561, + "learning_rate": 4.734954139251155e-06, + "loss": 0.2958, + "step": 7406 + }, + { + "epoch": 0.35, + "grad_norm": 0.6448965807240151, + "learning_rate": 4.734869149913626e-06, + "loss": 0.2807, + "step": 7407 + }, + { + "epoch": 0.35, + "grad_norm": 0.6444005248660022, + "learning_rate": 4.734784147714988e-06, + "loss": 0.3082, + "step": 7408 + }, + { + "epoch": 0.35, + "grad_norm": 0.6285551887118892, + "learning_rate": 4.7346991326557284e-06, + "loss": 0.3032, + "step": 7409 + }, + { + "epoch": 0.35, + "grad_norm": 0.6224437582886647, + "learning_rate": 4.734614104736337e-06, + "loss": 0.2833, + "step": 7410 + }, + { + "epoch": 0.35, + "grad_norm": 0.6212496158753902, + "learning_rate": 4.734529063957303e-06, + "loss": 0.285, + "step": 7411 + }, + { + "epoch": 0.35, + "grad_norm": 0.615455025205431, + "learning_rate": 4.7344440103191156e-06, + "loss": 0.3129, + "step": 7412 + }, + { + "epoch": 0.35, + "grad_norm": 0.5875086416223626, + "learning_rate": 4.734358943822266e-06, + "loss": 0.2689, + "step": 7413 + }, + { + "epoch": 0.35, + "grad_norm": 0.6514004846903191, + "learning_rate": 4.734273864467241e-06, + "loss": 0.3145, + "step": 7414 + }, + { + "epoch": 0.35, + "grad_norm": 0.6077008926328643, + "learning_rate": 4.734188772254531e-06, + "loss": 0.2733, + "step": 7415 + }, + { + "epoch": 0.35, + "grad_norm": 0.6012491593832805, + "learning_rate": 4.734103667184628e-06, + "loss": 0.2651, + "step": 7416 + }, + { + "epoch": 0.35, + "grad_norm": 0.5809444589782784, + "learning_rate": 4.734018549258018e-06, + "loss": 0.2956, + "step": 7417 + }, + { + "epoch": 0.35, + "grad_norm": 0.600624314396837, + "learning_rate": 4.7339334184751935e-06, + "loss": 0.2906, + "step": 7418 + }, + { + "epoch": 0.35, + "grad_norm": 0.5994129671455996, + "learning_rate": 4.733848274836644e-06, + "loss": 0.3067, + "step": 7419 + }, + { + "epoch": 0.35, + "grad_norm": 0.5854944762981685, + "learning_rate": 4.733763118342858e-06, + "loss": 0.2835, + "step": 7420 + }, + { + "epoch": 0.35, + "grad_norm": 0.6456209010474646, + "learning_rate": 4.733677948994328e-06, + "loss": 0.2718, + "step": 7421 + }, + { + "epoch": 0.35, + "grad_norm": 0.6464684116198124, + "learning_rate": 4.733592766791542e-06, + "loss": 0.3112, + "step": 7422 + }, + { + "epoch": 0.35, + "grad_norm": 0.5675560103391237, + "learning_rate": 4.733507571734992e-06, + "loss": 0.2853, + "step": 7423 + }, + { + "epoch": 0.35, + "grad_norm": 0.6176334372189565, + "learning_rate": 4.733422363825166e-06, + "loss": 0.2886, + "step": 7424 + }, + { + "epoch": 0.35, + "grad_norm": 0.561613505272589, + "learning_rate": 4.733337143062557e-06, + "loss": 0.2774, + "step": 7425 + }, + { + "epoch": 0.35, + "grad_norm": 0.6276048036368561, + "learning_rate": 4.733251909447653e-06, + "loss": 0.3032, + "step": 7426 + }, + { + "epoch": 0.35, + "grad_norm": 0.6692028494169617, + "learning_rate": 4.733166662980946e-06, + "loss": 0.3253, + "step": 7427 + }, + { + "epoch": 0.35, + "grad_norm": 0.679613332127996, + "learning_rate": 4.733081403662926e-06, + "loss": 0.3186, + "step": 7428 + }, + { + "epoch": 0.35, + "grad_norm": 0.6194360895032495, + "learning_rate": 4.7329961314940835e-06, + "loss": 0.2827, + "step": 7429 + }, + { + "epoch": 0.35, + "grad_norm": 0.6407381044022489, + "learning_rate": 4.73291084647491e-06, + "loss": 0.2975, + "step": 7430 + }, + { + "epoch": 0.35, + "grad_norm": 0.610279486402482, + "learning_rate": 4.732825548605895e-06, + "loss": 0.3303, + "step": 7431 + }, + { + "epoch": 0.35, + "grad_norm": 0.5954216818558042, + "learning_rate": 4.732740237887531e-06, + "loss": 0.2796, + "step": 7432 + }, + { + "epoch": 0.35, + "grad_norm": 0.6279963821766901, + "learning_rate": 4.732654914320308e-06, + "loss": 0.314, + "step": 7433 + }, + { + "epoch": 0.35, + "grad_norm": 0.6169344205823378, + "learning_rate": 4.732569577904717e-06, + "loss": 0.2911, + "step": 7434 + }, + { + "epoch": 0.35, + "grad_norm": 0.6264548238440034, + "learning_rate": 4.73248422864125e-06, + "loss": 0.3106, + "step": 7435 + }, + { + "epoch": 0.35, + "grad_norm": 0.6451225890079316, + "learning_rate": 4.732398866530396e-06, + "loss": 0.2921, + "step": 7436 + }, + { + "epoch": 0.35, + "grad_norm": 0.6588151940246139, + "learning_rate": 4.732313491572648e-06, + "loss": 0.3205, + "step": 7437 + }, + { + "epoch": 0.35, + "grad_norm": 0.6279328348065062, + "learning_rate": 4.732228103768498e-06, + "loss": 0.2993, + "step": 7438 + }, + { + "epoch": 0.35, + "grad_norm": 0.6287789972668989, + "learning_rate": 4.732142703118435e-06, + "loss": 0.2806, + "step": 7439 + }, + { + "epoch": 0.35, + "grad_norm": 0.5604337369195165, + "learning_rate": 4.7320572896229524e-06, + "loss": 0.2771, + "step": 7440 + }, + { + "epoch": 0.35, + "grad_norm": 0.6270497831012652, + "learning_rate": 4.73197186328254e-06, + "loss": 0.3035, + "step": 7441 + }, + { + "epoch": 0.35, + "grad_norm": 0.6377913726438902, + "learning_rate": 4.731886424097693e-06, + "loss": 0.278, + "step": 7442 + }, + { + "epoch": 0.35, + "grad_norm": 0.6497297198243744, + "learning_rate": 4.731800972068898e-06, + "loss": 0.3169, + "step": 7443 + }, + { + "epoch": 0.35, + "grad_norm": 0.6240134702281946, + "learning_rate": 4.73171550719665e-06, + "loss": 0.2956, + "step": 7444 + }, + { + "epoch": 0.35, + "grad_norm": 0.6055118778405407, + "learning_rate": 4.731630029481441e-06, + "loss": 0.2924, + "step": 7445 + }, + { + "epoch": 0.35, + "grad_norm": 0.640720756127532, + "learning_rate": 4.731544538923762e-06, + "loss": 0.318, + "step": 7446 + }, + { + "epoch": 0.35, + "grad_norm": 0.5917864390824095, + "learning_rate": 4.731459035524104e-06, + "loss": 0.3069, + "step": 7447 + }, + { + "epoch": 0.35, + "grad_norm": 0.6336800433180175, + "learning_rate": 4.731373519282961e-06, + "loss": 0.2883, + "step": 7448 + }, + { + "epoch": 0.35, + "grad_norm": 0.5994243761930602, + "learning_rate": 4.7312879902008245e-06, + "loss": 0.3055, + "step": 7449 + }, + { + "epoch": 0.35, + "grad_norm": 0.6482704855697231, + "learning_rate": 4.731202448278186e-06, + "loss": 0.3097, + "step": 7450 + }, + { + "epoch": 0.35, + "grad_norm": 0.5977640667427278, + "learning_rate": 4.731116893515539e-06, + "loss": 0.2834, + "step": 7451 + }, + { + "epoch": 0.35, + "grad_norm": 0.59242595937296, + "learning_rate": 4.7310313259133735e-06, + "loss": 0.2743, + "step": 7452 + }, + { + "epoch": 0.35, + "grad_norm": 0.5896172512577673, + "learning_rate": 4.730945745472184e-06, + "loss": 0.2981, + "step": 7453 + }, + { + "epoch": 0.35, + "grad_norm": 0.6374576613009049, + "learning_rate": 4.730860152192462e-06, + "loss": 0.2743, + "step": 7454 + }, + { + "epoch": 0.35, + "grad_norm": 0.6559596238982796, + "learning_rate": 4.730774546074702e-06, + "loss": 0.3015, + "step": 7455 + }, + { + "epoch": 0.35, + "grad_norm": 0.6223390885182779, + "learning_rate": 4.730688927119395e-06, + "loss": 0.293, + "step": 7456 + }, + { + "epoch": 0.35, + "grad_norm": 0.5954302092423079, + "learning_rate": 4.730603295327032e-06, + "loss": 0.2989, + "step": 7457 + }, + { + "epoch": 0.35, + "grad_norm": 0.664769764369213, + "learning_rate": 4.7305176506981094e-06, + "loss": 0.29, + "step": 7458 + }, + { + "epoch": 0.35, + "grad_norm": 0.6301515579461444, + "learning_rate": 4.730431993233118e-06, + "loss": 0.3085, + "step": 7459 + }, + { + "epoch": 0.35, + "grad_norm": 0.6304641649603712, + "learning_rate": 4.730346322932551e-06, + "loss": 0.3113, + "step": 7460 + }, + { + "epoch": 0.35, + "grad_norm": 0.6028899819516099, + "learning_rate": 4.730260639796901e-06, + "loss": 0.2785, + "step": 7461 + }, + { + "epoch": 0.35, + "grad_norm": 0.5683950018814918, + "learning_rate": 4.730174943826662e-06, + "loss": 0.2735, + "step": 7462 + }, + { + "epoch": 0.35, + "grad_norm": 0.5849956048103411, + "learning_rate": 4.730089235022327e-06, + "loss": 0.277, + "step": 7463 + }, + { + "epoch": 0.35, + "grad_norm": 0.6403640234330732, + "learning_rate": 4.730003513384389e-06, + "loss": 0.3133, + "step": 7464 + }, + { + "epoch": 0.35, + "grad_norm": 0.6247419405645814, + "learning_rate": 4.7299177789133405e-06, + "loss": 0.2884, + "step": 7465 + }, + { + "epoch": 0.35, + "grad_norm": 0.6219422291851344, + "learning_rate": 4.729832031609676e-06, + "loss": 0.3117, + "step": 7466 + }, + { + "epoch": 0.35, + "grad_norm": 0.6107915881342317, + "learning_rate": 4.729746271473889e-06, + "loss": 0.3023, + "step": 7467 + }, + { + "epoch": 0.35, + "grad_norm": 0.5693716092712382, + "learning_rate": 4.729660498506472e-06, + "loss": 0.2908, + "step": 7468 + }, + { + "epoch": 0.35, + "grad_norm": 0.5570921217538, + "learning_rate": 4.72957471270792e-06, + "loss": 0.2762, + "step": 7469 + }, + { + "epoch": 0.35, + "grad_norm": 0.5732256071775237, + "learning_rate": 4.729488914078725e-06, + "loss": 0.2997, + "step": 7470 + }, + { + "epoch": 0.35, + "grad_norm": 0.6038654987373043, + "learning_rate": 4.729403102619382e-06, + "loss": 0.3013, + "step": 7471 + }, + { + "epoch": 0.35, + "grad_norm": 0.5991633453442625, + "learning_rate": 4.729317278330385e-06, + "loss": 0.2875, + "step": 7472 + }, + { + "epoch": 0.35, + "grad_norm": 0.6316182936179064, + "learning_rate": 4.729231441212228e-06, + "loss": 0.3003, + "step": 7473 + }, + { + "epoch": 0.35, + "grad_norm": 0.5614949615726194, + "learning_rate": 4.729145591265403e-06, + "loss": 0.2935, + "step": 7474 + }, + { + "epoch": 0.35, + "grad_norm": 0.5435288618165629, + "learning_rate": 4.729059728490406e-06, + "loss": 0.2702, + "step": 7475 + }, + { + "epoch": 0.35, + "grad_norm": 0.5766025454822886, + "learning_rate": 4.728973852887729e-06, + "loss": 0.2898, + "step": 7476 + }, + { + "epoch": 0.35, + "grad_norm": 0.620404499496171, + "learning_rate": 4.72888796445787e-06, + "loss": 0.3057, + "step": 7477 + }, + { + "epoch": 0.35, + "grad_norm": 0.6097016930205563, + "learning_rate": 4.72880206320132e-06, + "loss": 0.2887, + "step": 7478 + }, + { + "epoch": 0.35, + "grad_norm": 0.6096539343054278, + "learning_rate": 4.728716149118574e-06, + "loss": 0.2964, + "step": 7479 + }, + { + "epoch": 0.35, + "grad_norm": 0.5831839484791332, + "learning_rate": 4.7286302222101265e-06, + "loss": 0.289, + "step": 7480 + }, + { + "epoch": 0.35, + "grad_norm": 0.6251878680306576, + "learning_rate": 4.728544282476473e-06, + "loss": 0.3104, + "step": 7481 + }, + { + "epoch": 0.35, + "grad_norm": 0.5781146199693133, + "learning_rate": 4.728458329918107e-06, + "loss": 0.2876, + "step": 7482 + }, + { + "epoch": 0.35, + "grad_norm": 0.6643873741105488, + "learning_rate": 4.728372364535524e-06, + "loss": 0.2866, + "step": 7483 + }, + { + "epoch": 0.35, + "grad_norm": 0.668929581002265, + "learning_rate": 4.728286386329218e-06, + "loss": 0.2903, + "step": 7484 + }, + { + "epoch": 0.35, + "grad_norm": 0.6124011756200206, + "learning_rate": 4.7282003952996825e-06, + "loss": 0.3112, + "step": 7485 + }, + { + "epoch": 0.35, + "grad_norm": 0.618319255081924, + "learning_rate": 4.7281143914474146e-06, + "loss": 0.2968, + "step": 7486 + }, + { + "epoch": 0.35, + "grad_norm": 0.6509246162686734, + "learning_rate": 4.728028374772909e-06, + "loss": 0.2978, + "step": 7487 + }, + { + "epoch": 0.35, + "grad_norm": 0.6226062295453259, + "learning_rate": 4.7279423452766594e-06, + "loss": 0.2805, + "step": 7488 + }, + { + "epoch": 0.35, + "grad_norm": 0.6710359748104449, + "learning_rate": 4.727856302959162e-06, + "loss": 0.3087, + "step": 7489 + }, + { + "epoch": 0.35, + "grad_norm": 0.6169973317292393, + "learning_rate": 4.7277702478209105e-06, + "loss": 0.2824, + "step": 7490 + }, + { + "epoch": 0.35, + "grad_norm": 0.5902322668956256, + "learning_rate": 4.727684179862403e-06, + "loss": 0.2806, + "step": 7491 + }, + { + "epoch": 0.35, + "grad_norm": 0.6106625117743126, + "learning_rate": 4.727598099084131e-06, + "loss": 0.2778, + "step": 7492 + }, + { + "epoch": 0.35, + "grad_norm": 0.6558958591199224, + "learning_rate": 4.727512005486593e-06, + "loss": 0.3017, + "step": 7493 + }, + { + "epoch": 0.35, + "grad_norm": 0.6162474535652104, + "learning_rate": 4.727425899070283e-06, + "loss": 0.3022, + "step": 7494 + }, + { + "epoch": 0.35, + "grad_norm": 0.6649691875047167, + "learning_rate": 4.727339779835697e-06, + "loss": 0.3026, + "step": 7495 + }, + { + "epoch": 0.35, + "grad_norm": 0.6127363375398361, + "learning_rate": 4.727253647783331e-06, + "loss": 0.3039, + "step": 7496 + }, + { + "epoch": 0.35, + "grad_norm": 0.6350355436000771, + "learning_rate": 4.727167502913679e-06, + "loss": 0.3113, + "step": 7497 + }, + { + "epoch": 0.35, + "grad_norm": 0.6158602924381316, + "learning_rate": 4.727081345227237e-06, + "loss": 0.2924, + "step": 7498 + }, + { + "epoch": 0.35, + "grad_norm": 0.6260763733082504, + "learning_rate": 4.726995174724503e-06, + "loss": 0.2929, + "step": 7499 + }, + { + "epoch": 0.35, + "grad_norm": 0.6176365559141517, + "learning_rate": 4.7269089914059716e-06, + "loss": 0.3094, + "step": 7500 + }, + { + "epoch": 0.35, + "grad_norm": 0.6214822044041161, + "learning_rate": 4.726822795272138e-06, + "loss": 0.32, + "step": 7501 + }, + { + "epoch": 0.35, + "grad_norm": 0.6696192782076573, + "learning_rate": 4.726736586323499e-06, + "loss": 0.3172, + "step": 7502 + }, + { + "epoch": 0.35, + "grad_norm": 0.5859629309254354, + "learning_rate": 4.726650364560551e-06, + "loss": 0.2861, + "step": 7503 + }, + { + "epoch": 0.35, + "grad_norm": 0.5998867285924453, + "learning_rate": 4.726564129983789e-06, + "loss": 0.2844, + "step": 7504 + }, + { + "epoch": 0.35, + "grad_norm": 0.6312821677540199, + "learning_rate": 4.72647788259371e-06, + "loss": 0.2859, + "step": 7505 + }, + { + "epoch": 0.35, + "grad_norm": 0.5668166030551222, + "learning_rate": 4.726391622390812e-06, + "loss": 0.2777, + "step": 7506 + }, + { + "epoch": 0.35, + "grad_norm": 0.6279469148025668, + "learning_rate": 4.726305349375589e-06, + "loss": 0.303, + "step": 7507 + }, + { + "epoch": 0.35, + "grad_norm": 0.6614207409748224, + "learning_rate": 4.726219063548538e-06, + "loss": 0.3037, + "step": 7508 + }, + { + "epoch": 0.35, + "grad_norm": 0.5718739304593271, + "learning_rate": 4.726132764910156e-06, + "loss": 0.269, + "step": 7509 + }, + { + "epoch": 0.35, + "grad_norm": 0.6068684750317885, + "learning_rate": 4.726046453460939e-06, + "loss": 0.3032, + "step": 7510 + }, + { + "epoch": 0.35, + "grad_norm": 0.6463288532780276, + "learning_rate": 4.7259601292013845e-06, + "loss": 0.285, + "step": 7511 + }, + { + "epoch": 0.35, + "grad_norm": 0.6360224919109698, + "learning_rate": 4.7258737921319895e-06, + "loss": 0.2789, + "step": 7512 + }, + { + "epoch": 0.35, + "grad_norm": 0.5733324334010266, + "learning_rate": 4.725787442253249e-06, + "loss": 0.2725, + "step": 7513 + }, + { + "epoch": 0.35, + "grad_norm": 0.61712398758651, + "learning_rate": 4.725701079565662e-06, + "loss": 0.2879, + "step": 7514 + }, + { + "epoch": 0.35, + "grad_norm": 0.5874861642199224, + "learning_rate": 4.7256147040697245e-06, + "loss": 0.2962, + "step": 7515 + }, + { + "epoch": 0.35, + "grad_norm": 0.6530584743450764, + "learning_rate": 4.725528315765934e-06, + "loss": 0.3183, + "step": 7516 + }, + { + "epoch": 0.35, + "grad_norm": 0.6881849244458905, + "learning_rate": 4.725441914654788e-06, + "loss": 0.3202, + "step": 7517 + }, + { + "epoch": 0.35, + "grad_norm": 0.6128281079075308, + "learning_rate": 4.725355500736782e-06, + "loss": 0.2823, + "step": 7518 + }, + { + "epoch": 0.35, + "grad_norm": 0.6711245994580595, + "learning_rate": 4.7252690740124155e-06, + "loss": 0.2927, + "step": 7519 + }, + { + "epoch": 0.35, + "grad_norm": 0.6416757962027441, + "learning_rate": 4.725182634482183e-06, + "loss": 0.3102, + "step": 7520 + }, + { + "epoch": 0.35, + "grad_norm": 0.6392493053953677, + "learning_rate": 4.725096182146585e-06, + "loss": 0.3065, + "step": 7521 + }, + { + "epoch": 0.35, + "grad_norm": 0.6875526812045405, + "learning_rate": 4.725009717006117e-06, + "loss": 0.2959, + "step": 7522 + }, + { + "epoch": 0.35, + "grad_norm": 0.5593352747536343, + "learning_rate": 4.724923239061279e-06, + "loss": 0.2913, + "step": 7523 + }, + { + "epoch": 0.35, + "grad_norm": 0.6085440847343042, + "learning_rate": 4.724836748312565e-06, + "loss": 0.2805, + "step": 7524 + }, + { + "epoch": 0.35, + "grad_norm": 0.6391363068697624, + "learning_rate": 4.724750244760476e-06, + "loss": 0.3058, + "step": 7525 + }, + { + "epoch": 0.35, + "grad_norm": 0.631808366446407, + "learning_rate": 4.724663728405508e-06, + "loss": 0.2833, + "step": 7526 + }, + { + "epoch": 0.35, + "grad_norm": 0.5876430406885658, + "learning_rate": 4.724577199248159e-06, + "loss": 0.2906, + "step": 7527 + }, + { + "epoch": 0.35, + "grad_norm": 0.619317133349142, + "learning_rate": 4.724490657288929e-06, + "loss": 0.2979, + "step": 7528 + }, + { + "epoch": 0.35, + "grad_norm": 0.6870275163089027, + "learning_rate": 4.724404102528313e-06, + "loss": 0.3104, + "step": 7529 + }, + { + "epoch": 0.35, + "grad_norm": 0.6181060884643654, + "learning_rate": 4.72431753496681e-06, + "loss": 0.2944, + "step": 7530 + }, + { + "epoch": 0.35, + "grad_norm": 0.6468077974924066, + "learning_rate": 4.7242309546049194e-06, + "loss": 0.3039, + "step": 7531 + }, + { + "epoch": 0.35, + "grad_norm": 0.587675781448775, + "learning_rate": 4.7241443614431385e-06, + "loss": 0.2621, + "step": 7532 + }, + { + "epoch": 0.35, + "grad_norm": 0.6399413243756152, + "learning_rate": 4.724057755481966e-06, + "loss": 0.3152, + "step": 7533 + }, + { + "epoch": 0.35, + "grad_norm": 0.5773423834263149, + "learning_rate": 4.7239711367219e-06, + "loss": 0.2821, + "step": 7534 + }, + { + "epoch": 0.35, + "grad_norm": 0.6629664901429864, + "learning_rate": 4.723884505163439e-06, + "loss": 0.3, + "step": 7535 + }, + { + "epoch": 0.35, + "grad_norm": 0.6401802821679173, + "learning_rate": 4.7237978608070825e-06, + "loss": 0.2889, + "step": 7536 + }, + { + "epoch": 0.35, + "grad_norm": 0.6358942655489674, + "learning_rate": 4.723711203653327e-06, + "loss": 0.2958, + "step": 7537 + }, + { + "epoch": 0.35, + "grad_norm": 0.6065120012340971, + "learning_rate": 4.723624533702672e-06, + "loss": 0.3044, + "step": 7538 + }, + { + "epoch": 0.35, + "grad_norm": 0.5540282581024263, + "learning_rate": 4.723537850955619e-06, + "loss": 0.2737, + "step": 7539 + }, + { + "epoch": 0.35, + "grad_norm": 0.5478965679205589, + "learning_rate": 4.723451155412663e-06, + "loss": 0.2854, + "step": 7540 + }, + { + "epoch": 0.35, + "grad_norm": 0.6271327475755252, + "learning_rate": 4.723364447074304e-06, + "loss": 0.2848, + "step": 7541 + }, + { + "epoch": 0.35, + "grad_norm": 0.5976693852346072, + "learning_rate": 4.723277725941042e-06, + "loss": 0.303, + "step": 7542 + }, + { + "epoch": 0.35, + "grad_norm": 0.625259640198638, + "learning_rate": 4.723190992013376e-06, + "loss": 0.2822, + "step": 7543 + }, + { + "epoch": 0.35, + "grad_norm": 0.5835791749978609, + "learning_rate": 4.7231042452918035e-06, + "loss": 0.2882, + "step": 7544 + }, + { + "epoch": 0.35, + "grad_norm": 0.6010395182774803, + "learning_rate": 4.723017485776825e-06, + "loss": 0.2971, + "step": 7545 + }, + { + "epoch": 0.35, + "grad_norm": 0.6303934477515845, + "learning_rate": 4.72293071346894e-06, + "loss": 0.2984, + "step": 7546 + }, + { + "epoch": 0.35, + "grad_norm": 0.6302654307637993, + "learning_rate": 4.722843928368647e-06, + "loss": 0.27, + "step": 7547 + }, + { + "epoch": 0.35, + "grad_norm": 0.6049048386361194, + "learning_rate": 4.722757130476448e-06, + "loss": 0.3002, + "step": 7548 + }, + { + "epoch": 0.35, + "grad_norm": 0.8885709523964564, + "learning_rate": 4.722670319792838e-06, + "loss": 0.3203, + "step": 7549 + }, + { + "epoch": 0.35, + "grad_norm": 0.5694413572977337, + "learning_rate": 4.72258349631832e-06, + "loss": 0.2938, + "step": 7550 + }, + { + "epoch": 0.35, + "grad_norm": 0.6496669324544125, + "learning_rate": 4.722496660053392e-06, + "loss": 0.2715, + "step": 7551 + }, + { + "epoch": 0.35, + "grad_norm": 0.6858458472280337, + "learning_rate": 4.722409810998555e-06, + "loss": 0.2859, + "step": 7552 + }, + { + "epoch": 0.35, + "grad_norm": 0.6335148913465047, + "learning_rate": 4.722322949154308e-06, + "loss": 0.3249, + "step": 7553 + }, + { + "epoch": 0.35, + "grad_norm": 0.6514706677738663, + "learning_rate": 4.72223607452115e-06, + "loss": 0.3101, + "step": 7554 + }, + { + "epoch": 0.35, + "grad_norm": 0.5629434839175803, + "learning_rate": 4.722149187099583e-06, + "loss": 0.2629, + "step": 7555 + }, + { + "epoch": 0.35, + "grad_norm": 0.6721532873375645, + "learning_rate": 4.722062286890105e-06, + "loss": 0.2855, + "step": 7556 + }, + { + "epoch": 0.35, + "grad_norm": 0.664431673068973, + "learning_rate": 4.7219753738932185e-06, + "loss": 0.3086, + "step": 7557 + }, + { + "epoch": 0.35, + "grad_norm": 0.6610185976312628, + "learning_rate": 4.721888448109421e-06, + "loss": 0.2976, + "step": 7558 + }, + { + "epoch": 0.35, + "grad_norm": 0.5931165328255824, + "learning_rate": 4.721801509539214e-06, + "loss": 0.3044, + "step": 7559 + }, + { + "epoch": 0.35, + "grad_norm": 0.6372847722016072, + "learning_rate": 4.721714558183098e-06, + "loss": 0.2857, + "step": 7560 + }, + { + "epoch": 0.35, + "grad_norm": 0.6211667324317501, + "learning_rate": 4.721627594041574e-06, + "loss": 0.3068, + "step": 7561 + }, + { + "epoch": 0.35, + "grad_norm": 0.623989812970508, + "learning_rate": 4.7215406171151405e-06, + "loss": 0.3018, + "step": 7562 + }, + { + "epoch": 0.35, + "grad_norm": 0.5985927283057672, + "learning_rate": 4.721453627404299e-06, + "loss": 0.2923, + "step": 7563 + }, + { + "epoch": 0.35, + "grad_norm": 0.6676974193541655, + "learning_rate": 4.72136662490955e-06, + "loss": 0.3144, + "step": 7564 + }, + { + "epoch": 0.35, + "grad_norm": 0.6573285584240216, + "learning_rate": 4.721279609631395e-06, + "loss": 0.3074, + "step": 7565 + }, + { + "epoch": 0.35, + "grad_norm": 0.6353788774365766, + "learning_rate": 4.721192581570334e-06, + "loss": 0.3037, + "step": 7566 + }, + { + "epoch": 0.35, + "grad_norm": 0.6177850922692162, + "learning_rate": 4.721105540726868e-06, + "loss": 0.3025, + "step": 7567 + }, + { + "epoch": 0.35, + "grad_norm": 0.6521441260261635, + "learning_rate": 4.721018487101498e-06, + "loss": 0.3018, + "step": 7568 + }, + { + "epoch": 0.35, + "grad_norm": 0.6486926059054905, + "learning_rate": 4.7209314206947254e-06, + "loss": 0.2937, + "step": 7569 + }, + { + "epoch": 0.35, + "grad_norm": 0.6052050517850074, + "learning_rate": 4.7208443415070504e-06, + "loss": 0.2852, + "step": 7570 + }, + { + "epoch": 0.35, + "grad_norm": 0.6292136304949348, + "learning_rate": 4.720757249538974e-06, + "loss": 0.2594, + "step": 7571 + }, + { + "epoch": 0.35, + "grad_norm": 0.6245541372948232, + "learning_rate": 4.720670144790997e-06, + "loss": 0.299, + "step": 7572 + }, + { + "epoch": 0.35, + "grad_norm": 0.6040506979237273, + "learning_rate": 4.720583027263623e-06, + "loss": 0.2945, + "step": 7573 + }, + { + "epoch": 0.35, + "grad_norm": 0.5953025668194678, + "learning_rate": 4.720495896957351e-06, + "loss": 0.2968, + "step": 7574 + }, + { + "epoch": 0.35, + "grad_norm": 0.6812801411282351, + "learning_rate": 4.720408753872682e-06, + "loss": 0.2975, + "step": 7575 + }, + { + "epoch": 0.35, + "grad_norm": 0.6117373375411794, + "learning_rate": 4.720321598010121e-06, + "loss": 0.3001, + "step": 7576 + }, + { + "epoch": 0.35, + "grad_norm": 0.6132609766553516, + "learning_rate": 4.7202344293701665e-06, + "loss": 0.2941, + "step": 7577 + }, + { + "epoch": 0.35, + "grad_norm": 0.6333390407828674, + "learning_rate": 4.72014724795332e-06, + "loss": 0.2858, + "step": 7578 + }, + { + "epoch": 0.36, + "grad_norm": 0.6217367131036128, + "learning_rate": 4.720060053760086e-06, + "loss": 0.3026, + "step": 7579 + }, + { + "epoch": 0.36, + "grad_norm": 0.6450579287941302, + "learning_rate": 4.719972846790962e-06, + "loss": 0.2911, + "step": 7580 + }, + { + "epoch": 0.36, + "grad_norm": 0.6261720610309989, + "learning_rate": 4.719885627046455e-06, + "loss": 0.293, + "step": 7581 + }, + { + "epoch": 0.36, + "grad_norm": 0.6193894004175268, + "learning_rate": 4.719798394527062e-06, + "loss": 0.2983, + "step": 7582 + }, + { + "epoch": 0.36, + "grad_norm": 0.6090331890654185, + "learning_rate": 4.719711149233287e-06, + "loss": 0.3112, + "step": 7583 + }, + { + "epoch": 0.36, + "grad_norm": 0.5575110474651018, + "learning_rate": 4.719623891165633e-06, + "loss": 0.2718, + "step": 7584 + }, + { + "epoch": 0.36, + "grad_norm": 0.627813205988202, + "learning_rate": 4.719536620324601e-06, + "loss": 0.2918, + "step": 7585 + }, + { + "epoch": 0.36, + "grad_norm": 0.6382041545059722, + "learning_rate": 4.719449336710695e-06, + "loss": 0.3016, + "step": 7586 + }, + { + "epoch": 0.36, + "grad_norm": 0.6405954884363028, + "learning_rate": 4.719362040324414e-06, + "loss": 0.2906, + "step": 7587 + }, + { + "epoch": 0.36, + "grad_norm": 0.6263698728309975, + "learning_rate": 4.719274731166263e-06, + "loss": 0.2908, + "step": 7588 + }, + { + "epoch": 0.36, + "grad_norm": 0.5939546157225213, + "learning_rate": 4.719187409236745e-06, + "loss": 0.2936, + "step": 7589 + }, + { + "epoch": 0.36, + "grad_norm": 0.6129855089106118, + "learning_rate": 4.719100074536359e-06, + "loss": 0.279, + "step": 7590 + }, + { + "epoch": 0.36, + "grad_norm": 0.6370631110338236, + "learning_rate": 4.719012727065611e-06, + "loss": 0.3125, + "step": 7591 + }, + { + "epoch": 0.36, + "grad_norm": 0.6481796761424748, + "learning_rate": 4.718925366825003e-06, + "loss": 0.2955, + "step": 7592 + }, + { + "epoch": 0.36, + "grad_norm": 0.5580259164975809, + "learning_rate": 4.718837993815036e-06, + "loss": 0.2684, + "step": 7593 + }, + { + "epoch": 0.36, + "grad_norm": 0.6550065310348321, + "learning_rate": 4.718750608036216e-06, + "loss": 0.3086, + "step": 7594 + }, + { + "epoch": 0.36, + "grad_norm": 0.6541172653735561, + "learning_rate": 4.718663209489041e-06, + "loss": 0.2977, + "step": 7595 + }, + { + "epoch": 0.36, + "grad_norm": 0.6233136930884109, + "learning_rate": 4.718575798174018e-06, + "loss": 0.3003, + "step": 7596 + }, + { + "epoch": 0.36, + "grad_norm": 0.6546440801208433, + "learning_rate": 4.71848837409165e-06, + "loss": 0.3051, + "step": 7597 + }, + { + "epoch": 0.36, + "grad_norm": 0.6333495287497994, + "learning_rate": 4.7184009372424385e-06, + "loss": 0.3041, + "step": 7598 + }, + { + "epoch": 0.36, + "grad_norm": 0.6756722220039494, + "learning_rate": 4.718313487626888e-06, + "loss": 0.3155, + "step": 7599 + }, + { + "epoch": 0.36, + "grad_norm": 0.562736751957822, + "learning_rate": 4.7182260252455e-06, + "loss": 0.2634, + "step": 7600 + }, + { + "epoch": 0.36, + "grad_norm": 0.6069866620730213, + "learning_rate": 4.7181385500987785e-06, + "loss": 0.3051, + "step": 7601 + }, + { + "epoch": 0.36, + "grad_norm": 0.63026989584359, + "learning_rate": 4.718051062187227e-06, + "loss": 0.2791, + "step": 7602 + }, + { + "epoch": 0.36, + "grad_norm": 0.6338602853463451, + "learning_rate": 4.717963561511349e-06, + "loss": 0.2846, + "step": 7603 + }, + { + "epoch": 0.36, + "grad_norm": 0.5927971100011602, + "learning_rate": 4.717876048071649e-06, + "loss": 0.2716, + "step": 7604 + }, + { + "epoch": 0.36, + "grad_norm": 0.6539302477681378, + "learning_rate": 4.717788521868629e-06, + "loss": 0.298, + "step": 7605 + }, + { + "epoch": 0.36, + "grad_norm": 0.6175938964266003, + "learning_rate": 4.717700982902794e-06, + "loss": 0.2825, + "step": 7606 + }, + { + "epoch": 0.36, + "grad_norm": 0.6163988455976963, + "learning_rate": 4.717613431174648e-06, + "loss": 0.298, + "step": 7607 + }, + { + "epoch": 0.36, + "grad_norm": 0.6430943713709129, + "learning_rate": 4.717525866684692e-06, + "loss": 0.3044, + "step": 7608 + }, + { + "epoch": 0.36, + "grad_norm": 0.6392720016316104, + "learning_rate": 4.717438289433434e-06, + "loss": 0.2847, + "step": 7609 + }, + { + "epoch": 0.36, + "grad_norm": 0.6763512683761648, + "learning_rate": 4.717350699421375e-06, + "loss": 0.3013, + "step": 7610 + }, + { + "epoch": 0.36, + "grad_norm": 0.6170105389833304, + "learning_rate": 4.71726309664902e-06, + "loss": 0.2971, + "step": 7611 + }, + { + "epoch": 0.36, + "grad_norm": 0.6074988372628519, + "learning_rate": 4.717175481116873e-06, + "loss": 0.2789, + "step": 7612 + }, + { + "epoch": 0.36, + "grad_norm": 0.6083990892671407, + "learning_rate": 4.717087852825439e-06, + "loss": 0.2899, + "step": 7613 + }, + { + "epoch": 0.36, + "grad_norm": 0.6295528540628662, + "learning_rate": 4.717000211775221e-06, + "loss": 0.2982, + "step": 7614 + }, + { + "epoch": 0.36, + "grad_norm": 0.6378900737007075, + "learning_rate": 4.716912557966725e-06, + "loss": 0.3166, + "step": 7615 + }, + { + "epoch": 0.36, + "grad_norm": 0.6500678304505181, + "learning_rate": 4.7168248914004535e-06, + "loss": 0.2918, + "step": 7616 + }, + { + "epoch": 0.36, + "grad_norm": 0.638334313612095, + "learning_rate": 4.716737212076913e-06, + "loss": 0.3183, + "step": 7617 + }, + { + "epoch": 0.36, + "grad_norm": 0.6336672539158332, + "learning_rate": 4.716649519996606e-06, + "loss": 0.2889, + "step": 7618 + }, + { + "epoch": 0.36, + "grad_norm": 0.5860119282809304, + "learning_rate": 4.716561815160038e-06, + "loss": 0.2863, + "step": 7619 + }, + { + "epoch": 0.36, + "grad_norm": 0.5888617480754655, + "learning_rate": 4.7164740975677145e-06, + "loss": 0.281, + "step": 7620 + }, + { + "epoch": 0.36, + "grad_norm": 0.5837013469891027, + "learning_rate": 4.71638636722014e-06, + "loss": 0.2868, + "step": 7621 + }, + { + "epoch": 0.36, + "grad_norm": 0.5770954158603129, + "learning_rate": 4.716298624117818e-06, + "loss": 0.2731, + "step": 7622 + }, + { + "epoch": 0.36, + "grad_norm": 0.5728094754425737, + "learning_rate": 4.716210868261255e-06, + "loss": 0.2505, + "step": 7623 + }, + { + "epoch": 0.36, + "grad_norm": 0.559550718578162, + "learning_rate": 4.7161230996509555e-06, + "loss": 0.2873, + "step": 7624 + }, + { + "epoch": 0.36, + "grad_norm": 0.6259382222858794, + "learning_rate": 4.716035318287424e-06, + "loss": 0.299, + "step": 7625 + }, + { + "epoch": 0.36, + "grad_norm": 0.5836627972351173, + "learning_rate": 4.715947524171167e-06, + "loss": 0.3106, + "step": 7626 + }, + { + "epoch": 0.36, + "grad_norm": 0.615038968841859, + "learning_rate": 4.715859717302688e-06, + "loss": 0.305, + "step": 7627 + }, + { + "epoch": 0.36, + "grad_norm": 0.5658285970306189, + "learning_rate": 4.715771897682495e-06, + "loss": 0.2764, + "step": 7628 + }, + { + "epoch": 0.36, + "grad_norm": 0.594814688661201, + "learning_rate": 4.71568406531109e-06, + "loss": 0.2615, + "step": 7629 + }, + { + "epoch": 0.36, + "grad_norm": 0.6402957635480249, + "learning_rate": 4.715596220188981e-06, + "loss": 0.304, + "step": 7630 + }, + { + "epoch": 0.36, + "grad_norm": 0.6343098622346086, + "learning_rate": 4.715508362316672e-06, + "loss": 0.2986, + "step": 7631 + }, + { + "epoch": 0.36, + "grad_norm": 0.6108475217412314, + "learning_rate": 4.71542049169467e-06, + "loss": 0.3089, + "step": 7632 + }, + { + "epoch": 0.36, + "grad_norm": 0.5814589048434232, + "learning_rate": 4.7153326083234794e-06, + "loss": 0.2868, + "step": 7633 + }, + { + "epoch": 0.36, + "grad_norm": 0.5844255532930799, + "learning_rate": 4.715244712203606e-06, + "loss": 0.2768, + "step": 7634 + }, + { + "epoch": 0.36, + "grad_norm": 0.6425127362299846, + "learning_rate": 4.715156803335557e-06, + "loss": 0.3026, + "step": 7635 + }, + { + "epoch": 0.36, + "grad_norm": 0.6411820506041295, + "learning_rate": 4.715068881719837e-06, + "loss": 0.2809, + "step": 7636 + }, + { + "epoch": 0.36, + "grad_norm": 0.6023047935386585, + "learning_rate": 4.714980947356952e-06, + "loss": 0.3037, + "step": 7637 + }, + { + "epoch": 0.36, + "grad_norm": 0.6279026202215866, + "learning_rate": 4.714893000247408e-06, + "loss": 0.3031, + "step": 7638 + }, + { + "epoch": 0.36, + "grad_norm": 0.744299684510718, + "learning_rate": 4.714805040391712e-06, + "loss": 0.2992, + "step": 7639 + }, + { + "epoch": 0.36, + "grad_norm": 0.6139101689333187, + "learning_rate": 4.71471706779037e-06, + "loss": 0.2796, + "step": 7640 + }, + { + "epoch": 0.36, + "grad_norm": 0.6619204139039121, + "learning_rate": 4.714629082443888e-06, + "loss": 0.3091, + "step": 7641 + }, + { + "epoch": 0.36, + "grad_norm": 0.5809399156462101, + "learning_rate": 4.714541084352771e-06, + "loss": 0.2966, + "step": 7642 + }, + { + "epoch": 0.36, + "grad_norm": 0.6171099722550906, + "learning_rate": 4.714453073517528e-06, + "loss": 0.2861, + "step": 7643 + }, + { + "epoch": 0.36, + "grad_norm": 0.6590209120920258, + "learning_rate": 4.714365049938664e-06, + "loss": 0.3024, + "step": 7644 + }, + { + "epoch": 0.36, + "grad_norm": 0.5773738703520496, + "learning_rate": 4.714277013616685e-06, + "loss": 0.2847, + "step": 7645 + }, + { + "epoch": 0.36, + "grad_norm": 0.5814154297394668, + "learning_rate": 4.7141889645520985e-06, + "loss": 0.2884, + "step": 7646 + }, + { + "epoch": 0.36, + "grad_norm": 0.6343875988508536, + "learning_rate": 4.714100902745411e-06, + "loss": 0.3036, + "step": 7647 + }, + { + "epoch": 0.36, + "grad_norm": 0.6473872494436865, + "learning_rate": 4.71401282819713e-06, + "loss": 0.3024, + "step": 7648 + }, + { + "epoch": 0.36, + "grad_norm": 0.6039895368967144, + "learning_rate": 4.713924740907761e-06, + "loss": 0.2807, + "step": 7649 + }, + { + "epoch": 0.36, + "grad_norm": 0.6454968290831432, + "learning_rate": 4.713836640877811e-06, + "loss": 0.3043, + "step": 7650 + }, + { + "epoch": 0.36, + "grad_norm": 0.6736199139683001, + "learning_rate": 4.7137485281077885e-06, + "loss": 0.3159, + "step": 7651 + }, + { + "epoch": 0.36, + "grad_norm": 0.6607998353405472, + "learning_rate": 4.7136604025982e-06, + "loss": 0.3016, + "step": 7652 + }, + { + "epoch": 0.36, + "grad_norm": 0.5819396157806805, + "learning_rate": 4.713572264349552e-06, + "loss": 0.2864, + "step": 7653 + }, + { + "epoch": 0.36, + "grad_norm": 0.6783293845854382, + "learning_rate": 4.713484113362351e-06, + "loss": 0.3027, + "step": 7654 + }, + { + "epoch": 0.36, + "grad_norm": 0.6533068063469948, + "learning_rate": 4.713395949637106e-06, + "loss": 0.3014, + "step": 7655 + }, + { + "epoch": 0.36, + "grad_norm": 0.6264152180435957, + "learning_rate": 4.713307773174324e-06, + "loss": 0.2912, + "step": 7656 + }, + { + "epoch": 0.36, + "grad_norm": 0.6254233442311393, + "learning_rate": 4.713219583974511e-06, + "loss": 0.2837, + "step": 7657 + }, + { + "epoch": 0.36, + "grad_norm": 0.588277012646326, + "learning_rate": 4.713131382038176e-06, + "loss": 0.2948, + "step": 7658 + }, + { + "epoch": 0.36, + "grad_norm": 0.6273749529543962, + "learning_rate": 4.713043167365827e-06, + "loss": 0.3093, + "step": 7659 + }, + { + "epoch": 0.36, + "grad_norm": 0.5761078595557682, + "learning_rate": 4.71295493995797e-06, + "loss": 0.3059, + "step": 7660 + }, + { + "epoch": 0.36, + "grad_norm": 0.5925716654096674, + "learning_rate": 4.712866699815113e-06, + "loss": 0.2976, + "step": 7661 + }, + { + "epoch": 0.36, + "grad_norm": 0.6089878600326886, + "learning_rate": 4.712778446937765e-06, + "loss": 0.2815, + "step": 7662 + }, + { + "epoch": 0.36, + "grad_norm": 0.6249525130201824, + "learning_rate": 4.7126901813264334e-06, + "loss": 0.283, + "step": 7663 + }, + { + "epoch": 0.36, + "grad_norm": 0.5970128875264514, + "learning_rate": 4.712601902981626e-06, + "loss": 0.2894, + "step": 7664 + }, + { + "epoch": 0.36, + "grad_norm": 0.6249013385193074, + "learning_rate": 4.712513611903851e-06, + "loss": 0.2836, + "step": 7665 + }, + { + "epoch": 0.36, + "grad_norm": 0.5797493655091198, + "learning_rate": 4.712425308093615e-06, + "loss": 0.2887, + "step": 7666 + }, + { + "epoch": 0.36, + "grad_norm": 0.5957286052172068, + "learning_rate": 4.712336991551428e-06, + "loss": 0.2906, + "step": 7667 + }, + { + "epoch": 0.36, + "grad_norm": 0.6186746630699107, + "learning_rate": 4.712248662277798e-06, + "loss": 0.2903, + "step": 7668 + }, + { + "epoch": 0.36, + "grad_norm": 0.62479550841855, + "learning_rate": 4.712160320273232e-06, + "loss": 0.2934, + "step": 7669 + }, + { + "epoch": 0.36, + "grad_norm": 0.6345634959849168, + "learning_rate": 4.712071965538241e-06, + "loss": 0.2933, + "step": 7670 + }, + { + "epoch": 0.36, + "grad_norm": 0.6073926158003639, + "learning_rate": 4.711983598073331e-06, + "loss": 0.2725, + "step": 7671 + }, + { + "epoch": 0.36, + "grad_norm": 0.6142752796345038, + "learning_rate": 4.7118952178790115e-06, + "loss": 0.294, + "step": 7672 + }, + { + "epoch": 0.36, + "grad_norm": 0.6267566776054355, + "learning_rate": 4.71180682495579e-06, + "loss": 0.2805, + "step": 7673 + }, + { + "epoch": 0.36, + "grad_norm": 0.6066844694705806, + "learning_rate": 4.711718419304177e-06, + "loss": 0.292, + "step": 7674 + }, + { + "epoch": 0.36, + "grad_norm": 0.635886831730509, + "learning_rate": 4.711630000924681e-06, + "loss": 0.2894, + "step": 7675 + }, + { + "epoch": 0.36, + "grad_norm": 0.6038439854257852, + "learning_rate": 4.7115415698178095e-06, + "loss": 0.2811, + "step": 7676 + }, + { + "epoch": 0.36, + "grad_norm": 0.5860829999355569, + "learning_rate": 4.7114531259840725e-06, + "loss": 0.2758, + "step": 7677 + }, + { + "epoch": 0.36, + "grad_norm": 0.6165728342862139, + "learning_rate": 4.711364669423978e-06, + "loss": 0.2973, + "step": 7678 + }, + { + "epoch": 0.36, + "grad_norm": 0.6153905609034794, + "learning_rate": 4.711276200138035e-06, + "loss": 0.3103, + "step": 7679 + }, + { + "epoch": 0.36, + "grad_norm": 0.65877652975603, + "learning_rate": 4.711187718126755e-06, + "loss": 0.2905, + "step": 7680 + }, + { + "epoch": 0.36, + "grad_norm": 0.5890482803176528, + "learning_rate": 4.711099223390644e-06, + "loss": 0.29, + "step": 7681 + }, + { + "epoch": 0.36, + "grad_norm": 0.6063779782482279, + "learning_rate": 4.711010715930214e-06, + "loss": 0.3124, + "step": 7682 + }, + { + "epoch": 0.36, + "grad_norm": 0.6787702059358063, + "learning_rate": 4.710922195745972e-06, + "loss": 0.3082, + "step": 7683 + }, + { + "epoch": 0.36, + "grad_norm": 0.5955479270783727, + "learning_rate": 4.710833662838429e-06, + "loss": 0.2917, + "step": 7684 + }, + { + "epoch": 0.36, + "grad_norm": 0.6324742579767287, + "learning_rate": 4.710745117208093e-06, + "loss": 0.2941, + "step": 7685 + }, + { + "epoch": 0.36, + "grad_norm": 0.5756042226453724, + "learning_rate": 4.710656558855475e-06, + "loss": 0.2769, + "step": 7686 + }, + { + "epoch": 0.36, + "grad_norm": 0.6373858315091397, + "learning_rate": 4.710567987781085e-06, + "loss": 0.3169, + "step": 7687 + }, + { + "epoch": 0.36, + "grad_norm": 0.6011754822021567, + "learning_rate": 4.7104794039854305e-06, + "loss": 0.2927, + "step": 7688 + }, + { + "epoch": 0.36, + "grad_norm": 0.6584700579970695, + "learning_rate": 4.710390807469024e-06, + "loss": 0.2921, + "step": 7689 + }, + { + "epoch": 0.36, + "grad_norm": 0.5847739678065091, + "learning_rate": 4.7103021982323735e-06, + "loss": 0.2954, + "step": 7690 + }, + { + "epoch": 0.36, + "grad_norm": 0.5826365536243069, + "learning_rate": 4.710213576275989e-06, + "loss": 0.2748, + "step": 7691 + }, + { + "epoch": 0.36, + "grad_norm": 0.6404922326446054, + "learning_rate": 4.710124941600381e-06, + "loss": 0.2985, + "step": 7692 + }, + { + "epoch": 0.36, + "grad_norm": 0.5643039937920155, + "learning_rate": 4.71003629420606e-06, + "loss": 0.3018, + "step": 7693 + }, + { + "epoch": 0.36, + "grad_norm": 0.602117149415449, + "learning_rate": 4.709947634093535e-06, + "loss": 0.3001, + "step": 7694 + }, + { + "epoch": 0.36, + "grad_norm": 0.603357743633819, + "learning_rate": 4.709858961263316e-06, + "loss": 0.2999, + "step": 7695 + }, + { + "epoch": 0.36, + "grad_norm": 0.7314053553433765, + "learning_rate": 4.709770275715916e-06, + "loss": 0.3043, + "step": 7696 + }, + { + "epoch": 0.36, + "grad_norm": 0.5945130836952959, + "learning_rate": 4.709681577451842e-06, + "loss": 0.2949, + "step": 7697 + }, + { + "epoch": 0.36, + "grad_norm": 0.6885876535883964, + "learning_rate": 4.709592866471606e-06, + "loss": 0.3221, + "step": 7698 + }, + { + "epoch": 0.36, + "grad_norm": 0.565981104663937, + "learning_rate": 4.709504142775719e-06, + "loss": 0.2797, + "step": 7699 + }, + { + "epoch": 0.36, + "grad_norm": 0.571623142846503, + "learning_rate": 4.70941540636469e-06, + "loss": 0.2743, + "step": 7700 + }, + { + "epoch": 0.36, + "grad_norm": 0.5999409223553965, + "learning_rate": 4.709326657239032e-06, + "loss": 0.2939, + "step": 7701 + }, + { + "epoch": 0.36, + "grad_norm": 0.6131370400149947, + "learning_rate": 4.709237895399254e-06, + "loss": 0.2893, + "step": 7702 + }, + { + "epoch": 0.36, + "grad_norm": 0.5899259678918449, + "learning_rate": 4.709149120845867e-06, + "loss": 0.2895, + "step": 7703 + }, + { + "epoch": 0.36, + "grad_norm": 0.6337650592457487, + "learning_rate": 4.709060333579382e-06, + "loss": 0.3013, + "step": 7704 + }, + { + "epoch": 0.36, + "grad_norm": 0.6027923912659624, + "learning_rate": 4.70897153360031e-06, + "loss": 0.277, + "step": 7705 + }, + { + "epoch": 0.36, + "grad_norm": 0.5841463178086898, + "learning_rate": 4.708882720909163e-06, + "loss": 0.284, + "step": 7706 + }, + { + "epoch": 0.36, + "grad_norm": 0.6106127702576534, + "learning_rate": 4.70879389550645e-06, + "loss": 0.3038, + "step": 7707 + }, + { + "epoch": 0.36, + "grad_norm": 0.6146607015154779, + "learning_rate": 4.708705057392683e-06, + "loss": 0.2781, + "step": 7708 + }, + { + "epoch": 0.36, + "grad_norm": 0.5662140695845386, + "learning_rate": 4.708616206568374e-06, + "loss": 0.279, + "step": 7709 + }, + { + "epoch": 0.36, + "grad_norm": 0.6046564823618386, + "learning_rate": 4.708527343034034e-06, + "loss": 0.2809, + "step": 7710 + }, + { + "epoch": 0.36, + "grad_norm": 0.6251207749968273, + "learning_rate": 4.708438466790174e-06, + "loss": 0.3014, + "step": 7711 + }, + { + "epoch": 0.36, + "grad_norm": 0.5939254949860868, + "learning_rate": 4.708349577837306e-06, + "loss": 0.2898, + "step": 7712 + }, + { + "epoch": 0.36, + "grad_norm": 0.552071578596804, + "learning_rate": 4.708260676175941e-06, + "loss": 0.2764, + "step": 7713 + }, + { + "epoch": 0.36, + "grad_norm": 0.6181599890695686, + "learning_rate": 4.708171761806591e-06, + "loss": 0.2958, + "step": 7714 + }, + { + "epoch": 0.36, + "grad_norm": 0.6299156618375744, + "learning_rate": 4.708082834729767e-06, + "loss": 0.3293, + "step": 7715 + }, + { + "epoch": 0.36, + "grad_norm": 0.5766372782661704, + "learning_rate": 4.707993894945982e-06, + "loss": 0.286, + "step": 7716 + }, + { + "epoch": 0.36, + "grad_norm": 0.5797713057451661, + "learning_rate": 4.707904942455747e-06, + "loss": 0.2911, + "step": 7717 + }, + { + "epoch": 0.36, + "grad_norm": 0.6039346489283011, + "learning_rate": 4.707815977259573e-06, + "loss": 0.2972, + "step": 7718 + }, + { + "epoch": 0.36, + "grad_norm": 0.5756458330613189, + "learning_rate": 4.707726999357975e-06, + "loss": 0.2899, + "step": 7719 + }, + { + "epoch": 0.36, + "grad_norm": 0.6150503417856578, + "learning_rate": 4.707638008751461e-06, + "loss": 0.2898, + "step": 7720 + }, + { + "epoch": 0.36, + "grad_norm": 0.6505912914717936, + "learning_rate": 4.707549005440546e-06, + "loss": 0.3234, + "step": 7721 + }, + { + "epoch": 0.36, + "grad_norm": 0.6248302419807579, + "learning_rate": 4.7074599894257415e-06, + "loss": 0.297, + "step": 7722 + }, + { + "epoch": 0.36, + "grad_norm": 0.6542749459832027, + "learning_rate": 4.707370960707559e-06, + "loss": 0.2851, + "step": 7723 + }, + { + "epoch": 0.36, + "grad_norm": 0.5662868036543015, + "learning_rate": 4.707281919286511e-06, + "loss": 0.2824, + "step": 7724 + }, + { + "epoch": 0.36, + "grad_norm": 0.6184303941044846, + "learning_rate": 4.707192865163112e-06, + "loss": 0.2809, + "step": 7725 + }, + { + "epoch": 0.36, + "grad_norm": 0.6634289347533826, + "learning_rate": 4.707103798337871e-06, + "loss": 0.3096, + "step": 7726 + }, + { + "epoch": 0.36, + "grad_norm": 0.6022627118259771, + "learning_rate": 4.707014718811304e-06, + "loss": 0.3015, + "step": 7727 + }, + { + "epoch": 0.36, + "grad_norm": 0.7717181381129723, + "learning_rate": 4.70692562658392e-06, + "loss": 0.3247, + "step": 7728 + }, + { + "epoch": 0.36, + "grad_norm": 0.6169275065536336, + "learning_rate": 4.706836521656236e-06, + "loss": 0.2918, + "step": 7729 + }, + { + "epoch": 0.36, + "grad_norm": 0.6858094305834624, + "learning_rate": 4.706747404028761e-06, + "loss": 0.2893, + "step": 7730 + }, + { + "epoch": 0.36, + "grad_norm": 0.5978676002836691, + "learning_rate": 4.70665827370201e-06, + "loss": 0.2836, + "step": 7731 + }, + { + "epoch": 0.36, + "grad_norm": 0.5702610878558729, + "learning_rate": 4.706569130676495e-06, + "loss": 0.2598, + "step": 7732 + }, + { + "epoch": 0.36, + "grad_norm": 0.6220604434922183, + "learning_rate": 4.706479974952729e-06, + "loss": 0.2993, + "step": 7733 + }, + { + "epoch": 0.36, + "grad_norm": 0.587836808188556, + "learning_rate": 4.7063908065312255e-06, + "loss": 0.2952, + "step": 7734 + }, + { + "epoch": 0.36, + "grad_norm": 0.5853351583038789, + "learning_rate": 4.706301625412498e-06, + "loss": 0.3004, + "step": 7735 + }, + { + "epoch": 0.36, + "grad_norm": 0.6168432088294373, + "learning_rate": 4.706212431597058e-06, + "loss": 0.3191, + "step": 7736 + }, + { + "epoch": 0.36, + "grad_norm": 0.6206391034139919, + "learning_rate": 4.706123225085421e-06, + "loss": 0.3019, + "step": 7737 + }, + { + "epoch": 0.36, + "grad_norm": 0.5956190774826864, + "learning_rate": 4.706034005878099e-06, + "loss": 0.2912, + "step": 7738 + }, + { + "epoch": 0.36, + "grad_norm": 0.5517584672555077, + "learning_rate": 4.7059447739756056e-06, + "loss": 0.2751, + "step": 7739 + }, + { + "epoch": 0.36, + "grad_norm": 0.5863880998155926, + "learning_rate": 4.705855529378454e-06, + "loss": 0.279, + "step": 7740 + }, + { + "epoch": 0.36, + "grad_norm": 0.655563998298645, + "learning_rate": 4.70576627208716e-06, + "loss": 0.3141, + "step": 7741 + }, + { + "epoch": 0.36, + "grad_norm": 0.6013319720966585, + "learning_rate": 4.705677002102234e-06, + "loss": 0.2937, + "step": 7742 + }, + { + "epoch": 0.36, + "grad_norm": 0.6075717664594433, + "learning_rate": 4.705587719424192e-06, + "loss": 0.2785, + "step": 7743 + }, + { + "epoch": 0.36, + "grad_norm": 0.6417583069137612, + "learning_rate": 4.705498424053546e-06, + "loss": 0.2979, + "step": 7744 + }, + { + "epoch": 0.36, + "grad_norm": 0.5919097519741037, + "learning_rate": 4.7054091159908126e-06, + "loss": 0.2848, + "step": 7745 + }, + { + "epoch": 0.36, + "grad_norm": 0.6137911436753837, + "learning_rate": 4.705319795236503e-06, + "loss": 0.2862, + "step": 7746 + }, + { + "epoch": 0.36, + "grad_norm": 0.5855078334881106, + "learning_rate": 4.705230461791132e-06, + "loss": 0.2828, + "step": 7747 + }, + { + "epoch": 0.36, + "grad_norm": 0.6639086847299912, + "learning_rate": 4.705141115655214e-06, + "loss": 0.2978, + "step": 7748 + }, + { + "epoch": 0.36, + "grad_norm": 0.639024370575996, + "learning_rate": 4.705051756829263e-06, + "loss": 0.3025, + "step": 7749 + }, + { + "epoch": 0.36, + "grad_norm": 0.5897655780011504, + "learning_rate": 4.704962385313794e-06, + "loss": 0.2828, + "step": 7750 + }, + { + "epoch": 0.36, + "grad_norm": 0.6630604030961043, + "learning_rate": 4.70487300110932e-06, + "loss": 0.3176, + "step": 7751 + }, + { + "epoch": 0.36, + "grad_norm": 0.6712253614771628, + "learning_rate": 4.7047836042163564e-06, + "loss": 0.288, + "step": 7752 + }, + { + "epoch": 0.36, + "grad_norm": 0.5804144502492644, + "learning_rate": 4.704694194635418e-06, + "loss": 0.2655, + "step": 7753 + }, + { + "epoch": 0.36, + "grad_norm": 0.5888726888420472, + "learning_rate": 4.7046047723670174e-06, + "loss": 0.2786, + "step": 7754 + }, + { + "epoch": 0.36, + "grad_norm": 0.653423040876757, + "learning_rate": 4.704515337411671e-06, + "loss": 0.2985, + "step": 7755 + }, + { + "epoch": 0.36, + "grad_norm": 0.6150888880892509, + "learning_rate": 4.704425889769893e-06, + "loss": 0.3139, + "step": 7756 + }, + { + "epoch": 0.36, + "grad_norm": 0.5683746583235633, + "learning_rate": 4.704336429442198e-06, + "loss": 0.2749, + "step": 7757 + }, + { + "epoch": 0.36, + "grad_norm": 0.6293840902190744, + "learning_rate": 4.704246956429101e-06, + "loss": 0.3103, + "step": 7758 + }, + { + "epoch": 0.36, + "grad_norm": 0.6550641829618967, + "learning_rate": 4.704157470731116e-06, + "loss": 0.2989, + "step": 7759 + }, + { + "epoch": 0.36, + "grad_norm": 0.6492266589482649, + "learning_rate": 4.704067972348761e-06, + "loss": 0.3117, + "step": 7760 + }, + { + "epoch": 0.36, + "grad_norm": 0.5646136214683609, + "learning_rate": 4.703978461282546e-06, + "loss": 0.2716, + "step": 7761 + }, + { + "epoch": 0.36, + "grad_norm": 0.5622641506902338, + "learning_rate": 4.70388893753299e-06, + "loss": 0.3037, + "step": 7762 + }, + { + "epoch": 0.36, + "grad_norm": 0.648517749337197, + "learning_rate": 4.703799401100608e-06, + "loss": 0.2751, + "step": 7763 + }, + { + "epoch": 0.36, + "grad_norm": 0.6268121855485749, + "learning_rate": 4.703709851985914e-06, + "loss": 0.298, + "step": 7764 + }, + { + "epoch": 0.36, + "grad_norm": 0.6321276126942336, + "learning_rate": 4.703620290189423e-06, + "loss": 0.2782, + "step": 7765 + }, + { + "epoch": 0.36, + "grad_norm": 0.6482348397238785, + "learning_rate": 4.7035307157116505e-06, + "loss": 0.3032, + "step": 7766 + }, + { + "epoch": 0.36, + "grad_norm": 0.6458809570108384, + "learning_rate": 4.703441128553113e-06, + "loss": 0.3088, + "step": 7767 + }, + { + "epoch": 0.36, + "grad_norm": 0.6281751295064122, + "learning_rate": 4.703351528714327e-06, + "loss": 0.288, + "step": 7768 + }, + { + "epoch": 0.36, + "grad_norm": 0.619410653952691, + "learning_rate": 4.703261916195805e-06, + "loss": 0.2893, + "step": 7769 + }, + { + "epoch": 0.36, + "grad_norm": 0.6359723446561245, + "learning_rate": 4.703172290998066e-06, + "loss": 0.3102, + "step": 7770 + }, + { + "epoch": 0.36, + "grad_norm": 0.6150625417693381, + "learning_rate": 4.703082653121623e-06, + "loss": 0.2925, + "step": 7771 + }, + { + "epoch": 0.36, + "grad_norm": 0.6011282775158721, + "learning_rate": 4.702993002566993e-06, + "loss": 0.2917, + "step": 7772 + }, + { + "epoch": 0.36, + "grad_norm": 0.609009448439883, + "learning_rate": 4.702903339334693e-06, + "loss": 0.2812, + "step": 7773 + }, + { + "epoch": 0.36, + "grad_norm": 0.5977876786420239, + "learning_rate": 4.702813663425238e-06, + "loss": 0.2929, + "step": 7774 + }, + { + "epoch": 0.36, + "grad_norm": 0.5453600622996616, + "learning_rate": 4.702723974839143e-06, + "loss": 0.2731, + "step": 7775 + }, + { + "epoch": 0.36, + "grad_norm": 0.6011333035435652, + "learning_rate": 4.702634273576925e-06, + "loss": 0.2743, + "step": 7776 + }, + { + "epoch": 0.36, + "grad_norm": 0.669601306524421, + "learning_rate": 4.7025445596391014e-06, + "loss": 0.2969, + "step": 7777 + }, + { + "epoch": 0.36, + "grad_norm": 0.7442781668459115, + "learning_rate": 4.702454833026186e-06, + "loss": 0.2731, + "step": 7778 + }, + { + "epoch": 0.36, + "grad_norm": 0.6524023057154816, + "learning_rate": 4.702365093738699e-06, + "loss": 0.2947, + "step": 7779 + }, + { + "epoch": 0.36, + "grad_norm": 0.6263024768008335, + "learning_rate": 4.702275341777153e-06, + "loss": 0.3062, + "step": 7780 + }, + { + "epoch": 0.36, + "grad_norm": 0.6258235656249616, + "learning_rate": 4.702185577142065e-06, + "loss": 0.2973, + "step": 7781 + }, + { + "epoch": 0.36, + "grad_norm": 0.647550196717022, + "learning_rate": 4.702095799833954e-06, + "loss": 0.2648, + "step": 7782 + }, + { + "epoch": 0.36, + "grad_norm": 0.6049143794875999, + "learning_rate": 4.702006009853335e-06, + "loss": 0.286, + "step": 7783 + }, + { + "epoch": 0.36, + "grad_norm": 0.5815601318743924, + "learning_rate": 4.701916207200724e-06, + "loss": 0.2822, + "step": 7784 + }, + { + "epoch": 0.36, + "grad_norm": 0.632674338831161, + "learning_rate": 4.7018263918766394e-06, + "loss": 0.3143, + "step": 7785 + }, + { + "epoch": 0.36, + "grad_norm": 0.6124112815378865, + "learning_rate": 4.701736563881597e-06, + "loss": 0.2998, + "step": 7786 + }, + { + "epoch": 0.36, + "grad_norm": 0.6010986786680103, + "learning_rate": 4.701646723216114e-06, + "loss": 0.2889, + "step": 7787 + }, + { + "epoch": 0.36, + "grad_norm": 0.594019071905626, + "learning_rate": 4.701556869880708e-06, + "loss": 0.2826, + "step": 7788 + }, + { + "epoch": 0.36, + "grad_norm": 0.6015369284988177, + "learning_rate": 4.701467003875894e-06, + "loss": 0.2877, + "step": 7789 + }, + { + "epoch": 0.36, + "grad_norm": 0.5840689679920329, + "learning_rate": 4.701377125202192e-06, + "loss": 0.2875, + "step": 7790 + }, + { + "epoch": 0.36, + "grad_norm": 0.6308108563187159, + "learning_rate": 4.701287233860118e-06, + "loss": 0.2953, + "step": 7791 + }, + { + "epoch": 0.37, + "grad_norm": 0.6273721691738067, + "learning_rate": 4.701197329850189e-06, + "loss": 0.2918, + "step": 7792 + }, + { + "epoch": 0.37, + "grad_norm": 0.6401380429237857, + "learning_rate": 4.701107413172923e-06, + "loss": 0.3004, + "step": 7793 + }, + { + "epoch": 0.37, + "grad_norm": 0.5860859314690561, + "learning_rate": 4.7010174838288365e-06, + "loss": 0.2937, + "step": 7794 + }, + { + "epoch": 0.37, + "grad_norm": 0.6608888331142511, + "learning_rate": 4.700927541818448e-06, + "loss": 0.2963, + "step": 7795 + }, + { + "epoch": 0.37, + "grad_norm": 0.6286239924951222, + "learning_rate": 4.7008375871422745e-06, + "loss": 0.2819, + "step": 7796 + }, + { + "epoch": 0.37, + "grad_norm": 0.6270010129386842, + "learning_rate": 4.700747619800834e-06, + "loss": 0.2999, + "step": 7797 + }, + { + "epoch": 0.37, + "grad_norm": 0.6363846647537886, + "learning_rate": 4.700657639794644e-06, + "loss": 0.3003, + "step": 7798 + }, + { + "epoch": 0.37, + "grad_norm": 0.573856885534457, + "learning_rate": 4.700567647124222e-06, + "loss": 0.2711, + "step": 7799 + }, + { + "epoch": 0.37, + "grad_norm": 0.6116044597397478, + "learning_rate": 4.700477641790087e-06, + "loss": 0.3041, + "step": 7800 + }, + { + "epoch": 0.37, + "grad_norm": 0.615355112765537, + "learning_rate": 4.7003876237927555e-06, + "loss": 0.294, + "step": 7801 + }, + { + "epoch": 0.37, + "grad_norm": 0.7004832878958295, + "learning_rate": 4.700297593132747e-06, + "loss": 0.3123, + "step": 7802 + }, + { + "epoch": 0.37, + "grad_norm": 0.5751271359628699, + "learning_rate": 4.700207549810578e-06, + "loss": 0.2801, + "step": 7803 + }, + { + "epoch": 0.37, + "grad_norm": 0.6398836996220582, + "learning_rate": 4.700117493826768e-06, + "loss": 0.2839, + "step": 7804 + }, + { + "epoch": 0.37, + "grad_norm": 0.6128082125465235, + "learning_rate": 4.700027425181835e-06, + "loss": 0.2729, + "step": 7805 + }, + { + "epoch": 0.37, + "grad_norm": 0.5859305612496025, + "learning_rate": 4.699937343876297e-06, + "loss": 0.2852, + "step": 7806 + }, + { + "epoch": 0.37, + "grad_norm": 0.6109377769560826, + "learning_rate": 4.699847249910672e-06, + "loss": 0.3155, + "step": 7807 + }, + { + "epoch": 0.37, + "grad_norm": 0.6208716571623698, + "learning_rate": 4.69975714328548e-06, + "loss": 0.2796, + "step": 7808 + }, + { + "epoch": 0.37, + "grad_norm": 0.6452845239425294, + "learning_rate": 4.699667024001237e-06, + "loss": 0.3152, + "step": 7809 + }, + { + "epoch": 0.37, + "grad_norm": 0.6849235602401481, + "learning_rate": 4.699576892058465e-06, + "loss": 0.2848, + "step": 7810 + }, + { + "epoch": 0.37, + "grad_norm": 0.6840591699565356, + "learning_rate": 4.69948674745768e-06, + "loss": 0.3076, + "step": 7811 + }, + { + "epoch": 0.37, + "grad_norm": 0.604180109905256, + "learning_rate": 4.699396590199402e-06, + "loss": 0.2841, + "step": 7812 + }, + { + "epoch": 0.37, + "grad_norm": 0.6538269350988732, + "learning_rate": 4.699306420284149e-06, + "loss": 0.2926, + "step": 7813 + }, + { + "epoch": 0.37, + "grad_norm": 0.6056035917099287, + "learning_rate": 4.69921623771244e-06, + "loss": 0.313, + "step": 7814 + }, + { + "epoch": 0.37, + "grad_norm": 0.6069961101513964, + "learning_rate": 4.699126042484794e-06, + "loss": 0.2848, + "step": 7815 + }, + { + "epoch": 0.37, + "grad_norm": 0.5927312139316416, + "learning_rate": 4.699035834601732e-06, + "loss": 0.2927, + "step": 7816 + }, + { + "epoch": 0.37, + "grad_norm": 0.605858841035002, + "learning_rate": 4.698945614063769e-06, + "loss": 0.2969, + "step": 7817 + }, + { + "epoch": 0.37, + "grad_norm": 0.6251246225679867, + "learning_rate": 4.698855380871429e-06, + "loss": 0.3043, + "step": 7818 + }, + { + "epoch": 0.37, + "grad_norm": 0.5786180199329934, + "learning_rate": 4.698765135025228e-06, + "loss": 0.2662, + "step": 7819 + }, + { + "epoch": 0.37, + "grad_norm": 0.6288919933935104, + "learning_rate": 4.698674876525686e-06, + "loss": 0.291, + "step": 7820 + }, + { + "epoch": 0.37, + "grad_norm": 0.6602428804209477, + "learning_rate": 4.698584605373323e-06, + "loss": 0.311, + "step": 7821 + }, + { + "epoch": 0.37, + "grad_norm": 0.6176657793446733, + "learning_rate": 4.698494321568658e-06, + "loss": 0.3119, + "step": 7822 + }, + { + "epoch": 0.37, + "grad_norm": 0.6386774253716098, + "learning_rate": 4.698404025112212e-06, + "loss": 0.303, + "step": 7823 + }, + { + "epoch": 0.37, + "grad_norm": 0.5909407270522986, + "learning_rate": 4.698313716004503e-06, + "loss": 0.2944, + "step": 7824 + }, + { + "epoch": 0.37, + "grad_norm": 0.6671346489315099, + "learning_rate": 4.69822339424605e-06, + "loss": 0.2838, + "step": 7825 + }, + { + "epoch": 0.37, + "grad_norm": 0.6316159026017192, + "learning_rate": 4.698133059837374e-06, + "loss": 0.2911, + "step": 7826 + }, + { + "epoch": 0.37, + "grad_norm": 0.634940163000047, + "learning_rate": 4.698042712778995e-06, + "loss": 0.297, + "step": 7827 + }, + { + "epoch": 0.37, + "grad_norm": 0.5511790679351893, + "learning_rate": 4.697952353071432e-06, + "loss": 0.2613, + "step": 7828 + }, + { + "epoch": 0.37, + "grad_norm": 0.6108276069447744, + "learning_rate": 4.697861980715207e-06, + "loss": 0.3032, + "step": 7829 + }, + { + "epoch": 0.37, + "grad_norm": 0.5935954171021093, + "learning_rate": 4.697771595710837e-06, + "loss": 0.2774, + "step": 7830 + }, + { + "epoch": 0.37, + "grad_norm": 0.625904762435695, + "learning_rate": 4.697681198058846e-06, + "loss": 0.2886, + "step": 7831 + }, + { + "epoch": 0.37, + "grad_norm": 0.6584199564940985, + "learning_rate": 4.697590787759751e-06, + "loss": 0.3272, + "step": 7832 + }, + { + "epoch": 0.37, + "grad_norm": 0.7156994537924352, + "learning_rate": 4.697500364814073e-06, + "loss": 0.3059, + "step": 7833 + }, + { + "epoch": 0.37, + "grad_norm": 0.5946332422736031, + "learning_rate": 4.697409929222333e-06, + "loss": 0.303, + "step": 7834 + }, + { + "epoch": 0.37, + "grad_norm": 0.6070995371831206, + "learning_rate": 4.6973194809850505e-06, + "loss": 0.2891, + "step": 7835 + }, + { + "epoch": 0.37, + "grad_norm": 0.6041899180838869, + "learning_rate": 4.697229020102748e-06, + "loss": 0.2772, + "step": 7836 + }, + { + "epoch": 0.37, + "grad_norm": 0.6430585549778006, + "learning_rate": 4.6971385465759445e-06, + "loss": 0.2926, + "step": 7837 + }, + { + "epoch": 0.37, + "grad_norm": 0.5729353137300652, + "learning_rate": 4.697048060405161e-06, + "loss": 0.2839, + "step": 7838 + }, + { + "epoch": 0.37, + "grad_norm": 0.5665440188474059, + "learning_rate": 4.696957561590917e-06, + "loss": 0.2865, + "step": 7839 + }, + { + "epoch": 0.37, + "grad_norm": 0.6260632210324839, + "learning_rate": 4.696867050133735e-06, + "loss": 0.2989, + "step": 7840 + }, + { + "epoch": 0.37, + "grad_norm": 0.6486620310600872, + "learning_rate": 4.696776526034135e-06, + "loss": 0.3106, + "step": 7841 + }, + { + "epoch": 0.37, + "grad_norm": 0.6124733737836113, + "learning_rate": 4.696685989292639e-06, + "loss": 0.2895, + "step": 7842 + }, + { + "epoch": 0.37, + "grad_norm": 0.6303911704298711, + "learning_rate": 4.696595439909767e-06, + "loss": 0.2969, + "step": 7843 + }, + { + "epoch": 0.37, + "grad_norm": 0.63630855738645, + "learning_rate": 4.69650487788604e-06, + "loss": 0.2942, + "step": 7844 + }, + { + "epoch": 0.37, + "grad_norm": 0.597990290629458, + "learning_rate": 4.69641430322198e-06, + "loss": 0.308, + "step": 7845 + }, + { + "epoch": 0.37, + "grad_norm": 0.6029450981527762, + "learning_rate": 4.696323715918107e-06, + "loss": 0.2964, + "step": 7846 + }, + { + "epoch": 0.37, + "grad_norm": 0.5838569997757805, + "learning_rate": 4.696233115974943e-06, + "loss": 0.2772, + "step": 7847 + }, + { + "epoch": 0.37, + "grad_norm": 0.6166855554473893, + "learning_rate": 4.69614250339301e-06, + "loss": 0.3058, + "step": 7848 + }, + { + "epoch": 0.37, + "grad_norm": 0.6006553089002855, + "learning_rate": 4.696051878172829e-06, + "loss": 0.3074, + "step": 7849 + }, + { + "epoch": 0.37, + "grad_norm": 0.6053268867965773, + "learning_rate": 4.695961240314921e-06, + "loss": 0.2972, + "step": 7850 + }, + { + "epoch": 0.37, + "grad_norm": 0.5817281498761105, + "learning_rate": 4.695870589819808e-06, + "loss": 0.2733, + "step": 7851 + }, + { + "epoch": 0.37, + "grad_norm": 0.6059821443606532, + "learning_rate": 4.695779926688012e-06, + "loss": 0.3053, + "step": 7852 + }, + { + "epoch": 0.37, + "grad_norm": 0.6314729864315841, + "learning_rate": 4.695689250920054e-06, + "loss": 0.292, + "step": 7853 + }, + { + "epoch": 0.37, + "grad_norm": 0.5913746801140407, + "learning_rate": 4.695598562516457e-06, + "loss": 0.298, + "step": 7854 + }, + { + "epoch": 0.37, + "grad_norm": 0.6985200779256864, + "learning_rate": 4.695507861477741e-06, + "loss": 0.3059, + "step": 7855 + }, + { + "epoch": 0.37, + "grad_norm": 0.5740995777787801, + "learning_rate": 4.695417147804429e-06, + "loss": 0.2913, + "step": 7856 + }, + { + "epoch": 0.37, + "grad_norm": 0.5794710840209394, + "learning_rate": 4.695326421497044e-06, + "loss": 0.287, + "step": 7857 + }, + { + "epoch": 0.37, + "grad_norm": 0.6257917764815948, + "learning_rate": 4.695235682556108e-06, + "loss": 0.303, + "step": 7858 + }, + { + "epoch": 0.37, + "grad_norm": 0.6492307099245958, + "learning_rate": 4.695144930982141e-06, + "loss": 0.3106, + "step": 7859 + }, + { + "epoch": 0.37, + "grad_norm": 0.6791783416004068, + "learning_rate": 4.695054166775666e-06, + "loss": 0.3071, + "step": 7860 + }, + { + "epoch": 0.37, + "grad_norm": 0.5857193987293203, + "learning_rate": 4.694963389937208e-06, + "loss": 0.3091, + "step": 7861 + }, + { + "epoch": 0.37, + "grad_norm": 0.6081483002774918, + "learning_rate": 4.694872600467286e-06, + "loss": 0.2905, + "step": 7862 + }, + { + "epoch": 0.37, + "grad_norm": 0.5734491832189816, + "learning_rate": 4.6947817983664245e-06, + "loss": 0.2627, + "step": 7863 + }, + { + "epoch": 0.37, + "grad_norm": 0.6437225760975692, + "learning_rate": 4.694690983635145e-06, + "loss": 0.3086, + "step": 7864 + }, + { + "epoch": 0.37, + "grad_norm": 0.661101810216683, + "learning_rate": 4.69460015627397e-06, + "loss": 0.2826, + "step": 7865 + }, + { + "epoch": 0.37, + "grad_norm": 0.6944968622468727, + "learning_rate": 4.694509316283423e-06, + "loss": 0.2752, + "step": 7866 + }, + { + "epoch": 0.37, + "grad_norm": 0.6363918001402822, + "learning_rate": 4.694418463664027e-06, + "loss": 0.308, + "step": 7867 + }, + { + "epoch": 0.37, + "grad_norm": 0.5719947298191866, + "learning_rate": 4.694327598416304e-06, + "loss": 0.2961, + "step": 7868 + }, + { + "epoch": 0.37, + "grad_norm": 0.6231911986842524, + "learning_rate": 4.694236720540777e-06, + "loss": 0.3023, + "step": 7869 + }, + { + "epoch": 0.37, + "grad_norm": 0.6749938793618656, + "learning_rate": 4.694145830037969e-06, + "loss": 0.3072, + "step": 7870 + }, + { + "epoch": 0.37, + "grad_norm": 0.6799043999783516, + "learning_rate": 4.6940549269084046e-06, + "loss": 0.3062, + "step": 7871 + }, + { + "epoch": 0.37, + "grad_norm": 0.5935561317097681, + "learning_rate": 4.693964011152604e-06, + "loss": 0.2854, + "step": 7872 + }, + { + "epoch": 0.37, + "grad_norm": 0.6379504361708588, + "learning_rate": 4.6938730827710935e-06, + "loss": 0.2979, + "step": 7873 + }, + { + "epoch": 0.37, + "grad_norm": 0.615424207751934, + "learning_rate": 4.693782141764393e-06, + "loss": 0.2945, + "step": 7874 + }, + { + "epoch": 0.37, + "grad_norm": 0.6077488279839168, + "learning_rate": 4.6936911881330285e-06, + "loss": 0.292, + "step": 7875 + }, + { + "epoch": 0.37, + "grad_norm": 0.6916244680321015, + "learning_rate": 4.693600221877523e-06, + "loss": 0.3066, + "step": 7876 + }, + { + "epoch": 0.37, + "grad_norm": 0.6622515668257103, + "learning_rate": 4.693509242998399e-06, + "loss": 0.3047, + "step": 7877 + }, + { + "epoch": 0.37, + "grad_norm": 0.6467474194811859, + "learning_rate": 4.693418251496181e-06, + "loss": 0.3059, + "step": 7878 + }, + { + "epoch": 0.37, + "grad_norm": 0.6279547879889716, + "learning_rate": 4.693327247371392e-06, + "loss": 0.2855, + "step": 7879 + }, + { + "epoch": 0.37, + "grad_norm": 0.6005590308169272, + "learning_rate": 4.693236230624556e-06, + "loss": 0.2878, + "step": 7880 + }, + { + "epoch": 0.37, + "grad_norm": 0.6787204892930947, + "learning_rate": 4.693145201256196e-06, + "loss": 0.3053, + "step": 7881 + }, + { + "epoch": 0.37, + "grad_norm": 0.6203571776207674, + "learning_rate": 4.693054159266838e-06, + "loss": 0.3054, + "step": 7882 + }, + { + "epoch": 0.37, + "grad_norm": 0.6355079344612886, + "learning_rate": 4.6929631046570034e-06, + "loss": 0.3119, + "step": 7883 + }, + { + "epoch": 0.37, + "grad_norm": 0.6132265062304851, + "learning_rate": 4.692872037427218e-06, + "loss": 0.3027, + "step": 7884 + }, + { + "epoch": 0.37, + "grad_norm": 0.5585077623064606, + "learning_rate": 4.692780957578005e-06, + "loss": 0.2899, + "step": 7885 + }, + { + "epoch": 0.37, + "grad_norm": 0.5972324722435262, + "learning_rate": 4.692689865109888e-06, + "loss": 0.2892, + "step": 7886 + }, + { + "epoch": 0.37, + "grad_norm": 0.6332322419425506, + "learning_rate": 4.692598760023393e-06, + "loss": 0.3048, + "step": 7887 + }, + { + "epoch": 0.37, + "grad_norm": 0.6524495041847572, + "learning_rate": 4.692507642319043e-06, + "loss": 0.3017, + "step": 7888 + }, + { + "epoch": 0.37, + "grad_norm": 0.6321603272156315, + "learning_rate": 4.692416511997362e-06, + "loss": 0.2829, + "step": 7889 + }, + { + "epoch": 0.37, + "grad_norm": 0.5717118604895699, + "learning_rate": 4.6923253690588755e-06, + "loss": 0.3066, + "step": 7890 + }, + { + "epoch": 0.37, + "grad_norm": 0.643042059518752, + "learning_rate": 4.6922342135041075e-06, + "loss": 0.3001, + "step": 7891 + }, + { + "epoch": 0.37, + "grad_norm": 0.5929885818661151, + "learning_rate": 4.6921430453335824e-06, + "loss": 0.282, + "step": 7892 + }, + { + "epoch": 0.37, + "grad_norm": 0.6601233880363554, + "learning_rate": 4.6920518645478256e-06, + "loss": 0.3096, + "step": 7893 + }, + { + "epoch": 0.37, + "grad_norm": 0.5635374309689458, + "learning_rate": 4.691960671147361e-06, + "loss": 0.2789, + "step": 7894 + }, + { + "epoch": 0.37, + "grad_norm": 0.720953496166522, + "learning_rate": 4.6918694651327136e-06, + "loss": 0.3431, + "step": 7895 + }, + { + "epoch": 0.37, + "grad_norm": 0.6012231979256338, + "learning_rate": 4.691778246504408e-06, + "loss": 0.2943, + "step": 7896 + }, + { + "epoch": 0.37, + "grad_norm": 0.6512992295597774, + "learning_rate": 4.691687015262969e-06, + "loss": 0.2854, + "step": 7897 + }, + { + "epoch": 0.37, + "grad_norm": 0.6781749090589807, + "learning_rate": 4.691595771408923e-06, + "loss": 0.2943, + "step": 7898 + }, + { + "epoch": 0.37, + "grad_norm": 0.6078290950139383, + "learning_rate": 4.691504514942794e-06, + "loss": 0.2814, + "step": 7899 + }, + { + "epoch": 0.37, + "grad_norm": 0.6216789655325204, + "learning_rate": 4.691413245865107e-06, + "loss": 0.3061, + "step": 7900 + }, + { + "epoch": 0.37, + "grad_norm": 0.575291126903269, + "learning_rate": 4.691321964176389e-06, + "loss": 0.2908, + "step": 7901 + }, + { + "epoch": 0.37, + "grad_norm": 0.5657274823652346, + "learning_rate": 4.691230669877162e-06, + "loss": 0.2743, + "step": 7902 + }, + { + "epoch": 0.37, + "grad_norm": 0.6264810807634674, + "learning_rate": 4.691139362967954e-06, + "loss": 0.3063, + "step": 7903 + }, + { + "epoch": 0.37, + "grad_norm": 0.651992212417819, + "learning_rate": 4.69104804344929e-06, + "loss": 0.2979, + "step": 7904 + }, + { + "epoch": 0.37, + "grad_norm": 0.6542666465850167, + "learning_rate": 4.6909567113216945e-06, + "loss": 0.3051, + "step": 7905 + }, + { + "epoch": 0.37, + "grad_norm": 0.6720424772643706, + "learning_rate": 4.690865366585694e-06, + "loss": 0.3176, + "step": 7906 + }, + { + "epoch": 0.37, + "grad_norm": 0.6317638576649125, + "learning_rate": 4.6907740092418145e-06, + "loss": 0.2981, + "step": 7907 + }, + { + "epoch": 0.37, + "grad_norm": 0.5753259441088354, + "learning_rate": 4.690682639290581e-06, + "loss": 0.3019, + "step": 7908 + }, + { + "epoch": 0.37, + "grad_norm": 0.6218965504729793, + "learning_rate": 4.690591256732519e-06, + "loss": 0.2868, + "step": 7909 + }, + { + "epoch": 0.37, + "grad_norm": 0.6892311425121, + "learning_rate": 4.6904998615681554e-06, + "loss": 0.2945, + "step": 7910 + }, + { + "epoch": 0.37, + "grad_norm": 0.6081871100863288, + "learning_rate": 4.690408453798015e-06, + "loss": 0.3006, + "step": 7911 + }, + { + "epoch": 0.37, + "grad_norm": 0.569800410464724, + "learning_rate": 4.6903170334226255e-06, + "loss": 0.2753, + "step": 7912 + }, + { + "epoch": 0.37, + "grad_norm": 0.6205941766493258, + "learning_rate": 4.690225600442512e-06, + "loss": 0.2905, + "step": 7913 + }, + { + "epoch": 0.37, + "grad_norm": 0.5845025297809048, + "learning_rate": 4.6901341548582e-06, + "loss": 0.2851, + "step": 7914 + }, + { + "epoch": 0.37, + "grad_norm": 0.6036743013240284, + "learning_rate": 4.690042696670216e-06, + "loss": 0.2951, + "step": 7915 + }, + { + "epoch": 0.37, + "grad_norm": 0.6583903545901154, + "learning_rate": 4.689951225879088e-06, + "loss": 0.2923, + "step": 7916 + }, + { + "epoch": 0.37, + "grad_norm": 0.6429232917513239, + "learning_rate": 4.68985974248534e-06, + "loss": 0.285, + "step": 7917 + }, + { + "epoch": 0.37, + "grad_norm": 0.5943804667019489, + "learning_rate": 4.6897682464895e-06, + "loss": 0.2757, + "step": 7918 + }, + { + "epoch": 0.37, + "grad_norm": 0.6078507339610559, + "learning_rate": 4.689676737892093e-06, + "loss": 0.294, + "step": 7919 + }, + { + "epoch": 0.37, + "grad_norm": 0.639831716676586, + "learning_rate": 4.689585216693649e-06, + "loss": 0.3052, + "step": 7920 + }, + { + "epoch": 0.37, + "grad_norm": 0.6929878337051812, + "learning_rate": 4.689493682894692e-06, + "loss": 0.3029, + "step": 7921 + }, + { + "epoch": 0.37, + "grad_norm": 0.6275262220402575, + "learning_rate": 4.689402136495748e-06, + "loss": 0.2993, + "step": 7922 + }, + { + "epoch": 0.37, + "grad_norm": 0.6722310661006557, + "learning_rate": 4.689310577497345e-06, + "loss": 0.3024, + "step": 7923 + }, + { + "epoch": 0.37, + "grad_norm": 0.5867828842936603, + "learning_rate": 4.68921900590001e-06, + "loss": 0.3053, + "step": 7924 + }, + { + "epoch": 0.37, + "grad_norm": 0.5768903573481121, + "learning_rate": 4.689127421704271e-06, + "loss": 0.2853, + "step": 7925 + }, + { + "epoch": 0.37, + "grad_norm": 0.5726539936665879, + "learning_rate": 4.689035824910653e-06, + "loss": 0.3072, + "step": 7926 + }, + { + "epoch": 0.37, + "grad_norm": 0.5937197169212595, + "learning_rate": 4.6889442155196845e-06, + "loss": 0.3094, + "step": 7927 + }, + { + "epoch": 0.37, + "grad_norm": 0.6516048567508587, + "learning_rate": 4.6888525935318905e-06, + "loss": 0.2887, + "step": 7928 + }, + { + "epoch": 0.37, + "grad_norm": 0.6153479868204964, + "learning_rate": 4.688760958947802e-06, + "loss": 0.2801, + "step": 7929 + }, + { + "epoch": 0.37, + "grad_norm": 0.6509777772266402, + "learning_rate": 4.688669311767944e-06, + "loss": 0.3186, + "step": 7930 + }, + { + "epoch": 0.37, + "grad_norm": 0.6312133741418839, + "learning_rate": 4.688577651992843e-06, + "loss": 0.2976, + "step": 7931 + }, + { + "epoch": 0.37, + "grad_norm": 0.5687321915545803, + "learning_rate": 4.6884859796230285e-06, + "loss": 0.2908, + "step": 7932 + }, + { + "epoch": 0.37, + "grad_norm": 0.590137485509356, + "learning_rate": 4.688394294659028e-06, + "loss": 0.2783, + "step": 7933 + }, + { + "epoch": 0.37, + "grad_norm": 0.5948476502978023, + "learning_rate": 4.688302597101367e-06, + "loss": 0.2832, + "step": 7934 + }, + { + "epoch": 0.37, + "grad_norm": 0.6627626803183245, + "learning_rate": 4.688210886950575e-06, + "loss": 0.2894, + "step": 7935 + }, + { + "epoch": 0.37, + "grad_norm": 0.6095217417131086, + "learning_rate": 4.68811916420718e-06, + "loss": 0.2817, + "step": 7936 + }, + { + "epoch": 0.37, + "grad_norm": 0.6284214578704649, + "learning_rate": 4.6880274288717085e-06, + "loss": 0.2733, + "step": 7937 + }, + { + "epoch": 0.37, + "grad_norm": 0.5796613242321557, + "learning_rate": 4.687935680944689e-06, + "loss": 0.2825, + "step": 7938 + }, + { + "epoch": 0.37, + "grad_norm": 0.5660070870881792, + "learning_rate": 4.68784392042665e-06, + "loss": 0.2863, + "step": 7939 + }, + { + "epoch": 0.37, + "grad_norm": 0.6237347280302903, + "learning_rate": 4.687752147318119e-06, + "loss": 0.2936, + "step": 7940 + }, + { + "epoch": 0.37, + "grad_norm": 0.6682777829035075, + "learning_rate": 4.687660361619624e-06, + "loss": 0.3131, + "step": 7941 + }, + { + "epoch": 0.37, + "grad_norm": 0.5869609147511474, + "learning_rate": 4.687568563331693e-06, + "loss": 0.2701, + "step": 7942 + }, + { + "epoch": 0.37, + "grad_norm": 0.5746774788783204, + "learning_rate": 4.687476752454856e-06, + "loss": 0.2947, + "step": 7943 + }, + { + "epoch": 0.37, + "grad_norm": 0.624179286103026, + "learning_rate": 4.687384928989639e-06, + "loss": 0.3031, + "step": 7944 + }, + { + "epoch": 0.37, + "grad_norm": 0.6208554428012625, + "learning_rate": 4.687293092936573e-06, + "loss": 0.2779, + "step": 7945 + }, + { + "epoch": 0.37, + "grad_norm": 0.6833052021084399, + "learning_rate": 4.687201244296183e-06, + "loss": 0.2892, + "step": 7946 + }, + { + "epoch": 0.37, + "grad_norm": 0.6212049422120416, + "learning_rate": 4.687109383069001e-06, + "loss": 0.2948, + "step": 7947 + }, + { + "epoch": 0.37, + "grad_norm": 0.688273355863363, + "learning_rate": 4.687017509255553e-06, + "loss": 0.3147, + "step": 7948 + }, + { + "epoch": 0.37, + "grad_norm": 0.6281846683618091, + "learning_rate": 4.686925622856371e-06, + "loss": 0.2984, + "step": 7949 + }, + { + "epoch": 0.37, + "grad_norm": 0.572884923833013, + "learning_rate": 4.68683372387198e-06, + "loss": 0.2898, + "step": 7950 + }, + { + "epoch": 0.37, + "grad_norm": 0.6157266589364276, + "learning_rate": 4.686741812302911e-06, + "loss": 0.2787, + "step": 7951 + }, + { + "epoch": 0.37, + "grad_norm": 0.6216130056563692, + "learning_rate": 4.686649888149693e-06, + "loss": 0.294, + "step": 7952 + }, + { + "epoch": 0.37, + "grad_norm": 0.6634517729338609, + "learning_rate": 4.686557951412854e-06, + "loss": 0.3055, + "step": 7953 + }, + { + "epoch": 0.37, + "grad_norm": 0.6136170654224669, + "learning_rate": 4.686466002092923e-06, + "loss": 0.3045, + "step": 7954 + }, + { + "epoch": 0.37, + "grad_norm": 0.6474016669937301, + "learning_rate": 4.68637404019043e-06, + "loss": 0.2953, + "step": 7955 + }, + { + "epoch": 0.37, + "grad_norm": 0.6203977841865035, + "learning_rate": 4.6862820657059045e-06, + "loss": 0.2821, + "step": 7956 + }, + { + "epoch": 0.37, + "grad_norm": 0.613431251292297, + "learning_rate": 4.686190078639875e-06, + "loss": 0.3069, + "step": 7957 + }, + { + "epoch": 0.37, + "grad_norm": 0.6855564818614842, + "learning_rate": 4.686098078992871e-06, + "loss": 0.3112, + "step": 7958 + }, + { + "epoch": 0.37, + "grad_norm": 0.5764435360893988, + "learning_rate": 4.686006066765422e-06, + "loss": 0.2811, + "step": 7959 + }, + { + "epoch": 0.37, + "grad_norm": 0.5870935158006586, + "learning_rate": 4.685914041958058e-06, + "loss": 0.2884, + "step": 7960 + }, + { + "epoch": 0.37, + "grad_norm": 0.6516230700092052, + "learning_rate": 4.685822004571307e-06, + "loss": 0.3032, + "step": 7961 + }, + { + "epoch": 0.37, + "grad_norm": 0.6155887088613136, + "learning_rate": 4.6857299546057e-06, + "loss": 0.3029, + "step": 7962 + }, + { + "epoch": 0.37, + "grad_norm": 0.6609782138489936, + "learning_rate": 4.685637892061767e-06, + "loss": 0.3112, + "step": 7963 + }, + { + "epoch": 0.37, + "grad_norm": 0.6408093657878937, + "learning_rate": 4.685545816940037e-06, + "loss": 0.2915, + "step": 7964 + }, + { + "epoch": 0.37, + "grad_norm": 0.5970190583005369, + "learning_rate": 4.68545372924104e-06, + "loss": 0.2981, + "step": 7965 + }, + { + "epoch": 0.37, + "grad_norm": 0.6179500883715455, + "learning_rate": 4.685361628965306e-06, + "loss": 0.2941, + "step": 7966 + }, + { + "epoch": 0.37, + "grad_norm": 0.6257809854280456, + "learning_rate": 4.685269516113366e-06, + "loss": 0.3018, + "step": 7967 + }, + { + "epoch": 0.37, + "grad_norm": 0.6846005046072022, + "learning_rate": 4.6851773906857485e-06, + "loss": 0.3166, + "step": 7968 + }, + { + "epoch": 0.37, + "grad_norm": 0.6346152834380082, + "learning_rate": 4.685085252682984e-06, + "loss": 0.2946, + "step": 7969 + }, + { + "epoch": 0.37, + "grad_norm": 0.6739294623529666, + "learning_rate": 4.684993102105604e-06, + "loss": 0.3178, + "step": 7970 + }, + { + "epoch": 0.37, + "grad_norm": 0.5861388738384115, + "learning_rate": 4.6849009389541365e-06, + "loss": 0.2933, + "step": 7971 + }, + { + "epoch": 0.37, + "grad_norm": 0.6189933249314236, + "learning_rate": 4.684808763229115e-06, + "loss": 0.2947, + "step": 7972 + }, + { + "epoch": 0.37, + "grad_norm": 0.6141738187460444, + "learning_rate": 4.6847165749310675e-06, + "loss": 0.3135, + "step": 7973 + }, + { + "epoch": 0.37, + "grad_norm": 0.5966132873018127, + "learning_rate": 4.6846243740605244e-06, + "loss": 0.2903, + "step": 7974 + }, + { + "epoch": 0.37, + "grad_norm": 0.621243605511704, + "learning_rate": 4.684532160618018e-06, + "loss": 0.2734, + "step": 7975 + }, + { + "epoch": 0.37, + "grad_norm": 0.6049013166512276, + "learning_rate": 4.6844399346040774e-06, + "loss": 0.2888, + "step": 7976 + }, + { + "epoch": 0.37, + "grad_norm": 0.613502719068337, + "learning_rate": 4.684347696019235e-06, + "loss": 0.2988, + "step": 7977 + }, + { + "epoch": 0.37, + "grad_norm": 0.5865265883372957, + "learning_rate": 4.68425544486402e-06, + "loss": 0.2777, + "step": 7978 + }, + { + "epoch": 0.37, + "grad_norm": 0.628894371719093, + "learning_rate": 4.6841631811389635e-06, + "loss": 0.2881, + "step": 7979 + }, + { + "epoch": 0.37, + "grad_norm": 0.6801390125509238, + "learning_rate": 4.684070904844598e-06, + "loss": 0.2897, + "step": 7980 + }, + { + "epoch": 0.37, + "grad_norm": 0.5820315046100675, + "learning_rate": 4.683978615981452e-06, + "loss": 0.2833, + "step": 7981 + }, + { + "epoch": 0.37, + "grad_norm": 0.5816991144016991, + "learning_rate": 4.683886314550059e-06, + "loss": 0.2844, + "step": 7982 + }, + { + "epoch": 0.37, + "grad_norm": 0.6112152039533928, + "learning_rate": 4.6837940005509485e-06, + "loss": 0.2936, + "step": 7983 + }, + { + "epoch": 0.37, + "grad_norm": 0.5939161167786976, + "learning_rate": 4.683701673984653e-06, + "loss": 0.2782, + "step": 7984 + }, + { + "epoch": 0.37, + "grad_norm": 0.6568649371891433, + "learning_rate": 4.683609334851703e-06, + "loss": 0.2858, + "step": 7985 + }, + { + "epoch": 0.37, + "grad_norm": 0.5806508974629652, + "learning_rate": 4.68351698315263e-06, + "loss": 0.26, + "step": 7986 + }, + { + "epoch": 0.37, + "grad_norm": 0.590169205038156, + "learning_rate": 4.683424618887966e-06, + "loss": 0.3033, + "step": 7987 + }, + { + "epoch": 0.37, + "grad_norm": 0.60682592113871, + "learning_rate": 4.6833322420582415e-06, + "loss": 0.2978, + "step": 7988 + }, + { + "epoch": 0.37, + "grad_norm": 0.666443324233266, + "learning_rate": 4.683239852663989e-06, + "loss": 0.2918, + "step": 7989 + }, + { + "epoch": 0.37, + "grad_norm": 0.6473825002143476, + "learning_rate": 4.6831474507057395e-06, + "loss": 0.2711, + "step": 7990 + }, + { + "epoch": 0.37, + "grad_norm": 0.6325327374541804, + "learning_rate": 4.683055036184026e-06, + "loss": 0.2944, + "step": 7991 + }, + { + "epoch": 0.37, + "grad_norm": 0.6243652240082793, + "learning_rate": 4.682962609099378e-06, + "loss": 0.3012, + "step": 7992 + }, + { + "epoch": 0.37, + "grad_norm": 0.5747657854810039, + "learning_rate": 4.68287016945233e-06, + "loss": 0.2874, + "step": 7993 + }, + { + "epoch": 0.37, + "grad_norm": 0.5933691217974291, + "learning_rate": 4.682777717243413e-06, + "loss": 0.288, + "step": 7994 + }, + { + "epoch": 0.37, + "grad_norm": 0.5937830235526678, + "learning_rate": 4.682685252473158e-06, + "loss": 0.2923, + "step": 7995 + }, + { + "epoch": 0.37, + "grad_norm": 0.6791152174923264, + "learning_rate": 4.682592775142099e-06, + "loss": 0.3168, + "step": 7996 + }, + { + "epoch": 0.37, + "grad_norm": 0.6414898302203019, + "learning_rate": 4.682500285250766e-06, + "loss": 0.2763, + "step": 7997 + }, + { + "epoch": 0.37, + "grad_norm": 0.607928035352787, + "learning_rate": 4.682407782799693e-06, + "loss": 0.2761, + "step": 7998 + }, + { + "epoch": 0.37, + "grad_norm": 0.5933606596376021, + "learning_rate": 4.682315267789412e-06, + "loss": 0.2864, + "step": 7999 + }, + { + "epoch": 0.37, + "grad_norm": 0.6487509460848228, + "learning_rate": 4.682222740220455e-06, + "loss": 0.3304, + "step": 8000 + }, + { + "epoch": 0.37480676441654565, + "grad_norm": 0.6151063431301349, + "learning_rate": 4.682130200093355e-06, + "loss": 0.2935, + "step": 8001 + }, + { + "epoch": 0.374853609406474, + "grad_norm": 0.6436945007434538, + "learning_rate": 4.6820376474086435e-06, + "loss": 0.2905, + "step": 8002 + }, + { + "epoch": 0.3749004543964023, + "grad_norm": 0.5886203579645443, + "learning_rate": 4.6819450821668535e-06, + "loss": 0.2807, + "step": 8003 + }, + { + "epoch": 0.37494729938633065, + "grad_norm": 0.5862431720198265, + "learning_rate": 4.681852504368518e-06, + "loss": 0.2892, + "step": 8004 + }, + { + "epoch": 0.37499414437625894, + "grad_norm": 0.5628448980471719, + "learning_rate": 4.681759914014171e-06, + "loss": 0.268, + "step": 8005 + }, + { + "epoch": 0.3750409893661873, + "grad_norm": 0.5820621192016705, + "learning_rate": 4.681667311104343e-06, + "loss": 0.2951, + "step": 8006 + }, + { + "epoch": 0.3750878343561156, + "grad_norm": 0.6499490525350226, + "learning_rate": 4.681574695639568e-06, + "loss": 0.3184, + "step": 8007 + }, + { + "epoch": 0.37513467934604394, + "grad_norm": 0.5981756762917774, + "learning_rate": 4.681482067620379e-06, + "loss": 0.2881, + "step": 8008 + }, + { + "epoch": 0.3751815243359723, + "grad_norm": 0.6715450635655325, + "learning_rate": 4.681389427047309e-06, + "loss": 0.3281, + "step": 8009 + }, + { + "epoch": 0.3752283693259006, + "grad_norm": 0.6264050016555577, + "learning_rate": 4.681296773920891e-06, + "loss": 0.2957, + "step": 8010 + }, + { + "epoch": 0.37527521431582894, + "grad_norm": 0.5962531619351673, + "learning_rate": 4.681204108241658e-06, + "loss": 0.2999, + "step": 8011 + }, + { + "epoch": 0.37532205930575724, + "grad_norm": 0.6317879255777394, + "learning_rate": 4.681111430010144e-06, + "loss": 0.2767, + "step": 8012 + }, + { + "epoch": 0.3753689042956856, + "grad_norm": 0.56376408122957, + "learning_rate": 4.681018739226882e-06, + "loss": 0.2808, + "step": 8013 + }, + { + "epoch": 0.3754157492856139, + "grad_norm": 0.6297467017983789, + "learning_rate": 4.680926035892406e-06, + "loss": 0.2892, + "step": 8014 + }, + { + "epoch": 0.37546259427554224, + "grad_norm": 0.6413861478759617, + "learning_rate": 4.680833320007248e-06, + "loss": 0.2964, + "step": 8015 + }, + { + "epoch": 0.37550943926547053, + "grad_norm": 0.6022461625479768, + "learning_rate": 4.680740591571943e-06, + "loss": 0.3108, + "step": 8016 + }, + { + "epoch": 0.3755562842553989, + "grad_norm": 0.5581277876094017, + "learning_rate": 4.680647850587024e-06, + "loss": 0.2889, + "step": 8017 + }, + { + "epoch": 0.37560312924532724, + "grad_norm": 0.5996485950516065, + "learning_rate": 4.680555097053023e-06, + "loss": 0.2829, + "step": 8018 + }, + { + "epoch": 0.37564997423525553, + "grad_norm": 0.6409235855527884, + "learning_rate": 4.680462330970477e-06, + "loss": 0.291, + "step": 8019 + }, + { + "epoch": 0.3756968192251839, + "grad_norm": 0.6690411347180362, + "learning_rate": 4.680369552339918e-06, + "loss": 0.2989, + "step": 8020 + }, + { + "epoch": 0.3757436642151122, + "grad_norm": 0.6357444610882675, + "learning_rate": 4.6802767611618805e-06, + "loss": 0.28, + "step": 8021 + }, + { + "epoch": 0.37579050920504053, + "grad_norm": 0.6591185916845879, + "learning_rate": 4.680183957436898e-06, + "loss": 0.3009, + "step": 8022 + }, + { + "epoch": 0.37583735419496883, + "grad_norm": 0.5940102148879115, + "learning_rate": 4.6800911411655046e-06, + "loss": 0.2884, + "step": 8023 + }, + { + "epoch": 0.3758841991848972, + "grad_norm": 0.6059534662969357, + "learning_rate": 4.6799983123482355e-06, + "loss": 0.273, + "step": 8024 + }, + { + "epoch": 0.3759310441748255, + "grad_norm": 0.6264521910666999, + "learning_rate": 4.679905470985623e-06, + "loss": 0.2826, + "step": 8025 + }, + { + "epoch": 0.37597788916475383, + "grad_norm": 0.6441548243668929, + "learning_rate": 4.679812617078202e-06, + "loss": 0.3033, + "step": 8026 + }, + { + "epoch": 0.3760247341546822, + "grad_norm": 0.6629318601974437, + "learning_rate": 4.679719750626509e-06, + "loss": 0.3031, + "step": 8027 + }, + { + "epoch": 0.3760715791446105, + "grad_norm": 0.6417206463880644, + "learning_rate": 4.679626871631076e-06, + "loss": 0.2987, + "step": 8028 + }, + { + "epoch": 0.37611842413453883, + "grad_norm": 0.6182209328469348, + "learning_rate": 4.679533980092437e-06, + "loss": 0.2944, + "step": 8029 + }, + { + "epoch": 0.3761652691244671, + "grad_norm": 0.6693236624001603, + "learning_rate": 4.679441076011128e-06, + "loss": 0.3018, + "step": 8030 + }, + { + "epoch": 0.3762121141143955, + "grad_norm": 0.6027858884995437, + "learning_rate": 4.6793481593876846e-06, + "loss": 0.277, + "step": 8031 + }, + { + "epoch": 0.3762589591043238, + "grad_norm": 0.5953412346907294, + "learning_rate": 4.67925523022264e-06, + "loss": 0.2835, + "step": 8032 + }, + { + "epoch": 0.3763058040942521, + "grad_norm": 0.6553277090857541, + "learning_rate": 4.679162288516529e-06, + "loss": 0.2648, + "step": 8033 + }, + { + "epoch": 0.3763526490841804, + "grad_norm": 0.6361707983347428, + "learning_rate": 4.6790693342698865e-06, + "loss": 0.3216, + "step": 8034 + }, + { + "epoch": 0.3763994940741088, + "grad_norm": 0.6307830755797301, + "learning_rate": 4.678976367483249e-06, + "loss": 0.3022, + "step": 8035 + }, + { + "epoch": 0.3764463390640371, + "grad_norm": 0.6865414718459619, + "learning_rate": 4.678883388157148e-06, + "loss": 0.3178, + "step": 8036 + }, + { + "epoch": 0.3764931840539654, + "grad_norm": 0.6107757341788135, + "learning_rate": 4.6787903962921225e-06, + "loss": 0.2956, + "step": 8037 + }, + { + "epoch": 0.3765400290438938, + "grad_norm": 0.5702402055043431, + "learning_rate": 4.6786973918887054e-06, + "loss": 0.2895, + "step": 8038 + }, + { + "epoch": 0.37658687403382207, + "grad_norm": 0.5418478858696318, + "learning_rate": 4.678604374947432e-06, + "loss": 0.2868, + "step": 8039 + }, + { + "epoch": 0.3766337190237504, + "grad_norm": 0.6567027537965887, + "learning_rate": 4.678511345468838e-06, + "loss": 0.2936, + "step": 8040 + }, + { + "epoch": 0.3766805640136787, + "grad_norm": 0.5833882254505282, + "learning_rate": 4.678418303453459e-06, + "loss": 0.2761, + "step": 8041 + }, + { + "epoch": 0.37672740900360707, + "grad_norm": 0.6263517962488567, + "learning_rate": 4.678325248901831e-06, + "loss": 0.301, + "step": 8042 + }, + { + "epoch": 0.37677425399353537, + "grad_norm": 0.6159259372724848, + "learning_rate": 4.6782321818144875e-06, + "loss": 0.2853, + "step": 8043 + }, + { + "epoch": 0.3768210989834637, + "grad_norm": 0.5951914251547327, + "learning_rate": 4.6781391021919655e-06, + "loss": 0.2947, + "step": 8044 + }, + { + "epoch": 0.37686794397339207, + "grad_norm": 0.6083901863148247, + "learning_rate": 4.678046010034801e-06, + "loss": 0.2995, + "step": 8045 + }, + { + "epoch": 0.37691478896332037, + "grad_norm": 0.5998393077388384, + "learning_rate": 4.677952905343528e-06, + "loss": 0.285, + "step": 8046 + }, + { + "epoch": 0.3769616339532487, + "grad_norm": 0.5785821500633166, + "learning_rate": 4.6778597881186845e-06, + "loss": 0.2721, + "step": 8047 + }, + { + "epoch": 0.377008478943177, + "grad_norm": 0.6268475588925311, + "learning_rate": 4.677766658360805e-06, + "loss": 0.2996, + "step": 8048 + }, + { + "epoch": 0.37705532393310537, + "grad_norm": 0.5753097133171355, + "learning_rate": 4.677673516070426e-06, + "loss": 0.3026, + "step": 8049 + }, + { + "epoch": 0.37710216892303366, + "grad_norm": 0.5661139951163775, + "learning_rate": 4.677580361248083e-06, + "loss": 0.2788, + "step": 8050 + }, + { + "epoch": 0.377149013912962, + "grad_norm": 0.618216284854778, + "learning_rate": 4.677487193894312e-06, + "loss": 0.2866, + "step": 8051 + }, + { + "epoch": 0.3771958589028903, + "grad_norm": 0.6381819126458851, + "learning_rate": 4.677394014009649e-06, + "loss": 0.2854, + "step": 8052 + }, + { + "epoch": 0.37724270389281866, + "grad_norm": 0.6322872711320495, + "learning_rate": 4.677300821594633e-06, + "loss": 0.3032, + "step": 8053 + }, + { + "epoch": 0.377289548882747, + "grad_norm": 0.6120217620891641, + "learning_rate": 4.677207616649796e-06, + "loss": 0.3017, + "step": 8054 + }, + { + "epoch": 0.3773363938726753, + "grad_norm": 0.5934745390590933, + "learning_rate": 4.677114399175677e-06, + "loss": 0.2882, + "step": 8055 + }, + { + "epoch": 0.37738323886260366, + "grad_norm": 0.6246761373794376, + "learning_rate": 4.677021169172811e-06, + "loss": 0.2787, + "step": 8056 + }, + { + "epoch": 0.37743008385253196, + "grad_norm": 0.5832254837480733, + "learning_rate": 4.676927926641737e-06, + "loss": 0.279, + "step": 8057 + }, + { + "epoch": 0.3774769288424603, + "grad_norm": 0.6478737412041985, + "learning_rate": 4.676834671582988e-06, + "loss": 0.3062, + "step": 8058 + }, + { + "epoch": 0.3775237738323886, + "grad_norm": 0.6159356277808763, + "learning_rate": 4.6767414039971035e-06, + "loss": 0.3007, + "step": 8059 + }, + { + "epoch": 0.37757061882231696, + "grad_norm": 0.6279312440665882, + "learning_rate": 4.6766481238846205e-06, + "loss": 0.3165, + "step": 8060 + }, + { + "epoch": 0.37761746381224526, + "grad_norm": 0.6148811397789046, + "learning_rate": 4.676554831246073e-06, + "loss": 0.2858, + "step": 8061 + }, + { + "epoch": 0.3776643088021736, + "grad_norm": 0.5956499951723667, + "learning_rate": 4.676461526082e-06, + "loss": 0.3073, + "step": 8062 + }, + { + "epoch": 0.37771115379210196, + "grad_norm": 0.5956808229293836, + "learning_rate": 4.676368208392938e-06, + "loss": 0.2829, + "step": 8063 + }, + { + "epoch": 0.37775799878203026, + "grad_norm": 0.5851617640590717, + "learning_rate": 4.676274878179425e-06, + "loss": 0.2673, + "step": 8064 + }, + { + "epoch": 0.3778048437719586, + "grad_norm": 0.6404800079542692, + "learning_rate": 4.676181535441995e-06, + "loss": 0.2947, + "step": 8065 + }, + { + "epoch": 0.3778516887618869, + "grad_norm": 0.5749735280059926, + "learning_rate": 4.676088180181189e-06, + "loss": 0.2734, + "step": 8066 + }, + { + "epoch": 0.37789853375181526, + "grad_norm": 0.5844815901477007, + "learning_rate": 4.675994812397541e-06, + "loss": 0.3035, + "step": 8067 + }, + { + "epoch": 0.37794537874174355, + "grad_norm": 0.6667301339524921, + "learning_rate": 4.675901432091591e-06, + "loss": 0.2993, + "step": 8068 + }, + { + "epoch": 0.3779922237316719, + "grad_norm": 0.6261356895244029, + "learning_rate": 4.675808039263875e-06, + "loss": 0.3227, + "step": 8069 + }, + { + "epoch": 0.3780390687216002, + "grad_norm": 0.5553591560454437, + "learning_rate": 4.675714633914929e-06, + "loss": 0.2771, + "step": 8070 + }, + { + "epoch": 0.37808591371152855, + "grad_norm": 0.5974443554605229, + "learning_rate": 4.675621216045294e-06, + "loss": 0.294, + "step": 8071 + }, + { + "epoch": 0.3781327587014569, + "grad_norm": 0.6048721872354011, + "learning_rate": 4.675527785655505e-06, + "loss": 0.2929, + "step": 8072 + }, + { + "epoch": 0.3781796036913852, + "grad_norm": 0.5758633898898832, + "learning_rate": 4.675434342746101e-06, + "loss": 0.267, + "step": 8073 + }, + { + "epoch": 0.37822644868131355, + "grad_norm": 0.6052849632782316, + "learning_rate": 4.6753408873176184e-06, + "loss": 0.2955, + "step": 8074 + }, + { + "epoch": 0.37827329367124185, + "grad_norm": 0.6146624289032809, + "learning_rate": 4.6752474193705954e-06, + "loss": 0.2986, + "step": 8075 + }, + { + "epoch": 0.3783201386611702, + "grad_norm": 0.6325666315136316, + "learning_rate": 4.6751539389055715e-06, + "loss": 0.2856, + "step": 8076 + }, + { + "epoch": 0.3783669836510985, + "grad_norm": 0.5693842246982558, + "learning_rate": 4.675060445923082e-06, + "loss": 0.2587, + "step": 8077 + }, + { + "epoch": 0.37841382864102685, + "grad_norm": 0.6120321668235477, + "learning_rate": 4.674966940423667e-06, + "loss": 0.2862, + "step": 8078 + }, + { + "epoch": 0.37846067363095515, + "grad_norm": 0.5996179882692736, + "learning_rate": 4.6748734224078644e-06, + "loss": 0.2955, + "step": 8079 + }, + { + "epoch": 0.3785075186208835, + "grad_norm": 0.6326427101535815, + "learning_rate": 4.674779891876211e-06, + "loss": 0.303, + "step": 8080 + }, + { + "epoch": 0.37855436361081185, + "grad_norm": 0.6192269453196256, + "learning_rate": 4.6746863488292465e-06, + "loss": 0.2991, + "step": 8081 + }, + { + "epoch": 0.37860120860074015, + "grad_norm": 0.6003285051089137, + "learning_rate": 4.674592793267509e-06, + "loss": 0.292, + "step": 8082 + }, + { + "epoch": 0.3786480535906685, + "grad_norm": 0.5482876027901626, + "learning_rate": 4.674499225191535e-06, + "loss": 0.2878, + "step": 8083 + }, + { + "epoch": 0.3786948985805968, + "grad_norm": 0.6927493373538258, + "learning_rate": 4.674405644601866e-06, + "loss": 0.317, + "step": 8084 + }, + { + "epoch": 0.37874174357052515, + "grad_norm": 0.6247674858351956, + "learning_rate": 4.674312051499039e-06, + "loss": 0.3157, + "step": 8085 + }, + { + "epoch": 0.37878858856045344, + "grad_norm": 0.6216383389156286, + "learning_rate": 4.674218445883592e-06, + "loss": 0.2953, + "step": 8086 + }, + { + "epoch": 0.3788354335503818, + "grad_norm": 0.6440832486773679, + "learning_rate": 4.674124827756064e-06, + "loss": 0.3117, + "step": 8087 + }, + { + "epoch": 0.3788822785403101, + "grad_norm": 0.6142160703978009, + "learning_rate": 4.674031197116995e-06, + "loss": 0.2946, + "step": 8088 + }, + { + "epoch": 0.37892912353023844, + "grad_norm": 0.6136556376936015, + "learning_rate": 4.673937553966922e-06, + "loss": 0.2883, + "step": 8089 + }, + { + "epoch": 0.3789759685201668, + "grad_norm": 0.5798498119720688, + "learning_rate": 4.673843898306385e-06, + "loss": 0.2899, + "step": 8090 + }, + { + "epoch": 0.3790228135100951, + "grad_norm": 0.626593609524154, + "learning_rate": 4.6737502301359235e-06, + "loss": 0.3018, + "step": 8091 + }, + { + "epoch": 0.37906965850002344, + "grad_norm": 0.6835866591313815, + "learning_rate": 4.673656549456075e-06, + "loss": 0.2984, + "step": 8092 + }, + { + "epoch": 0.37911650348995174, + "grad_norm": 0.6376714545107013, + "learning_rate": 4.67356285626738e-06, + "loss": 0.2807, + "step": 8093 + }, + { + "epoch": 0.3791633484798801, + "grad_norm": 0.6701129004507743, + "learning_rate": 4.673469150570376e-06, + "loss": 0.2945, + "step": 8094 + }, + { + "epoch": 0.3792101934698084, + "grad_norm": 0.6384600927709628, + "learning_rate": 4.673375432365605e-06, + "loss": 0.2909, + "step": 8095 + }, + { + "epoch": 0.37925703845973674, + "grad_norm": 0.6022099703713238, + "learning_rate": 4.673281701653604e-06, + "loss": 0.2875, + "step": 8096 + }, + { + "epoch": 0.37930388344966504, + "grad_norm": 0.5898228036986238, + "learning_rate": 4.673187958434912e-06, + "loss": 0.2852, + "step": 8097 + }, + { + "epoch": 0.3793507284395934, + "grad_norm": 0.5843362040010518, + "learning_rate": 4.67309420271007e-06, + "loss": 0.2853, + "step": 8098 + }, + { + "epoch": 0.37939757342952174, + "grad_norm": 0.6293888793769303, + "learning_rate": 4.673000434479618e-06, + "loss": 0.301, + "step": 8099 + }, + { + "epoch": 0.37944441841945004, + "grad_norm": 0.6426458236611117, + "learning_rate": 4.672906653744094e-06, + "loss": 0.3068, + "step": 8100 + }, + { + "epoch": 0.3794912634093784, + "grad_norm": 0.6777935683213647, + "learning_rate": 4.672812860504038e-06, + "loss": 0.3196, + "step": 8101 + }, + { + "epoch": 0.3795381083993067, + "grad_norm": 0.6186776078693277, + "learning_rate": 4.67271905475999e-06, + "loss": 0.2996, + "step": 8102 + }, + { + "epoch": 0.37958495338923504, + "grad_norm": 0.6087388867537217, + "learning_rate": 4.67262523651249e-06, + "loss": 0.3106, + "step": 8103 + }, + { + "epoch": 0.37963179837916333, + "grad_norm": 0.5982271109788057, + "learning_rate": 4.672531405762078e-06, + "loss": 0.2931, + "step": 8104 + }, + { + "epoch": 0.3796786433690917, + "grad_norm": 0.6479593169227886, + "learning_rate": 4.672437562509295e-06, + "loss": 0.3215, + "step": 8105 + }, + { + "epoch": 0.37972548835902, + "grad_norm": 0.5918960883598913, + "learning_rate": 4.6723437067546785e-06, + "loss": 0.2831, + "step": 8106 + }, + { + "epoch": 0.37977233334894833, + "grad_norm": 0.5916483614730957, + "learning_rate": 4.67224983849877e-06, + "loss": 0.2974, + "step": 8107 + }, + { + "epoch": 0.3798191783388767, + "grad_norm": 0.6312527730160756, + "learning_rate": 4.67215595774211e-06, + "loss": 0.2945, + "step": 8108 + }, + { + "epoch": 0.379866023328805, + "grad_norm": 0.6043771941114381, + "learning_rate": 4.672062064485238e-06, + "loss": 0.3032, + "step": 8109 + }, + { + "epoch": 0.37991286831873333, + "grad_norm": 0.589515567562007, + "learning_rate": 4.671968158728696e-06, + "loss": 0.2984, + "step": 8110 + }, + { + "epoch": 0.37995971330866163, + "grad_norm": 0.5917486989215637, + "learning_rate": 4.671874240473022e-06, + "loss": 0.2964, + "step": 8111 + }, + { + "epoch": 0.38000655829859, + "grad_norm": 0.5981278641738449, + "learning_rate": 4.671780309718758e-06, + "loss": 0.2861, + "step": 8112 + }, + { + "epoch": 0.3800534032885183, + "grad_norm": 0.5893178851960308, + "learning_rate": 4.671686366466444e-06, + "loss": 0.2933, + "step": 8113 + }, + { + "epoch": 0.38010024827844663, + "grad_norm": 0.5684929942308025, + "learning_rate": 4.67159241071662e-06, + "loss": 0.2854, + "step": 8114 + }, + { + "epoch": 0.3801470932683749, + "grad_norm": 0.655873216547238, + "learning_rate": 4.671498442469828e-06, + "loss": 0.3063, + "step": 8115 + }, + { + "epoch": 0.3801939382583033, + "grad_norm": 0.624177722366291, + "learning_rate": 4.671404461726609e-06, + "loss": 0.2891, + "step": 8116 + }, + { + "epoch": 0.38024078324823163, + "grad_norm": 0.6777886417317127, + "learning_rate": 4.671310468487503e-06, + "loss": 0.3019, + "step": 8117 + }, + { + "epoch": 0.3802876282381599, + "grad_norm": 0.6086804296567769, + "learning_rate": 4.6712164627530505e-06, + "loss": 0.2945, + "step": 8118 + }, + { + "epoch": 0.3803344732280883, + "grad_norm": 0.6000958039043615, + "learning_rate": 4.671122444523793e-06, + "loss": 0.2941, + "step": 8119 + }, + { + "epoch": 0.3803813182180166, + "grad_norm": 0.6357709273610624, + "learning_rate": 4.671028413800271e-06, + "loss": 0.2983, + "step": 8120 + }, + { + "epoch": 0.3804281632079449, + "grad_norm": 0.6262986562759141, + "learning_rate": 4.670934370583028e-06, + "loss": 0.2895, + "step": 8121 + }, + { + "epoch": 0.3804750081978732, + "grad_norm": 0.5904963314162102, + "learning_rate": 4.670840314872602e-06, + "loss": 0.2923, + "step": 8122 + }, + { + "epoch": 0.3805218531878016, + "grad_norm": 0.589226356719282, + "learning_rate": 4.670746246669535e-06, + "loss": 0.304, + "step": 8123 + }, + { + "epoch": 0.38056869817772987, + "grad_norm": 0.6252311109148858, + "learning_rate": 4.67065216597437e-06, + "loss": 0.3033, + "step": 8124 + }, + { + "epoch": 0.3806155431676582, + "grad_norm": 0.6013758989053954, + "learning_rate": 4.670558072787647e-06, + "loss": 0.3051, + "step": 8125 + }, + { + "epoch": 0.3806623881575866, + "grad_norm": 0.5845058690223393, + "learning_rate": 4.6704639671099075e-06, + "loss": 0.3021, + "step": 8126 + }, + { + "epoch": 0.38070923314751487, + "grad_norm": 0.6207870805920859, + "learning_rate": 4.670369848941694e-06, + "loss": 0.2821, + "step": 8127 + }, + { + "epoch": 0.3807560781374432, + "grad_norm": 0.5905465585443967, + "learning_rate": 4.670275718283547e-06, + "loss": 0.2916, + "step": 8128 + }, + { + "epoch": 0.3808029231273715, + "grad_norm": 0.6026409153231279, + "learning_rate": 4.670181575136009e-06, + "loss": 0.2958, + "step": 8129 + }, + { + "epoch": 0.38084976811729987, + "grad_norm": 0.6346933588503281, + "learning_rate": 4.670087419499621e-06, + "loss": 0.3016, + "step": 8130 + }, + { + "epoch": 0.38089661310722817, + "grad_norm": 0.581111603793017, + "learning_rate": 4.669993251374927e-06, + "loss": 0.2832, + "step": 8131 + }, + { + "epoch": 0.3809434580971565, + "grad_norm": 0.6425726206616624, + "learning_rate": 4.669899070762466e-06, + "loss": 0.3105, + "step": 8132 + }, + { + "epoch": 0.3809903030870848, + "grad_norm": 0.6464819655760733, + "learning_rate": 4.669804877662782e-06, + "loss": 0.3152, + "step": 8133 + }, + { + "epoch": 0.38103714807701317, + "grad_norm": 0.5737831137196656, + "learning_rate": 4.669710672076414e-06, + "loss": 0.2902, + "step": 8134 + }, + { + "epoch": 0.3810839930669415, + "grad_norm": 0.6154128567825004, + "learning_rate": 4.669616454003908e-06, + "loss": 0.3043, + "step": 8135 + }, + { + "epoch": 0.3811308380568698, + "grad_norm": 0.5494989987056252, + "learning_rate": 4.669522223445805e-06, + "loss": 0.283, + "step": 8136 + }, + { + "epoch": 0.38117768304679817, + "grad_norm": 0.6098234251002066, + "learning_rate": 4.669427980402646e-06, + "loss": 0.2943, + "step": 8137 + }, + { + "epoch": 0.38122452803672646, + "grad_norm": 0.6257662942118829, + "learning_rate": 4.669333724874975e-06, + "loss": 0.2902, + "step": 8138 + }, + { + "epoch": 0.3812713730266548, + "grad_norm": 0.6008463900376514, + "learning_rate": 4.669239456863333e-06, + "loss": 0.2922, + "step": 8139 + }, + { + "epoch": 0.3813182180165831, + "grad_norm": 0.6863361411794141, + "learning_rate": 4.669145176368265e-06, + "loss": 0.3242, + "step": 8140 + }, + { + "epoch": 0.38136506300651146, + "grad_norm": 0.5541730119515982, + "learning_rate": 4.66905088339031e-06, + "loss": 0.2746, + "step": 8141 + }, + { + "epoch": 0.38141190799643976, + "grad_norm": 0.6258694673845171, + "learning_rate": 4.6689565779300126e-06, + "loss": 0.2914, + "step": 8142 + }, + { + "epoch": 0.3814587529863681, + "grad_norm": 0.5957849293212659, + "learning_rate": 4.668862259987916e-06, + "loss": 0.29, + "step": 8143 + }, + { + "epoch": 0.38150559797629646, + "grad_norm": 0.6943850509013298, + "learning_rate": 4.66876792956456e-06, + "loss": 0.2687, + "step": 8144 + }, + { + "epoch": 0.38155244296622476, + "grad_norm": 0.6458641565662584, + "learning_rate": 4.668673586660491e-06, + "loss": 0.2976, + "step": 8145 + }, + { + "epoch": 0.3815992879561531, + "grad_norm": 0.5678024724641344, + "learning_rate": 4.6685792312762516e-06, + "loss": 0.2895, + "step": 8146 + }, + { + "epoch": 0.3816461329460814, + "grad_norm": 0.6393775260942035, + "learning_rate": 4.668484863412382e-06, + "loss": 0.3033, + "step": 8147 + }, + { + "epoch": 0.38169297793600976, + "grad_norm": 0.5936451497843426, + "learning_rate": 4.668390483069428e-06, + "loss": 0.3082, + "step": 8148 + }, + { + "epoch": 0.38173982292593805, + "grad_norm": 0.6286851765819296, + "learning_rate": 4.668296090247932e-06, + "loss": 0.3131, + "step": 8149 + }, + { + "epoch": 0.3817866679158664, + "grad_norm": 0.5962376418501033, + "learning_rate": 4.6682016849484366e-06, + "loss": 0.2758, + "step": 8150 + }, + { + "epoch": 0.3818335129057947, + "grad_norm": 0.5764117898647125, + "learning_rate": 4.668107267171486e-06, + "loss": 0.2788, + "step": 8151 + }, + { + "epoch": 0.38188035789572305, + "grad_norm": 0.5785843515154998, + "learning_rate": 4.6680128369176226e-06, + "loss": 0.266, + "step": 8152 + }, + { + "epoch": 0.3819272028856514, + "grad_norm": 0.5956529908948814, + "learning_rate": 4.667918394187389e-06, + "loss": 0.2786, + "step": 8153 + }, + { + "epoch": 0.3819740478755797, + "grad_norm": 0.6126670609019486, + "learning_rate": 4.6678239389813315e-06, + "loss": 0.2864, + "step": 8154 + }, + { + "epoch": 0.38202089286550805, + "grad_norm": 0.619970599406927, + "learning_rate": 4.667729471299992e-06, + "loss": 0.3046, + "step": 8155 + }, + { + "epoch": 0.38206773785543635, + "grad_norm": 0.609767127949157, + "learning_rate": 4.667634991143914e-06, + "loss": 0.2986, + "step": 8156 + }, + { + "epoch": 0.3821145828453647, + "grad_norm": 0.5960648223430725, + "learning_rate": 4.6675404985136416e-06, + "loss": 0.2906, + "step": 8157 + }, + { + "epoch": 0.382161427835293, + "grad_norm": 0.5780156925006499, + "learning_rate": 4.667445993409718e-06, + "loss": 0.2922, + "step": 8158 + }, + { + "epoch": 0.38220827282522135, + "grad_norm": 0.5951840209325112, + "learning_rate": 4.667351475832687e-06, + "loss": 0.2903, + "step": 8159 + }, + { + "epoch": 0.38225511781514965, + "grad_norm": 0.7574599064014302, + "learning_rate": 4.667256945783094e-06, + "loss": 0.2968, + "step": 8160 + }, + { + "epoch": 0.382301962805078, + "grad_norm": 0.6040588804351511, + "learning_rate": 4.667162403261482e-06, + "loss": 0.2989, + "step": 8161 + }, + { + "epoch": 0.38234880779500635, + "grad_norm": 0.6164992883882928, + "learning_rate": 4.667067848268395e-06, + "loss": 0.2867, + "step": 8162 + }, + { + "epoch": 0.38239565278493465, + "grad_norm": 0.6457167773669297, + "learning_rate": 4.666973280804376e-06, + "loss": 0.3056, + "step": 8163 + }, + { + "epoch": 0.382442497774863, + "grad_norm": 0.6181798905616177, + "learning_rate": 4.666878700869973e-06, + "loss": 0.2832, + "step": 8164 + }, + { + "epoch": 0.3824893427647913, + "grad_norm": 0.6027141446474434, + "learning_rate": 4.666784108465725e-06, + "loss": 0.3148, + "step": 8165 + }, + { + "epoch": 0.38253618775471965, + "grad_norm": 0.6477298495160416, + "learning_rate": 4.66668950359218e-06, + "loss": 0.3097, + "step": 8166 + }, + { + "epoch": 0.38258303274464794, + "grad_norm": 0.5449262993204882, + "learning_rate": 4.666594886249882e-06, + "loss": 0.2733, + "step": 8167 + }, + { + "epoch": 0.3826298777345763, + "grad_norm": 0.5855318650708923, + "learning_rate": 4.666500256439375e-06, + "loss": 0.2872, + "step": 8168 + }, + { + "epoch": 0.3826767227245046, + "grad_norm": 0.58916767066723, + "learning_rate": 4.666405614161202e-06, + "loss": 0.2755, + "step": 8169 + }, + { + "epoch": 0.38272356771443294, + "grad_norm": 0.6192021164728868, + "learning_rate": 4.66631095941591e-06, + "loss": 0.2988, + "step": 8170 + }, + { + "epoch": 0.3827704127043613, + "grad_norm": 0.6349629091181175, + "learning_rate": 4.666216292204044e-06, + "loss": 0.2893, + "step": 8171 + }, + { + "epoch": 0.3828172576942896, + "grad_norm": 0.6175216977180994, + "learning_rate": 4.6661216125261465e-06, + "loss": 0.2865, + "step": 8172 + }, + { + "epoch": 0.38286410268421794, + "grad_norm": 0.6097952840210082, + "learning_rate": 4.666026920382765e-06, + "loss": 0.3074, + "step": 8173 + }, + { + "epoch": 0.38291094767414624, + "grad_norm": 0.656951498056689, + "learning_rate": 4.6659322157744415e-06, + "loss": 0.3088, + "step": 8174 + }, + { + "epoch": 0.3829577926640746, + "grad_norm": 0.622667799908864, + "learning_rate": 4.665837498701723e-06, + "loss": 0.2861, + "step": 8175 + }, + { + "epoch": 0.3830046376540029, + "grad_norm": 0.5812185410311721, + "learning_rate": 4.665742769165153e-06, + "loss": 0.2858, + "step": 8176 + }, + { + "epoch": 0.38305148264393124, + "grad_norm": 0.5994057853630027, + "learning_rate": 4.6656480271652795e-06, + "loss": 0.2654, + "step": 8177 + }, + { + "epoch": 0.38309832763385954, + "grad_norm": 0.6225492383278777, + "learning_rate": 4.665553272702646e-06, + "loss": 0.3046, + "step": 8178 + }, + { + "epoch": 0.3831451726237879, + "grad_norm": 0.5903836256878368, + "learning_rate": 4.665458505777796e-06, + "loss": 0.2964, + "step": 8179 + }, + { + "epoch": 0.38319201761371624, + "grad_norm": 0.5806459313447908, + "learning_rate": 4.665363726391278e-06, + "loss": 0.268, + "step": 8180 + }, + { + "epoch": 0.38323886260364454, + "grad_norm": 0.6120897680123123, + "learning_rate": 4.665268934543635e-06, + "loss": 0.2837, + "step": 8181 + }, + { + "epoch": 0.3832857075935729, + "grad_norm": 0.6047680163240364, + "learning_rate": 4.665174130235414e-06, + "loss": 0.289, + "step": 8182 + }, + { + "epoch": 0.3833325525835012, + "grad_norm": 0.6025308528342528, + "learning_rate": 4.66507931346716e-06, + "loss": 0.3166, + "step": 8183 + }, + { + "epoch": 0.38337939757342954, + "grad_norm": 0.6110473866823808, + "learning_rate": 4.664984484239418e-06, + "loss": 0.2998, + "step": 8184 + }, + { + "epoch": 0.38342624256335783, + "grad_norm": 0.6128884612082263, + "learning_rate": 4.664889642552736e-06, + "loss": 0.287, + "step": 8185 + }, + { + "epoch": 0.3834730875532862, + "grad_norm": 0.5681211491509454, + "learning_rate": 4.664794788407657e-06, + "loss": 0.2879, + "step": 8186 + }, + { + "epoch": 0.3835199325432145, + "grad_norm": 0.6096231178868724, + "learning_rate": 4.664699921804728e-06, + "loss": 0.2914, + "step": 8187 + }, + { + "epoch": 0.38356677753314283, + "grad_norm": 0.6520301335258848, + "learning_rate": 4.664605042744496e-06, + "loss": 0.2747, + "step": 8188 + }, + { + "epoch": 0.3836136225230712, + "grad_norm": 0.6277578964558441, + "learning_rate": 4.664510151227506e-06, + "loss": 0.2956, + "step": 8189 + }, + { + "epoch": 0.3836604675129995, + "grad_norm": 0.6043204598357554, + "learning_rate": 4.664415247254303e-06, + "loss": 0.2964, + "step": 8190 + }, + { + "epoch": 0.38370731250292783, + "grad_norm": 0.5692349717114435, + "learning_rate": 4.664320330825435e-06, + "loss": 0.287, + "step": 8191 + }, + { + "epoch": 0.38375415749285613, + "grad_norm": 0.621130409513103, + "learning_rate": 4.664225401941448e-06, + "loss": 0.2863, + "step": 8192 + }, + { + "epoch": 0.3838010024827845, + "grad_norm": 0.559272887287353, + "learning_rate": 4.664130460602887e-06, + "loss": 0.2932, + "step": 8193 + }, + { + "epoch": 0.3838478474727128, + "grad_norm": 0.628673415641359, + "learning_rate": 4.664035506810299e-06, + "loss": 0.2966, + "step": 8194 + }, + { + "epoch": 0.38389469246264113, + "grad_norm": 0.6122155871085467, + "learning_rate": 4.663940540564231e-06, + "loss": 0.2946, + "step": 8195 + }, + { + "epoch": 0.3839415374525694, + "grad_norm": 0.6064451340763274, + "learning_rate": 4.663845561865229e-06, + "loss": 0.3058, + "step": 8196 + }, + { + "epoch": 0.3839883824424978, + "grad_norm": 0.6005221508075357, + "learning_rate": 4.663750570713839e-06, + "loss": 0.3095, + "step": 8197 + }, + { + "epoch": 0.38403522743242613, + "grad_norm": 0.6696510549769334, + "learning_rate": 4.663655567110609e-06, + "loss": 0.3022, + "step": 8198 + }, + { + "epoch": 0.3840820724223544, + "grad_norm": 0.6142065678802524, + "learning_rate": 4.663560551056085e-06, + "loss": 0.2814, + "step": 8199 + }, + { + "epoch": 0.3841289174122828, + "grad_norm": 0.5973943581938601, + "learning_rate": 4.663465522550814e-06, + "loss": 0.2853, + "step": 8200 + }, + { + "epoch": 0.3841757624022111, + "grad_norm": 0.6078238189299897, + "learning_rate": 4.663370481595341e-06, + "loss": 0.308, + "step": 8201 + }, + { + "epoch": 0.3842226073921394, + "grad_norm": 0.5902964836243902, + "learning_rate": 4.6632754281902175e-06, + "loss": 0.2964, + "step": 8202 + }, + { + "epoch": 0.3842694523820677, + "grad_norm": 0.6205753165007762, + "learning_rate": 4.663180362335986e-06, + "loss": 0.2913, + "step": 8203 + }, + { + "epoch": 0.3843162973719961, + "grad_norm": 0.626100598466684, + "learning_rate": 4.663085284033195e-06, + "loss": 0.2964, + "step": 8204 + }, + { + "epoch": 0.38436314236192437, + "grad_norm": 0.6327381021456107, + "learning_rate": 4.662990193282393e-06, + "loss": 0.2953, + "step": 8205 + }, + { + "epoch": 0.3844099873518527, + "grad_norm": 0.571262464753857, + "learning_rate": 4.662895090084124e-06, + "loss": 0.2811, + "step": 8206 + }, + { + "epoch": 0.3844568323417811, + "grad_norm": 0.6322882320951126, + "learning_rate": 4.66279997443894e-06, + "loss": 0.3058, + "step": 8207 + }, + { + "epoch": 0.38450367733170937, + "grad_norm": 0.5773259296517064, + "learning_rate": 4.662704846347384e-06, + "loss": 0.2797, + "step": 8208 + }, + { + "epoch": 0.3845505223216377, + "grad_norm": 0.6421140426632032, + "learning_rate": 4.662609705810005e-06, + "loss": 0.298, + "step": 8209 + }, + { + "epoch": 0.384597367311566, + "grad_norm": 0.6566637105356957, + "learning_rate": 4.662514552827352e-06, + "loss": 0.3088, + "step": 8210 + }, + { + "epoch": 0.38464421230149437, + "grad_norm": 0.5758301081397769, + "learning_rate": 4.66241938739997e-06, + "loss": 0.3161, + "step": 8211 + }, + { + "epoch": 0.38469105729142267, + "grad_norm": 0.6234383621052008, + "learning_rate": 4.662324209528409e-06, + "loss": 0.298, + "step": 8212 + }, + { + "epoch": 0.384737902281351, + "grad_norm": 0.6699571741029555, + "learning_rate": 4.6622290192132155e-06, + "loss": 0.2999, + "step": 8213 + }, + { + "epoch": 0.3847847472712793, + "grad_norm": 0.5895500744984259, + "learning_rate": 4.662133816454937e-06, + "loss": 0.2852, + "step": 8214 + }, + { + "epoch": 0.38483159226120767, + "grad_norm": 0.6287599708687974, + "learning_rate": 4.662038601254122e-06, + "loss": 0.2779, + "step": 8215 + }, + { + "epoch": 0.384878437251136, + "grad_norm": 0.5902444442628162, + "learning_rate": 4.661943373611318e-06, + "loss": 0.2769, + "step": 8216 + }, + { + "epoch": 0.3849252822410643, + "grad_norm": 0.5985416033373315, + "learning_rate": 4.661848133527073e-06, + "loss": 0.2928, + "step": 8217 + }, + { + "epoch": 0.38497212723099267, + "grad_norm": 0.6183054652780026, + "learning_rate": 4.661752881001936e-06, + "loss": 0.2857, + "step": 8218 + }, + { + "epoch": 0.38501897222092096, + "grad_norm": 0.5403014546977989, + "learning_rate": 4.661657616036455e-06, + "loss": 0.2624, + "step": 8219 + }, + { + "epoch": 0.3850658172108493, + "grad_norm": 0.6223166239260621, + "learning_rate": 4.661562338631177e-06, + "loss": 0.2658, + "step": 8220 + }, + { + "epoch": 0.3851126622007776, + "grad_norm": 0.6006144160114317, + "learning_rate": 4.661467048786651e-06, + "loss": 0.2624, + "step": 8221 + }, + { + "epoch": 0.38515950719070596, + "grad_norm": 0.6415453708884973, + "learning_rate": 4.661371746503425e-06, + "loss": 0.3116, + "step": 8222 + }, + { + "epoch": 0.38520635218063426, + "grad_norm": 0.5769351070022724, + "learning_rate": 4.661276431782048e-06, + "loss": 0.2665, + "step": 8223 + }, + { + "epoch": 0.3852531971705626, + "grad_norm": 0.6472533829899499, + "learning_rate": 4.661181104623069e-06, + "loss": 0.2843, + "step": 8224 + }, + { + "epoch": 0.38530004216049096, + "grad_norm": 0.5962367660993023, + "learning_rate": 4.661085765027036e-06, + "loss": 0.2663, + "step": 8225 + }, + { + "epoch": 0.38534688715041926, + "grad_norm": 0.6867887834161461, + "learning_rate": 4.6609904129944974e-06, + "loss": 0.3106, + "step": 8226 + }, + { + "epoch": 0.3853937321403476, + "grad_norm": 0.6390223473462247, + "learning_rate": 4.660895048526002e-06, + "loss": 0.2815, + "step": 8227 + }, + { + "epoch": 0.3854405771302759, + "grad_norm": 0.5735754836292525, + "learning_rate": 4.660799671622098e-06, + "loss": 0.2964, + "step": 8228 + }, + { + "epoch": 0.38548742212020426, + "grad_norm": 0.5826599825469667, + "learning_rate": 4.660704282283336e-06, + "loss": 0.2755, + "step": 8229 + }, + { + "epoch": 0.38553426711013256, + "grad_norm": 0.6460346095623752, + "learning_rate": 4.660608880510264e-06, + "loss": 0.2746, + "step": 8230 + }, + { + "epoch": 0.3855811121000609, + "grad_norm": 0.6140490157245685, + "learning_rate": 4.66051346630343e-06, + "loss": 0.2763, + "step": 8231 + }, + { + "epoch": 0.3856279570899892, + "grad_norm": 0.6999769843395394, + "learning_rate": 4.6604180396633845e-06, + "loss": 0.2918, + "step": 8232 + }, + { + "epoch": 0.38567480207991756, + "grad_norm": 0.5922878366906232, + "learning_rate": 4.660322600590676e-06, + "loss": 0.2961, + "step": 8233 + }, + { + "epoch": 0.3857216470698459, + "grad_norm": 0.5894458015279491, + "learning_rate": 4.660227149085854e-06, + "loss": 0.2871, + "step": 8234 + }, + { + "epoch": 0.3857684920597742, + "grad_norm": 0.5613246367945897, + "learning_rate": 4.660131685149468e-06, + "loss": 0.2631, + "step": 8235 + }, + { + "epoch": 0.38581533704970256, + "grad_norm": 0.6314715583621936, + "learning_rate": 4.6600362087820675e-06, + "loss": 0.2954, + "step": 8236 + }, + { + "epoch": 0.38586218203963085, + "grad_norm": 0.5975369008015327, + "learning_rate": 4.659940719984201e-06, + "loss": 0.2601, + "step": 8237 + }, + { + "epoch": 0.3859090270295592, + "grad_norm": 0.6229138382024227, + "learning_rate": 4.659845218756418e-06, + "loss": 0.2858, + "step": 8238 + }, + { + "epoch": 0.3859558720194875, + "grad_norm": 0.6160393492692421, + "learning_rate": 4.659749705099269e-06, + "loss": 0.3026, + "step": 8239 + }, + { + "epoch": 0.38600271700941585, + "grad_norm": 0.6219577316429737, + "learning_rate": 4.659654179013303e-06, + "loss": 0.2997, + "step": 8240 + }, + { + "epoch": 0.38604956199934415, + "grad_norm": 0.6113958267141307, + "learning_rate": 4.659558640499071e-06, + "loss": 0.271, + "step": 8241 + }, + { + "epoch": 0.3860964069892725, + "grad_norm": 0.6494364871267216, + "learning_rate": 4.659463089557121e-06, + "loss": 0.3002, + "step": 8242 + }, + { + "epoch": 0.38614325197920085, + "grad_norm": 0.6349023494208589, + "learning_rate": 4.659367526188003e-06, + "loss": 0.2962, + "step": 8243 + }, + { + "epoch": 0.38619009696912915, + "grad_norm": 0.5717004134840422, + "learning_rate": 4.659271950392269e-06, + "loss": 0.2744, + "step": 8244 + }, + { + "epoch": 0.3862369419590575, + "grad_norm": 0.6128996843209176, + "learning_rate": 4.659176362170468e-06, + "loss": 0.2746, + "step": 8245 + }, + { + "epoch": 0.3862837869489858, + "grad_norm": 0.5832777576385152, + "learning_rate": 4.659080761523148e-06, + "loss": 0.2867, + "step": 8246 + }, + { + "epoch": 0.38633063193891415, + "grad_norm": 0.7095777513830323, + "learning_rate": 4.658985148450862e-06, + "loss": 0.3245, + "step": 8247 + }, + { + "epoch": 0.38637747692884244, + "grad_norm": 0.6291738828337329, + "learning_rate": 4.658889522954158e-06, + "loss": 0.28, + "step": 8248 + }, + { + "epoch": 0.3864243219187708, + "grad_norm": 0.6612845567302673, + "learning_rate": 4.658793885033589e-06, + "loss": 0.3038, + "step": 8249 + }, + { + "epoch": 0.3864711669086991, + "grad_norm": 0.6100278423903371, + "learning_rate": 4.6586982346897035e-06, + "loss": 0.2887, + "step": 8250 + }, + { + "epoch": 0.38651801189862744, + "grad_norm": 0.648235696734137, + "learning_rate": 4.658602571923051e-06, + "loss": 0.3041, + "step": 8251 + }, + { + "epoch": 0.38656485688855574, + "grad_norm": 0.6548828269836641, + "learning_rate": 4.6585068967341836e-06, + "loss": 0.2929, + "step": 8252 + }, + { + "epoch": 0.3866117018784841, + "grad_norm": 0.6503346398167132, + "learning_rate": 4.658411209123653e-06, + "loss": 0.3098, + "step": 8253 + }, + { + "epoch": 0.38665854686841244, + "grad_norm": 0.6194032194549836, + "learning_rate": 4.6583155090920074e-06, + "loss": 0.3142, + "step": 8254 + }, + { + "epoch": 0.38670539185834074, + "grad_norm": 0.5811311724175463, + "learning_rate": 4.658219796639798e-06, + "loss": 0.2801, + "step": 8255 + }, + { + "epoch": 0.3867522368482691, + "grad_norm": 0.6944613728852179, + "learning_rate": 4.658124071767577e-06, + "loss": 0.3177, + "step": 8256 + }, + { + "epoch": 0.3867990818381974, + "grad_norm": 0.5644531665451177, + "learning_rate": 4.658028334475893e-06, + "loss": 0.2993, + "step": 8257 + }, + { + "epoch": 0.38684592682812574, + "grad_norm": 0.6076699917281467, + "learning_rate": 4.6579325847653e-06, + "loss": 0.2979, + "step": 8258 + }, + { + "epoch": 0.38689277181805404, + "grad_norm": 0.6058134292093285, + "learning_rate": 4.657836822636347e-06, + "loss": 0.2949, + "step": 8259 + }, + { + "epoch": 0.3869396168079824, + "grad_norm": 0.577219295346553, + "learning_rate": 4.657741048089585e-06, + "loss": 0.2915, + "step": 8260 + }, + { + "epoch": 0.3869864617979107, + "grad_norm": 0.6135608521917661, + "learning_rate": 4.657645261125565e-06, + "loss": 0.3041, + "step": 8261 + }, + { + "epoch": 0.38703330678783904, + "grad_norm": 0.6230311767509685, + "learning_rate": 4.657549461744841e-06, + "loss": 0.2976, + "step": 8262 + }, + { + "epoch": 0.3870801517777674, + "grad_norm": 0.6227916811587138, + "learning_rate": 4.657453649947959e-06, + "loss": 0.2738, + "step": 8263 + }, + { + "epoch": 0.3871269967676957, + "grad_norm": 0.6108799008662636, + "learning_rate": 4.657357825735476e-06, + "loss": 0.2902, + "step": 8264 + }, + { + "epoch": 0.38717384175762404, + "grad_norm": 0.6272789891119681, + "learning_rate": 4.6572619891079395e-06, + "loss": 0.2843, + "step": 8265 + }, + { + "epoch": 0.38722068674755233, + "grad_norm": 0.6337436718501643, + "learning_rate": 4.657166140065903e-06, + "loss": 0.309, + "step": 8266 + }, + { + "epoch": 0.3872675317374807, + "grad_norm": 0.6442213567089193, + "learning_rate": 4.657070278609918e-06, + "loss": 0.2922, + "step": 8267 + }, + { + "epoch": 0.387314376727409, + "grad_norm": 0.6016188067955175, + "learning_rate": 4.656974404740535e-06, + "loss": 0.2878, + "step": 8268 + }, + { + "epoch": 0.38736122171733733, + "grad_norm": 0.5834893051200145, + "learning_rate": 4.656878518458306e-06, + "loss": 0.2809, + "step": 8269 + }, + { + "epoch": 0.38740806670726563, + "grad_norm": 0.5565982220511946, + "learning_rate": 4.656782619763785e-06, + "loss": 0.2923, + "step": 8270 + }, + { + "epoch": 0.387454911697194, + "grad_norm": 0.5484564181484827, + "learning_rate": 4.656686708657521e-06, + "loss": 0.2638, + "step": 8271 + }, + { + "epoch": 0.38750175668712233, + "grad_norm": 0.5645344656556361, + "learning_rate": 4.6565907851400674e-06, + "loss": 0.2856, + "step": 8272 + }, + { + "epoch": 0.38754860167705063, + "grad_norm": 0.6719831245296622, + "learning_rate": 4.6564948492119756e-06, + "loss": 0.3075, + "step": 8273 + }, + { + "epoch": 0.387595446666979, + "grad_norm": 0.6283242959213429, + "learning_rate": 4.656398900873797e-06, + "loss": 0.2796, + "step": 8274 + }, + { + "epoch": 0.3876422916569073, + "grad_norm": 0.6042504656414799, + "learning_rate": 4.656302940126086e-06, + "loss": 0.2944, + "step": 8275 + }, + { + "epoch": 0.38768913664683563, + "grad_norm": 0.6264687313167429, + "learning_rate": 4.656206966969393e-06, + "loss": 0.2925, + "step": 8276 + }, + { + "epoch": 0.3877359816367639, + "grad_norm": 0.5735062884349297, + "learning_rate": 4.656110981404271e-06, + "loss": 0.2782, + "step": 8277 + }, + { + "epoch": 0.3877828266266923, + "grad_norm": 0.6060552629455733, + "learning_rate": 4.6560149834312726e-06, + "loss": 0.3101, + "step": 8278 + }, + { + "epoch": 0.3878296716166206, + "grad_norm": 0.5970122049170239, + "learning_rate": 4.65591897305095e-06, + "loss": 0.2833, + "step": 8279 + }, + { + "epoch": 0.3878765166065489, + "grad_norm": 0.6266909615825043, + "learning_rate": 4.655822950263855e-06, + "loss": 0.2996, + "step": 8280 + }, + { + "epoch": 0.3879233615964773, + "grad_norm": 0.6712553380467635, + "learning_rate": 4.6557269150705406e-06, + "loss": 0.3227, + "step": 8281 + }, + { + "epoch": 0.3879702065864056, + "grad_norm": 0.6522492639339192, + "learning_rate": 4.65563086747156e-06, + "loss": 0.2979, + "step": 8282 + }, + { + "epoch": 0.3880170515763339, + "grad_norm": 0.6597470827617113, + "learning_rate": 4.655534807467466e-06, + "loss": 0.296, + "step": 8283 + }, + { + "epoch": 0.3880638965662622, + "grad_norm": 0.6048006286723869, + "learning_rate": 4.6554387350588094e-06, + "loss": 0.3063, + "step": 8284 + }, + { + "epoch": 0.3881107415561906, + "grad_norm": 0.589428280268448, + "learning_rate": 4.6553426502461466e-06, + "loss": 0.3006, + "step": 8285 + }, + { + "epoch": 0.38815758654611887, + "grad_norm": 0.6094486593361762, + "learning_rate": 4.655246553030027e-06, + "loss": 0.2841, + "step": 8286 + }, + { + "epoch": 0.3882044315360472, + "grad_norm": 0.6059426570243359, + "learning_rate": 4.6551504434110065e-06, + "loss": 0.2888, + "step": 8287 + }, + { + "epoch": 0.3882512765259755, + "grad_norm": 0.6558019261660635, + "learning_rate": 4.655054321389636e-06, + "loss": 0.2847, + "step": 8288 + }, + { + "epoch": 0.38829812151590387, + "grad_norm": 0.6168079469211606, + "learning_rate": 4.65495818696647e-06, + "loss": 0.2967, + "step": 8289 + }, + { + "epoch": 0.3883449665058322, + "grad_norm": 0.5769119140263914, + "learning_rate": 4.654862040142061e-06, + "loss": 0.2751, + "step": 8290 + }, + { + "epoch": 0.3883918114957605, + "grad_norm": 0.6020081559023102, + "learning_rate": 4.654765880916962e-06, + "loss": 0.2908, + "step": 8291 + }, + { + "epoch": 0.38843865648568887, + "grad_norm": 0.6032416472719793, + "learning_rate": 4.654669709291727e-06, + "loss": 0.2912, + "step": 8292 + }, + { + "epoch": 0.38848550147561717, + "grad_norm": 0.6092281576684184, + "learning_rate": 4.65457352526691e-06, + "loss": 0.306, + "step": 8293 + }, + { + "epoch": 0.3885323464655455, + "grad_norm": 0.6639399207388638, + "learning_rate": 4.654477328843065e-06, + "loss": 0.3166, + "step": 8294 + }, + { + "epoch": 0.3885791914554738, + "grad_norm": 0.6511275251641953, + "learning_rate": 4.654381120020742e-06, + "loss": 0.3198, + "step": 8295 + }, + { + "epoch": 0.38862603644540217, + "grad_norm": 0.6447411137998238, + "learning_rate": 4.654284898800498e-06, + "loss": 0.3023, + "step": 8296 + }, + { + "epoch": 0.38867288143533046, + "grad_norm": 0.5626689352298129, + "learning_rate": 4.654188665182887e-06, + "loss": 0.2841, + "step": 8297 + }, + { + "epoch": 0.3887197264252588, + "grad_norm": 0.5823158370970296, + "learning_rate": 4.654092419168461e-06, + "loss": 0.2804, + "step": 8298 + }, + { + "epoch": 0.38876657141518717, + "grad_norm": 0.6097815830780323, + "learning_rate": 4.653996160757775e-06, + "loss": 0.2884, + "step": 8299 + }, + { + "epoch": 0.38881341640511546, + "grad_norm": 0.5636817954714332, + "learning_rate": 4.653899889951381e-06, + "loss": 0.2884, + "step": 8300 + }, + { + "epoch": 0.3888602613950438, + "grad_norm": 0.5953058539041898, + "learning_rate": 4.653803606749836e-06, + "loss": 0.2848, + "step": 8301 + }, + { + "epoch": 0.3889071063849721, + "grad_norm": 0.6062772688504012, + "learning_rate": 4.6537073111536915e-06, + "loss": 0.2827, + "step": 8302 + }, + { + "epoch": 0.38895395137490046, + "grad_norm": 0.6538271735003733, + "learning_rate": 4.653611003163504e-06, + "loss": 0.3023, + "step": 8303 + }, + { + "epoch": 0.38900079636482876, + "grad_norm": 0.630890561081098, + "learning_rate": 4.653514682779825e-06, + "loss": 0.3077, + "step": 8304 + }, + { + "epoch": 0.3890476413547571, + "grad_norm": 0.5453998465266212, + "learning_rate": 4.653418350003211e-06, + "loss": 0.2865, + "step": 8305 + }, + { + "epoch": 0.3890944863446854, + "grad_norm": 0.6085862741489728, + "learning_rate": 4.653322004834216e-06, + "loss": 0.2809, + "step": 8306 + }, + { + "epoch": 0.38914133133461376, + "grad_norm": 0.6008943895098112, + "learning_rate": 4.653225647273394e-06, + "loss": 0.2918, + "step": 8307 + }, + { + "epoch": 0.3891881763245421, + "grad_norm": 0.5576885880611392, + "learning_rate": 4.653129277321299e-06, + "loss": 0.2666, + "step": 8308 + }, + { + "epoch": 0.3892350213144704, + "grad_norm": 0.6570819204042411, + "learning_rate": 4.653032894978487e-06, + "loss": 0.308, + "step": 8309 + }, + { + "epoch": 0.38928186630439876, + "grad_norm": 0.639530692812391, + "learning_rate": 4.652936500245511e-06, + "loss": 0.3002, + "step": 8310 + }, + { + "epoch": 0.38932871129432706, + "grad_norm": 0.5855875509147103, + "learning_rate": 4.652840093122927e-06, + "loss": 0.2758, + "step": 8311 + }, + { + "epoch": 0.3893755562842554, + "grad_norm": 0.6046737036290917, + "learning_rate": 4.652743673611288e-06, + "loss": 0.2935, + "step": 8312 + }, + { + "epoch": 0.3894224012741837, + "grad_norm": 0.5735936333128606, + "learning_rate": 4.652647241711153e-06, + "loss": 0.301, + "step": 8313 + }, + { + "epoch": 0.38946924626411206, + "grad_norm": 0.5305963778345104, + "learning_rate": 4.652550797423071e-06, + "loss": 0.2846, + "step": 8314 + }, + { + "epoch": 0.38951609125404035, + "grad_norm": 0.6169748870204033, + "learning_rate": 4.652454340747602e-06, + "loss": 0.2951, + "step": 8315 + }, + { + "epoch": 0.3895629362439687, + "grad_norm": 0.601371240809353, + "learning_rate": 4.6523578716852995e-06, + "loss": 0.2754, + "step": 8316 + }, + { + "epoch": 0.38960978123389706, + "grad_norm": 0.6725740597838952, + "learning_rate": 4.652261390236718e-06, + "loss": 0.3069, + "step": 8317 + }, + { + "epoch": 0.38965662622382535, + "grad_norm": 0.6210222912236725, + "learning_rate": 4.652164896402413e-06, + "loss": 0.3096, + "step": 8318 + }, + { + "epoch": 0.3897034712137537, + "grad_norm": 0.6087596105505217, + "learning_rate": 4.65206839018294e-06, + "loss": 0.2797, + "step": 8319 + }, + { + "epoch": 0.389750316203682, + "grad_norm": 0.5915713995697638, + "learning_rate": 4.6519718715788545e-06, + "loss": 0.2821, + "step": 8320 + }, + { + "epoch": 0.38979716119361035, + "grad_norm": 0.5967249624391171, + "learning_rate": 4.651875340590712e-06, + "loss": 0.2879, + "step": 8321 + }, + { + "epoch": 0.38984400618353865, + "grad_norm": 0.6013273339511359, + "learning_rate": 4.6517787972190674e-06, + "loss": 0.2921, + "step": 8322 + }, + { + "epoch": 0.389890851173467, + "grad_norm": 0.5819339728966771, + "learning_rate": 4.651682241464477e-06, + "loss": 0.2927, + "step": 8323 + }, + { + "epoch": 0.3899376961633953, + "grad_norm": 0.6519157011370083, + "learning_rate": 4.651585673327496e-06, + "loss": 0.308, + "step": 8324 + }, + { + "epoch": 0.38998454115332365, + "grad_norm": 0.5822415952166837, + "learning_rate": 4.651489092808679e-06, + "loss": 0.2869, + "step": 8325 + }, + { + "epoch": 0.390031386143252, + "grad_norm": 0.5819823913221206, + "learning_rate": 4.651392499908584e-06, + "loss": 0.2967, + "step": 8326 + }, + { + "epoch": 0.3900782311331803, + "grad_norm": 0.6201926934377635, + "learning_rate": 4.651295894627766e-06, + "loss": 0.3052, + "step": 8327 + }, + { + "epoch": 0.39012507612310865, + "grad_norm": 0.6575752966822874, + "learning_rate": 4.651199276966781e-06, + "loss": 0.2825, + "step": 8328 + }, + { + "epoch": 0.39017192111303695, + "grad_norm": 0.5942518104759139, + "learning_rate": 4.651102646926185e-06, + "loss": 0.2566, + "step": 8329 + }, + { + "epoch": 0.3902187661029653, + "grad_norm": 0.5934290397665086, + "learning_rate": 4.6510060045065335e-06, + "loss": 0.2873, + "step": 8330 + }, + { + "epoch": 0.3902656110928936, + "grad_norm": 0.6195132596789302, + "learning_rate": 4.6509093497083835e-06, + "loss": 0.2896, + "step": 8331 + }, + { + "epoch": 0.39031245608282195, + "grad_norm": 0.5989307924973631, + "learning_rate": 4.65081268253229e-06, + "loss": 0.2774, + "step": 8332 + }, + { + "epoch": 0.39035930107275024, + "grad_norm": 0.6240638712650672, + "learning_rate": 4.6507160029788105e-06, + "loss": 0.3006, + "step": 8333 + }, + { + "epoch": 0.3904061460626786, + "grad_norm": 0.6793783622528202, + "learning_rate": 4.6506193110485005e-06, + "loss": 0.3096, + "step": 8334 + }, + { + "epoch": 0.39045299105260695, + "grad_norm": 0.6393050283160352, + "learning_rate": 4.650522606741918e-06, + "loss": 0.2695, + "step": 8335 + }, + { + "epoch": 0.39049983604253524, + "grad_norm": 0.6007851446837887, + "learning_rate": 4.650425890059618e-06, + "loss": 0.2882, + "step": 8336 + }, + { + "epoch": 0.3905466810324636, + "grad_norm": 0.6547393258604237, + "learning_rate": 4.650329161002157e-06, + "loss": 0.3056, + "step": 8337 + }, + { + "epoch": 0.3905935260223919, + "grad_norm": 0.6804197612616003, + "learning_rate": 4.650232419570093e-06, + "loss": 0.2913, + "step": 8338 + }, + { + "epoch": 0.39064037101232024, + "grad_norm": 0.6122685827061997, + "learning_rate": 4.65013566576398e-06, + "loss": 0.299, + "step": 8339 + }, + { + "epoch": 0.39068721600224854, + "grad_norm": 0.6599530944098451, + "learning_rate": 4.650038899584378e-06, + "loss": 0.2918, + "step": 8340 + }, + { + "epoch": 0.3907340609921769, + "grad_norm": 0.6217702171438021, + "learning_rate": 4.649942121031843e-06, + "loss": 0.2943, + "step": 8341 + }, + { + "epoch": 0.3907809059821052, + "grad_norm": 0.6057534522847227, + "learning_rate": 4.649845330106931e-06, + "loss": 0.2685, + "step": 8342 + }, + { + "epoch": 0.39082775097203354, + "grad_norm": 0.6307915021153768, + "learning_rate": 4.6497485268101995e-06, + "loss": 0.2628, + "step": 8343 + }, + { + "epoch": 0.3908745959619619, + "grad_norm": 0.5521545055066627, + "learning_rate": 4.649651711142205e-06, + "loss": 0.2692, + "step": 8344 + }, + { + "epoch": 0.3909214409518902, + "grad_norm": 0.6210530524011972, + "learning_rate": 4.649554883103507e-06, + "loss": 0.2948, + "step": 8345 + }, + { + "epoch": 0.39096828594181854, + "grad_norm": 0.5933287736693179, + "learning_rate": 4.649458042694659e-06, + "loss": 0.2668, + "step": 8346 + }, + { + "epoch": 0.39101513093174683, + "grad_norm": 0.6115893397514462, + "learning_rate": 4.649361189916221e-06, + "loss": 0.3057, + "step": 8347 + }, + { + "epoch": 0.3910619759216752, + "grad_norm": 0.6298034052810149, + "learning_rate": 4.649264324768749e-06, + "loss": 0.289, + "step": 8348 + }, + { + "epoch": 0.3911088209116035, + "grad_norm": 0.6130359515201685, + "learning_rate": 4.649167447252802e-06, + "loss": 0.2968, + "step": 8349 + }, + { + "epoch": 0.39115566590153183, + "grad_norm": 0.5675449959592381, + "learning_rate": 4.649070557368935e-06, + "loss": 0.2705, + "step": 8350 + }, + { + "epoch": 0.39120251089146013, + "grad_norm": 0.5855970492251571, + "learning_rate": 4.648973655117709e-06, + "loss": 0.277, + "step": 8351 + }, + { + "epoch": 0.3912493558813885, + "grad_norm": 0.6415126754556666, + "learning_rate": 4.648876740499678e-06, + "loss": 0.3122, + "step": 8352 + }, + { + "epoch": 0.39129620087131683, + "grad_norm": 0.6977000676738492, + "learning_rate": 4.648779813515402e-06, + "loss": 0.2916, + "step": 8353 + }, + { + "epoch": 0.39134304586124513, + "grad_norm": 0.626378984146021, + "learning_rate": 4.648682874165439e-06, + "loss": 0.2922, + "step": 8354 + }, + { + "epoch": 0.3913898908511735, + "grad_norm": 0.5873081635876475, + "learning_rate": 4.648585922450346e-06, + "loss": 0.2907, + "step": 8355 + }, + { + "epoch": 0.3914367358411018, + "grad_norm": 0.6106660844592497, + "learning_rate": 4.64848895837068e-06, + "loss": 0.2815, + "step": 8356 + }, + { + "epoch": 0.39148358083103013, + "grad_norm": 0.6054321804046892, + "learning_rate": 4.648391981927e-06, + "loss": 0.291, + "step": 8357 + }, + { + "epoch": 0.3915304258209584, + "grad_norm": 0.6141919101039808, + "learning_rate": 4.648294993119865e-06, + "loss": 0.2822, + "step": 8358 + }, + { + "epoch": 0.3915772708108868, + "grad_norm": 0.6034680798741897, + "learning_rate": 4.648197991949832e-06, + "loss": 0.2862, + "step": 8359 + }, + { + "epoch": 0.3916241158008151, + "grad_norm": 0.580408212646096, + "learning_rate": 4.648100978417461e-06, + "loss": 0.2752, + "step": 8360 + }, + { + "epoch": 0.3916709607907434, + "grad_norm": 0.5726065386014855, + "learning_rate": 4.6480039525233064e-06, + "loss": 0.2794, + "step": 8361 + }, + { + "epoch": 0.3917178057806718, + "grad_norm": 0.5689529221156914, + "learning_rate": 4.6479069142679295e-06, + "loss": 0.285, + "step": 8362 + }, + { + "epoch": 0.3917646507706001, + "grad_norm": 0.6220595575784731, + "learning_rate": 4.647809863651889e-06, + "loss": 0.2887, + "step": 8363 + }, + { + "epoch": 0.3918114957605284, + "grad_norm": 0.6032975905322381, + "learning_rate": 4.647712800675742e-06, + "loss": 0.3002, + "step": 8364 + }, + { + "epoch": 0.3918583407504567, + "grad_norm": 0.6584846237991409, + "learning_rate": 4.647615725340047e-06, + "loss": 0.2953, + "step": 8365 + }, + { + "epoch": 0.3919051857403851, + "grad_norm": 0.6031705782908392, + "learning_rate": 4.647518637645364e-06, + "loss": 0.2932, + "step": 8366 + }, + { + "epoch": 0.39195203073031337, + "grad_norm": 0.6193285852121444, + "learning_rate": 4.647421537592252e-06, + "loss": 0.2829, + "step": 8367 + }, + { + "epoch": 0.3919988757202417, + "grad_norm": 0.6038469512564261, + "learning_rate": 4.647324425181266e-06, + "loss": 0.2482, + "step": 8368 + }, + { + "epoch": 0.39204572071017, + "grad_norm": 0.6166382234909087, + "learning_rate": 4.647227300412971e-06, + "loss": 0.3105, + "step": 8369 + }, + { + "epoch": 0.39209256570009837, + "grad_norm": 0.6723365466761426, + "learning_rate": 4.64713016328792e-06, + "loss": 0.2952, + "step": 8370 + }, + { + "epoch": 0.3921394106900267, + "grad_norm": 0.6261556837888183, + "learning_rate": 4.647033013806676e-06, + "loss": 0.2988, + "step": 8371 + }, + { + "epoch": 0.392186255679955, + "grad_norm": 0.6143507951995844, + "learning_rate": 4.646935851969796e-06, + "loss": 0.294, + "step": 8372 + }, + { + "epoch": 0.39223310066988337, + "grad_norm": 0.653923331369186, + "learning_rate": 4.64683867777784e-06, + "loss": 0.3045, + "step": 8373 + }, + { + "epoch": 0.39227994565981167, + "grad_norm": 0.6231910972340183, + "learning_rate": 4.646741491231367e-06, + "loss": 0.2832, + "step": 8374 + }, + { + "epoch": 0.39232679064974, + "grad_norm": 0.6064223981055197, + "learning_rate": 4.6466442923309365e-06, + "loss": 0.2886, + "step": 8375 + }, + { + "epoch": 0.3923736356396683, + "grad_norm": 0.6118492972844108, + "learning_rate": 4.6465470810771074e-06, + "loss": 0.2901, + "step": 8376 + }, + { + "epoch": 0.39242048062959667, + "grad_norm": 0.6047952224024875, + "learning_rate": 4.6464498574704396e-06, + "loss": 0.292, + "step": 8377 + }, + { + "epoch": 0.39246732561952496, + "grad_norm": 0.6257247898693807, + "learning_rate": 4.646352621511492e-06, + "loss": 0.2932, + "step": 8378 + }, + { + "epoch": 0.3925141706094533, + "grad_norm": 0.6523842969521585, + "learning_rate": 4.646255373200824e-06, + "loss": 0.2987, + "step": 8379 + }, + { + "epoch": 0.39256101559938167, + "grad_norm": 0.6312658457505721, + "learning_rate": 4.646158112538997e-06, + "loss": 0.3108, + "step": 8380 + }, + { + "epoch": 0.39260786058930996, + "grad_norm": 0.5884538238268158, + "learning_rate": 4.646060839526568e-06, + "loss": 0.3112, + "step": 8381 + }, + { + "epoch": 0.3926547055792383, + "grad_norm": 0.5732744823603169, + "learning_rate": 4.6459635541641006e-06, + "loss": 0.2933, + "step": 8382 + }, + { + "epoch": 0.3927015505691666, + "grad_norm": 0.58699110614516, + "learning_rate": 4.64586625645215e-06, + "loss": 0.2849, + "step": 8383 + }, + { + "epoch": 0.39274839555909496, + "grad_norm": 0.6346165148013496, + "learning_rate": 4.6457689463912795e-06, + "loss": 0.2793, + "step": 8384 + }, + { + "epoch": 0.39279524054902326, + "grad_norm": 0.6756361329313699, + "learning_rate": 4.645671623982048e-06, + "loss": 0.2902, + "step": 8385 + }, + { + "epoch": 0.3928420855389516, + "grad_norm": 0.6100999188242285, + "learning_rate": 4.6455742892250146e-06, + "loss": 0.2967, + "step": 8386 + }, + { + "epoch": 0.3928889305288799, + "grad_norm": 0.5856317320791825, + "learning_rate": 4.645476942120742e-06, + "loss": 0.2638, + "step": 8387 + }, + { + "epoch": 0.39293577551880826, + "grad_norm": 0.6031529577109979, + "learning_rate": 4.645379582669788e-06, + "loss": 0.3053, + "step": 8388 + }, + { + "epoch": 0.3929826205087366, + "grad_norm": 0.6387480212003025, + "learning_rate": 4.645282210872714e-06, + "loss": 0.3121, + "step": 8389 + }, + { + "epoch": 0.3930294654986649, + "grad_norm": 0.636782790458431, + "learning_rate": 4.6451848267300795e-06, + "loss": 0.2885, + "step": 8390 + }, + { + "epoch": 0.39307631048859326, + "grad_norm": 0.6162119597306828, + "learning_rate": 4.6450874302424455e-06, + "loss": 0.3015, + "step": 8391 + }, + { + "epoch": 0.39312315547852156, + "grad_norm": 0.5877947243859933, + "learning_rate": 4.644990021410374e-06, + "loss": 0.282, + "step": 8392 + }, + { + "epoch": 0.3931700004684499, + "grad_norm": 0.5963215463733444, + "learning_rate": 4.644892600234423e-06, + "loss": 0.2918, + "step": 8393 + }, + { + "epoch": 0.3932168454583782, + "grad_norm": 0.625451473466171, + "learning_rate": 4.644795166715154e-06, + "loss": 0.301, + "step": 8394 + }, + { + "epoch": 0.39326369044830656, + "grad_norm": 0.5859257419149542, + "learning_rate": 4.644697720853127e-06, + "loss": 0.2798, + "step": 8395 + }, + { + "epoch": 0.39331053543823485, + "grad_norm": 0.6681944929409046, + "learning_rate": 4.644600262648905e-06, + "loss": 0.31, + "step": 8396 + }, + { + "epoch": 0.3933573804281632, + "grad_norm": 0.6505507400032916, + "learning_rate": 4.644502792103048e-06, + "loss": 0.2836, + "step": 8397 + }, + { + "epoch": 0.39340422541809156, + "grad_norm": 0.6424300000872878, + "learning_rate": 4.644405309216114e-06, + "loss": 0.3006, + "step": 8398 + }, + { + "epoch": 0.39345107040801985, + "grad_norm": 0.5744938251956845, + "learning_rate": 4.644307813988669e-06, + "loss": 0.3015, + "step": 8399 + }, + { + "epoch": 0.3934979153979482, + "grad_norm": 0.6245747135402859, + "learning_rate": 4.64421030642127e-06, + "loss": 0.3092, + "step": 8400 + }, + { + "epoch": 0.3935447603878765, + "grad_norm": 0.591313735440372, + "learning_rate": 4.644112786514481e-06, + "loss": 0.2939, + "step": 8401 + }, + { + "epoch": 0.39359160537780485, + "grad_norm": 0.5766422899268779, + "learning_rate": 4.6440152542688605e-06, + "loss": 0.2754, + "step": 8402 + }, + { + "epoch": 0.39363845036773315, + "grad_norm": 0.5752795265907396, + "learning_rate": 4.643917709684971e-06, + "loss": 0.2984, + "step": 8403 + }, + { + "epoch": 0.3936852953576615, + "grad_norm": 0.5836804869172539, + "learning_rate": 4.6438201527633755e-06, + "loss": 0.2774, + "step": 8404 + }, + { + "epoch": 0.3937321403475898, + "grad_norm": 0.6231614079322763, + "learning_rate": 4.643722583504632e-06, + "loss": 0.3098, + "step": 8405 + }, + { + "epoch": 0.39377898533751815, + "grad_norm": 0.6522555836737851, + "learning_rate": 4.6436250019093045e-06, + "loss": 0.3218, + "step": 8406 + }, + { + "epoch": 0.3938258303274465, + "grad_norm": 0.602705355615152, + "learning_rate": 4.643527407977954e-06, + "loss": 0.2783, + "step": 8407 + }, + { + "epoch": 0.3938726753173748, + "grad_norm": 0.5909211035676495, + "learning_rate": 4.643429801711142e-06, + "loss": 0.2769, + "step": 8408 + }, + { + "epoch": 0.39391952030730315, + "grad_norm": 0.580944028196557, + "learning_rate": 4.643332183109431e-06, + "loss": 0.3011, + "step": 8409 + }, + { + "epoch": 0.39396636529723145, + "grad_norm": 0.6436048009285193, + "learning_rate": 4.6432345521733816e-06, + "loss": 0.2991, + "step": 8410 + }, + { + "epoch": 0.3940132102871598, + "grad_norm": 0.6215578813914117, + "learning_rate": 4.6431369089035556e-06, + "loss": 0.2765, + "step": 8411 + }, + { + "epoch": 0.3940600552770881, + "grad_norm": 0.6427196648136666, + "learning_rate": 4.643039253300516e-06, + "loss": 0.2856, + "step": 8412 + }, + { + "epoch": 0.39410690026701645, + "grad_norm": 0.6058177336967702, + "learning_rate": 4.642941585364823e-06, + "loss": 0.2698, + "step": 8413 + }, + { + "epoch": 0.39415374525694474, + "grad_norm": 0.57204110546751, + "learning_rate": 4.6428439050970405e-06, + "loss": 0.2538, + "step": 8414 + }, + { + "epoch": 0.3942005902468731, + "grad_norm": 0.602227638901857, + "learning_rate": 4.642746212497729e-06, + "loss": 0.3025, + "step": 8415 + }, + { + "epoch": 0.39424743523680145, + "grad_norm": 0.6331433625817466, + "learning_rate": 4.642648507567453e-06, + "loss": 0.2882, + "step": 8416 + }, + { + "epoch": 0.39429428022672974, + "grad_norm": 0.6178798437366548, + "learning_rate": 4.642550790306772e-06, + "loss": 0.3018, + "step": 8417 + }, + { + "epoch": 0.3943411252166581, + "grad_norm": 0.5972721499598933, + "learning_rate": 4.642453060716251e-06, + "loss": 0.2704, + "step": 8418 + }, + { + "epoch": 0.3943879702065864, + "grad_norm": 0.6537571122356087, + "learning_rate": 4.642355318796451e-06, + "loss": 0.3082, + "step": 8419 + }, + { + "epoch": 0.39443481519651474, + "grad_norm": 0.5705690719912716, + "learning_rate": 4.642257564547934e-06, + "loss": 0.2632, + "step": 8420 + }, + { + "epoch": 0.39448166018644304, + "grad_norm": 0.5710948263363763, + "learning_rate": 4.642159797971263e-06, + "loss": 0.2743, + "step": 8421 + }, + { + "epoch": 0.3945285051763714, + "grad_norm": 0.6064399580449704, + "learning_rate": 4.6420620190670015e-06, + "loss": 0.2871, + "step": 8422 + }, + { + "epoch": 0.3945753501662997, + "grad_norm": 0.6029550136874854, + "learning_rate": 4.641964227835711e-06, + "loss": 0.2869, + "step": 8423 + }, + { + "epoch": 0.39462219515622804, + "grad_norm": 0.643856375184798, + "learning_rate": 4.641866424277955e-06, + "loss": 0.2939, + "step": 8424 + }, + { + "epoch": 0.3946690401461564, + "grad_norm": 0.6073472764664151, + "learning_rate": 4.641768608394296e-06, + "loss": 0.2974, + "step": 8425 + }, + { + "epoch": 0.3947158851360847, + "grad_norm": 0.6511050169785941, + "learning_rate": 4.641670780185296e-06, + "loss": 0.302, + "step": 8426 + }, + { + "epoch": 0.39476273012601304, + "grad_norm": 0.571445097596282, + "learning_rate": 4.641572939651521e-06, + "loss": 0.2749, + "step": 8427 + }, + { + "epoch": 0.39480957511594134, + "grad_norm": 0.5882542665645545, + "learning_rate": 4.64147508679353e-06, + "loss": 0.2712, + "step": 8428 + }, + { + "epoch": 0.3948564201058697, + "grad_norm": 0.6944444559507394, + "learning_rate": 4.641377221611889e-06, + "loss": 0.2968, + "step": 8429 + }, + { + "epoch": 0.394903265095798, + "grad_norm": 0.6291186789814771, + "learning_rate": 4.64127934410716e-06, + "loss": 0.2937, + "step": 8430 + }, + { + "epoch": 0.39495011008572634, + "grad_norm": 0.660253719988038, + "learning_rate": 4.6411814542799075e-06, + "loss": 0.3039, + "step": 8431 + }, + { + "epoch": 0.39499695507565463, + "grad_norm": 0.6051759186074926, + "learning_rate": 4.641083552130693e-06, + "loss": 0.2674, + "step": 8432 + }, + { + "epoch": 0.395043800065583, + "grad_norm": 0.6387922120899999, + "learning_rate": 4.640985637660081e-06, + "loss": 0.3039, + "step": 8433 + }, + { + "epoch": 0.39509064505551134, + "grad_norm": 0.6079395516439433, + "learning_rate": 4.640887710868634e-06, + "loss": 0.2863, + "step": 8434 + }, + { + "epoch": 0.39513749004543963, + "grad_norm": 0.5931177037687306, + "learning_rate": 4.640789771756918e-06, + "loss": 0.2744, + "step": 8435 + }, + { + "epoch": 0.395184335035368, + "grad_norm": 0.5275537344182858, + "learning_rate": 4.6406918203254934e-06, + "loss": 0.2558, + "step": 8436 + }, + { + "epoch": 0.3952311800252963, + "grad_norm": 0.6620334493458856, + "learning_rate": 4.640593856574927e-06, + "loss": 0.3067, + "step": 8437 + }, + { + "epoch": 0.39527802501522463, + "grad_norm": 0.5359536653988121, + "learning_rate": 4.64049588050578e-06, + "loss": 0.2598, + "step": 8438 + }, + { + "epoch": 0.39532487000515293, + "grad_norm": 0.5318209539800548, + "learning_rate": 4.640397892118617e-06, + "loss": 0.2787, + "step": 8439 + }, + { + "epoch": 0.3953717149950813, + "grad_norm": 0.6279540714511265, + "learning_rate": 4.640299891414002e-06, + "loss": 0.2843, + "step": 8440 + }, + { + "epoch": 0.3954185599850096, + "grad_norm": 0.6439458004914475, + "learning_rate": 4.640201878392499e-06, + "loss": 0.2711, + "step": 8441 + }, + { + "epoch": 0.39546540497493793, + "grad_norm": 0.6503093495615582, + "learning_rate": 4.640103853054673e-06, + "loss": 0.3042, + "step": 8442 + }, + { + "epoch": 0.3955122499648663, + "grad_norm": 0.6517623852298757, + "learning_rate": 4.640005815401086e-06, + "loss": 0.3038, + "step": 8443 + }, + { + "epoch": 0.3955590949547946, + "grad_norm": 0.6058768378222285, + "learning_rate": 4.639907765432304e-06, + "loss": 0.2841, + "step": 8444 + }, + { + "epoch": 0.39560593994472293, + "grad_norm": 0.6132581417650567, + "learning_rate": 4.639809703148891e-06, + "loss": 0.2846, + "step": 8445 + }, + { + "epoch": 0.3956527849346512, + "grad_norm": 0.603869588228309, + "learning_rate": 4.639711628551411e-06, + "loss": 0.2995, + "step": 8446 + }, + { + "epoch": 0.3956996299245796, + "grad_norm": 0.6049544405419497, + "learning_rate": 4.639613541640428e-06, + "loss": 0.2862, + "step": 8447 + }, + { + "epoch": 0.3957464749145079, + "grad_norm": 0.6185132361747553, + "learning_rate": 4.6395154424165065e-06, + "loss": 0.282, + "step": 8448 + }, + { + "epoch": 0.3957933199044362, + "grad_norm": 0.5955666962382412, + "learning_rate": 4.639417330880213e-06, + "loss": 0.2858, + "step": 8449 + }, + { + "epoch": 0.3958401648943645, + "grad_norm": 0.567407030886221, + "learning_rate": 4.639319207032109e-06, + "loss": 0.2939, + "step": 8450 + }, + { + "epoch": 0.3958870098842929, + "grad_norm": 0.5903153256057972, + "learning_rate": 4.639221070872761e-06, + "loss": 0.2813, + "step": 8451 + }, + { + "epoch": 0.3959338548742212, + "grad_norm": 0.5741582159996932, + "learning_rate": 4.639122922402734e-06, + "loss": 0.2819, + "step": 8452 + }, + { + "epoch": 0.3959806998641495, + "grad_norm": 0.5690689988640152, + "learning_rate": 4.639024761622591e-06, + "loss": 0.2894, + "step": 8453 + }, + { + "epoch": 0.3960275448540779, + "grad_norm": 0.7024077633482259, + "learning_rate": 4.638926588532898e-06, + "loss": 0.2951, + "step": 8454 + }, + { + "epoch": 0.39607438984400617, + "grad_norm": 0.5638148819003919, + "learning_rate": 4.638828403134221e-06, + "loss": 0.2746, + "step": 8455 + }, + { + "epoch": 0.3961212348339345, + "grad_norm": 0.5806273774580201, + "learning_rate": 4.638730205427124e-06, + "loss": 0.2757, + "step": 8456 + }, + { + "epoch": 0.3961680798238628, + "grad_norm": 0.5674796855685887, + "learning_rate": 4.638631995412173e-06, + "loss": 0.2771, + "step": 8457 + }, + { + "epoch": 0.39621492481379117, + "grad_norm": 0.6480878954938778, + "learning_rate": 4.638533773089931e-06, + "loss": 0.3066, + "step": 8458 + }, + { + "epoch": 0.39626176980371947, + "grad_norm": 0.5792735164900108, + "learning_rate": 4.638435538460965e-06, + "loss": 0.2826, + "step": 8459 + }, + { + "epoch": 0.3963086147936478, + "grad_norm": 0.6170143574802713, + "learning_rate": 4.6383372915258406e-06, + "loss": 0.3214, + "step": 8460 + }, + { + "epoch": 0.39635545978357617, + "grad_norm": 0.6000515032976755, + "learning_rate": 4.6382390322851215e-06, + "loss": 0.2921, + "step": 8461 + }, + { + "epoch": 0.39640230477350447, + "grad_norm": 0.5740432226493524, + "learning_rate": 4.638140760739374e-06, + "loss": 0.273, + "step": 8462 + }, + { + "epoch": 0.3964491497634328, + "grad_norm": 0.6312124919276101, + "learning_rate": 4.638042476889166e-06, + "loss": 0.3073, + "step": 8463 + }, + { + "epoch": 0.3964959947533611, + "grad_norm": 0.6159196383376261, + "learning_rate": 4.637944180735059e-06, + "loss": 0.3012, + "step": 8464 + }, + { + "epoch": 0.39654283974328947, + "grad_norm": 0.5800722467474344, + "learning_rate": 4.637845872277621e-06, + "loss": 0.297, + "step": 8465 + }, + { + "epoch": 0.39658968473321776, + "grad_norm": 0.5767918723993213, + "learning_rate": 4.637747551517418e-06, + "loss": 0.2738, + "step": 8466 + }, + { + "epoch": 0.3966365297231461, + "grad_norm": 0.5890923535742847, + "learning_rate": 4.637649218455013e-06, + "loss": 0.2875, + "step": 8467 + }, + { + "epoch": 0.3966833747130744, + "grad_norm": 0.6010263219310967, + "learning_rate": 4.637550873090977e-06, + "loss": 0.2815, + "step": 8468 + }, + { + "epoch": 0.39673021970300276, + "grad_norm": 0.5867435557023579, + "learning_rate": 4.637452515425871e-06, + "loss": 0.2791, + "step": 8469 + }, + { + "epoch": 0.3967770646929311, + "grad_norm": 0.6048055497102511, + "learning_rate": 4.637354145460264e-06, + "loss": 0.2939, + "step": 8470 + }, + { + "epoch": 0.3968239096828594, + "grad_norm": 0.5926728118531254, + "learning_rate": 4.63725576319472e-06, + "loss": 0.2851, + "step": 8471 + }, + { + "epoch": 0.39687075467278776, + "grad_norm": 0.5783217234400929, + "learning_rate": 4.637157368629808e-06, + "loss": 0.2852, + "step": 8472 + }, + { + "epoch": 0.39691759966271606, + "grad_norm": 0.6135255277886561, + "learning_rate": 4.637058961766091e-06, + "loss": 0.2906, + "step": 8473 + }, + { + "epoch": 0.3969644446526444, + "grad_norm": 0.6007876883324231, + "learning_rate": 4.636960542604138e-06, + "loss": 0.3012, + "step": 8474 + }, + { + "epoch": 0.3970112896425727, + "grad_norm": 0.6401542127532298, + "learning_rate": 4.636862111144512e-06, + "loss": 0.3108, + "step": 8475 + }, + { + "epoch": 0.39705813463250106, + "grad_norm": 0.6135085387236441, + "learning_rate": 4.636763667387783e-06, + "loss": 0.2973, + "step": 8476 + }, + { + "epoch": 0.39710497962242935, + "grad_norm": 0.6392372871754302, + "learning_rate": 4.636665211334517e-06, + "loss": 0.3031, + "step": 8477 + }, + { + "epoch": 0.3971518246123577, + "grad_norm": 0.612613408403346, + "learning_rate": 4.636566742985279e-06, + "loss": 0.2899, + "step": 8478 + }, + { + "epoch": 0.39719866960228606, + "grad_norm": 0.5915708156092375, + "learning_rate": 4.636468262340637e-06, + "loss": 0.2973, + "step": 8479 + }, + { + "epoch": 0.39724551459221435, + "grad_norm": 0.6206669349645433, + "learning_rate": 4.636369769401156e-06, + "loss": 0.2712, + "step": 8480 + }, + { + "epoch": 0.3972923595821427, + "grad_norm": 0.6573043824716803, + "learning_rate": 4.636271264167404e-06, + "loss": 0.2995, + "step": 8481 + }, + { + "epoch": 0.397339204572071, + "grad_norm": 0.6499296730494152, + "learning_rate": 4.6361727466399484e-06, + "loss": 0.309, + "step": 8482 + }, + { + "epoch": 0.39738604956199935, + "grad_norm": 0.6394828162423198, + "learning_rate": 4.636074216819355e-06, + "loss": 0.2709, + "step": 8483 + }, + { + "epoch": 0.39743289455192765, + "grad_norm": 0.5794129090721195, + "learning_rate": 4.635975674706192e-06, + "loss": 0.2727, + "step": 8484 + }, + { + "epoch": 0.397479739541856, + "grad_norm": 0.6231476173296142, + "learning_rate": 4.635877120301025e-06, + "loss": 0.2786, + "step": 8485 + }, + { + "epoch": 0.3975265845317843, + "grad_norm": 0.5757717068695223, + "learning_rate": 4.635778553604423e-06, + "loss": 0.2888, + "step": 8486 + }, + { + "epoch": 0.39757342952171265, + "grad_norm": 0.6664877239355463, + "learning_rate": 4.6356799746169525e-06, + "loss": 0.3012, + "step": 8487 + }, + { + "epoch": 0.397620274511641, + "grad_norm": 0.6296138678782801, + "learning_rate": 4.635581383339179e-06, + "loss": 0.307, + "step": 8488 + }, + { + "epoch": 0.3976671195015693, + "grad_norm": 0.5821646930690898, + "learning_rate": 4.635482779771673e-06, + "loss": 0.2709, + "step": 8489 + }, + { + "epoch": 0.39771396449149765, + "grad_norm": 0.6045404446439399, + "learning_rate": 4.635384163914999e-06, + "loss": 0.2825, + "step": 8490 + }, + { + "epoch": 0.39776080948142595, + "grad_norm": 0.6163580948332301, + "learning_rate": 4.635285535769727e-06, + "loss": 0.2949, + "step": 8491 + }, + { + "epoch": 0.3978076544713543, + "grad_norm": 0.6654675167924248, + "learning_rate": 4.635186895336422e-06, + "loss": 0.3067, + "step": 8492 + }, + { + "epoch": 0.3978544994612826, + "grad_norm": 0.603438942607899, + "learning_rate": 4.635088242615654e-06, + "loss": 0.277, + "step": 8493 + }, + { + "epoch": 0.39790134445121095, + "grad_norm": 0.6281153603137672, + "learning_rate": 4.63498957760799e-06, + "loss": 0.2938, + "step": 8494 + }, + { + "epoch": 0.39794818944113924, + "grad_norm": 0.6712627980753875, + "learning_rate": 4.6348909003139976e-06, + "loss": 0.3094, + "step": 8495 + }, + { + "epoch": 0.3979950344310676, + "grad_norm": 0.6189573608005012, + "learning_rate": 4.634792210734244e-06, + "loss": 0.279, + "step": 8496 + }, + { + "epoch": 0.39804187942099595, + "grad_norm": 0.6293091758446266, + "learning_rate": 4.634693508869298e-06, + "loss": 0.2991, + "step": 8497 + }, + { + "epoch": 0.39808872441092424, + "grad_norm": 0.6034078023107531, + "learning_rate": 4.634594794719728e-06, + "loss": 0.2926, + "step": 8498 + }, + { + "epoch": 0.3981355694008526, + "grad_norm": 0.6822742948386064, + "learning_rate": 4.634496068286101e-06, + "loss": 0.2888, + "step": 8499 + }, + { + "epoch": 0.3981824143907809, + "grad_norm": 0.6539744023274111, + "learning_rate": 4.634397329568985e-06, + "loss": 0.299, + "step": 8500 + }, + { + "epoch": 0.39822925938070924, + "grad_norm": 0.587679782492564, + "learning_rate": 4.63429857856895e-06, + "loss": 0.2938, + "step": 8501 + }, + { + "epoch": 0.39827610437063754, + "grad_norm": 0.6039019462953538, + "learning_rate": 4.6341998152865626e-06, + "loss": 0.2751, + "step": 8502 + }, + { + "epoch": 0.3983229493605659, + "grad_norm": 0.6534960403806025, + "learning_rate": 4.6341010397223915e-06, + "loss": 0.2936, + "step": 8503 + }, + { + "epoch": 0.3983697943504942, + "grad_norm": 0.6441772307698309, + "learning_rate": 4.634002251877006e-06, + "loss": 0.2845, + "step": 8504 + }, + { + "epoch": 0.39841663934042254, + "grad_norm": 0.6519317837778967, + "learning_rate": 4.633903451750973e-06, + "loss": 0.2933, + "step": 8505 + }, + { + "epoch": 0.3984634843303509, + "grad_norm": 0.6681592968881009, + "learning_rate": 4.633804639344862e-06, + "loss": 0.3048, + "step": 8506 + }, + { + "epoch": 0.3985103293202792, + "grad_norm": 0.6600299608926891, + "learning_rate": 4.633705814659242e-06, + "loss": 0.306, + "step": 8507 + }, + { + "epoch": 0.39855717431020754, + "grad_norm": 0.6283688550923369, + "learning_rate": 4.6336069776946816e-06, + "loss": 0.3028, + "step": 8508 + }, + { + "epoch": 0.39860401930013584, + "grad_norm": 0.591066358800634, + "learning_rate": 4.6335081284517485e-06, + "loss": 0.292, + "step": 8509 + }, + { + "epoch": 0.3986508642900642, + "grad_norm": 0.5369968831773321, + "learning_rate": 4.633409266931013e-06, + "loss": 0.2778, + "step": 8510 + }, + { + "epoch": 0.3986977092799925, + "grad_norm": 0.6078349242226785, + "learning_rate": 4.633310393133043e-06, + "loss": 0.2912, + "step": 8511 + }, + { + "epoch": 0.39874455426992084, + "grad_norm": 0.6144980888904664, + "learning_rate": 4.633211507058408e-06, + "loss": 0.2898, + "step": 8512 + }, + { + "epoch": 0.39879139925984913, + "grad_norm": 0.6073299877077346, + "learning_rate": 4.633112608707677e-06, + "loss": 0.3016, + "step": 8513 + }, + { + "epoch": 0.3988382442497775, + "grad_norm": 0.6105453424819847, + "learning_rate": 4.633013698081419e-06, + "loss": 0.2916, + "step": 8514 + }, + { + "epoch": 0.39888508923970584, + "grad_norm": 0.5802154939072832, + "learning_rate": 4.632914775180204e-06, + "loss": 0.281, + "step": 8515 + }, + { + "epoch": 0.39893193422963413, + "grad_norm": 0.5961924893318525, + "learning_rate": 4.6328158400046e-06, + "loss": 0.2982, + "step": 8516 + }, + { + "epoch": 0.3989787792195625, + "grad_norm": 0.6020330036077153, + "learning_rate": 4.632716892555177e-06, + "loss": 0.2933, + "step": 8517 + }, + { + "epoch": 0.3990256242094908, + "grad_norm": 0.6790727458043493, + "learning_rate": 4.6326179328325035e-06, + "loss": 0.2976, + "step": 8518 + }, + { + "epoch": 0.39907246919941913, + "grad_norm": 0.6020639967209559, + "learning_rate": 4.632518960837151e-06, + "loss": 0.2909, + "step": 8519 + }, + { + "epoch": 0.39911931418934743, + "grad_norm": 0.6001874032747049, + "learning_rate": 4.632419976569687e-06, + "loss": 0.2848, + "step": 8520 + }, + { + "epoch": 0.3991661591792758, + "grad_norm": 0.5992942656787249, + "learning_rate": 4.632320980030682e-06, + "loss": 0.274, + "step": 8521 + }, + { + "epoch": 0.3992130041692041, + "grad_norm": 0.5954244059671997, + "learning_rate": 4.632221971220706e-06, + "loss": 0.2912, + "step": 8522 + }, + { + "epoch": 0.39925984915913243, + "grad_norm": 0.558734629398423, + "learning_rate": 4.6321229501403285e-06, + "loss": 0.2777, + "step": 8523 + }, + { + "epoch": 0.3993066941490608, + "grad_norm": 0.6659226454767755, + "learning_rate": 4.632023916790119e-06, + "loss": 0.3164, + "step": 8524 + }, + { + "epoch": 0.3993535391389891, + "grad_norm": 0.586600050101798, + "learning_rate": 4.631924871170649e-06, + "loss": 0.2869, + "step": 8525 + }, + { + "epoch": 0.39940038412891743, + "grad_norm": 0.598948867691713, + "learning_rate": 4.631825813282485e-06, + "loss": 0.3017, + "step": 8526 + }, + { + "epoch": 0.3994472291188457, + "grad_norm": 0.5871280633577078, + "learning_rate": 4.631726743126201e-06, + "loss": 0.288, + "step": 8527 + }, + { + "epoch": 0.3994940741087741, + "grad_norm": 0.6029733456769034, + "learning_rate": 4.6316276607023654e-06, + "loss": 0.2882, + "step": 8528 + }, + { + "epoch": 0.3995409190987024, + "grad_norm": 0.5715597189421711, + "learning_rate": 4.631528566011547e-06, + "loss": 0.2777, + "step": 8529 + }, + { + "epoch": 0.3995877640886307, + "grad_norm": 0.5602401815155361, + "learning_rate": 4.631429459054319e-06, + "loss": 0.2577, + "step": 8530 + }, + { + "epoch": 0.399634609078559, + "grad_norm": 0.638171133384287, + "learning_rate": 4.631330339831249e-06, + "loss": 0.3086, + "step": 8531 + }, + { + "epoch": 0.3996814540684874, + "grad_norm": 0.6311066396812712, + "learning_rate": 4.63123120834291e-06, + "loss": 0.2995, + "step": 8532 + }, + { + "epoch": 0.3997282990584157, + "grad_norm": 0.5404591037468374, + "learning_rate": 4.631132064589869e-06, + "loss": 0.2818, + "step": 8533 + }, + { + "epoch": 0.399775144048344, + "grad_norm": 0.586249845925964, + "learning_rate": 4.6310329085727e-06, + "loss": 0.2898, + "step": 8534 + }, + { + "epoch": 0.3998219890382724, + "grad_norm": 0.6091414579109318, + "learning_rate": 4.630933740291972e-06, + "loss": 0.2893, + "step": 8535 + }, + { + "epoch": 0.39986883402820067, + "grad_norm": 0.5897069627900386, + "learning_rate": 4.6308345597482565e-06, + "loss": 0.2728, + "step": 8536 + }, + { + "epoch": 0.399915679018129, + "grad_norm": 0.5788620796923593, + "learning_rate": 4.630735366942123e-06, + "loss": 0.2902, + "step": 8537 + }, + { + "epoch": 0.3999625240080573, + "grad_norm": 0.6085937901332714, + "learning_rate": 4.630636161874143e-06, + "loss": 0.3018, + "step": 8538 + }, + { + "epoch": 0.40000936899798567, + "grad_norm": 0.5898815596247531, + "learning_rate": 4.630536944544887e-06, + "loss": 0.2901, + "step": 8539 + }, + { + "epoch": 0.40005621398791397, + "grad_norm": 0.6046997650110302, + "learning_rate": 4.630437714954927e-06, + "loss": 0.2998, + "step": 8540 + }, + { + "epoch": 0.4001030589778423, + "grad_norm": 0.6396788828021214, + "learning_rate": 4.630338473104833e-06, + "loss": 0.3034, + "step": 8541 + }, + { + "epoch": 0.40014990396777067, + "grad_norm": 0.6156951510077852, + "learning_rate": 4.630239218995177e-06, + "loss": 0.2895, + "step": 8542 + }, + { + "epoch": 0.40019674895769897, + "grad_norm": 0.5755990150883756, + "learning_rate": 4.630139952626529e-06, + "loss": 0.2872, + "step": 8543 + }, + { + "epoch": 0.4002435939476273, + "grad_norm": 0.5686701604653406, + "learning_rate": 4.630040673999462e-06, + "loss": 0.2819, + "step": 8544 + }, + { + "epoch": 0.4002904389375556, + "grad_norm": 0.6521513763949693, + "learning_rate": 4.629941383114545e-06, + "loss": 0.318, + "step": 8545 + }, + { + "epoch": 0.40033728392748397, + "grad_norm": 0.6044595226001906, + "learning_rate": 4.629842079972352e-06, + "loss": 0.2678, + "step": 8546 + }, + { + "epoch": 0.40038412891741226, + "grad_norm": 0.6242506019969059, + "learning_rate": 4.629742764573453e-06, + "loss": 0.3116, + "step": 8547 + }, + { + "epoch": 0.4004309739073406, + "grad_norm": 0.6293240479095663, + "learning_rate": 4.629643436918419e-06, + "loss": 0.2978, + "step": 8548 + }, + { + "epoch": 0.4004778188972689, + "grad_norm": 0.6449527795435078, + "learning_rate": 4.629544097007822e-06, + "loss": 0.3156, + "step": 8549 + }, + { + "epoch": 0.40052466388719726, + "grad_norm": 0.6096831861210518, + "learning_rate": 4.629444744842235e-06, + "loss": 0.2839, + "step": 8550 + }, + { + "epoch": 0.4005715088771256, + "grad_norm": 0.6341565591339748, + "learning_rate": 4.629345380422228e-06, + "loss": 0.2878, + "step": 8551 + }, + { + "epoch": 0.4006183538670539, + "grad_norm": 0.6132709885441723, + "learning_rate": 4.629246003748374e-06, + "loss": 0.2813, + "step": 8552 + }, + { + "epoch": 0.40066519885698226, + "grad_norm": 0.6085042305145638, + "learning_rate": 4.6291466148212435e-06, + "loss": 0.2901, + "step": 8553 + }, + { + "epoch": 0.40071204384691056, + "grad_norm": 0.6414524390945582, + "learning_rate": 4.629047213641411e-06, + "loss": 0.283, + "step": 8554 + }, + { + "epoch": 0.4007588888368389, + "grad_norm": 0.6304948601451681, + "learning_rate": 4.6289478002094454e-06, + "loss": 0.2871, + "step": 8555 + }, + { + "epoch": 0.4008057338267672, + "grad_norm": 0.6837113924058461, + "learning_rate": 4.62884837452592e-06, + "loss": 0.2878, + "step": 8556 + }, + { + "epoch": 0.40085257881669556, + "grad_norm": 0.5886805762866953, + "learning_rate": 4.6287489365914085e-06, + "loss": 0.2909, + "step": 8557 + }, + { + "epoch": 0.40089942380662386, + "grad_norm": 0.5973080250080914, + "learning_rate": 4.628649486406482e-06, + "loss": 0.2845, + "step": 8558 + }, + { + "epoch": 0.4009462687965522, + "grad_norm": 0.616371102013071, + "learning_rate": 4.628550023971712e-06, + "loss": 0.2938, + "step": 8559 + }, + { + "epoch": 0.40099311378648056, + "grad_norm": 0.6408694353069422, + "learning_rate": 4.628450549287672e-06, + "loss": 0.3018, + "step": 8560 + }, + { + "epoch": 0.40103995877640886, + "grad_norm": 0.5698756446842308, + "learning_rate": 4.628351062354934e-06, + "loss": 0.2806, + "step": 8561 + }, + { + "epoch": 0.4010868037663372, + "grad_norm": 0.6287180167171269, + "learning_rate": 4.6282515631740695e-06, + "loss": 0.2812, + "step": 8562 + }, + { + "epoch": 0.4011336487562655, + "grad_norm": 0.62866841611808, + "learning_rate": 4.628152051745654e-06, + "loss": 0.2944, + "step": 8563 + }, + { + "epoch": 0.40118049374619386, + "grad_norm": 0.6666504094751692, + "learning_rate": 4.628052528070257e-06, + "loss": 0.2924, + "step": 8564 + }, + { + "epoch": 0.40122733873612215, + "grad_norm": 0.5558404515545976, + "learning_rate": 4.627952992148454e-06, + "loss": 0.2809, + "step": 8565 + }, + { + "epoch": 0.4012741837260505, + "grad_norm": 0.6413707827282725, + "learning_rate": 4.627853443980814e-06, + "loss": 0.2955, + "step": 8566 + }, + { + "epoch": 0.4013210287159788, + "grad_norm": 0.6658446277414343, + "learning_rate": 4.627753883567914e-06, + "loss": 0.2925, + "step": 8567 + }, + { + "epoch": 0.40136787370590715, + "grad_norm": 0.5616222711626142, + "learning_rate": 4.627654310910325e-06, + "loss": 0.2808, + "step": 8568 + }, + { + "epoch": 0.4014147186958355, + "grad_norm": 0.5762690796636502, + "learning_rate": 4.62755472600862e-06, + "loss": 0.323, + "step": 8569 + }, + { + "epoch": 0.4014615636857638, + "grad_norm": 0.5700645160925895, + "learning_rate": 4.627455128863372e-06, + "loss": 0.2916, + "step": 8570 + }, + { + "epoch": 0.40150840867569215, + "grad_norm": 0.6776535615447676, + "learning_rate": 4.627355519475155e-06, + "loss": 0.3165, + "step": 8571 + }, + { + "epoch": 0.40155525366562045, + "grad_norm": 0.6326608228466142, + "learning_rate": 4.627255897844541e-06, + "loss": 0.3129, + "step": 8572 + }, + { + "epoch": 0.4016020986555488, + "grad_norm": 0.5935670762611837, + "learning_rate": 4.627156263972105e-06, + "loss": 0.2825, + "step": 8573 + }, + { + "epoch": 0.4016489436454771, + "grad_norm": 0.5934278640657438, + "learning_rate": 4.6270566178584185e-06, + "loss": 0.2817, + "step": 8574 + }, + { + "epoch": 0.40169578863540545, + "grad_norm": 0.5838907725576887, + "learning_rate": 4.626956959504057e-06, + "loss": 0.2971, + "step": 8575 + }, + { + "epoch": 0.40174263362533374, + "grad_norm": 0.6267235470393918, + "learning_rate": 4.626857288909591e-06, + "loss": 0.2993, + "step": 8576 + }, + { + "epoch": 0.4017894786152621, + "grad_norm": 0.6319986461810286, + "learning_rate": 4.626757606075597e-06, + "loss": 0.282, + "step": 8577 + }, + { + "epoch": 0.40183632360519045, + "grad_norm": 0.5757399174191892, + "learning_rate": 4.6266579110026466e-06, + "loss": 0.2927, + "step": 8578 + }, + { + "epoch": 0.40188316859511874, + "grad_norm": 0.5809353276120217, + "learning_rate": 4.626558203691316e-06, + "loss": 0.2864, + "step": 8579 + }, + { + "epoch": 0.4019300135850471, + "grad_norm": 0.6104900128917722, + "learning_rate": 4.6264584841421764e-06, + "loss": 0.3112, + "step": 8580 + }, + { + "epoch": 0.4019768585749754, + "grad_norm": 0.599140720239169, + "learning_rate": 4.626358752355803e-06, + "loss": 0.291, + "step": 8581 + }, + { + "epoch": 0.40202370356490374, + "grad_norm": 0.6044983837373795, + "learning_rate": 4.626259008332768e-06, + "loss": 0.289, + "step": 8582 + }, + { + "epoch": 0.40207054855483204, + "grad_norm": 0.6038516489365906, + "learning_rate": 4.6261592520736485e-06, + "loss": 0.2811, + "step": 8583 + }, + { + "epoch": 0.4021173935447604, + "grad_norm": 0.5660690047819037, + "learning_rate": 4.626059483579017e-06, + "loss": 0.2838, + "step": 8584 + }, + { + "epoch": 0.4021642385346887, + "grad_norm": 0.5863025379102196, + "learning_rate": 4.625959702849446e-06, + "loss": 0.3042, + "step": 8585 + }, + { + "epoch": 0.40221108352461704, + "grad_norm": 0.6413694608762806, + "learning_rate": 4.625859909885513e-06, + "loss": 0.2928, + "step": 8586 + }, + { + "epoch": 0.4022579285145454, + "grad_norm": 0.6098641734479988, + "learning_rate": 4.62576010468779e-06, + "loss": 0.2698, + "step": 8587 + }, + { + "epoch": 0.4023047735044737, + "grad_norm": 0.6284089278923584, + "learning_rate": 4.625660287256851e-06, + "loss": 0.2961, + "step": 8588 + }, + { + "epoch": 0.40235161849440204, + "grad_norm": 0.6718956896398367, + "learning_rate": 4.625560457593272e-06, + "loss": 0.2811, + "step": 8589 + }, + { + "epoch": 0.40239846348433034, + "grad_norm": 0.6386896427972538, + "learning_rate": 4.6254606156976265e-06, + "loss": 0.3, + "step": 8590 + }, + { + "epoch": 0.4024453084742587, + "grad_norm": 0.5835213073675696, + "learning_rate": 4.6253607615704895e-06, + "loss": 0.2815, + "step": 8591 + }, + { + "epoch": 0.402492153464187, + "grad_norm": 0.5698394868446863, + "learning_rate": 4.6252608952124356e-06, + "loss": 0.2735, + "step": 8592 + }, + { + "epoch": 0.40253899845411534, + "grad_norm": 0.5814868621310145, + "learning_rate": 4.62516101662404e-06, + "loss": 0.2908, + "step": 8593 + }, + { + "epoch": 0.40258584344404363, + "grad_norm": 0.6593114687508782, + "learning_rate": 4.625061125805876e-06, + "loss": 0.2886, + "step": 8594 + }, + { + "epoch": 0.402632688433972, + "grad_norm": 0.6252230794348574, + "learning_rate": 4.62496122275852e-06, + "loss": 0.303, + "step": 8595 + }, + { + "epoch": 0.40267953342390034, + "grad_norm": 0.6012349139709574, + "learning_rate": 4.624861307482545e-06, + "loss": 0.302, + "step": 8596 + }, + { + "epoch": 0.40272637841382863, + "grad_norm": 0.6016499327777097, + "learning_rate": 4.624761379978529e-06, + "loss": 0.302, + "step": 8597 + }, + { + "epoch": 0.402773223403757, + "grad_norm": 0.5845788279742324, + "learning_rate": 4.624661440247045e-06, + "loss": 0.2799, + "step": 8598 + }, + { + "epoch": 0.4028200683936853, + "grad_norm": 0.6461262978623794, + "learning_rate": 4.624561488288667e-06, + "loss": 0.2831, + "step": 8599 + }, + { + "epoch": 0.40286691338361363, + "grad_norm": 0.5995997355612589, + "learning_rate": 4.6244615241039726e-06, + "loss": 0.2986, + "step": 8600 + }, + { + "epoch": 0.40291375837354193, + "grad_norm": 0.6379957625157874, + "learning_rate": 4.624361547693536e-06, + "loss": 0.3043, + "step": 8601 + }, + { + "epoch": 0.4029606033634703, + "grad_norm": 0.5330443648723713, + "learning_rate": 4.624261559057932e-06, + "loss": 0.2899, + "step": 8602 + }, + { + "epoch": 0.4030074483533986, + "grad_norm": 0.6591725514740655, + "learning_rate": 4.6241615581977375e-06, + "loss": 0.3006, + "step": 8603 + }, + { + "epoch": 0.40305429334332693, + "grad_norm": 0.6025533817629103, + "learning_rate": 4.624061545113527e-06, + "loss": 0.2969, + "step": 8604 + }, + { + "epoch": 0.4031011383332553, + "grad_norm": 0.621087723819579, + "learning_rate": 4.6239615198058764e-06, + "loss": 0.3037, + "step": 8605 + }, + { + "epoch": 0.4031479833231836, + "grad_norm": 0.5852944631435459, + "learning_rate": 4.62386148227536e-06, + "loss": 0.2933, + "step": 8606 + }, + { + "epoch": 0.40319482831311193, + "grad_norm": 0.5957696864271578, + "learning_rate": 4.623761432522555e-06, + "loss": 0.2871, + "step": 8607 + }, + { + "epoch": 0.4032416733030402, + "grad_norm": 0.6961905648357694, + "learning_rate": 4.623661370548038e-06, + "loss": 0.3002, + "step": 8608 + }, + { + "epoch": 0.4032885182929686, + "grad_norm": 0.7022502333552141, + "learning_rate": 4.623561296352382e-06, + "loss": 0.3062, + "step": 8609 + }, + { + "epoch": 0.4033353632828969, + "grad_norm": 0.6162487637711235, + "learning_rate": 4.6234612099361655e-06, + "loss": 0.3069, + "step": 8610 + }, + { + "epoch": 0.4033822082728252, + "grad_norm": 0.6298181271328221, + "learning_rate": 4.623361111299963e-06, + "loss": 0.3081, + "step": 8611 + }, + { + "epoch": 0.4034290532627535, + "grad_norm": 0.6240175237742414, + "learning_rate": 4.623261000444351e-06, + "loss": 0.2976, + "step": 8612 + }, + { + "epoch": 0.4034758982526819, + "grad_norm": 0.5728167345032632, + "learning_rate": 4.6231608773699055e-06, + "loss": 0.2781, + "step": 8613 + }, + { + "epoch": 0.4035227432426102, + "grad_norm": 0.616956679192847, + "learning_rate": 4.623060742077204e-06, + "loss": 0.2923, + "step": 8614 + }, + { + "epoch": 0.4035695882325385, + "grad_norm": 0.6866788682306535, + "learning_rate": 4.62296059456682e-06, + "loss": 0.3003, + "step": 8615 + }, + { + "epoch": 0.4036164332224669, + "grad_norm": 0.6261258756251793, + "learning_rate": 4.622860434839331e-06, + "loss": 0.2621, + "step": 8616 + }, + { + "epoch": 0.40366327821239517, + "grad_norm": 0.5636020703170888, + "learning_rate": 4.622760262895315e-06, + "loss": 0.2854, + "step": 8617 + }, + { + "epoch": 0.4037101232023235, + "grad_norm": 0.6111755939842697, + "learning_rate": 4.6226600787353475e-06, + "loss": 0.2896, + "step": 8618 + }, + { + "epoch": 0.4037569681922518, + "grad_norm": 0.5814695360354663, + "learning_rate": 4.622559882360004e-06, + "loss": 0.2799, + "step": 8619 + }, + { + "epoch": 0.40380381318218017, + "grad_norm": 0.6419358420205452, + "learning_rate": 4.622459673769861e-06, + "loss": 0.2916, + "step": 8620 + }, + { + "epoch": 0.40385065817210847, + "grad_norm": 0.6264759652435784, + "learning_rate": 4.622359452965497e-06, + "loss": 0.2886, + "step": 8621 + }, + { + "epoch": 0.4038975031620368, + "grad_norm": 0.6694052125972203, + "learning_rate": 4.622259219947488e-06, + "loss": 0.3005, + "step": 8622 + }, + { + "epoch": 0.40394434815196517, + "grad_norm": 0.6623994358756243, + "learning_rate": 4.622158974716411e-06, + "loss": 0.3087, + "step": 8623 + }, + { + "epoch": 0.40399119314189347, + "grad_norm": 0.6518706968436938, + "learning_rate": 4.622058717272841e-06, + "loss": 0.3103, + "step": 8624 + }, + { + "epoch": 0.4040380381318218, + "grad_norm": 0.6824021129816926, + "learning_rate": 4.621958447617357e-06, + "loss": 0.305, + "step": 8625 + }, + { + "epoch": 0.4040848831217501, + "grad_norm": 0.6228368943410892, + "learning_rate": 4.621858165750537e-06, + "loss": 0.2918, + "step": 8626 + }, + { + "epoch": 0.40413172811167847, + "grad_norm": 0.6055476658714016, + "learning_rate": 4.621757871672955e-06, + "loss": 0.2839, + "step": 8627 + }, + { + "epoch": 0.40417857310160676, + "grad_norm": 0.6304794036649758, + "learning_rate": 4.621657565385189e-06, + "loss": 0.2802, + "step": 8628 + }, + { + "epoch": 0.4042254180915351, + "grad_norm": 0.6228320465959053, + "learning_rate": 4.621557246887819e-06, + "loss": 0.3089, + "step": 8629 + }, + { + "epoch": 0.4042722630814634, + "grad_norm": 0.6059289907059473, + "learning_rate": 4.62145691618142e-06, + "loss": 0.2812, + "step": 8630 + }, + { + "epoch": 0.40431910807139176, + "grad_norm": 0.5715437924002671, + "learning_rate": 4.621356573266568e-06, + "loss": 0.2773, + "step": 8631 + }, + { + "epoch": 0.4043659530613201, + "grad_norm": 0.5887095323539946, + "learning_rate": 4.6212562181438435e-06, + "loss": 0.3015, + "step": 8632 + }, + { + "epoch": 0.4044127980512484, + "grad_norm": 0.6472036057355334, + "learning_rate": 4.621155850813822e-06, + "loss": 0.3231, + "step": 8633 + }, + { + "epoch": 0.40445964304117676, + "grad_norm": 0.6100243934826486, + "learning_rate": 4.621055471277082e-06, + "loss": 0.2886, + "step": 8634 + }, + { + "epoch": 0.40450648803110506, + "grad_norm": 0.6197731109323414, + "learning_rate": 4.6209550795342005e-06, + "loss": 0.3034, + "step": 8635 + }, + { + "epoch": 0.4045533330210334, + "grad_norm": 0.5720432740297311, + "learning_rate": 4.6208546755857556e-06, + "loss": 0.2929, + "step": 8636 + }, + { + "epoch": 0.4046001780109617, + "grad_norm": 0.6663414416961846, + "learning_rate": 4.620754259432326e-06, + "loss": 0.2858, + "step": 8637 + }, + { + "epoch": 0.40464702300089006, + "grad_norm": 0.6274516249234605, + "learning_rate": 4.620653831074488e-06, + "loss": 0.3093, + "step": 8638 + }, + { + "epoch": 0.40469386799081836, + "grad_norm": 0.6546045914650813, + "learning_rate": 4.62055339051282e-06, + "loss": 0.2878, + "step": 8639 + }, + { + "epoch": 0.4047407129807467, + "grad_norm": 0.6031585615088212, + "learning_rate": 4.6204529377479e-06, + "loss": 0.2889, + "step": 8640 + }, + { + "epoch": 0.40478755797067506, + "grad_norm": 0.6539261523048918, + "learning_rate": 4.620352472780307e-06, + "loss": 0.2962, + "step": 8641 + }, + { + "epoch": 0.40483440296060336, + "grad_norm": 0.6346823961959533, + "learning_rate": 4.6202519956106185e-06, + "loss": 0.3095, + "step": 8642 + }, + { + "epoch": 0.4048812479505317, + "grad_norm": 0.6628770059190068, + "learning_rate": 4.620151506239412e-06, + "loss": 0.3284, + "step": 8643 + }, + { + "epoch": 0.40492809294046, + "grad_norm": 0.6187281180221658, + "learning_rate": 4.620051004667268e-06, + "loss": 0.3241, + "step": 8644 + }, + { + "epoch": 0.40497493793038836, + "grad_norm": 0.6250251120445671, + "learning_rate": 4.619950490894761e-06, + "loss": 0.2859, + "step": 8645 + }, + { + "epoch": 0.40502178292031665, + "grad_norm": 0.5637791640660027, + "learning_rate": 4.619849964922473e-06, + "loss": 0.2631, + "step": 8646 + }, + { + "epoch": 0.405068627910245, + "grad_norm": 0.598384625016336, + "learning_rate": 4.6197494267509815e-06, + "loss": 0.2799, + "step": 8647 + }, + { + "epoch": 0.4051154729001733, + "grad_norm": 0.6148451192746766, + "learning_rate": 4.619648876380865e-06, + "loss": 0.2895, + "step": 8648 + }, + { + "epoch": 0.40516231789010165, + "grad_norm": 0.6036715087106567, + "learning_rate": 4.619548313812701e-06, + "loss": 0.2719, + "step": 8649 + }, + { + "epoch": 0.40520916288003, + "grad_norm": 0.6703598157242887, + "learning_rate": 4.6194477390470694e-06, + "loss": 0.2879, + "step": 8650 + }, + { + "epoch": 0.4052560078699583, + "grad_norm": 0.5955133107890987, + "learning_rate": 4.619347152084549e-06, + "loss": 0.2937, + "step": 8651 + }, + { + "epoch": 0.40530285285988665, + "grad_norm": 0.5874560248657017, + "learning_rate": 4.619246552925718e-06, + "loss": 0.2916, + "step": 8652 + }, + { + "epoch": 0.40534969784981495, + "grad_norm": 0.5833676198476183, + "learning_rate": 4.619145941571157e-06, + "loss": 0.2971, + "step": 8653 + }, + { + "epoch": 0.4053965428397433, + "grad_norm": 0.647687775643518, + "learning_rate": 4.619045318021442e-06, + "loss": 0.311, + "step": 8654 + }, + { + "epoch": 0.4054433878296716, + "grad_norm": 0.6529814777816152, + "learning_rate": 4.618944682277155e-06, + "loss": 0.2979, + "step": 8655 + }, + { + "epoch": 0.40549023281959995, + "grad_norm": 0.6409125388227094, + "learning_rate": 4.618844034338874e-06, + "loss": 0.303, + "step": 8656 + }, + { + "epoch": 0.40553707780952825, + "grad_norm": 0.5909003858406173, + "learning_rate": 4.618743374207178e-06, + "loss": 0.3011, + "step": 8657 + }, + { + "epoch": 0.4055839227994566, + "grad_norm": 0.5781934106780838, + "learning_rate": 4.618642701882646e-06, + "loss": 0.295, + "step": 8658 + }, + { + "epoch": 0.40563076778938495, + "grad_norm": 0.5989432744480301, + "learning_rate": 4.618542017365858e-06, + "loss": 0.2873, + "step": 8659 + }, + { + "epoch": 0.40567761277931325, + "grad_norm": 0.5760944323673816, + "learning_rate": 4.618441320657393e-06, + "loss": 0.2848, + "step": 8660 + }, + { + "epoch": 0.4057244577692416, + "grad_norm": 0.6255093945260068, + "learning_rate": 4.618340611757831e-06, + "loss": 0.2932, + "step": 8661 + }, + { + "epoch": 0.4057713027591699, + "grad_norm": 0.6049394455613514, + "learning_rate": 4.6182398906677505e-06, + "loss": 0.2954, + "step": 8662 + }, + { + "epoch": 0.40581814774909825, + "grad_norm": 0.597708546277453, + "learning_rate": 4.618139157387732e-06, + "loss": 0.2935, + "step": 8663 + }, + { + "epoch": 0.40586499273902654, + "grad_norm": 0.6379800919709265, + "learning_rate": 4.618038411918356e-06, + "loss": 0.3028, + "step": 8664 + }, + { + "epoch": 0.4059118377289549, + "grad_norm": 0.5883855660662012, + "learning_rate": 4.617937654260201e-06, + "loss": 0.2779, + "step": 8665 + }, + { + "epoch": 0.4059586827188832, + "grad_norm": 0.6036445838182213, + "learning_rate": 4.617836884413846e-06, + "loss": 0.2819, + "step": 8666 + }, + { + "epoch": 0.40600552770881154, + "grad_norm": 0.6086687034889314, + "learning_rate": 4.617736102379873e-06, + "loss": 0.2937, + "step": 8667 + }, + { + "epoch": 0.4060523726987399, + "grad_norm": 0.5710152566775081, + "learning_rate": 4.61763530815886e-06, + "loss": 0.2728, + "step": 8668 + }, + { + "epoch": 0.4060992176886682, + "grad_norm": 0.6009013190409853, + "learning_rate": 4.617534501751389e-06, + "loss": 0.2824, + "step": 8669 + }, + { + "epoch": 0.40614606267859654, + "grad_norm": 0.5630879140324582, + "learning_rate": 4.617433683158039e-06, + "loss": 0.2859, + "step": 8670 + }, + { + "epoch": 0.40619290766852484, + "grad_norm": 0.7828476206215812, + "learning_rate": 4.61733285237939e-06, + "loss": 0.3098, + "step": 8671 + }, + { + "epoch": 0.4062397526584532, + "grad_norm": 0.5798174625569532, + "learning_rate": 4.617232009416024e-06, + "loss": 0.2939, + "step": 8672 + }, + { + "epoch": 0.4062865976483815, + "grad_norm": 0.5769100396593089, + "learning_rate": 4.617131154268518e-06, + "loss": 0.2937, + "step": 8673 + }, + { + "epoch": 0.40633344263830984, + "grad_norm": 0.6316812100748027, + "learning_rate": 4.617030286937455e-06, + "loss": 0.3332, + "step": 8674 + }, + { + "epoch": 0.40638028762823813, + "grad_norm": 0.5871353721180803, + "learning_rate": 4.616929407423416e-06, + "loss": 0.2882, + "step": 8675 + }, + { + "epoch": 0.4064271326181665, + "grad_norm": 0.6284412508667113, + "learning_rate": 4.6168285157269785e-06, + "loss": 0.2903, + "step": 8676 + }, + { + "epoch": 0.40647397760809484, + "grad_norm": 0.5863136416099259, + "learning_rate": 4.616727611848726e-06, + "loss": 0.2815, + "step": 8677 + }, + { + "epoch": 0.40652082259802313, + "grad_norm": 0.6750663284792869, + "learning_rate": 4.616626695789238e-06, + "loss": 0.3012, + "step": 8678 + }, + { + "epoch": 0.4065676675879515, + "grad_norm": 0.648334764839544, + "learning_rate": 4.616525767549095e-06, + "loss": 0.3046, + "step": 8679 + }, + { + "epoch": 0.4066145125778798, + "grad_norm": 0.6002649752707524, + "learning_rate": 4.616424827128878e-06, + "loss": 0.3013, + "step": 8680 + }, + { + "epoch": 0.40666135756780813, + "grad_norm": 0.5860059922881815, + "learning_rate": 4.616323874529169e-06, + "loss": 0.2701, + "step": 8681 + }, + { + "epoch": 0.40670820255773643, + "grad_norm": 0.6160929412065946, + "learning_rate": 4.616222909750547e-06, + "loss": 0.2852, + "step": 8682 + }, + { + "epoch": 0.4067550475476648, + "grad_norm": 0.5678739404753828, + "learning_rate": 4.616121932793595e-06, + "loss": 0.2641, + "step": 8683 + }, + { + "epoch": 0.4068018925375931, + "grad_norm": 0.6010789286402058, + "learning_rate": 4.616020943658892e-06, + "loss": 0.2872, + "step": 8684 + }, + { + "epoch": 0.40684873752752143, + "grad_norm": 0.5911122803676647, + "learning_rate": 4.615919942347022e-06, + "loss": 0.3039, + "step": 8685 + }, + { + "epoch": 0.4068955825174498, + "grad_norm": 0.5856159746167968, + "learning_rate": 4.615818928858563e-06, + "loss": 0.2901, + "step": 8686 + }, + { + "epoch": 0.4069424275073781, + "grad_norm": 0.6201680344436782, + "learning_rate": 4.615717903194098e-06, + "loss": 0.2886, + "step": 8687 + }, + { + "epoch": 0.40698927249730643, + "grad_norm": 0.5885513929544374, + "learning_rate": 4.615616865354209e-06, + "loss": 0.2742, + "step": 8688 + }, + { + "epoch": 0.4070361174872347, + "grad_norm": 0.6099949698784651, + "learning_rate": 4.615515815339476e-06, + "loss": 0.3035, + "step": 8689 + }, + { + "epoch": 0.4070829624771631, + "grad_norm": 0.5979953738125338, + "learning_rate": 4.615414753150482e-06, + "loss": 0.2883, + "step": 8690 + }, + { + "epoch": 0.4071298074670914, + "grad_norm": 0.5869884506287417, + "learning_rate": 4.615313678787807e-06, + "loss": 0.2806, + "step": 8691 + }, + { + "epoch": 0.4071766524570197, + "grad_norm": 0.5977232684447903, + "learning_rate": 4.615212592252034e-06, + "loss": 0.3271, + "step": 8692 + }, + { + "epoch": 0.407223497446948, + "grad_norm": 0.6033129252080153, + "learning_rate": 4.615111493543744e-06, + "loss": 0.3098, + "step": 8693 + }, + { + "epoch": 0.4072703424368764, + "grad_norm": 0.7124140546078874, + "learning_rate": 4.615010382663519e-06, + "loss": 0.3035, + "step": 8694 + }, + { + "epoch": 0.4073171874268047, + "grad_norm": 0.5911942959593482, + "learning_rate": 4.61490925961194e-06, + "loss": 0.2817, + "step": 8695 + }, + { + "epoch": 0.407364032416733, + "grad_norm": 0.6578838165417463, + "learning_rate": 4.614808124389591e-06, + "loss": 0.3081, + "step": 8696 + }, + { + "epoch": 0.4074108774066614, + "grad_norm": 0.6089854802776196, + "learning_rate": 4.614706976997052e-06, + "loss": 0.2987, + "step": 8697 + }, + { + "epoch": 0.40745772239658967, + "grad_norm": 0.5482237216325192, + "learning_rate": 4.614605817434907e-06, + "loss": 0.273, + "step": 8698 + }, + { + "epoch": 0.407504567386518, + "grad_norm": 0.6568619829469103, + "learning_rate": 4.614504645703735e-06, + "loss": 0.3014, + "step": 8699 + }, + { + "epoch": 0.4075514123764463, + "grad_norm": 0.6115506009813719, + "learning_rate": 4.614403461804121e-06, + "loss": 0.2894, + "step": 8700 + }, + { + "epoch": 0.40759825736637467, + "grad_norm": 0.6286544277227188, + "learning_rate": 4.614302265736648e-06, + "loss": 0.3076, + "step": 8701 + }, + { + "epoch": 0.40764510235630297, + "grad_norm": 0.5775114478334397, + "learning_rate": 4.614201057501895e-06, + "loss": 0.2952, + "step": 8702 + }, + { + "epoch": 0.4076919473462313, + "grad_norm": 0.6077176254923677, + "learning_rate": 4.614099837100447e-06, + "loss": 0.2828, + "step": 8703 + }, + { + "epoch": 0.40773879233615967, + "grad_norm": 0.6155094205474976, + "learning_rate": 4.613998604532885e-06, + "loss": 0.2888, + "step": 8704 + }, + { + "epoch": 0.40778563732608797, + "grad_norm": 0.6409560128990501, + "learning_rate": 4.613897359799794e-06, + "loss": 0.284, + "step": 8705 + }, + { + "epoch": 0.4078324823160163, + "grad_norm": 0.5864176778066603, + "learning_rate": 4.613796102901754e-06, + "loss": 0.2993, + "step": 8706 + }, + { + "epoch": 0.4078793273059446, + "grad_norm": 0.5541243500843176, + "learning_rate": 4.613694833839349e-06, + "loss": 0.2586, + "step": 8707 + }, + { + "epoch": 0.40792617229587297, + "grad_norm": 0.6352780332227893, + "learning_rate": 4.613593552613162e-06, + "loss": 0.2976, + "step": 8708 + }, + { + "epoch": 0.40797301728580126, + "grad_norm": 0.6987571027569817, + "learning_rate": 4.613492259223774e-06, + "loss": 0.286, + "step": 8709 + }, + { + "epoch": 0.4080198622757296, + "grad_norm": 0.5734043454983905, + "learning_rate": 4.61339095367177e-06, + "loss": 0.2811, + "step": 8710 + }, + { + "epoch": 0.4080667072656579, + "grad_norm": 0.6473029828616277, + "learning_rate": 4.613289635957733e-06, + "loss": 0.3084, + "step": 8711 + }, + { + "epoch": 0.40811355225558626, + "grad_norm": 0.6632740984383084, + "learning_rate": 4.613188306082243e-06, + "loss": 0.2968, + "step": 8712 + }, + { + "epoch": 0.4081603972455146, + "grad_norm": 0.6764585765498883, + "learning_rate": 4.613086964045888e-06, + "loss": 0.3036, + "step": 8713 + }, + { + "epoch": 0.4082072422354429, + "grad_norm": 0.6261551388126996, + "learning_rate": 4.6129856098492474e-06, + "loss": 0.278, + "step": 8714 + }, + { + "epoch": 0.40825408722537126, + "grad_norm": 0.6174921104130826, + "learning_rate": 4.6128842434929054e-06, + "loss": 0.2981, + "step": 8715 + }, + { + "epoch": 0.40830093221529956, + "grad_norm": 0.6016703780659484, + "learning_rate": 4.612782864977446e-06, + "loss": 0.2808, + "step": 8716 + }, + { + "epoch": 0.4083477772052279, + "grad_norm": 0.6482959862576428, + "learning_rate": 4.612681474303453e-06, + "loss": 0.3039, + "step": 8717 + }, + { + "epoch": 0.4083946221951562, + "grad_norm": 0.5822968300251846, + "learning_rate": 4.6125800714715084e-06, + "loss": 0.2911, + "step": 8718 + }, + { + "epoch": 0.40844146718508456, + "grad_norm": 0.5858734096847206, + "learning_rate": 4.612478656482196e-06, + "loss": 0.2846, + "step": 8719 + }, + { + "epoch": 0.40848831217501286, + "grad_norm": 0.610824666857837, + "learning_rate": 4.6123772293361005e-06, + "loss": 0.2531, + "step": 8720 + }, + { + "epoch": 0.4085351571649412, + "grad_norm": 0.6086265782484644, + "learning_rate": 4.6122757900338054e-06, + "loss": 0.2931, + "step": 8721 + }, + { + "epoch": 0.40858200215486956, + "grad_norm": 0.6016427000737679, + "learning_rate": 4.612174338575893e-06, + "loss": 0.2885, + "step": 8722 + }, + { + "epoch": 0.40862884714479786, + "grad_norm": 0.5675323785132387, + "learning_rate": 4.612072874962949e-06, + "loss": 0.2694, + "step": 8723 + }, + { + "epoch": 0.4086756921347262, + "grad_norm": 0.6704311058190693, + "learning_rate": 4.611971399195556e-06, + "loss": 0.2968, + "step": 8724 + }, + { + "epoch": 0.4087225371246545, + "grad_norm": 0.6027875262815126, + "learning_rate": 4.6118699112742986e-06, + "loss": 0.3059, + "step": 8725 + }, + { + "epoch": 0.40876938211458286, + "grad_norm": 0.6232257312482397, + "learning_rate": 4.61176841119976e-06, + "loss": 0.2852, + "step": 8726 + }, + { + "epoch": 0.40881622710451115, + "grad_norm": 0.605841325966805, + "learning_rate": 4.611666898972526e-06, + "loss": 0.2923, + "step": 8727 + }, + { + "epoch": 0.4088630720944395, + "grad_norm": 0.6016132546975586, + "learning_rate": 4.61156537459318e-06, + "loss": 0.3029, + "step": 8728 + }, + { + "epoch": 0.4089099170843678, + "grad_norm": 0.6603647467854514, + "learning_rate": 4.611463838062305e-06, + "loss": 0.302, + "step": 8729 + }, + { + "epoch": 0.40895676207429615, + "grad_norm": 0.5729983474103172, + "learning_rate": 4.611362289380487e-06, + "loss": 0.2905, + "step": 8730 + }, + { + "epoch": 0.4090036070642245, + "grad_norm": 0.6483397257721644, + "learning_rate": 4.611260728548309e-06, + "loss": 0.3142, + "step": 8731 + }, + { + "epoch": 0.4090504520541528, + "grad_norm": 0.6113887063799347, + "learning_rate": 4.611159155566356e-06, + "loss": 0.2959, + "step": 8732 + }, + { + "epoch": 0.40909729704408115, + "grad_norm": 0.6368592750187374, + "learning_rate": 4.611057570435214e-06, + "loss": 0.305, + "step": 8733 + }, + { + "epoch": 0.40914414203400945, + "grad_norm": 0.5852951240798075, + "learning_rate": 4.610955973155464e-06, + "loss": 0.2736, + "step": 8734 + }, + { + "epoch": 0.4091909870239378, + "grad_norm": 0.5616919563831995, + "learning_rate": 4.610854363727694e-06, + "loss": 0.2749, + "step": 8735 + }, + { + "epoch": 0.4092378320138661, + "grad_norm": 0.5697504232763844, + "learning_rate": 4.610752742152489e-06, + "loss": 0.2579, + "step": 8736 + }, + { + "epoch": 0.40928467700379445, + "grad_norm": 0.5741662948129758, + "learning_rate": 4.6106511084304315e-06, + "loss": 0.2774, + "step": 8737 + }, + { + "epoch": 0.40933152199372275, + "grad_norm": 0.5574766935957883, + "learning_rate": 4.610549462562107e-06, + "loss": 0.2734, + "step": 8738 + }, + { + "epoch": 0.4093783669836511, + "grad_norm": 0.5839772064501909, + "learning_rate": 4.610447804548102e-06, + "loss": 0.27, + "step": 8739 + }, + { + "epoch": 0.40942521197357945, + "grad_norm": 0.655559374284203, + "learning_rate": 4.6103461343889994e-06, + "loss": 0.3117, + "step": 8740 + }, + { + "epoch": 0.40947205696350775, + "grad_norm": 0.6477245487929768, + "learning_rate": 4.610244452085385e-06, + "loss": 0.3068, + "step": 8741 + }, + { + "epoch": 0.4095189019534361, + "grad_norm": 0.629018942155701, + "learning_rate": 4.610142757637845e-06, + "loss": 0.3002, + "step": 8742 + }, + { + "epoch": 0.4095657469433644, + "grad_norm": 0.6170915664296767, + "learning_rate": 4.610041051046963e-06, + "loss": 0.2977, + "step": 8743 + }, + { + "epoch": 0.40961259193329275, + "grad_norm": 0.6343599175279094, + "learning_rate": 4.609939332313325e-06, + "loss": 0.2931, + "step": 8744 + }, + { + "epoch": 0.40965943692322104, + "grad_norm": 0.6430207265570429, + "learning_rate": 4.609837601437517e-06, + "loss": 0.3092, + "step": 8745 + }, + { + "epoch": 0.4097062819131494, + "grad_norm": 0.5786127939764387, + "learning_rate": 4.609735858420124e-06, + "loss": 0.2931, + "step": 8746 + }, + { + "epoch": 0.4097531269030777, + "grad_norm": 0.5486500945886934, + "learning_rate": 4.609634103261731e-06, + "loss": 0.2744, + "step": 8747 + }, + { + "epoch": 0.40979997189300604, + "grad_norm": 0.6166831406437296, + "learning_rate": 4.609532335962924e-06, + "loss": 0.2962, + "step": 8748 + }, + { + "epoch": 0.4098468168829344, + "grad_norm": 0.6570769808795228, + "learning_rate": 4.609430556524289e-06, + "loss": 0.2945, + "step": 8749 + }, + { + "epoch": 0.4098936618728627, + "grad_norm": 0.6779795118289781, + "learning_rate": 4.609328764946411e-06, + "loss": 0.2914, + "step": 8750 + }, + { + "epoch": 0.40994050686279104, + "grad_norm": 0.5738659811578843, + "learning_rate": 4.609226961229876e-06, + "loss": 0.2945, + "step": 8751 + }, + { + "epoch": 0.40998735185271934, + "grad_norm": 0.6075196568946366, + "learning_rate": 4.609125145375271e-06, + "loss": 0.2979, + "step": 8752 + }, + { + "epoch": 0.4100341968426477, + "grad_norm": 0.6024115865460041, + "learning_rate": 4.609023317383179e-06, + "loss": 0.2891, + "step": 8753 + }, + { + "epoch": 0.410081041832576, + "grad_norm": 0.5876865373108472, + "learning_rate": 4.608921477254189e-06, + "loss": 0.2875, + "step": 8754 + }, + { + "epoch": 0.41012788682250434, + "grad_norm": 0.5887755629998075, + "learning_rate": 4.608819624988886e-06, + "loss": 0.276, + "step": 8755 + }, + { + "epoch": 0.41017473181243264, + "grad_norm": 0.6017573409120713, + "learning_rate": 4.608717760587856e-06, + "loss": 0.283, + "step": 8756 + }, + { + "epoch": 0.410221576802361, + "grad_norm": 0.5717545590036358, + "learning_rate": 4.608615884051686e-06, + "loss": 0.2832, + "step": 8757 + }, + { + "epoch": 0.41026842179228934, + "grad_norm": 0.6010593569618756, + "learning_rate": 4.60851399538096e-06, + "loss": 0.2714, + "step": 8758 + }, + { + "epoch": 0.41031526678221764, + "grad_norm": 0.5962415134388862, + "learning_rate": 4.608412094576267e-06, + "loss": 0.2941, + "step": 8759 + }, + { + "epoch": 0.410362111772146, + "grad_norm": 0.6335470548204533, + "learning_rate": 4.608310181638192e-06, + "loss": 0.2828, + "step": 8760 + }, + { + "epoch": 0.4104089567620743, + "grad_norm": 0.6032254378676172, + "learning_rate": 4.608208256567322e-06, + "loss": 0.2935, + "step": 8761 + }, + { + "epoch": 0.41045580175200264, + "grad_norm": 0.5900454106788189, + "learning_rate": 4.6081063193642425e-06, + "loss": 0.3039, + "step": 8762 + }, + { + "epoch": 0.41050264674193093, + "grad_norm": 0.5727159227680806, + "learning_rate": 4.608004370029542e-06, + "loss": 0.2821, + "step": 8763 + }, + { + "epoch": 0.4105494917318593, + "grad_norm": 0.6623887186928964, + "learning_rate": 4.607902408563806e-06, + "loss": 0.3076, + "step": 8764 + }, + { + "epoch": 0.4105963367217876, + "grad_norm": 0.6234190450232915, + "learning_rate": 4.6078004349676215e-06, + "loss": 0.2948, + "step": 8765 + }, + { + "epoch": 0.41064318171171593, + "grad_norm": 0.5841698112576406, + "learning_rate": 4.607698449241575e-06, + "loss": 0.2679, + "step": 8766 + }, + { + "epoch": 0.4106900267016443, + "grad_norm": 0.5901953880621695, + "learning_rate": 4.6075964513862535e-06, + "loss": 0.2951, + "step": 8767 + }, + { + "epoch": 0.4107368716915726, + "grad_norm": 0.5926626556977563, + "learning_rate": 4.607494441402245e-06, + "loss": 0.2911, + "step": 8768 + }, + { + "epoch": 0.41078371668150093, + "grad_norm": 0.6061419606292714, + "learning_rate": 4.607392419290135e-06, + "loss": 0.2818, + "step": 8769 + }, + { + "epoch": 0.41083056167142923, + "grad_norm": 0.6497408329153112, + "learning_rate": 4.607290385050511e-06, + "loss": 0.295, + "step": 8770 + }, + { + "epoch": 0.4108774066613576, + "grad_norm": 0.6287288902901734, + "learning_rate": 4.607188338683961e-06, + "loss": 0.3214, + "step": 8771 + }, + { + "epoch": 0.4109242516512859, + "grad_norm": 0.6204227377561234, + "learning_rate": 4.607086280191072e-06, + "loss": 0.2755, + "step": 8772 + }, + { + "epoch": 0.41097109664121423, + "grad_norm": 0.5993568441859738, + "learning_rate": 4.606984209572431e-06, + "loss": 0.2994, + "step": 8773 + }, + { + "epoch": 0.4110179416311425, + "grad_norm": 0.5636492415779619, + "learning_rate": 4.606882126828625e-06, + "loss": 0.285, + "step": 8774 + }, + { + "epoch": 0.4110647866210709, + "grad_norm": 0.5958139920889239, + "learning_rate": 4.606780031960242e-06, + "loss": 0.2683, + "step": 8775 + }, + { + "epoch": 0.41111163161099923, + "grad_norm": 0.6463583994807994, + "learning_rate": 4.606677924967869e-06, + "loss": 0.3187, + "step": 8776 + }, + { + "epoch": 0.4111584766009275, + "grad_norm": 0.6592335358491249, + "learning_rate": 4.606575805852095e-06, + "loss": 0.3066, + "step": 8777 + }, + { + "epoch": 0.4112053215908559, + "grad_norm": 0.5986463475225497, + "learning_rate": 4.606473674613507e-06, + "loss": 0.2759, + "step": 8778 + }, + { + "epoch": 0.4112521665807842, + "grad_norm": 0.5603771565428856, + "learning_rate": 4.60637153125269e-06, + "loss": 0.2846, + "step": 8779 + }, + { + "epoch": 0.4112990115707125, + "grad_norm": 0.5643772396606281, + "learning_rate": 4.606269375770237e-06, + "loss": 0.2915, + "step": 8780 + }, + { + "epoch": 0.4113458565606408, + "grad_norm": 0.6000580637776152, + "learning_rate": 4.606167208166732e-06, + "loss": 0.3071, + "step": 8781 + }, + { + "epoch": 0.4113927015505692, + "grad_norm": 0.6187873036754209, + "learning_rate": 4.606065028442764e-06, + "loss": 0.2995, + "step": 8782 + }, + { + "epoch": 0.41143954654049747, + "grad_norm": 0.568539273533983, + "learning_rate": 4.605962836598921e-06, + "loss": 0.2948, + "step": 8783 + }, + { + "epoch": 0.4114863915304258, + "grad_norm": 0.6435394813952348, + "learning_rate": 4.605860632635791e-06, + "loss": 0.2953, + "step": 8784 + }, + { + "epoch": 0.4115332365203542, + "grad_norm": 0.5972458318369681, + "learning_rate": 4.605758416553963e-06, + "loss": 0.2884, + "step": 8785 + }, + { + "epoch": 0.41158008151028247, + "grad_norm": 0.6711625037971767, + "learning_rate": 4.605656188354025e-06, + "loss": 0.3119, + "step": 8786 + }, + { + "epoch": 0.4116269265002108, + "grad_norm": 0.6091356283855369, + "learning_rate": 4.605553948036564e-06, + "loss": 0.2849, + "step": 8787 + }, + { + "epoch": 0.4116737714901391, + "grad_norm": 0.6057388048758423, + "learning_rate": 4.605451695602169e-06, + "loss": 0.2707, + "step": 8788 + }, + { + "epoch": 0.41172061648006747, + "grad_norm": 0.5330382463328583, + "learning_rate": 4.605349431051429e-06, + "loss": 0.2726, + "step": 8789 + }, + { + "epoch": 0.41176746146999577, + "grad_norm": 0.5761556881965529, + "learning_rate": 4.605247154384933e-06, + "loss": 0.2743, + "step": 8790 + }, + { + "epoch": 0.4118143064599241, + "grad_norm": 0.5758117378836316, + "learning_rate": 4.605144865603268e-06, + "loss": 0.258, + "step": 8791 + }, + { + "epoch": 0.4118611514498524, + "grad_norm": 0.6617273057500944, + "learning_rate": 4.605042564707023e-06, + "loss": 0.3059, + "step": 8792 + }, + { + "epoch": 0.41190799643978077, + "grad_norm": 0.535744972122774, + "learning_rate": 4.604940251696788e-06, + "loss": 0.2577, + "step": 8793 + }, + { + "epoch": 0.4119548414297091, + "grad_norm": 0.6089192242665002, + "learning_rate": 4.60483792657315e-06, + "loss": 0.3083, + "step": 8794 + }, + { + "epoch": 0.4120016864196374, + "grad_norm": 0.6081422064745111, + "learning_rate": 4.604735589336699e-06, + "loss": 0.2993, + "step": 8795 + }, + { + "epoch": 0.41204853140956577, + "grad_norm": 0.5764958034473602, + "learning_rate": 4.604633239988025e-06, + "loss": 0.2778, + "step": 8796 + }, + { + "epoch": 0.41209537639949406, + "grad_norm": 0.6428706023534821, + "learning_rate": 4.604530878527714e-06, + "loss": 0.2987, + "step": 8797 + }, + { + "epoch": 0.4121422213894224, + "grad_norm": 0.5899739805935983, + "learning_rate": 4.604428504956357e-06, + "loss": 0.2918, + "step": 8798 + }, + { + "epoch": 0.4121890663793507, + "grad_norm": 0.5682196867872318, + "learning_rate": 4.604326119274544e-06, + "loss": 0.2908, + "step": 8799 + }, + { + "epoch": 0.41223591136927906, + "grad_norm": 0.6463863787327956, + "learning_rate": 4.604223721482862e-06, + "loss": 0.3046, + "step": 8800 + }, + { + "epoch": 0.41228275635920736, + "grad_norm": 0.6288753852965446, + "learning_rate": 4.604121311581902e-06, + "loss": 0.3121, + "step": 8801 + }, + { + "epoch": 0.4123296013491357, + "grad_norm": 0.631540125837496, + "learning_rate": 4.604018889572253e-06, + "loss": 0.3111, + "step": 8802 + }, + { + "epoch": 0.41237644633906406, + "grad_norm": 0.591927642998439, + "learning_rate": 4.603916455454504e-06, + "loss": 0.2887, + "step": 8803 + }, + { + "epoch": 0.41242329132899236, + "grad_norm": 0.6250935017717906, + "learning_rate": 4.603814009229243e-06, + "loss": 0.2714, + "step": 8804 + }, + { + "epoch": 0.4124701363189207, + "grad_norm": 0.5944279040639083, + "learning_rate": 4.603711550897062e-06, + "loss": 0.2793, + "step": 8805 + }, + { + "epoch": 0.412516981308849, + "grad_norm": 0.621144039970112, + "learning_rate": 4.603609080458551e-06, + "loss": 0.3084, + "step": 8806 + }, + { + "epoch": 0.41256382629877736, + "grad_norm": 0.5607852402210868, + "learning_rate": 4.603506597914297e-06, + "loss": 0.2932, + "step": 8807 + }, + { + "epoch": 0.41261067128870565, + "grad_norm": 0.6092239810391066, + "learning_rate": 4.603404103264892e-06, + "loss": 0.3008, + "step": 8808 + }, + { + "epoch": 0.412657516278634, + "grad_norm": 0.6404144842602949, + "learning_rate": 4.603301596510924e-06, + "loss": 0.2874, + "step": 8809 + }, + { + "epoch": 0.4127043612685623, + "grad_norm": 0.5686193248871748, + "learning_rate": 4.6031990776529855e-06, + "loss": 0.3039, + "step": 8810 + }, + { + "epoch": 0.41275120625849065, + "grad_norm": 0.6563422215225814, + "learning_rate": 4.603096546691664e-06, + "loss": 0.3128, + "step": 8811 + }, + { + "epoch": 0.412798051248419, + "grad_norm": 0.5574392561590708, + "learning_rate": 4.602994003627551e-06, + "loss": 0.286, + "step": 8812 + }, + { + "epoch": 0.4128448962383473, + "grad_norm": 0.5518941168185385, + "learning_rate": 4.602891448461236e-06, + "loss": 0.2689, + "step": 8813 + }, + { + "epoch": 0.41289174122827565, + "grad_norm": 0.6059601478511546, + "learning_rate": 4.602788881193308e-06, + "loss": 0.292, + "step": 8814 + }, + { + "epoch": 0.41293858621820395, + "grad_norm": 0.6392778381298259, + "learning_rate": 4.602686301824361e-06, + "loss": 0.3113, + "step": 8815 + }, + { + "epoch": 0.4129854312081323, + "grad_norm": 0.617144665957547, + "learning_rate": 4.602583710354981e-06, + "loss": 0.266, + "step": 8816 + }, + { + "epoch": 0.4130322761980606, + "grad_norm": 0.6062360669971044, + "learning_rate": 4.60248110678576e-06, + "loss": 0.2818, + "step": 8817 + }, + { + "epoch": 0.41307912118798895, + "grad_norm": 0.623118479872317, + "learning_rate": 4.602378491117289e-06, + "loss": 0.3175, + "step": 8818 + }, + { + "epoch": 0.41312596617791725, + "grad_norm": 0.5836987114015386, + "learning_rate": 4.6022758633501585e-06, + "loss": 0.2971, + "step": 8819 + }, + { + "epoch": 0.4131728111678456, + "grad_norm": 0.5912525754852438, + "learning_rate": 4.602173223484959e-06, + "loss": 0.2934, + "step": 8820 + }, + { + "epoch": 0.41321965615777395, + "grad_norm": 0.6412196738465701, + "learning_rate": 4.60207057152228e-06, + "loss": 0.2942, + "step": 8821 + }, + { + "epoch": 0.41326650114770225, + "grad_norm": 0.6359985483944358, + "learning_rate": 4.6019679074627145e-06, + "loss": 0.2977, + "step": 8822 + }, + { + "epoch": 0.4133133461376306, + "grad_norm": 0.578061454013568, + "learning_rate": 4.601865231306851e-06, + "loss": 0.2875, + "step": 8823 + }, + { + "epoch": 0.4133601911275589, + "grad_norm": 0.6127392758145647, + "learning_rate": 4.601762543055282e-06, + "loss": 0.285, + "step": 8824 + }, + { + "epoch": 0.41340703611748725, + "grad_norm": 0.6203319161532945, + "learning_rate": 4.601659842708598e-06, + "loss": 0.294, + "step": 8825 + }, + { + "epoch": 0.41345388110741554, + "grad_norm": 0.6437173337129869, + "learning_rate": 4.601557130267389e-06, + "loss": 0.3129, + "step": 8826 + }, + { + "epoch": 0.4135007260973439, + "grad_norm": 0.6634639880076189, + "learning_rate": 4.601454405732248e-06, + "loss": 0.2959, + "step": 8827 + }, + { + "epoch": 0.4135475710872722, + "grad_norm": 0.5996644894543111, + "learning_rate": 4.601351669103764e-06, + "loss": 0.2745, + "step": 8828 + }, + { + "epoch": 0.41359441607720054, + "grad_norm": 0.6444110020585889, + "learning_rate": 4.601248920382529e-06, + "loss": 0.2984, + "step": 8829 + }, + { + "epoch": 0.4136412610671289, + "grad_norm": 0.5834606584909904, + "learning_rate": 4.601146159569135e-06, + "loss": 0.2871, + "step": 8830 + }, + { + "epoch": 0.4136881060570572, + "grad_norm": 0.604622915302667, + "learning_rate": 4.601043386664174e-06, + "loss": 0.2825, + "step": 8831 + }, + { + "epoch": 0.41373495104698554, + "grad_norm": 0.5751427086734134, + "learning_rate": 4.600940601668236e-06, + "loss": 0.2711, + "step": 8832 + }, + { + "epoch": 0.41378179603691384, + "grad_norm": 0.6157190009542108, + "learning_rate": 4.600837804581912e-06, + "loss": 0.2816, + "step": 8833 + }, + { + "epoch": 0.4138286410268422, + "grad_norm": 0.6164873470099238, + "learning_rate": 4.600734995405795e-06, + "loss": 0.2887, + "step": 8834 + }, + { + "epoch": 0.4138754860167705, + "grad_norm": 0.6541549477345983, + "learning_rate": 4.600632174140476e-06, + "loss": 0.3017, + "step": 8835 + }, + { + "epoch": 0.41392233100669884, + "grad_norm": 0.606790561828752, + "learning_rate": 4.600529340786547e-06, + "loss": 0.2832, + "step": 8836 + }, + { + "epoch": 0.41396917599662714, + "grad_norm": 0.5859863405360527, + "learning_rate": 4.600426495344599e-06, + "loss": 0.2792, + "step": 8837 + }, + { + "epoch": 0.4140160209865555, + "grad_norm": 0.6445255725693138, + "learning_rate": 4.600323637815226e-06, + "loss": 0.2888, + "step": 8838 + }, + { + "epoch": 0.41406286597648384, + "grad_norm": 0.5926742669286823, + "learning_rate": 4.600220768199017e-06, + "loss": 0.2824, + "step": 8839 + }, + { + "epoch": 0.41410971096641214, + "grad_norm": 0.5940479892037973, + "learning_rate": 4.600117886496565e-06, + "loss": 0.2911, + "step": 8840 + }, + { + "epoch": 0.4141565559563405, + "grad_norm": 0.6261881487271513, + "learning_rate": 4.600014992708464e-06, + "loss": 0.3213, + "step": 8841 + }, + { + "epoch": 0.4142034009462688, + "grad_norm": 0.6298237506464963, + "learning_rate": 4.599912086835303e-06, + "loss": 0.2867, + "step": 8842 + }, + { + "epoch": 0.41425024593619714, + "grad_norm": 0.5723704311237036, + "learning_rate": 4.599809168877676e-06, + "loss": 0.2689, + "step": 8843 + }, + { + "epoch": 0.41429709092612543, + "grad_norm": 0.6282835268149312, + "learning_rate": 4.599706238836176e-06, + "loss": 0.2847, + "step": 8844 + }, + { + "epoch": 0.4143439359160538, + "grad_norm": 0.6270941751102574, + "learning_rate": 4.599603296711393e-06, + "loss": 0.2879, + "step": 8845 + }, + { + "epoch": 0.4143907809059821, + "grad_norm": 0.6193160657063644, + "learning_rate": 4.599500342503922e-06, + "loss": 0.2877, + "step": 8846 + }, + { + "epoch": 0.41443762589591043, + "grad_norm": 0.6266805970290261, + "learning_rate": 4.599397376214353e-06, + "loss": 0.2933, + "step": 8847 + }, + { + "epoch": 0.4144844708858388, + "grad_norm": 0.6596164832222476, + "learning_rate": 4.599294397843281e-06, + "loss": 0.3064, + "step": 8848 + }, + { + "epoch": 0.4145313158757671, + "grad_norm": 0.6348743076864957, + "learning_rate": 4.599191407391296e-06, + "loss": 0.2853, + "step": 8849 + }, + { + "epoch": 0.41457816086569543, + "grad_norm": 0.6122612572424079, + "learning_rate": 4.5990884048589935e-06, + "loss": 0.3074, + "step": 8850 + }, + { + "epoch": 0.41462500585562373, + "grad_norm": 0.5813923466817439, + "learning_rate": 4.5989853902469635e-06, + "loss": 0.28, + "step": 8851 + }, + { + "epoch": 0.4146718508455521, + "grad_norm": 0.5687330190671989, + "learning_rate": 4.598882363555801e-06, + "loss": 0.2717, + "step": 8852 + }, + { + "epoch": 0.4147186958354804, + "grad_norm": 0.6298692310200782, + "learning_rate": 4.598779324786098e-06, + "loss": 0.3158, + "step": 8853 + }, + { + "epoch": 0.41476554082540873, + "grad_norm": 0.6293803951353678, + "learning_rate": 4.598676273938447e-06, + "loss": 0.2873, + "step": 8854 + }, + { + "epoch": 0.414812385815337, + "grad_norm": 0.5877304746618169, + "learning_rate": 4.598573211013441e-06, + "loss": 0.284, + "step": 8855 + }, + { + "epoch": 0.4148592308052654, + "grad_norm": 0.6090768748539188, + "learning_rate": 4.598470136011676e-06, + "loss": 0.2888, + "step": 8856 + }, + { + "epoch": 0.41490607579519373, + "grad_norm": 0.6373405625557448, + "learning_rate": 4.598367048933741e-06, + "loss": 0.329, + "step": 8857 + }, + { + "epoch": 0.414952920785122, + "grad_norm": 0.6237825017909133, + "learning_rate": 4.598263949780231e-06, + "loss": 0.2896, + "step": 8858 + }, + { + "epoch": 0.4149997657750504, + "grad_norm": 0.5898458739310094, + "learning_rate": 4.598160838551739e-06, + "loss": 0.2904, + "step": 8859 + }, + { + "epoch": 0.4150466107649787, + "grad_norm": 0.6197624100557818, + "learning_rate": 4.59805771524886e-06, + "loss": 0.3084, + "step": 8860 + }, + { + "epoch": 0.415093455754907, + "grad_norm": 0.6325903020068809, + "learning_rate": 4.597954579872186e-06, + "loss": 0.3004, + "step": 8861 + }, + { + "epoch": 0.4151403007448353, + "grad_norm": 0.5401233464887465, + "learning_rate": 4.59785143242231e-06, + "loss": 0.2615, + "step": 8862 + }, + { + "epoch": 0.4151871457347637, + "grad_norm": 0.6326154464917867, + "learning_rate": 4.597748272899827e-06, + "loss": 0.2868, + "step": 8863 + }, + { + "epoch": 0.41523399072469197, + "grad_norm": 0.5990555137969267, + "learning_rate": 4.597645101305329e-06, + "loss": 0.2851, + "step": 8864 + }, + { + "epoch": 0.4152808357146203, + "grad_norm": 0.5759044520815833, + "learning_rate": 4.597541917639411e-06, + "loss": 0.2629, + "step": 8865 + }, + { + "epoch": 0.4153276807045487, + "grad_norm": 0.659900482181257, + "learning_rate": 4.5974387219026665e-06, + "loss": 0.2825, + "step": 8866 + }, + { + "epoch": 0.41537452569447697, + "grad_norm": 0.6165295250171617, + "learning_rate": 4.597335514095689e-06, + "loss": 0.2793, + "step": 8867 + }, + { + "epoch": 0.4154213706844053, + "grad_norm": 0.5808019729398056, + "learning_rate": 4.597232294219074e-06, + "loss": 0.302, + "step": 8868 + }, + { + "epoch": 0.4154682156743336, + "grad_norm": 0.5825867875171319, + "learning_rate": 4.597129062273413e-06, + "loss": 0.2857, + "step": 8869 + }, + { + "epoch": 0.41551506066426197, + "grad_norm": 0.615557111076171, + "learning_rate": 4.597025818259302e-06, + "loss": 0.2984, + "step": 8870 + }, + { + "epoch": 0.41556190565419027, + "grad_norm": 0.5733383009158812, + "learning_rate": 4.596922562177334e-06, + "loss": 0.2886, + "step": 8871 + }, + { + "epoch": 0.4156087506441186, + "grad_norm": 0.5871832206430398, + "learning_rate": 4.596819294028103e-06, + "loss": 0.2892, + "step": 8872 + }, + { + "epoch": 0.4156555956340469, + "grad_norm": 0.6707156027500023, + "learning_rate": 4.596716013812205e-06, + "loss": 0.2876, + "step": 8873 + }, + { + "epoch": 0.41570244062397527, + "grad_norm": 0.621869121761059, + "learning_rate": 4.596612721530234e-06, + "loss": 0.2935, + "step": 8874 + }, + { + "epoch": 0.4157492856139036, + "grad_norm": 0.671984815770959, + "learning_rate": 4.596509417182783e-06, + "loss": 0.3035, + "step": 8875 + }, + { + "epoch": 0.4157961306038319, + "grad_norm": 0.592453267879297, + "learning_rate": 4.596406100770448e-06, + "loss": 0.2908, + "step": 8876 + }, + { + "epoch": 0.41584297559376027, + "grad_norm": 0.5700702890737865, + "learning_rate": 4.596302772293821e-06, + "loss": 0.288, + "step": 8877 + }, + { + "epoch": 0.41588982058368856, + "grad_norm": 0.5513382635463704, + "learning_rate": 4.5961994317535e-06, + "loss": 0.2724, + "step": 8878 + }, + { + "epoch": 0.4159366655736169, + "grad_norm": 0.5546844171719169, + "learning_rate": 4.596096079150077e-06, + "loss": 0.278, + "step": 8879 + }, + { + "epoch": 0.4159835105635452, + "grad_norm": 0.6652694121186157, + "learning_rate": 4.595992714484149e-06, + "loss": 0.3077, + "step": 8880 + }, + { + "epoch": 0.41603035555347356, + "grad_norm": 0.5580558278217757, + "learning_rate": 4.59588933775631e-06, + "loss": 0.2914, + "step": 8881 + }, + { + "epoch": 0.41607720054340186, + "grad_norm": 0.6209547821343978, + "learning_rate": 4.595785948967153e-06, + "loss": 0.2992, + "step": 8882 + }, + { + "epoch": 0.4161240455333302, + "grad_norm": 0.6378582294185541, + "learning_rate": 4.5956825481172765e-06, + "loss": 0.2816, + "step": 8883 + }, + { + "epoch": 0.41617089052325856, + "grad_norm": 0.5763411853709415, + "learning_rate": 4.595579135207272e-06, + "loss": 0.2696, + "step": 8884 + }, + { + "epoch": 0.41621773551318686, + "grad_norm": 0.5900480048866749, + "learning_rate": 4.595475710237736e-06, + "loss": 0.2737, + "step": 8885 + }, + { + "epoch": 0.4162645805031152, + "grad_norm": 0.5953284748570105, + "learning_rate": 4.595372273209265e-06, + "loss": 0.2945, + "step": 8886 + }, + { + "epoch": 0.4163114254930435, + "grad_norm": 0.5931501488428844, + "learning_rate": 4.595268824122454e-06, + "loss": 0.297, + "step": 8887 + }, + { + "epoch": 0.41635827048297186, + "grad_norm": 0.6205126541661667, + "learning_rate": 4.595165362977897e-06, + "loss": 0.3061, + "step": 8888 + }, + { + "epoch": 0.41640511547290016, + "grad_norm": 0.5769282545218178, + "learning_rate": 4.595061889776189e-06, + "loss": 0.2723, + "step": 8889 + }, + { + "epoch": 0.4164519604628285, + "grad_norm": 0.6382377783710449, + "learning_rate": 4.594958404517927e-06, + "loss": 0.2977, + "step": 8890 + }, + { + "epoch": 0.4164988054527568, + "grad_norm": 0.5707753275016447, + "learning_rate": 4.594854907203706e-06, + "loss": 0.2895, + "step": 8891 + }, + { + "epoch": 0.41654565044268516, + "grad_norm": 0.5949219530110861, + "learning_rate": 4.594751397834122e-06, + "loss": 0.2769, + "step": 8892 + }, + { + "epoch": 0.4165924954326135, + "grad_norm": 0.6356226136996511, + "learning_rate": 4.594647876409769e-06, + "loss": 0.2873, + "step": 8893 + }, + { + "epoch": 0.4166393404225418, + "grad_norm": 0.6556403868791764, + "learning_rate": 4.594544342931245e-06, + "loss": 0.2773, + "step": 8894 + }, + { + "epoch": 0.41668618541247016, + "grad_norm": 0.5788126573435223, + "learning_rate": 4.594440797399145e-06, + "loss": 0.2713, + "step": 8895 + }, + { + "epoch": 0.41673303040239845, + "grad_norm": 0.62925013354837, + "learning_rate": 4.594337239814063e-06, + "loss": 0.2821, + "step": 8896 + }, + { + "epoch": 0.4167798753923268, + "grad_norm": 0.5762757765123517, + "learning_rate": 4.594233670176597e-06, + "loss": 0.2644, + "step": 8897 + }, + { + "epoch": 0.4168267203822551, + "grad_norm": 0.6298928077347891, + "learning_rate": 4.594130088487344e-06, + "loss": 0.3096, + "step": 8898 + }, + { + "epoch": 0.41687356537218345, + "grad_norm": 0.6088996775924495, + "learning_rate": 4.5940264947468986e-06, + "loss": 0.3035, + "step": 8899 + }, + { + "epoch": 0.41692041036211175, + "grad_norm": 0.650640900070595, + "learning_rate": 4.593922888955856e-06, + "loss": 0.2798, + "step": 8900 + }, + { + "epoch": 0.4169672553520401, + "grad_norm": 0.605056482219107, + "learning_rate": 4.593819271114814e-06, + "loss": 0.2825, + "step": 8901 + }, + { + "epoch": 0.41701410034196845, + "grad_norm": 0.5905013938458983, + "learning_rate": 4.593715641224368e-06, + "loss": 0.3057, + "step": 8902 + }, + { + "epoch": 0.41706094533189675, + "grad_norm": 0.5858275627827837, + "learning_rate": 4.593611999285115e-06, + "loss": 0.2729, + "step": 8903 + }, + { + "epoch": 0.4171077903218251, + "grad_norm": 0.5825002645168708, + "learning_rate": 4.59350834529765e-06, + "loss": 0.3097, + "step": 8904 + }, + { + "epoch": 0.4171546353117534, + "grad_norm": 0.5327796517245014, + "learning_rate": 4.593404679262572e-06, + "loss": 0.2964, + "step": 8905 + }, + { + "epoch": 0.41720148030168175, + "grad_norm": 0.6001376794497125, + "learning_rate": 4.593301001180476e-06, + "loss": 0.3055, + "step": 8906 + }, + { + "epoch": 0.41724832529161004, + "grad_norm": 0.6285910837797577, + "learning_rate": 4.593197311051959e-06, + "loss": 0.2868, + "step": 8907 + }, + { + "epoch": 0.4172951702815384, + "grad_norm": 0.6005220915351154, + "learning_rate": 4.5930936088776166e-06, + "loss": 0.2927, + "step": 8908 + }, + { + "epoch": 0.4173420152714667, + "grad_norm": 0.6188338453853557, + "learning_rate": 4.592989894658048e-06, + "loss": 0.3052, + "step": 8909 + }, + { + "epoch": 0.41738886026139504, + "grad_norm": 0.5864431664998095, + "learning_rate": 4.592886168393848e-06, + "loss": 0.2763, + "step": 8910 + }, + { + "epoch": 0.4174357052513234, + "grad_norm": 0.59093682419857, + "learning_rate": 4.592782430085614e-06, + "loss": 0.2949, + "step": 8911 + }, + { + "epoch": 0.4174825502412517, + "grad_norm": 0.5625699109448082, + "learning_rate": 4.5926786797339425e-06, + "loss": 0.2683, + "step": 8912 + }, + { + "epoch": 0.41752939523118004, + "grad_norm": 0.6693132935268062, + "learning_rate": 4.592574917339432e-06, + "loss": 0.3032, + "step": 8913 + }, + { + "epoch": 0.41757624022110834, + "grad_norm": 0.670530529015677, + "learning_rate": 4.592471142902679e-06, + "loss": 0.2972, + "step": 8914 + }, + { + "epoch": 0.4176230852110367, + "grad_norm": 0.5625816466990538, + "learning_rate": 4.59236735642428e-06, + "loss": 0.2858, + "step": 8915 + }, + { + "epoch": 0.417669930200965, + "grad_norm": 0.5676061442163095, + "learning_rate": 4.592263557904833e-06, + "loss": 0.2771, + "step": 8916 + }, + { + "epoch": 0.41771677519089334, + "grad_norm": 0.5805303602307419, + "learning_rate": 4.592159747344935e-06, + "loss": 0.3021, + "step": 8917 + }, + { + "epoch": 0.41776362018082164, + "grad_norm": 0.5818573910609278, + "learning_rate": 4.592055924745183e-06, + "loss": 0.2881, + "step": 8918 + }, + { + "epoch": 0.41781046517075, + "grad_norm": 0.583624582567928, + "learning_rate": 4.5919520901061755e-06, + "loss": 0.2702, + "step": 8919 + }, + { + "epoch": 0.41785731016067834, + "grad_norm": 0.5740943620059696, + "learning_rate": 4.591848243428509e-06, + "loss": 0.2802, + "step": 8920 + }, + { + "epoch": 0.41790415515060664, + "grad_norm": 0.6548996481806261, + "learning_rate": 4.591744384712783e-06, + "loss": 0.2834, + "step": 8921 + }, + { + "epoch": 0.417951000140535, + "grad_norm": 0.5907966078839836, + "learning_rate": 4.591640513959592e-06, + "loss": 0.2742, + "step": 8922 + }, + { + "epoch": 0.4179978451304633, + "grad_norm": 0.6271581180193766, + "learning_rate": 4.591536631169537e-06, + "loss": 0.3081, + "step": 8923 + }, + { + "epoch": 0.41804469012039164, + "grad_norm": 0.6263178826043996, + "learning_rate": 4.5914327363432135e-06, + "loss": 0.3059, + "step": 8924 + }, + { + "epoch": 0.41809153511031993, + "grad_norm": 0.6250314881951518, + "learning_rate": 4.5913288294812204e-06, + "loss": 0.3025, + "step": 8925 + }, + { + "epoch": 0.4181383801002483, + "grad_norm": 0.6477505327844706, + "learning_rate": 4.591224910584156e-06, + "loss": 0.3264, + "step": 8926 + }, + { + "epoch": 0.4181852250901766, + "grad_norm": 0.6111727216919538, + "learning_rate": 4.591120979652618e-06, + "loss": 0.3018, + "step": 8927 + }, + { + "epoch": 0.41823207008010493, + "grad_norm": 0.6221548652086919, + "learning_rate": 4.591017036687203e-06, + "loss": 0.2956, + "step": 8928 + }, + { + "epoch": 0.4182789150700333, + "grad_norm": 0.6321144715748002, + "learning_rate": 4.5909130816885114e-06, + "loss": 0.2998, + "step": 8929 + }, + { + "epoch": 0.4183257600599616, + "grad_norm": 0.6466967750784809, + "learning_rate": 4.590809114657141e-06, + "loss": 0.2855, + "step": 8930 + }, + { + "epoch": 0.41837260504988993, + "grad_norm": 0.6399230228213202, + "learning_rate": 4.590705135593689e-06, + "loss": 0.2989, + "step": 8931 + }, + { + "epoch": 0.41841945003981823, + "grad_norm": 0.6128635322034025, + "learning_rate": 4.5906011444987555e-06, + "loss": 0.2767, + "step": 8932 + }, + { + "epoch": 0.4184662950297466, + "grad_norm": 0.6137885701672573, + "learning_rate": 4.590497141372936e-06, + "loss": 0.2883, + "step": 8933 + }, + { + "epoch": 0.4185131400196749, + "grad_norm": 0.6114442325975394, + "learning_rate": 4.590393126216832e-06, + "loss": 0.2851, + "step": 8934 + }, + { + "epoch": 0.41855998500960323, + "grad_norm": 0.6155662710717387, + "learning_rate": 4.5902890990310415e-06, + "loss": 0.2699, + "step": 8935 + }, + { + "epoch": 0.4186068299995315, + "grad_norm": 0.5647580375400837, + "learning_rate": 4.590185059816161e-06, + "loss": 0.2866, + "step": 8936 + }, + { + "epoch": 0.4186536749894599, + "grad_norm": 0.6782122677108805, + "learning_rate": 4.590081008572792e-06, + "loss": 0.3031, + "step": 8937 + }, + { + "epoch": 0.41870051997938823, + "grad_norm": 0.5783485902181256, + "learning_rate": 4.589976945301532e-06, + "loss": 0.2997, + "step": 8938 + }, + { + "epoch": 0.4187473649693165, + "grad_norm": 0.635423976111719, + "learning_rate": 4.589872870002981e-06, + "loss": 0.3112, + "step": 8939 + }, + { + "epoch": 0.4187942099592449, + "grad_norm": 0.6198998806334087, + "learning_rate": 4.589768782677735e-06, + "loss": 0.2952, + "step": 8940 + }, + { + "epoch": 0.4188410549491732, + "grad_norm": 0.5849078966665331, + "learning_rate": 4.589664683326396e-06, + "loss": 0.2599, + "step": 8941 + }, + { + "epoch": 0.4188878999391015, + "grad_norm": 0.5596497018795142, + "learning_rate": 4.589560571949561e-06, + "loss": 0.2846, + "step": 8942 + }, + { + "epoch": 0.4189347449290298, + "grad_norm": 0.6600412006153294, + "learning_rate": 4.589456448547831e-06, + "loss": 0.3007, + "step": 8943 + }, + { + "epoch": 0.4189815899189582, + "grad_norm": 0.6330668288109949, + "learning_rate": 4.589352313121804e-06, + "loss": 0.2962, + "step": 8944 + }, + { + "epoch": 0.41902843490888647, + "grad_norm": 0.5965498299129439, + "learning_rate": 4.5892481656720785e-06, + "loss": 0.2813, + "step": 8945 + }, + { + "epoch": 0.4190752798988148, + "grad_norm": 0.5051064265830665, + "learning_rate": 4.5891440061992565e-06, + "loss": 0.2413, + "step": 8946 + }, + { + "epoch": 0.4191221248887432, + "grad_norm": 0.5892095270387068, + "learning_rate": 4.589039834703936e-06, + "loss": 0.3118, + "step": 8947 + }, + { + "epoch": 0.41916896987867147, + "grad_norm": 0.5805869088374296, + "learning_rate": 4.588935651186716e-06, + "loss": 0.2617, + "step": 8948 + }, + { + "epoch": 0.4192158148685998, + "grad_norm": 0.6084503182827439, + "learning_rate": 4.588831455648195e-06, + "loss": 0.3009, + "step": 8949 + }, + { + "epoch": 0.4192626598585281, + "grad_norm": 0.5456243513556537, + "learning_rate": 4.588727248088975e-06, + "loss": 0.2578, + "step": 8950 + }, + { + "epoch": 0.41930950484845647, + "grad_norm": 0.5760195701168013, + "learning_rate": 4.588623028509655e-06, + "loss": 0.2962, + "step": 8951 + }, + { + "epoch": 0.41935634983838477, + "grad_norm": 0.5742063873615674, + "learning_rate": 4.588518796910834e-06, + "loss": 0.2987, + "step": 8952 + }, + { + "epoch": 0.4194031948283131, + "grad_norm": 0.5459089690038788, + "learning_rate": 4.588414553293112e-06, + "loss": 0.2812, + "step": 8953 + }, + { + "epoch": 0.4194500398182414, + "grad_norm": 0.6025602208551842, + "learning_rate": 4.58831029765709e-06, + "loss": 0.2804, + "step": 8954 + }, + { + "epoch": 0.41949688480816977, + "grad_norm": 0.651815807753746, + "learning_rate": 4.588206030003367e-06, + "loss": 0.2992, + "step": 8955 + }, + { + "epoch": 0.4195437297980981, + "grad_norm": 0.5814205607627231, + "learning_rate": 4.5881017503325425e-06, + "loss": 0.2887, + "step": 8956 + }, + { + "epoch": 0.4195905747880264, + "grad_norm": 0.6589823868209643, + "learning_rate": 4.587997458645218e-06, + "loss": 0.3051, + "step": 8957 + }, + { + "epoch": 0.41963741977795477, + "grad_norm": 0.6253023784805336, + "learning_rate": 4.587893154941992e-06, + "loss": 0.2918, + "step": 8958 + }, + { + "epoch": 0.41968426476788306, + "grad_norm": 0.576869327554698, + "learning_rate": 4.587788839223466e-06, + "loss": 0.2836, + "step": 8959 + }, + { + "epoch": 0.4197311097578114, + "grad_norm": 0.6943372532302392, + "learning_rate": 4.58768451149024e-06, + "loss": 0.3077, + "step": 8960 + }, + { + "epoch": 0.4197779547477397, + "grad_norm": 0.5818127232310272, + "learning_rate": 4.587580171742915e-06, + "loss": 0.271, + "step": 8961 + }, + { + "epoch": 0.41982479973766806, + "grad_norm": 0.5689520922873543, + "learning_rate": 4.58747581998209e-06, + "loss": 0.2716, + "step": 8962 + }, + { + "epoch": 0.41987164472759636, + "grad_norm": 0.6150870918485615, + "learning_rate": 4.587371456208366e-06, + "loss": 0.2973, + "step": 8963 + }, + { + "epoch": 0.4199184897175247, + "grad_norm": 0.6086884546188855, + "learning_rate": 4.587267080422345e-06, + "loss": 0.3155, + "step": 8964 + }, + { + "epoch": 0.41996533470745306, + "grad_norm": 0.5720154404486028, + "learning_rate": 4.587162692624626e-06, + "loss": 0.2818, + "step": 8965 + }, + { + "epoch": 0.42001217969738136, + "grad_norm": 0.6182308619534707, + "learning_rate": 4.58705829281581e-06, + "loss": 0.2898, + "step": 8966 + }, + { + "epoch": 0.4200590246873097, + "grad_norm": 0.5971152441808417, + "learning_rate": 4.5869538809964986e-06, + "loss": 0.2687, + "step": 8967 + }, + { + "epoch": 0.420105869677238, + "grad_norm": 0.6476891551813444, + "learning_rate": 4.5868494571672916e-06, + "loss": 0.3117, + "step": 8968 + }, + { + "epoch": 0.42015271466716636, + "grad_norm": 0.6048751507269783, + "learning_rate": 4.586745021328791e-06, + "loss": 0.2972, + "step": 8969 + }, + { + "epoch": 0.42019955965709466, + "grad_norm": 0.5957023448467401, + "learning_rate": 4.5866405734815975e-06, + "loss": 0.2999, + "step": 8970 + }, + { + "epoch": 0.420246404647023, + "grad_norm": 0.5882290588022151, + "learning_rate": 4.586536113626312e-06, + "loss": 0.2566, + "step": 8971 + }, + { + "epoch": 0.4202932496369513, + "grad_norm": 0.5855245853594782, + "learning_rate": 4.586431641763535e-06, + "loss": 0.2787, + "step": 8972 + }, + { + "epoch": 0.42034009462687966, + "grad_norm": 0.6030751498086425, + "learning_rate": 4.5863271578938685e-06, + "loss": 0.296, + "step": 8973 + }, + { + "epoch": 0.420386939616808, + "grad_norm": 0.6051589549602626, + "learning_rate": 4.5862226620179135e-06, + "loss": 0.2732, + "step": 8974 + }, + { + "epoch": 0.4204337846067363, + "grad_norm": 0.5884567826590751, + "learning_rate": 4.586118154136271e-06, + "loss": 0.2776, + "step": 8975 + }, + { + "epoch": 0.42048062959666466, + "grad_norm": 0.5884592626253511, + "learning_rate": 4.5860136342495434e-06, + "loss": 0.2846, + "step": 8976 + }, + { + "epoch": 0.42052747458659295, + "grad_norm": 0.5803072691350356, + "learning_rate": 4.585909102358332e-06, + "loss": 0.2806, + "step": 8977 + }, + { + "epoch": 0.4205743195765213, + "grad_norm": 0.6344166873415253, + "learning_rate": 4.585804558463238e-06, + "loss": 0.2797, + "step": 8978 + }, + { + "epoch": 0.4206211645664496, + "grad_norm": 0.6063152847122046, + "learning_rate": 4.585700002564862e-06, + "loss": 0.2658, + "step": 8979 + }, + { + "epoch": 0.42066800955637795, + "grad_norm": 0.6948414128302612, + "learning_rate": 4.585595434663808e-06, + "loss": 0.3382, + "step": 8980 + }, + { + "epoch": 0.42071485454630625, + "grad_norm": 0.5487014816955788, + "learning_rate": 4.585490854760675e-06, + "loss": 0.2665, + "step": 8981 + }, + { + "epoch": 0.4207616995362346, + "grad_norm": 0.5801532403089379, + "learning_rate": 4.585386262856067e-06, + "loss": 0.2792, + "step": 8982 + }, + { + "epoch": 0.42080854452616295, + "grad_norm": 0.5965449246375123, + "learning_rate": 4.585281658950585e-06, + "loss": 0.273, + "step": 8983 + }, + { + "epoch": 0.42085538951609125, + "grad_norm": 0.6270032061854568, + "learning_rate": 4.585177043044831e-06, + "loss": 0.278, + "step": 8984 + }, + { + "epoch": 0.4209022345060196, + "grad_norm": 0.611716160478262, + "learning_rate": 4.585072415139409e-06, + "loss": 0.2819, + "step": 8985 + }, + { + "epoch": 0.4209490794959479, + "grad_norm": 0.6203765080528363, + "learning_rate": 4.584967775234918e-06, + "loss": 0.2933, + "step": 8986 + }, + { + "epoch": 0.42099592448587625, + "grad_norm": 0.6141272525312675, + "learning_rate": 4.584863123331962e-06, + "loss": 0.298, + "step": 8987 + }, + { + "epoch": 0.42104276947580455, + "grad_norm": 0.6444647255243179, + "learning_rate": 4.584758459431141e-06, + "loss": 0.2952, + "step": 8988 + }, + { + "epoch": 0.4210896144657329, + "grad_norm": 0.6349718178999652, + "learning_rate": 4.584653783533061e-06, + "loss": 0.2959, + "step": 8989 + }, + { + "epoch": 0.4211364594556612, + "grad_norm": 0.6304213650289728, + "learning_rate": 4.584549095638321e-06, + "loss": 0.3091, + "step": 8990 + }, + { + "epoch": 0.42118330444558955, + "grad_norm": 0.6170697814320156, + "learning_rate": 4.584444395747526e-06, + "loss": 0.3043, + "step": 8991 + }, + { + "epoch": 0.4212301494355179, + "grad_norm": 0.5932010360667674, + "learning_rate": 4.584339683861277e-06, + "loss": 0.2711, + "step": 8992 + }, + { + "epoch": 0.4212769944254462, + "grad_norm": 0.6481284301149566, + "learning_rate": 4.584234959980178e-06, + "loss": 0.2993, + "step": 8993 + }, + { + "epoch": 0.42132383941537455, + "grad_norm": 0.6225187386083681, + "learning_rate": 4.584130224104828e-06, + "loss": 0.3006, + "step": 8994 + }, + { + "epoch": 0.42137068440530284, + "grad_norm": 0.596874680035242, + "learning_rate": 4.584025476235835e-06, + "loss": 0.3131, + "step": 8995 + }, + { + "epoch": 0.4214175293952312, + "grad_norm": 0.6436350788957423, + "learning_rate": 4.583920716373798e-06, + "loss": 0.2949, + "step": 8996 + }, + { + "epoch": 0.4214643743851595, + "grad_norm": 0.6653437213720061, + "learning_rate": 4.583815944519321e-06, + "loss": 0.299, + "step": 8997 + }, + { + "epoch": 0.42151121937508784, + "grad_norm": 0.5890565858803467, + "learning_rate": 4.583711160673007e-06, + "loss": 0.2949, + "step": 8998 + }, + { + "epoch": 0.42155806436501614, + "grad_norm": 0.66133171424962, + "learning_rate": 4.583606364835459e-06, + "loss": 0.2983, + "step": 8999 + }, + { + "epoch": 0.4216049093549445, + "grad_norm": 0.643438222774161, + "learning_rate": 4.58350155700728e-06, + "loss": 0.2895, + "step": 9000 + }, + { + "epoch": 0.42165175434487284, + "grad_norm": 0.5907154727990711, + "learning_rate": 4.583396737189074e-06, + "loss": 0.2891, + "step": 9001 + }, + { + "epoch": 0.42169859933480114, + "grad_norm": 0.616188222168616, + "learning_rate": 4.583291905381443e-06, + "loss": 0.289, + "step": 9002 + }, + { + "epoch": 0.4217454443247295, + "grad_norm": 0.619467455611846, + "learning_rate": 4.5831870615849914e-06, + "loss": 0.2732, + "step": 9003 + }, + { + "epoch": 0.4217922893146578, + "grad_norm": 0.5902240994785306, + "learning_rate": 4.583082205800321e-06, + "loss": 0.2726, + "step": 9004 + }, + { + "epoch": 0.42183913430458614, + "grad_norm": 0.6157143392744506, + "learning_rate": 4.582977338028036e-06, + "loss": 0.3021, + "step": 9005 + }, + { + "epoch": 0.42188597929451443, + "grad_norm": 0.5747523130014752, + "learning_rate": 4.58287245826874e-06, + "loss": 0.2626, + "step": 9006 + }, + { + "epoch": 0.4219328242844428, + "grad_norm": 0.6628394597537147, + "learning_rate": 4.582767566523037e-06, + "loss": 0.2903, + "step": 9007 + }, + { + "epoch": 0.4219796692743711, + "grad_norm": 0.5715251548382873, + "learning_rate": 4.582662662791529e-06, + "loss": 0.2765, + "step": 9008 + }, + { + "epoch": 0.42202651426429943, + "grad_norm": 0.6478079955816443, + "learning_rate": 4.582557747074822e-06, + "loss": 0.3086, + "step": 9009 + }, + { + "epoch": 0.4220733592542278, + "grad_norm": 0.5962437069161897, + "learning_rate": 4.582452819373518e-06, + "loss": 0.3002, + "step": 9010 + }, + { + "epoch": 0.4221202042441561, + "grad_norm": 0.6047700286823006, + "learning_rate": 4.582347879688222e-06, + "loss": 0.2864, + "step": 9011 + }, + { + "epoch": 0.42216704923408443, + "grad_norm": 0.6262785802913869, + "learning_rate": 4.582242928019537e-06, + "loss": 0.3038, + "step": 9012 + }, + { + "epoch": 0.42221389422401273, + "grad_norm": 0.6053182287077127, + "learning_rate": 4.582137964368067e-06, + "loss": 0.289, + "step": 9013 + }, + { + "epoch": 0.4222607392139411, + "grad_norm": 0.6047818520029639, + "learning_rate": 4.582032988734416e-06, + "loss": 0.2903, + "step": 9014 + }, + { + "epoch": 0.4223075842038694, + "grad_norm": 0.5795191665386149, + "learning_rate": 4.581928001119189e-06, + "loss": 0.2706, + "step": 9015 + }, + { + "epoch": 0.42235442919379773, + "grad_norm": 0.6647222144405562, + "learning_rate": 4.58182300152299e-06, + "loss": 0.3017, + "step": 9016 + }, + { + "epoch": 0.422401274183726, + "grad_norm": 0.6402983524626673, + "learning_rate": 4.5817179899464226e-06, + "loss": 0.2844, + "step": 9017 + }, + { + "epoch": 0.4224481191736544, + "grad_norm": 0.6326170566219013, + "learning_rate": 4.581612966390091e-06, + "loss": 0.283, + "step": 9018 + }, + { + "epoch": 0.42249496416358273, + "grad_norm": 0.5700805091095109, + "learning_rate": 4.581507930854601e-06, + "loss": 0.2803, + "step": 9019 + }, + { + "epoch": 0.422541809153511, + "grad_norm": 0.5972107126067004, + "learning_rate": 4.581402883340555e-06, + "loss": 0.3035, + "step": 9020 + }, + { + "epoch": 0.4225886541434394, + "grad_norm": 0.6572518633042298, + "learning_rate": 4.581297823848559e-06, + "loss": 0.2833, + "step": 9021 + }, + { + "epoch": 0.4226354991333677, + "grad_norm": 0.6021518661029803, + "learning_rate": 4.581192752379217e-06, + "loss": 0.3048, + "step": 9022 + }, + { + "epoch": 0.422682344123296, + "grad_norm": 0.6140763226470541, + "learning_rate": 4.581087668933134e-06, + "loss": 0.2785, + "step": 9023 + }, + { + "epoch": 0.4227291891132243, + "grad_norm": 0.5750901122014328, + "learning_rate": 4.580982573510913e-06, + "loss": 0.2821, + "step": 9024 + }, + { + "epoch": 0.4227760341031527, + "grad_norm": 0.5929869708496421, + "learning_rate": 4.580877466113163e-06, + "loss": 0.2858, + "step": 9025 + }, + { + "epoch": 0.42282287909308097, + "grad_norm": 0.6179760010045708, + "learning_rate": 4.580772346740484e-06, + "loss": 0.3008, + "step": 9026 + }, + { + "epoch": 0.4228697240830093, + "grad_norm": 0.6286320566702132, + "learning_rate": 4.580667215393484e-06, + "loss": 0.307, + "step": 9027 + }, + { + "epoch": 0.4229165690729377, + "grad_norm": 0.6293134123524163, + "learning_rate": 4.580562072072767e-06, + "loss": 0.2966, + "step": 9028 + }, + { + "epoch": 0.42296341406286597, + "grad_norm": 0.607931191734931, + "learning_rate": 4.5804569167789396e-06, + "loss": 0.2821, + "step": 9029 + }, + { + "epoch": 0.4230102590527943, + "grad_norm": 0.5729767244161446, + "learning_rate": 4.580351749512604e-06, + "loss": 0.2901, + "step": 9030 + }, + { + "epoch": 0.4230571040427226, + "grad_norm": 0.5521417272278155, + "learning_rate": 4.580246570274367e-06, + "loss": 0.2848, + "step": 9031 + }, + { + "epoch": 0.42310394903265097, + "grad_norm": 0.5564283097956889, + "learning_rate": 4.580141379064834e-06, + "loss": 0.2674, + "step": 9032 + }, + { + "epoch": 0.42315079402257927, + "grad_norm": 0.590800814310942, + "learning_rate": 4.5800361758846105e-06, + "loss": 0.2863, + "step": 9033 + }, + { + "epoch": 0.4231976390125076, + "grad_norm": 0.5625147171918211, + "learning_rate": 4.579930960734301e-06, + "loss": 0.291, + "step": 9034 + }, + { + "epoch": 0.4232444840024359, + "grad_norm": 0.5846061531876323, + "learning_rate": 4.579825733614512e-06, + "loss": 0.2913, + "step": 9035 + }, + { + "epoch": 0.42329132899236427, + "grad_norm": 0.6314813935314837, + "learning_rate": 4.579720494525849e-06, + "loss": 0.2912, + "step": 9036 + }, + { + "epoch": 0.4233381739822926, + "grad_norm": 0.5817349278697461, + "learning_rate": 4.579615243468917e-06, + "loss": 0.2956, + "step": 9037 + }, + { + "epoch": 0.4233850189722209, + "grad_norm": 0.6594589328205174, + "learning_rate": 4.5795099804443214e-06, + "loss": 0.292, + "step": 9038 + }, + { + "epoch": 0.42343186396214927, + "grad_norm": 0.6000462298218384, + "learning_rate": 4.579404705452669e-06, + "loss": 0.2713, + "step": 9039 + }, + { + "epoch": 0.42347870895207756, + "grad_norm": 0.6303674828544102, + "learning_rate": 4.579299418494565e-06, + "loss": 0.3051, + "step": 9040 + }, + { + "epoch": 0.4235255539420059, + "grad_norm": 0.5813512265352443, + "learning_rate": 4.579194119570616e-06, + "loss": 0.2991, + "step": 9041 + }, + { + "epoch": 0.4235723989319342, + "grad_norm": 0.5739130148513841, + "learning_rate": 4.579088808681427e-06, + "loss": 0.2764, + "step": 9042 + }, + { + "epoch": 0.42361924392186256, + "grad_norm": 0.6472594984859726, + "learning_rate": 4.5789834858276035e-06, + "loss": 0.317, + "step": 9043 + }, + { + "epoch": 0.42366608891179086, + "grad_norm": 0.6131837019484176, + "learning_rate": 4.578878151009753e-06, + "loss": 0.3112, + "step": 9044 + }, + { + "epoch": 0.4237129339017192, + "grad_norm": 0.6376390205620563, + "learning_rate": 4.578772804228483e-06, + "loss": 0.2787, + "step": 9045 + }, + { + "epoch": 0.42375977889164756, + "grad_norm": 0.5536430487615116, + "learning_rate": 4.578667445484396e-06, + "loss": 0.2656, + "step": 9046 + }, + { + "epoch": 0.42380662388157586, + "grad_norm": 0.6726023158759408, + "learning_rate": 4.578562074778101e-06, + "loss": 0.2874, + "step": 9047 + }, + { + "epoch": 0.4238534688715042, + "grad_norm": 0.6281172999979413, + "learning_rate": 4.578456692110204e-06, + "loss": 0.3055, + "step": 9048 + }, + { + "epoch": 0.4239003138614325, + "grad_norm": 0.542130793381497, + "learning_rate": 4.578351297481312e-06, + "loss": 0.2706, + "step": 9049 + }, + { + "epoch": 0.42394715885136086, + "grad_norm": 0.5999403366351842, + "learning_rate": 4.578245890892029e-06, + "loss": 0.2936, + "step": 9050 + }, + { + "epoch": 0.42399400384128916, + "grad_norm": 0.6275466848646188, + "learning_rate": 4.578140472342963e-06, + "loss": 0.301, + "step": 9051 + }, + { + "epoch": 0.4240408488312175, + "grad_norm": 0.6073003966473015, + "learning_rate": 4.578035041834722e-06, + "loss": 0.2962, + "step": 9052 + }, + { + "epoch": 0.4240876938211458, + "grad_norm": 0.6138666040191146, + "learning_rate": 4.5779295993679115e-06, + "loss": 0.2724, + "step": 9053 + }, + { + "epoch": 0.42413453881107416, + "grad_norm": 0.6204466498441165, + "learning_rate": 4.577824144943138e-06, + "loss": 0.2794, + "step": 9054 + }, + { + "epoch": 0.4241813838010025, + "grad_norm": 0.5944450496617689, + "learning_rate": 4.57771867856101e-06, + "loss": 0.2924, + "step": 9055 + }, + { + "epoch": 0.4242282287909308, + "grad_norm": 0.6274459075729195, + "learning_rate": 4.5776132002221315e-06, + "loss": 0.3032, + "step": 9056 + }, + { + "epoch": 0.42427507378085916, + "grad_norm": 0.6109649158400863, + "learning_rate": 4.577507709927113e-06, + "loss": 0.3017, + "step": 9057 + }, + { + "epoch": 0.42432191877078745, + "grad_norm": 0.6414867843687596, + "learning_rate": 4.577402207676558e-06, + "loss": 0.3091, + "step": 9058 + }, + { + "epoch": 0.4243687637607158, + "grad_norm": 0.7146767030183019, + "learning_rate": 4.5772966934710765e-06, + "loss": 0.3172, + "step": 9059 + }, + { + "epoch": 0.4244156087506441, + "grad_norm": 0.7357146795852432, + "learning_rate": 4.577191167311274e-06, + "loss": 0.3083, + "step": 9060 + }, + { + "epoch": 0.42446245374057245, + "grad_norm": 0.6033505828324294, + "learning_rate": 4.5770856291977585e-06, + "loss": 0.2806, + "step": 9061 + }, + { + "epoch": 0.42450929873050075, + "grad_norm": 0.6175030890753617, + "learning_rate": 4.576980079131138e-06, + "loss": 0.2929, + "step": 9062 + }, + { + "epoch": 0.4245561437204291, + "grad_norm": 0.6113894898946485, + "learning_rate": 4.576874517112019e-06, + "loss": 0.2769, + "step": 9063 + }, + { + "epoch": 0.42460298871035745, + "grad_norm": 0.6262734046008132, + "learning_rate": 4.576768943141009e-06, + "loss": 0.2795, + "step": 9064 + }, + { + "epoch": 0.42464983370028575, + "grad_norm": 0.6245253317364766, + "learning_rate": 4.576663357218715e-06, + "loss": 0.3042, + "step": 9065 + }, + { + "epoch": 0.4246966786902141, + "grad_norm": 0.5949312941034254, + "learning_rate": 4.5765577593457454e-06, + "loss": 0.2794, + "step": 9066 + }, + { + "epoch": 0.4247435236801424, + "grad_norm": 0.6236171299679503, + "learning_rate": 4.576452149522708e-06, + "loss": 0.2907, + "step": 9067 + }, + { + "epoch": 0.42479036867007075, + "grad_norm": 0.6063357233809304, + "learning_rate": 4.576346527750212e-06, + "loss": 0.3029, + "step": 9068 + }, + { + "epoch": 0.42483721365999905, + "grad_norm": 0.5876066274699503, + "learning_rate": 4.576240894028862e-06, + "loss": 0.282, + "step": 9069 + }, + { + "epoch": 0.4248840586499274, + "grad_norm": 0.5888825658576031, + "learning_rate": 4.576135248359267e-06, + "loss": 0.2997, + "step": 9070 + }, + { + "epoch": 0.4249309036398557, + "grad_norm": 0.673823599225713, + "learning_rate": 4.576029590742037e-06, + "loss": 0.2929, + "step": 9071 + }, + { + "epoch": 0.42497774862978405, + "grad_norm": 0.6349789058653957, + "learning_rate": 4.575923921177777e-06, + "loss": 0.2819, + "step": 9072 + }, + { + "epoch": 0.4250245936197124, + "grad_norm": 0.6384819991125943, + "learning_rate": 4.575818239667098e-06, + "loss": 0.2815, + "step": 9073 + }, + { + "epoch": 0.4250714386096407, + "grad_norm": 0.5676425628861448, + "learning_rate": 4.575712546210606e-06, + "loss": 0.2938, + "step": 9074 + }, + { + "epoch": 0.42511828359956905, + "grad_norm": 0.5926271024811262, + "learning_rate": 4.575606840808911e-06, + "loss": 0.2851, + "step": 9075 + }, + { + "epoch": 0.42516512858949734, + "grad_norm": 0.6072085646087027, + "learning_rate": 4.5755011234626185e-06, + "loss": 0.2786, + "step": 9076 + }, + { + "epoch": 0.4252119735794257, + "grad_norm": 0.610102564111489, + "learning_rate": 4.575395394172339e-06, + "loss": 0.268, + "step": 9077 + }, + { + "epoch": 0.425258818569354, + "grad_norm": 0.65245620908248, + "learning_rate": 4.575289652938682e-06, + "loss": 0.2938, + "step": 9078 + }, + { + "epoch": 0.42530566355928234, + "grad_norm": 0.5612655619111657, + "learning_rate": 4.575183899762254e-06, + "loss": 0.2829, + "step": 9079 + }, + { + "epoch": 0.42535250854921064, + "grad_norm": 0.6568803818597201, + "learning_rate": 4.575078134643664e-06, + "loss": 0.2945, + "step": 9080 + }, + { + "epoch": 0.425399353539139, + "grad_norm": 0.6030337432879049, + "learning_rate": 4.574972357583522e-06, + "loss": 0.2945, + "step": 9081 + }, + { + "epoch": 0.42544619852906734, + "grad_norm": 0.5833867010009081, + "learning_rate": 4.574866568582433e-06, + "loss": 0.2925, + "step": 9082 + }, + { + "epoch": 0.42549304351899564, + "grad_norm": 0.6510415076959625, + "learning_rate": 4.574760767641011e-06, + "loss": 0.3146, + "step": 9083 + }, + { + "epoch": 0.425539888508924, + "grad_norm": 0.6494093784291418, + "learning_rate": 4.574654954759861e-06, + "loss": 0.2886, + "step": 9084 + }, + { + "epoch": 0.4255867334988523, + "grad_norm": 0.5706350832382303, + "learning_rate": 4.574549129939593e-06, + "loss": 0.284, + "step": 9085 + }, + { + "epoch": 0.42563357848878064, + "grad_norm": 0.5499701052615085, + "learning_rate": 4.574443293180817e-06, + "loss": 0.2766, + "step": 9086 + }, + { + "epoch": 0.42568042347870894, + "grad_norm": 0.5911625695628033, + "learning_rate": 4.574337444484141e-06, + "loss": 0.2641, + "step": 9087 + }, + { + "epoch": 0.4257272684686373, + "grad_norm": 0.5834822992445216, + "learning_rate": 4.574231583850175e-06, + "loss": 0.2877, + "step": 9088 + }, + { + "epoch": 0.4257741134585656, + "grad_norm": 0.6221021325644458, + "learning_rate": 4.5741257112795265e-06, + "loss": 0.294, + "step": 9089 + }, + { + "epoch": 0.42582095844849394, + "grad_norm": 0.6125659211470665, + "learning_rate": 4.574019826772806e-06, + "loss": 0.2774, + "step": 9090 + }, + { + "epoch": 0.4258678034384223, + "grad_norm": 0.5513927027930496, + "learning_rate": 4.573913930330622e-06, + "loss": 0.2766, + "step": 9091 + }, + { + "epoch": 0.4259146484283506, + "grad_norm": 0.6650261427728926, + "learning_rate": 4.573808021953587e-06, + "loss": 0.3053, + "step": 9092 + }, + { + "epoch": 0.42596149341827894, + "grad_norm": 0.5966652743973263, + "learning_rate": 4.573702101642306e-06, + "loss": 0.2764, + "step": 9093 + }, + { + "epoch": 0.42600833840820723, + "grad_norm": 0.6498662103088267, + "learning_rate": 4.573596169397391e-06, + "loss": 0.2764, + "step": 9094 + }, + { + "epoch": 0.4260551833981356, + "grad_norm": 0.5968021182361923, + "learning_rate": 4.573490225219451e-06, + "loss": 0.2915, + "step": 9095 + }, + { + "epoch": 0.4261020283880639, + "grad_norm": 0.6459758599444537, + "learning_rate": 4.573384269109096e-06, + "loss": 0.3144, + "step": 9096 + }, + { + "epoch": 0.42614887337799223, + "grad_norm": 0.6141613039184637, + "learning_rate": 4.573278301066937e-06, + "loss": 0.2967, + "step": 9097 + }, + { + "epoch": 0.42619571836792053, + "grad_norm": 0.648195036845452, + "learning_rate": 4.573172321093581e-06, + "loss": 0.2682, + "step": 9098 + }, + { + "epoch": 0.4262425633578489, + "grad_norm": 0.5525271710335709, + "learning_rate": 4.573066329189639e-06, + "loss": 0.2738, + "step": 9099 + }, + { + "epoch": 0.42628940834777723, + "grad_norm": 0.6431054790353623, + "learning_rate": 4.5729603253557226e-06, + "loss": 0.3026, + "step": 9100 + }, + { + "epoch": 0.42633625333770553, + "grad_norm": 0.625659735004161, + "learning_rate": 4.57285430959244e-06, + "loss": 0.2934, + "step": 9101 + }, + { + "epoch": 0.4263830983276339, + "grad_norm": 0.6769453133389642, + "learning_rate": 4.572748281900402e-06, + "loss": 0.2994, + "step": 9102 + }, + { + "epoch": 0.4264299433175622, + "grad_norm": 0.6446952280244533, + "learning_rate": 4.572642242280219e-06, + "loss": 0.2833, + "step": 9103 + }, + { + "epoch": 0.42647678830749053, + "grad_norm": 0.5703412326812198, + "learning_rate": 4.5725361907325e-06, + "loss": 0.2802, + "step": 9104 + }, + { + "epoch": 0.4265236332974188, + "grad_norm": 0.6151796552337906, + "learning_rate": 4.5724301272578565e-06, + "loss": 0.2723, + "step": 9105 + }, + { + "epoch": 0.4265704782873472, + "grad_norm": 0.6887554254971766, + "learning_rate": 4.572324051856898e-06, + "loss": 0.3258, + "step": 9106 + }, + { + "epoch": 0.4266173232772755, + "grad_norm": 0.5871403728892995, + "learning_rate": 4.572217964530235e-06, + "loss": 0.292, + "step": 9107 + }, + { + "epoch": 0.4266641682672038, + "grad_norm": 0.6588544792497426, + "learning_rate": 4.57211186527848e-06, + "loss": 0.2829, + "step": 9108 + }, + { + "epoch": 0.4267110132571322, + "grad_norm": 0.6161644219769637, + "learning_rate": 4.572005754102241e-06, + "loss": 0.2798, + "step": 9109 + }, + { + "epoch": 0.4267578582470605, + "grad_norm": 0.6012697976135856, + "learning_rate": 4.5718996310021305e-06, + "loss": 0.2812, + "step": 9110 + }, + { + "epoch": 0.4268047032369888, + "grad_norm": 0.5982989768850525, + "learning_rate": 4.571793495978758e-06, + "loss": 0.2794, + "step": 9111 + }, + { + "epoch": 0.4268515482269171, + "grad_norm": 0.6532857075110509, + "learning_rate": 4.571687349032735e-06, + "loss": 0.3032, + "step": 9112 + }, + { + "epoch": 0.4268983932168455, + "grad_norm": 0.6845015139819952, + "learning_rate": 4.571581190164671e-06, + "loss": 0.3286, + "step": 9113 + }, + { + "epoch": 0.42694523820677377, + "grad_norm": 0.6111335356498412, + "learning_rate": 4.571475019375178e-06, + "loss": 0.2823, + "step": 9114 + }, + { + "epoch": 0.4269920831967021, + "grad_norm": 0.6312110936081228, + "learning_rate": 4.5713688366648675e-06, + "loss": 0.3138, + "step": 9115 + }, + { + "epoch": 0.4270389281866304, + "grad_norm": 0.6257386114902181, + "learning_rate": 4.57126264203435e-06, + "loss": 0.3021, + "step": 9116 + }, + { + "epoch": 0.42708577317655877, + "grad_norm": 0.5958337874244711, + "learning_rate": 4.571156435484236e-06, + "loss": 0.2769, + "step": 9117 + }, + { + "epoch": 0.4271326181664871, + "grad_norm": 0.5956210050070108, + "learning_rate": 4.5710502170151385e-06, + "loss": 0.3018, + "step": 9118 + }, + { + "epoch": 0.4271794631564154, + "grad_norm": 0.5741448746022542, + "learning_rate": 4.5709439866276665e-06, + "loss": 0.288, + "step": 9119 + }, + { + "epoch": 0.42722630814634377, + "grad_norm": 0.6151639063306976, + "learning_rate": 4.5708377443224325e-06, + "loss": 0.3053, + "step": 9120 + }, + { + "epoch": 0.42727315313627207, + "grad_norm": 0.6286629121215159, + "learning_rate": 4.570731490100048e-06, + "loss": 0.3084, + "step": 9121 + }, + { + "epoch": 0.4273199981262004, + "grad_norm": 0.6040377780995928, + "learning_rate": 4.5706252239611245e-06, + "loss": 0.261, + "step": 9122 + }, + { + "epoch": 0.4273668431161287, + "grad_norm": 0.5860400364526792, + "learning_rate": 4.570518945906273e-06, + "loss": 0.2842, + "step": 9123 + }, + { + "epoch": 0.42741368810605707, + "grad_norm": 0.6244817443648508, + "learning_rate": 4.570412655936105e-06, + "loss": 0.3023, + "step": 9124 + }, + { + "epoch": 0.42746053309598536, + "grad_norm": 0.5992847619499481, + "learning_rate": 4.570306354051233e-06, + "loss": 0.2767, + "step": 9125 + }, + { + "epoch": 0.4275073780859137, + "grad_norm": 0.6053746930260723, + "learning_rate": 4.570200040252268e-06, + "loss": 0.2916, + "step": 9126 + }, + { + "epoch": 0.42755422307584207, + "grad_norm": 0.5708250790557494, + "learning_rate": 4.570093714539821e-06, + "loss": 0.2779, + "step": 9127 + }, + { + "epoch": 0.42760106806577036, + "grad_norm": 0.6268138916181561, + "learning_rate": 4.569987376914508e-06, + "loss": 0.2959, + "step": 9128 + }, + { + "epoch": 0.4276479130556987, + "grad_norm": 0.6167535436609839, + "learning_rate": 4.569881027376935e-06, + "loss": 0.2762, + "step": 9129 + }, + { + "epoch": 0.427694758045627, + "grad_norm": 0.5705649193410756, + "learning_rate": 4.5697746659277185e-06, + "loss": 0.3044, + "step": 9130 + }, + { + "epoch": 0.42774160303555536, + "grad_norm": 0.6018144836305834, + "learning_rate": 4.569668292567468e-06, + "loss": 0.3084, + "step": 9131 + }, + { + "epoch": 0.42778844802548366, + "grad_norm": 0.6117196121874803, + "learning_rate": 4.569561907296797e-06, + "loss": 0.2833, + "step": 9132 + }, + { + "epoch": 0.427835293015412, + "grad_norm": 0.6005338938819283, + "learning_rate": 4.569455510116318e-06, + "loss": 0.2748, + "step": 9133 + }, + { + "epoch": 0.4278821380053403, + "grad_norm": 0.6275983405978537, + "learning_rate": 4.569349101026642e-06, + "loss": 0.2991, + "step": 9134 + }, + { + "epoch": 0.42792898299526866, + "grad_norm": 0.6230784385275393, + "learning_rate": 4.569242680028383e-06, + "loss": 0.2985, + "step": 9135 + }, + { + "epoch": 0.427975827985197, + "grad_norm": 0.6232365475737063, + "learning_rate": 4.569136247122151e-06, + "loss": 0.2843, + "step": 9136 + }, + { + "epoch": 0.4280226729751253, + "grad_norm": 0.6178073138559655, + "learning_rate": 4.569029802308561e-06, + "loss": 0.2904, + "step": 9137 + }, + { + "epoch": 0.42806951796505366, + "grad_norm": 0.5831185963389354, + "learning_rate": 4.5689233455882244e-06, + "loss": 0.2826, + "step": 9138 + }, + { + "epoch": 0.42811636295498195, + "grad_norm": 0.583549107524166, + "learning_rate": 4.568816876961753e-06, + "loss": 0.2769, + "step": 9139 + }, + { + "epoch": 0.4281632079449103, + "grad_norm": 0.6071906234743701, + "learning_rate": 4.568710396429762e-06, + "loss": 0.3074, + "step": 9140 + }, + { + "epoch": 0.4282100529348386, + "grad_norm": 0.5626992987203271, + "learning_rate": 4.568603903992862e-06, + "loss": 0.2817, + "step": 9141 + }, + { + "epoch": 0.42825689792476695, + "grad_norm": 0.6155226044225205, + "learning_rate": 4.568497399651666e-06, + "loss": 0.2782, + "step": 9142 + }, + { + "epoch": 0.42830374291469525, + "grad_norm": 0.5687499817798072, + "learning_rate": 4.568390883406788e-06, + "loss": 0.2812, + "step": 9143 + }, + { + "epoch": 0.4283505879046236, + "grad_norm": 0.663088207092625, + "learning_rate": 4.56828435525884e-06, + "loss": 0.307, + "step": 9144 + }, + { + "epoch": 0.42839743289455195, + "grad_norm": 0.6175697973292901, + "learning_rate": 4.568177815208435e-06, + "loss": 0.2921, + "step": 9145 + }, + { + "epoch": 0.42844427788448025, + "grad_norm": 0.6125939732962166, + "learning_rate": 4.568071263256186e-06, + "loss": 0.2942, + "step": 9146 + }, + { + "epoch": 0.4284911228744086, + "grad_norm": 0.6101143883343012, + "learning_rate": 4.567964699402708e-06, + "loss": 0.2994, + "step": 9147 + }, + { + "epoch": 0.4285379678643369, + "grad_norm": 0.624027547457458, + "learning_rate": 4.567858123648613e-06, + "loss": 0.2954, + "step": 9148 + }, + { + "epoch": 0.42858481285426525, + "grad_norm": 0.6380369711913352, + "learning_rate": 4.567751535994514e-06, + "loss": 0.2903, + "step": 9149 + }, + { + "epoch": 0.42863165784419355, + "grad_norm": 0.606984301042552, + "learning_rate": 4.5676449364410234e-06, + "loss": 0.2707, + "step": 9150 + }, + { + "epoch": 0.4286785028341219, + "grad_norm": 0.5997547555942512, + "learning_rate": 4.567538324988757e-06, + "loss": 0.3023, + "step": 9151 + }, + { + "epoch": 0.4287253478240502, + "grad_norm": 0.6293242922264298, + "learning_rate": 4.567431701638328e-06, + "loss": 0.2977, + "step": 9152 + }, + { + "epoch": 0.42877219281397855, + "grad_norm": 0.5742389974488344, + "learning_rate": 4.567325066390347e-06, + "loss": 0.2731, + "step": 9153 + }, + { + "epoch": 0.4288190378039069, + "grad_norm": 0.6346321082287143, + "learning_rate": 4.567218419245431e-06, + "loss": 0.2961, + "step": 9154 + }, + { + "epoch": 0.4288658827938352, + "grad_norm": 0.6115188463305006, + "learning_rate": 4.567111760204193e-06, + "loss": 0.3132, + "step": 9155 + }, + { + "epoch": 0.42891272778376355, + "grad_norm": 0.604591417025153, + "learning_rate": 4.5670050892672455e-06, + "loss": 0.2982, + "step": 9156 + }, + { + "epoch": 0.42895957277369184, + "grad_norm": 0.6310606180155464, + "learning_rate": 4.566898406435204e-06, + "loss": 0.2957, + "step": 9157 + }, + { + "epoch": 0.4290064177636202, + "grad_norm": 0.6343343137653805, + "learning_rate": 4.566791711708681e-06, + "loss": 0.2985, + "step": 9158 + }, + { + "epoch": 0.4290532627535485, + "grad_norm": 0.6071768651226812, + "learning_rate": 4.566685005088292e-06, + "loss": 0.3062, + "step": 9159 + }, + { + "epoch": 0.42910010774347684, + "grad_norm": 0.564976567043917, + "learning_rate": 4.56657828657465e-06, + "loss": 0.2638, + "step": 9160 + }, + { + "epoch": 0.42914695273340514, + "grad_norm": 0.5743524317054479, + "learning_rate": 4.566471556168369e-06, + "loss": 0.3009, + "step": 9161 + }, + { + "epoch": 0.4291937977233335, + "grad_norm": 0.6239832723626015, + "learning_rate": 4.566364813870063e-06, + "loss": 0.2938, + "step": 9162 + }, + { + "epoch": 0.42924064271326184, + "grad_norm": 0.6465884468356471, + "learning_rate": 4.566258059680348e-06, + "loss": 0.3195, + "step": 9163 + }, + { + "epoch": 0.42928748770319014, + "grad_norm": 0.55499270408732, + "learning_rate": 4.566151293599836e-06, + "loss": 0.269, + "step": 9164 + }, + { + "epoch": 0.4293343326931185, + "grad_norm": 0.6040992155491597, + "learning_rate": 4.5660445156291445e-06, + "loss": 0.2698, + "step": 9165 + }, + { + "epoch": 0.4293811776830468, + "grad_norm": 0.622864271856502, + "learning_rate": 4.565937725768885e-06, + "loss": 0.3188, + "step": 9166 + }, + { + "epoch": 0.42942802267297514, + "grad_norm": 0.6597376384604032, + "learning_rate": 4.565830924019673e-06, + "loss": 0.3057, + "step": 9167 + }, + { + "epoch": 0.42947486766290344, + "grad_norm": 0.6086077513450048, + "learning_rate": 4.5657241103821234e-06, + "loss": 0.285, + "step": 9168 + }, + { + "epoch": 0.4295217126528318, + "grad_norm": 0.5832441082757736, + "learning_rate": 4.5656172848568505e-06, + "loss": 0.3034, + "step": 9169 + }, + { + "epoch": 0.4295685576427601, + "grad_norm": 0.656876014183591, + "learning_rate": 4.5655104474444704e-06, + "loss": 0.3144, + "step": 9170 + }, + { + "epoch": 0.42961540263268844, + "grad_norm": 0.6272083080526841, + "learning_rate": 4.565403598145595e-06, + "loss": 0.2953, + "step": 9171 + }, + { + "epoch": 0.4296622476226168, + "grad_norm": 0.619579220786761, + "learning_rate": 4.565296736960842e-06, + "loss": 0.2953, + "step": 9172 + }, + { + "epoch": 0.4297090926125451, + "grad_norm": 0.5757134685338497, + "learning_rate": 4.5651898638908255e-06, + "loss": 0.2955, + "step": 9173 + }, + { + "epoch": 0.42975593760247344, + "grad_norm": 0.6238209493592507, + "learning_rate": 4.565082978936161e-06, + "loss": 0.2992, + "step": 9174 + }, + { + "epoch": 0.42980278259240173, + "grad_norm": 0.5492582889373864, + "learning_rate": 4.564976082097461e-06, + "loss": 0.2817, + "step": 9175 + }, + { + "epoch": 0.4298496275823301, + "grad_norm": 0.6406883803926138, + "learning_rate": 4.564869173375345e-06, + "loss": 0.3008, + "step": 9176 + }, + { + "epoch": 0.4298964725722584, + "grad_norm": 0.5623588696989613, + "learning_rate": 4.564762252770424e-06, + "loss": 0.2772, + "step": 9177 + }, + { + "epoch": 0.42994331756218673, + "grad_norm": 0.6243671816217871, + "learning_rate": 4.564655320283317e-06, + "loss": 0.2883, + "step": 9178 + }, + { + "epoch": 0.42999016255211503, + "grad_norm": 0.6602823401751143, + "learning_rate": 4.564548375914636e-06, + "loss": 0.3067, + "step": 9179 + }, + { + "epoch": 0.4300370075420434, + "grad_norm": 0.6088994052818909, + "learning_rate": 4.564441419664998e-06, + "loss": 0.2896, + "step": 9180 + }, + { + "epoch": 0.43008385253197173, + "grad_norm": 0.6174816986757614, + "learning_rate": 4.564334451535019e-06, + "loss": 0.2878, + "step": 9181 + }, + { + "epoch": 0.43013069752190003, + "grad_norm": 0.5708763722805129, + "learning_rate": 4.564227471525314e-06, + "loss": 0.2948, + "step": 9182 + }, + { + "epoch": 0.4301775425118284, + "grad_norm": 0.5912919454131471, + "learning_rate": 4.564120479636499e-06, + "loss": 0.2916, + "step": 9183 + }, + { + "epoch": 0.4302243875017567, + "grad_norm": 0.6193055591340876, + "learning_rate": 4.564013475869189e-06, + "loss": 0.2814, + "step": 9184 + }, + { + "epoch": 0.43027123249168503, + "grad_norm": 0.5388010931410214, + "learning_rate": 4.563906460224001e-06, + "loss": 0.2632, + "step": 9185 + }, + { + "epoch": 0.4303180774816133, + "grad_norm": 0.60341285281852, + "learning_rate": 4.56379943270155e-06, + "loss": 0.2843, + "step": 9186 + }, + { + "epoch": 0.4303649224715417, + "grad_norm": 0.5744885557125559, + "learning_rate": 4.563692393302452e-06, + "loss": 0.2755, + "step": 9187 + }, + { + "epoch": 0.43041176746147, + "grad_norm": 0.5964365080864651, + "learning_rate": 4.563585342027322e-06, + "loss": 0.2725, + "step": 9188 + }, + { + "epoch": 0.4304586124513983, + "grad_norm": 0.6362601018493415, + "learning_rate": 4.563478278876777e-06, + "loss": 0.2881, + "step": 9189 + }, + { + "epoch": 0.4305054574413267, + "grad_norm": 0.6420016504147313, + "learning_rate": 4.563371203851434e-06, + "loss": 0.3121, + "step": 9190 + }, + { + "epoch": 0.430552302431255, + "grad_norm": 0.5662327339615825, + "learning_rate": 4.563264116951907e-06, + "loss": 0.2675, + "step": 9191 + }, + { + "epoch": 0.4305991474211833, + "grad_norm": 0.6164796110038906, + "learning_rate": 4.563157018178816e-06, + "loss": 0.2855, + "step": 9192 + }, + { + "epoch": 0.4306459924111116, + "grad_norm": 0.5937940587976083, + "learning_rate": 4.563049907532773e-06, + "loss": 0.2717, + "step": 9193 + }, + { + "epoch": 0.43069283740104, + "grad_norm": 0.5911836115694756, + "learning_rate": 4.562942785014397e-06, + "loss": 0.2896, + "step": 9194 + }, + { + "epoch": 0.43073968239096827, + "grad_norm": 0.5903638100844543, + "learning_rate": 4.562835650624303e-06, + "loss": 0.2896, + "step": 9195 + }, + { + "epoch": 0.4307865273808966, + "grad_norm": 0.6052866293535533, + "learning_rate": 4.56272850436311e-06, + "loss": 0.2808, + "step": 9196 + }, + { + "epoch": 0.4308333723708249, + "grad_norm": 0.5872401478646371, + "learning_rate": 4.562621346231431e-06, + "loss": 0.2899, + "step": 9197 + }, + { + "epoch": 0.43088021736075327, + "grad_norm": 0.6222059981216791, + "learning_rate": 4.562514176229886e-06, + "loss": 0.3193, + "step": 9198 + }, + { + "epoch": 0.4309270623506816, + "grad_norm": 0.5879590997808111, + "learning_rate": 4.562406994359089e-06, + "loss": 0.2692, + "step": 9199 + }, + { + "epoch": 0.4309739073406099, + "grad_norm": 0.6190107733902995, + "learning_rate": 4.562299800619659e-06, + "loss": 0.3086, + "step": 9200 + }, + { + "epoch": 0.43102075233053827, + "grad_norm": 0.5844366367212136, + "learning_rate": 4.562192595012212e-06, + "loss": 0.2975, + "step": 9201 + }, + { + "epoch": 0.43106759732046657, + "grad_norm": 0.5998363136741206, + "learning_rate": 4.562085377537365e-06, + "loss": 0.284, + "step": 9202 + }, + { + "epoch": 0.4311144423103949, + "grad_norm": 0.5677078766207213, + "learning_rate": 4.5619781481957345e-06, + "loss": 0.2872, + "step": 9203 + }, + { + "epoch": 0.4311612873003232, + "grad_norm": 0.6029494585271123, + "learning_rate": 4.561870906987938e-06, + "loss": 0.2962, + "step": 9204 + }, + { + "epoch": 0.43120813229025157, + "grad_norm": 0.6490853116926083, + "learning_rate": 4.5617636539145926e-06, + "loss": 0.3011, + "step": 9205 + }, + { + "epoch": 0.43125497728017986, + "grad_norm": 0.6427580641952997, + "learning_rate": 4.561656388976316e-06, + "loss": 0.2911, + "step": 9206 + }, + { + "epoch": 0.4313018222701082, + "grad_norm": 0.6131946827233713, + "learning_rate": 4.5615491121737245e-06, + "loss": 0.2854, + "step": 9207 + }, + { + "epoch": 0.43134866726003657, + "grad_norm": 0.6331252612937815, + "learning_rate": 4.561441823507436e-06, + "loss": 0.2935, + "step": 9208 + }, + { + "epoch": 0.43139551224996486, + "grad_norm": 0.6100237373324523, + "learning_rate": 4.561334522978069e-06, + "loss": 0.276, + "step": 9209 + }, + { + "epoch": 0.4314423572398932, + "grad_norm": 0.5760851891790276, + "learning_rate": 4.561227210586239e-06, + "loss": 0.2886, + "step": 9210 + }, + { + "epoch": 0.4314892022298215, + "grad_norm": 0.5656323111213498, + "learning_rate": 4.561119886332565e-06, + "loss": 0.2819, + "step": 9211 + }, + { + "epoch": 0.43153604721974986, + "grad_norm": 0.6741528651323819, + "learning_rate": 4.561012550217664e-06, + "loss": 0.2751, + "step": 9212 + }, + { + "epoch": 0.43158289220967816, + "grad_norm": 0.5872214921011225, + "learning_rate": 4.560905202242153e-06, + "loss": 0.2773, + "step": 9213 + }, + { + "epoch": 0.4316297371996065, + "grad_norm": 0.5690753064432492, + "learning_rate": 4.560797842406651e-06, + "loss": 0.2674, + "step": 9214 + }, + { + "epoch": 0.4316765821895348, + "grad_norm": 0.6092781584536318, + "learning_rate": 4.560690470711776e-06, + "loss": 0.282, + "step": 9215 + }, + { + "epoch": 0.43172342717946316, + "grad_norm": 0.6529556414995478, + "learning_rate": 4.560583087158145e-06, + "loss": 0.3172, + "step": 9216 + }, + { + "epoch": 0.4317702721693915, + "grad_norm": 0.568180393235941, + "learning_rate": 4.5604756917463764e-06, + "loss": 0.2644, + "step": 9217 + }, + { + "epoch": 0.4318171171593198, + "grad_norm": 0.5812924652171365, + "learning_rate": 4.560368284477087e-06, + "loss": 0.2799, + "step": 9218 + }, + { + "epoch": 0.43186396214924816, + "grad_norm": 0.5949432741750247, + "learning_rate": 4.560260865350897e-06, + "loss": 0.2845, + "step": 9219 + }, + { + "epoch": 0.43191080713917646, + "grad_norm": 0.6376842740169248, + "learning_rate": 4.560153434368423e-06, + "loss": 0.3021, + "step": 9220 + }, + { + "epoch": 0.4319576521291048, + "grad_norm": 0.6092317661924138, + "learning_rate": 4.560045991530284e-06, + "loss": 0.3088, + "step": 9221 + }, + { + "epoch": 0.4320044971190331, + "grad_norm": 0.6075594167011112, + "learning_rate": 4.559938536837098e-06, + "loss": 0.2781, + "step": 9222 + }, + { + "epoch": 0.43205134210896146, + "grad_norm": 0.6339225861075876, + "learning_rate": 4.559831070289484e-06, + "loss": 0.2878, + "step": 9223 + }, + { + "epoch": 0.43209818709888975, + "grad_norm": 0.5736494909157599, + "learning_rate": 4.559723591888059e-06, + "loss": 0.2788, + "step": 9224 + }, + { + "epoch": 0.4321450320888181, + "grad_norm": 0.636599121564416, + "learning_rate": 4.559616101633443e-06, + "loss": 0.2994, + "step": 9225 + }, + { + "epoch": 0.4321918770787464, + "grad_norm": 0.6093227293298564, + "learning_rate": 4.5595085995262545e-06, + "loss": 0.2843, + "step": 9226 + }, + { + "epoch": 0.43223872206867475, + "grad_norm": 0.6365111667398141, + "learning_rate": 4.559401085567111e-06, + "loss": 0.3058, + "step": 9227 + }, + { + "epoch": 0.4322855670586031, + "grad_norm": 0.6307579623095745, + "learning_rate": 4.559293559756631e-06, + "loss": 0.2849, + "step": 9228 + }, + { + "epoch": 0.4323324120485314, + "grad_norm": 0.620501956615436, + "learning_rate": 4.559186022095435e-06, + "loss": 0.302, + "step": 9229 + }, + { + "epoch": 0.43237925703845975, + "grad_norm": 0.5807742655887319, + "learning_rate": 4.559078472584142e-06, + "loss": 0.2905, + "step": 9230 + }, + { + "epoch": 0.43242610202838805, + "grad_norm": 0.6004842904157849, + "learning_rate": 4.5589709112233684e-06, + "loss": 0.2937, + "step": 9231 + }, + { + "epoch": 0.4324729470183164, + "grad_norm": 0.5534082272222637, + "learning_rate": 4.558863338013736e-06, + "loss": 0.2777, + "step": 9232 + }, + { + "epoch": 0.4325197920082447, + "grad_norm": 0.6492004373970247, + "learning_rate": 4.558755752955861e-06, + "loss": 0.303, + "step": 9233 + }, + { + "epoch": 0.43256663699817305, + "grad_norm": 0.5690554155268348, + "learning_rate": 4.558648156050365e-06, + "loss": 0.2782, + "step": 9234 + }, + { + "epoch": 0.43261348198810134, + "grad_norm": 0.5537859370852455, + "learning_rate": 4.558540547297867e-06, + "loss": 0.2683, + "step": 9235 + }, + { + "epoch": 0.4326603269780297, + "grad_norm": 0.7006493234925909, + "learning_rate": 4.558432926698985e-06, + "loss": 0.3338, + "step": 9236 + }, + { + "epoch": 0.43270717196795805, + "grad_norm": 0.5694862696646684, + "learning_rate": 4.558325294254338e-06, + "loss": 0.2786, + "step": 9237 + }, + { + "epoch": 0.43275401695788634, + "grad_norm": 0.5931291443265352, + "learning_rate": 4.558217649964547e-06, + "loss": 0.2951, + "step": 9238 + }, + { + "epoch": 0.4328008619478147, + "grad_norm": 0.5843770903573181, + "learning_rate": 4.558109993830231e-06, + "loss": 0.2805, + "step": 9239 + }, + { + "epoch": 0.432847706937743, + "grad_norm": 0.6108289801038492, + "learning_rate": 4.558002325852009e-06, + "loss": 0.3135, + "step": 9240 + }, + { + "epoch": 0.43289455192767134, + "grad_norm": 0.6499008398905805, + "learning_rate": 4.557894646030501e-06, + "loss": 0.301, + "step": 9241 + }, + { + "epoch": 0.43294139691759964, + "grad_norm": 0.6090959344338257, + "learning_rate": 4.557786954366325e-06, + "loss": 0.2899, + "step": 9242 + }, + { + "epoch": 0.432988241907528, + "grad_norm": 0.6082589869104642, + "learning_rate": 4.557679250860104e-06, + "loss": 0.2952, + "step": 9243 + }, + { + "epoch": 0.4330350868974563, + "grad_norm": 0.5871577349544592, + "learning_rate": 4.557571535512456e-06, + "loss": 0.267, + "step": 9244 + }, + { + "epoch": 0.43308193188738464, + "grad_norm": 0.6081669372177206, + "learning_rate": 4.557463808324001e-06, + "loss": 0.2815, + "step": 9245 + }, + { + "epoch": 0.433128776877313, + "grad_norm": 0.5361610551668403, + "learning_rate": 4.557356069295359e-06, + "loss": 0.2571, + "step": 9246 + }, + { + "epoch": 0.4331756218672413, + "grad_norm": 0.6278185679548549, + "learning_rate": 4.557248318427149e-06, + "loss": 0.2872, + "step": 9247 + }, + { + "epoch": 0.43322246685716964, + "grad_norm": 0.6684552030741275, + "learning_rate": 4.557140555719992e-06, + "loss": 0.3024, + "step": 9248 + }, + { + "epoch": 0.43326931184709794, + "grad_norm": 0.5809345518279103, + "learning_rate": 4.55703278117451e-06, + "loss": 0.2834, + "step": 9249 + }, + { + "epoch": 0.4333161568370263, + "grad_norm": 0.6062387328126018, + "learning_rate": 4.556924994791319e-06, + "loss": 0.278, + "step": 9250 + }, + { + "epoch": 0.4333630018269546, + "grad_norm": 0.6740213150167043, + "learning_rate": 4.556817196571043e-06, + "loss": 0.3072, + "step": 9251 + }, + { + "epoch": 0.43340984681688294, + "grad_norm": 0.6256322953005489, + "learning_rate": 4.5567093865143005e-06, + "loss": 0.2984, + "step": 9252 + }, + { + "epoch": 0.43345669180681123, + "grad_norm": 0.5973129134323689, + "learning_rate": 4.556601564621713e-06, + "loss": 0.2935, + "step": 9253 + }, + { + "epoch": 0.4335035367967396, + "grad_norm": 0.5883356787103908, + "learning_rate": 4.5564937308938995e-06, + "loss": 0.2678, + "step": 9254 + }, + { + "epoch": 0.43355038178666794, + "grad_norm": 0.5951792272985706, + "learning_rate": 4.556385885331483e-06, + "loss": 0.292, + "step": 9255 + }, + { + "epoch": 0.43359722677659623, + "grad_norm": 0.5970526198417524, + "learning_rate": 4.55627802793508e-06, + "loss": 0.2869, + "step": 9256 + }, + { + "epoch": 0.4336440717665246, + "grad_norm": 0.6475257989864979, + "learning_rate": 4.556170158705316e-06, + "loss": 0.2915, + "step": 9257 + }, + { + "epoch": 0.4336909167564529, + "grad_norm": 0.5529184568734667, + "learning_rate": 4.556062277642809e-06, + "loss": 0.2892, + "step": 9258 + }, + { + "epoch": 0.43373776174638123, + "grad_norm": 0.5950972151243014, + "learning_rate": 4.555954384748181e-06, + "loss": 0.2878, + "step": 9259 + }, + { + "epoch": 0.43378460673630953, + "grad_norm": 0.5894770629373538, + "learning_rate": 4.5558464800220505e-06, + "loss": 0.2899, + "step": 9260 + }, + { + "epoch": 0.4338314517262379, + "grad_norm": 0.6082350858272866, + "learning_rate": 4.555738563465041e-06, + "loss": 0.2966, + "step": 9261 + }, + { + "epoch": 0.4338782967161662, + "grad_norm": 0.6437725475131386, + "learning_rate": 4.555630635077773e-06, + "loss": 0.3047, + "step": 9262 + }, + { + "epoch": 0.43392514170609453, + "grad_norm": 0.5675291744276737, + "learning_rate": 4.555522694860867e-06, + "loss": 0.2956, + "step": 9263 + }, + { + "epoch": 0.4339719866960229, + "grad_norm": 0.5557789435134339, + "learning_rate": 4.555414742814946e-06, + "loss": 0.2989, + "step": 9264 + }, + { + "epoch": 0.4340188316859512, + "grad_norm": 0.8586038308122453, + "learning_rate": 4.555306778940628e-06, + "loss": 0.2993, + "step": 9265 + }, + { + "epoch": 0.43406567667587953, + "grad_norm": 0.6030498466670531, + "learning_rate": 4.555198803238536e-06, + "loss": 0.3, + "step": 9266 + }, + { + "epoch": 0.4341125216658078, + "grad_norm": 0.6127660490540042, + "learning_rate": 4.555090815709292e-06, + "loss": 0.2804, + "step": 9267 + }, + { + "epoch": 0.4341593666557362, + "grad_norm": 0.673077208153458, + "learning_rate": 4.554982816353518e-06, + "loss": 0.3017, + "step": 9268 + }, + { + "epoch": 0.4342062116456645, + "grad_norm": 0.6242771971625818, + "learning_rate": 4.554874805171833e-06, + "loss": 0.2933, + "step": 9269 + }, + { + "epoch": 0.4342530566355928, + "grad_norm": 0.5859005904344182, + "learning_rate": 4.55476678216486e-06, + "loss": 0.3035, + "step": 9270 + }, + { + "epoch": 0.4342999016255211, + "grad_norm": 0.5923794593246604, + "learning_rate": 4.554658747333221e-06, + "loss": 0.3091, + "step": 9271 + }, + { + "epoch": 0.4343467466154495, + "grad_norm": 0.5954765184474548, + "learning_rate": 4.554550700677537e-06, + "loss": 0.29, + "step": 9272 + }, + { + "epoch": 0.4343935916053778, + "grad_norm": 0.6043969384143315, + "learning_rate": 4.554442642198431e-06, + "loss": 0.3049, + "step": 9273 + }, + { + "epoch": 0.4344404365953061, + "grad_norm": 0.6064564296186133, + "learning_rate": 4.554334571896523e-06, + "loss": 0.2759, + "step": 9274 + }, + { + "epoch": 0.4344872815852345, + "grad_norm": 0.6315395431233468, + "learning_rate": 4.554226489772437e-06, + "loss": 0.3004, + "step": 9275 + }, + { + "epoch": 0.43453412657516277, + "grad_norm": 0.5740497063133485, + "learning_rate": 4.554118395826792e-06, + "loss": 0.2713, + "step": 9276 + }, + { + "epoch": 0.4345809715650911, + "grad_norm": 0.6314356933223013, + "learning_rate": 4.554010290060214e-06, + "loss": 0.2861, + "step": 9277 + }, + { + "epoch": 0.4346278165550194, + "grad_norm": 0.5974368714820999, + "learning_rate": 4.5539021724733216e-06, + "loss": 0.3007, + "step": 9278 + }, + { + "epoch": 0.43467466154494777, + "grad_norm": 0.5512338042552772, + "learning_rate": 4.553794043066739e-06, + "loss": 0.2766, + "step": 9279 + }, + { + "epoch": 0.43472150653487607, + "grad_norm": 0.5784642631730783, + "learning_rate": 4.553685901841087e-06, + "loss": 0.2823, + "step": 9280 + }, + { + "epoch": 0.4347683515248044, + "grad_norm": 0.6550321082740598, + "learning_rate": 4.55357774879699e-06, + "loss": 0.2936, + "step": 9281 + }, + { + "epoch": 0.43481519651473277, + "grad_norm": 0.591304304645967, + "learning_rate": 4.553469583935069e-06, + "loss": 0.2676, + "step": 9282 + }, + { + "epoch": 0.43486204150466107, + "grad_norm": 0.5956002034593325, + "learning_rate": 4.553361407255946e-06, + "loss": 0.2822, + "step": 9283 + }, + { + "epoch": 0.4349088864945894, + "grad_norm": 0.5987313765931175, + "learning_rate": 4.5532532187602455e-06, + "loss": 0.288, + "step": 9284 + }, + { + "epoch": 0.4349557314845177, + "grad_norm": 0.5323302792993754, + "learning_rate": 4.553145018448587e-06, + "loss": 0.2712, + "step": 9285 + }, + { + "epoch": 0.43500257647444607, + "grad_norm": 0.6041709703167041, + "learning_rate": 4.5530368063215966e-06, + "loss": 0.2715, + "step": 9286 + }, + { + "epoch": 0.43504942146437436, + "grad_norm": 0.6533177496971195, + "learning_rate": 4.552928582379894e-06, + "loss": 0.2869, + "step": 9287 + }, + { + "epoch": 0.4350962664543027, + "grad_norm": 0.6571358969625524, + "learning_rate": 4.552820346624105e-06, + "loss": 0.2891, + "step": 9288 + }, + { + "epoch": 0.435143111444231, + "grad_norm": 0.6116501244053085, + "learning_rate": 4.552712099054849e-06, + "loss": 0.3008, + "step": 9289 + }, + { + "epoch": 0.43518995643415936, + "grad_norm": 0.5989992672378389, + "learning_rate": 4.552603839672752e-06, + "loss": 0.2935, + "step": 9290 + }, + { + "epoch": 0.4352368014240877, + "grad_norm": 0.5946146874709934, + "learning_rate": 4.552495568478436e-06, + "loss": 0.2998, + "step": 9291 + }, + { + "epoch": 0.435283646414016, + "grad_norm": 0.5452235077188149, + "learning_rate": 4.552387285472523e-06, + "loss": 0.2769, + "step": 9292 + }, + { + "epoch": 0.43533049140394436, + "grad_norm": 0.6440149322602404, + "learning_rate": 4.552278990655637e-06, + "loss": 0.2998, + "step": 9293 + }, + { + "epoch": 0.43537733639387266, + "grad_norm": 0.6510307321980953, + "learning_rate": 4.552170684028402e-06, + "loss": 0.309, + "step": 9294 + }, + { + "epoch": 0.435424181383801, + "grad_norm": 0.5887957683399998, + "learning_rate": 4.5520623655914405e-06, + "loss": 0.2807, + "step": 9295 + }, + { + "epoch": 0.4354710263737293, + "grad_norm": 0.6115104573465706, + "learning_rate": 4.551954035345375e-06, + "loss": 0.3001, + "step": 9296 + }, + { + "epoch": 0.43551787136365766, + "grad_norm": 0.5988086133590937, + "learning_rate": 4.55184569329083e-06, + "loss": 0.2935, + "step": 9297 + }, + { + "epoch": 0.43556471635358596, + "grad_norm": 0.6590892153151622, + "learning_rate": 4.551737339428429e-06, + "loss": 0.3083, + "step": 9298 + }, + { + "epoch": 0.4356115613435143, + "grad_norm": 0.5186327239948626, + "learning_rate": 4.551628973758796e-06, + "loss": 0.2669, + "step": 9299 + }, + { + "epoch": 0.43565840633344266, + "grad_norm": 0.6322664069196791, + "learning_rate": 4.551520596282553e-06, + "loss": 0.2832, + "step": 9300 + }, + { + "epoch": 0.43570525132337096, + "grad_norm": 0.6468519843624938, + "learning_rate": 4.551412207000325e-06, + "loss": 0.2984, + "step": 9301 + }, + { + "epoch": 0.4357520963132993, + "grad_norm": 0.5718654829363622, + "learning_rate": 4.551303805912735e-06, + "loss": 0.2846, + "step": 9302 + }, + { + "epoch": 0.4357989413032276, + "grad_norm": 0.6611890489328248, + "learning_rate": 4.551195393020408e-06, + "loss": 0.3026, + "step": 9303 + }, + { + "epoch": 0.43584578629315596, + "grad_norm": 0.5649501910411417, + "learning_rate": 4.551086968323965e-06, + "loss": 0.2967, + "step": 9304 + }, + { + "epoch": 0.43589263128308425, + "grad_norm": 0.5945763987062138, + "learning_rate": 4.550978531824035e-06, + "loss": 0.284, + "step": 9305 + }, + { + "epoch": 0.4359394762730126, + "grad_norm": 0.5952970648128799, + "learning_rate": 4.550870083521237e-06, + "loss": 0.2925, + "step": 9306 + }, + { + "epoch": 0.4359863212629409, + "grad_norm": 0.5893196839346273, + "learning_rate": 4.550761623416198e-06, + "loss": 0.2814, + "step": 9307 + }, + { + "epoch": 0.43603316625286925, + "grad_norm": 0.6023594033498152, + "learning_rate": 4.550653151509541e-06, + "loss": 0.3024, + "step": 9308 + }, + { + "epoch": 0.4360800112427976, + "grad_norm": 0.6642143075221709, + "learning_rate": 4.55054466780189e-06, + "loss": 0.2783, + "step": 9309 + }, + { + "epoch": 0.4361268562327259, + "grad_norm": 0.655373834985085, + "learning_rate": 4.5504361722938705e-06, + "loss": 0.298, + "step": 9310 + }, + { + "epoch": 0.43617370122265425, + "grad_norm": 0.5950677154727367, + "learning_rate": 4.5503276649861054e-06, + "loss": 0.2844, + "step": 9311 + }, + { + "epoch": 0.43622054621258255, + "grad_norm": 0.6153983129442624, + "learning_rate": 4.5502191458792214e-06, + "loss": 0.3094, + "step": 9312 + }, + { + "epoch": 0.4362673912025109, + "grad_norm": 0.5849519355413463, + "learning_rate": 4.550110614973839e-06, + "loss": 0.2734, + "step": 9313 + }, + { + "epoch": 0.4363142361924392, + "grad_norm": 0.5639569936748935, + "learning_rate": 4.550002072270588e-06, + "loss": 0.288, + "step": 9314 + }, + { + "epoch": 0.43636108118236755, + "grad_norm": 0.5911375627244932, + "learning_rate": 4.549893517770089e-06, + "loss": 0.2892, + "step": 9315 + }, + { + "epoch": 0.43640792617229585, + "grad_norm": 0.6219708965150834, + "learning_rate": 4.549784951472968e-06, + "loss": 0.3062, + "step": 9316 + }, + { + "epoch": 0.4364547711622242, + "grad_norm": 0.6084928467610956, + "learning_rate": 4.549676373379851e-06, + "loss": 0.2754, + "step": 9317 + }, + { + "epoch": 0.43650161615215255, + "grad_norm": 0.5908457244420238, + "learning_rate": 4.54956778349136e-06, + "loss": 0.2841, + "step": 9318 + }, + { + "epoch": 0.43654846114208085, + "grad_norm": 0.607841097525629, + "learning_rate": 4.549459181808122e-06, + "loss": 0.291, + "step": 9319 + }, + { + "epoch": 0.4365953061320092, + "grad_norm": 0.5967720672365016, + "learning_rate": 4.5493505683307616e-06, + "loss": 0.2836, + "step": 9320 + }, + { + "epoch": 0.4366421511219375, + "grad_norm": 0.6067353742345539, + "learning_rate": 4.5492419430599045e-06, + "loss": 0.2966, + "step": 9321 + }, + { + "epoch": 0.43668899611186585, + "grad_norm": 0.5615832132422391, + "learning_rate": 4.549133305996174e-06, + "loss": 0.2844, + "step": 9322 + }, + { + "epoch": 0.43673584110179414, + "grad_norm": 0.6205688404828391, + "learning_rate": 4.549024657140197e-06, + "loss": 0.281, + "step": 9323 + }, + { + "epoch": 0.4367826860917225, + "grad_norm": 0.5737330791930265, + "learning_rate": 4.548915996492598e-06, + "loss": 0.2768, + "step": 9324 + }, + { + "epoch": 0.4368295310816508, + "grad_norm": 0.6723713007436397, + "learning_rate": 4.548807324054002e-06, + "loss": 0.2839, + "step": 9325 + }, + { + "epoch": 0.43687637607157914, + "grad_norm": 0.6633046536355478, + "learning_rate": 4.5486986398250355e-06, + "loss": 0.2928, + "step": 9326 + }, + { + "epoch": 0.4369232210615075, + "grad_norm": 0.6161866649201698, + "learning_rate": 4.548589943806323e-06, + "loss": 0.274, + "step": 9327 + }, + { + "epoch": 0.4369700660514358, + "grad_norm": 0.5992411715414568, + "learning_rate": 4.54848123599849e-06, + "loss": 0.2888, + "step": 9328 + }, + { + "epoch": 0.43701691104136414, + "grad_norm": 0.6008641040820683, + "learning_rate": 4.548372516402162e-06, + "loss": 0.2886, + "step": 9329 + }, + { + "epoch": 0.43706375603129244, + "grad_norm": 0.6481602970219275, + "learning_rate": 4.548263785017966e-06, + "loss": 0.3031, + "step": 9330 + }, + { + "epoch": 0.4371106010212208, + "grad_norm": 0.5998066992398418, + "learning_rate": 4.548155041846526e-06, + "loss": 0.3069, + "step": 9331 + }, + { + "epoch": 0.4371574460111491, + "grad_norm": 0.6079023584631082, + "learning_rate": 4.548046286888469e-06, + "loss": 0.2862, + "step": 9332 + }, + { + "epoch": 0.43720429100107744, + "grad_norm": 0.6676416563907838, + "learning_rate": 4.54793752014442e-06, + "loss": 0.2901, + "step": 9333 + }, + { + "epoch": 0.43725113599100573, + "grad_norm": 0.570366345248749, + "learning_rate": 4.547828741615006e-06, + "loss": 0.2706, + "step": 9334 + }, + { + "epoch": 0.4372979809809341, + "grad_norm": 0.6353108203703303, + "learning_rate": 4.5477199513008514e-06, + "loss": 0.3102, + "step": 9335 + }, + { + "epoch": 0.43734482597086244, + "grad_norm": 0.6399103617202018, + "learning_rate": 4.547611149202583e-06, + "loss": 0.3085, + "step": 9336 + }, + { + "epoch": 0.43739167096079073, + "grad_norm": 0.63802080408167, + "learning_rate": 4.547502335320828e-06, + "loss": 0.2935, + "step": 9337 + }, + { + "epoch": 0.4374385159507191, + "grad_norm": 0.5816606387984045, + "learning_rate": 4.547393509656212e-06, + "loss": 0.2774, + "step": 9338 + }, + { + "epoch": 0.4374853609406474, + "grad_norm": 0.6288465160042661, + "learning_rate": 4.5472846722093585e-06, + "loss": 0.2908, + "step": 9339 + }, + { + "epoch": 0.43753220593057573, + "grad_norm": 0.6568364271511951, + "learning_rate": 4.547175822980899e-06, + "loss": 0.316, + "step": 9340 + }, + { + "epoch": 0.43757905092050403, + "grad_norm": 0.5840901406095407, + "learning_rate": 4.547066961971456e-06, + "loss": 0.2712, + "step": 9341 + }, + { + "epoch": 0.4376258959104324, + "grad_norm": 0.610632718178045, + "learning_rate": 4.546958089181657e-06, + "loss": 0.2964, + "step": 9342 + }, + { + "epoch": 0.4376727409003607, + "grad_norm": 0.6329038972175036, + "learning_rate": 4.546849204612129e-06, + "loss": 0.3042, + "step": 9343 + }, + { + "epoch": 0.43771958589028903, + "grad_norm": 0.6019392125641141, + "learning_rate": 4.5467403082634985e-06, + "loss": 0.3018, + "step": 9344 + }, + { + "epoch": 0.4377664308802174, + "grad_norm": 0.6817532690941581, + "learning_rate": 4.546631400136392e-06, + "loss": 0.3107, + "step": 9345 + }, + { + "epoch": 0.4378132758701457, + "grad_norm": 0.6395607001939451, + "learning_rate": 4.5465224802314355e-06, + "loss": 0.3122, + "step": 9346 + }, + { + "epoch": 0.43786012086007403, + "grad_norm": 0.6414116164521196, + "learning_rate": 4.5464135485492575e-06, + "loss": 0.2912, + "step": 9347 + }, + { + "epoch": 0.4379069658500023, + "grad_norm": 0.6156375602594851, + "learning_rate": 4.546304605090482e-06, + "loss": 0.293, + "step": 9348 + }, + { + "epoch": 0.4379538108399307, + "grad_norm": 0.5405944726190204, + "learning_rate": 4.54619564985574e-06, + "loss": 0.2608, + "step": 9349 + }, + { + "epoch": 0.438000655829859, + "grad_norm": 0.6378245395460393, + "learning_rate": 4.546086682845655e-06, + "loss": 0.2923, + "step": 9350 + }, + { + "epoch": 0.4380475008197873, + "grad_norm": 0.5750459364473008, + "learning_rate": 4.5459777040608565e-06, + "loss": 0.2787, + "step": 9351 + }, + { + "epoch": 0.4380943458097156, + "grad_norm": 0.6569439370597667, + "learning_rate": 4.54586871350197e-06, + "loss": 0.2923, + "step": 9352 + }, + { + "epoch": 0.438141190799644, + "grad_norm": 0.6063248580639987, + "learning_rate": 4.545759711169622e-06, + "loss": 0.2858, + "step": 9353 + }, + { + "epoch": 0.4381880357895723, + "grad_norm": 0.6055886773478619, + "learning_rate": 4.545650697064443e-06, + "loss": 0.299, + "step": 9354 + }, + { + "epoch": 0.4382348807795006, + "grad_norm": 0.5587208439624445, + "learning_rate": 4.545541671187058e-06, + "loss": 0.2693, + "step": 9355 + }, + { + "epoch": 0.438281725769429, + "grad_norm": 0.5732639251224061, + "learning_rate": 4.545432633538094e-06, + "loss": 0.3045, + "step": 9356 + }, + { + "epoch": 0.43832857075935727, + "grad_norm": 0.6060518338751361, + "learning_rate": 4.54532358411818e-06, + "loss": 0.2912, + "step": 9357 + }, + { + "epoch": 0.4383754157492856, + "grad_norm": 0.6391594612640443, + "learning_rate": 4.5452145229279425e-06, + "loss": 0.3127, + "step": 9358 + }, + { + "epoch": 0.4384222607392139, + "grad_norm": 0.6667356325617008, + "learning_rate": 4.545105449968009e-06, + "loss": 0.2965, + "step": 9359 + }, + { + "epoch": 0.43846910572914227, + "grad_norm": 0.5936707385325988, + "learning_rate": 4.544996365239008e-06, + "loss": 0.2735, + "step": 9360 + }, + { + "epoch": 0.43851595071907057, + "grad_norm": 0.6466208206593598, + "learning_rate": 4.5448872687415675e-06, + "loss": 0.2998, + "step": 9361 + }, + { + "epoch": 0.4385627957089989, + "grad_norm": 0.5677234113958435, + "learning_rate": 4.544778160476314e-06, + "loss": 0.2945, + "step": 9362 + }, + { + "epoch": 0.43860964069892727, + "grad_norm": 0.6654148678067348, + "learning_rate": 4.544669040443877e-06, + "loss": 0.3136, + "step": 9363 + }, + { + "epoch": 0.43865648568885557, + "grad_norm": 0.5884446850313448, + "learning_rate": 4.544559908644883e-06, + "loss": 0.2791, + "step": 9364 + }, + { + "epoch": 0.4387033306787839, + "grad_norm": 0.6246548321268984, + "learning_rate": 4.54445076507996e-06, + "loss": 0.3073, + "step": 9365 + }, + { + "epoch": 0.4387501756687122, + "grad_norm": 0.552985047261081, + "learning_rate": 4.544341609749737e-06, + "loss": 0.2722, + "step": 9366 + }, + { + "epoch": 0.43879702065864057, + "grad_norm": 0.6236673141960866, + "learning_rate": 4.544232442654842e-06, + "loss": 0.2905, + "step": 9367 + }, + { + "epoch": 0.43884386564856886, + "grad_norm": 0.5905899199039967, + "learning_rate": 4.544123263795904e-06, + "loss": 0.273, + "step": 9368 + }, + { + "epoch": 0.4388907106384972, + "grad_norm": 0.5781935717953592, + "learning_rate": 4.544014073173549e-06, + "loss": 0.2736, + "step": 9369 + }, + { + "epoch": 0.4389375556284255, + "grad_norm": 0.5657403255817173, + "learning_rate": 4.543904870788408e-06, + "loss": 0.2805, + "step": 9370 + }, + { + "epoch": 0.43898440061835386, + "grad_norm": 0.7006803123957143, + "learning_rate": 4.543795656641108e-06, + "loss": 0.3171, + "step": 9371 + }, + { + "epoch": 0.4390312456082822, + "grad_norm": 0.5958391110548665, + "learning_rate": 4.543686430732277e-06, + "loss": 0.2907, + "step": 9372 + }, + { + "epoch": 0.4390780905982105, + "grad_norm": 0.5641766973069423, + "learning_rate": 4.543577193062545e-06, + "loss": 0.2988, + "step": 9373 + }, + { + "epoch": 0.43912493558813886, + "grad_norm": 0.6195868459641068, + "learning_rate": 4.543467943632539e-06, + "loss": 0.2889, + "step": 9374 + }, + { + "epoch": 0.43917178057806716, + "grad_norm": 0.5920223153475089, + "learning_rate": 4.543358682442889e-06, + "loss": 0.2934, + "step": 9375 + }, + { + "epoch": 0.4392186255679955, + "grad_norm": 0.568949782333206, + "learning_rate": 4.543249409494223e-06, + "loss": 0.2796, + "step": 9376 + }, + { + "epoch": 0.4392654705579238, + "grad_norm": 0.5659095495174649, + "learning_rate": 4.543140124787171e-06, + "loss": 0.2816, + "step": 9377 + }, + { + "epoch": 0.43931231554785216, + "grad_norm": 0.6144303408180588, + "learning_rate": 4.54303082832236e-06, + "loss": 0.3147, + "step": 9378 + }, + { + "epoch": 0.43935916053778046, + "grad_norm": 0.5751053111743802, + "learning_rate": 4.542921520100421e-06, + "loss": 0.2721, + "step": 9379 + }, + { + "epoch": 0.4394060055277088, + "grad_norm": 0.63077962577165, + "learning_rate": 4.542812200121982e-06, + "loss": 0.2805, + "step": 9380 + }, + { + "epoch": 0.43945285051763716, + "grad_norm": 0.6526880135455067, + "learning_rate": 4.542702868387672e-06, + "loss": 0.293, + "step": 9381 + }, + { + "epoch": 0.43949969550756546, + "grad_norm": 0.6228816357829994, + "learning_rate": 4.542593524898121e-06, + "loss": 0.303, + "step": 9382 + }, + { + "epoch": 0.4395465404974938, + "grad_norm": 0.6020153237534078, + "learning_rate": 4.542484169653957e-06, + "loss": 0.3, + "step": 9383 + }, + { + "epoch": 0.4395933854874221, + "grad_norm": 0.5766608676996181, + "learning_rate": 4.54237480265581e-06, + "loss": 0.2886, + "step": 9384 + }, + { + "epoch": 0.43964023047735046, + "grad_norm": 0.563771498716549, + "learning_rate": 4.5422654239043095e-06, + "loss": 0.2802, + "step": 9385 + }, + { + "epoch": 0.43968707546727875, + "grad_norm": 0.5777016966399956, + "learning_rate": 4.5421560334000845e-06, + "loss": 0.2794, + "step": 9386 + }, + { + "epoch": 0.4397339204572071, + "grad_norm": 0.6168225766754095, + "learning_rate": 4.542046631143765e-06, + "loss": 0.2879, + "step": 9387 + }, + { + "epoch": 0.4397807654471354, + "grad_norm": 0.6267045593837297, + "learning_rate": 4.5419372171359804e-06, + "loss": 0.2977, + "step": 9388 + }, + { + "epoch": 0.43982761043706375, + "grad_norm": 0.63393120330197, + "learning_rate": 4.541827791377361e-06, + "loss": 0.3053, + "step": 9389 + }, + { + "epoch": 0.4398744554269921, + "grad_norm": 0.6248613702943838, + "learning_rate": 4.541718353868535e-06, + "loss": 0.2916, + "step": 9390 + }, + { + "epoch": 0.4399213004169204, + "grad_norm": 0.6196515951408856, + "learning_rate": 4.541608904610133e-06, + "loss": 0.3092, + "step": 9391 + }, + { + "epoch": 0.43996814540684875, + "grad_norm": 0.6244071985510593, + "learning_rate": 4.541499443602785e-06, + "loss": 0.3012, + "step": 9392 + }, + { + "epoch": 0.44001499039677705, + "grad_norm": 0.6143733135008766, + "learning_rate": 4.541389970847121e-06, + "loss": 0.305, + "step": 9393 + }, + { + "epoch": 0.4400618353867054, + "grad_norm": 0.6194096404214705, + "learning_rate": 4.541280486343771e-06, + "loss": 0.3038, + "step": 9394 + }, + { + "epoch": 0.4401086803766337, + "grad_norm": 0.6122779260817552, + "learning_rate": 4.541170990093364e-06, + "loss": 0.295, + "step": 9395 + }, + { + "epoch": 0.44015552536656205, + "grad_norm": 0.6072873368677716, + "learning_rate": 4.541061482096532e-06, + "loss": 0.2948, + "step": 9396 + }, + { + "epoch": 0.44020237035649035, + "grad_norm": 0.6342008268817747, + "learning_rate": 4.540951962353904e-06, + "loss": 0.3018, + "step": 9397 + }, + { + "epoch": 0.4402492153464187, + "grad_norm": 0.6209685728536581, + "learning_rate": 4.5408424308661104e-06, + "loss": 0.3152, + "step": 9398 + }, + { + "epoch": 0.44029606033634705, + "grad_norm": 0.579612417688979, + "learning_rate": 4.5407328876337805e-06, + "loss": 0.2986, + "step": 9399 + }, + { + "epoch": 0.44034290532627535, + "grad_norm": 0.64404763026545, + "learning_rate": 4.540623332657547e-06, + "loss": 0.3018, + "step": 9400 + }, + { + "epoch": 0.4403897503162037, + "grad_norm": 0.5692647087773425, + "learning_rate": 4.54051376593804e-06, + "loss": 0.2951, + "step": 9401 + }, + { + "epoch": 0.440436595306132, + "grad_norm": 0.6020072559958531, + "learning_rate": 4.540404187475887e-06, + "loss": 0.2805, + "step": 9402 + }, + { + "epoch": 0.44048344029606035, + "grad_norm": 0.5761196930561395, + "learning_rate": 4.5402945972717216e-06, + "loss": 0.2597, + "step": 9403 + }, + { + "epoch": 0.44053028528598864, + "grad_norm": 0.5871863925634113, + "learning_rate": 4.540184995326174e-06, + "loss": 0.2873, + "step": 9404 + }, + { + "epoch": 0.440577130275917, + "grad_norm": 0.5870289901985517, + "learning_rate": 4.540075381639874e-06, + "loss": 0.2752, + "step": 9405 + }, + { + "epoch": 0.4406239752658453, + "grad_norm": 0.5842085942472465, + "learning_rate": 4.539965756213453e-06, + "loss": 0.2745, + "step": 9406 + }, + { + "epoch": 0.44067082025577364, + "grad_norm": 0.5957318580651969, + "learning_rate": 4.539856119047542e-06, + "loss": 0.2786, + "step": 9407 + }, + { + "epoch": 0.440717665245702, + "grad_norm": 0.5647034354115262, + "learning_rate": 4.539746470142772e-06, + "loss": 0.2923, + "step": 9408 + }, + { + "epoch": 0.4407645102356303, + "grad_norm": 0.595789586639055, + "learning_rate": 4.539636809499772e-06, + "loss": 0.3026, + "step": 9409 + }, + { + "epoch": 0.44081135522555864, + "grad_norm": 0.6147303024933349, + "learning_rate": 4.539527137119177e-06, + "loss": 0.3096, + "step": 9410 + }, + { + "epoch": 0.44085820021548694, + "grad_norm": 0.5997515252121832, + "learning_rate": 4.539417453001615e-06, + "loss": 0.3033, + "step": 9411 + }, + { + "epoch": 0.4409050452054153, + "grad_norm": 0.6402559903589173, + "learning_rate": 4.539307757147718e-06, + "loss": 0.2997, + "step": 9412 + }, + { + "epoch": 0.4409518901953436, + "grad_norm": 0.6026403401213944, + "learning_rate": 4.539198049558118e-06, + "loss": 0.2824, + "step": 9413 + }, + { + "epoch": 0.44099873518527194, + "grad_norm": 0.5592111433394948, + "learning_rate": 4.539088330233446e-06, + "loss": 0.2748, + "step": 9414 + }, + { + "epoch": 0.44104558017520024, + "grad_norm": 0.6123592703149627, + "learning_rate": 4.538978599174332e-06, + "loss": 0.3071, + "step": 9415 + }, + { + "epoch": 0.4410924251651286, + "grad_norm": 0.5769731264479584, + "learning_rate": 4.538868856381408e-06, + "loss": 0.2915, + "step": 9416 + }, + { + "epoch": 0.44113927015505694, + "grad_norm": 0.5689988172746797, + "learning_rate": 4.538759101855309e-06, + "loss": 0.2756, + "step": 9417 + }, + { + "epoch": 0.44118611514498524, + "grad_norm": 0.5980415253807763, + "learning_rate": 4.538649335596662e-06, + "loss": 0.2803, + "step": 9418 + }, + { + "epoch": 0.4412329601349136, + "grad_norm": 0.5795293088575483, + "learning_rate": 4.538539557606101e-06, + "loss": 0.2802, + "step": 9419 + }, + { + "epoch": 0.4412798051248419, + "grad_norm": 0.633970949037154, + "learning_rate": 4.5384297678842555e-06, + "loss": 0.2797, + "step": 9420 + }, + { + "epoch": 0.44132665011477024, + "grad_norm": 0.6486492247164224, + "learning_rate": 4.5383199664317615e-06, + "loss": 0.2856, + "step": 9421 + }, + { + "epoch": 0.44137349510469853, + "grad_norm": 0.6050645828290053, + "learning_rate": 4.538210153249247e-06, + "loss": 0.282, + "step": 9422 + }, + { + "epoch": 0.4414203400946269, + "grad_norm": 0.5601149712268889, + "learning_rate": 4.538100328337346e-06, + "loss": 0.2593, + "step": 9423 + }, + { + "epoch": 0.4414671850845552, + "grad_norm": 0.6621805676842736, + "learning_rate": 4.53799049169669e-06, + "loss": 0.2824, + "step": 9424 + }, + { + "epoch": 0.44151403007448353, + "grad_norm": 0.6644643905444123, + "learning_rate": 4.53788064332791e-06, + "loss": 0.3108, + "step": 9425 + }, + { + "epoch": 0.4415608750644119, + "grad_norm": 0.6714698526986808, + "learning_rate": 4.537770783231639e-06, + "loss": 0.3004, + "step": 9426 + }, + { + "epoch": 0.4416077200543402, + "grad_norm": 0.5884691289075693, + "learning_rate": 4.53766091140851e-06, + "loss": 0.2675, + "step": 9427 + }, + { + "epoch": 0.44165456504426853, + "grad_norm": 0.5525264311087901, + "learning_rate": 4.537551027859155e-06, + "loss": 0.2543, + "step": 9428 + }, + { + "epoch": 0.44170141003419683, + "grad_norm": 0.6147821324580715, + "learning_rate": 4.537441132584205e-06, + "loss": 0.2737, + "step": 9429 + }, + { + "epoch": 0.4417482550241252, + "grad_norm": 0.6130174784436152, + "learning_rate": 4.537331225584294e-06, + "loss": 0.285, + "step": 9430 + }, + { + "epoch": 0.4417951000140535, + "grad_norm": 0.6060181994227921, + "learning_rate": 4.537221306860053e-06, + "loss": 0.2872, + "step": 9431 + }, + { + "epoch": 0.44184194500398183, + "grad_norm": 0.6744733483131375, + "learning_rate": 4.537111376412117e-06, + "loss": 0.2935, + "step": 9432 + }, + { + "epoch": 0.4418887899939101, + "grad_norm": 0.6898774226935142, + "learning_rate": 4.537001434241116e-06, + "loss": 0.3099, + "step": 9433 + }, + { + "epoch": 0.4419356349838385, + "grad_norm": 0.5867713267287009, + "learning_rate": 4.5368914803476835e-06, + "loss": 0.2877, + "step": 9434 + }, + { + "epoch": 0.44198247997376683, + "grad_norm": 0.5993689766203883, + "learning_rate": 4.536781514732453e-06, + "loss": 0.2984, + "step": 9435 + }, + { + "epoch": 0.4420293249636951, + "grad_norm": 0.5914722402763616, + "learning_rate": 4.536671537396057e-06, + "loss": 0.2702, + "step": 9436 + }, + { + "epoch": 0.4420761699536235, + "grad_norm": 0.6962042116883265, + "learning_rate": 4.536561548339128e-06, + "loss": 0.3357, + "step": 9437 + }, + { + "epoch": 0.4421230149435518, + "grad_norm": 0.6171702907344379, + "learning_rate": 4.536451547562299e-06, + "loss": 0.2844, + "step": 9438 + }, + { + "epoch": 0.4421698599334801, + "grad_norm": 0.5823095099980226, + "learning_rate": 4.536341535066203e-06, + "loss": 0.2682, + "step": 9439 + }, + { + "epoch": 0.4422167049234084, + "grad_norm": 0.6410457540996066, + "learning_rate": 4.536231510851473e-06, + "loss": 0.3056, + "step": 9440 + }, + { + "epoch": 0.4422635499133368, + "grad_norm": 0.5953425083018281, + "learning_rate": 4.536121474918743e-06, + "loss": 0.2964, + "step": 9441 + }, + { + "epoch": 0.44231039490326507, + "grad_norm": 0.6481201827374317, + "learning_rate": 4.536011427268646e-06, + "loss": 0.2942, + "step": 9442 + }, + { + "epoch": 0.4423572398931934, + "grad_norm": 0.6096262914191414, + "learning_rate": 4.535901367901815e-06, + "loss": 0.3008, + "step": 9443 + }, + { + "epoch": 0.4424040848831218, + "grad_norm": 0.6284015094529971, + "learning_rate": 4.535791296818882e-06, + "loss": 0.2897, + "step": 9444 + }, + { + "epoch": 0.44245092987305007, + "grad_norm": 0.6271844699865382, + "learning_rate": 4.535681214020483e-06, + "loss": 0.3008, + "step": 9445 + }, + { + "epoch": 0.4424977748629784, + "grad_norm": 0.6663981545294174, + "learning_rate": 4.53557111950725e-06, + "loss": 0.2928, + "step": 9446 + }, + { + "epoch": 0.4425446198529067, + "grad_norm": 0.6426184121859768, + "learning_rate": 4.535461013279817e-06, + "loss": 0.2873, + "step": 9447 + }, + { + "epoch": 0.44259146484283507, + "grad_norm": 0.6268864835967052, + "learning_rate": 4.535350895338817e-06, + "loss": 0.2937, + "step": 9448 + }, + { + "epoch": 0.44263830983276337, + "grad_norm": 0.6190771553679678, + "learning_rate": 4.535240765684884e-06, + "loss": 0.3047, + "step": 9449 + }, + { + "epoch": 0.4426851548226917, + "grad_norm": 0.6077490055271559, + "learning_rate": 4.535130624318653e-06, + "loss": 0.29, + "step": 9450 + }, + { + "epoch": 0.44273199981262, + "grad_norm": 0.5638491551831574, + "learning_rate": 4.535020471240756e-06, + "loss": 0.2842, + "step": 9451 + }, + { + "epoch": 0.44277884480254837, + "grad_norm": 0.604531994302314, + "learning_rate": 4.534910306451827e-06, + "loss": 0.2899, + "step": 9452 + }, + { + "epoch": 0.4428256897924767, + "grad_norm": 0.6210488081870936, + "learning_rate": 4.534800129952502e-06, + "loss": 0.2662, + "step": 9453 + }, + { + "epoch": 0.442872534782405, + "grad_norm": 0.6663672010686691, + "learning_rate": 4.5346899417434135e-06, + "loss": 0.2923, + "step": 9454 + }, + { + "epoch": 0.44291937977233337, + "grad_norm": 0.6012486893836281, + "learning_rate": 4.534579741825195e-06, + "loss": 0.2952, + "step": 9455 + }, + { + "epoch": 0.44296622476226166, + "grad_norm": 0.6001356393795939, + "learning_rate": 4.5344695301984824e-06, + "loss": 0.3, + "step": 9456 + }, + { + "epoch": 0.44301306975219, + "grad_norm": 0.6019378376216635, + "learning_rate": 4.534359306863908e-06, + "loss": 0.3009, + "step": 9457 + }, + { + "epoch": 0.4430599147421183, + "grad_norm": 0.5629618815881618, + "learning_rate": 4.534249071822108e-06, + "loss": 0.2905, + "step": 9458 + }, + { + "epoch": 0.44310675973204666, + "grad_norm": 0.6209248004261066, + "learning_rate": 4.534138825073715e-06, + "loss": 0.2991, + "step": 9459 + }, + { + "epoch": 0.44315360472197496, + "grad_norm": 0.6503190905649749, + "learning_rate": 4.534028566619365e-06, + "loss": 0.2927, + "step": 9460 + }, + { + "epoch": 0.4432004497119033, + "grad_norm": 0.620861194776798, + "learning_rate": 4.533918296459691e-06, + "loss": 0.2953, + "step": 9461 + }, + { + "epoch": 0.44324729470183166, + "grad_norm": 0.6344731710549285, + "learning_rate": 4.533808014595329e-06, + "loss": 0.3051, + "step": 9462 + }, + { + "epoch": 0.44329413969175996, + "grad_norm": 0.6249487851899547, + "learning_rate": 4.533697721026913e-06, + "loss": 0.2903, + "step": 9463 + }, + { + "epoch": 0.4433409846816883, + "grad_norm": 0.595739778978344, + "learning_rate": 4.533587415755078e-06, + "loss": 0.297, + "step": 9464 + }, + { + "epoch": 0.4433878296716166, + "grad_norm": 0.6416516182600785, + "learning_rate": 4.533477098780458e-06, + "loss": 0.3101, + "step": 9465 + }, + { + "epoch": 0.44343467466154496, + "grad_norm": 0.6737312724477099, + "learning_rate": 4.533366770103689e-06, + "loss": 0.302, + "step": 9466 + }, + { + "epoch": 0.44348151965147325, + "grad_norm": 0.5928640354046686, + "learning_rate": 4.533256429725405e-06, + "loss": 0.2824, + "step": 9467 + }, + { + "epoch": 0.4435283646414016, + "grad_norm": 0.6067648816604144, + "learning_rate": 4.533146077646241e-06, + "loss": 0.2884, + "step": 9468 + }, + { + "epoch": 0.4435752096313299, + "grad_norm": 0.563497436419075, + "learning_rate": 4.533035713866833e-06, + "loss": 0.2896, + "step": 9469 + }, + { + "epoch": 0.44362205462125825, + "grad_norm": 0.5881128684012878, + "learning_rate": 4.5329253383878145e-06, + "loss": 0.2715, + "step": 9470 + }, + { + "epoch": 0.4436688996111866, + "grad_norm": 0.5674584884872781, + "learning_rate": 4.532814951209823e-06, + "loss": 0.2794, + "step": 9471 + }, + { + "epoch": 0.4437157446011149, + "grad_norm": 0.5636236261691013, + "learning_rate": 4.5327045523334915e-06, + "loss": 0.2722, + "step": 9472 + }, + { + "epoch": 0.44376258959104325, + "grad_norm": 0.5774220012466392, + "learning_rate": 4.532594141759457e-06, + "loss": 0.269, + "step": 9473 + }, + { + "epoch": 0.44380943458097155, + "grad_norm": 0.6196950607220103, + "learning_rate": 4.532483719488353e-06, + "loss": 0.2757, + "step": 9474 + }, + { + "epoch": 0.4438562795708999, + "grad_norm": 0.5652294955677389, + "learning_rate": 4.532373285520816e-06, + "loss": 0.2947, + "step": 9475 + }, + { + "epoch": 0.4439031245608282, + "grad_norm": 0.6372239636583749, + "learning_rate": 4.532262839857482e-06, + "loss": 0.2744, + "step": 9476 + }, + { + "epoch": 0.44394996955075655, + "grad_norm": 0.611560974696411, + "learning_rate": 4.532152382498987e-06, + "loss": 0.2846, + "step": 9477 + }, + { + "epoch": 0.44399681454068485, + "grad_norm": 0.5875869563460056, + "learning_rate": 4.532041913445964e-06, + "loss": 0.2939, + "step": 9478 + }, + { + "epoch": 0.4440436595306132, + "grad_norm": 0.630108026974609, + "learning_rate": 4.5319314326990515e-06, + "loss": 0.3002, + "step": 9479 + }, + { + "epoch": 0.44409050452054155, + "grad_norm": 0.5975515784058472, + "learning_rate": 4.531820940258885e-06, + "loss": 0.2759, + "step": 9480 + }, + { + "epoch": 0.44413734951046985, + "grad_norm": 0.5632413348156288, + "learning_rate": 4.531710436126099e-06, + "loss": 0.2892, + "step": 9481 + }, + { + "epoch": 0.4441841945003982, + "grad_norm": 0.6173147242265828, + "learning_rate": 4.531599920301329e-06, + "loss": 0.2929, + "step": 9482 + }, + { + "epoch": 0.4442310394903265, + "grad_norm": 0.5758769164216413, + "learning_rate": 4.531489392785214e-06, + "loss": 0.2932, + "step": 9483 + }, + { + "epoch": 0.44427788448025485, + "grad_norm": 0.6134054381979789, + "learning_rate": 4.531378853578388e-06, + "loss": 0.3047, + "step": 9484 + }, + { + "epoch": 0.44432472947018314, + "grad_norm": 0.592524710534006, + "learning_rate": 4.531268302681486e-06, + "loss": 0.2949, + "step": 9485 + }, + { + "epoch": 0.4443715744601115, + "grad_norm": 0.619149103958697, + "learning_rate": 4.531157740095146e-06, + "loss": 0.2892, + "step": 9486 + }, + { + "epoch": 0.4444184194500398, + "grad_norm": 0.5738730566672744, + "learning_rate": 4.5310471658200035e-06, + "loss": 0.301, + "step": 9487 + }, + { + "epoch": 0.44446526443996814, + "grad_norm": 0.5633223188666737, + "learning_rate": 4.530936579856695e-06, + "loss": 0.2746, + "step": 9488 + }, + { + "epoch": 0.4445121094298965, + "grad_norm": 0.6188319901536856, + "learning_rate": 4.530825982205857e-06, + "loss": 0.3001, + "step": 9489 + }, + { + "epoch": 0.4445589544198248, + "grad_norm": 0.6460933510630835, + "learning_rate": 4.5307153728681266e-06, + "loss": 0.3239, + "step": 9490 + }, + { + "epoch": 0.44460579940975314, + "grad_norm": 0.6547019270974116, + "learning_rate": 4.530604751844139e-06, + "loss": 0.3013, + "step": 9491 + }, + { + "epoch": 0.44465264439968144, + "grad_norm": 0.6512963284774879, + "learning_rate": 4.530494119134532e-06, + "loss": 0.2962, + "step": 9492 + }, + { + "epoch": 0.4446994893896098, + "grad_norm": 0.6204997847417038, + "learning_rate": 4.530383474739941e-06, + "loss": 0.2936, + "step": 9493 + }, + { + "epoch": 0.4447463343795381, + "grad_norm": 0.5918726817055359, + "learning_rate": 4.530272818661003e-06, + "loss": 0.2935, + "step": 9494 + }, + { + "epoch": 0.44479317936946644, + "grad_norm": 0.6104755361232311, + "learning_rate": 4.530162150898356e-06, + "loss": 0.2934, + "step": 9495 + }, + { + "epoch": 0.44484002435939474, + "grad_norm": 0.5734206989302305, + "learning_rate": 4.5300514714526365e-06, + "loss": 0.2788, + "step": 9496 + }, + { + "epoch": 0.4448868693493231, + "grad_norm": 0.5778145704043074, + "learning_rate": 4.52994078032448e-06, + "loss": 0.2756, + "step": 9497 + }, + { + "epoch": 0.44493371433925144, + "grad_norm": 0.624201022928397, + "learning_rate": 4.529830077514525e-06, + "loss": 0.299, + "step": 9498 + }, + { + "epoch": 0.44498055932917974, + "grad_norm": 0.6221947963312174, + "learning_rate": 4.529719363023409e-06, + "loss": 0.2943, + "step": 9499 + }, + { + "epoch": 0.4450274043191081, + "grad_norm": 0.6265331621384819, + "learning_rate": 4.529608636851766e-06, + "loss": 0.2999, + "step": 9500 + }, + { + "epoch": 0.4450742493090364, + "grad_norm": 0.6360490679616181, + "learning_rate": 4.529497899000238e-06, + "loss": 0.3174, + "step": 9501 + }, + { + "epoch": 0.44512109429896474, + "grad_norm": 0.5862344435522403, + "learning_rate": 4.529387149469458e-06, + "loss": 0.2856, + "step": 9502 + }, + { + "epoch": 0.44516793928889303, + "grad_norm": 0.6054656449080777, + "learning_rate": 4.529276388260066e-06, + "loss": 0.2739, + "step": 9503 + }, + { + "epoch": 0.4452147842788214, + "grad_norm": 0.5494218249396977, + "learning_rate": 4.529165615372697e-06, + "loss": 0.2845, + "step": 9504 + }, + { + "epoch": 0.4452616292687497, + "grad_norm": 0.6198506427357584, + "learning_rate": 4.529054830807991e-06, + "loss": 0.2945, + "step": 9505 + }, + { + "epoch": 0.44530847425867803, + "grad_norm": 0.57884333970234, + "learning_rate": 4.528944034566585e-06, + "loss": 0.307, + "step": 9506 + }, + { + "epoch": 0.4453553192486064, + "grad_norm": 0.5881495733357699, + "learning_rate": 4.528833226649115e-06, + "loss": 0.2829, + "step": 9507 + }, + { + "epoch": 0.4454021642385347, + "grad_norm": 0.5753839726636603, + "learning_rate": 4.528722407056219e-06, + "loss": 0.2938, + "step": 9508 + }, + { + "epoch": 0.44544900922846303, + "grad_norm": 0.6342967646606078, + "learning_rate": 4.528611575788537e-06, + "loss": 0.3102, + "step": 9509 + }, + { + "epoch": 0.44549585421839133, + "grad_norm": 0.597108607412318, + "learning_rate": 4.528500732846705e-06, + "loss": 0.2861, + "step": 9510 + }, + { + "epoch": 0.4455426992083197, + "grad_norm": 0.537723289292712, + "learning_rate": 4.528389878231361e-06, + "loss": 0.2711, + "step": 9511 + }, + { + "epoch": 0.445589544198248, + "grad_norm": 0.6088940335963915, + "learning_rate": 4.528279011943142e-06, + "loss": 0.3094, + "step": 9512 + }, + { + "epoch": 0.44563638918817633, + "grad_norm": 0.627151018123468, + "learning_rate": 4.528168133982688e-06, + "loss": 0.2833, + "step": 9513 + }, + { + "epoch": 0.4456832341781046, + "grad_norm": 0.58877521659489, + "learning_rate": 4.528057244350636e-06, + "loss": 0.2894, + "step": 9514 + }, + { + "epoch": 0.445730079168033, + "grad_norm": 0.6578462576942999, + "learning_rate": 4.527946343047625e-06, + "loss": 0.2974, + "step": 9515 + }, + { + "epoch": 0.44577692415796133, + "grad_norm": 0.5953360357937445, + "learning_rate": 4.527835430074291e-06, + "loss": 0.2943, + "step": 9516 + }, + { + "epoch": 0.4458237691478896, + "grad_norm": 0.6537420839824416, + "learning_rate": 4.527724505431275e-06, + "loss": 0.3037, + "step": 9517 + }, + { + "epoch": 0.445870614137818, + "grad_norm": 0.5981350665526789, + "learning_rate": 4.527613569119213e-06, + "loss": 0.2993, + "step": 9518 + }, + { + "epoch": 0.4459174591277463, + "grad_norm": 0.52741200295637, + "learning_rate": 4.527502621138746e-06, + "loss": 0.2571, + "step": 9519 + }, + { + "epoch": 0.4459643041176746, + "grad_norm": 0.6369835885490587, + "learning_rate": 4.52739166149051e-06, + "loss": 0.2863, + "step": 9520 + }, + { + "epoch": 0.4460111491076029, + "grad_norm": 0.5792577618527626, + "learning_rate": 4.527280690175145e-06, + "loss": 0.2614, + "step": 9521 + }, + { + "epoch": 0.4460579940975313, + "grad_norm": 0.6213661096655889, + "learning_rate": 4.5271697071932895e-06, + "loss": 0.2736, + "step": 9522 + }, + { + "epoch": 0.44610483908745957, + "grad_norm": 0.5533115773740684, + "learning_rate": 4.527058712545581e-06, + "loss": 0.2764, + "step": 9523 + }, + { + "epoch": 0.4461516840773879, + "grad_norm": 0.6182977870809199, + "learning_rate": 4.5269477062326595e-06, + "loss": 0.2939, + "step": 9524 + }, + { + "epoch": 0.4461985290673163, + "grad_norm": 0.549635913428749, + "learning_rate": 4.526836688255163e-06, + "loss": 0.2558, + "step": 9525 + }, + { + "epoch": 0.44624537405724457, + "grad_norm": 0.6278422263973577, + "learning_rate": 4.526725658613731e-06, + "loss": 0.2809, + "step": 9526 + }, + { + "epoch": 0.4462922190471729, + "grad_norm": 0.6103919004715236, + "learning_rate": 4.526614617309003e-06, + "loss": 0.2851, + "step": 9527 + }, + { + "epoch": 0.4463390640371012, + "grad_norm": 0.6448300660961481, + "learning_rate": 4.5265035643416164e-06, + "loss": 0.3029, + "step": 9528 + }, + { + "epoch": 0.44638590902702957, + "grad_norm": 0.6012533887766403, + "learning_rate": 4.526392499712211e-06, + "loss": 0.2987, + "step": 9529 + }, + { + "epoch": 0.44643275401695787, + "grad_norm": 0.6458881623253457, + "learning_rate": 4.526281423421427e-06, + "loss": 0.2875, + "step": 9530 + }, + { + "epoch": 0.4464795990068862, + "grad_norm": 0.6346951470174488, + "learning_rate": 4.526170335469901e-06, + "loss": 0.3015, + "step": 9531 + }, + { + "epoch": 0.4465264439968145, + "grad_norm": 0.6015001683048397, + "learning_rate": 4.526059235858276e-06, + "loss": 0.286, + "step": 9532 + }, + { + "epoch": 0.44657328898674287, + "grad_norm": 0.6548906402427375, + "learning_rate": 4.5259481245871885e-06, + "loss": 0.301, + "step": 9533 + }, + { + "epoch": 0.4466201339766712, + "grad_norm": 0.6291067540278896, + "learning_rate": 4.525837001657279e-06, + "loss": 0.2983, + "step": 9534 + }, + { + "epoch": 0.4466669789665995, + "grad_norm": 0.6141790822282214, + "learning_rate": 4.525725867069186e-06, + "loss": 0.2915, + "step": 9535 + }, + { + "epoch": 0.44671382395652787, + "grad_norm": 0.6281714471417609, + "learning_rate": 4.52561472082355e-06, + "loss": 0.2826, + "step": 9536 + }, + { + "epoch": 0.44676066894645616, + "grad_norm": 0.6427570864907378, + "learning_rate": 4.525503562921011e-06, + "loss": 0.304, + "step": 9537 + }, + { + "epoch": 0.4468075139363845, + "grad_norm": 0.5936889397609365, + "learning_rate": 4.525392393362207e-06, + "loss": 0.2878, + "step": 9538 + }, + { + "epoch": 0.4468543589263128, + "grad_norm": 0.5628269602011119, + "learning_rate": 4.52528121214778e-06, + "loss": 0.2868, + "step": 9539 + }, + { + "epoch": 0.44690120391624116, + "grad_norm": 0.562010092159554, + "learning_rate": 4.525170019278368e-06, + "loss": 0.2748, + "step": 9540 + }, + { + "epoch": 0.44694804890616946, + "grad_norm": 0.6527070239744619, + "learning_rate": 4.5250588147546115e-06, + "loss": 0.3035, + "step": 9541 + }, + { + "epoch": 0.4469948938960978, + "grad_norm": 0.6329449983216956, + "learning_rate": 4.524947598577151e-06, + "loss": 0.2994, + "step": 9542 + }, + { + "epoch": 0.44704173888602616, + "grad_norm": 0.559736297476402, + "learning_rate": 4.5248363707466255e-06, + "loss": 0.2766, + "step": 9543 + }, + { + "epoch": 0.44708858387595446, + "grad_norm": 0.6068773766302515, + "learning_rate": 4.524725131263676e-06, + "loss": 0.2922, + "step": 9544 + }, + { + "epoch": 0.4471354288658828, + "grad_norm": 0.6324450376875683, + "learning_rate": 4.524613880128942e-06, + "loss": 0.3001, + "step": 9545 + }, + { + "epoch": 0.4471822738558111, + "grad_norm": 0.586749890766047, + "learning_rate": 4.5245026173430646e-06, + "loss": 0.2986, + "step": 9546 + }, + { + "epoch": 0.44722911884573946, + "grad_norm": 0.6513971513746809, + "learning_rate": 4.524391342906683e-06, + "loss": 0.3121, + "step": 9547 + }, + { + "epoch": 0.44727596383566776, + "grad_norm": 0.5703383282337076, + "learning_rate": 4.524280056820438e-06, + "loss": 0.275, + "step": 9548 + }, + { + "epoch": 0.4473228088255961, + "grad_norm": 0.5973382759003248, + "learning_rate": 4.52416875908497e-06, + "loss": 0.301, + "step": 9549 + }, + { + "epoch": 0.4473696538155244, + "grad_norm": 0.6166778327899648, + "learning_rate": 4.5240574497009195e-06, + "loss": 0.2836, + "step": 9550 + }, + { + "epoch": 0.44741649880545276, + "grad_norm": 0.5546759378634603, + "learning_rate": 4.523946128668927e-06, + "loss": 0.2801, + "step": 9551 + }, + { + "epoch": 0.4474633437953811, + "grad_norm": 0.599852052616321, + "learning_rate": 4.523834795989633e-06, + "loss": 0.2816, + "step": 9552 + }, + { + "epoch": 0.4475101887853094, + "grad_norm": 0.6021526078067265, + "learning_rate": 4.52372345166368e-06, + "loss": 0.2857, + "step": 9553 + }, + { + "epoch": 0.44755703377523776, + "grad_norm": 0.6389609040019344, + "learning_rate": 4.5236120956917054e-06, + "loss": 0.2988, + "step": 9554 + }, + { + "epoch": 0.44760387876516605, + "grad_norm": 0.6055422711066196, + "learning_rate": 4.523500728074353e-06, + "loss": 0.2919, + "step": 9555 + }, + { + "epoch": 0.4476507237550944, + "grad_norm": 0.6337256721973981, + "learning_rate": 4.523389348812262e-06, + "loss": 0.316, + "step": 9556 + }, + { + "epoch": 0.4476975687450227, + "grad_norm": 0.6511727482416685, + "learning_rate": 4.523277957906074e-06, + "loss": 0.2983, + "step": 9557 + }, + { + "epoch": 0.44774441373495105, + "grad_norm": 0.6043134315306427, + "learning_rate": 4.523166555356428e-06, + "loss": 0.3103, + "step": 9558 + }, + { + "epoch": 0.44779125872487935, + "grad_norm": 0.5901426512813643, + "learning_rate": 4.52305514116397e-06, + "loss": 0.2926, + "step": 9559 + }, + { + "epoch": 0.4478381037148077, + "grad_norm": 0.5771669496150272, + "learning_rate": 4.522943715329336e-06, + "loss": 0.2696, + "step": 9560 + }, + { + "epoch": 0.44788494870473605, + "grad_norm": 0.6104877582931553, + "learning_rate": 4.522832277853171e-06, + "loss": 0.2738, + "step": 9561 + }, + { + "epoch": 0.44793179369466435, + "grad_norm": 0.6526216600987311, + "learning_rate": 4.522720828736114e-06, + "loss": 0.3142, + "step": 9562 + }, + { + "epoch": 0.4479786386845927, + "grad_norm": 0.5555846040908528, + "learning_rate": 4.522609367978806e-06, + "loss": 0.27, + "step": 9563 + }, + { + "epoch": 0.448025483674521, + "grad_norm": 0.5915434740059343, + "learning_rate": 4.522497895581891e-06, + "loss": 0.2743, + "step": 9564 + }, + { + "epoch": 0.44807232866444935, + "grad_norm": 0.5913479921180789, + "learning_rate": 4.5223864115460085e-06, + "loss": 0.2832, + "step": 9565 + }, + { + "epoch": 0.44811917365437764, + "grad_norm": 0.6353932682876884, + "learning_rate": 4.522274915871799e-06, + "loss": 0.3096, + "step": 9566 + }, + { + "epoch": 0.448166018644306, + "grad_norm": 0.6304193083238063, + "learning_rate": 4.522163408559907e-06, + "loss": 0.2701, + "step": 9567 + }, + { + "epoch": 0.4482128636342343, + "grad_norm": 0.6291497238969764, + "learning_rate": 4.522051889610973e-06, + "loss": 0.2989, + "step": 9568 + }, + { + "epoch": 0.44825970862416264, + "grad_norm": 0.5378666041474424, + "learning_rate": 4.521940359025638e-06, + "loss": 0.2937, + "step": 9569 + }, + { + "epoch": 0.448306553614091, + "grad_norm": 0.5931774995542544, + "learning_rate": 4.521828816804546e-06, + "loss": 0.2921, + "step": 9570 + }, + { + "epoch": 0.4483533986040193, + "grad_norm": 0.5694865083777723, + "learning_rate": 4.521717262948336e-06, + "loss": 0.3048, + "step": 9571 + }, + { + "epoch": 0.44840024359394764, + "grad_norm": 0.5980086463339606, + "learning_rate": 4.521605697457651e-06, + "loss": 0.2796, + "step": 9572 + }, + { + "epoch": 0.44844708858387594, + "grad_norm": 0.6186243157774948, + "learning_rate": 4.5214941203331335e-06, + "loss": 0.3027, + "step": 9573 + }, + { + "epoch": 0.4484939335738043, + "grad_norm": 0.6293511403970001, + "learning_rate": 4.521382531575426e-06, + "loss": 0.2903, + "step": 9574 + }, + { + "epoch": 0.4485407785637326, + "grad_norm": 0.6096989099004726, + "learning_rate": 4.521270931185169e-06, + "loss": 0.2814, + "step": 9575 + }, + { + "epoch": 0.44858762355366094, + "grad_norm": 0.5972642080286227, + "learning_rate": 4.521159319163007e-06, + "loss": 0.2917, + "step": 9576 + }, + { + "epoch": 0.44863446854358924, + "grad_norm": 0.6421587874722295, + "learning_rate": 4.5210476955095804e-06, + "loss": 0.3159, + "step": 9577 + }, + { + "epoch": 0.4486813135335176, + "grad_norm": 0.5986889182407964, + "learning_rate": 4.520936060225533e-06, + "loss": 0.2904, + "step": 9578 + }, + { + "epoch": 0.44872815852344594, + "grad_norm": 0.604887861654326, + "learning_rate": 4.520824413311506e-06, + "loss": 0.2998, + "step": 9579 + }, + { + "epoch": 0.44877500351337424, + "grad_norm": 0.6165855737074736, + "learning_rate": 4.520712754768143e-06, + "loss": 0.2983, + "step": 9580 + }, + { + "epoch": 0.4488218485033026, + "grad_norm": 0.5462896100481005, + "learning_rate": 4.520601084596085e-06, + "loss": 0.2615, + "step": 9581 + }, + { + "epoch": 0.4488686934932309, + "grad_norm": 0.6242543635325998, + "learning_rate": 4.520489402795976e-06, + "loss": 0.2754, + "step": 9582 + }, + { + "epoch": 0.44891553848315924, + "grad_norm": 0.6210771747050209, + "learning_rate": 4.520377709368459e-06, + "loss": 0.2861, + "step": 9583 + }, + { + "epoch": 0.44896238347308753, + "grad_norm": 0.5926182971380921, + "learning_rate": 4.520266004314174e-06, + "loss": 0.2909, + "step": 9584 + }, + { + "epoch": 0.4490092284630159, + "grad_norm": 0.6002857961721327, + "learning_rate": 4.520154287633768e-06, + "loss": 0.3055, + "step": 9585 + }, + { + "epoch": 0.4490560734529442, + "grad_norm": 0.6017881148606296, + "learning_rate": 4.520042559327881e-06, + "loss": 0.2834, + "step": 9586 + }, + { + "epoch": 0.44910291844287253, + "grad_norm": 0.6447439379698738, + "learning_rate": 4.5199308193971575e-06, + "loss": 0.2852, + "step": 9587 + }, + { + "epoch": 0.4491497634328009, + "grad_norm": 0.5819424092627619, + "learning_rate": 4.51981906784224e-06, + "loss": 0.2783, + "step": 9588 + }, + { + "epoch": 0.4491966084227292, + "grad_norm": 0.613425730112215, + "learning_rate": 4.519707304663769e-06, + "loss": 0.304, + "step": 9589 + }, + { + "epoch": 0.44924345341265753, + "grad_norm": 0.6146337383882439, + "learning_rate": 4.519595529862393e-06, + "loss": 0.2872, + "step": 9590 + }, + { + "epoch": 0.44929029840258583, + "grad_norm": 0.5751644220414146, + "learning_rate": 4.519483743438751e-06, + "loss": 0.3039, + "step": 9591 + }, + { + "epoch": 0.4493371433925142, + "grad_norm": 0.583337082287852, + "learning_rate": 4.519371945393488e-06, + "loss": 0.2901, + "step": 9592 + }, + { + "epoch": 0.4493839883824425, + "grad_norm": 0.6408745103138708, + "learning_rate": 4.519260135727247e-06, + "loss": 0.2919, + "step": 9593 + }, + { + "epoch": 0.44943083337237083, + "grad_norm": 0.5944808352248772, + "learning_rate": 4.519148314440671e-06, + "loss": 0.2827, + "step": 9594 + }, + { + "epoch": 0.4494776783622991, + "grad_norm": 0.5738381350100982, + "learning_rate": 4.519036481534405e-06, + "loss": 0.2696, + "step": 9595 + }, + { + "epoch": 0.4495245233522275, + "grad_norm": 0.5995669172731882, + "learning_rate": 4.51892463700909e-06, + "loss": 0.2728, + "step": 9596 + }, + { + "epoch": 0.44957136834215583, + "grad_norm": 0.6265008973822763, + "learning_rate": 4.518812780865371e-06, + "loss": 0.2819, + "step": 9597 + }, + { + "epoch": 0.4496182133320841, + "grad_norm": 0.5729263225971102, + "learning_rate": 4.5187009131038935e-06, + "loss": 0.268, + "step": 9598 + }, + { + "epoch": 0.4496650583220125, + "grad_norm": 0.5635861599032379, + "learning_rate": 4.518589033725299e-06, + "loss": 0.2654, + "step": 9599 + }, + { + "epoch": 0.4497119033119408, + "grad_norm": 0.6380746489778761, + "learning_rate": 4.518477142730232e-06, + "loss": 0.2826, + "step": 9600 + }, + { + "epoch": 0.4497587483018691, + "grad_norm": 0.572118535226733, + "learning_rate": 4.518365240119336e-06, + "loss": 0.267, + "step": 9601 + }, + { + "epoch": 0.4498055932917974, + "grad_norm": 0.5821771418340612, + "learning_rate": 4.518253325893255e-06, + "loss": 0.2735, + "step": 9602 + }, + { + "epoch": 0.4498524382817258, + "grad_norm": 0.6551051414257538, + "learning_rate": 4.518141400052635e-06, + "loss": 0.2949, + "step": 9603 + }, + { + "epoch": 0.44989928327165407, + "grad_norm": 0.610888744065006, + "learning_rate": 4.518029462598117e-06, + "loss": 0.2808, + "step": 9604 + }, + { + "epoch": 0.4499461282615824, + "grad_norm": 0.66478953242056, + "learning_rate": 4.517917513530347e-06, + "loss": 0.3199, + "step": 9605 + }, + { + "epoch": 0.4499929732515108, + "grad_norm": 0.6011436287106693, + "learning_rate": 4.517805552849969e-06, + "loss": 0.2994, + "step": 9606 + }, + { + "epoch": 0.45003981824143907, + "grad_norm": 0.6060782806074002, + "learning_rate": 4.517693580557626e-06, + "loss": 0.2711, + "step": 9607 + }, + { + "epoch": 0.4500866632313674, + "grad_norm": 0.5982232046036212, + "learning_rate": 4.517581596653966e-06, + "loss": 0.2877, + "step": 9608 + }, + { + "epoch": 0.4501335082212957, + "grad_norm": 0.6436906658413317, + "learning_rate": 4.517469601139629e-06, + "loss": 0.2904, + "step": 9609 + }, + { + "epoch": 0.45018035321122407, + "grad_norm": 0.5638735854868067, + "learning_rate": 4.517357594015262e-06, + "loss": 0.2717, + "step": 9610 + }, + { + "epoch": 0.45022719820115237, + "grad_norm": 0.6056994980014117, + "learning_rate": 4.51724557528151e-06, + "loss": 0.288, + "step": 9611 + }, + { + "epoch": 0.4502740431910807, + "grad_norm": 0.5708986548332758, + "learning_rate": 4.517133544939016e-06, + "loss": 0.2705, + "step": 9612 + }, + { + "epoch": 0.450320888181009, + "grad_norm": 0.6435847031106972, + "learning_rate": 4.517021502988426e-06, + "loss": 0.3145, + "step": 9613 + }, + { + "epoch": 0.45036773317093737, + "grad_norm": 0.6183918489238485, + "learning_rate": 4.516909449430383e-06, + "loss": 0.293, + "step": 9614 + }, + { + "epoch": 0.4504145781608657, + "grad_norm": 0.6549288945617616, + "learning_rate": 4.516797384265534e-06, + "loss": 0.3248, + "step": 9615 + }, + { + "epoch": 0.450461423150794, + "grad_norm": 0.6054353624979363, + "learning_rate": 4.516685307494523e-06, + "loss": 0.2976, + "step": 9616 + }, + { + "epoch": 0.45050826814072237, + "grad_norm": 0.6041446737997058, + "learning_rate": 4.516573219117995e-06, + "loss": 0.3005, + "step": 9617 + }, + { + "epoch": 0.45055511313065066, + "grad_norm": 0.600136346061137, + "learning_rate": 4.516461119136593e-06, + "loss": 0.2793, + "step": 9618 + }, + { + "epoch": 0.450601958120579, + "grad_norm": 0.5553364587552834, + "learning_rate": 4.516349007550966e-06, + "loss": 0.2824, + "step": 9619 + }, + { + "epoch": 0.4506488031105073, + "grad_norm": 0.5700317846520051, + "learning_rate": 4.516236884361758e-06, + "loss": 0.2984, + "step": 9620 + }, + { + "epoch": 0.45069564810043566, + "grad_norm": 0.6371108823491317, + "learning_rate": 4.516124749569612e-06, + "loss": 0.3098, + "step": 9621 + }, + { + "epoch": 0.45074249309036396, + "grad_norm": 0.5969394662791384, + "learning_rate": 4.516012603175175e-06, + "loss": 0.3025, + "step": 9622 + }, + { + "epoch": 0.4507893380802923, + "grad_norm": 0.6027858650999857, + "learning_rate": 4.5159004451790925e-06, + "loss": 0.2898, + "step": 9623 + }, + { + "epoch": 0.45083618307022066, + "grad_norm": 0.5850997346038387, + "learning_rate": 4.515788275582009e-06, + "loss": 0.2854, + "step": 9624 + }, + { + "epoch": 0.45088302806014896, + "grad_norm": 0.5751451118200366, + "learning_rate": 4.515676094384572e-06, + "loss": 0.2859, + "step": 9625 + }, + { + "epoch": 0.4509298730500773, + "grad_norm": 0.6353052330350184, + "learning_rate": 4.515563901587424e-06, + "loss": 0.3055, + "step": 9626 + }, + { + "epoch": 0.4509767180400056, + "grad_norm": 0.5665952141185507, + "learning_rate": 4.515451697191214e-06, + "loss": 0.2806, + "step": 9627 + }, + { + "epoch": 0.45102356302993396, + "grad_norm": 0.6163977223172434, + "learning_rate": 4.515339481196585e-06, + "loss": 0.3044, + "step": 9628 + }, + { + "epoch": 0.45107040801986226, + "grad_norm": 0.627669350001631, + "learning_rate": 4.5152272536041844e-06, + "loss": 0.3004, + "step": 9629 + }, + { + "epoch": 0.4511172530097906, + "grad_norm": 0.5990893607675968, + "learning_rate": 4.515115014414657e-06, + "loss": 0.285, + "step": 9630 + }, + { + "epoch": 0.4511640979997189, + "grad_norm": 0.5829987607915249, + "learning_rate": 4.515002763628648e-06, + "loss": 0.2773, + "step": 9631 + }, + { + "epoch": 0.45121094298964726, + "grad_norm": 0.5901602047840601, + "learning_rate": 4.514890501246807e-06, + "loss": 0.2779, + "step": 9632 + }, + { + "epoch": 0.4512577879795756, + "grad_norm": 0.6491004532914175, + "learning_rate": 4.514778227269776e-06, + "loss": 0.299, + "step": 9633 + }, + { + "epoch": 0.4513046329695039, + "grad_norm": 0.5732643405883969, + "learning_rate": 4.5146659416982035e-06, + "loss": 0.2811, + "step": 9634 + }, + { + "epoch": 0.45135147795943226, + "grad_norm": 0.582984451500609, + "learning_rate": 4.5145536445327345e-06, + "loss": 0.2916, + "step": 9635 + }, + { + "epoch": 0.45139832294936055, + "grad_norm": 0.6112728565210781, + "learning_rate": 4.514441335774015e-06, + "loss": 0.2753, + "step": 9636 + }, + { + "epoch": 0.4514451679392889, + "grad_norm": 0.618831378220304, + "learning_rate": 4.514329015422693e-06, + "loss": 0.2873, + "step": 9637 + }, + { + "epoch": 0.4514920129292172, + "grad_norm": 0.6062297429642151, + "learning_rate": 4.514216683479413e-06, + "loss": 0.2799, + "step": 9638 + }, + { + "epoch": 0.45153885791914555, + "grad_norm": 0.5757994043260244, + "learning_rate": 4.514104339944823e-06, + "loss": 0.297, + "step": 9639 + }, + { + "epoch": 0.45158570290907385, + "grad_norm": 0.6193010065912986, + "learning_rate": 4.513991984819568e-06, + "loss": 0.2877, + "step": 9640 + }, + { + "epoch": 0.4516325478990022, + "grad_norm": 0.6108712450069865, + "learning_rate": 4.513879618104296e-06, + "loss": 0.297, + "step": 9641 + }, + { + "epoch": 0.45167939288893055, + "grad_norm": 0.5719230854300209, + "learning_rate": 4.513767239799653e-06, + "loss": 0.2675, + "step": 9642 + }, + { + "epoch": 0.45172623787885885, + "grad_norm": 0.6047289942007416, + "learning_rate": 4.513654849906285e-06, + "loss": 0.303, + "step": 9643 + }, + { + "epoch": 0.4517730828687872, + "grad_norm": 0.6361316272854707, + "learning_rate": 4.5135424484248394e-06, + "loss": 0.3295, + "step": 9644 + }, + { + "epoch": 0.4518199278587155, + "grad_norm": 0.569253979249529, + "learning_rate": 4.5134300353559635e-06, + "loss": 0.287, + "step": 9645 + }, + { + "epoch": 0.45186677284864385, + "grad_norm": 0.6234791638371582, + "learning_rate": 4.513317610700304e-06, + "loss": 0.2904, + "step": 9646 + }, + { + "epoch": 0.45191361783857215, + "grad_norm": 0.5704311664715489, + "learning_rate": 4.513205174458507e-06, + "loss": 0.2825, + "step": 9647 + }, + { + "epoch": 0.4519604628285005, + "grad_norm": 0.5972636201218078, + "learning_rate": 4.513092726631221e-06, + "loss": 0.3041, + "step": 9648 + }, + { + "epoch": 0.4520073078184288, + "grad_norm": 0.61441977099996, + "learning_rate": 4.512980267219092e-06, + "loss": 0.2913, + "step": 9649 + }, + { + "epoch": 0.45205415280835715, + "grad_norm": 0.6208373393173017, + "learning_rate": 4.512867796222767e-06, + "loss": 0.2802, + "step": 9650 + }, + { + "epoch": 0.4521009977982855, + "grad_norm": 0.5930498596488138, + "learning_rate": 4.512755313642895e-06, + "loss": 0.2931, + "step": 9651 + }, + { + "epoch": 0.4521478427882138, + "grad_norm": 0.6100059097746048, + "learning_rate": 4.5126428194801205e-06, + "loss": 0.303, + "step": 9652 + }, + { + "epoch": 0.45219468777814215, + "grad_norm": 0.6035347438224943, + "learning_rate": 4.512530313735094e-06, + "loss": 0.2762, + "step": 9653 + }, + { + "epoch": 0.45224153276807044, + "grad_norm": 0.6034851685648088, + "learning_rate": 4.51241779640846e-06, + "loss": 0.3134, + "step": 9654 + }, + { + "epoch": 0.4522883777579988, + "grad_norm": 0.5580022623115172, + "learning_rate": 4.512305267500868e-06, + "loss": 0.2784, + "step": 9655 + }, + { + "epoch": 0.4523352227479271, + "grad_norm": 0.5893411355373063, + "learning_rate": 4.512192727012965e-06, + "loss": 0.2777, + "step": 9656 + }, + { + "epoch": 0.45238206773785544, + "grad_norm": 0.5651301231915469, + "learning_rate": 4.512080174945399e-06, + "loss": 0.2683, + "step": 9657 + }, + { + "epoch": 0.45242891272778374, + "grad_norm": 0.6348131092255221, + "learning_rate": 4.511967611298817e-06, + "loss": 0.283, + "step": 9658 + }, + { + "epoch": 0.4524757577177121, + "grad_norm": 0.5613979636703845, + "learning_rate": 4.511855036073866e-06, + "loss": 0.284, + "step": 9659 + }, + { + "epoch": 0.45252260270764044, + "grad_norm": 0.6174969375091333, + "learning_rate": 4.511742449271197e-06, + "loss": 0.3057, + "step": 9660 + }, + { + "epoch": 0.45256944769756874, + "grad_norm": 0.5948935927896455, + "learning_rate": 4.5116298508914545e-06, + "loss": 0.2868, + "step": 9661 + }, + { + "epoch": 0.4526162926874971, + "grad_norm": 0.547916294968488, + "learning_rate": 4.511517240935288e-06, + "loss": 0.2728, + "step": 9662 + }, + { + "epoch": 0.4526631376774254, + "grad_norm": 0.6114375800963737, + "learning_rate": 4.511404619403345e-06, + "loss": 0.2696, + "step": 9663 + }, + { + "epoch": 0.45270998266735374, + "grad_norm": 0.6084647632487292, + "learning_rate": 4.511291986296274e-06, + "loss": 0.2838, + "step": 9664 + }, + { + "epoch": 0.45275682765728203, + "grad_norm": 0.5973159986606097, + "learning_rate": 4.511179341614724e-06, + "loss": 0.2817, + "step": 9665 + }, + { + "epoch": 0.4528036726472104, + "grad_norm": 0.6143523097705462, + "learning_rate": 4.511066685359342e-06, + "loss": 0.2895, + "step": 9666 + }, + { + "epoch": 0.4528505176371387, + "grad_norm": 0.5668631510786984, + "learning_rate": 4.510954017530776e-06, + "loss": 0.269, + "step": 9667 + }, + { + "epoch": 0.45289736262706703, + "grad_norm": 0.6242503394439323, + "learning_rate": 4.5108413381296756e-06, + "loss": 0.3147, + "step": 9668 + }, + { + "epoch": 0.4529442076169954, + "grad_norm": 0.5692657769723413, + "learning_rate": 4.510728647156689e-06, + "loss": 0.292, + "step": 9669 + }, + { + "epoch": 0.4529910526069237, + "grad_norm": 0.5924765630570229, + "learning_rate": 4.510615944612464e-06, + "loss": 0.2858, + "step": 9670 + }, + { + "epoch": 0.45303789759685203, + "grad_norm": 0.5771610351003139, + "learning_rate": 4.510503230497649e-06, + "loss": 0.3149, + "step": 9671 + }, + { + "epoch": 0.45308474258678033, + "grad_norm": 0.6011544792670175, + "learning_rate": 4.5103905048128945e-06, + "loss": 0.2885, + "step": 9672 + }, + { + "epoch": 0.4531315875767087, + "grad_norm": 0.5705561714530325, + "learning_rate": 4.510277767558847e-06, + "loss": 0.283, + "step": 9673 + }, + { + "epoch": 0.453178432566637, + "grad_norm": 0.6331581321667616, + "learning_rate": 4.510165018736157e-06, + "loss": 0.2908, + "step": 9674 + }, + { + "epoch": 0.45322527755656533, + "grad_norm": 0.5874867734762319, + "learning_rate": 4.510052258345472e-06, + "loss": 0.2859, + "step": 9675 + }, + { + "epoch": 0.4532721225464936, + "grad_norm": 0.5781848370364345, + "learning_rate": 4.5099394863874414e-06, + "loss": 0.2791, + "step": 9676 + }, + { + "epoch": 0.453318967536422, + "grad_norm": 0.5925909464211897, + "learning_rate": 4.509826702862715e-06, + "loss": 0.2746, + "step": 9677 + }, + { + "epoch": 0.45336581252635033, + "grad_norm": 0.6291882415208565, + "learning_rate": 4.50971390777194e-06, + "loss": 0.2989, + "step": 9678 + }, + { + "epoch": 0.4534126575162786, + "grad_norm": 0.6148093412893011, + "learning_rate": 4.509601101115768e-06, + "loss": 0.2881, + "step": 9679 + }, + { + "epoch": 0.453459502506207, + "grad_norm": 0.5843225433736228, + "learning_rate": 4.509488282894845e-06, + "loss": 0.291, + "step": 9680 + }, + { + "epoch": 0.4535063474961353, + "grad_norm": 0.5805253181278061, + "learning_rate": 4.509375453109823e-06, + "loss": 0.3002, + "step": 9681 + }, + { + "epoch": 0.4535531924860636, + "grad_norm": 0.5948374860984084, + "learning_rate": 4.509262611761351e-06, + "loss": 0.2911, + "step": 9682 + }, + { + "epoch": 0.4536000374759919, + "grad_norm": 0.6485636031542104, + "learning_rate": 4.509149758850076e-06, + "loss": 0.3227, + "step": 9683 + }, + { + "epoch": 0.4536468824659203, + "grad_norm": 0.5945752089490708, + "learning_rate": 4.509036894376651e-06, + "loss": 0.2798, + "step": 9684 + }, + { + "epoch": 0.45369372745584857, + "grad_norm": 0.6211285400941501, + "learning_rate": 4.508924018341722e-06, + "loss": 0.2946, + "step": 9685 + }, + { + "epoch": 0.4537405724457769, + "grad_norm": 0.5726416306206742, + "learning_rate": 4.508811130745941e-06, + "loss": 0.2737, + "step": 9686 + }, + { + "epoch": 0.4537874174357053, + "grad_norm": 0.6457137349066, + "learning_rate": 4.5086982315899565e-06, + "loss": 0.2871, + "step": 9687 + }, + { + "epoch": 0.45383426242563357, + "grad_norm": 0.5751603956272184, + "learning_rate": 4.508585320874419e-06, + "loss": 0.3003, + "step": 9688 + }, + { + "epoch": 0.4538811074155619, + "grad_norm": 0.584740161351357, + "learning_rate": 4.5084723985999765e-06, + "loss": 0.2735, + "step": 9689 + }, + { + "epoch": 0.4539279524054902, + "grad_norm": 0.6863586181825903, + "learning_rate": 4.508359464767282e-06, + "loss": 0.3014, + "step": 9690 + }, + { + "epoch": 0.45397479739541857, + "grad_norm": 0.5523080511366344, + "learning_rate": 4.508246519376982e-06, + "loss": 0.2787, + "step": 9691 + }, + { + "epoch": 0.45402164238534687, + "grad_norm": 0.6028309508243932, + "learning_rate": 4.508133562429728e-06, + "loss": 0.3014, + "step": 9692 + }, + { + "epoch": 0.4540684873752752, + "grad_norm": 0.5853039116553532, + "learning_rate": 4.508020593926172e-06, + "loss": 0.2834, + "step": 9693 + }, + { + "epoch": 0.4541153323652035, + "grad_norm": 0.6631830696686883, + "learning_rate": 4.507907613866961e-06, + "loss": 0.3067, + "step": 9694 + }, + { + "epoch": 0.45416217735513187, + "grad_norm": 0.6090282354899487, + "learning_rate": 4.507794622252746e-06, + "loss": 0.2979, + "step": 9695 + }, + { + "epoch": 0.4542090223450602, + "grad_norm": 0.5752312918582709, + "learning_rate": 4.507681619084178e-06, + "loss": 0.2673, + "step": 9696 + }, + { + "epoch": 0.4542558673349885, + "grad_norm": 0.6145632439275779, + "learning_rate": 4.5075686043619066e-06, + "loss": 0.2912, + "step": 9697 + }, + { + "epoch": 0.45430271232491687, + "grad_norm": 0.6259414862822944, + "learning_rate": 4.507455578086583e-06, + "loss": 0.2942, + "step": 9698 + }, + { + "epoch": 0.45434955731484516, + "grad_norm": 0.6321044073820306, + "learning_rate": 4.507342540258856e-06, + "loss": 0.3069, + "step": 9699 + }, + { + "epoch": 0.4543964023047735, + "grad_norm": 0.6013313764415522, + "learning_rate": 4.5072294908793784e-06, + "loss": 0.2778, + "step": 9700 + }, + { + "epoch": 0.4544432472947018, + "grad_norm": 0.5974319154937193, + "learning_rate": 4.507116429948799e-06, + "loss": 0.296, + "step": 9701 + }, + { + "epoch": 0.45449009228463016, + "grad_norm": 0.5551314266843687, + "learning_rate": 4.507003357467769e-06, + "loss": 0.2931, + "step": 9702 + }, + { + "epoch": 0.45453693727455846, + "grad_norm": 0.6215198357371209, + "learning_rate": 4.50689027343694e-06, + "loss": 0.3037, + "step": 9703 + }, + { + "epoch": 0.4545837822644868, + "grad_norm": 0.5562402793606644, + "learning_rate": 4.506777177856961e-06, + "loss": 0.2712, + "step": 9704 + }, + { + "epoch": 0.45463062725441516, + "grad_norm": 0.5996961351435004, + "learning_rate": 4.506664070728484e-06, + "loss": 0.2871, + "step": 9705 + }, + { + "epoch": 0.45467747224434346, + "grad_norm": 0.6454499559541117, + "learning_rate": 4.50655095205216e-06, + "loss": 0.3279, + "step": 9706 + }, + { + "epoch": 0.4547243172342718, + "grad_norm": 0.582237157666163, + "learning_rate": 4.506437821828638e-06, + "loss": 0.2823, + "step": 9707 + }, + { + "epoch": 0.4547711622242001, + "grad_norm": 0.5937744893643905, + "learning_rate": 4.5063246800585724e-06, + "loss": 0.2709, + "step": 9708 + }, + { + "epoch": 0.45481800721412846, + "grad_norm": 0.6419215894968824, + "learning_rate": 4.506211526742612e-06, + "loss": 0.2988, + "step": 9709 + }, + { + "epoch": 0.45486485220405676, + "grad_norm": 0.6147008317003402, + "learning_rate": 4.506098361881408e-06, + "loss": 0.2791, + "step": 9710 + }, + { + "epoch": 0.4549116971939851, + "grad_norm": 0.6354845601203691, + "learning_rate": 4.505985185475613e-06, + "loss": 0.2901, + "step": 9711 + }, + { + "epoch": 0.4549585421839134, + "grad_norm": 0.6500213518874287, + "learning_rate": 4.5058719975258765e-06, + "loss": 0.3157, + "step": 9712 + }, + { + "epoch": 0.45500538717384176, + "grad_norm": 0.6084279282524836, + "learning_rate": 4.505758798032852e-06, + "loss": 0.3041, + "step": 9713 + }, + { + "epoch": 0.4550522321637701, + "grad_norm": 0.6158721881325162, + "learning_rate": 4.505645586997189e-06, + "loss": 0.304, + "step": 9714 + }, + { + "epoch": 0.4550990771536984, + "grad_norm": 0.5838215494630022, + "learning_rate": 4.505532364419539e-06, + "loss": 0.2886, + "step": 9715 + }, + { + "epoch": 0.45514592214362676, + "grad_norm": 0.5781668960248624, + "learning_rate": 4.505419130300556e-06, + "loss": 0.2819, + "step": 9716 + }, + { + "epoch": 0.45519276713355505, + "grad_norm": 0.5850153133095763, + "learning_rate": 4.5053058846408885e-06, + "loss": 0.2923, + "step": 9717 + }, + { + "epoch": 0.4552396121234834, + "grad_norm": 0.5886803878314222, + "learning_rate": 4.505192627441191e-06, + "loss": 0.2863, + "step": 9718 + }, + { + "epoch": 0.4552864571134117, + "grad_norm": 0.638233643578569, + "learning_rate": 4.5050793587021125e-06, + "loss": 0.303, + "step": 9719 + }, + { + "epoch": 0.45533330210334005, + "grad_norm": 0.6366592506547499, + "learning_rate": 4.504966078424307e-06, + "loss": 0.2934, + "step": 9720 + }, + { + "epoch": 0.45538014709326835, + "grad_norm": 0.570647925083397, + "learning_rate": 4.504852786608426e-06, + "loss": 0.3, + "step": 9721 + }, + { + "epoch": 0.4554269920831967, + "grad_norm": 0.5611947309687542, + "learning_rate": 4.504739483255121e-06, + "loss": 0.2823, + "step": 9722 + }, + { + "epoch": 0.45547383707312505, + "grad_norm": 0.6065819016210895, + "learning_rate": 4.5046261683650425e-06, + "loss": 0.2933, + "step": 9723 + }, + { + "epoch": 0.45552068206305335, + "grad_norm": 0.6251311760303898, + "learning_rate": 4.504512841938846e-06, + "loss": 0.2901, + "step": 9724 + }, + { + "epoch": 0.4555675270529817, + "grad_norm": 0.630477938474995, + "learning_rate": 4.504399503977181e-06, + "loss": 0.2856, + "step": 9725 + }, + { + "epoch": 0.45561437204291, + "grad_norm": 0.5833989592152403, + "learning_rate": 4.504286154480701e-06, + "loss": 0.2791, + "step": 9726 + }, + { + "epoch": 0.45566121703283835, + "grad_norm": 0.5881425072817442, + "learning_rate": 4.504172793450059e-06, + "loss": 0.2743, + "step": 9727 + }, + { + "epoch": 0.45570806202276665, + "grad_norm": 0.6440246854200239, + "learning_rate": 4.504059420885905e-06, + "loss": 0.3249, + "step": 9728 + }, + { + "epoch": 0.455754907012695, + "grad_norm": 0.5793456291997447, + "learning_rate": 4.503946036788893e-06, + "loss": 0.2897, + "step": 9729 + }, + { + "epoch": 0.4558017520026233, + "grad_norm": 0.5683792420051393, + "learning_rate": 4.503832641159675e-06, + "loss": 0.2636, + "step": 9730 + }, + { + "epoch": 0.45584859699255165, + "grad_norm": 0.5888080717493961, + "learning_rate": 4.503719233998905e-06, + "loss": 0.2955, + "step": 9731 + }, + { + "epoch": 0.45589544198248, + "grad_norm": 0.6248830289363112, + "learning_rate": 4.503605815307232e-06, + "loss": 0.2982, + "step": 9732 + }, + { + "epoch": 0.4559422869724083, + "grad_norm": 0.6147092214626727, + "learning_rate": 4.503492385085313e-06, + "loss": 0.2932, + "step": 9733 + }, + { + "epoch": 0.45598913196233665, + "grad_norm": 0.6246079627727532, + "learning_rate": 4.503378943333799e-06, + "loss": 0.3182, + "step": 9734 + }, + { + "epoch": 0.45603597695226494, + "grad_norm": 0.5870078169383961, + "learning_rate": 4.5032654900533424e-06, + "loss": 0.2824, + "step": 9735 + }, + { + "epoch": 0.4560828219421933, + "grad_norm": 0.5526848413953626, + "learning_rate": 4.503152025244596e-06, + "loss": 0.2583, + "step": 9736 + }, + { + "epoch": 0.4561296669321216, + "grad_norm": 0.5764626214196487, + "learning_rate": 4.503038548908214e-06, + "loss": 0.2768, + "step": 9737 + }, + { + "epoch": 0.45617651192204994, + "grad_norm": 0.5992449034145204, + "learning_rate": 4.502925061044847e-06, + "loss": 0.2843, + "step": 9738 + }, + { + "epoch": 0.45622335691197824, + "grad_norm": 0.5640269214095478, + "learning_rate": 4.502811561655151e-06, + "loss": 0.2865, + "step": 9739 + }, + { + "epoch": 0.4562702019019066, + "grad_norm": 0.5950367749869959, + "learning_rate": 4.502698050739778e-06, + "loss": 0.3053, + "step": 9740 + }, + { + "epoch": 0.45631704689183494, + "grad_norm": 0.5778692750322821, + "learning_rate": 4.502584528299381e-06, + "loss": 0.2863, + "step": 9741 + }, + { + "epoch": 0.45636389188176324, + "grad_norm": 0.6478089932766207, + "learning_rate": 4.502470994334612e-06, + "loss": 0.3034, + "step": 9742 + }, + { + "epoch": 0.4564107368716916, + "grad_norm": 0.6317196352725545, + "learning_rate": 4.5023574488461276e-06, + "loss": 0.2783, + "step": 9743 + }, + { + "epoch": 0.4564575818616199, + "grad_norm": 0.633004998966678, + "learning_rate": 4.502243891834578e-06, + "loss": 0.2874, + "step": 9744 + }, + { + "epoch": 0.45650442685154824, + "grad_norm": 0.6088704330545817, + "learning_rate": 4.502130323300618e-06, + "loss": 0.27, + "step": 9745 + }, + { + "epoch": 0.45655127184147654, + "grad_norm": 0.5817735305896031, + "learning_rate": 4.502016743244902e-06, + "loss": 0.2829, + "step": 9746 + }, + { + "epoch": 0.4565981168314049, + "grad_norm": 0.5654661402002522, + "learning_rate": 4.501903151668083e-06, + "loss": 0.2717, + "step": 9747 + }, + { + "epoch": 0.4566449618213332, + "grad_norm": 0.6263667110371018, + "learning_rate": 4.501789548570814e-06, + "loss": 0.3009, + "step": 9748 + }, + { + "epoch": 0.45669180681126154, + "grad_norm": 0.5489609647847205, + "learning_rate": 4.50167593395375e-06, + "loss": 0.2826, + "step": 9749 + }, + { + "epoch": 0.4567386518011899, + "grad_norm": 0.6285235834054099, + "learning_rate": 4.501562307817543e-06, + "loss": 0.3044, + "step": 9750 + }, + { + "epoch": 0.4567854967911182, + "grad_norm": 0.6147345842925699, + "learning_rate": 4.5014486701628475e-06, + "loss": 0.2779, + "step": 9751 + }, + { + "epoch": 0.45683234178104654, + "grad_norm": 0.5940769124472767, + "learning_rate": 4.5013350209903196e-06, + "loss": 0.284, + "step": 9752 + }, + { + "epoch": 0.45687918677097483, + "grad_norm": 0.6324226729025392, + "learning_rate": 4.501221360300611e-06, + "loss": 0.3168, + "step": 9753 + }, + { + "epoch": 0.4569260317609032, + "grad_norm": 0.5636355180958712, + "learning_rate": 4.501107688094376e-06, + "loss": 0.2813, + "step": 9754 + }, + { + "epoch": 0.4569728767508315, + "grad_norm": 0.5618567921706209, + "learning_rate": 4.50099400437227e-06, + "loss": 0.278, + "step": 9755 + }, + { + "epoch": 0.45701972174075983, + "grad_norm": 0.670841088977289, + "learning_rate": 4.500880309134946e-06, + "loss": 0.2727, + "step": 9756 + }, + { + "epoch": 0.45706656673068813, + "grad_norm": 0.593423493350436, + "learning_rate": 4.500766602383059e-06, + "loss": 0.2982, + "step": 9757 + }, + { + "epoch": 0.4571134117206165, + "grad_norm": 0.5898685682025946, + "learning_rate": 4.500652884117263e-06, + "loss": 0.2899, + "step": 9758 + }, + { + "epoch": 0.45716025671054483, + "grad_norm": 0.5900276609453198, + "learning_rate": 4.5005391543382125e-06, + "loss": 0.2794, + "step": 9759 + }, + { + "epoch": 0.45720710170047313, + "grad_norm": 0.6105483318542234, + "learning_rate": 4.500425413046562e-06, + "loss": 0.2717, + "step": 9760 + }, + { + "epoch": 0.4572539466904015, + "grad_norm": 0.5854687474942891, + "learning_rate": 4.500311660242965e-06, + "loss": 0.29, + "step": 9761 + }, + { + "epoch": 0.4573007916803298, + "grad_norm": 0.5673469750712514, + "learning_rate": 4.500197895928078e-06, + "loss": 0.2701, + "step": 9762 + }, + { + "epoch": 0.45734763667025813, + "grad_norm": 0.5792956775229693, + "learning_rate": 4.500084120102556e-06, + "loss": 0.2737, + "step": 9763 + }, + { + "epoch": 0.4573944816601864, + "grad_norm": 0.5575277048816188, + "learning_rate": 4.499970332767051e-06, + "loss": 0.2811, + "step": 9764 + }, + { + "epoch": 0.4574413266501148, + "grad_norm": 0.6441688452704488, + "learning_rate": 4.49985653392222e-06, + "loss": 0.2987, + "step": 9765 + }, + { + "epoch": 0.4574881716400431, + "grad_norm": 0.7457885794696099, + "learning_rate": 4.499742723568718e-06, + "loss": 0.2952, + "step": 9766 + }, + { + "epoch": 0.4575350166299714, + "grad_norm": 0.6378301365466512, + "learning_rate": 4.499628901707198e-06, + "loss": 0.3124, + "step": 9767 + }, + { + "epoch": 0.4575818616198998, + "grad_norm": 0.5678324636498266, + "learning_rate": 4.499515068338316e-06, + "loss": 0.2855, + "step": 9768 + }, + { + "epoch": 0.4576287066098281, + "grad_norm": 0.6180320332799043, + "learning_rate": 4.4994012234627285e-06, + "loss": 0.2701, + "step": 9769 + }, + { + "epoch": 0.4576755515997564, + "grad_norm": 0.607761954855303, + "learning_rate": 4.499287367081089e-06, + "loss": 0.2896, + "step": 9770 + }, + { + "epoch": 0.4577223965896847, + "grad_norm": 0.6525646902031769, + "learning_rate": 4.4991734991940524e-06, + "loss": 0.2997, + "step": 9771 + }, + { + "epoch": 0.4577692415796131, + "grad_norm": 0.6193069116221311, + "learning_rate": 4.4990596198022766e-06, + "loss": 0.269, + "step": 9772 + }, + { + "epoch": 0.45781608656954137, + "grad_norm": 0.6403234876028518, + "learning_rate": 4.4989457289064134e-06, + "loss": 0.2853, + "step": 9773 + }, + { + "epoch": 0.4578629315594697, + "grad_norm": 0.5714910598331261, + "learning_rate": 4.49883182650712e-06, + "loss": 0.28, + "step": 9774 + }, + { + "epoch": 0.457909776549398, + "grad_norm": 0.6171548065164888, + "learning_rate": 4.498717912605054e-06, + "loss": 0.2887, + "step": 9775 + }, + { + "epoch": 0.45795662153932637, + "grad_norm": 0.6158616371266961, + "learning_rate": 4.498603987200867e-06, + "loss": 0.3024, + "step": 9776 + }, + { + "epoch": 0.4580034665292547, + "grad_norm": 0.6161338146446803, + "learning_rate": 4.498490050295217e-06, + "loss": 0.3099, + "step": 9777 + }, + { + "epoch": 0.458050311519183, + "grad_norm": 0.6080345842568802, + "learning_rate": 4.498376101888758e-06, + "loss": 0.2769, + "step": 9778 + }, + { + "epoch": 0.45809715650911137, + "grad_norm": 0.6659002202440111, + "learning_rate": 4.498262141982148e-06, + "loss": 0.3219, + "step": 9779 + }, + { + "epoch": 0.45814400149903967, + "grad_norm": 0.6262974922040315, + "learning_rate": 4.498148170576041e-06, + "loss": 0.2816, + "step": 9780 + }, + { + "epoch": 0.458190846488968, + "grad_norm": 0.6569596144526745, + "learning_rate": 4.498034187671094e-06, + "loss": 0.3047, + "step": 9781 + }, + { + "epoch": 0.4582376914788963, + "grad_norm": 0.5686382984493563, + "learning_rate": 4.497920193267962e-06, + "loss": 0.2717, + "step": 9782 + }, + { + "epoch": 0.45828453646882467, + "grad_norm": 0.5715685239345808, + "learning_rate": 4.4978061873673015e-06, + "loss": 0.273, + "step": 9783 + }, + { + "epoch": 0.45833138145875296, + "grad_norm": 0.6443151328322702, + "learning_rate": 4.497692169969768e-06, + "loss": 0.2987, + "step": 9784 + }, + { + "epoch": 0.4583782264486813, + "grad_norm": 0.638558500123935, + "learning_rate": 4.497578141076019e-06, + "loss": 0.2796, + "step": 9785 + }, + { + "epoch": 0.45842507143860967, + "grad_norm": 0.6261387775817862, + "learning_rate": 4.4974641006867094e-06, + "loss": 0.2924, + "step": 9786 + }, + { + "epoch": 0.45847191642853796, + "grad_norm": 0.6012786554207269, + "learning_rate": 4.497350048802496e-06, + "loss": 0.3015, + "step": 9787 + }, + { + "epoch": 0.4585187614184663, + "grad_norm": 0.5854509827300363, + "learning_rate": 4.497235985424035e-06, + "loss": 0.3036, + "step": 9788 + }, + { + "epoch": 0.4585656064083946, + "grad_norm": 0.6344826276599372, + "learning_rate": 4.497121910551984e-06, + "loss": 0.2833, + "step": 9789 + }, + { + "epoch": 0.45861245139832296, + "grad_norm": 0.5854872210981593, + "learning_rate": 4.497007824186998e-06, + "loss": 0.263, + "step": 9790 + }, + { + "epoch": 0.45865929638825126, + "grad_norm": 0.5743581486116123, + "learning_rate": 4.4968937263297326e-06, + "loss": 0.2836, + "step": 9791 + }, + { + "epoch": 0.4587061413781796, + "grad_norm": 0.6002594445889424, + "learning_rate": 4.496779616980847e-06, + "loss": 0.2952, + "step": 9792 + }, + { + "epoch": 0.4587529863681079, + "grad_norm": 0.6492711046382786, + "learning_rate": 4.496665496140995e-06, + "loss": 0.3213, + "step": 9793 + }, + { + "epoch": 0.45879983135803626, + "grad_norm": 0.6299640811556146, + "learning_rate": 4.496551363810836e-06, + "loss": 0.3082, + "step": 9794 + }, + { + "epoch": 0.4588466763479646, + "grad_norm": 0.6041846653882386, + "learning_rate": 4.496437219991026e-06, + "loss": 0.2731, + "step": 9795 + }, + { + "epoch": 0.4588935213378929, + "grad_norm": 0.6469468367689897, + "learning_rate": 4.496323064682221e-06, + "loss": 0.2892, + "step": 9796 + }, + { + "epoch": 0.45894036632782126, + "grad_norm": 0.6043928123431438, + "learning_rate": 4.496208897885079e-06, + "loss": 0.2877, + "step": 9797 + }, + { + "epoch": 0.45898721131774955, + "grad_norm": 0.7065594066995592, + "learning_rate": 4.496094719600257e-06, + "loss": 0.3029, + "step": 9798 + }, + { + "epoch": 0.4590340563076779, + "grad_norm": 0.6324317912407916, + "learning_rate": 4.49598052982841e-06, + "loss": 0.2965, + "step": 9799 + }, + { + "epoch": 0.4590809012976062, + "grad_norm": 0.6176730321541518, + "learning_rate": 4.4958663285701974e-06, + "loss": 0.3046, + "step": 9800 + }, + { + "epoch": 0.45912774628753455, + "grad_norm": 0.6135459605127606, + "learning_rate": 4.495752115826276e-06, + "loss": 0.2979, + "step": 9801 + }, + { + "epoch": 0.45917459127746285, + "grad_norm": 0.6154737338772284, + "learning_rate": 4.495637891597303e-06, + "loss": 0.2667, + "step": 9802 + }, + { + "epoch": 0.4592214362673912, + "grad_norm": 0.6289722002830685, + "learning_rate": 4.495523655883935e-06, + "loss": 0.3044, + "step": 9803 + }, + { + "epoch": 0.45926828125731955, + "grad_norm": 0.6596095422319229, + "learning_rate": 4.495409408686831e-06, + "loss": 0.2727, + "step": 9804 + }, + { + "epoch": 0.45931512624724785, + "grad_norm": 0.5793835476545777, + "learning_rate": 4.495295150006646e-06, + "loss": 0.2739, + "step": 9805 + }, + { + "epoch": 0.4593619712371762, + "grad_norm": 0.5736127080276042, + "learning_rate": 4.495180879844039e-06, + "loss": 0.2851, + "step": 9806 + }, + { + "epoch": 0.4594088162271045, + "grad_norm": 0.565877907417802, + "learning_rate": 4.495066598199669e-06, + "loss": 0.2574, + "step": 9807 + }, + { + "epoch": 0.45945566121703285, + "grad_norm": 0.6113181611979821, + "learning_rate": 4.49495230507419e-06, + "loss": 0.2705, + "step": 9808 + }, + { + "epoch": 0.45950250620696115, + "grad_norm": 0.571996851536526, + "learning_rate": 4.494838000468264e-06, + "loss": 0.2804, + "step": 9809 + }, + { + "epoch": 0.4595493511968895, + "grad_norm": 0.583608121024647, + "learning_rate": 4.494723684382546e-06, + "loss": 0.2766, + "step": 9810 + }, + { + "epoch": 0.4595961961868178, + "grad_norm": 0.7212707023123434, + "learning_rate": 4.494609356817693e-06, + "loss": 0.2826, + "step": 9811 + }, + { + "epoch": 0.45964304117674615, + "grad_norm": 0.6847405619728271, + "learning_rate": 4.494495017774366e-06, + "loss": 0.3224, + "step": 9812 + }, + { + "epoch": 0.4596898861666745, + "grad_norm": 0.6208705570888741, + "learning_rate": 4.494380667253222e-06, + "loss": 0.2761, + "step": 9813 + }, + { + "epoch": 0.4597367311566028, + "grad_norm": 0.6006759366128955, + "learning_rate": 4.4942663052549175e-06, + "loss": 0.2879, + "step": 9814 + }, + { + "epoch": 0.45978357614653115, + "grad_norm": 0.6266364793691499, + "learning_rate": 4.494151931780112e-06, + "loss": 0.2929, + "step": 9815 + }, + { + "epoch": 0.45983042113645944, + "grad_norm": 0.6953955179768375, + "learning_rate": 4.494037546829463e-06, + "loss": 0.2797, + "step": 9816 + }, + { + "epoch": 0.4598772661263878, + "grad_norm": 0.6061298094870524, + "learning_rate": 4.493923150403629e-06, + "loss": 0.2876, + "step": 9817 + }, + { + "epoch": 0.4599241111163161, + "grad_norm": 0.6051457662970197, + "learning_rate": 4.493808742503269e-06, + "loss": 0.2846, + "step": 9818 + }, + { + "epoch": 0.45997095610624444, + "grad_norm": 0.6578524169190091, + "learning_rate": 4.493694323129041e-06, + "loss": 0.3106, + "step": 9819 + }, + { + "epoch": 0.46001780109617274, + "grad_norm": 0.5964857805551301, + "learning_rate": 4.493579892281602e-06, + "loss": 0.2783, + "step": 9820 + }, + { + "epoch": 0.4600646460861011, + "grad_norm": 0.6204451068430223, + "learning_rate": 4.493465449961613e-06, + "loss": 0.2737, + "step": 9821 + }, + { + "epoch": 0.46011149107602944, + "grad_norm": 0.5728338754389233, + "learning_rate": 4.493350996169731e-06, + "loss": 0.2698, + "step": 9822 + }, + { + "epoch": 0.46015833606595774, + "grad_norm": 0.592676980757148, + "learning_rate": 4.493236530906615e-06, + "loss": 0.3015, + "step": 9823 + }, + { + "epoch": 0.4602051810558861, + "grad_norm": 0.6950872653838744, + "learning_rate": 4.4931220541729234e-06, + "loss": 0.2973, + "step": 9824 + }, + { + "epoch": 0.4602520260458144, + "grad_norm": 0.5979460860280904, + "learning_rate": 4.493007565969316e-06, + "loss": 0.2617, + "step": 9825 + }, + { + "epoch": 0.46029887103574274, + "grad_norm": 0.6287661005242443, + "learning_rate": 4.492893066296451e-06, + "loss": 0.3099, + "step": 9826 + }, + { + "epoch": 0.46034571602567104, + "grad_norm": 0.585732875597475, + "learning_rate": 4.4927785551549865e-06, + "loss": 0.2742, + "step": 9827 + }, + { + "epoch": 0.4603925610155994, + "grad_norm": 0.5865537755032347, + "learning_rate": 4.4926640325455826e-06, + "loss": 0.3046, + "step": 9828 + }, + { + "epoch": 0.4604394060055277, + "grad_norm": 0.5980470130321374, + "learning_rate": 4.492549498468899e-06, + "loss": 0.2753, + "step": 9829 + }, + { + "epoch": 0.46048625099545604, + "grad_norm": 0.6456242294060619, + "learning_rate": 4.492434952925593e-06, + "loss": 0.2919, + "step": 9830 + }, + { + "epoch": 0.4605330959853844, + "grad_norm": 0.6071500806972171, + "learning_rate": 4.492320395916324e-06, + "loss": 0.2865, + "step": 9831 + }, + { + "epoch": 0.4605799409753127, + "grad_norm": 0.6334246693404576, + "learning_rate": 4.492205827441753e-06, + "loss": 0.2862, + "step": 9832 + }, + { + "epoch": 0.46062678596524104, + "grad_norm": 0.524619532456226, + "learning_rate": 4.492091247502538e-06, + "loss": 0.2605, + "step": 9833 + }, + { + "epoch": 0.46067363095516933, + "grad_norm": 0.5876425810672105, + "learning_rate": 4.491976656099338e-06, + "loss": 0.2807, + "step": 9834 + }, + { + "epoch": 0.4607204759450977, + "grad_norm": 0.6054044653477488, + "learning_rate": 4.491862053232813e-06, + "loss": 0.2921, + "step": 9835 + }, + { + "epoch": 0.460767320935026, + "grad_norm": 0.5761982602219218, + "learning_rate": 4.491747438903623e-06, + "loss": 0.2858, + "step": 9836 + }, + { + "epoch": 0.46081416592495433, + "grad_norm": 0.5715389473595264, + "learning_rate": 4.491632813112427e-06, + "loss": 0.2833, + "step": 9837 + }, + { + "epoch": 0.46086101091488263, + "grad_norm": 0.6247632544782937, + "learning_rate": 4.491518175859885e-06, + "loss": 0.3034, + "step": 9838 + }, + { + "epoch": 0.460907855904811, + "grad_norm": 0.6322605934057435, + "learning_rate": 4.491403527146656e-06, + "loss": 0.2874, + "step": 9839 + }, + { + "epoch": 0.46095470089473933, + "grad_norm": 0.6548724895593366, + "learning_rate": 4.4912888669734e-06, + "loss": 0.3123, + "step": 9840 + }, + { + "epoch": 0.46100154588466763, + "grad_norm": 0.6257186244375859, + "learning_rate": 4.491174195340777e-06, + "loss": 0.2904, + "step": 9841 + }, + { + "epoch": 0.461048390874596, + "grad_norm": 0.6013871381623184, + "learning_rate": 4.491059512249448e-06, + "loss": 0.3016, + "step": 9842 + }, + { + "epoch": 0.4610952358645243, + "grad_norm": 0.6680127028579222, + "learning_rate": 4.490944817700071e-06, + "loss": 0.3082, + "step": 9843 + }, + { + "epoch": 0.46114208085445263, + "grad_norm": 0.5751425727427909, + "learning_rate": 4.490830111693307e-06, + "loss": 0.2898, + "step": 9844 + }, + { + "epoch": 0.4611889258443809, + "grad_norm": 0.5953150131741198, + "learning_rate": 4.490715394229817e-06, + "loss": 0.2853, + "step": 9845 + }, + { + "epoch": 0.4612357708343093, + "grad_norm": 0.5905734384823506, + "learning_rate": 4.49060066531026e-06, + "loss": 0.257, + "step": 9846 + }, + { + "epoch": 0.4612826158242376, + "grad_norm": 0.6188598307756374, + "learning_rate": 4.490485924935295e-06, + "loss": 0.2705, + "step": 9847 + }, + { + "epoch": 0.4613294608141659, + "grad_norm": 0.5693222154804748, + "learning_rate": 4.490371173105586e-06, + "loss": 0.2879, + "step": 9848 + }, + { + "epoch": 0.4613763058040943, + "grad_norm": 0.6556834124356301, + "learning_rate": 4.4902564098217894e-06, + "loss": 0.285, + "step": 9849 + }, + { + "epoch": 0.4614231507940226, + "grad_norm": 0.5897621709145767, + "learning_rate": 4.490141635084568e-06, + "loss": 0.2908, + "step": 9850 + }, + { + "epoch": 0.4614699957839509, + "grad_norm": 0.6428429265929588, + "learning_rate": 4.490026848894582e-06, + "loss": 0.2844, + "step": 9851 + }, + { + "epoch": 0.4615168407738792, + "grad_norm": 0.6395362792333221, + "learning_rate": 4.489912051252491e-06, + "loss": 0.2955, + "step": 9852 + }, + { + "epoch": 0.4615636857638076, + "grad_norm": 0.6047022087137364, + "learning_rate": 4.4897972421589565e-06, + "loss": 0.2775, + "step": 9853 + }, + { + "epoch": 0.46161053075373587, + "grad_norm": 0.6098190161474499, + "learning_rate": 4.489682421614639e-06, + "loss": 0.2771, + "step": 9854 + }, + { + "epoch": 0.4616573757436642, + "grad_norm": 0.5795043007376117, + "learning_rate": 4.4895675896201995e-06, + "loss": 0.2811, + "step": 9855 + }, + { + "epoch": 0.4617042207335925, + "grad_norm": 0.6172083920123657, + "learning_rate": 4.489452746176299e-06, + "loss": 0.3069, + "step": 9856 + }, + { + "epoch": 0.46175106572352087, + "grad_norm": 0.6423002486016549, + "learning_rate": 4.4893378912835975e-06, + "loss": 0.3112, + "step": 9857 + }, + { + "epoch": 0.4617979107134492, + "grad_norm": 0.5937654531241355, + "learning_rate": 4.489223024942756e-06, + "loss": 0.2748, + "step": 9858 + }, + { + "epoch": 0.4618447557033775, + "grad_norm": 0.5570756767082712, + "learning_rate": 4.489108147154436e-06, + "loss": 0.2568, + "step": 9859 + }, + { + "epoch": 0.46189160069330587, + "grad_norm": 0.5730296808774741, + "learning_rate": 4.488993257919299e-06, + "loss": 0.2998, + "step": 9860 + }, + { + "epoch": 0.46193844568323417, + "grad_norm": 0.5417140539644731, + "learning_rate": 4.488878357238007e-06, + "loss": 0.2906, + "step": 9861 + }, + { + "epoch": 0.4619852906731625, + "grad_norm": 0.604078774240237, + "learning_rate": 4.4887634451112175e-06, + "loss": 0.2902, + "step": 9862 + }, + { + "epoch": 0.4620321356630908, + "grad_norm": 0.5754278475429199, + "learning_rate": 4.488648521539596e-06, + "loss": 0.2809, + "step": 9863 + }, + { + "epoch": 0.46207898065301917, + "grad_norm": 0.607149174107249, + "learning_rate": 4.488533586523801e-06, + "loss": 0.2698, + "step": 9864 + }, + { + "epoch": 0.46212582564294746, + "grad_norm": 0.6113965074432147, + "learning_rate": 4.4884186400644956e-06, + "loss": 0.2907, + "step": 9865 + }, + { + "epoch": 0.4621726706328758, + "grad_norm": 0.5844703412143262, + "learning_rate": 4.48830368216234e-06, + "loss": 0.2933, + "step": 9866 + }, + { + "epoch": 0.46221951562280417, + "grad_norm": 0.6030879951851275, + "learning_rate": 4.488188712817998e-06, + "loss": 0.2883, + "step": 9867 + }, + { + "epoch": 0.46226636061273246, + "grad_norm": 0.5951789948985251, + "learning_rate": 4.488073732032128e-06, + "loss": 0.2808, + "step": 9868 + }, + { + "epoch": 0.4623132056026608, + "grad_norm": 0.6236760811380746, + "learning_rate": 4.487958739805394e-06, + "loss": 0.2983, + "step": 9869 + }, + { + "epoch": 0.4623600505925891, + "grad_norm": 0.5740583836605945, + "learning_rate": 4.487843736138458e-06, + "loss": 0.2797, + "step": 9870 + }, + { + "epoch": 0.46240689558251746, + "grad_norm": 0.5569117737211525, + "learning_rate": 4.4877287210319794e-06, + "loss": 0.2822, + "step": 9871 + }, + { + "epoch": 0.46245374057244576, + "grad_norm": 0.5929612416954891, + "learning_rate": 4.487613694486622e-06, + "loss": 0.2921, + "step": 9872 + }, + { + "epoch": 0.4625005855623741, + "grad_norm": 0.5976562386914761, + "learning_rate": 4.487498656503048e-06, + "loss": 0.2874, + "step": 9873 + }, + { + "epoch": 0.4625474305523024, + "grad_norm": 0.5866991080827422, + "learning_rate": 4.487383607081917e-06, + "loss": 0.2623, + "step": 9874 + }, + { + "epoch": 0.46259427554223076, + "grad_norm": 0.5447183784168168, + "learning_rate": 4.487268546223895e-06, + "loss": 0.2734, + "step": 9875 + }, + { + "epoch": 0.4626411205321591, + "grad_norm": 0.5774853772236911, + "learning_rate": 4.487153473929642e-06, + "loss": 0.2707, + "step": 9876 + }, + { + "epoch": 0.4626879655220874, + "grad_norm": 0.5974487942106527, + "learning_rate": 4.487038390199819e-06, + "loss": 0.2782, + "step": 9877 + }, + { + "epoch": 0.46273481051201576, + "grad_norm": 0.6610155457679212, + "learning_rate": 4.48692329503509e-06, + "loss": 0.2837, + "step": 9878 + }, + { + "epoch": 0.46278165550194406, + "grad_norm": 0.6209366271352756, + "learning_rate": 4.4868081884361165e-06, + "loss": 0.279, + "step": 9879 + }, + { + "epoch": 0.4628285004918724, + "grad_norm": 0.5865760907708643, + "learning_rate": 4.4866930704035615e-06, + "loss": 0.2623, + "step": 9880 + }, + { + "epoch": 0.4628753454818007, + "grad_norm": 0.5659345254931112, + "learning_rate": 4.486577940938087e-06, + "loss": 0.2707, + "step": 9881 + }, + { + "epoch": 0.46292219047172906, + "grad_norm": 0.6090832472999586, + "learning_rate": 4.486462800040357e-06, + "loss": 0.2884, + "step": 9882 + }, + { + "epoch": 0.46296903546165735, + "grad_norm": 0.6019878361421664, + "learning_rate": 4.486347647711031e-06, + "loss": 0.281, + "step": 9883 + }, + { + "epoch": 0.4630158804515857, + "grad_norm": 0.5721516564684038, + "learning_rate": 4.486232483950774e-06, + "loss": 0.2668, + "step": 9884 + }, + { + "epoch": 0.46306272544151406, + "grad_norm": 0.5843997503757148, + "learning_rate": 4.486117308760249e-06, + "loss": 0.3165, + "step": 9885 + }, + { + "epoch": 0.46310957043144235, + "grad_norm": 0.5597122319313441, + "learning_rate": 4.486002122140118e-06, + "loss": 0.2849, + "step": 9886 + }, + { + "epoch": 0.4631564154213707, + "grad_norm": 0.6683888526368346, + "learning_rate": 4.485886924091043e-06, + "loss": 0.2957, + "step": 9887 + }, + { + "epoch": 0.463203260411299, + "grad_norm": 0.7040337934438687, + "learning_rate": 4.485771714613689e-06, + "loss": 0.3194, + "step": 9888 + }, + { + "epoch": 0.46325010540122735, + "grad_norm": 0.6024291169801356, + "learning_rate": 4.485656493708717e-06, + "loss": 0.3106, + "step": 9889 + }, + { + "epoch": 0.46329695039115565, + "grad_norm": 0.6440313094402347, + "learning_rate": 4.485541261376791e-06, + "loss": 0.3044, + "step": 9890 + }, + { + "epoch": 0.463343795381084, + "grad_norm": 0.5502293701278557, + "learning_rate": 4.485426017618575e-06, + "loss": 0.2732, + "step": 9891 + }, + { + "epoch": 0.4633906403710123, + "grad_norm": 0.5637653196692205, + "learning_rate": 4.485310762434731e-06, + "loss": 0.2812, + "step": 9892 + }, + { + "epoch": 0.46343748536094065, + "grad_norm": 0.5821793112226178, + "learning_rate": 4.485195495825922e-06, + "loss": 0.2884, + "step": 9893 + }, + { + "epoch": 0.463484330350869, + "grad_norm": 0.6478312482929383, + "learning_rate": 4.485080217792812e-06, + "loss": 0.2967, + "step": 9894 + }, + { + "epoch": 0.4635311753407973, + "grad_norm": 0.5671855594843432, + "learning_rate": 4.484964928336065e-06, + "loss": 0.278, + "step": 9895 + }, + { + "epoch": 0.46357802033072565, + "grad_norm": 0.5903941770684059, + "learning_rate": 4.484849627456343e-06, + "loss": 0.2828, + "step": 9896 + }, + { + "epoch": 0.46362486532065394, + "grad_norm": 0.5453191272178507, + "learning_rate": 4.4847343151543104e-06, + "loss": 0.2856, + "step": 9897 + }, + { + "epoch": 0.4636717103105823, + "grad_norm": 0.5551696136717379, + "learning_rate": 4.48461899143063e-06, + "loss": 0.2886, + "step": 9898 + }, + { + "epoch": 0.4637185553005106, + "grad_norm": 0.5557990184005301, + "learning_rate": 4.484503656285968e-06, + "loss": 0.2439, + "step": 9899 + }, + { + "epoch": 0.46376540029043894, + "grad_norm": 0.5954822371545339, + "learning_rate": 4.484388309720985e-06, + "loss": 0.2939, + "step": 9900 + }, + { + "epoch": 0.46381224528036724, + "grad_norm": 0.5272778882409271, + "learning_rate": 4.484272951736346e-06, + "loss": 0.2701, + "step": 9901 + }, + { + "epoch": 0.4638590902702956, + "grad_norm": 0.6526217404838364, + "learning_rate": 4.484157582332715e-06, + "loss": 0.3252, + "step": 9902 + }, + { + "epoch": 0.46390593526022394, + "grad_norm": 0.5840830981309135, + "learning_rate": 4.484042201510756e-06, + "loss": 0.2904, + "step": 9903 + }, + { + "epoch": 0.46395278025015224, + "grad_norm": 0.6278704494668911, + "learning_rate": 4.4839268092711316e-06, + "loss": 0.2979, + "step": 9904 + }, + { + "epoch": 0.4639996252400806, + "grad_norm": 0.563359686571619, + "learning_rate": 4.483811405614509e-06, + "loss": 0.2821, + "step": 9905 + }, + { + "epoch": 0.4640464702300089, + "grad_norm": 0.613445864304411, + "learning_rate": 4.48369599054155e-06, + "loss": 0.3101, + "step": 9906 + }, + { + "epoch": 0.46409331521993724, + "grad_norm": 0.5785818480238716, + "learning_rate": 4.4835805640529175e-06, + "loss": 0.2871, + "step": 9907 + }, + { + "epoch": 0.46414016020986554, + "grad_norm": 0.5588448514065941, + "learning_rate": 4.483465126149279e-06, + "loss": 0.2603, + "step": 9908 + }, + { + "epoch": 0.4641870051997939, + "grad_norm": 0.5520742364967758, + "learning_rate": 4.483349676831297e-06, + "loss": 0.2794, + "step": 9909 + }, + { + "epoch": 0.4642338501897222, + "grad_norm": 0.5946483307757331, + "learning_rate": 4.483234216099636e-06, + "loss": 0.304, + "step": 9910 + }, + { + "epoch": 0.46428069517965054, + "grad_norm": 0.5796686675133059, + "learning_rate": 4.4831187439549604e-06, + "loss": 0.3068, + "step": 9911 + }, + { + "epoch": 0.4643275401695789, + "grad_norm": 0.6301484857477158, + "learning_rate": 4.483003260397935e-06, + "loss": 0.2827, + "step": 9912 + }, + { + "epoch": 0.4643743851595072, + "grad_norm": 0.6568333508345626, + "learning_rate": 4.482887765429223e-06, + "loss": 0.2898, + "step": 9913 + }, + { + "epoch": 0.46442123014943554, + "grad_norm": 0.5940084889634127, + "learning_rate": 4.482772259049492e-06, + "loss": 0.2774, + "step": 9914 + }, + { + "epoch": 0.46446807513936383, + "grad_norm": 0.6551568536106269, + "learning_rate": 4.482656741259405e-06, + "loss": 0.2785, + "step": 9915 + }, + { + "epoch": 0.4645149201292922, + "grad_norm": 0.605771700818105, + "learning_rate": 4.482541212059626e-06, + "loss": 0.2847, + "step": 9916 + }, + { + "epoch": 0.4645617651192205, + "grad_norm": 0.5701596261633678, + "learning_rate": 4.482425671450821e-06, + "loss": 0.2951, + "step": 9917 + }, + { + "epoch": 0.46460861010914883, + "grad_norm": 0.6194457517347679, + "learning_rate": 4.482310119433654e-06, + "loss": 0.3006, + "step": 9918 + }, + { + "epoch": 0.46465545509907713, + "grad_norm": 0.6145080503146569, + "learning_rate": 4.482194556008791e-06, + "loss": 0.307, + "step": 9919 + }, + { + "epoch": 0.4647023000890055, + "grad_norm": 0.5621852429345444, + "learning_rate": 4.482078981176896e-06, + "loss": 0.2797, + "step": 9920 + }, + { + "epoch": 0.46474914507893383, + "grad_norm": 0.5979684564936323, + "learning_rate": 4.481963394938636e-06, + "loss": 0.3014, + "step": 9921 + }, + { + "epoch": 0.46479599006886213, + "grad_norm": 0.621893583116383, + "learning_rate": 4.481847797294673e-06, + "loss": 0.2989, + "step": 9922 + }, + { + "epoch": 0.4648428350587905, + "grad_norm": 0.5770340378389381, + "learning_rate": 4.481732188245675e-06, + "loss": 0.2717, + "step": 9923 + }, + { + "epoch": 0.4648896800487188, + "grad_norm": 0.6487570577371196, + "learning_rate": 4.481616567792306e-06, + "loss": 0.3113, + "step": 9924 + }, + { + "epoch": 0.46493652503864713, + "grad_norm": 0.5964423300559654, + "learning_rate": 4.481500935935232e-06, + "loss": 0.2883, + "step": 9925 + }, + { + "epoch": 0.4649833700285754, + "grad_norm": 0.6142144326025804, + "learning_rate": 4.481385292675118e-06, + "loss": 0.3002, + "step": 9926 + }, + { + "epoch": 0.4650302150185038, + "grad_norm": 0.5889231136878037, + "learning_rate": 4.48126963801263e-06, + "loss": 0.2786, + "step": 9927 + }, + { + "epoch": 0.4650770600084321, + "grad_norm": 0.5634614977979021, + "learning_rate": 4.4811539719484325e-06, + "loss": 0.2775, + "step": 9928 + }, + { + "epoch": 0.4651239049983604, + "grad_norm": 0.5874700326730417, + "learning_rate": 4.481038294483192e-06, + "loss": 0.2955, + "step": 9929 + }, + { + "epoch": 0.4651707499882888, + "grad_norm": 0.6219730210238898, + "learning_rate": 4.480922605617575e-06, + "loss": 0.3096, + "step": 9930 + }, + { + "epoch": 0.4652175949782171, + "grad_norm": 0.5949268885455244, + "learning_rate": 4.480806905352245e-06, + "loss": 0.2976, + "step": 9931 + }, + { + "epoch": 0.4652644399681454, + "grad_norm": 0.6242972555190136, + "learning_rate": 4.48069119368787e-06, + "loss": 0.2813, + "step": 9932 + }, + { + "epoch": 0.4653112849580737, + "grad_norm": 0.5517608868510971, + "learning_rate": 4.480575470625115e-06, + "loss": 0.2862, + "step": 9933 + }, + { + "epoch": 0.4653581299480021, + "grad_norm": 0.618543148702215, + "learning_rate": 4.480459736164645e-06, + "loss": 0.312, + "step": 9934 + }, + { + "epoch": 0.46540497493793037, + "grad_norm": 0.6435864932672064, + "learning_rate": 4.480343990307128e-06, + "loss": 0.3008, + "step": 9935 + }, + { + "epoch": 0.4654518199278587, + "grad_norm": 0.623357047332773, + "learning_rate": 4.4802282330532285e-06, + "loss": 0.2988, + "step": 9936 + }, + { + "epoch": 0.465498664917787, + "grad_norm": 0.642656500317996, + "learning_rate": 4.480112464403614e-06, + "loss": 0.3089, + "step": 9937 + }, + { + "epoch": 0.46554550990771537, + "grad_norm": 0.6837285585628486, + "learning_rate": 4.479996684358949e-06, + "loss": 0.3027, + "step": 9938 + }, + { + "epoch": 0.4655923548976437, + "grad_norm": 0.6003282039661413, + "learning_rate": 4.479880892919901e-06, + "loss": 0.2753, + "step": 9939 + }, + { + "epoch": 0.465639199887572, + "grad_norm": 0.5797828744142962, + "learning_rate": 4.479765090087136e-06, + "loss": 0.2783, + "step": 9940 + }, + { + "epoch": 0.46568604487750037, + "grad_norm": 0.6554920287176954, + "learning_rate": 4.479649275861321e-06, + "loss": 0.2944, + "step": 9941 + }, + { + "epoch": 0.46573288986742867, + "grad_norm": 0.5788068584031911, + "learning_rate": 4.479533450243122e-06, + "loss": 0.299, + "step": 9942 + }, + { + "epoch": 0.465779734857357, + "grad_norm": 0.5928714845931936, + "learning_rate": 4.479417613233205e-06, + "loss": 0.2738, + "step": 9943 + }, + { + "epoch": 0.4658265798472853, + "grad_norm": 0.5717916815953822, + "learning_rate": 4.4793017648322375e-06, + "loss": 0.2891, + "step": 9944 + }, + { + "epoch": 0.46587342483721367, + "grad_norm": 0.6086051104909284, + "learning_rate": 4.479185905040886e-06, + "loss": 0.2908, + "step": 9945 + }, + { + "epoch": 0.46592026982714196, + "grad_norm": 0.6594738680821062, + "learning_rate": 4.479070033859817e-06, + "loss": 0.3152, + "step": 9946 + }, + { + "epoch": 0.4659671148170703, + "grad_norm": 0.6433991764091077, + "learning_rate": 4.478954151289697e-06, + "loss": 0.3196, + "step": 9947 + }, + { + "epoch": 0.46601395980699867, + "grad_norm": 0.5975260762902878, + "learning_rate": 4.478838257331193e-06, + "loss": 0.294, + "step": 9948 + }, + { + "epoch": 0.46606080479692696, + "grad_norm": 0.7354351970075191, + "learning_rate": 4.478722351984973e-06, + "loss": 0.2961, + "step": 9949 + }, + { + "epoch": 0.4661076497868553, + "grad_norm": 0.5595403749960216, + "learning_rate": 4.478606435251702e-06, + "loss": 0.2719, + "step": 9950 + }, + { + "epoch": 0.4661544947767836, + "grad_norm": 0.6109100494081812, + "learning_rate": 4.47849050713205e-06, + "loss": 0.2894, + "step": 9951 + }, + { + "epoch": 0.46620133976671196, + "grad_norm": 0.6495901915377411, + "learning_rate": 4.478374567626681e-06, + "loss": 0.3115, + "step": 9952 + }, + { + "epoch": 0.46624818475664026, + "grad_norm": 0.5958137260837356, + "learning_rate": 4.478258616736264e-06, + "loss": 0.2891, + "step": 9953 + }, + { + "epoch": 0.4662950297465686, + "grad_norm": 0.6066321875720345, + "learning_rate": 4.4781426544614656e-06, + "loss": 0.2861, + "step": 9954 + }, + { + "epoch": 0.4663418747364969, + "grad_norm": 0.5938779095627695, + "learning_rate": 4.4780266808029546e-06, + "loss": 0.2911, + "step": 9955 + }, + { + "epoch": 0.46638871972642526, + "grad_norm": 0.5986618123242292, + "learning_rate": 4.477910695761396e-06, + "loss": 0.2821, + "step": 9956 + }, + { + "epoch": 0.4664355647163536, + "grad_norm": 0.6594455520776724, + "learning_rate": 4.477794699337459e-06, + "loss": 0.2714, + "step": 9957 + }, + { + "epoch": 0.4664824097062819, + "grad_norm": 0.6088922662612455, + "learning_rate": 4.477678691531811e-06, + "loss": 0.3018, + "step": 9958 + }, + { + "epoch": 0.46652925469621026, + "grad_norm": 0.6513986287112926, + "learning_rate": 4.477562672345118e-06, + "loss": 0.294, + "step": 9959 + }, + { + "epoch": 0.46657609968613856, + "grad_norm": 0.556470606483092, + "learning_rate": 4.4774466417780495e-06, + "loss": 0.2792, + "step": 9960 + }, + { + "epoch": 0.4666229446760669, + "grad_norm": 0.5549774440900717, + "learning_rate": 4.477330599831273e-06, + "loss": 0.2886, + "step": 9961 + }, + { + "epoch": 0.4666697896659952, + "grad_norm": 0.5900212401815718, + "learning_rate": 4.477214546505455e-06, + "loss": 0.2847, + "step": 9962 + }, + { + "epoch": 0.46671663465592356, + "grad_norm": 0.6174451653876495, + "learning_rate": 4.477098481801265e-06, + "loss": 0.2895, + "step": 9963 + }, + { + "epoch": 0.46676347964585185, + "grad_norm": 0.5962493198243363, + "learning_rate": 4.47698240571937e-06, + "loss": 0.2806, + "step": 9964 + }, + { + "epoch": 0.4668103246357802, + "grad_norm": 0.5925605099024367, + "learning_rate": 4.4768663182604375e-06, + "loss": 0.2899, + "step": 9965 + }, + { + "epoch": 0.46685716962570856, + "grad_norm": 0.5971769343405965, + "learning_rate": 4.476750219425137e-06, + "loss": 0.2815, + "step": 9966 + }, + { + "epoch": 0.46690401461563685, + "grad_norm": 0.5866138699787343, + "learning_rate": 4.4766341092141354e-06, + "loss": 0.2828, + "step": 9967 + }, + { + "epoch": 0.4669508596055652, + "grad_norm": 0.5901096266191356, + "learning_rate": 4.4765179876281016e-06, + "loss": 0.2916, + "step": 9968 + }, + { + "epoch": 0.4669977045954935, + "grad_norm": 0.5973894140311794, + "learning_rate": 4.4764018546677036e-06, + "loss": 0.298, + "step": 9969 + }, + { + "epoch": 0.46704454958542185, + "grad_norm": 0.557508620327156, + "learning_rate": 4.47628571033361e-06, + "loss": 0.2719, + "step": 9970 + }, + { + "epoch": 0.46709139457535015, + "grad_norm": 0.5877398604093397, + "learning_rate": 4.476169554626488e-06, + "loss": 0.2763, + "step": 9971 + }, + { + "epoch": 0.4671382395652785, + "grad_norm": 0.5983454786701395, + "learning_rate": 4.476053387547007e-06, + "loss": 0.2842, + "step": 9972 + }, + { + "epoch": 0.4671850845552068, + "grad_norm": 0.6358488318081298, + "learning_rate": 4.475937209095836e-06, + "loss": 0.2969, + "step": 9973 + }, + { + "epoch": 0.46723192954513515, + "grad_norm": 0.6086526978864839, + "learning_rate": 4.475821019273643e-06, + "loss": 0.2799, + "step": 9974 + }, + { + "epoch": 0.4672787745350635, + "grad_norm": 0.5708230201770427, + "learning_rate": 4.4757048180810955e-06, + "loss": 0.2894, + "step": 9975 + }, + { + "epoch": 0.4673256195249918, + "grad_norm": 0.5768486880024495, + "learning_rate": 4.4755886055188645e-06, + "loss": 0.2871, + "step": 9976 + }, + { + "epoch": 0.46737246451492015, + "grad_norm": 0.5803344727328237, + "learning_rate": 4.4754723815876175e-06, + "loss": 0.281, + "step": 9977 + }, + { + "epoch": 0.46741930950484845, + "grad_norm": 0.5795793509319618, + "learning_rate": 4.475356146288024e-06, + "loss": 0.2818, + "step": 9978 + }, + { + "epoch": 0.4674661544947768, + "grad_norm": 0.5714874030619884, + "learning_rate": 4.4752398996207514e-06, + "loss": 0.2739, + "step": 9979 + }, + { + "epoch": 0.4675129994847051, + "grad_norm": 0.6783180246742124, + "learning_rate": 4.47512364158647e-06, + "loss": 0.2817, + "step": 9980 + }, + { + "epoch": 0.46755984447463345, + "grad_norm": 0.6175294578456527, + "learning_rate": 4.475007372185848e-06, + "loss": 0.3002, + "step": 9981 + }, + { + "epoch": 0.46760668946456174, + "grad_norm": 0.5580996342035534, + "learning_rate": 4.474891091419555e-06, + "loss": 0.2811, + "step": 9982 + }, + { + "epoch": 0.4676535344544901, + "grad_norm": 0.5466993580977497, + "learning_rate": 4.47477479928826e-06, + "loss": 0.282, + "step": 9983 + }, + { + "epoch": 0.46770037944441845, + "grad_norm": 0.5595826045647448, + "learning_rate": 4.474658495792633e-06, + "loss": 0.2919, + "step": 9984 + }, + { + "epoch": 0.46774722443434674, + "grad_norm": 0.6281707267165335, + "learning_rate": 4.4745421809333424e-06, + "loss": 0.2972, + "step": 9985 + }, + { + "epoch": 0.4677940694242751, + "grad_norm": 0.5319419605141187, + "learning_rate": 4.474425854711059e-06, + "loss": 0.2731, + "step": 9986 + }, + { + "epoch": 0.4678409144142034, + "grad_norm": 0.6421395829996586, + "learning_rate": 4.4743095171264495e-06, + "loss": 0.3181, + "step": 9987 + }, + { + "epoch": 0.46788775940413174, + "grad_norm": 0.6025293405502004, + "learning_rate": 4.474193168180185e-06, + "loss": 0.3043, + "step": 9988 + }, + { + "epoch": 0.46793460439406004, + "grad_norm": 0.5683719681805611, + "learning_rate": 4.4740768078729355e-06, + "loss": 0.2733, + "step": 9989 + }, + { + "epoch": 0.4679814493839884, + "grad_norm": 0.6271308933149244, + "learning_rate": 4.47396043620537e-06, + "loss": 0.2864, + "step": 9990 + }, + { + "epoch": 0.4680282943739167, + "grad_norm": 0.6333864849972753, + "learning_rate": 4.473844053178159e-06, + "loss": 0.3041, + "step": 9991 + }, + { + "epoch": 0.46807513936384504, + "grad_norm": 0.6405186142940908, + "learning_rate": 4.473727658791971e-06, + "loss": 0.2922, + "step": 9992 + }, + { + "epoch": 0.4681219843537734, + "grad_norm": 0.5764088789252599, + "learning_rate": 4.473611253047476e-06, + "loss": 0.2834, + "step": 9993 + }, + { + "epoch": 0.4681688293437017, + "grad_norm": 0.6247354413589774, + "learning_rate": 4.473494835945344e-06, + "loss": 0.3048, + "step": 9994 + }, + { + "epoch": 0.46821567433363004, + "grad_norm": 0.6053594971123629, + "learning_rate": 4.473378407486246e-06, + "loss": 0.3127, + "step": 9995 + }, + { + "epoch": 0.46826251932355833, + "grad_norm": 0.581019890805408, + "learning_rate": 4.4732619676708524e-06, + "loss": 0.2807, + "step": 9996 + }, + { + "epoch": 0.4683093643134867, + "grad_norm": 0.6350234217488573, + "learning_rate": 4.473145516499831e-06, + "loss": 0.2948, + "step": 9997 + }, + { + "epoch": 0.468356209303415, + "grad_norm": 0.6139063876994338, + "learning_rate": 4.473029053973852e-06, + "loss": 0.3166, + "step": 9998 + }, + { + "epoch": 0.46840305429334333, + "grad_norm": 0.5925668876586535, + "learning_rate": 4.472912580093588e-06, + "loss": 0.306, + "step": 9999 + }, + { + "epoch": 0.46844989928327163, + "grad_norm": 0.5754499459374639, + "learning_rate": 4.472796094859707e-06, + "loss": 0.2756, + "step": 10000 + }, + { + "epoch": 0.4684967442732, + "grad_norm": 0.5747768450887732, + "learning_rate": 4.472679598272881e-06, + "loss": 0.2781, + "step": 10001 + }, + { + "epoch": 0.46854358926312833, + "grad_norm": 0.5576488230624447, + "learning_rate": 4.47256309033378e-06, + "loss": 0.2806, + "step": 10002 + }, + { + "epoch": 0.46859043425305663, + "grad_norm": 0.576306796437993, + "learning_rate": 4.472446571043074e-06, + "loss": 0.2826, + "step": 10003 + }, + { + "epoch": 0.468637279242985, + "grad_norm": 0.5605668955642369, + "learning_rate": 4.4723300404014335e-06, + "loss": 0.2816, + "step": 10004 + }, + { + "epoch": 0.4686841242329133, + "grad_norm": 0.6227659148485529, + "learning_rate": 4.47221349840953e-06, + "loss": 0.2822, + "step": 10005 + }, + { + "epoch": 0.46873096922284163, + "grad_norm": 0.6684889196054119, + "learning_rate": 4.472096945068033e-06, + "loss": 0.2936, + "step": 10006 + }, + { + "epoch": 0.4687778142127699, + "grad_norm": 0.6035797310498127, + "learning_rate": 4.471980380377613e-06, + "loss": 0.2717, + "step": 10007 + }, + { + "epoch": 0.4688246592026983, + "grad_norm": 0.5960278858586311, + "learning_rate": 4.471863804338943e-06, + "loss": 0.2899, + "step": 10008 + }, + { + "epoch": 0.4688715041926266, + "grad_norm": 0.5794547012564639, + "learning_rate": 4.471747216952692e-06, + "loss": 0.3081, + "step": 10009 + }, + { + "epoch": 0.4689183491825549, + "grad_norm": 0.5260952879294899, + "learning_rate": 4.471630618219531e-06, + "loss": 0.2585, + "step": 10010 + }, + { + "epoch": 0.4689651941724833, + "grad_norm": 0.6005142509605415, + "learning_rate": 4.471514008140131e-06, + "loss": 0.2896, + "step": 10011 + }, + { + "epoch": 0.4690120391624116, + "grad_norm": 0.605382241402844, + "learning_rate": 4.471397386715164e-06, + "loss": 0.3044, + "step": 10012 + }, + { + "epoch": 0.4690588841523399, + "grad_norm": 0.5783600614038443, + "learning_rate": 4.4712807539453004e-06, + "loss": 0.2862, + "step": 10013 + }, + { + "epoch": 0.4691057291422682, + "grad_norm": 0.6767369555932453, + "learning_rate": 4.471164109831211e-06, + "loss": 0.2847, + "step": 10014 + }, + { + "epoch": 0.4691525741321966, + "grad_norm": 0.6878758384366906, + "learning_rate": 4.471047454373568e-06, + "loss": 0.2821, + "step": 10015 + }, + { + "epoch": 0.46919941912212487, + "grad_norm": 0.5758281550098973, + "learning_rate": 4.470930787573042e-06, + "loss": 0.2785, + "step": 10016 + }, + { + "epoch": 0.4692462641120532, + "grad_norm": 0.5681411516598498, + "learning_rate": 4.470814109430306e-06, + "loss": 0.2856, + "step": 10017 + }, + { + "epoch": 0.4692931091019815, + "grad_norm": 0.5709379516403466, + "learning_rate": 4.4706974199460285e-06, + "loss": 0.2771, + "step": 10018 + }, + { + "epoch": 0.4693399540919099, + "grad_norm": 0.6496513656123256, + "learning_rate": 4.4705807191208835e-06, + "loss": 0.291, + "step": 10019 + }, + { + "epoch": 0.4693867990818382, + "grad_norm": 0.6646576134492549, + "learning_rate": 4.470464006955542e-06, + "loss": 0.2936, + "step": 10020 + }, + { + "epoch": 0.4694336440717665, + "grad_norm": 0.6146258920120939, + "learning_rate": 4.470347283450675e-06, + "loss": 0.2968, + "step": 10021 + }, + { + "epoch": 0.4694804890616949, + "grad_norm": 0.6123120931707648, + "learning_rate": 4.4702305486069544e-06, + "loss": 0.2911, + "step": 10022 + }, + { + "epoch": 0.46952733405162317, + "grad_norm": 0.6031816880156043, + "learning_rate": 4.470113802425053e-06, + "loss": 0.2781, + "step": 10023 + }, + { + "epoch": 0.4695741790415515, + "grad_norm": 0.5951742668700652, + "learning_rate": 4.46999704490564e-06, + "loss": 0.2786, + "step": 10024 + }, + { + "epoch": 0.4696210240314798, + "grad_norm": 0.5897531427377855, + "learning_rate": 4.469880276049391e-06, + "loss": 0.284, + "step": 10025 + }, + { + "epoch": 0.46966786902140817, + "grad_norm": 0.6109755921773926, + "learning_rate": 4.469763495856976e-06, + "loss": 0.2833, + "step": 10026 + }, + { + "epoch": 0.46971471401133646, + "grad_norm": 0.6200351801473512, + "learning_rate": 4.469646704329066e-06, + "loss": 0.2963, + "step": 10027 + }, + { + "epoch": 0.4697615590012648, + "grad_norm": 0.598103924214478, + "learning_rate": 4.469529901466335e-06, + "loss": 0.2845, + "step": 10028 + }, + { + "epoch": 0.46980840399119317, + "grad_norm": 0.5831398996715301, + "learning_rate": 4.469413087269454e-06, + "loss": 0.2802, + "step": 10029 + }, + { + "epoch": 0.46985524898112146, + "grad_norm": 0.5832844845385788, + "learning_rate": 4.469296261739097e-06, + "loss": 0.2828, + "step": 10030 + }, + { + "epoch": 0.4699020939710498, + "grad_norm": 0.5680125215133927, + "learning_rate": 4.469179424875933e-06, + "loss": 0.2779, + "step": 10031 + }, + { + "epoch": 0.4699489389609781, + "grad_norm": 0.6204274948506032, + "learning_rate": 4.469062576680638e-06, + "loss": 0.3106, + "step": 10032 + }, + { + "epoch": 0.46999578395090646, + "grad_norm": 0.5921114674553006, + "learning_rate": 4.4689457171538816e-06, + "loss": 0.2626, + "step": 10033 + }, + { + "epoch": 0.47004262894083476, + "grad_norm": 0.6014819036358873, + "learning_rate": 4.468828846296339e-06, + "loss": 0.2842, + "step": 10034 + }, + { + "epoch": 0.4700894739307631, + "grad_norm": 0.6174389955355921, + "learning_rate": 4.468711964108679e-06, + "loss": 0.2925, + "step": 10035 + }, + { + "epoch": 0.4701363189206914, + "grad_norm": 0.6058809532527851, + "learning_rate": 4.468595070591579e-06, + "loss": 0.2979, + "step": 10036 + }, + { + "epoch": 0.47018316391061976, + "grad_norm": 0.620881484338299, + "learning_rate": 4.4684781657457074e-06, + "loss": 0.2794, + "step": 10037 + }, + { + "epoch": 0.4702300089005481, + "grad_norm": 0.6267283861077745, + "learning_rate": 4.468361249571739e-06, + "loss": 0.2871, + "step": 10038 + }, + { + "epoch": 0.4702768538904764, + "grad_norm": 0.6660435153963283, + "learning_rate": 4.468244322070347e-06, + "loss": 0.283, + "step": 10039 + }, + { + "epoch": 0.47032369888040476, + "grad_norm": 0.6015721205092446, + "learning_rate": 4.4681273832422035e-06, + "loss": 0.299, + "step": 10040 + }, + { + "epoch": 0.47037054387033306, + "grad_norm": 0.5846334987996283, + "learning_rate": 4.468010433087981e-06, + "loss": 0.2711, + "step": 10041 + }, + { + "epoch": 0.4704173888602614, + "grad_norm": 0.6272659670751697, + "learning_rate": 4.467893471608353e-06, + "loss": 0.2874, + "step": 10042 + }, + { + "epoch": 0.4704642338501897, + "grad_norm": 0.6336999271497756, + "learning_rate": 4.467776498803993e-06, + "loss": 0.2853, + "step": 10043 + }, + { + "epoch": 0.47051107884011806, + "grad_norm": 0.6601734970093801, + "learning_rate": 4.467659514675574e-06, + "loss": 0.3141, + "step": 10044 + }, + { + "epoch": 0.47055792383004635, + "grad_norm": 0.5968966526365903, + "learning_rate": 4.467542519223769e-06, + "loss": 0.3078, + "step": 10045 + }, + { + "epoch": 0.4706047688199747, + "grad_norm": 0.6155058248714216, + "learning_rate": 4.467425512449252e-06, + "loss": 0.2951, + "step": 10046 + }, + { + "epoch": 0.47065161380990306, + "grad_norm": 0.5989315540912072, + "learning_rate": 4.467308494352694e-06, + "loss": 0.2689, + "step": 10047 + }, + { + "epoch": 0.47069845879983135, + "grad_norm": 0.5760495045031275, + "learning_rate": 4.467191464934772e-06, + "loss": 0.2847, + "step": 10048 + }, + { + "epoch": 0.4707453037897597, + "grad_norm": 0.6727756993263567, + "learning_rate": 4.467074424196155e-06, + "loss": 0.3063, + "step": 10049 + }, + { + "epoch": 0.470792148779688, + "grad_norm": 0.5674061371190405, + "learning_rate": 4.4669573721375214e-06, + "loss": 0.2698, + "step": 10050 + }, + { + "epoch": 0.47083899376961635, + "grad_norm": 0.6513094072368525, + "learning_rate": 4.4668403087595415e-06, + "loss": 0.3028, + "step": 10051 + }, + { + "epoch": 0.47088583875954465, + "grad_norm": 0.5729849802341241, + "learning_rate": 4.466723234062891e-06, + "loss": 0.2655, + "step": 10052 + }, + { + "epoch": 0.470932683749473, + "grad_norm": 0.5842023472962642, + "learning_rate": 4.466606148048241e-06, + "loss": 0.2893, + "step": 10053 + }, + { + "epoch": 0.4709795287394013, + "grad_norm": 0.6395228689667967, + "learning_rate": 4.466489050716268e-06, + "loss": 0.2992, + "step": 10054 + }, + { + "epoch": 0.47102637372932965, + "grad_norm": 0.632713635273358, + "learning_rate": 4.466371942067644e-06, + "loss": 0.2814, + "step": 10055 + }, + { + "epoch": 0.471073218719258, + "grad_norm": 0.615578081266522, + "learning_rate": 4.466254822103045e-06, + "loss": 0.3112, + "step": 10056 + }, + { + "epoch": 0.4711200637091863, + "grad_norm": 0.5461623107886433, + "learning_rate": 4.466137690823142e-06, + "loss": 0.2599, + "step": 10057 + }, + { + "epoch": 0.47116690869911465, + "grad_norm": 0.6295308123184475, + "learning_rate": 4.466020548228612e-06, + "loss": 0.2826, + "step": 10058 + }, + { + "epoch": 0.47121375368904295, + "grad_norm": 0.6046624995799929, + "learning_rate": 4.465903394320128e-06, + "loss": 0.2806, + "step": 10059 + }, + { + "epoch": 0.4712605986789713, + "grad_norm": 0.5866529303281521, + "learning_rate": 4.465786229098363e-06, + "loss": 0.292, + "step": 10060 + }, + { + "epoch": 0.4713074436688996, + "grad_norm": 0.6266848073923104, + "learning_rate": 4.465669052563994e-06, + "loss": 0.2963, + "step": 10061 + }, + { + "epoch": 0.47135428865882795, + "grad_norm": 0.6418532347808489, + "learning_rate": 4.465551864717692e-06, + "loss": 0.3078, + "step": 10062 + }, + { + "epoch": 0.47140113364875624, + "grad_norm": 0.6066545381612557, + "learning_rate": 4.465434665560133e-06, + "loss": 0.2931, + "step": 10063 + }, + { + "epoch": 0.4714479786386846, + "grad_norm": 0.6573202203613208, + "learning_rate": 4.465317455091992e-06, + "loss": 0.2518, + "step": 10064 + }, + { + "epoch": 0.47149482362861295, + "grad_norm": 0.5941103558744459, + "learning_rate": 4.465200233313943e-06, + "loss": 0.2877, + "step": 10065 + }, + { + "epoch": 0.47154166861854124, + "grad_norm": 0.5666458725742679, + "learning_rate": 4.465083000226661e-06, + "loss": 0.2881, + "step": 10066 + }, + { + "epoch": 0.4715885136084696, + "grad_norm": 0.5962250927027322, + "learning_rate": 4.46496575583082e-06, + "loss": 0.2681, + "step": 10067 + }, + { + "epoch": 0.4716353585983979, + "grad_norm": 0.5761279557920082, + "learning_rate": 4.464848500127095e-06, + "loss": 0.2828, + "step": 10068 + }, + { + "epoch": 0.47168220358832624, + "grad_norm": 0.5645197786231875, + "learning_rate": 4.464731233116161e-06, + "loss": 0.2855, + "step": 10069 + }, + { + "epoch": 0.47172904857825454, + "grad_norm": 0.6579793823972099, + "learning_rate": 4.464613954798692e-06, + "loss": 0.2931, + "step": 10070 + }, + { + "epoch": 0.4717758935681829, + "grad_norm": 0.6059670333790216, + "learning_rate": 4.464496665175363e-06, + "loss": 0.2744, + "step": 10071 + }, + { + "epoch": 0.4718227385581112, + "grad_norm": 0.5715193862814307, + "learning_rate": 4.464379364246851e-06, + "loss": 0.2673, + "step": 10072 + }, + { + "epoch": 0.47186958354803954, + "grad_norm": 0.5741572783896196, + "learning_rate": 4.464262052013828e-06, + "loss": 0.2926, + "step": 10073 + }, + { + "epoch": 0.4719164285379679, + "grad_norm": 0.6649384302023867, + "learning_rate": 4.464144728476971e-06, + "loss": 0.3144, + "step": 10074 + }, + { + "epoch": 0.4719632735278962, + "grad_norm": 0.5938914490035172, + "learning_rate": 4.464027393636956e-06, + "loss": 0.2771, + "step": 10075 + }, + { + "epoch": 0.47201011851782454, + "grad_norm": 0.5972330185738819, + "learning_rate": 4.463910047494455e-06, + "loss": 0.2975, + "step": 10076 + }, + { + "epoch": 0.47205696350775284, + "grad_norm": 0.6012246470574742, + "learning_rate": 4.463792690050147e-06, + "loss": 0.28, + "step": 10077 + }, + { + "epoch": 0.4721038084976812, + "grad_norm": 0.569165722726075, + "learning_rate": 4.463675321304705e-06, + "loss": 0.2959, + "step": 10078 + }, + { + "epoch": 0.4721506534876095, + "grad_norm": 0.5752193907205789, + "learning_rate": 4.463557941258805e-06, + "loss": 0.2978, + "step": 10079 + }, + { + "epoch": 0.47219749847753784, + "grad_norm": 0.5449419776033853, + "learning_rate": 4.463440549913123e-06, + "loss": 0.2509, + "step": 10080 + }, + { + "epoch": 0.47224434346746613, + "grad_norm": 0.6082009845156267, + "learning_rate": 4.463323147268333e-06, + "loss": 0.2919, + "step": 10081 + }, + { + "epoch": 0.4722911884573945, + "grad_norm": 0.6261055313733933, + "learning_rate": 4.463205733325112e-06, + "loss": 0.2897, + "step": 10082 + }, + { + "epoch": 0.47233803344732284, + "grad_norm": 0.5882828453403555, + "learning_rate": 4.463088308084136e-06, + "loss": 0.2736, + "step": 10083 + }, + { + "epoch": 0.47238487843725113, + "grad_norm": 0.5267759454159758, + "learning_rate": 4.46297087154608e-06, + "loss": 0.2684, + "step": 10084 + }, + { + "epoch": 0.4724317234271795, + "grad_norm": 0.5856739572128303, + "learning_rate": 4.4628534237116205e-06, + "loss": 0.2828, + "step": 10085 + }, + { + "epoch": 0.4724785684171078, + "grad_norm": 0.6272026348786263, + "learning_rate": 4.462735964581432e-06, + "loss": 0.2858, + "step": 10086 + }, + { + "epoch": 0.47252541340703613, + "grad_norm": 0.547381625564227, + "learning_rate": 4.462618494156193e-06, + "loss": 0.2665, + "step": 10087 + }, + { + "epoch": 0.47257225839696443, + "grad_norm": 0.6559578560002726, + "learning_rate": 4.462501012436575e-06, + "loss": 0.3092, + "step": 10088 + }, + { + "epoch": 0.4726191033868928, + "grad_norm": 0.580873303821446, + "learning_rate": 4.462383519423259e-06, + "loss": 0.3003, + "step": 10089 + }, + { + "epoch": 0.4726659483768211, + "grad_norm": 0.6780324923974057, + "learning_rate": 4.462266015116918e-06, + "loss": 0.3241, + "step": 10090 + }, + { + "epoch": 0.47271279336674943, + "grad_norm": 0.6285654198149678, + "learning_rate": 4.462148499518231e-06, + "loss": 0.297, + "step": 10091 + }, + { + "epoch": 0.4727596383566778, + "grad_norm": 0.5720746278744915, + "learning_rate": 4.462030972627871e-06, + "loss": 0.2576, + "step": 10092 + }, + { + "epoch": 0.4728064833466061, + "grad_norm": 0.6832617849342189, + "learning_rate": 4.461913434446517e-06, + "loss": 0.2986, + "step": 10093 + }, + { + "epoch": 0.47285332833653443, + "grad_norm": 0.6229314199440901, + "learning_rate": 4.4617958849748435e-06, + "loss": 0.3113, + "step": 10094 + }, + { + "epoch": 0.4729001733264627, + "grad_norm": 0.6305310993076149, + "learning_rate": 4.461678324213527e-06, + "loss": 0.2951, + "step": 10095 + }, + { + "epoch": 0.4729470183163911, + "grad_norm": 0.5965713465264006, + "learning_rate": 4.461560752163247e-06, + "loss": 0.2866, + "step": 10096 + }, + { + "epoch": 0.4729938633063194, + "grad_norm": 0.6189890813682262, + "learning_rate": 4.4614431688246764e-06, + "loss": 0.279, + "step": 10097 + }, + { + "epoch": 0.4730407082962477, + "grad_norm": 0.5694331540589717, + "learning_rate": 4.461325574198493e-06, + "loss": 0.2747, + "step": 10098 + }, + { + "epoch": 0.473087553286176, + "grad_norm": 0.5957679197993401, + "learning_rate": 4.4612079682853746e-06, + "loss": 0.2812, + "step": 10099 + }, + { + "epoch": 0.4731343982761044, + "grad_norm": 0.6188963035560959, + "learning_rate": 4.461090351085997e-06, + "loss": 0.2892, + "step": 10100 + }, + { + "epoch": 0.4731812432660327, + "grad_norm": 0.6149102754634475, + "learning_rate": 4.460972722601038e-06, + "loss": 0.2965, + "step": 10101 + }, + { + "epoch": 0.473228088255961, + "grad_norm": 0.6123785125448672, + "learning_rate": 4.4608550828311725e-06, + "loss": 0.2962, + "step": 10102 + }, + { + "epoch": 0.4732749332458894, + "grad_norm": 0.5900890847399101, + "learning_rate": 4.46073743177708e-06, + "loss": 0.2916, + "step": 10103 + }, + { + "epoch": 0.47332177823581767, + "grad_norm": 0.703000477476826, + "learning_rate": 4.460619769439436e-06, + "loss": 0.3014, + "step": 10104 + }, + { + "epoch": 0.473368623225746, + "grad_norm": 0.6514433860976241, + "learning_rate": 4.460502095818919e-06, + "loss": 0.2916, + "step": 10105 + }, + { + "epoch": 0.4734154682156743, + "grad_norm": 0.6105127507842908, + "learning_rate": 4.460384410916203e-06, + "loss": 0.291, + "step": 10106 + }, + { + "epoch": 0.47346231320560267, + "grad_norm": 0.6226222532433554, + "learning_rate": 4.460266714731969e-06, + "loss": 0.2866, + "step": 10107 + }, + { + "epoch": 0.47350915819553097, + "grad_norm": 0.6453340588621718, + "learning_rate": 4.460149007266893e-06, + "loss": 0.3049, + "step": 10108 + }, + { + "epoch": 0.4735560031854593, + "grad_norm": 0.6785211595040312, + "learning_rate": 4.460031288521651e-06, + "loss": 0.3171, + "step": 10109 + }, + { + "epoch": 0.47360284817538767, + "grad_norm": 0.635532020629571, + "learning_rate": 4.459913558496923e-06, + "loss": 0.2924, + "step": 10110 + }, + { + "epoch": 0.47364969316531597, + "grad_norm": 0.6392664747841503, + "learning_rate": 4.459795817193383e-06, + "loss": 0.2995, + "step": 10111 + }, + { + "epoch": 0.4736965381552443, + "grad_norm": 0.6242900015851515, + "learning_rate": 4.459678064611712e-06, + "loss": 0.2822, + "step": 10112 + }, + { + "epoch": 0.4737433831451726, + "grad_norm": 0.6243715252718982, + "learning_rate": 4.459560300752586e-06, + "loss": 0.2819, + "step": 10113 + }, + { + "epoch": 0.47379022813510097, + "grad_norm": 0.640404343060409, + "learning_rate": 4.459442525616683e-06, + "loss": 0.3045, + "step": 10114 + }, + { + "epoch": 0.47383707312502926, + "grad_norm": 0.627007771339414, + "learning_rate": 4.459324739204681e-06, + "loss": 0.284, + "step": 10115 + }, + { + "epoch": 0.4738839181149576, + "grad_norm": 0.6063319338604503, + "learning_rate": 4.459206941517258e-06, + "loss": 0.2825, + "step": 10116 + }, + { + "epoch": 0.4739307631048859, + "grad_norm": 0.6506500677057911, + "learning_rate": 4.4590891325550905e-06, + "loss": 0.2985, + "step": 10117 + }, + { + "epoch": 0.47397760809481426, + "grad_norm": 0.6115637874389095, + "learning_rate": 4.458971312318858e-06, + "loss": 0.3022, + "step": 10118 + }, + { + "epoch": 0.4740244530847426, + "grad_norm": 0.562081387333904, + "learning_rate": 4.458853480809238e-06, + "loss": 0.264, + "step": 10119 + }, + { + "epoch": 0.4740712980746709, + "grad_norm": 0.6214751872332828, + "learning_rate": 4.458735638026908e-06, + "loss": 0.2895, + "step": 10120 + }, + { + "epoch": 0.47411814306459926, + "grad_norm": 0.605700061587971, + "learning_rate": 4.458617783972548e-06, + "loss": 0.2751, + "step": 10121 + }, + { + "epoch": 0.47416498805452756, + "grad_norm": 0.6433753182601296, + "learning_rate": 4.458499918646834e-06, + "loss": 0.3205, + "step": 10122 + }, + { + "epoch": 0.4742118330444559, + "grad_norm": 0.6420737773784949, + "learning_rate": 4.4583820420504444e-06, + "loss": 0.3026, + "step": 10123 + }, + { + "epoch": 0.4742586780343842, + "grad_norm": 0.6061960132976972, + "learning_rate": 4.458264154184059e-06, + "loss": 0.3058, + "step": 10124 + }, + { + "epoch": 0.47430552302431256, + "grad_norm": 0.5596529503766344, + "learning_rate": 4.458146255048357e-06, + "loss": 0.2904, + "step": 10125 + }, + { + "epoch": 0.47435236801424085, + "grad_norm": 0.5895446912915351, + "learning_rate": 4.458028344644013e-06, + "loss": 0.2914, + "step": 10126 + }, + { + "epoch": 0.4743992130041692, + "grad_norm": 0.6152159907381956, + "learning_rate": 4.4579104229717094e-06, + "loss": 0.2895, + "step": 10127 + }, + { + "epoch": 0.47444605799409756, + "grad_norm": 0.666079071710392, + "learning_rate": 4.457792490032123e-06, + "loss": 0.2841, + "step": 10128 + }, + { + "epoch": 0.47449290298402585, + "grad_norm": 0.6452027224088824, + "learning_rate": 4.457674545825934e-06, + "loss": 0.2981, + "step": 10129 + }, + { + "epoch": 0.4745397479739542, + "grad_norm": 0.5795360695760182, + "learning_rate": 4.457556590353819e-06, + "loss": 0.2942, + "step": 10130 + }, + { + "epoch": 0.4745865929638825, + "grad_norm": 0.548850786728285, + "learning_rate": 4.457438623616458e-06, + "loss": 0.271, + "step": 10131 + }, + { + "epoch": 0.47463343795381085, + "grad_norm": 0.58150921397782, + "learning_rate": 4.45732064561453e-06, + "loss": 0.2938, + "step": 10132 + }, + { + "epoch": 0.47468028294373915, + "grad_norm": 0.6329450497336861, + "learning_rate": 4.457202656348714e-06, + "loss": 0.2982, + "step": 10133 + }, + { + "epoch": 0.4747271279336675, + "grad_norm": 0.6027882105681782, + "learning_rate": 4.457084655819687e-06, + "loss": 0.3051, + "step": 10134 + }, + { + "epoch": 0.4747739729235958, + "grad_norm": 0.576000987316435, + "learning_rate": 4.456966644028131e-06, + "loss": 0.2945, + "step": 10135 + }, + { + "epoch": 0.47482081791352415, + "grad_norm": 0.674037450607678, + "learning_rate": 4.456848620974724e-06, + "loss": 0.3066, + "step": 10136 + }, + { + "epoch": 0.4748676629034525, + "grad_norm": 0.6017140186672761, + "learning_rate": 4.456730586660144e-06, + "loss": 0.2734, + "step": 10137 + }, + { + "epoch": 0.4749145078933808, + "grad_norm": 0.611617480205596, + "learning_rate": 4.456612541085072e-06, + "loss": 0.3046, + "step": 10138 + }, + { + "epoch": 0.47496135288330915, + "grad_norm": 0.6111242040693126, + "learning_rate": 4.456494484250187e-06, + "loss": 0.2971, + "step": 10139 + }, + { + "epoch": 0.47500819787323745, + "grad_norm": 0.6007590046062454, + "learning_rate": 4.456376416156168e-06, + "loss": 0.2898, + "step": 10140 + }, + { + "epoch": 0.4750550428631658, + "grad_norm": 0.6102392659434246, + "learning_rate": 4.456258336803693e-06, + "loss": 0.3065, + "step": 10141 + }, + { + "epoch": 0.4751018878530941, + "grad_norm": 0.604462654417268, + "learning_rate": 4.456140246193444e-06, + "loss": 0.2894, + "step": 10142 + }, + { + "epoch": 0.47514873284302245, + "grad_norm": 0.5779758505744677, + "learning_rate": 4.456022144326099e-06, + "loss": 0.2771, + "step": 10143 + }, + { + "epoch": 0.47519557783295074, + "grad_norm": 0.645750336094592, + "learning_rate": 4.455904031202339e-06, + "loss": 0.2946, + "step": 10144 + }, + { + "epoch": 0.4752424228228791, + "grad_norm": 0.612053544963038, + "learning_rate": 4.455785906822843e-06, + "loss": 0.2839, + "step": 10145 + }, + { + "epoch": 0.47528926781280745, + "grad_norm": 0.6592753911589371, + "learning_rate": 4.455667771188289e-06, + "loss": 0.3222, + "step": 10146 + }, + { + "epoch": 0.47533611280273574, + "grad_norm": 0.5589462428422582, + "learning_rate": 4.4555496242993605e-06, + "loss": 0.2995, + "step": 10147 + }, + { + "epoch": 0.4753829577926641, + "grad_norm": 0.5896191158102227, + "learning_rate": 4.455431466156734e-06, + "loss": 0.2959, + "step": 10148 + }, + { + "epoch": 0.4754298027825924, + "grad_norm": 0.623114104344042, + "learning_rate": 4.455313296761092e-06, + "loss": 0.2731, + "step": 10149 + }, + { + "epoch": 0.47547664777252074, + "grad_norm": 0.6052037683803521, + "learning_rate": 4.4551951161131126e-06, + "loss": 0.2908, + "step": 10150 + }, + { + "epoch": 0.47552349276244904, + "grad_norm": 0.7089389890851443, + "learning_rate": 4.455076924213477e-06, + "loss": 0.3185, + "step": 10151 + }, + { + "epoch": 0.4755703377523774, + "grad_norm": 0.6761983588088383, + "learning_rate": 4.454958721062865e-06, + "loss": 0.295, + "step": 10152 + }, + { + "epoch": 0.4756171827423057, + "grad_norm": 0.5756989667138034, + "learning_rate": 4.454840506661957e-06, + "loss": 0.2645, + "step": 10153 + }, + { + "epoch": 0.47566402773223404, + "grad_norm": 0.6368424007068979, + "learning_rate": 4.454722281011434e-06, + "loss": 0.3246, + "step": 10154 + }, + { + "epoch": 0.4757108727221624, + "grad_norm": 0.6852248559085438, + "learning_rate": 4.454604044111974e-06, + "loss": 0.293, + "step": 10155 + }, + { + "epoch": 0.4757577177120907, + "grad_norm": 0.5809090452530071, + "learning_rate": 4.45448579596426e-06, + "loss": 0.2847, + "step": 10156 + }, + { + "epoch": 0.47580456270201904, + "grad_norm": 0.6265284794733995, + "learning_rate": 4.454367536568972e-06, + "loss": 0.3173, + "step": 10157 + }, + { + "epoch": 0.47585140769194734, + "grad_norm": 0.5971039558588696, + "learning_rate": 4.454249265926789e-06, + "loss": 0.2699, + "step": 10158 + }, + { + "epoch": 0.4758982526818757, + "grad_norm": 0.5541085144603646, + "learning_rate": 4.454130984038393e-06, + "loss": 0.2863, + "step": 10159 + }, + { + "epoch": 0.475945097671804, + "grad_norm": 0.5945165931158037, + "learning_rate": 4.4540126909044644e-06, + "loss": 0.2624, + "step": 10160 + }, + { + "epoch": 0.47599194266173234, + "grad_norm": 0.6081412866727784, + "learning_rate": 4.453894386525684e-06, + "loss": 0.2731, + "step": 10161 + }, + { + "epoch": 0.47603878765166063, + "grad_norm": 0.5908475104970051, + "learning_rate": 4.453776070902732e-06, + "loss": 0.2857, + "step": 10162 + }, + { + "epoch": 0.476085632641589, + "grad_norm": 0.6107277448326827, + "learning_rate": 4.4536577440362905e-06, + "loss": 0.2898, + "step": 10163 + }, + { + "epoch": 0.47613247763151734, + "grad_norm": 0.6604862559049434, + "learning_rate": 4.453539405927039e-06, + "loss": 0.2952, + "step": 10164 + }, + { + "epoch": 0.47617932262144563, + "grad_norm": 0.5898551121204664, + "learning_rate": 4.453421056575659e-06, + "loss": 0.3032, + "step": 10165 + }, + { + "epoch": 0.476226167611374, + "grad_norm": 0.5638720612104449, + "learning_rate": 4.453302695982833e-06, + "loss": 0.2991, + "step": 10166 + }, + { + "epoch": 0.4762730126013023, + "grad_norm": 0.58483341602883, + "learning_rate": 4.45318432414924e-06, + "loss": 0.2956, + "step": 10167 + }, + { + "epoch": 0.47631985759123063, + "grad_norm": 0.6140504536764948, + "learning_rate": 4.453065941075563e-06, + "loss": 0.2963, + "step": 10168 + }, + { + "epoch": 0.47636670258115893, + "grad_norm": 0.568819233643343, + "learning_rate": 4.452947546762482e-06, + "loss": 0.2755, + "step": 10169 + }, + { + "epoch": 0.4764135475710873, + "grad_norm": 0.6160131769342697, + "learning_rate": 4.452829141210678e-06, + "loss": 0.2838, + "step": 10170 + }, + { + "epoch": 0.4764603925610156, + "grad_norm": 0.5832869083499105, + "learning_rate": 4.452710724420834e-06, + "loss": 0.2914, + "step": 10171 + }, + { + "epoch": 0.47650723755094393, + "grad_norm": 0.5668902743133455, + "learning_rate": 4.452592296393629e-06, + "loss": 0.2907, + "step": 10172 + }, + { + "epoch": 0.4765540825408723, + "grad_norm": 0.5977584632528851, + "learning_rate": 4.452473857129749e-06, + "loss": 0.2975, + "step": 10173 + }, + { + "epoch": 0.4766009275308006, + "grad_norm": 0.5937316977604746, + "learning_rate": 4.45235540662987e-06, + "loss": 0.265, + "step": 10174 + }, + { + "epoch": 0.47664777252072893, + "grad_norm": 0.5874419407257919, + "learning_rate": 4.452236944894678e-06, + "loss": 0.2811, + "step": 10175 + }, + { + "epoch": 0.4766946175106572, + "grad_norm": 0.6175161424535958, + "learning_rate": 4.452118471924852e-06, + "loss": 0.2921, + "step": 10176 + }, + { + "epoch": 0.4767414625005856, + "grad_norm": 0.5691734907990732, + "learning_rate": 4.451999987721076e-06, + "loss": 0.2636, + "step": 10177 + }, + { + "epoch": 0.4767883074905139, + "grad_norm": 0.6059719663330049, + "learning_rate": 4.4518814922840295e-06, + "loss": 0.2867, + "step": 10178 + }, + { + "epoch": 0.4768351524804422, + "grad_norm": 0.5977050410388356, + "learning_rate": 4.451762985614396e-06, + "loss": 0.3051, + "step": 10179 + }, + { + "epoch": 0.4768819974703705, + "grad_norm": 0.6181537742054168, + "learning_rate": 4.451644467712857e-06, + "loss": 0.2805, + "step": 10180 + }, + { + "epoch": 0.4769288424602989, + "grad_norm": 0.6107593293232838, + "learning_rate": 4.451525938580094e-06, + "loss": 0.2725, + "step": 10181 + }, + { + "epoch": 0.4769756874502272, + "grad_norm": 0.5997974206219624, + "learning_rate": 4.4514073982167915e-06, + "loss": 0.2767, + "step": 10182 + }, + { + "epoch": 0.4770225324401555, + "grad_norm": 0.6496566731913539, + "learning_rate": 4.4512888466236285e-06, + "loss": 0.3269, + "step": 10183 + }, + { + "epoch": 0.4770693774300839, + "grad_norm": 0.5908162802931156, + "learning_rate": 4.4511702838012895e-06, + "loss": 0.2849, + "step": 10184 + }, + { + "epoch": 0.47711622242001217, + "grad_norm": 0.5967934732168362, + "learning_rate": 4.451051709750456e-06, + "loss": 0.3168, + "step": 10185 + }, + { + "epoch": 0.4771630674099405, + "grad_norm": 0.5858033077969385, + "learning_rate": 4.450933124471809e-06, + "loss": 0.2691, + "step": 10186 + }, + { + "epoch": 0.4772099123998688, + "grad_norm": 0.628166269093498, + "learning_rate": 4.450814527966032e-06, + "loss": 0.2947, + "step": 10187 + }, + { + "epoch": 0.47725675738979717, + "grad_norm": 0.6060546461468882, + "learning_rate": 4.45069592023381e-06, + "loss": 0.2908, + "step": 10188 + }, + { + "epoch": 0.47730360237972547, + "grad_norm": 0.6330949338831582, + "learning_rate": 4.450577301275821e-06, + "loss": 0.3144, + "step": 10189 + }, + { + "epoch": 0.4773504473696538, + "grad_norm": 0.5996359805739752, + "learning_rate": 4.4504586710927515e-06, + "loss": 0.3134, + "step": 10190 + }, + { + "epoch": 0.4773972923595821, + "grad_norm": 0.6368506478074599, + "learning_rate": 4.450340029685282e-06, + "loss": 0.2909, + "step": 10191 + }, + { + "epoch": 0.47744413734951047, + "grad_norm": 0.6145970735343056, + "learning_rate": 4.450221377054095e-06, + "loss": 0.2878, + "step": 10192 + }, + { + "epoch": 0.4774909823394388, + "grad_norm": 0.5980294607645056, + "learning_rate": 4.450102713199874e-06, + "loss": 0.2919, + "step": 10193 + }, + { + "epoch": 0.4775378273293671, + "grad_norm": 0.5742059031867851, + "learning_rate": 4.449984038123304e-06, + "loss": 0.2815, + "step": 10194 + }, + { + "epoch": 0.47758467231929547, + "grad_norm": 0.6051680505095309, + "learning_rate": 4.449865351825065e-06, + "loss": 0.3061, + "step": 10195 + }, + { + "epoch": 0.47763151730922376, + "grad_norm": 0.6284179768846159, + "learning_rate": 4.44974665430584e-06, + "loss": 0.2947, + "step": 10196 + }, + { + "epoch": 0.4776783622991521, + "grad_norm": 0.6099068425790448, + "learning_rate": 4.449627945566314e-06, + "loss": 0.2976, + "step": 10197 + }, + { + "epoch": 0.4777252072890804, + "grad_norm": 0.5563686206529984, + "learning_rate": 4.4495092256071696e-06, + "loss": 0.2825, + "step": 10198 + }, + { + "epoch": 0.47777205227900876, + "grad_norm": 0.6239760500213724, + "learning_rate": 4.449390494429089e-06, + "loss": 0.2774, + "step": 10199 + }, + { + "epoch": 0.47781889726893706, + "grad_norm": 0.6373479591148744, + "learning_rate": 4.4492717520327555e-06, + "loss": 0.3056, + "step": 10200 + }, + { + "epoch": 0.4778657422588654, + "grad_norm": 0.577063665323822, + "learning_rate": 4.449152998418854e-06, + "loss": 0.2931, + "step": 10201 + }, + { + "epoch": 0.47791258724879376, + "grad_norm": 0.6938423463425779, + "learning_rate": 4.449034233588066e-06, + "loss": 0.284, + "step": 10202 + }, + { + "epoch": 0.47795943223872206, + "grad_norm": 0.5898931692067264, + "learning_rate": 4.4489154575410765e-06, + "loss": 0.2831, + "step": 10203 + }, + { + "epoch": 0.4780062772286504, + "grad_norm": 0.5871835769260453, + "learning_rate": 4.4487966702785685e-06, + "loss": 0.2931, + "step": 10204 + }, + { + "epoch": 0.4780531222185787, + "grad_norm": 0.6057644276953237, + "learning_rate": 4.448677871801225e-06, + "loss": 0.3012, + "step": 10205 + }, + { + "epoch": 0.47809996720850706, + "grad_norm": 0.6134284218797618, + "learning_rate": 4.448559062109731e-06, + "loss": 0.2988, + "step": 10206 + }, + { + "epoch": 0.47814681219843536, + "grad_norm": 0.6568738722144137, + "learning_rate": 4.448440241204768e-06, + "loss": 0.3273, + "step": 10207 + }, + { + "epoch": 0.4781936571883637, + "grad_norm": 0.6004389989673021, + "learning_rate": 4.448321409087022e-06, + "loss": 0.2706, + "step": 10208 + }, + { + "epoch": 0.478240502178292, + "grad_norm": 0.6565412104371956, + "learning_rate": 4.448202565757176e-06, + "loss": 0.2919, + "step": 10209 + }, + { + "epoch": 0.47828734716822036, + "grad_norm": 0.6108634433007244, + "learning_rate": 4.4480837112159135e-06, + "loss": 0.2765, + "step": 10210 + }, + { + "epoch": 0.4783341921581487, + "grad_norm": 0.6041471415373115, + "learning_rate": 4.447964845463919e-06, + "loss": 0.2876, + "step": 10211 + }, + { + "epoch": 0.478381037148077, + "grad_norm": 0.5440959881266396, + "learning_rate": 4.447845968501876e-06, + "loss": 0.2657, + "step": 10212 + }, + { + "epoch": 0.47842788213800536, + "grad_norm": 0.5994826469100312, + "learning_rate": 4.44772708033047e-06, + "loss": 0.2869, + "step": 10213 + }, + { + "epoch": 0.47847472712793365, + "grad_norm": 0.6021992882128473, + "learning_rate": 4.447608180950384e-06, + "loss": 0.2929, + "step": 10214 + }, + { + "epoch": 0.478521572117862, + "grad_norm": 0.6314164477365841, + "learning_rate": 4.447489270362302e-06, + "loss": 0.2971, + "step": 10215 + }, + { + "epoch": 0.4785684171077903, + "grad_norm": 0.5957588176357447, + "learning_rate": 4.4473703485669085e-06, + "loss": 0.2714, + "step": 10216 + }, + { + "epoch": 0.47861526209771865, + "grad_norm": 0.5858398221999493, + "learning_rate": 4.447251415564889e-06, + "loss": 0.2741, + "step": 10217 + }, + { + "epoch": 0.47866210708764695, + "grad_norm": 0.5631364230136019, + "learning_rate": 4.447132471356926e-06, + "loss": 0.2633, + "step": 10218 + }, + { + "epoch": 0.4787089520775753, + "grad_norm": 0.5661443500299053, + "learning_rate": 4.447013515943706e-06, + "loss": 0.2688, + "step": 10219 + }, + { + "epoch": 0.47875579706750365, + "grad_norm": 0.5188978948208441, + "learning_rate": 4.446894549325912e-06, + "loss": 0.2843, + "step": 10220 + }, + { + "epoch": 0.47880264205743195, + "grad_norm": 0.5281145418666033, + "learning_rate": 4.446775571504229e-06, + "loss": 0.2565, + "step": 10221 + }, + { + "epoch": 0.4788494870473603, + "grad_norm": 0.6656581500875038, + "learning_rate": 4.446656582479343e-06, + "loss": 0.2878, + "step": 10222 + }, + { + "epoch": 0.4788963320372886, + "grad_norm": 0.6143490092243592, + "learning_rate": 4.446537582251936e-06, + "loss": 0.3026, + "step": 10223 + }, + { + "epoch": 0.47894317702721695, + "grad_norm": 0.6285605988909991, + "learning_rate": 4.446418570822696e-06, + "loss": 0.3154, + "step": 10224 + }, + { + "epoch": 0.47899002201714524, + "grad_norm": 0.5862005986019284, + "learning_rate": 4.446299548192306e-06, + "loss": 0.2857, + "step": 10225 + }, + { + "epoch": 0.4790368670070736, + "grad_norm": 0.6153955717547043, + "learning_rate": 4.44618051436145e-06, + "loss": 0.2881, + "step": 10226 + }, + { + "epoch": 0.4790837119970019, + "grad_norm": 0.5750066080071546, + "learning_rate": 4.446061469330816e-06, + "loss": 0.2783, + "step": 10227 + }, + { + "epoch": 0.47913055698693024, + "grad_norm": 0.546495627360414, + "learning_rate": 4.445942413101086e-06, + "loss": 0.2711, + "step": 10228 + }, + { + "epoch": 0.4791774019768586, + "grad_norm": 0.6375247381357154, + "learning_rate": 4.4458233456729475e-06, + "loss": 0.3184, + "step": 10229 + }, + { + "epoch": 0.4792242469667869, + "grad_norm": 0.5638126352497609, + "learning_rate": 4.445704267047084e-06, + "loss": 0.2814, + "step": 10230 + }, + { + "epoch": 0.47927109195671524, + "grad_norm": 0.5577383457237703, + "learning_rate": 4.445585177224182e-06, + "loss": 0.2778, + "step": 10231 + }, + { + "epoch": 0.47931793694664354, + "grad_norm": 0.5993246599923617, + "learning_rate": 4.445466076204926e-06, + "loss": 0.2974, + "step": 10232 + }, + { + "epoch": 0.4793647819365719, + "grad_norm": 0.6412663359353473, + "learning_rate": 4.445346963990002e-06, + "loss": 0.3133, + "step": 10233 + }, + { + "epoch": 0.4794116269265002, + "grad_norm": 0.5689233693467379, + "learning_rate": 4.445227840580094e-06, + "loss": 0.2852, + "step": 10234 + }, + { + "epoch": 0.47945847191642854, + "grad_norm": 0.5997754188948878, + "learning_rate": 4.44510870597589e-06, + "loss": 0.3049, + "step": 10235 + }, + { + "epoch": 0.47950531690635684, + "grad_norm": 0.5960826400647661, + "learning_rate": 4.444989560178074e-06, + "loss": 0.265, + "step": 10236 + }, + { + "epoch": 0.4795521618962852, + "grad_norm": 0.5799943358855366, + "learning_rate": 4.444870403187332e-06, + "loss": 0.2858, + "step": 10237 + }, + { + "epoch": 0.47959900688621354, + "grad_norm": 0.5926978410659465, + "learning_rate": 4.44475123500435e-06, + "loss": 0.2959, + "step": 10238 + }, + { + "epoch": 0.47964585187614184, + "grad_norm": 0.6289479646209962, + "learning_rate": 4.444632055629813e-06, + "loss": 0.3101, + "step": 10239 + }, + { + "epoch": 0.4796926968660702, + "grad_norm": 0.6188704088063598, + "learning_rate": 4.444512865064407e-06, + "loss": 0.3001, + "step": 10240 + }, + { + "epoch": 0.4797395418559985, + "grad_norm": 0.6372948787256663, + "learning_rate": 4.444393663308818e-06, + "loss": 0.3028, + "step": 10241 + }, + { + "epoch": 0.47978638684592684, + "grad_norm": 0.5901459834689636, + "learning_rate": 4.444274450363732e-06, + "loss": 0.2739, + "step": 10242 + }, + { + "epoch": 0.47983323183585513, + "grad_norm": 0.6422291165511158, + "learning_rate": 4.444155226229837e-06, + "loss": 0.2796, + "step": 10243 + }, + { + "epoch": 0.4798800768257835, + "grad_norm": 0.6321942099094676, + "learning_rate": 4.444035990907816e-06, + "loss": 0.3149, + "step": 10244 + }, + { + "epoch": 0.4799269218157118, + "grad_norm": 0.6021204297099311, + "learning_rate": 4.443916744398357e-06, + "loss": 0.2904, + "step": 10245 + }, + { + "epoch": 0.47997376680564013, + "grad_norm": 0.6225916027625685, + "learning_rate": 4.443797486702145e-06, + "loss": 0.2896, + "step": 10246 + }, + { + "epoch": 0.4800206117955685, + "grad_norm": 0.5711683922819453, + "learning_rate": 4.443678217819867e-06, + "loss": 0.2926, + "step": 10247 + }, + { + "epoch": 0.4800674567854968, + "grad_norm": 0.6311923382917409, + "learning_rate": 4.44355893775221e-06, + "loss": 0.2915, + "step": 10248 + }, + { + "epoch": 0.48011430177542513, + "grad_norm": 0.5931353553188531, + "learning_rate": 4.44343964649986e-06, + "loss": 0.287, + "step": 10249 + }, + { + "epoch": 0.48016114676535343, + "grad_norm": 0.56224665566381, + "learning_rate": 4.4433203440635025e-06, + "loss": 0.2638, + "step": 10250 + }, + { + "epoch": 0.4802079917552818, + "grad_norm": 0.5835286899158911, + "learning_rate": 4.443201030443826e-06, + "loss": 0.2796, + "step": 10251 + }, + { + "epoch": 0.4802548367452101, + "grad_norm": 0.5762693107116247, + "learning_rate": 4.443081705641515e-06, + "loss": 0.2933, + "step": 10252 + }, + { + "epoch": 0.48030168173513843, + "grad_norm": 0.6370501788034229, + "learning_rate": 4.442962369657257e-06, + "loss": 0.2902, + "step": 10253 + }, + { + "epoch": 0.4803485267250667, + "grad_norm": 0.6551945910958891, + "learning_rate": 4.442843022491739e-06, + "loss": 0.3079, + "step": 10254 + }, + { + "epoch": 0.4803953717149951, + "grad_norm": 0.5677966552893909, + "learning_rate": 4.442723664145648e-06, + "loss": 0.2828, + "step": 10255 + }, + { + "epoch": 0.48044221670492343, + "grad_norm": 0.5732478805427698, + "learning_rate": 4.44260429461967e-06, + "loss": 0.2757, + "step": 10256 + }, + { + "epoch": 0.4804890616948517, + "grad_norm": 0.6168318756251305, + "learning_rate": 4.442484913914493e-06, + "loss": 0.2668, + "step": 10257 + }, + { + "epoch": 0.4805359066847801, + "grad_norm": 0.5955287435516651, + "learning_rate": 4.442365522030804e-06, + "loss": 0.2979, + "step": 10258 + }, + { + "epoch": 0.4805827516747084, + "grad_norm": 0.5591869402973076, + "learning_rate": 4.44224611896929e-06, + "loss": 0.2801, + "step": 10259 + }, + { + "epoch": 0.4806295966646367, + "grad_norm": 0.5929891842042478, + "learning_rate": 4.442126704730637e-06, + "loss": 0.2894, + "step": 10260 + }, + { + "epoch": 0.480676441654565, + "grad_norm": 0.6219723765060039, + "learning_rate": 4.442007279315532e-06, + "loss": 0.2872, + "step": 10261 + }, + { + "epoch": 0.4807232866444934, + "grad_norm": 0.6649403219530651, + "learning_rate": 4.441887842724665e-06, + "loss": 0.3028, + "step": 10262 + }, + { + "epoch": 0.48077013163442167, + "grad_norm": 0.5828086394424266, + "learning_rate": 4.44176839495872e-06, + "loss": 0.291, + "step": 10263 + }, + { + "epoch": 0.48081697662435, + "grad_norm": 0.6176241725863678, + "learning_rate": 4.441648936018387e-06, + "loss": 0.303, + "step": 10264 + }, + { + "epoch": 0.4808638216142784, + "grad_norm": 0.578514946263962, + "learning_rate": 4.441529465904352e-06, + "loss": 0.2853, + "step": 10265 + }, + { + "epoch": 0.48091066660420667, + "grad_norm": 0.5331193043168444, + "learning_rate": 4.441409984617303e-06, + "loss": 0.2631, + "step": 10266 + }, + { + "epoch": 0.480957511594135, + "grad_norm": 0.5803878985243388, + "learning_rate": 4.441290492157928e-06, + "loss": 0.2849, + "step": 10267 + }, + { + "epoch": 0.4810043565840633, + "grad_norm": 0.605759292717855, + "learning_rate": 4.441170988526913e-06, + "loss": 0.2957, + "step": 10268 + }, + { + "epoch": 0.48105120157399167, + "grad_norm": 0.630727059046045, + "learning_rate": 4.441051473724948e-06, + "loss": 0.2951, + "step": 10269 + }, + { + "epoch": 0.48109804656391997, + "grad_norm": 0.6197577645254866, + "learning_rate": 4.440931947752719e-06, + "loss": 0.304, + "step": 10270 + }, + { + "epoch": 0.4811448915538483, + "grad_norm": 0.5714643304655255, + "learning_rate": 4.440812410610914e-06, + "loss": 0.2837, + "step": 10271 + }, + { + "epoch": 0.4811917365437766, + "grad_norm": 0.6180088025987516, + "learning_rate": 4.440692862300222e-06, + "loss": 0.2862, + "step": 10272 + }, + { + "epoch": 0.48123858153370497, + "grad_norm": 0.6076434480061395, + "learning_rate": 4.440573302821331e-06, + "loss": 0.2776, + "step": 10273 + }, + { + "epoch": 0.4812854265236333, + "grad_norm": 0.5812322903079357, + "learning_rate": 4.440453732174927e-06, + "loss": 0.2749, + "step": 10274 + }, + { + "epoch": 0.4813322715135616, + "grad_norm": 0.594944295046824, + "learning_rate": 4.4403341503617e-06, + "loss": 0.2765, + "step": 10275 + }, + { + "epoch": 0.48137911650348997, + "grad_norm": 0.6011526985146388, + "learning_rate": 4.4402145573823384e-06, + "loss": 0.296, + "step": 10276 + }, + { + "epoch": 0.48142596149341826, + "grad_norm": 0.5933361770542074, + "learning_rate": 4.440094953237529e-06, + "loss": 0.2876, + "step": 10277 + }, + { + "epoch": 0.4814728064833466, + "grad_norm": 0.5497427929793038, + "learning_rate": 4.439975337927961e-06, + "loss": 0.2663, + "step": 10278 + }, + { + "epoch": 0.4815196514732749, + "grad_norm": 0.5647243412164494, + "learning_rate": 4.439855711454323e-06, + "loss": 0.2814, + "step": 10279 + }, + { + "epoch": 0.48156649646320326, + "grad_norm": 0.5489953760030856, + "learning_rate": 4.439736073817301e-06, + "loss": 0.2498, + "step": 10280 + }, + { + "epoch": 0.48161334145313156, + "grad_norm": 0.600583359725135, + "learning_rate": 4.439616425017587e-06, + "loss": 0.292, + "step": 10281 + }, + { + "epoch": 0.4816601864430599, + "grad_norm": 0.6254552937683334, + "learning_rate": 4.439496765055868e-06, + "loss": 0.3038, + "step": 10282 + }, + { + "epoch": 0.48170703143298826, + "grad_norm": 0.577360817993271, + "learning_rate": 4.439377093932832e-06, + "loss": 0.269, + "step": 10283 + }, + { + "epoch": 0.48175387642291656, + "grad_norm": 0.59847550194786, + "learning_rate": 4.439257411649169e-06, + "loss": 0.293, + "step": 10284 + }, + { + "epoch": 0.4818007214128449, + "grad_norm": 0.6068603304326747, + "learning_rate": 4.439137718205567e-06, + "loss": 0.3045, + "step": 10285 + }, + { + "epoch": 0.4818475664027732, + "grad_norm": 0.556111459064891, + "learning_rate": 4.439018013602714e-06, + "loss": 0.2753, + "step": 10286 + }, + { + "epoch": 0.48189441139270156, + "grad_norm": 0.5969401112115278, + "learning_rate": 4.4388982978413e-06, + "loss": 0.2943, + "step": 10287 + }, + { + "epoch": 0.48194125638262986, + "grad_norm": 0.5992268029904018, + "learning_rate": 4.4387785709220146e-06, + "loss": 0.2729, + "step": 10288 + }, + { + "epoch": 0.4819881013725582, + "grad_norm": 0.6047292170487601, + "learning_rate": 4.438658832845544e-06, + "loss": 0.3026, + "step": 10289 + }, + { + "epoch": 0.4820349463624865, + "grad_norm": 0.57779653052187, + "learning_rate": 4.4385390836125805e-06, + "loss": 0.3025, + "step": 10290 + }, + { + "epoch": 0.48208179135241486, + "grad_norm": 0.5771687420311788, + "learning_rate": 4.438419323223811e-06, + "loss": 0.2809, + "step": 10291 + }, + { + "epoch": 0.4821286363423432, + "grad_norm": 0.6036176768805308, + "learning_rate": 4.438299551679926e-06, + "loss": 0.3022, + "step": 10292 + }, + { + "epoch": 0.4821754813322715, + "grad_norm": 0.5289376566815455, + "learning_rate": 4.438179768981614e-06, + "loss": 0.2731, + "step": 10293 + }, + { + "epoch": 0.48222232632219986, + "grad_norm": 0.5901920850581492, + "learning_rate": 4.438059975129565e-06, + "loss": 0.3047, + "step": 10294 + }, + { + "epoch": 0.48226917131212815, + "grad_norm": 0.5673717297380784, + "learning_rate": 4.437940170124467e-06, + "loss": 0.2755, + "step": 10295 + }, + { + "epoch": 0.4823160163020565, + "grad_norm": 0.576063575912949, + "learning_rate": 4.4378203539670115e-06, + "loss": 0.3054, + "step": 10296 + }, + { + "epoch": 0.4823628612919848, + "grad_norm": 0.6047754764701726, + "learning_rate": 4.4377005266578866e-06, + "loss": 0.2973, + "step": 10297 + }, + { + "epoch": 0.48240970628191315, + "grad_norm": 0.5496826089399183, + "learning_rate": 4.437580688197782e-06, + "loss": 0.2824, + "step": 10298 + }, + { + "epoch": 0.48245655127184145, + "grad_norm": 0.6060846693143631, + "learning_rate": 4.437460838587388e-06, + "loss": 0.2861, + "step": 10299 + }, + { + "epoch": 0.4825033962617698, + "grad_norm": 0.5947761969248732, + "learning_rate": 4.4373409778273925e-06, + "loss": 0.29, + "step": 10300 + }, + { + "epoch": 0.48255024125169815, + "grad_norm": 0.6052242160963133, + "learning_rate": 4.437221105918488e-06, + "loss": 0.2881, + "step": 10301 + }, + { + "epoch": 0.48259708624162645, + "grad_norm": 0.6148209636326217, + "learning_rate": 4.437101222861363e-06, + "loss": 0.2906, + "step": 10302 + }, + { + "epoch": 0.4826439312315548, + "grad_norm": 0.6143747218709243, + "learning_rate": 4.436981328656706e-06, + "loss": 0.3014, + "step": 10303 + }, + { + "epoch": 0.4826907762214831, + "grad_norm": 0.5871403409360166, + "learning_rate": 4.436861423305209e-06, + "loss": 0.298, + "step": 10304 + }, + { + "epoch": 0.48273762121141145, + "grad_norm": 0.5752288764917449, + "learning_rate": 4.436741506807561e-06, + "loss": 0.2663, + "step": 10305 + }, + { + "epoch": 0.48278446620133975, + "grad_norm": 0.5765822493289211, + "learning_rate": 4.436621579164453e-06, + "loss": 0.2831, + "step": 10306 + }, + { + "epoch": 0.4828313111912681, + "grad_norm": 0.5846056555599507, + "learning_rate": 4.4365016403765746e-06, + "loss": 0.3177, + "step": 10307 + }, + { + "epoch": 0.4828781561811964, + "grad_norm": 0.5526747958703915, + "learning_rate": 4.436381690444616e-06, + "loss": 0.2696, + "step": 10308 + }, + { + "epoch": 0.48292500117112475, + "grad_norm": 0.6252989509969329, + "learning_rate": 4.436261729369266e-06, + "loss": 0.3099, + "step": 10309 + }, + { + "epoch": 0.4829718461610531, + "grad_norm": 0.6364875428631683, + "learning_rate": 4.436141757151219e-06, + "loss": 0.2708, + "step": 10310 + }, + { + "epoch": 0.4830186911509814, + "grad_norm": 0.5451813398157686, + "learning_rate": 4.4360217737911614e-06, + "loss": 0.2907, + "step": 10311 + }, + { + "epoch": 0.48306553614090975, + "grad_norm": 0.61252368796526, + "learning_rate": 4.435901779289785e-06, + "loss": 0.2993, + "step": 10312 + }, + { + "epoch": 0.48311238113083804, + "grad_norm": 0.5898182902159902, + "learning_rate": 4.43578177364778e-06, + "loss": 0.2755, + "step": 10313 + }, + { + "epoch": 0.4831592261207664, + "grad_norm": 0.5887081008829324, + "learning_rate": 4.435661756865838e-06, + "loss": 0.2955, + "step": 10314 + }, + { + "epoch": 0.4832060711106947, + "grad_norm": 0.6102931503608756, + "learning_rate": 4.435541728944649e-06, + "loss": 0.2975, + "step": 10315 + }, + { + "epoch": 0.48325291610062304, + "grad_norm": 0.5789416685826021, + "learning_rate": 4.435421689884905e-06, + "loss": 0.3068, + "step": 10316 + }, + { + "epoch": 0.48329976109055134, + "grad_norm": 0.5916706999392626, + "learning_rate": 4.435301639687294e-06, + "loss": 0.2792, + "step": 10317 + }, + { + "epoch": 0.4833466060804797, + "grad_norm": 0.6249302845433883, + "learning_rate": 4.435181578352509e-06, + "loss": 0.2896, + "step": 10318 + }, + { + "epoch": 0.48339345107040804, + "grad_norm": 0.6248446179105608, + "learning_rate": 4.435061505881241e-06, + "loss": 0.3072, + "step": 10319 + }, + { + "epoch": 0.48344029606033634, + "grad_norm": 0.6006263218063508, + "learning_rate": 4.43494142227418e-06, + "loss": 0.2821, + "step": 10320 + }, + { + "epoch": 0.4834871410502647, + "grad_norm": 0.6918177242198102, + "learning_rate": 4.4348213275320166e-06, + "loss": 0.3306, + "step": 10321 + }, + { + "epoch": 0.483533986040193, + "grad_norm": 0.6653597096689339, + "learning_rate": 4.434701221655444e-06, + "loss": 0.3013, + "step": 10322 + }, + { + "epoch": 0.48358083103012134, + "grad_norm": 0.6039770434261189, + "learning_rate": 4.434581104645151e-06, + "loss": 0.286, + "step": 10323 + }, + { + "epoch": 0.48362767602004963, + "grad_norm": 0.6201580816430466, + "learning_rate": 4.434460976501831e-06, + "loss": 0.2853, + "step": 10324 + }, + { + "epoch": 0.483674521009978, + "grad_norm": 0.5538130797602507, + "learning_rate": 4.434340837226173e-06, + "loss": 0.2883, + "step": 10325 + }, + { + "epoch": 0.4837213659999063, + "grad_norm": 0.5842853625473642, + "learning_rate": 4.4342206868188705e-06, + "loss": 0.2765, + "step": 10326 + }, + { + "epoch": 0.48376821098983463, + "grad_norm": 0.6096716175767452, + "learning_rate": 4.434100525280615e-06, + "loss": 0.2939, + "step": 10327 + }, + { + "epoch": 0.483815055979763, + "grad_norm": 0.6007739527447223, + "learning_rate": 4.433980352612096e-06, + "loss": 0.2845, + "step": 10328 + }, + { + "epoch": 0.4838619009696913, + "grad_norm": 0.6530032583605504, + "learning_rate": 4.433860168814006e-06, + "loss": 0.3181, + "step": 10329 + }, + { + "epoch": 0.48390874595961964, + "grad_norm": 0.6440131235920231, + "learning_rate": 4.433739973887037e-06, + "loss": 0.3028, + "step": 10330 + }, + { + "epoch": 0.48395559094954793, + "grad_norm": 0.5844936614531437, + "learning_rate": 4.433619767831881e-06, + "loss": 0.266, + "step": 10331 + }, + { + "epoch": 0.4840024359394763, + "grad_norm": 0.6026228902725801, + "learning_rate": 4.433499550649229e-06, + "loss": 0.2928, + "step": 10332 + }, + { + "epoch": 0.4840492809294046, + "grad_norm": 0.6133177047555928, + "learning_rate": 4.433379322339773e-06, + "loss": 0.3066, + "step": 10333 + }, + { + "epoch": 0.48409612591933293, + "grad_norm": 0.5996346212608568, + "learning_rate": 4.433259082904204e-06, + "loss": 0.2919, + "step": 10334 + }, + { + "epoch": 0.4841429709092612, + "grad_norm": 0.6121496929542382, + "learning_rate": 4.433138832343216e-06, + "loss": 0.2946, + "step": 10335 + }, + { + "epoch": 0.4841898158991896, + "grad_norm": 0.6268974479243129, + "learning_rate": 4.4330185706575e-06, + "loss": 0.3061, + "step": 10336 + }, + { + "epoch": 0.48423666088911793, + "grad_norm": 0.5828244895016983, + "learning_rate": 4.4328982978477474e-06, + "loss": 0.3044, + "step": 10337 + }, + { + "epoch": 0.4842835058790462, + "grad_norm": 0.5649098053828636, + "learning_rate": 4.432778013914651e-06, + "loss": 0.2776, + "step": 10338 + }, + { + "epoch": 0.4843303508689746, + "grad_norm": 0.5575094513193153, + "learning_rate": 4.432657718858903e-06, + "loss": 0.2898, + "step": 10339 + }, + { + "epoch": 0.4843771958589029, + "grad_norm": 0.5769082001025057, + "learning_rate": 4.432537412681196e-06, + "loss": 0.2765, + "step": 10340 + }, + { + "epoch": 0.4844240408488312, + "grad_norm": 0.5921217004977362, + "learning_rate": 4.432417095382221e-06, + "loss": 0.2911, + "step": 10341 + }, + { + "epoch": 0.4844708858387595, + "grad_norm": 0.6290618325285926, + "learning_rate": 4.432296766962672e-06, + "loss": 0.2933, + "step": 10342 + }, + { + "epoch": 0.4845177308286879, + "grad_norm": 0.5834230949460812, + "learning_rate": 4.43217642742324e-06, + "loss": 0.2742, + "step": 10343 + }, + { + "epoch": 0.48456457581861617, + "grad_norm": 0.6126354978671925, + "learning_rate": 4.432056076764619e-06, + "loss": 0.2958, + "step": 10344 + }, + { + "epoch": 0.4846114208085445, + "grad_norm": 0.5819082316013742, + "learning_rate": 4.431935714987501e-06, + "loss": 0.2615, + "step": 10345 + }, + { + "epoch": 0.4846582657984729, + "grad_norm": 0.5850523188967638, + "learning_rate": 4.4318153420925795e-06, + "loss": 0.2827, + "step": 10346 + }, + { + "epoch": 0.4847051107884012, + "grad_norm": 0.6142022550772077, + "learning_rate": 4.431694958080544e-06, + "loss": 0.286, + "step": 10347 + }, + { + "epoch": 0.4847519557783295, + "grad_norm": 0.5528758030110389, + "learning_rate": 4.431574562952091e-06, + "loss": 0.2769, + "step": 10348 + }, + { + "epoch": 0.4847988007682578, + "grad_norm": 0.5736034918466059, + "learning_rate": 4.431454156707912e-06, + "loss": 0.2776, + "step": 10349 + }, + { + "epoch": 0.4848456457581862, + "grad_norm": 0.6814476904871992, + "learning_rate": 4.431333739348699e-06, + "loss": 0.3, + "step": 10350 + }, + { + "epoch": 0.48489249074811447, + "grad_norm": 0.6413522482379174, + "learning_rate": 4.431213310875145e-06, + "loss": 0.3003, + "step": 10351 + }, + { + "epoch": 0.4849393357380428, + "grad_norm": 0.5778376674206273, + "learning_rate": 4.431092871287945e-06, + "loss": 0.2849, + "step": 10352 + }, + { + "epoch": 0.4849861807279711, + "grad_norm": 0.600530535918328, + "learning_rate": 4.430972420587792e-06, + "loss": 0.2852, + "step": 10353 + }, + { + "epoch": 0.48503302571789947, + "grad_norm": 0.620568032148611, + "learning_rate": 4.4308519587753755e-06, + "loss": 0.2862, + "step": 10354 + }, + { + "epoch": 0.4850798707078278, + "grad_norm": 0.7043990352061655, + "learning_rate": 4.430731485851393e-06, + "loss": 0.3201, + "step": 10355 + }, + { + "epoch": 0.4851267156977561, + "grad_norm": 0.6725394856381176, + "learning_rate": 4.4306110018165355e-06, + "loss": 0.3003, + "step": 10356 + }, + { + "epoch": 0.48517356068768447, + "grad_norm": 0.5989884281105177, + "learning_rate": 4.430490506671497e-06, + "loss": 0.2878, + "step": 10357 + }, + { + "epoch": 0.48522040567761276, + "grad_norm": 0.5976476925518227, + "learning_rate": 4.430370000416971e-06, + "loss": 0.2896, + "step": 10358 + }, + { + "epoch": 0.4852672506675411, + "grad_norm": 0.6232550155988821, + "learning_rate": 4.430249483053651e-06, + "loss": 0.289, + "step": 10359 + }, + { + "epoch": 0.4853140956574694, + "grad_norm": 0.5866458933410513, + "learning_rate": 4.430128954582229e-06, + "loss": 0.2841, + "step": 10360 + }, + { + "epoch": 0.48536094064739776, + "grad_norm": 0.5585883380487613, + "learning_rate": 4.430008415003401e-06, + "loss": 0.2879, + "step": 10361 + }, + { + "epoch": 0.48540778563732606, + "grad_norm": 0.6010342975103407, + "learning_rate": 4.42988786431786e-06, + "loss": 0.2774, + "step": 10362 + }, + { + "epoch": 0.4854546306272544, + "grad_norm": 0.605285326285586, + "learning_rate": 4.4297673025262985e-06, + "loss": 0.2914, + "step": 10363 + }, + { + "epoch": 0.48550147561718277, + "grad_norm": 0.5344034581759779, + "learning_rate": 4.429646729629413e-06, + "loss": 0.2552, + "step": 10364 + }, + { + "epoch": 0.48554832060711106, + "grad_norm": 0.6286565097112926, + "learning_rate": 4.429526145627894e-06, + "loss": 0.3106, + "step": 10365 + }, + { + "epoch": 0.4855951655970394, + "grad_norm": 0.5303353519424945, + "learning_rate": 4.4294055505224375e-06, + "loss": 0.2739, + "step": 10366 + }, + { + "epoch": 0.4856420105869677, + "grad_norm": 0.5746117941171908, + "learning_rate": 4.4292849443137365e-06, + "loss": 0.2962, + "step": 10367 + }, + { + "epoch": 0.48568885557689606, + "grad_norm": 0.544601391000232, + "learning_rate": 4.429164327002486e-06, + "loss": 0.2744, + "step": 10368 + }, + { + "epoch": 0.48573570056682436, + "grad_norm": 0.6060077275065509, + "learning_rate": 4.429043698589379e-06, + "loss": 0.301, + "step": 10369 + }, + { + "epoch": 0.4857825455567527, + "grad_norm": 0.5995471166183923, + "learning_rate": 4.4289230590751115e-06, + "loss": 0.3035, + "step": 10370 + }, + { + "epoch": 0.485829390546681, + "grad_norm": 0.5808524508739403, + "learning_rate": 4.428802408460376e-06, + "loss": 0.2736, + "step": 10371 + }, + { + "epoch": 0.48587623553660936, + "grad_norm": 0.7385531328779612, + "learning_rate": 4.428681746745868e-06, + "loss": 0.2879, + "step": 10372 + }, + { + "epoch": 0.4859230805265377, + "grad_norm": 0.5991781109905202, + "learning_rate": 4.42856107393228e-06, + "loss": 0.2715, + "step": 10373 + }, + { + "epoch": 0.485969925516466, + "grad_norm": 0.5943436211646232, + "learning_rate": 4.4284403900203085e-06, + "loss": 0.2921, + "step": 10374 + }, + { + "epoch": 0.48601677050639436, + "grad_norm": 0.6246918368353026, + "learning_rate": 4.428319695010648e-06, + "loss": 0.3055, + "step": 10375 + }, + { + "epoch": 0.48606361549632265, + "grad_norm": 0.5655234933657235, + "learning_rate": 4.428198988903991e-06, + "loss": 0.2903, + "step": 10376 + }, + { + "epoch": 0.486110460486251, + "grad_norm": 0.5711134353384023, + "learning_rate": 4.428078271701034e-06, + "loss": 0.2745, + "step": 10377 + }, + { + "epoch": 0.4861573054761793, + "grad_norm": 0.6063317295893532, + "learning_rate": 4.4279575434024716e-06, + "loss": 0.3006, + "step": 10378 + }, + { + "epoch": 0.48620415046610765, + "grad_norm": 0.6087263736534477, + "learning_rate": 4.427836804008998e-06, + "loss": 0.2953, + "step": 10379 + }, + { + "epoch": 0.48625099545603595, + "grad_norm": 0.6498818314952829, + "learning_rate": 4.427716053521308e-06, + "loss": 0.2926, + "step": 10380 + }, + { + "epoch": 0.4862978404459643, + "grad_norm": 0.584637440930463, + "learning_rate": 4.427595291940096e-06, + "loss": 0.263, + "step": 10381 + }, + { + "epoch": 0.48634468543589265, + "grad_norm": 0.6253413279580748, + "learning_rate": 4.427474519266058e-06, + "loss": 0.2972, + "step": 10382 + }, + { + "epoch": 0.48639153042582095, + "grad_norm": 0.6178465169045343, + "learning_rate": 4.427353735499889e-06, + "loss": 0.2903, + "step": 10383 + }, + { + "epoch": 0.4864383754157493, + "grad_norm": 0.544150993472323, + "learning_rate": 4.427232940642283e-06, + "loss": 0.2817, + "step": 10384 + }, + { + "epoch": 0.4864852204056776, + "grad_norm": 0.6407540590776748, + "learning_rate": 4.427112134693936e-06, + "loss": 0.2809, + "step": 10385 + }, + { + "epoch": 0.48653206539560595, + "grad_norm": 0.6268458971247446, + "learning_rate": 4.426991317655543e-06, + "loss": 0.2862, + "step": 10386 + }, + { + "epoch": 0.48657891038553425, + "grad_norm": 0.610377586615275, + "learning_rate": 4.4268704895278005e-06, + "loss": 0.2785, + "step": 10387 + }, + { + "epoch": 0.4866257553754626, + "grad_norm": 0.6455477728872282, + "learning_rate": 4.426749650311401e-06, + "loss": 0.2833, + "step": 10388 + }, + { + "epoch": 0.4866726003653909, + "grad_norm": 0.591207231455331, + "learning_rate": 4.426628800007043e-06, + "loss": 0.2992, + "step": 10389 + }, + { + "epoch": 0.48671944535531925, + "grad_norm": 0.5595833227043733, + "learning_rate": 4.426507938615418e-06, + "loss": 0.2737, + "step": 10390 + }, + { + "epoch": 0.4867662903452476, + "grad_norm": 0.6202481722336198, + "learning_rate": 4.426387066137227e-06, + "loss": 0.3027, + "step": 10391 + }, + { + "epoch": 0.4868131353351759, + "grad_norm": 0.5706301508316101, + "learning_rate": 4.426266182573161e-06, + "loss": 0.2705, + "step": 10392 + }, + { + "epoch": 0.48685998032510425, + "grad_norm": 0.6132841820769069, + "learning_rate": 4.426145287923918e-06, + "loss": 0.2733, + "step": 10393 + }, + { + "epoch": 0.48690682531503254, + "grad_norm": 0.6437334263172074, + "learning_rate": 4.426024382190191e-06, + "loss": 0.3069, + "step": 10394 + }, + { + "epoch": 0.4869536703049609, + "grad_norm": 0.6166732135494062, + "learning_rate": 4.42590346537268e-06, + "loss": 0.2931, + "step": 10395 + }, + { + "epoch": 0.4870005152948892, + "grad_norm": 0.5749390234643316, + "learning_rate": 4.425782537472077e-06, + "loss": 0.292, + "step": 10396 + }, + { + "epoch": 0.48704736028481754, + "grad_norm": 0.585194993482895, + "learning_rate": 4.4256615984890816e-06, + "loss": 0.2913, + "step": 10397 + }, + { + "epoch": 0.48709420527474584, + "grad_norm": 0.6103608671687979, + "learning_rate": 4.425540648424386e-06, + "loss": 0.3032, + "step": 10398 + }, + { + "epoch": 0.4871410502646742, + "grad_norm": 0.6202801039086613, + "learning_rate": 4.425419687278689e-06, + "loss": 0.2979, + "step": 10399 + }, + { + "epoch": 0.48718789525460254, + "grad_norm": 0.5752609533473378, + "learning_rate": 4.425298715052685e-06, + "loss": 0.2743, + "step": 10400 + }, + { + "epoch": 0.48723474024453084, + "grad_norm": 0.6079251384142244, + "learning_rate": 4.4251777317470706e-06, + "loss": 0.294, + "step": 10401 + }, + { + "epoch": 0.4872815852344592, + "grad_norm": 0.5823479250149112, + "learning_rate": 4.425056737362543e-06, + "loss": 0.2918, + "step": 10402 + }, + { + "epoch": 0.4873284302243875, + "grad_norm": 0.6150277052965428, + "learning_rate": 4.424935731899796e-06, + "loss": 0.2859, + "step": 10403 + }, + { + "epoch": 0.48737527521431584, + "grad_norm": 0.5989037443607084, + "learning_rate": 4.42481471535953e-06, + "loss": 0.2825, + "step": 10404 + }, + { + "epoch": 0.48742212020424414, + "grad_norm": 0.6045576816029559, + "learning_rate": 4.424693687742438e-06, + "loss": 0.29, + "step": 10405 + }, + { + "epoch": 0.4874689651941725, + "grad_norm": 0.5946449587445526, + "learning_rate": 4.424572649049218e-06, + "loss": 0.286, + "step": 10406 + }, + { + "epoch": 0.4875158101841008, + "grad_norm": 0.5964752877624383, + "learning_rate": 4.424451599280565e-06, + "loss": 0.2795, + "step": 10407 + }, + { + "epoch": 0.48756265517402914, + "grad_norm": 0.5724991091571576, + "learning_rate": 4.424330538437178e-06, + "loss": 0.2627, + "step": 10408 + }, + { + "epoch": 0.4876095001639575, + "grad_norm": 0.5540899222877983, + "learning_rate": 4.4242094665197514e-06, + "loss": 0.2955, + "step": 10409 + }, + { + "epoch": 0.4876563451538858, + "grad_norm": 0.5722788960772155, + "learning_rate": 4.4240883835289835e-06, + "loss": 0.266, + "step": 10410 + }, + { + "epoch": 0.48770319014381414, + "grad_norm": 0.645853777018566, + "learning_rate": 4.4239672894655705e-06, + "loss": 0.296, + "step": 10411 + }, + { + "epoch": 0.48775003513374243, + "grad_norm": 0.5568609389456952, + "learning_rate": 4.42384618433021e-06, + "loss": 0.2948, + "step": 10412 + }, + { + "epoch": 0.4877968801236708, + "grad_norm": 0.606719799310657, + "learning_rate": 4.423725068123597e-06, + "loss": 0.298, + "step": 10413 + }, + { + "epoch": 0.4878437251135991, + "grad_norm": 0.5868520516128116, + "learning_rate": 4.4236039408464305e-06, + "loss": 0.2765, + "step": 10414 + }, + { + "epoch": 0.48789057010352743, + "grad_norm": 0.6183329266004381, + "learning_rate": 4.4234828024994066e-06, + "loss": 0.2964, + "step": 10415 + }, + { + "epoch": 0.48793741509345573, + "grad_norm": 0.5692129285175067, + "learning_rate": 4.423361653083222e-06, + "loss": 0.2737, + "step": 10416 + }, + { + "epoch": 0.4879842600833841, + "grad_norm": 0.5636393789917656, + "learning_rate": 4.4232404925985755e-06, + "loss": 0.2848, + "step": 10417 + }, + { + "epoch": 0.48803110507331243, + "grad_norm": 0.5672798136752242, + "learning_rate": 4.4231193210461635e-06, + "loss": 0.2783, + "step": 10418 + }, + { + "epoch": 0.48807795006324073, + "grad_norm": 0.5802964596019543, + "learning_rate": 4.4229981384266815e-06, + "loss": 0.2757, + "step": 10419 + }, + { + "epoch": 0.4881247950531691, + "grad_norm": 0.607664606281963, + "learning_rate": 4.42287694474083e-06, + "loss": 0.2828, + "step": 10420 + }, + { + "epoch": 0.4881716400430974, + "grad_norm": 0.5699826403200053, + "learning_rate": 4.422755739989305e-06, + "loss": 0.2842, + "step": 10421 + }, + { + "epoch": 0.48821848503302573, + "grad_norm": 0.6336330142527618, + "learning_rate": 4.422634524172804e-06, + "loss": 0.2871, + "step": 10422 + }, + { + "epoch": 0.488265330022954, + "grad_norm": 0.5960199556225758, + "learning_rate": 4.422513297292024e-06, + "loss": 0.2735, + "step": 10423 + }, + { + "epoch": 0.4883121750128824, + "grad_norm": 0.5485459374990069, + "learning_rate": 4.422392059347663e-06, + "loss": 0.2708, + "step": 10424 + }, + { + "epoch": 0.4883590200028107, + "grad_norm": 0.6074921853217913, + "learning_rate": 4.42227081034042e-06, + "loss": 0.2964, + "step": 10425 + }, + { + "epoch": 0.488405864992739, + "grad_norm": 0.5770983356332113, + "learning_rate": 4.42214955027099e-06, + "loss": 0.2827, + "step": 10426 + }, + { + "epoch": 0.4884527099826674, + "grad_norm": 0.5575348581398857, + "learning_rate": 4.422028279140074e-06, + "loss": 0.2791, + "step": 10427 + }, + { + "epoch": 0.4884995549725957, + "grad_norm": 0.5679323026810541, + "learning_rate": 4.421906996948367e-06, + "loss": 0.2935, + "step": 10428 + }, + { + "epoch": 0.488546399962524, + "grad_norm": 0.6401415809142403, + "learning_rate": 4.4217857036965695e-06, + "loss": 0.2959, + "step": 10429 + }, + { + "epoch": 0.4885932449524523, + "grad_norm": 0.5582010523567484, + "learning_rate": 4.4216643993853785e-06, + "loss": 0.2779, + "step": 10430 + }, + { + "epoch": 0.4886400899423807, + "grad_norm": 0.6047292574134094, + "learning_rate": 4.4215430840154905e-06, + "loss": 0.288, + "step": 10431 + }, + { + "epoch": 0.48868693493230897, + "grad_norm": 0.5687243172257095, + "learning_rate": 4.421421757587606e-06, + "loss": 0.2663, + "step": 10432 + }, + { + "epoch": 0.4887337799222373, + "grad_norm": 0.5892508451391588, + "learning_rate": 4.421300420102421e-06, + "loss": 0.2977, + "step": 10433 + }, + { + "epoch": 0.4887806249121656, + "grad_norm": 0.5717639575569738, + "learning_rate": 4.421179071560636e-06, + "loss": 0.2741, + "step": 10434 + }, + { + "epoch": 0.48882746990209397, + "grad_norm": 0.6123225718263984, + "learning_rate": 4.421057711962948e-06, + "loss": 0.2742, + "step": 10435 + }, + { + "epoch": 0.4888743148920223, + "grad_norm": 0.6046266881557898, + "learning_rate": 4.420936341310056e-06, + "loss": 0.3051, + "step": 10436 + }, + { + "epoch": 0.4889211598819506, + "grad_norm": 0.5692840321435114, + "learning_rate": 4.420814959602659e-06, + "loss": 0.2708, + "step": 10437 + }, + { + "epoch": 0.48896800487187897, + "grad_norm": 0.5892860572825174, + "learning_rate": 4.420693566841453e-06, + "loss": 0.2819, + "step": 10438 + }, + { + "epoch": 0.48901484986180727, + "grad_norm": 0.6213515179599924, + "learning_rate": 4.420572163027139e-06, + "loss": 0.2745, + "step": 10439 + }, + { + "epoch": 0.4890616948517356, + "grad_norm": 0.5754144320714389, + "learning_rate": 4.420450748160415e-06, + "loss": 0.2934, + "step": 10440 + }, + { + "epoch": 0.4891085398416639, + "grad_norm": 0.5563729446193508, + "learning_rate": 4.42032932224198e-06, + "loss": 0.2893, + "step": 10441 + }, + { + "epoch": 0.48915538483159227, + "grad_norm": 0.582026465596137, + "learning_rate": 4.420207885272531e-06, + "loss": 0.285, + "step": 10442 + }, + { + "epoch": 0.48920222982152056, + "grad_norm": 0.5513990860790332, + "learning_rate": 4.42008643725277e-06, + "loss": 0.2862, + "step": 10443 + }, + { + "epoch": 0.4892490748114489, + "grad_norm": 0.5386914855135684, + "learning_rate": 4.4199649781833935e-06, + "loss": 0.2812, + "step": 10444 + }, + { + "epoch": 0.48929591980137727, + "grad_norm": 0.5838190256810184, + "learning_rate": 4.419843508065101e-06, + "loss": 0.2834, + "step": 10445 + }, + { + "epoch": 0.48934276479130556, + "grad_norm": 0.585618505920112, + "learning_rate": 4.4197220268985904e-06, + "loss": 0.2777, + "step": 10446 + }, + { + "epoch": 0.4893896097812339, + "grad_norm": 0.5870112213444473, + "learning_rate": 4.419600534684564e-06, + "loss": 0.291, + "step": 10447 + }, + { + "epoch": 0.4894364547711622, + "grad_norm": 0.5755331906879138, + "learning_rate": 4.419479031423718e-06, + "loss": 0.2812, + "step": 10448 + }, + { + "epoch": 0.48948329976109056, + "grad_norm": 0.5867862249987748, + "learning_rate": 4.419357517116753e-06, + "loss": 0.2977, + "step": 10449 + }, + { + "epoch": 0.48953014475101886, + "grad_norm": 0.624189386308373, + "learning_rate": 4.419235991764368e-06, + "loss": 0.3151, + "step": 10450 + }, + { + "epoch": 0.4895769897409472, + "grad_norm": 0.5798368953621742, + "learning_rate": 4.419114455367262e-06, + "loss": 0.2888, + "step": 10451 + }, + { + "epoch": 0.4896238347308755, + "grad_norm": 0.5706824468947832, + "learning_rate": 4.418992907926134e-06, + "loss": 0.2657, + "step": 10452 + }, + { + "epoch": 0.48967067972080386, + "grad_norm": 0.6099553564708066, + "learning_rate": 4.418871349441685e-06, + "loss": 0.2707, + "step": 10453 + }, + { + "epoch": 0.4897175247107322, + "grad_norm": 0.5453146635314848, + "learning_rate": 4.418749779914614e-06, + "loss": 0.287, + "step": 10454 + }, + { + "epoch": 0.4897643697006605, + "grad_norm": 0.6110191317755138, + "learning_rate": 4.418628199345619e-06, + "loss": 0.3157, + "step": 10455 + }, + { + "epoch": 0.48981121469058886, + "grad_norm": 0.6158869286619569, + "learning_rate": 4.4185066077354025e-06, + "loss": 0.2973, + "step": 10456 + }, + { + "epoch": 0.48985805968051715, + "grad_norm": 0.6329415019753023, + "learning_rate": 4.418385005084661e-06, + "loss": 0.2884, + "step": 10457 + }, + { + "epoch": 0.4899049046704455, + "grad_norm": 0.641421352289711, + "learning_rate": 4.418263391394098e-06, + "loss": 0.3019, + "step": 10458 + }, + { + "epoch": 0.4899517496603738, + "grad_norm": 0.570308003344298, + "learning_rate": 4.4181417666644095e-06, + "loss": 0.2997, + "step": 10459 + }, + { + "epoch": 0.48999859465030215, + "grad_norm": 0.5561493185609575, + "learning_rate": 4.418020130896298e-06, + "loss": 0.2732, + "step": 10460 + }, + { + "epoch": 0.49004543964023045, + "grad_norm": 0.6720761440402737, + "learning_rate": 4.417898484090463e-06, + "loss": 0.3166, + "step": 10461 + }, + { + "epoch": 0.4900922846301588, + "grad_norm": 0.6092261906765385, + "learning_rate": 4.417776826247604e-06, + "loss": 0.2959, + "step": 10462 + }, + { + "epoch": 0.49013912962008716, + "grad_norm": 0.579039650999089, + "learning_rate": 4.41765515736842e-06, + "loss": 0.2918, + "step": 10463 + }, + { + "epoch": 0.49018597461001545, + "grad_norm": 0.6202614453336461, + "learning_rate": 4.417533477453615e-06, + "loss": 0.2742, + "step": 10464 + }, + { + "epoch": 0.4902328195999438, + "grad_norm": 0.6175422029916995, + "learning_rate": 4.417411786503885e-06, + "loss": 0.3064, + "step": 10465 + }, + { + "epoch": 0.4902796645898721, + "grad_norm": 0.5873198692031549, + "learning_rate": 4.417290084519933e-06, + "loss": 0.2897, + "step": 10466 + }, + { + "epoch": 0.49032650957980045, + "grad_norm": 0.6031666276813993, + "learning_rate": 4.417168371502459e-06, + "loss": 0.276, + "step": 10467 + }, + { + "epoch": 0.49037335456972875, + "grad_norm": 0.6567535274200524, + "learning_rate": 4.417046647452161e-06, + "loss": 0.3129, + "step": 10468 + }, + { + "epoch": 0.4904201995596571, + "grad_norm": 0.5786896945047805, + "learning_rate": 4.416924912369742e-06, + "loss": 0.2954, + "step": 10469 + }, + { + "epoch": 0.4904670445495854, + "grad_norm": 0.6118913968546027, + "learning_rate": 4.416803166255903e-06, + "loss": 0.2688, + "step": 10470 + }, + { + "epoch": 0.49051388953951375, + "grad_norm": 0.6822486614607723, + "learning_rate": 4.416681409111343e-06, + "loss": 0.2923, + "step": 10471 + }, + { + "epoch": 0.4905607345294421, + "grad_norm": 0.6032655680326172, + "learning_rate": 4.416559640936763e-06, + "loss": 0.2732, + "step": 10472 + }, + { + "epoch": 0.4906075795193704, + "grad_norm": 0.5605447099776373, + "learning_rate": 4.416437861732864e-06, + "loss": 0.2866, + "step": 10473 + }, + { + "epoch": 0.49065442450929875, + "grad_norm": 0.6199802957874482, + "learning_rate": 4.4163160715003465e-06, + "loss": 0.3029, + "step": 10474 + }, + { + "epoch": 0.49070126949922704, + "grad_norm": 0.6042667627851634, + "learning_rate": 4.416194270239912e-06, + "loss": 0.2749, + "step": 10475 + }, + { + "epoch": 0.4907481144891554, + "grad_norm": 0.5840702317604788, + "learning_rate": 4.416072457952261e-06, + "loss": 0.2698, + "step": 10476 + }, + { + "epoch": 0.4907949594790837, + "grad_norm": 0.5878892627982361, + "learning_rate": 4.415950634638095e-06, + "loss": 0.2797, + "step": 10477 + }, + { + "epoch": 0.49084180446901204, + "grad_norm": 0.594260379233208, + "learning_rate": 4.4158288002981145e-06, + "loss": 0.2755, + "step": 10478 + }, + { + "epoch": 0.49088864945894034, + "grad_norm": 0.5659490069168559, + "learning_rate": 4.415706954933021e-06, + "loss": 0.2788, + "step": 10479 + }, + { + "epoch": 0.4909354944488687, + "grad_norm": 0.580744388239348, + "learning_rate": 4.4155850985435144e-06, + "loss": 0.2768, + "step": 10480 + }, + { + "epoch": 0.49098233943879704, + "grad_norm": 0.5649121262873212, + "learning_rate": 4.415463231130298e-06, + "loss": 0.2823, + "step": 10481 + }, + { + "epoch": 0.49102918442872534, + "grad_norm": 0.6424952441161341, + "learning_rate": 4.415341352694072e-06, + "loss": 0.2904, + "step": 10482 + }, + { + "epoch": 0.4910760294186537, + "grad_norm": 0.6385501471776006, + "learning_rate": 4.4152194632355375e-06, + "loss": 0.2939, + "step": 10483 + }, + { + "epoch": 0.491122874408582, + "grad_norm": 0.5896593515226731, + "learning_rate": 4.415097562755397e-06, + "loss": 0.2807, + "step": 10484 + }, + { + "epoch": 0.49116971939851034, + "grad_norm": 0.566197652871045, + "learning_rate": 4.41497565125435e-06, + "loss": 0.277, + "step": 10485 + }, + { + "epoch": 0.49121656438843864, + "grad_norm": 0.636068074661671, + "learning_rate": 4.414853728733102e-06, + "loss": 0.2745, + "step": 10486 + }, + { + "epoch": 0.491263409378367, + "grad_norm": 0.5924301514155413, + "learning_rate": 4.41473179519235e-06, + "loss": 0.287, + "step": 10487 + }, + { + "epoch": 0.4913102543682953, + "grad_norm": 0.635749076938315, + "learning_rate": 4.414609850632798e-06, + "loss": 0.3063, + "step": 10488 + }, + { + "epoch": 0.49135709935822364, + "grad_norm": 0.6670142279158497, + "learning_rate": 4.414487895055148e-06, + "loss": 0.2923, + "step": 10489 + }, + { + "epoch": 0.491403944348152, + "grad_norm": 0.5642175118680686, + "learning_rate": 4.414365928460101e-06, + "loss": 0.2529, + "step": 10490 + }, + { + "epoch": 0.4914507893380803, + "grad_norm": 0.5737000245195836, + "learning_rate": 4.414243950848361e-06, + "loss": 0.2929, + "step": 10491 + }, + { + "epoch": 0.49149763432800864, + "grad_norm": 0.6054492805862389, + "learning_rate": 4.4141219622206255e-06, + "loss": 0.2877, + "step": 10492 + }, + { + "epoch": 0.49154447931793693, + "grad_norm": 0.641865280138882, + "learning_rate": 4.4139999625776e-06, + "loss": 0.3157, + "step": 10493 + }, + { + "epoch": 0.4915913243078653, + "grad_norm": 0.6122049351519252, + "learning_rate": 4.413877951919987e-06, + "loss": 0.2965, + "step": 10494 + }, + { + "epoch": 0.4916381692977936, + "grad_norm": 0.6392985957891135, + "learning_rate": 4.4137559302484864e-06, + "loss": 0.3091, + "step": 10495 + }, + { + "epoch": 0.49168501428772193, + "grad_norm": 0.5753087849085486, + "learning_rate": 4.413633897563801e-06, + "loss": 0.2966, + "step": 10496 + }, + { + "epoch": 0.49173185927765023, + "grad_norm": 0.5678860790844992, + "learning_rate": 4.413511853866634e-06, + "loss": 0.3068, + "step": 10497 + }, + { + "epoch": 0.4917787042675786, + "grad_norm": 0.5803431104598553, + "learning_rate": 4.413389799157688e-06, + "loss": 0.2946, + "step": 10498 + }, + { + "epoch": 0.49182554925750693, + "grad_norm": 0.6305987635706701, + "learning_rate": 4.413267733437663e-06, + "loss": 0.2921, + "step": 10499 + }, + { + "epoch": 0.49187239424743523, + "grad_norm": 0.5909917380776674, + "learning_rate": 4.413145656707265e-06, + "loss": 0.2906, + "step": 10500 + }, + { + "epoch": 0.4919192392373636, + "grad_norm": 0.6511964545807791, + "learning_rate": 4.413023568967193e-06, + "loss": 0.3103, + "step": 10501 + }, + { + "epoch": 0.4919660842272919, + "grad_norm": 0.6075529901839171, + "learning_rate": 4.412901470218152e-06, + "loss": 0.3121, + "step": 10502 + }, + { + "epoch": 0.49201292921722023, + "grad_norm": 0.6212806747080051, + "learning_rate": 4.4127793604608435e-06, + "loss": 0.3053, + "step": 10503 + }, + { + "epoch": 0.4920597742071485, + "grad_norm": 0.5663967748054411, + "learning_rate": 4.41265723969597e-06, + "loss": 0.299, + "step": 10504 + }, + { + "epoch": 0.4921066191970769, + "grad_norm": 0.5784447704709126, + "learning_rate": 4.412535107924236e-06, + "loss": 0.2901, + "step": 10505 + }, + { + "epoch": 0.4921534641870052, + "grad_norm": 0.5634491230908305, + "learning_rate": 4.412412965146342e-06, + "loss": 0.2846, + "step": 10506 + }, + { + "epoch": 0.4922003091769335, + "grad_norm": 0.5614605443061346, + "learning_rate": 4.412290811362993e-06, + "loss": 0.2718, + "step": 10507 + }, + { + "epoch": 0.4922471541668619, + "grad_norm": 0.5713945875987849, + "learning_rate": 4.4121686465748904e-06, + "loss": 0.2944, + "step": 10508 + }, + { + "epoch": 0.4922939991567902, + "grad_norm": 0.5312934895744084, + "learning_rate": 4.4120464707827386e-06, + "loss": 0.2723, + "step": 10509 + }, + { + "epoch": 0.4923408441467185, + "grad_norm": 0.6115345824055527, + "learning_rate": 4.411924283987239e-06, + "loss": 0.2778, + "step": 10510 + }, + { + "epoch": 0.4923876891366468, + "grad_norm": 0.6050448135721561, + "learning_rate": 4.411802086189097e-06, + "loss": 0.2776, + "step": 10511 + }, + { + "epoch": 0.4924345341265752, + "grad_norm": 0.5851661709545082, + "learning_rate": 4.411679877389013e-06, + "loss": 0.279, + "step": 10512 + }, + { + "epoch": 0.49248137911650347, + "grad_norm": 0.641445279258362, + "learning_rate": 4.411557657587692e-06, + "loss": 0.2825, + "step": 10513 + }, + { + "epoch": 0.4925282241064318, + "grad_norm": 0.5476381259039529, + "learning_rate": 4.4114354267858376e-06, + "loss": 0.2755, + "step": 10514 + }, + { + "epoch": 0.4925750690963601, + "grad_norm": 0.5520589753746513, + "learning_rate": 4.411313184984153e-06, + "loss": 0.2785, + "step": 10515 + }, + { + "epoch": 0.49262191408628847, + "grad_norm": 0.6046854596617082, + "learning_rate": 4.411190932183341e-06, + "loss": 0.2879, + "step": 10516 + }, + { + "epoch": 0.4926687590762168, + "grad_norm": 0.6155570542682839, + "learning_rate": 4.411068668384105e-06, + "loss": 0.2894, + "step": 10517 + }, + { + "epoch": 0.4927156040661451, + "grad_norm": 0.5741654840875479, + "learning_rate": 4.41094639358715e-06, + "loss": 0.2844, + "step": 10518 + }, + { + "epoch": 0.49276244905607347, + "grad_norm": 0.5757911508816093, + "learning_rate": 4.410824107793179e-06, + "loss": 0.2763, + "step": 10519 + }, + { + "epoch": 0.49280929404600177, + "grad_norm": 0.6200851857342646, + "learning_rate": 4.410701811002895e-06, + "loss": 0.2897, + "step": 10520 + }, + { + "epoch": 0.4928561390359301, + "grad_norm": 0.558149636452917, + "learning_rate": 4.410579503217003e-06, + "loss": 0.2709, + "step": 10521 + }, + { + "epoch": 0.4929029840258584, + "grad_norm": 0.6377779031752692, + "learning_rate": 4.410457184436205e-06, + "loss": 0.3087, + "step": 10522 + }, + { + "epoch": 0.49294982901578677, + "grad_norm": 0.6431040183734157, + "learning_rate": 4.410334854661207e-06, + "loss": 0.2978, + "step": 10523 + }, + { + "epoch": 0.49299667400571506, + "grad_norm": 0.6248068174779507, + "learning_rate": 4.4102125138927114e-06, + "loss": 0.2885, + "step": 10524 + }, + { + "epoch": 0.4930435189956434, + "grad_norm": 0.6203552986274288, + "learning_rate": 4.410090162131423e-06, + "loss": 0.297, + "step": 10525 + }, + { + "epoch": 0.49309036398557177, + "grad_norm": 0.621673750725323, + "learning_rate": 4.409967799378047e-06, + "loss": 0.3007, + "step": 10526 + }, + { + "epoch": 0.49313720897550006, + "grad_norm": 0.6634986108085691, + "learning_rate": 4.409845425633285e-06, + "loss": 0.2855, + "step": 10527 + }, + { + "epoch": 0.4931840539654284, + "grad_norm": 0.5916356654732703, + "learning_rate": 4.409723040897843e-06, + "loss": 0.3055, + "step": 10528 + }, + { + "epoch": 0.4932308989553567, + "grad_norm": 0.6473992919974981, + "learning_rate": 4.409600645172425e-06, + "loss": 0.2907, + "step": 10529 + }, + { + "epoch": 0.49327774394528506, + "grad_norm": 0.6608162082049959, + "learning_rate": 4.409478238457735e-06, + "loss": 0.3019, + "step": 10530 + }, + { + "epoch": 0.49332458893521336, + "grad_norm": 0.6934292264148306, + "learning_rate": 4.4093558207544776e-06, + "loss": 0.2841, + "step": 10531 + }, + { + "epoch": 0.4933714339251417, + "grad_norm": 0.6324348805553001, + "learning_rate": 4.409233392063358e-06, + "loss": 0.2817, + "step": 10532 + }, + { + "epoch": 0.49341827891507, + "grad_norm": 0.6066078828091412, + "learning_rate": 4.40911095238508e-06, + "loss": 0.2884, + "step": 10533 + }, + { + "epoch": 0.49346512390499836, + "grad_norm": 0.6624826413674888, + "learning_rate": 4.4089885017203475e-06, + "loss": 0.2879, + "step": 10534 + }, + { + "epoch": 0.4935119688949267, + "grad_norm": 0.6321379249580279, + "learning_rate": 4.4088660400698666e-06, + "loss": 0.2881, + "step": 10535 + }, + { + "epoch": 0.493558813884855, + "grad_norm": 0.5878437257195565, + "learning_rate": 4.408743567434341e-06, + "loss": 0.2711, + "step": 10536 + }, + { + "epoch": 0.49360565887478336, + "grad_norm": 0.5735946735405013, + "learning_rate": 4.408621083814476e-06, + "loss": 0.2856, + "step": 10537 + }, + { + "epoch": 0.49365250386471166, + "grad_norm": 0.6326751560123721, + "learning_rate": 4.4084985892109765e-06, + "loss": 0.2919, + "step": 10538 + }, + { + "epoch": 0.49369934885464, + "grad_norm": 0.5722134005813506, + "learning_rate": 4.408376083624547e-06, + "loss": 0.2748, + "step": 10539 + }, + { + "epoch": 0.4937461938445683, + "grad_norm": 0.6192442531837061, + "learning_rate": 4.408253567055894e-06, + "loss": 0.2947, + "step": 10540 + }, + { + "epoch": 0.49379303883449666, + "grad_norm": 0.6213412974700844, + "learning_rate": 4.40813103950572e-06, + "loss": 0.2939, + "step": 10541 + }, + { + "epoch": 0.49383988382442495, + "grad_norm": 0.6047109822787208, + "learning_rate": 4.408008500974732e-06, + "loss": 0.2884, + "step": 10542 + }, + { + "epoch": 0.4938867288143533, + "grad_norm": 0.5997955194319233, + "learning_rate": 4.407885951463634e-06, + "loss": 0.297, + "step": 10543 + }, + { + "epoch": 0.49393357380428166, + "grad_norm": 0.5930102508710956, + "learning_rate": 4.407763390973133e-06, + "loss": 0.269, + "step": 10544 + }, + { + "epoch": 0.49398041879420995, + "grad_norm": 0.6149866012096963, + "learning_rate": 4.407640819503932e-06, + "loss": 0.3046, + "step": 10545 + }, + { + "epoch": 0.4940272637841383, + "grad_norm": 0.6022997602369314, + "learning_rate": 4.407518237056738e-06, + "loss": 0.3105, + "step": 10546 + }, + { + "epoch": 0.4940741087740666, + "grad_norm": 0.5580879192533168, + "learning_rate": 4.407395643632257e-06, + "loss": 0.2835, + "step": 10547 + }, + { + "epoch": 0.49412095376399495, + "grad_norm": 0.6382861586557532, + "learning_rate": 4.407273039231193e-06, + "loss": 0.2993, + "step": 10548 + }, + { + "epoch": 0.49416779875392325, + "grad_norm": 0.613811423743024, + "learning_rate": 4.4071504238542515e-06, + "loss": 0.2922, + "step": 10549 + }, + { + "epoch": 0.4942146437438516, + "grad_norm": 0.608166092940092, + "learning_rate": 4.407027797502138e-06, + "loss": 0.3081, + "step": 10550 + }, + { + "epoch": 0.4942614887337799, + "grad_norm": 0.5480360537150866, + "learning_rate": 4.40690516017556e-06, + "loss": 0.2785, + "step": 10551 + }, + { + "epoch": 0.49430833372370825, + "grad_norm": 0.64519237791046, + "learning_rate": 4.406782511875222e-06, + "loss": 0.2951, + "step": 10552 + }, + { + "epoch": 0.4943551787136366, + "grad_norm": 0.6063499357011846, + "learning_rate": 4.40665985260183e-06, + "loss": 0.2787, + "step": 10553 + }, + { + "epoch": 0.4944020237035649, + "grad_norm": 0.6027732058900204, + "learning_rate": 4.406537182356089e-06, + "loss": 0.2927, + "step": 10554 + }, + { + "epoch": 0.49444886869349325, + "grad_norm": 0.5971949154437068, + "learning_rate": 4.406414501138706e-06, + "loss": 0.2771, + "step": 10555 + }, + { + "epoch": 0.49449571368342154, + "grad_norm": 0.5722342051860939, + "learning_rate": 4.406291808950387e-06, + "loss": 0.2829, + "step": 10556 + }, + { + "epoch": 0.4945425586733499, + "grad_norm": 0.5685702458673944, + "learning_rate": 4.406169105791838e-06, + "loss": 0.2736, + "step": 10557 + }, + { + "epoch": 0.4945894036632782, + "grad_norm": 0.5881985564638542, + "learning_rate": 4.406046391663764e-06, + "loss": 0.2916, + "step": 10558 + }, + { + "epoch": 0.49463624865320655, + "grad_norm": 0.5391549880981249, + "learning_rate": 4.405923666566872e-06, + "loss": 0.2767, + "step": 10559 + }, + { + "epoch": 0.49468309364313484, + "grad_norm": 0.5523106354450035, + "learning_rate": 4.405800930501869e-06, + "loss": 0.2703, + "step": 10560 + }, + { + "epoch": 0.4947299386330632, + "grad_norm": 0.5760478353963118, + "learning_rate": 4.405678183469461e-06, + "loss": 0.2753, + "step": 10561 + }, + { + "epoch": 0.49477678362299155, + "grad_norm": 0.6111376046395004, + "learning_rate": 4.405555425470353e-06, + "loss": 0.2983, + "step": 10562 + }, + { + "epoch": 0.49482362861291984, + "grad_norm": 0.6843770125805442, + "learning_rate": 4.4054326565052535e-06, + "loss": 0.2909, + "step": 10563 + }, + { + "epoch": 0.4948704736028482, + "grad_norm": 0.6325187415207723, + "learning_rate": 4.4053098765748675e-06, + "loss": 0.3029, + "step": 10564 + }, + { + "epoch": 0.4949173185927765, + "grad_norm": 0.6345998000508571, + "learning_rate": 4.405187085679903e-06, + "loss": 0.3115, + "step": 10565 + }, + { + "epoch": 0.49496416358270484, + "grad_norm": 0.6836151298851737, + "learning_rate": 4.405064283821064e-06, + "loss": 0.309, + "step": 10566 + }, + { + "epoch": 0.49501100857263314, + "grad_norm": 0.5633842331598332, + "learning_rate": 4.40494147099906e-06, + "loss": 0.2741, + "step": 10567 + }, + { + "epoch": 0.4950578535625615, + "grad_norm": 0.5913178969358985, + "learning_rate": 4.404818647214596e-06, + "loss": 0.2873, + "step": 10568 + }, + { + "epoch": 0.4951046985524898, + "grad_norm": 0.5811764104983114, + "learning_rate": 4.4046958124683796e-06, + "loss": 0.2835, + "step": 10569 + }, + { + "epoch": 0.49515154354241814, + "grad_norm": 0.5566964822761922, + "learning_rate": 4.404572966761117e-06, + "loss": 0.2706, + "step": 10570 + }, + { + "epoch": 0.4951983885323465, + "grad_norm": 0.5804921164061914, + "learning_rate": 4.404450110093516e-06, + "loss": 0.273, + "step": 10571 + }, + { + "epoch": 0.4952452335222748, + "grad_norm": 0.6374863316757939, + "learning_rate": 4.404327242466284e-06, + "loss": 0.279, + "step": 10572 + }, + { + "epoch": 0.49529207851220314, + "grad_norm": 0.5788761082248961, + "learning_rate": 4.4042043638801265e-06, + "loss": 0.2967, + "step": 10573 + }, + { + "epoch": 0.49533892350213143, + "grad_norm": 0.6071899208153001, + "learning_rate": 4.404081474335753e-06, + "loss": 0.2777, + "step": 10574 + }, + { + "epoch": 0.4953857684920598, + "grad_norm": 0.6072244689663729, + "learning_rate": 4.403958573833868e-06, + "loss": 0.2799, + "step": 10575 + }, + { + "epoch": 0.4954326134819881, + "grad_norm": 0.6374991086665828, + "learning_rate": 4.40383566237518e-06, + "loss": 0.2833, + "step": 10576 + }, + { + "epoch": 0.49547945847191643, + "grad_norm": 0.584639084027702, + "learning_rate": 4.403712739960396e-06, + "loss": 0.2883, + "step": 10577 + }, + { + "epoch": 0.49552630346184473, + "grad_norm": 0.6188530075533366, + "learning_rate": 4.403589806590224e-06, + "loss": 0.3038, + "step": 10578 + }, + { + "epoch": 0.4955731484517731, + "grad_norm": 0.603843196100808, + "learning_rate": 4.403466862265371e-06, + "loss": 0.2864, + "step": 10579 + }, + { + "epoch": 0.49561999344170143, + "grad_norm": 0.5603291625625365, + "learning_rate": 4.403343906986545e-06, + "loss": 0.2839, + "step": 10580 + }, + { + "epoch": 0.49566683843162973, + "grad_norm": 0.6129232550790172, + "learning_rate": 4.403220940754452e-06, + "loss": 0.2885, + "step": 10581 + }, + { + "epoch": 0.4957136834215581, + "grad_norm": 0.5899632129400249, + "learning_rate": 4.403097963569802e-06, + "loss": 0.2721, + "step": 10582 + }, + { + "epoch": 0.4957605284114864, + "grad_norm": 0.6146284314702909, + "learning_rate": 4.402974975433302e-06, + "loss": 0.2762, + "step": 10583 + }, + { + "epoch": 0.49580737340141473, + "grad_norm": 0.6171188227411355, + "learning_rate": 4.402851976345658e-06, + "loss": 0.2847, + "step": 10584 + }, + { + "epoch": 0.495854218391343, + "grad_norm": 0.6123556340114893, + "learning_rate": 4.40272896630758e-06, + "loss": 0.2939, + "step": 10585 + }, + { + "epoch": 0.4959010633812714, + "grad_norm": 0.6337057043325847, + "learning_rate": 4.402605945319775e-06, + "loss": 0.3108, + "step": 10586 + }, + { + "epoch": 0.4959479083711997, + "grad_norm": 0.5943113700773797, + "learning_rate": 4.40248291338295e-06, + "loss": 0.2844, + "step": 10587 + }, + { + "epoch": 0.495994753361128, + "grad_norm": 0.6034583399766547, + "learning_rate": 4.4023598704978156e-06, + "loss": 0.2926, + "step": 10588 + }, + { + "epoch": 0.4960415983510564, + "grad_norm": 0.5982389587505523, + "learning_rate": 4.402236816665077e-06, + "loss": 0.2795, + "step": 10589 + }, + { + "epoch": 0.4960884433409847, + "grad_norm": 0.6234612623022586, + "learning_rate": 4.402113751885444e-06, + "loss": 0.3082, + "step": 10590 + }, + { + "epoch": 0.496135288330913, + "grad_norm": 0.6248581292986394, + "learning_rate": 4.401990676159625e-06, + "loss": 0.296, + "step": 10591 + }, + { + "epoch": 0.4961821333208413, + "grad_norm": 0.5895942928816023, + "learning_rate": 4.401867589488327e-06, + "loss": 0.2904, + "step": 10592 + }, + { + "epoch": 0.4962289783107697, + "grad_norm": 0.6017869912867273, + "learning_rate": 4.4017444918722596e-06, + "loss": 0.2903, + "step": 10593 + }, + { + "epoch": 0.49627582330069797, + "grad_norm": 0.5670507859243595, + "learning_rate": 4.4016213833121305e-06, + "loss": 0.2888, + "step": 10594 + }, + { + "epoch": 0.4963226682906263, + "grad_norm": 0.5790458357425043, + "learning_rate": 4.401498263808648e-06, + "loss": 0.2811, + "step": 10595 + }, + { + "epoch": 0.4963695132805546, + "grad_norm": 0.6210578766639236, + "learning_rate": 4.401375133362521e-06, + "loss": 0.2833, + "step": 10596 + }, + { + "epoch": 0.49641635827048297, + "grad_norm": 0.605859180532853, + "learning_rate": 4.401251991974458e-06, + "loss": 0.2966, + "step": 10597 + }, + { + "epoch": 0.4964632032604113, + "grad_norm": 0.6163380905307992, + "learning_rate": 4.401128839645168e-06, + "loss": 0.3058, + "step": 10598 + }, + { + "epoch": 0.4965100482503396, + "grad_norm": 0.6515385301646791, + "learning_rate": 4.401005676375358e-06, + "loss": 0.3062, + "step": 10599 + }, + { + "epoch": 0.49655689324026797, + "grad_norm": 0.5322002972186407, + "learning_rate": 4.40088250216574e-06, + "loss": 0.2657, + "step": 10600 + }, + { + "epoch": 0.49660373823019627, + "grad_norm": 0.5712380279894632, + "learning_rate": 4.40075931701702e-06, + "loss": 0.2762, + "step": 10601 + }, + { + "epoch": 0.4966505832201246, + "grad_norm": 0.5469798698309883, + "learning_rate": 4.400636120929908e-06, + "loss": 0.2856, + "step": 10602 + }, + { + "epoch": 0.4966974282100529, + "grad_norm": 0.6066394315320169, + "learning_rate": 4.4005129139051125e-06, + "loss": 0.303, + "step": 10603 + }, + { + "epoch": 0.49674427319998127, + "grad_norm": 0.6473267603786294, + "learning_rate": 4.400389695943344e-06, + "loss": 0.28, + "step": 10604 + }, + { + "epoch": 0.49679111818990956, + "grad_norm": 0.5984845185327091, + "learning_rate": 4.400266467045309e-06, + "loss": 0.2995, + "step": 10605 + }, + { + "epoch": 0.4968379631798379, + "grad_norm": 0.5690880050757992, + "learning_rate": 4.400143227211718e-06, + "loss": 0.2892, + "step": 10606 + }, + { + "epoch": 0.49688480816976627, + "grad_norm": 0.6152542588043767, + "learning_rate": 4.400019976443282e-06, + "loss": 0.3109, + "step": 10607 + }, + { + "epoch": 0.49693165315969456, + "grad_norm": 0.5882438562269933, + "learning_rate": 4.399896714740707e-06, + "loss": 0.2894, + "step": 10608 + }, + { + "epoch": 0.4969784981496229, + "grad_norm": 0.6099690164673115, + "learning_rate": 4.399773442104705e-06, + "loss": 0.2773, + "step": 10609 + }, + { + "epoch": 0.4970253431395512, + "grad_norm": 0.6024840818126723, + "learning_rate": 4.399650158535984e-06, + "loss": 0.2855, + "step": 10610 + }, + { + "epoch": 0.49707218812947956, + "grad_norm": 0.6532373030799519, + "learning_rate": 4.3995268640352535e-06, + "loss": 0.3051, + "step": 10611 + }, + { + "epoch": 0.49711903311940786, + "grad_norm": 0.5981036516645261, + "learning_rate": 4.399403558603223e-06, + "loss": 0.2931, + "step": 10612 + }, + { + "epoch": 0.4971658781093362, + "grad_norm": 0.6143225601684834, + "learning_rate": 4.399280242240604e-06, + "loss": 0.2782, + "step": 10613 + }, + { + "epoch": 0.4972127230992645, + "grad_norm": 0.6508813998190248, + "learning_rate": 4.399156914948103e-06, + "loss": 0.2897, + "step": 10614 + }, + { + "epoch": 0.49725956808919286, + "grad_norm": 0.6118316318222369, + "learning_rate": 4.399033576726431e-06, + "loss": 0.297, + "step": 10615 + }, + { + "epoch": 0.4973064130791212, + "grad_norm": 0.5852841151887541, + "learning_rate": 4.3989102275762995e-06, + "loss": 0.2914, + "step": 10616 + }, + { + "epoch": 0.4973532580690495, + "grad_norm": 0.6650730824770402, + "learning_rate": 4.398786867498416e-06, + "loss": 0.2963, + "step": 10617 + }, + { + "epoch": 0.49740010305897786, + "grad_norm": 0.5574372972274032, + "learning_rate": 4.398663496493491e-06, + "loss": 0.2763, + "step": 10618 + }, + { + "epoch": 0.49744694804890616, + "grad_norm": 0.6025047905470281, + "learning_rate": 4.398540114562237e-06, + "loss": 0.3068, + "step": 10619 + }, + { + "epoch": 0.4974937930388345, + "grad_norm": 0.5890207960827839, + "learning_rate": 4.39841672170536e-06, + "loss": 0.2896, + "step": 10620 + }, + { + "epoch": 0.4975406380287628, + "grad_norm": 0.6116193991764604, + "learning_rate": 4.398293317923572e-06, + "loss": 0.2827, + "step": 10621 + }, + { + "epoch": 0.49758748301869116, + "grad_norm": 0.6610744942671487, + "learning_rate": 4.398169903217583e-06, + "loss": 0.2802, + "step": 10622 + }, + { + "epoch": 0.49763432800861945, + "grad_norm": 0.5433361747566672, + "learning_rate": 4.3980464775881034e-06, + "loss": 0.2628, + "step": 10623 + }, + { + "epoch": 0.4976811729985478, + "grad_norm": 0.6224199373734496, + "learning_rate": 4.397923041035843e-06, + "loss": 0.2907, + "step": 10624 + }, + { + "epoch": 0.49772801798847616, + "grad_norm": 0.5344248931694383, + "learning_rate": 4.3977995935615136e-06, + "loss": 0.272, + "step": 10625 + }, + { + "epoch": 0.49777486297840445, + "grad_norm": 0.5674482230858046, + "learning_rate": 4.397676135165823e-06, + "loss": 0.291, + "step": 10626 + }, + { + "epoch": 0.4978217079683328, + "grad_norm": 0.6038164082809035, + "learning_rate": 4.397552665849485e-06, + "loss": 0.277, + "step": 10627 + }, + { + "epoch": 0.4978685529582611, + "grad_norm": 0.6098164269959905, + "learning_rate": 4.397429185613208e-06, + "loss": 0.308, + "step": 10628 + }, + { + "epoch": 0.49791539794818945, + "grad_norm": 0.6345315644454577, + "learning_rate": 4.397305694457702e-06, + "loss": 0.2526, + "step": 10629 + }, + { + "epoch": 0.49796224293811775, + "grad_norm": 0.5833536039890457, + "learning_rate": 4.397182192383679e-06, + "loss": 0.2686, + "step": 10630 + }, + { + "epoch": 0.4980090879280461, + "grad_norm": 0.6215454265542117, + "learning_rate": 4.397058679391849e-06, + "loss": 0.2843, + "step": 10631 + }, + { + "epoch": 0.4980559329179744, + "grad_norm": 0.6238497647681341, + "learning_rate": 4.396935155482923e-06, + "loss": 0.2974, + "step": 10632 + }, + { + "epoch": 0.49810277790790275, + "grad_norm": 0.5766728892673229, + "learning_rate": 4.396811620657613e-06, + "loss": 0.261, + "step": 10633 + }, + { + "epoch": 0.4981496228978311, + "grad_norm": 0.5987981067625008, + "learning_rate": 4.396688074916628e-06, + "loss": 0.2834, + "step": 10634 + }, + { + "epoch": 0.4981964678877594, + "grad_norm": 0.6053087340397262, + "learning_rate": 4.39656451826068e-06, + "loss": 0.2909, + "step": 10635 + }, + { + "epoch": 0.49824331287768775, + "grad_norm": 0.5825885807721856, + "learning_rate": 4.3964409506904806e-06, + "loss": 0.2761, + "step": 10636 + }, + { + "epoch": 0.49829015786761605, + "grad_norm": 0.6104389496436652, + "learning_rate": 4.396317372206738e-06, + "loss": 0.2916, + "step": 10637 + }, + { + "epoch": 0.4983370028575444, + "grad_norm": 0.5969937963534767, + "learning_rate": 4.396193782810168e-06, + "loss": 0.2838, + "step": 10638 + }, + { + "epoch": 0.4983838478474727, + "grad_norm": 0.5889856801858276, + "learning_rate": 4.396070182501479e-06, + "loss": 0.2876, + "step": 10639 + }, + { + "epoch": 0.49843069283740105, + "grad_norm": 0.6075365660753523, + "learning_rate": 4.395946571281382e-06, + "loss": 0.2785, + "step": 10640 + }, + { + "epoch": 0.49847753782732934, + "grad_norm": 0.6026456758033156, + "learning_rate": 4.3958229491505885e-06, + "loss": 0.2908, + "step": 10641 + }, + { + "epoch": 0.4985243828172577, + "grad_norm": 0.6019332215143998, + "learning_rate": 4.395699316109812e-06, + "loss": 0.2944, + "step": 10642 + }, + { + "epoch": 0.49857122780718605, + "grad_norm": 0.6139558043239176, + "learning_rate": 4.395575672159761e-06, + "loss": 0.2963, + "step": 10643 + }, + { + "epoch": 0.49861807279711434, + "grad_norm": 0.6347491797592717, + "learning_rate": 4.395452017301149e-06, + "loss": 0.3014, + "step": 10644 + }, + { + "epoch": 0.4986649177870427, + "grad_norm": 0.5497913786312655, + "learning_rate": 4.395328351534687e-06, + "loss": 0.2559, + "step": 10645 + }, + { + "epoch": 0.498711762776971, + "grad_norm": 0.5771445981969324, + "learning_rate": 4.395204674861087e-06, + "loss": 0.2866, + "step": 10646 + }, + { + "epoch": 0.49875860776689934, + "grad_norm": 0.5599205219512469, + "learning_rate": 4.395080987281059e-06, + "loss": 0.2692, + "step": 10647 + }, + { + "epoch": 0.49880545275682764, + "grad_norm": 0.5671905758937511, + "learning_rate": 4.394957288795318e-06, + "loss": 0.2729, + "step": 10648 + }, + { + "epoch": 0.498852297746756, + "grad_norm": 0.5684820991763594, + "learning_rate": 4.394833579404573e-06, + "loss": 0.3052, + "step": 10649 + }, + { + "epoch": 0.4988991427366843, + "grad_norm": 0.563693269333779, + "learning_rate": 4.394709859109537e-06, + "loss": 0.2734, + "step": 10650 + }, + { + "epoch": 0.49894598772661264, + "grad_norm": 0.5762100159216111, + "learning_rate": 4.3945861279109225e-06, + "loss": 0.2719, + "step": 10651 + }, + { + "epoch": 0.498992832716541, + "grad_norm": 0.6642051520563651, + "learning_rate": 4.394462385809442e-06, + "loss": 0.2832, + "step": 10652 + }, + { + "epoch": 0.4990396777064693, + "grad_norm": 0.6024822022911245, + "learning_rate": 4.3943386328058044e-06, + "loss": 0.3021, + "step": 10653 + }, + { + "epoch": 0.49908652269639764, + "grad_norm": 0.610714702331871, + "learning_rate": 4.394214868900726e-06, + "loss": 0.3074, + "step": 10654 + }, + { + "epoch": 0.49913336768632593, + "grad_norm": 0.5285315157144246, + "learning_rate": 4.394091094094916e-06, + "loss": 0.2676, + "step": 10655 + }, + { + "epoch": 0.4991802126762543, + "grad_norm": 0.5892492000816001, + "learning_rate": 4.393967308389088e-06, + "loss": 0.305, + "step": 10656 + }, + { + "epoch": 0.4992270576661826, + "grad_norm": 0.5869921652178035, + "learning_rate": 4.393843511783955e-06, + "loss": 0.2855, + "step": 10657 + }, + { + "epoch": 0.49927390265611094, + "grad_norm": 0.5643454150937809, + "learning_rate": 4.393719704280228e-06, + "loss": 0.2925, + "step": 10658 + }, + { + "epoch": 0.49932074764603923, + "grad_norm": 0.5835470210616824, + "learning_rate": 4.39359588587862e-06, + "loss": 0.285, + "step": 10659 + }, + { + "epoch": 0.4993675926359676, + "grad_norm": 0.6296627248589441, + "learning_rate": 4.393472056579843e-06, + "loss": 0.2813, + "step": 10660 + }, + { + "epoch": 0.49941443762589594, + "grad_norm": 0.6396315247661989, + "learning_rate": 4.393348216384611e-06, + "loss": 0.3132, + "step": 10661 + }, + { + "epoch": 0.49946128261582423, + "grad_norm": 0.6390230273694176, + "learning_rate": 4.393224365293636e-06, + "loss": 0.2726, + "step": 10662 + }, + { + "epoch": 0.4995081276057526, + "grad_norm": 0.5920914260435434, + "learning_rate": 4.39310050330763e-06, + "loss": 0.2855, + "step": 10663 + }, + { + "epoch": 0.4995549725956809, + "grad_norm": 0.5903964372596113, + "learning_rate": 4.392976630427307e-06, + "loss": 0.2487, + "step": 10664 + }, + { + "epoch": 0.49960181758560923, + "grad_norm": 0.5802322814679552, + "learning_rate": 4.3928527466533786e-06, + "loss": 0.3045, + "step": 10665 + }, + { + "epoch": 0.4996486625755375, + "grad_norm": 0.5773175085649668, + "learning_rate": 4.392728851986559e-06, + "loss": 0.2776, + "step": 10666 + }, + { + "epoch": 0.4996955075654659, + "grad_norm": 0.6367651292416884, + "learning_rate": 4.39260494642756e-06, + "loss": 0.2935, + "step": 10667 + }, + { + "epoch": 0.4997423525553942, + "grad_norm": 0.6187348063573072, + "learning_rate": 4.392481029977096e-06, + "loss": 0.3033, + "step": 10668 + }, + { + "epoch": 0.4997891975453225, + "grad_norm": 0.5424084741868621, + "learning_rate": 4.392357102635879e-06, + "loss": 0.2623, + "step": 10669 + }, + { + "epoch": 0.4998360425352509, + "grad_norm": 0.5630399394305639, + "learning_rate": 4.392233164404622e-06, + "loss": 0.2741, + "step": 10670 + }, + { + "epoch": 0.4998828875251792, + "grad_norm": 0.6264281910043926, + "learning_rate": 4.39210921528404e-06, + "loss": 0.2917, + "step": 10671 + }, + { + "epoch": 0.4999297325151075, + "grad_norm": 0.6098961599771104, + "learning_rate": 4.391985255274844e-06, + "loss": 0.3002, + "step": 10672 + }, + { + "epoch": 0.4999765775050358, + "grad_norm": 0.5632739476871446, + "learning_rate": 4.391861284377749e-06, + "loss": 0.3075, + "step": 10673 + }, + { + "epoch": 0.5000234224949641, + "grad_norm": 0.536336675250781, + "learning_rate": 4.391737302593467e-06, + "loss": 0.2723, + "step": 10674 + }, + { + "epoch": 0.5000702674848925, + "grad_norm": 0.5567463384452543, + "learning_rate": 4.391613309922712e-06, + "loss": 0.2639, + "step": 10675 + }, + { + "epoch": 0.5001171124748208, + "grad_norm": 0.5850333937326637, + "learning_rate": 4.391489306366199e-06, + "loss": 0.2811, + "step": 10676 + }, + { + "epoch": 0.5001639574647492, + "grad_norm": 0.5779644500528679, + "learning_rate": 4.39136529192464e-06, + "loss": 0.2768, + "step": 10677 + }, + { + "epoch": 0.5002108024546774, + "grad_norm": 0.5710018175786533, + "learning_rate": 4.391241266598749e-06, + "loss": 0.2897, + "step": 10678 + }, + { + "epoch": 0.5002576474446058, + "grad_norm": 0.6466787078355192, + "learning_rate": 4.39111723038924e-06, + "loss": 0.2995, + "step": 10679 + }, + { + "epoch": 0.5003044924345341, + "grad_norm": 0.677256649524843, + "learning_rate": 4.390993183296827e-06, + "loss": 0.2998, + "step": 10680 + }, + { + "epoch": 0.5003513374244625, + "grad_norm": 0.5579001205707733, + "learning_rate": 4.390869125322223e-06, + "loss": 0.2749, + "step": 10681 + }, + { + "epoch": 0.5003981824143908, + "grad_norm": 0.5691202550637986, + "learning_rate": 4.390745056466143e-06, + "loss": 0.2888, + "step": 10682 + }, + { + "epoch": 0.5004450274043191, + "grad_norm": 0.5844602215965454, + "learning_rate": 4.390620976729299e-06, + "loss": 0.3101, + "step": 10683 + }, + { + "epoch": 0.5004918723942474, + "grad_norm": 0.5840611221822517, + "learning_rate": 4.390496886112408e-06, + "loss": 0.28, + "step": 10684 + }, + { + "epoch": 0.5005387173841758, + "grad_norm": 0.5926399077175061, + "learning_rate": 4.390372784616182e-06, + "loss": 0.2857, + "step": 10685 + }, + { + "epoch": 0.5005855623741041, + "grad_norm": 0.5753497449530179, + "learning_rate": 4.390248672241336e-06, + "loss": 0.2825, + "step": 10686 + }, + { + "epoch": 0.5006324073640324, + "grad_norm": 0.6084711379653757, + "learning_rate": 4.390124548988583e-06, + "loss": 0.2896, + "step": 10687 + }, + { + "epoch": 0.5006792523539607, + "grad_norm": 0.5924282252750391, + "learning_rate": 4.390000414858639e-06, + "loss": 0.2912, + "step": 10688 + }, + { + "epoch": 0.5007260973438891, + "grad_norm": 0.6424315638089945, + "learning_rate": 4.389876269852219e-06, + "loss": 0.3101, + "step": 10689 + }, + { + "epoch": 0.5007729423338174, + "grad_norm": 0.5632363832366378, + "learning_rate": 4.389752113970034e-06, + "loss": 0.2808, + "step": 10690 + }, + { + "epoch": 0.5008197873237458, + "grad_norm": 0.5910022323097303, + "learning_rate": 4.389627947212801e-06, + "loss": 0.2938, + "step": 10691 + }, + { + "epoch": 0.500866632313674, + "grad_norm": 0.5893446345819374, + "learning_rate": 4.3895037695812345e-06, + "loss": 0.2766, + "step": 10692 + }, + { + "epoch": 0.5009134773036024, + "grad_norm": 0.6192241269776861, + "learning_rate": 4.389379581076049e-06, + "loss": 0.2726, + "step": 10693 + }, + { + "epoch": 0.5009603222935307, + "grad_norm": 0.5591707124880501, + "learning_rate": 4.389255381697959e-06, + "loss": 0.2918, + "step": 10694 + }, + { + "epoch": 0.5010071672834591, + "grad_norm": 0.6275806767657902, + "learning_rate": 4.389131171447678e-06, + "loss": 0.2848, + "step": 10695 + }, + { + "epoch": 0.5010540122733873, + "grad_norm": 0.5752329121441041, + "learning_rate": 4.389006950325923e-06, + "loss": 0.2745, + "step": 10696 + }, + { + "epoch": 0.5011008572633157, + "grad_norm": 0.6027824207715465, + "learning_rate": 4.388882718333407e-06, + "loss": 0.2592, + "step": 10697 + }, + { + "epoch": 0.501147702253244, + "grad_norm": 0.5982995050524794, + "learning_rate": 4.388758475470847e-06, + "loss": 0.3032, + "step": 10698 + }, + { + "epoch": 0.5011945472431724, + "grad_norm": 0.652687678130228, + "learning_rate": 4.388634221738955e-06, + "loss": 0.3024, + "step": 10699 + }, + { + "epoch": 0.5012413922331007, + "grad_norm": 0.6466341865903922, + "learning_rate": 4.388509957138448e-06, + "loss": 0.295, + "step": 10700 + }, + { + "epoch": 0.501288237223029, + "grad_norm": 0.6425360728819707, + "learning_rate": 4.388385681670042e-06, + "loss": 0.2825, + "step": 10701 + }, + { + "epoch": 0.5013350822129573, + "grad_norm": 0.6073445285379795, + "learning_rate": 4.388261395334451e-06, + "loss": 0.2753, + "step": 10702 + }, + { + "epoch": 0.5013819272028857, + "grad_norm": 0.5860953244662801, + "learning_rate": 4.388137098132389e-06, + "loss": 0.276, + "step": 10703 + }, + { + "epoch": 0.501428772192814, + "grad_norm": 0.6125994102404749, + "learning_rate": 4.388012790064574e-06, + "loss": 0.2829, + "step": 10704 + }, + { + "epoch": 0.5014756171827423, + "grad_norm": 0.6266485424046636, + "learning_rate": 4.387888471131719e-06, + "loss": 0.2888, + "step": 10705 + }, + { + "epoch": 0.5015224621726706, + "grad_norm": 0.5522980469682852, + "learning_rate": 4.387764141334541e-06, + "loss": 0.2711, + "step": 10706 + }, + { + "epoch": 0.501569307162599, + "grad_norm": 0.5948388371363859, + "learning_rate": 4.387639800673753e-06, + "loss": 0.3032, + "step": 10707 + }, + { + "epoch": 0.5016161521525273, + "grad_norm": 0.6230355643848601, + "learning_rate": 4.387515449150075e-06, + "loss": 0.298, + "step": 10708 + }, + { + "epoch": 0.5016629971424557, + "grad_norm": 0.5942107468611812, + "learning_rate": 4.3873910867642175e-06, + "loss": 0.2963, + "step": 10709 + }, + { + "epoch": 0.5017098421323839, + "grad_norm": 0.6217662171599312, + "learning_rate": 4.3872667135169e-06, + "loss": 0.2804, + "step": 10710 + }, + { + "epoch": 0.5017566871223123, + "grad_norm": 0.5991270252751395, + "learning_rate": 4.387142329408838e-06, + "loss": 0.2878, + "step": 10711 + }, + { + "epoch": 0.5018035321122406, + "grad_norm": 0.6328229686337538, + "learning_rate": 4.387017934440745e-06, + "loss": 0.2804, + "step": 10712 + }, + { + "epoch": 0.501850377102169, + "grad_norm": 0.6113556654534655, + "learning_rate": 4.386893528613337e-06, + "loss": 0.2903, + "step": 10713 + }, + { + "epoch": 0.5018972220920972, + "grad_norm": 0.6007536795777548, + "learning_rate": 4.386769111927333e-06, + "loss": 0.2997, + "step": 10714 + }, + { + "epoch": 0.5019440670820255, + "grad_norm": 0.5426511474893553, + "learning_rate": 4.386644684383446e-06, + "loss": 0.2768, + "step": 10715 + }, + { + "epoch": 0.5019909120719539, + "grad_norm": 0.5581014728394624, + "learning_rate": 4.386520245982393e-06, + "loss": 0.2736, + "step": 10716 + }, + { + "epoch": 0.5020377570618823, + "grad_norm": 0.5782036538969856, + "learning_rate": 4.38639579672489e-06, + "loss": 0.2985, + "step": 10717 + }, + { + "epoch": 0.5020846020518106, + "grad_norm": 0.6056851388483283, + "learning_rate": 4.3862713366116535e-06, + "loss": 0.2827, + "step": 10718 + }, + { + "epoch": 0.5021314470417388, + "grad_norm": 0.6064927975136039, + "learning_rate": 4.386146865643401e-06, + "loss": 0.2946, + "step": 10719 + }, + { + "epoch": 0.5021782920316672, + "grad_norm": 0.6050960726587021, + "learning_rate": 4.386022383820845e-06, + "loss": 0.295, + "step": 10720 + }, + { + "epoch": 0.5022251370215955, + "grad_norm": 0.6232336541580361, + "learning_rate": 4.3858978911447044e-06, + "loss": 0.3181, + "step": 10721 + }, + { + "epoch": 0.5022719820115239, + "grad_norm": 0.5833706760710589, + "learning_rate": 4.385773387615697e-06, + "loss": 0.2744, + "step": 10722 + }, + { + "epoch": 0.5023188270014521, + "grad_norm": 0.6546346947351562, + "learning_rate": 4.385648873234537e-06, + "loss": 0.3068, + "step": 10723 + }, + { + "epoch": 0.5023656719913805, + "grad_norm": 0.6547526895471433, + "learning_rate": 4.385524348001942e-06, + "loss": 0.2883, + "step": 10724 + }, + { + "epoch": 0.5024125169813088, + "grad_norm": 0.5902960814083054, + "learning_rate": 4.385399811918627e-06, + "loss": 0.2797, + "step": 10725 + }, + { + "epoch": 0.5024593619712372, + "grad_norm": 0.542415482045684, + "learning_rate": 4.385275264985311e-06, + "loss": 0.2746, + "step": 10726 + }, + { + "epoch": 0.5025062069611655, + "grad_norm": 0.6077514984881778, + "learning_rate": 4.38515070720271e-06, + "loss": 0.2973, + "step": 10727 + }, + { + "epoch": 0.5025530519510938, + "grad_norm": 0.5490855154868265, + "learning_rate": 4.385026138571541e-06, + "loss": 0.2736, + "step": 10728 + }, + { + "epoch": 0.5025998969410221, + "grad_norm": 0.5841157062787226, + "learning_rate": 4.3849015590925184e-06, + "loss": 0.2957, + "step": 10729 + }, + { + "epoch": 0.5026467419309505, + "grad_norm": 0.6130253238727682, + "learning_rate": 4.384776968766362e-06, + "loss": 0.271, + "step": 10730 + }, + { + "epoch": 0.5026935869208788, + "grad_norm": 0.5786167247507337, + "learning_rate": 4.384652367593788e-06, + "loss": 0.2776, + "step": 10731 + }, + { + "epoch": 0.5027404319108071, + "grad_norm": 0.5906072416809108, + "learning_rate": 4.384527755575514e-06, + "loss": 0.2964, + "step": 10732 + }, + { + "epoch": 0.5027872769007354, + "grad_norm": 0.5811584973750189, + "learning_rate": 4.3844031327122554e-06, + "loss": 0.3003, + "step": 10733 + }, + { + "epoch": 0.5028341218906638, + "grad_norm": 0.5710111594301884, + "learning_rate": 4.38427849900473e-06, + "loss": 0.2789, + "step": 10734 + }, + { + "epoch": 0.5028809668805921, + "grad_norm": 0.5577708107946294, + "learning_rate": 4.3841538544536564e-06, + "loss": 0.2838, + "step": 10735 + }, + { + "epoch": 0.5029278118705205, + "grad_norm": 0.6295421305204274, + "learning_rate": 4.384029199059752e-06, + "loss": 0.2993, + "step": 10736 + }, + { + "epoch": 0.5029746568604487, + "grad_norm": 0.5861741846670446, + "learning_rate": 4.383904532823731e-06, + "loss": 0.2921, + "step": 10737 + }, + { + "epoch": 0.5030215018503771, + "grad_norm": 0.6226167511894756, + "learning_rate": 4.383779855746314e-06, + "loss": 0.2934, + "step": 10738 + }, + { + "epoch": 0.5030683468403054, + "grad_norm": 0.5623152537378444, + "learning_rate": 4.383655167828217e-06, + "loss": 0.294, + "step": 10739 + }, + { + "epoch": 0.5031151918302338, + "grad_norm": 0.5939850315556633, + "learning_rate": 4.383530469070158e-06, + "loss": 0.3046, + "step": 10740 + }, + { + "epoch": 0.503162036820162, + "grad_norm": 0.5691092909944567, + "learning_rate": 4.383405759472855e-06, + "loss": 0.2763, + "step": 10741 + }, + { + "epoch": 0.5032088818100904, + "grad_norm": 0.571675695244153, + "learning_rate": 4.383281039037024e-06, + "loss": 0.2902, + "step": 10742 + }, + { + "epoch": 0.5032557268000187, + "grad_norm": 0.6321450014700746, + "learning_rate": 4.383156307763386e-06, + "loss": 0.2839, + "step": 10743 + }, + { + "epoch": 0.5033025717899471, + "grad_norm": 0.5913140554725556, + "learning_rate": 4.383031565652654e-06, + "loss": 0.2998, + "step": 10744 + }, + { + "epoch": 0.5033494167798754, + "grad_norm": 0.6170714433010881, + "learning_rate": 4.382906812705551e-06, + "loss": 0.2829, + "step": 10745 + }, + { + "epoch": 0.5033962617698037, + "grad_norm": 0.6582408964477904, + "learning_rate": 4.3827820489227915e-06, + "loss": 0.2853, + "step": 10746 + }, + { + "epoch": 0.503443106759732, + "grad_norm": 0.7064659385321725, + "learning_rate": 4.382657274305095e-06, + "loss": 0.311, + "step": 10747 + }, + { + "epoch": 0.5034899517496604, + "grad_norm": 0.6032337250591607, + "learning_rate": 4.382532488853178e-06, + "loss": 0.2915, + "step": 10748 + }, + { + "epoch": 0.5035367967395887, + "grad_norm": 0.6379385426171162, + "learning_rate": 4.382407692567761e-06, + "loss": 0.2905, + "step": 10749 + }, + { + "epoch": 0.503583641729517, + "grad_norm": 0.5956269299808049, + "learning_rate": 4.38228288544956e-06, + "loss": 0.2868, + "step": 10750 + }, + { + "epoch": 0.5036304867194453, + "grad_norm": 0.6759354997081367, + "learning_rate": 4.382158067499294e-06, + "loss": 0.3039, + "step": 10751 + }, + { + "epoch": 0.5036773317093737, + "grad_norm": 0.6259095468211945, + "learning_rate": 4.382033238717683e-06, + "loss": 0.2921, + "step": 10752 + }, + { + "epoch": 0.503724176699302, + "grad_norm": 0.6223371567958846, + "learning_rate": 4.381908399105442e-06, + "loss": 0.31, + "step": 10753 + }, + { + "epoch": 0.5037710216892304, + "grad_norm": 0.596317414114125, + "learning_rate": 4.381783548663292e-06, + "loss": 0.3145, + "step": 10754 + }, + { + "epoch": 0.5038178666791586, + "grad_norm": 0.5855188800398702, + "learning_rate": 4.381658687391951e-06, + "loss": 0.297, + "step": 10755 + }, + { + "epoch": 0.503864711669087, + "grad_norm": 0.6574566877089931, + "learning_rate": 4.3815338152921364e-06, + "loss": 0.3138, + "step": 10756 + }, + { + "epoch": 0.5039115566590153, + "grad_norm": 0.5683141357441224, + "learning_rate": 4.381408932364568e-06, + "loss": 0.2753, + "step": 10757 + }, + { + "epoch": 0.5039584016489437, + "grad_norm": 0.6093800780396905, + "learning_rate": 4.3812840386099635e-06, + "loss": 0.2868, + "step": 10758 + }, + { + "epoch": 0.5040052466388719, + "grad_norm": 0.5925140823603094, + "learning_rate": 4.381159134029043e-06, + "loss": 0.3212, + "step": 10759 + }, + { + "epoch": 0.5040520916288003, + "grad_norm": 0.5683614013860613, + "learning_rate": 4.381034218622524e-06, + "loss": 0.2886, + "step": 10760 + }, + { + "epoch": 0.5040989366187286, + "grad_norm": 0.5900855353932115, + "learning_rate": 4.380909292391126e-06, + "loss": 0.2876, + "step": 10761 + }, + { + "epoch": 0.504145781608657, + "grad_norm": 0.6080226326665873, + "learning_rate": 4.380784355335567e-06, + "loss": 0.2778, + "step": 10762 + }, + { + "epoch": 0.5041926265985853, + "grad_norm": 0.602075472298293, + "learning_rate": 4.380659407456568e-06, + "loss": 0.2846, + "step": 10763 + }, + { + "epoch": 0.5042394715885136, + "grad_norm": 0.6430619485504037, + "learning_rate": 4.3805344487548455e-06, + "loss": 0.2861, + "step": 10764 + }, + { + "epoch": 0.5042863165784419, + "grad_norm": 0.6082295440144866, + "learning_rate": 4.380409479231121e-06, + "loss": 0.2988, + "step": 10765 + }, + { + "epoch": 0.5043331615683703, + "grad_norm": 0.5954983858541951, + "learning_rate": 4.380284498886112e-06, + "loss": 0.2767, + "step": 10766 + }, + { + "epoch": 0.5043800065582986, + "grad_norm": 0.6104111438206544, + "learning_rate": 4.3801595077205385e-06, + "loss": 0.2773, + "step": 10767 + }, + { + "epoch": 0.5044268515482269, + "grad_norm": 0.558385174468706, + "learning_rate": 4.380034505735119e-06, + "loss": 0.2749, + "step": 10768 + }, + { + "epoch": 0.5044736965381552, + "grad_norm": 0.6314710867618026, + "learning_rate": 4.3799094929305744e-06, + "loss": 0.3066, + "step": 10769 + }, + { + "epoch": 0.5045205415280836, + "grad_norm": 0.7004291486966566, + "learning_rate": 4.379784469307623e-06, + "loss": 0.3028, + "step": 10770 + }, + { + "epoch": 0.5045673865180119, + "grad_norm": 0.6626207976638818, + "learning_rate": 4.379659434866984e-06, + "loss": 0.2631, + "step": 10771 + }, + { + "epoch": 0.5046142315079403, + "grad_norm": 0.681939350402645, + "learning_rate": 4.379534389609378e-06, + "loss": 0.3022, + "step": 10772 + }, + { + "epoch": 0.5046610764978685, + "grad_norm": 0.6036842179129124, + "learning_rate": 4.379409333535524e-06, + "loss": 0.2784, + "step": 10773 + }, + { + "epoch": 0.5047079214877969, + "grad_norm": 0.6224488474940424, + "learning_rate": 4.379284266646141e-06, + "loss": 0.3145, + "step": 10774 + }, + { + "epoch": 0.5047547664777252, + "grad_norm": 0.6074366040085506, + "learning_rate": 4.379159188941949e-06, + "loss": 0.2808, + "step": 10775 + }, + { + "epoch": 0.5048016114676536, + "grad_norm": 0.559165820274422, + "learning_rate": 4.379034100423669e-06, + "loss": 0.2675, + "step": 10776 + }, + { + "epoch": 0.5048484564575818, + "grad_norm": 0.6300976351747426, + "learning_rate": 4.3789090010920204e-06, + "loss": 0.2969, + "step": 10777 + }, + { + "epoch": 0.5048953014475102, + "grad_norm": 0.5565052184367485, + "learning_rate": 4.378783890947722e-06, + "loss": 0.2662, + "step": 10778 + }, + { + "epoch": 0.5049421464374385, + "grad_norm": 0.5764431365793331, + "learning_rate": 4.378658769991495e-06, + "loss": 0.2783, + "step": 10779 + }, + { + "epoch": 0.5049889914273669, + "grad_norm": 0.6266879991397843, + "learning_rate": 4.378533638224059e-06, + "loss": 0.2846, + "step": 10780 + }, + { + "epoch": 0.5050358364172952, + "grad_norm": 0.5708971954483889, + "learning_rate": 4.378408495646134e-06, + "loss": 0.2613, + "step": 10781 + }, + { + "epoch": 0.5050826814072235, + "grad_norm": 0.5968032707178422, + "learning_rate": 4.378283342258439e-06, + "loss": 0.2802, + "step": 10782 + }, + { + "epoch": 0.5051295263971518, + "grad_norm": 0.5957425421798036, + "learning_rate": 4.378158178061697e-06, + "loss": 0.2854, + "step": 10783 + }, + { + "epoch": 0.5051763713870802, + "grad_norm": 0.5905588604152398, + "learning_rate": 4.378033003056626e-06, + "loss": 0.2985, + "step": 10784 + }, + { + "epoch": 0.5052232163770085, + "grad_norm": 0.6303952628532212, + "learning_rate": 4.377907817243947e-06, + "loss": 0.3047, + "step": 10785 + }, + { + "epoch": 0.5052700613669368, + "grad_norm": 0.6092206350330243, + "learning_rate": 4.377782620624381e-06, + "loss": 0.283, + "step": 10786 + }, + { + "epoch": 0.5053169063568651, + "grad_norm": 0.6228764328017171, + "learning_rate": 4.377657413198648e-06, + "loss": 0.2881, + "step": 10787 + }, + { + "epoch": 0.5053637513467935, + "grad_norm": 0.6602411764454648, + "learning_rate": 4.377532194967468e-06, + "loss": 0.2948, + "step": 10788 + }, + { + "epoch": 0.5054105963367218, + "grad_norm": 0.5736029150698888, + "learning_rate": 4.377406965931563e-06, + "loss": 0.3011, + "step": 10789 + }, + { + "epoch": 0.5054574413266502, + "grad_norm": 0.6028684170646093, + "learning_rate": 4.377281726091652e-06, + "loss": 0.2738, + "step": 10790 + }, + { + "epoch": 0.5055042863165784, + "grad_norm": 0.6595238542205525, + "learning_rate": 4.3771564754484565e-06, + "loss": 0.3057, + "step": 10791 + }, + { + "epoch": 0.5055511313065068, + "grad_norm": 0.6348554374470501, + "learning_rate": 4.3770312140026975e-06, + "loss": 0.3012, + "step": 10792 + }, + { + "epoch": 0.5055979762964351, + "grad_norm": 0.6412558648349361, + "learning_rate": 4.376905941755095e-06, + "loss": 0.3051, + "step": 10793 + }, + { + "epoch": 0.5056448212863635, + "grad_norm": 0.5863940590128883, + "learning_rate": 4.376780658706371e-06, + "loss": 0.2749, + "step": 10794 + }, + { + "epoch": 0.5056916662762917, + "grad_norm": 0.622610561027951, + "learning_rate": 4.376655364857247e-06, + "loss": 0.3069, + "step": 10795 + }, + { + "epoch": 0.50573851126622, + "grad_norm": 0.6224153305154216, + "learning_rate": 4.376530060208442e-06, + "loss": 0.2878, + "step": 10796 + }, + { + "epoch": 0.5057853562561484, + "grad_norm": 0.6211958214419402, + "learning_rate": 4.376404744760678e-06, + "loss": 0.2964, + "step": 10797 + }, + { + "epoch": 0.5058322012460768, + "grad_norm": 0.6212948389521105, + "learning_rate": 4.376279418514677e-06, + "loss": 0.296, + "step": 10798 + }, + { + "epoch": 0.5058790462360051, + "grad_norm": 0.6124903603111945, + "learning_rate": 4.37615408147116e-06, + "loss": 0.3139, + "step": 10799 + }, + { + "epoch": 0.5059258912259333, + "grad_norm": 0.6100274803433494, + "learning_rate": 4.3760287336308465e-06, + "loss": 0.2792, + "step": 10800 + }, + { + "epoch": 0.5059727362158617, + "grad_norm": 0.566297581985701, + "learning_rate": 4.37590337499446e-06, + "loss": 0.2847, + "step": 10801 + }, + { + "epoch": 0.50601958120579, + "grad_norm": 0.5686749636014263, + "learning_rate": 4.375778005562721e-06, + "loss": 0.282, + "step": 10802 + }, + { + "epoch": 0.5060664261957184, + "grad_norm": 0.5867317298745042, + "learning_rate": 4.375652625336351e-06, + "loss": 0.2766, + "step": 10803 + }, + { + "epoch": 0.5061132711856466, + "grad_norm": 0.6023503801817168, + "learning_rate": 4.3755272343160705e-06, + "loss": 0.3018, + "step": 10804 + }, + { + "epoch": 0.506160116175575, + "grad_norm": 0.635360030310758, + "learning_rate": 4.3754018325026035e-06, + "loss": 0.3247, + "step": 10805 + }, + { + "epoch": 0.5062069611655033, + "grad_norm": 0.5951406186797622, + "learning_rate": 4.3752764198966695e-06, + "loss": 0.3019, + "step": 10806 + }, + { + "epoch": 0.5062538061554317, + "grad_norm": 0.5992517287798798, + "learning_rate": 4.375150996498991e-06, + "loss": 0.2812, + "step": 10807 + }, + { + "epoch": 0.50630065114536, + "grad_norm": 0.5895334691959453, + "learning_rate": 4.375025562310291e-06, + "loss": 0.284, + "step": 10808 + }, + { + "epoch": 0.5063474961352883, + "grad_norm": 0.5911917237134738, + "learning_rate": 4.374900117331289e-06, + "loss": 0.2835, + "step": 10809 + }, + { + "epoch": 0.5063943411252166, + "grad_norm": 0.5846922407657591, + "learning_rate": 4.374774661562709e-06, + "loss": 0.291, + "step": 10810 + }, + { + "epoch": 0.506441186115145, + "grad_norm": 0.6222315261226526, + "learning_rate": 4.374649195005271e-06, + "loss": 0.3062, + "step": 10811 + }, + { + "epoch": 0.5064880311050733, + "grad_norm": 0.573251021119514, + "learning_rate": 4.374523717659699e-06, + "loss": 0.2747, + "step": 10812 + }, + { + "epoch": 0.5065348760950016, + "grad_norm": 0.6008588358963926, + "learning_rate": 4.374398229526713e-06, + "loss": 0.2671, + "step": 10813 + }, + { + "epoch": 0.5065817210849299, + "grad_norm": 0.6140094432557286, + "learning_rate": 4.374272730607037e-06, + "loss": 0.2891, + "step": 10814 + }, + { + "epoch": 0.5066285660748583, + "grad_norm": 0.615306737497435, + "learning_rate": 4.374147220901392e-06, + "loss": 0.294, + "step": 10815 + }, + { + "epoch": 0.5066754110647866, + "grad_norm": 0.6359691220511875, + "learning_rate": 4.374021700410502e-06, + "loss": 0.2974, + "step": 10816 + }, + { + "epoch": 0.506722256054715, + "grad_norm": 0.6138676752174841, + "learning_rate": 4.373896169135087e-06, + "loss": 0.288, + "step": 10817 + }, + { + "epoch": 0.5067691010446432, + "grad_norm": 0.6212026196676708, + "learning_rate": 4.373770627075871e-06, + "loss": 0.2951, + "step": 10818 + }, + { + "epoch": 0.5068159460345716, + "grad_norm": 0.712878659623635, + "learning_rate": 4.373645074233576e-06, + "loss": 0.3088, + "step": 10819 + }, + { + "epoch": 0.5068627910244999, + "grad_norm": 0.5978224072840111, + "learning_rate": 4.373519510608925e-06, + "loss": 0.31, + "step": 10820 + }, + { + "epoch": 0.5069096360144283, + "grad_norm": 0.6045564783831148, + "learning_rate": 4.373393936202639e-06, + "loss": 0.2922, + "step": 10821 + }, + { + "epoch": 0.5069564810043565, + "grad_norm": 0.612429304756673, + "learning_rate": 4.3732683510154425e-06, + "loss": 0.2982, + "step": 10822 + }, + { + "epoch": 0.5070033259942849, + "grad_norm": 0.6831008970138881, + "learning_rate": 4.373142755048058e-06, + "loss": 0.3085, + "step": 10823 + }, + { + "epoch": 0.5070501709842132, + "grad_norm": 0.6427488074947759, + "learning_rate": 4.373017148301206e-06, + "loss": 0.2708, + "step": 10824 + }, + { + "epoch": 0.5070970159741416, + "grad_norm": 0.5899617719672324, + "learning_rate": 4.3728915307756125e-06, + "loss": 0.2954, + "step": 10825 + }, + { + "epoch": 0.5071438609640699, + "grad_norm": 0.6059109547533169, + "learning_rate": 4.372765902471999e-06, + "loss": 0.2963, + "step": 10826 + }, + { + "epoch": 0.5071907059539982, + "grad_norm": 0.6137615869534286, + "learning_rate": 4.372640263391088e-06, + "loss": 0.2943, + "step": 10827 + }, + { + "epoch": 0.5072375509439265, + "grad_norm": 0.6432094695833764, + "learning_rate": 4.372514613533602e-06, + "loss": 0.2965, + "step": 10828 + }, + { + "epoch": 0.5072843959338549, + "grad_norm": 0.6253783830062826, + "learning_rate": 4.372388952900267e-06, + "loss": 0.2878, + "step": 10829 + }, + { + "epoch": 0.5073312409237832, + "grad_norm": 0.5832170355625556, + "learning_rate": 4.372263281491803e-06, + "loss": 0.2946, + "step": 10830 + }, + { + "epoch": 0.5073780859137115, + "grad_norm": 0.5701875361091671, + "learning_rate": 4.372137599308935e-06, + "loss": 0.2802, + "step": 10831 + }, + { + "epoch": 0.5074249309036398, + "grad_norm": 0.6475472538733678, + "learning_rate": 4.372011906352385e-06, + "loss": 0.3008, + "step": 10832 + }, + { + "epoch": 0.5074717758935682, + "grad_norm": 0.5594302420085703, + "learning_rate": 4.371886202622877e-06, + "loss": 0.2971, + "step": 10833 + }, + { + "epoch": 0.5075186208834965, + "grad_norm": 0.5336082352507528, + "learning_rate": 4.371760488121134e-06, + "loss": 0.2701, + "step": 10834 + }, + { + "epoch": 0.5075654658734249, + "grad_norm": 0.6218814675314804, + "learning_rate": 4.371634762847881e-06, + "loss": 0.3018, + "step": 10835 + }, + { + "epoch": 0.5076123108633531, + "grad_norm": 0.5702592622284004, + "learning_rate": 4.37150902680384e-06, + "loss": 0.2776, + "step": 10836 + }, + { + "epoch": 0.5076591558532815, + "grad_norm": 0.6065937533989485, + "learning_rate": 4.3713832799897345e-06, + "loss": 0.2861, + "step": 10837 + }, + { + "epoch": 0.5077060008432098, + "grad_norm": 0.5943661026767255, + "learning_rate": 4.371257522406289e-06, + "loss": 0.3023, + "step": 10838 + }, + { + "epoch": 0.5077528458331382, + "grad_norm": 0.5532297398993009, + "learning_rate": 4.371131754054226e-06, + "loss": 0.2691, + "step": 10839 + }, + { + "epoch": 0.5077996908230664, + "grad_norm": 0.5551306809463611, + "learning_rate": 4.3710059749342714e-06, + "loss": 0.2692, + "step": 10840 + }, + { + "epoch": 0.5078465358129948, + "grad_norm": 0.693242448078701, + "learning_rate": 4.370880185047147e-06, + "loss": 0.2951, + "step": 10841 + }, + { + "epoch": 0.5078933808029231, + "grad_norm": 0.5980214471546337, + "learning_rate": 4.3707543843935775e-06, + "loss": 0.2905, + "step": 10842 + }, + { + "epoch": 0.5079402257928515, + "grad_norm": 0.647201761811614, + "learning_rate": 4.370628572974286e-06, + "loss": 0.3181, + "step": 10843 + }, + { + "epoch": 0.5079870707827798, + "grad_norm": 0.638192582862921, + "learning_rate": 4.370502750789998e-06, + "loss": 0.3019, + "step": 10844 + }, + { + "epoch": 0.5080339157727081, + "grad_norm": 0.6511212796531295, + "learning_rate": 4.370376917841437e-06, + "loss": 0.2835, + "step": 10845 + }, + { + "epoch": 0.5080807607626364, + "grad_norm": 0.5637422158445544, + "learning_rate": 4.3702510741293255e-06, + "loss": 0.2818, + "step": 10846 + }, + { + "epoch": 0.5081276057525648, + "grad_norm": 0.6274491028456378, + "learning_rate": 4.3701252196543905e-06, + "loss": 0.2976, + "step": 10847 + }, + { + "epoch": 0.5081744507424931, + "grad_norm": 0.568377868204566, + "learning_rate": 4.369999354417355e-06, + "loss": 0.2809, + "step": 10848 + }, + { + "epoch": 0.5082212957324214, + "grad_norm": 0.6057275269078596, + "learning_rate": 4.3698734784189425e-06, + "loss": 0.2792, + "step": 10849 + }, + { + "epoch": 0.5082681407223497, + "grad_norm": 0.600027496432478, + "learning_rate": 4.369747591659879e-06, + "loss": 0.2791, + "step": 10850 + }, + { + "epoch": 0.5083149857122781, + "grad_norm": 0.6636402359759861, + "learning_rate": 4.369621694140887e-06, + "loss": 0.297, + "step": 10851 + }, + { + "epoch": 0.5083618307022064, + "grad_norm": 0.5856928118254461, + "learning_rate": 4.369495785862692e-06, + "loss": 0.2782, + "step": 10852 + }, + { + "epoch": 0.5084086756921348, + "grad_norm": 0.615385313058964, + "learning_rate": 4.3693698668260195e-06, + "loss": 0.2933, + "step": 10853 + }, + { + "epoch": 0.508455520682063, + "grad_norm": 0.5977328259556339, + "learning_rate": 4.369243937031593e-06, + "loss": 0.2872, + "step": 10854 + }, + { + "epoch": 0.5085023656719914, + "grad_norm": 0.5809247096306257, + "learning_rate": 4.369117996480136e-06, + "loss": 0.2649, + "step": 10855 + }, + { + "epoch": 0.5085492106619197, + "grad_norm": 0.599862733641651, + "learning_rate": 4.368992045172377e-06, + "loss": 0.286, + "step": 10856 + }, + { + "epoch": 0.5085960556518481, + "grad_norm": 0.6091280701121821, + "learning_rate": 4.368866083109038e-06, + "loss": 0.2979, + "step": 10857 + }, + { + "epoch": 0.5086429006417763, + "grad_norm": 0.615539635981425, + "learning_rate": 4.368740110290843e-06, + "loss": 0.2827, + "step": 10858 + }, + { + "epoch": 0.5086897456317047, + "grad_norm": 0.5842691238222787, + "learning_rate": 4.36861412671852e-06, + "loss": 0.3018, + "step": 10859 + }, + { + "epoch": 0.508736590621633, + "grad_norm": 0.6288645846116823, + "learning_rate": 4.3684881323927915e-06, + "loss": 0.3018, + "step": 10860 + }, + { + "epoch": 0.5087834356115614, + "grad_norm": 0.5895016278758751, + "learning_rate": 4.368362127314384e-06, + "loss": 0.2802, + "step": 10861 + }, + { + "epoch": 0.5088302806014897, + "grad_norm": 0.7134576229313263, + "learning_rate": 4.368236111484022e-06, + "loss": 0.2948, + "step": 10862 + }, + { + "epoch": 0.508877125591418, + "grad_norm": 0.6292054785327147, + "learning_rate": 4.368110084902429e-06, + "loss": 0.2869, + "step": 10863 + }, + { + "epoch": 0.5089239705813463, + "grad_norm": 0.6421274992070274, + "learning_rate": 4.367984047570334e-06, + "loss": 0.2748, + "step": 10864 + }, + { + "epoch": 0.5089708155712747, + "grad_norm": 0.594969857089847, + "learning_rate": 4.36785799948846e-06, + "loss": 0.286, + "step": 10865 + }, + { + "epoch": 0.509017660561203, + "grad_norm": 0.6059485011420628, + "learning_rate": 4.367731940657532e-06, + "loss": 0.2867, + "step": 10866 + }, + { + "epoch": 0.5090645055511313, + "grad_norm": 0.609500828366851, + "learning_rate": 4.367605871078277e-06, + "loss": 0.2835, + "step": 10867 + }, + { + "epoch": 0.5091113505410596, + "grad_norm": 0.5916102003503755, + "learning_rate": 4.367479790751419e-06, + "loss": 0.2929, + "step": 10868 + }, + { + "epoch": 0.509158195530988, + "grad_norm": 0.5314815703651073, + "learning_rate": 4.3673536996776846e-06, + "loss": 0.2636, + "step": 10869 + }, + { + "epoch": 0.5092050405209163, + "grad_norm": 0.5744000123716474, + "learning_rate": 4.367227597857798e-06, + "loss": 0.2856, + "step": 10870 + }, + { + "epoch": 0.5092518855108447, + "grad_norm": 0.6232693087243759, + "learning_rate": 4.3671014852924875e-06, + "loss": 0.2649, + "step": 10871 + }, + { + "epoch": 0.5092987305007729, + "grad_norm": 0.626522208562942, + "learning_rate": 4.366975361982476e-06, + "loss": 0.2771, + "step": 10872 + }, + { + "epoch": 0.5093455754907013, + "grad_norm": 0.5435737816578535, + "learning_rate": 4.366849227928491e-06, + "loss": 0.281, + "step": 10873 + }, + { + "epoch": 0.5093924204806296, + "grad_norm": 0.6063190522805165, + "learning_rate": 4.366723083131258e-06, + "loss": 0.2797, + "step": 10874 + }, + { + "epoch": 0.509439265470558, + "grad_norm": 0.5647069034738688, + "learning_rate": 4.366596927591502e-06, + "loss": 0.2908, + "step": 10875 + }, + { + "epoch": 0.5094861104604862, + "grad_norm": 0.6717548405954846, + "learning_rate": 4.366470761309951e-06, + "loss": 0.2817, + "step": 10876 + }, + { + "epoch": 0.5095329554504145, + "grad_norm": 0.6517109377488439, + "learning_rate": 4.366344584287329e-06, + "loss": 0.2925, + "step": 10877 + }, + { + "epoch": 0.5095798004403429, + "grad_norm": 0.5887986268864666, + "learning_rate": 4.366218396524363e-06, + "loss": 0.2888, + "step": 10878 + }, + { + "epoch": 0.5096266454302713, + "grad_norm": 0.6415183857111919, + "learning_rate": 4.36609219802178e-06, + "loss": 0.2915, + "step": 10879 + }, + { + "epoch": 0.5096734904201996, + "grad_norm": 0.681079484497841, + "learning_rate": 4.3659659887803055e-06, + "loss": 0.3231, + "step": 10880 + }, + { + "epoch": 0.5097203354101278, + "grad_norm": 0.5860397570167261, + "learning_rate": 4.365839768800665e-06, + "loss": 0.2636, + "step": 10881 + }, + { + "epoch": 0.5097671804000562, + "grad_norm": 0.6007410957539492, + "learning_rate": 4.365713538083585e-06, + "loss": 0.2919, + "step": 10882 + }, + { + "epoch": 0.5098140253899845, + "grad_norm": 0.5593450816276071, + "learning_rate": 4.365587296629794e-06, + "loss": 0.294, + "step": 10883 + }, + { + "epoch": 0.5098608703799129, + "grad_norm": 0.6030268044279107, + "learning_rate": 4.3654610444400155e-06, + "loss": 0.2864, + "step": 10884 + }, + { + "epoch": 0.5099077153698411, + "grad_norm": 0.6074715821360485, + "learning_rate": 4.3653347815149785e-06, + "loss": 0.2886, + "step": 10885 + }, + { + "epoch": 0.5099545603597695, + "grad_norm": 0.5920414762659657, + "learning_rate": 4.365208507855408e-06, + "loss": 0.3112, + "step": 10886 + }, + { + "epoch": 0.5100014053496978, + "grad_norm": 0.5933199889346827, + "learning_rate": 4.365082223462033e-06, + "loss": 0.2783, + "step": 10887 + }, + { + "epoch": 0.5100482503396262, + "grad_norm": 0.5346553292501096, + "learning_rate": 4.364955928335577e-06, + "loss": 0.2491, + "step": 10888 + }, + { + "epoch": 0.5100950953295545, + "grad_norm": 0.6450928094190488, + "learning_rate": 4.364829622476769e-06, + "loss": 0.2932, + "step": 10889 + }, + { + "epoch": 0.5101419403194828, + "grad_norm": 0.5716103847260949, + "learning_rate": 4.364703305886336e-06, + "loss": 0.2945, + "step": 10890 + }, + { + "epoch": 0.5101887853094111, + "grad_norm": 0.6573438598684109, + "learning_rate": 4.364576978565002e-06, + "loss": 0.2964, + "step": 10891 + }, + { + "epoch": 0.5102356302993395, + "grad_norm": 0.5427946817734562, + "learning_rate": 4.364450640513498e-06, + "loss": 0.2713, + "step": 10892 + }, + { + "epoch": 0.5102824752892678, + "grad_norm": 0.6123984081647678, + "learning_rate": 4.364324291732548e-06, + "loss": 0.2973, + "step": 10893 + }, + { + "epoch": 0.5103293202791961, + "grad_norm": 0.5892363753774525, + "learning_rate": 4.364197932222881e-06, + "loss": 0.2971, + "step": 10894 + }, + { + "epoch": 0.5103761652691244, + "grad_norm": 0.6635280565833397, + "learning_rate": 4.364071561985223e-06, + "loss": 0.2921, + "step": 10895 + }, + { + "epoch": 0.5104230102590528, + "grad_norm": 0.5947864294136348, + "learning_rate": 4.363945181020302e-06, + "loss": 0.3022, + "step": 10896 + }, + { + "epoch": 0.5104698552489811, + "grad_norm": 0.5417606339567944, + "learning_rate": 4.363818789328844e-06, + "loss": 0.2614, + "step": 10897 + }, + { + "epoch": 0.5105167002389095, + "grad_norm": 0.5743963455427283, + "learning_rate": 4.363692386911579e-06, + "loss": 0.2665, + "step": 10898 + }, + { + "epoch": 0.5105635452288377, + "grad_norm": 0.6251943957815442, + "learning_rate": 4.3635659737692314e-06, + "loss": 0.2891, + "step": 10899 + }, + { + "epoch": 0.5106103902187661, + "grad_norm": 0.6056779953047704, + "learning_rate": 4.36343954990253e-06, + "loss": 0.294, + "step": 10900 + }, + { + "epoch": 0.5106572352086944, + "grad_norm": 0.5850690652738985, + "learning_rate": 4.363313115312204e-06, + "loss": 0.299, + "step": 10901 + }, + { + "epoch": 0.5107040801986228, + "grad_norm": 0.5729962809492793, + "learning_rate": 4.363186669998978e-06, + "loss": 0.2947, + "step": 10902 + }, + { + "epoch": 0.510750925188551, + "grad_norm": 0.6620572342768816, + "learning_rate": 4.3630602139635815e-06, + "loss": 0.292, + "step": 10903 + }, + { + "epoch": 0.5107977701784794, + "grad_norm": 0.5928407872333019, + "learning_rate": 4.362933747206741e-06, + "loss": 0.3005, + "step": 10904 + }, + { + "epoch": 0.5108446151684077, + "grad_norm": 0.6239937397357609, + "learning_rate": 4.362807269729186e-06, + "loss": 0.2976, + "step": 10905 + }, + { + "epoch": 0.5108914601583361, + "grad_norm": 0.6247381757852981, + "learning_rate": 4.362680781531643e-06, + "loss": 0.2802, + "step": 10906 + }, + { + "epoch": 0.5109383051482644, + "grad_norm": 0.5782163858851751, + "learning_rate": 4.3625542826148395e-06, + "loss": 0.2824, + "step": 10907 + }, + { + "epoch": 0.5109851501381927, + "grad_norm": 0.6367281085933494, + "learning_rate": 4.362427772979505e-06, + "loss": 0.3124, + "step": 10908 + }, + { + "epoch": 0.511031995128121, + "grad_norm": 0.5449279785112618, + "learning_rate": 4.362301252626366e-06, + "loss": 0.2859, + "step": 10909 + }, + { + "epoch": 0.5110788401180494, + "grad_norm": 0.5905921599462509, + "learning_rate": 4.362174721556153e-06, + "loss": 0.2851, + "step": 10910 + }, + { + "epoch": 0.5111256851079777, + "grad_norm": 0.6225192416516391, + "learning_rate": 4.3620481797695916e-06, + "loss": 0.2931, + "step": 10911 + }, + { + "epoch": 0.511172530097906, + "grad_norm": 0.5944863779583155, + "learning_rate": 4.36192162726741e-06, + "loss": 0.2905, + "step": 10912 + }, + { + "epoch": 0.5112193750878343, + "grad_norm": 0.6013244688800703, + "learning_rate": 4.3617950640503385e-06, + "loss": 0.2762, + "step": 10913 + }, + { + "epoch": 0.5112662200777627, + "grad_norm": 0.6064049168021676, + "learning_rate": 4.361668490119105e-06, + "loss": 0.2983, + "step": 10914 + }, + { + "epoch": 0.511313065067691, + "grad_norm": 0.58146417794149, + "learning_rate": 4.361541905474435e-06, + "loss": 0.2891, + "step": 10915 + }, + { + "epoch": 0.5113599100576194, + "grad_norm": 0.6670627203473183, + "learning_rate": 4.361415310117062e-06, + "loss": 0.2825, + "step": 10916 + }, + { + "epoch": 0.5114067550475476, + "grad_norm": 0.608111138786237, + "learning_rate": 4.361288704047709e-06, + "loss": 0.2711, + "step": 10917 + }, + { + "epoch": 0.511453600037476, + "grad_norm": 0.5629151119946032, + "learning_rate": 4.361162087267109e-06, + "loss": 0.2929, + "step": 10918 + }, + { + "epoch": 0.5115004450274043, + "grad_norm": 0.6066905689987779, + "learning_rate": 4.361035459775988e-06, + "loss": 0.2787, + "step": 10919 + }, + { + "epoch": 0.5115472900173327, + "grad_norm": 0.5845125401045368, + "learning_rate": 4.3609088215750775e-06, + "loss": 0.2929, + "step": 10920 + }, + { + "epoch": 0.5115941350072609, + "grad_norm": 0.6380456367435742, + "learning_rate": 4.360782172665103e-06, + "loss": 0.3034, + "step": 10921 + }, + { + "epoch": 0.5116409799971893, + "grad_norm": 0.6386282175280821, + "learning_rate": 4.360655513046795e-06, + "loss": 0.2952, + "step": 10922 + }, + { + "epoch": 0.5116878249871176, + "grad_norm": 0.552245415844012, + "learning_rate": 4.360528842720882e-06, + "loss": 0.2803, + "step": 10923 + }, + { + "epoch": 0.511734669977046, + "grad_norm": 0.6078303904093414, + "learning_rate": 4.3604021616880935e-06, + "loss": 0.2837, + "step": 10924 + }, + { + "epoch": 0.5117815149669743, + "grad_norm": 0.5890836216677383, + "learning_rate": 4.3602754699491574e-06, + "loss": 0.2981, + "step": 10925 + }, + { + "epoch": 0.5118283599569026, + "grad_norm": 0.5886095040970194, + "learning_rate": 4.360148767504805e-06, + "loss": 0.2661, + "step": 10926 + }, + { + "epoch": 0.5118752049468309, + "grad_norm": 0.607561020688491, + "learning_rate": 4.360022054355763e-06, + "loss": 0.2792, + "step": 10927 + }, + { + "epoch": 0.5119220499367593, + "grad_norm": 0.586151193166057, + "learning_rate": 4.359895330502762e-06, + "loss": 0.2892, + "step": 10928 + }, + { + "epoch": 0.5119688949266876, + "grad_norm": 0.6085742490300448, + "learning_rate": 4.35976859594653e-06, + "loss": 0.283, + "step": 10929 + }, + { + "epoch": 0.5120157399166159, + "grad_norm": 0.5918603868257238, + "learning_rate": 4.359641850687798e-06, + "loss": 0.287, + "step": 10930 + }, + { + "epoch": 0.5120625849065442, + "grad_norm": 0.6403681776496993, + "learning_rate": 4.359515094727294e-06, + "loss": 0.3092, + "step": 10931 + }, + { + "epoch": 0.5121094298964726, + "grad_norm": 0.6338117747909188, + "learning_rate": 4.3593883280657485e-06, + "loss": 0.303, + "step": 10932 + }, + { + "epoch": 0.5121562748864009, + "grad_norm": 0.5728585459039284, + "learning_rate": 4.35926155070389e-06, + "loss": 0.2857, + "step": 10933 + }, + { + "epoch": 0.5122031198763293, + "grad_norm": 0.5412835865592368, + "learning_rate": 4.359134762642448e-06, + "loss": 0.268, + "step": 10934 + }, + { + "epoch": 0.5122499648662575, + "grad_norm": 0.6250932275251829, + "learning_rate": 4.359007963882155e-06, + "loss": 0.2867, + "step": 10935 + }, + { + "epoch": 0.5122968098561859, + "grad_norm": 0.5833694587648298, + "learning_rate": 4.358881154423737e-06, + "loss": 0.2976, + "step": 10936 + }, + { + "epoch": 0.5123436548461142, + "grad_norm": 0.5842196864446895, + "learning_rate": 4.358754334267924e-06, + "loss": 0.2858, + "step": 10937 + }, + { + "epoch": 0.5123904998360426, + "grad_norm": 0.6363711520921457, + "learning_rate": 4.358627503415449e-06, + "loss": 0.2856, + "step": 10938 + }, + { + "epoch": 0.5124373448259708, + "grad_norm": 0.5882446258772631, + "learning_rate": 4.358500661867039e-06, + "loss": 0.2853, + "step": 10939 + }, + { + "epoch": 0.5124841898158992, + "grad_norm": 0.6106785391710393, + "learning_rate": 4.358373809623424e-06, + "loss": 0.2794, + "step": 10940 + }, + { + "epoch": 0.5125310348058275, + "grad_norm": 0.5783978192579202, + "learning_rate": 4.358246946685336e-06, + "loss": 0.2919, + "step": 10941 + }, + { + "epoch": 0.5125778797957559, + "grad_norm": 0.5995502753759056, + "learning_rate": 4.358120073053503e-06, + "loss": 0.3021, + "step": 10942 + }, + { + "epoch": 0.5126247247856842, + "grad_norm": 0.5465857582900612, + "learning_rate": 4.357993188728657e-06, + "loss": 0.2826, + "step": 10943 + }, + { + "epoch": 0.5126715697756125, + "grad_norm": 0.5809416061384174, + "learning_rate": 4.357866293711527e-06, + "loss": 0.2955, + "step": 10944 + }, + { + "epoch": 0.5127184147655408, + "grad_norm": 0.5926711275219101, + "learning_rate": 4.357739388002843e-06, + "loss": 0.2832, + "step": 10945 + }, + { + "epoch": 0.5127652597554692, + "grad_norm": 0.5849063469133279, + "learning_rate": 4.357612471603336e-06, + "loss": 0.3007, + "step": 10946 + }, + { + "epoch": 0.5128121047453975, + "grad_norm": 0.5768954416976567, + "learning_rate": 4.357485544513735e-06, + "loss": 0.288, + "step": 10947 + }, + { + "epoch": 0.5128589497353258, + "grad_norm": 0.5576920960543003, + "learning_rate": 4.357358606734773e-06, + "loss": 0.2761, + "step": 10948 + }, + { + "epoch": 0.5129057947252541, + "grad_norm": 0.6114051958704301, + "learning_rate": 4.357231658267179e-06, + "loss": 0.2858, + "step": 10949 + }, + { + "epoch": 0.5129526397151825, + "grad_norm": 0.6097657053638755, + "learning_rate": 4.3571046991116825e-06, + "loss": 0.2834, + "step": 10950 + }, + { + "epoch": 0.5129994847051108, + "grad_norm": 0.6373980005676643, + "learning_rate": 4.356977729269016e-06, + "loss": 0.2905, + "step": 10951 + }, + { + "epoch": 0.5130463296950392, + "grad_norm": 0.5922924229853713, + "learning_rate": 4.35685074873991e-06, + "loss": 0.2947, + "step": 10952 + }, + { + "epoch": 0.5130931746849674, + "grad_norm": 0.5788626105694363, + "learning_rate": 4.356723757525093e-06, + "loss": 0.2853, + "step": 10953 + }, + { + "epoch": 0.5131400196748958, + "grad_norm": 0.5934831826921472, + "learning_rate": 4.3565967556252985e-06, + "loss": 0.2702, + "step": 10954 + }, + { + "epoch": 0.5131868646648241, + "grad_norm": 0.6301133856951998, + "learning_rate": 4.356469743041257e-06, + "loss": 0.3111, + "step": 10955 + }, + { + "epoch": 0.5132337096547525, + "grad_norm": 0.5744319387453717, + "learning_rate": 4.3563427197736974e-06, + "loss": 0.2815, + "step": 10956 + }, + { + "epoch": 0.5132805546446807, + "grad_norm": 0.6484008107529765, + "learning_rate": 4.356215685823353e-06, + "loss": 0.2988, + "step": 10957 + }, + { + "epoch": 0.513327399634609, + "grad_norm": 0.6143220193645244, + "learning_rate": 4.356088641190953e-06, + "loss": 0.2994, + "step": 10958 + }, + { + "epoch": 0.5133742446245374, + "grad_norm": 0.6086160160505258, + "learning_rate": 4.35596158587723e-06, + "loss": 0.2867, + "step": 10959 + }, + { + "epoch": 0.5134210896144658, + "grad_norm": 0.5592530211039295, + "learning_rate": 4.355834519882914e-06, + "loss": 0.2691, + "step": 10960 + }, + { + "epoch": 0.5134679346043941, + "grad_norm": 0.6307151175068796, + "learning_rate": 4.355707443208737e-06, + "loss": 0.2929, + "step": 10961 + }, + { + "epoch": 0.5135147795943223, + "grad_norm": 0.5843080384297433, + "learning_rate": 4.355580355855431e-06, + "loss": 0.2861, + "step": 10962 + }, + { + "epoch": 0.5135616245842507, + "grad_norm": 0.5612821389786762, + "learning_rate": 4.355453257823725e-06, + "loss": 0.2817, + "step": 10963 + }, + { + "epoch": 0.513608469574179, + "grad_norm": 0.6656790969377437, + "learning_rate": 4.355326149114353e-06, + "loss": 0.3049, + "step": 10964 + }, + { + "epoch": 0.5136553145641074, + "grad_norm": 0.5990240379586759, + "learning_rate": 4.355199029728044e-06, + "loss": 0.2915, + "step": 10965 + }, + { + "epoch": 0.5137021595540356, + "grad_norm": 0.6042689863427488, + "learning_rate": 4.355071899665533e-06, + "loss": 0.283, + "step": 10966 + }, + { + "epoch": 0.513749004543964, + "grad_norm": 0.5851652515020958, + "learning_rate": 4.354944758927548e-06, + "loss": 0.2931, + "step": 10967 + }, + { + "epoch": 0.5137958495338923, + "grad_norm": 0.6254266332206565, + "learning_rate": 4.354817607514822e-06, + "loss": 0.2974, + "step": 10968 + }, + { + "epoch": 0.5138426945238207, + "grad_norm": 0.6182095899919435, + "learning_rate": 4.354690445428087e-06, + "loss": 0.2791, + "step": 10969 + }, + { + "epoch": 0.513889539513749, + "grad_norm": 0.5870859676614121, + "learning_rate": 4.354563272668076e-06, + "loss": 0.2718, + "step": 10970 + }, + { + "epoch": 0.5139363845036773, + "grad_norm": 0.591257064721887, + "learning_rate": 4.354436089235518e-06, + "loss": 0.2945, + "step": 10971 + }, + { + "epoch": 0.5139832294936056, + "grad_norm": 0.6547385483308447, + "learning_rate": 4.354308895131147e-06, + "loss": 0.2883, + "step": 10972 + }, + { + "epoch": 0.514030074483534, + "grad_norm": 0.5739288045154834, + "learning_rate": 4.354181690355693e-06, + "loss": 0.2758, + "step": 10973 + }, + { + "epoch": 0.5140769194734623, + "grad_norm": 0.6198010601650219, + "learning_rate": 4.354054474909891e-06, + "loss": 0.2712, + "step": 10974 + }, + { + "epoch": 0.5141237644633906, + "grad_norm": 0.6068636670167341, + "learning_rate": 4.353927248794471e-06, + "loss": 0.2909, + "step": 10975 + }, + { + "epoch": 0.5141706094533189, + "grad_norm": 0.5534999496060814, + "learning_rate": 4.3538000120101654e-06, + "loss": 0.2733, + "step": 10976 + }, + { + "epoch": 0.5142174544432473, + "grad_norm": 0.5772088371375789, + "learning_rate": 4.353672764557707e-06, + "loss": 0.2846, + "step": 10977 + }, + { + "epoch": 0.5142642994331756, + "grad_norm": 0.5370010584142416, + "learning_rate": 4.3535455064378275e-06, + "loss": 0.2577, + "step": 10978 + }, + { + "epoch": 0.514311144423104, + "grad_norm": 0.6240570626257836, + "learning_rate": 4.35341823765126e-06, + "loss": 0.2959, + "step": 10979 + }, + { + "epoch": 0.5143579894130322, + "grad_norm": 0.5867470984947015, + "learning_rate": 4.353290958198736e-06, + "loss": 0.2877, + "step": 10980 + }, + { + "epoch": 0.5144048344029606, + "grad_norm": 0.5695363187230797, + "learning_rate": 4.353163668080988e-06, + "loss": 0.2623, + "step": 10981 + }, + { + "epoch": 0.5144516793928889, + "grad_norm": 0.6454276058577122, + "learning_rate": 4.353036367298749e-06, + "loss": 0.3003, + "step": 10982 + }, + { + "epoch": 0.5144985243828173, + "grad_norm": 0.6767868449265192, + "learning_rate": 4.352909055852752e-06, + "loss": 0.2947, + "step": 10983 + }, + { + "epoch": 0.5145453693727455, + "grad_norm": 0.5416581821138132, + "learning_rate": 4.3527817337437286e-06, + "loss": 0.2789, + "step": 10984 + }, + { + "epoch": 0.5145922143626739, + "grad_norm": 0.5862165205154818, + "learning_rate": 4.352654400972413e-06, + "loss": 0.2838, + "step": 10985 + }, + { + "epoch": 0.5146390593526022, + "grad_norm": 0.6398292809227069, + "learning_rate": 4.352527057539535e-06, + "loss": 0.2853, + "step": 10986 + }, + { + "epoch": 0.5146859043425306, + "grad_norm": 0.6201580011569658, + "learning_rate": 4.352399703445831e-06, + "loss": 0.2813, + "step": 10987 + }, + { + "epoch": 0.5147327493324589, + "grad_norm": 0.641302388292579, + "learning_rate": 4.352272338692032e-06, + "loss": 0.2958, + "step": 10988 + }, + { + "epoch": 0.5147795943223872, + "grad_norm": 0.6212965976903155, + "learning_rate": 4.35214496327887e-06, + "loss": 0.2865, + "step": 10989 + }, + { + "epoch": 0.5148264393123155, + "grad_norm": 0.6645670804844257, + "learning_rate": 4.3520175772070815e-06, + "loss": 0.2944, + "step": 10990 + }, + { + "epoch": 0.5148732843022439, + "grad_norm": 0.6407531838506201, + "learning_rate": 4.3518901804773956e-06, + "loss": 0.295, + "step": 10991 + }, + { + "epoch": 0.5149201292921722, + "grad_norm": 0.6699213854301312, + "learning_rate": 4.351762773090548e-06, + "loss": 0.3181, + "step": 10992 + }, + { + "epoch": 0.5149669742821005, + "grad_norm": 0.6117399599331638, + "learning_rate": 4.351635355047271e-06, + "loss": 0.2714, + "step": 10993 + }, + { + "epoch": 0.5150138192720288, + "grad_norm": 0.5579120074821137, + "learning_rate": 4.351507926348297e-06, + "loss": 0.2804, + "step": 10994 + }, + { + "epoch": 0.5150606642619572, + "grad_norm": 0.5848823794541352, + "learning_rate": 4.351380486994361e-06, + "loss": 0.273, + "step": 10995 + }, + { + "epoch": 0.5151075092518855, + "grad_norm": 0.6502564144771003, + "learning_rate": 4.351253036986196e-06, + "loss": 0.2739, + "step": 10996 + }, + { + "epoch": 0.5151543542418139, + "grad_norm": 0.6010382415563921, + "learning_rate": 4.351125576324534e-06, + "loss": 0.3051, + "step": 10997 + }, + { + "epoch": 0.5152011992317421, + "grad_norm": 0.6363110748440205, + "learning_rate": 4.350998105010111e-06, + "loss": 0.3062, + "step": 10998 + }, + { + "epoch": 0.5152480442216705, + "grad_norm": 0.619525914392577, + "learning_rate": 4.350870623043658e-06, + "loss": 0.313, + "step": 10999 + }, + { + "epoch": 0.5152948892115988, + "grad_norm": 0.6083752844572994, + "learning_rate": 4.35074313042591e-06, + "loss": 0.2864, + "step": 11000 + }, + { + "epoch": 0.5153417342015272, + "grad_norm": 0.5472840212187731, + "learning_rate": 4.3506156271576e-06, + "loss": 0.2739, + "step": 11001 + }, + { + "epoch": 0.5153885791914554, + "grad_norm": 0.6097517623887676, + "learning_rate": 4.3504881132394635e-06, + "loss": 0.29, + "step": 11002 + }, + { + "epoch": 0.5154354241813838, + "grad_norm": 0.5035491114009069, + "learning_rate": 4.350360588672232e-06, + "loss": 0.244, + "step": 11003 + }, + { + "epoch": 0.5154822691713121, + "grad_norm": 0.5676850184631465, + "learning_rate": 4.35023305345664e-06, + "loss": 0.2828, + "step": 11004 + }, + { + "epoch": 0.5155291141612405, + "grad_norm": 0.5500082119837534, + "learning_rate": 4.3501055075934235e-06, + "loss": 0.2854, + "step": 11005 + }, + { + "epoch": 0.5155759591511688, + "grad_norm": 0.6169947907546166, + "learning_rate": 4.349977951083314e-06, + "loss": 0.3022, + "step": 11006 + }, + { + "epoch": 0.5156228041410971, + "grad_norm": 0.5974461483385453, + "learning_rate": 4.349850383927046e-06, + "loss": 0.269, + "step": 11007 + }, + { + "epoch": 0.5156696491310254, + "grad_norm": 0.6041703799533459, + "learning_rate": 4.349722806125354e-06, + "loss": 0.2941, + "step": 11008 + }, + { + "epoch": 0.5157164941209538, + "grad_norm": 0.6142984152213213, + "learning_rate": 4.349595217678972e-06, + "loss": 0.2892, + "step": 11009 + }, + { + "epoch": 0.5157633391108821, + "grad_norm": 0.5809878353414932, + "learning_rate": 4.349467618588635e-06, + "loss": 0.2892, + "step": 11010 + }, + { + "epoch": 0.5158101841008104, + "grad_norm": 0.5826923830397778, + "learning_rate": 4.349340008855077e-06, + "loss": 0.3019, + "step": 11011 + }, + { + "epoch": 0.5158570290907387, + "grad_norm": 0.6504833368151896, + "learning_rate": 4.3492123884790304e-06, + "loss": 0.3053, + "step": 11012 + }, + { + "epoch": 0.5159038740806671, + "grad_norm": 0.6527297536246299, + "learning_rate": 4.349084757461232e-06, + "loss": 0.2863, + "step": 11013 + }, + { + "epoch": 0.5159507190705954, + "grad_norm": 0.6477121662514643, + "learning_rate": 4.348957115802417e-06, + "loss": 0.3159, + "step": 11014 + }, + { + "epoch": 0.5159975640605238, + "grad_norm": 0.6363976134473628, + "learning_rate": 4.348829463503317e-06, + "loss": 0.2897, + "step": 11015 + }, + { + "epoch": 0.516044409050452, + "grad_norm": 0.619238293889244, + "learning_rate": 4.348701800564669e-06, + "loss": 0.3007, + "step": 11016 + }, + { + "epoch": 0.5160912540403804, + "grad_norm": 0.6334827911551695, + "learning_rate": 4.3485741269872064e-06, + "loss": 0.283, + "step": 11017 + }, + { + "epoch": 0.5161380990303087, + "grad_norm": 0.5626615439214172, + "learning_rate": 4.348446442771664e-06, + "loss": 0.2559, + "step": 11018 + }, + { + "epoch": 0.5161849440202371, + "grad_norm": 0.5723950736503084, + "learning_rate": 4.348318747918777e-06, + "loss": 0.2722, + "step": 11019 + }, + { + "epoch": 0.5162317890101653, + "grad_norm": 0.590279551766315, + "learning_rate": 4.348191042429281e-06, + "loss": 0.3034, + "step": 11020 + }, + { + "epoch": 0.5162786340000937, + "grad_norm": 0.691093258683552, + "learning_rate": 4.348063326303909e-06, + "loss": 0.2999, + "step": 11021 + }, + { + "epoch": 0.516325478990022, + "grad_norm": 0.5922206775444705, + "learning_rate": 4.347935599543397e-06, + "loss": 0.3043, + "step": 11022 + }, + { + "epoch": 0.5163723239799504, + "grad_norm": 0.5655936046577124, + "learning_rate": 4.347807862148481e-06, + "loss": 0.2737, + "step": 11023 + }, + { + "epoch": 0.5164191689698787, + "grad_norm": 0.6103555186440048, + "learning_rate": 4.3476801141198945e-06, + "loss": 0.2781, + "step": 11024 + }, + { + "epoch": 0.516466013959807, + "grad_norm": 0.6197777506051857, + "learning_rate": 4.347552355458373e-06, + "loss": 0.3191, + "step": 11025 + }, + { + "epoch": 0.5165128589497353, + "grad_norm": 0.6464222186915843, + "learning_rate": 4.347424586164653e-06, + "loss": 0.2957, + "step": 11026 + }, + { + "epoch": 0.5165597039396637, + "grad_norm": 0.570614128724917, + "learning_rate": 4.347296806239468e-06, + "loss": 0.2669, + "step": 11027 + }, + { + "epoch": 0.516606548929592, + "grad_norm": 0.5808819425874109, + "learning_rate": 4.347169015683555e-06, + "loss": 0.2713, + "step": 11028 + }, + { + "epoch": 0.5166533939195203, + "grad_norm": 0.6367110387389219, + "learning_rate": 4.347041214497649e-06, + "loss": 0.3094, + "step": 11029 + }, + { + "epoch": 0.5167002389094486, + "grad_norm": 0.6164573819719218, + "learning_rate": 4.346913402682484e-06, + "loss": 0.2974, + "step": 11030 + }, + { + "epoch": 0.516747083899377, + "grad_norm": 0.5715632525033899, + "learning_rate": 4.346785580238797e-06, + "loss": 0.269, + "step": 11031 + }, + { + "epoch": 0.5167939288893053, + "grad_norm": 0.6006135260873297, + "learning_rate": 4.346657747167323e-06, + "loss": 0.2943, + "step": 11032 + }, + { + "epoch": 0.5168407738792337, + "grad_norm": 0.5919619549251229, + "learning_rate": 4.346529903468798e-06, + "loss": 0.2755, + "step": 11033 + }, + { + "epoch": 0.5168876188691619, + "grad_norm": 0.6262577229342742, + "learning_rate": 4.346402049143957e-06, + "loss": 0.2961, + "step": 11034 + }, + { + "epoch": 0.5169344638590903, + "grad_norm": 0.597155993569627, + "learning_rate": 4.346274184193537e-06, + "loss": 0.2941, + "step": 11035 + }, + { + "epoch": 0.5169813088490186, + "grad_norm": 0.6329025129874728, + "learning_rate": 4.346146308618272e-06, + "loss": 0.2878, + "step": 11036 + }, + { + "epoch": 0.517028153838947, + "grad_norm": 0.5886180411452757, + "learning_rate": 4.346018422418901e-06, + "loss": 0.2841, + "step": 11037 + }, + { + "epoch": 0.5170749988288752, + "grad_norm": 0.6423790043572538, + "learning_rate": 4.345890525596156e-06, + "loss": 0.3154, + "step": 11038 + }, + { + "epoch": 0.5171218438188036, + "grad_norm": 0.6025440455036738, + "learning_rate": 4.345762618150776e-06, + "loss": 0.2892, + "step": 11039 + }, + { + "epoch": 0.5171686888087319, + "grad_norm": 0.636145860183095, + "learning_rate": 4.345634700083495e-06, + "loss": 0.2784, + "step": 11040 + }, + { + "epoch": 0.5172155337986603, + "grad_norm": 0.6232945330334855, + "learning_rate": 4.345506771395051e-06, + "loss": 0.2921, + "step": 11041 + }, + { + "epoch": 0.5172623787885886, + "grad_norm": 0.5759200052556299, + "learning_rate": 4.34537883208618e-06, + "loss": 0.2772, + "step": 11042 + }, + { + "epoch": 0.5173092237785168, + "grad_norm": 0.6601667394032723, + "learning_rate": 4.345250882157617e-06, + "loss": 0.2907, + "step": 11043 + }, + { + "epoch": 0.5173560687684452, + "grad_norm": 0.5751770512092533, + "learning_rate": 4.345122921610098e-06, + "loss": 0.2818, + "step": 11044 + }, + { + "epoch": 0.5174029137583736, + "grad_norm": 0.6145733251806964, + "learning_rate": 4.3449949504443615e-06, + "loss": 0.289, + "step": 11045 + }, + { + "epoch": 0.5174497587483019, + "grad_norm": 0.6114288813587997, + "learning_rate": 4.344866968661142e-06, + "loss": 0.3069, + "step": 11046 + }, + { + "epoch": 0.5174966037382301, + "grad_norm": 0.6654717956966402, + "learning_rate": 4.3447389762611776e-06, + "loss": 0.291, + "step": 11047 + }, + { + "epoch": 0.5175434487281585, + "grad_norm": 0.6393737303657416, + "learning_rate": 4.344610973245204e-06, + "loss": 0.311, + "step": 11048 + }, + { + "epoch": 0.5175902937180868, + "grad_norm": 0.5938972628526403, + "learning_rate": 4.344482959613957e-06, + "loss": 0.2931, + "step": 11049 + }, + { + "epoch": 0.5176371387080152, + "grad_norm": 0.5674770338535831, + "learning_rate": 4.3443549353681755e-06, + "loss": 0.296, + "step": 11050 + }, + { + "epoch": 0.5176839836979436, + "grad_norm": 0.624320952342484, + "learning_rate": 4.344226900508594e-06, + "loss": 0.2822, + "step": 11051 + }, + { + "epoch": 0.5177308286878718, + "grad_norm": 0.5900038964458734, + "learning_rate": 4.34409885503595e-06, + "loss": 0.2843, + "step": 11052 + }, + { + "epoch": 0.5177776736778001, + "grad_norm": 0.6146983667857396, + "learning_rate": 4.343970798950981e-06, + "loss": 0.3005, + "step": 11053 + }, + { + "epoch": 0.5178245186677285, + "grad_norm": 0.6122804050879163, + "learning_rate": 4.343842732254424e-06, + "loss": 0.2777, + "step": 11054 + }, + { + "epoch": 0.5178713636576568, + "grad_norm": 0.5744214755268583, + "learning_rate": 4.343714654947015e-06, + "loss": 0.2739, + "step": 11055 + }, + { + "epoch": 0.5179182086475851, + "grad_norm": 0.633736541180878, + "learning_rate": 4.3435865670294915e-06, + "loss": 0.2844, + "step": 11056 + }, + { + "epoch": 0.5179650536375134, + "grad_norm": 0.6225508648066066, + "learning_rate": 4.34345846850259e-06, + "loss": 0.2842, + "step": 11057 + }, + { + "epoch": 0.5180118986274418, + "grad_norm": 0.6669966614419691, + "learning_rate": 4.343330359367049e-06, + "loss": 0.286, + "step": 11058 + }, + { + "epoch": 0.5180587436173701, + "grad_norm": 0.568765790672564, + "learning_rate": 4.343202239623606e-06, + "loss": 0.2579, + "step": 11059 + }, + { + "epoch": 0.5181055886072985, + "grad_norm": 0.5817054191869325, + "learning_rate": 4.343074109272997e-06, + "loss": 0.2888, + "step": 11060 + }, + { + "epoch": 0.5181524335972267, + "grad_norm": 0.6169551127856375, + "learning_rate": 4.342945968315959e-06, + "loss": 0.2996, + "step": 11061 + }, + { + "epoch": 0.5181992785871551, + "grad_norm": 0.5685703692317077, + "learning_rate": 4.34281781675323e-06, + "loss": 0.2799, + "step": 11062 + }, + { + "epoch": 0.5182461235770834, + "grad_norm": 0.5978874780514013, + "learning_rate": 4.342689654585548e-06, + "loss": 0.3048, + "step": 11063 + }, + { + "epoch": 0.5182929685670118, + "grad_norm": 0.6344792258754772, + "learning_rate": 4.342561481813651e-06, + "loss": 0.3104, + "step": 11064 + }, + { + "epoch": 0.51833981355694, + "grad_norm": 0.6032304816459773, + "learning_rate": 4.342433298438275e-06, + "loss": 0.2959, + "step": 11065 + }, + { + "epoch": 0.5183866585468684, + "grad_norm": 0.6008341647273985, + "learning_rate": 4.342305104460158e-06, + "loss": 0.2806, + "step": 11066 + }, + { + "epoch": 0.5184335035367967, + "grad_norm": 0.5970968870092914, + "learning_rate": 4.342176899880038e-06, + "loss": 0.3051, + "step": 11067 + }, + { + "epoch": 0.5184803485267251, + "grad_norm": 0.5613420794218114, + "learning_rate": 4.342048684698654e-06, + "loss": 0.283, + "step": 11068 + }, + { + "epoch": 0.5185271935166534, + "grad_norm": 0.5825841468761527, + "learning_rate": 4.341920458916743e-06, + "loss": 0.2796, + "step": 11069 + }, + { + "epoch": 0.5185740385065817, + "grad_norm": 0.7039616710432619, + "learning_rate": 4.3417922225350415e-06, + "loss": 0.3089, + "step": 11070 + }, + { + "epoch": 0.51862088349651, + "grad_norm": 0.5917791608727438, + "learning_rate": 4.34166397555429e-06, + "loss": 0.2713, + "step": 11071 + }, + { + "epoch": 0.5186677284864384, + "grad_norm": 0.7302619500399576, + "learning_rate": 4.341535717975225e-06, + "loss": 0.2945, + "step": 11072 + }, + { + "epoch": 0.5187145734763667, + "grad_norm": 0.6535464067134279, + "learning_rate": 4.341407449798583e-06, + "loss": 0.2939, + "step": 11073 + }, + { + "epoch": 0.518761418466295, + "grad_norm": 0.6326136312494309, + "learning_rate": 4.341279171025105e-06, + "loss": 0.2901, + "step": 11074 + }, + { + "epoch": 0.5188082634562233, + "grad_norm": 0.6033623811026761, + "learning_rate": 4.341150881655529e-06, + "loss": 0.2805, + "step": 11075 + }, + { + "epoch": 0.5188551084461517, + "grad_norm": 0.5871201270671512, + "learning_rate": 4.341022581690592e-06, + "loss": 0.2844, + "step": 11076 + }, + { + "epoch": 0.51890195343608, + "grad_norm": 0.620956423437137, + "learning_rate": 4.340894271131033e-06, + "loss": 0.3123, + "step": 11077 + }, + { + "epoch": 0.5189487984260084, + "grad_norm": 0.6027917836785641, + "learning_rate": 4.3407659499775904e-06, + "loss": 0.309, + "step": 11078 + }, + { + "epoch": 0.5189956434159366, + "grad_norm": 0.590278243429245, + "learning_rate": 4.3406376182310014e-06, + "loss": 0.2847, + "step": 11079 + }, + { + "epoch": 0.519042488405865, + "grad_norm": 0.5594569680442453, + "learning_rate": 4.340509275892005e-06, + "loss": 0.2813, + "step": 11080 + }, + { + "epoch": 0.5190893333957933, + "grad_norm": 0.6062031854442725, + "learning_rate": 4.340380922961342e-06, + "loss": 0.2696, + "step": 11081 + }, + { + "epoch": 0.5191361783857217, + "grad_norm": 0.6408621466306025, + "learning_rate": 4.340252559439748e-06, + "loss": 0.2954, + "step": 11082 + }, + { + "epoch": 0.5191830233756499, + "grad_norm": 0.6342495993082392, + "learning_rate": 4.3401241853279645e-06, + "loss": 0.2955, + "step": 11083 + }, + { + "epoch": 0.5192298683655783, + "grad_norm": 0.6360197732215863, + "learning_rate": 4.339995800626727e-06, + "loss": 0.2917, + "step": 11084 + }, + { + "epoch": 0.5192767133555066, + "grad_norm": 0.5789009518631654, + "learning_rate": 4.339867405336777e-06, + "loss": 0.2821, + "step": 11085 + }, + { + "epoch": 0.519323558345435, + "grad_norm": 0.6285802610942233, + "learning_rate": 4.339738999458854e-06, + "loss": 0.2938, + "step": 11086 + }, + { + "epoch": 0.5193704033353633, + "grad_norm": 0.6317635853958837, + "learning_rate": 4.339610582993694e-06, + "loss": 0.2991, + "step": 11087 + }, + { + "epoch": 0.5194172483252916, + "grad_norm": 0.6483314956008173, + "learning_rate": 4.339482155942037e-06, + "loss": 0.3068, + "step": 11088 + }, + { + "epoch": 0.5194640933152199, + "grad_norm": 0.633187680261084, + "learning_rate": 4.339353718304623e-06, + "loss": 0.2793, + "step": 11089 + }, + { + "epoch": 0.5195109383051483, + "grad_norm": 0.5636564515737358, + "learning_rate": 4.3392252700821904e-06, + "loss": 0.2643, + "step": 11090 + }, + { + "epoch": 0.5195577832950766, + "grad_norm": 0.6513021162777123, + "learning_rate": 4.33909681127548e-06, + "loss": 0.3021, + "step": 11091 + }, + { + "epoch": 0.5196046282850049, + "grad_norm": 0.699842856298427, + "learning_rate": 4.338968341885228e-06, + "loss": 0.3294, + "step": 11092 + }, + { + "epoch": 0.5196514732749332, + "grad_norm": 0.5662589727297924, + "learning_rate": 4.338839861912176e-06, + "loss": 0.2841, + "step": 11093 + }, + { + "epoch": 0.5196983182648616, + "grad_norm": 0.5738133169179584, + "learning_rate": 4.338711371357063e-06, + "loss": 0.2925, + "step": 11094 + }, + { + "epoch": 0.5197451632547899, + "grad_norm": 0.5844963321795227, + "learning_rate": 4.338582870220628e-06, + "loss": 0.3012, + "step": 11095 + }, + { + "epoch": 0.5197920082447183, + "grad_norm": 0.6548315634505245, + "learning_rate": 4.33845435850361e-06, + "loss": 0.3097, + "step": 11096 + }, + { + "epoch": 0.5198388532346465, + "grad_norm": 0.6139986550474338, + "learning_rate": 4.33832583620675e-06, + "loss": 0.2922, + "step": 11097 + }, + { + "epoch": 0.5198856982245749, + "grad_norm": 0.5934690572835432, + "learning_rate": 4.338197303330787e-06, + "loss": 0.3079, + "step": 11098 + }, + { + "epoch": 0.5199325432145032, + "grad_norm": 0.6317736405990328, + "learning_rate": 4.3380687598764604e-06, + "loss": 0.2945, + "step": 11099 + }, + { + "epoch": 0.5199793882044316, + "grad_norm": 0.5793709234237141, + "learning_rate": 4.33794020584451e-06, + "loss": 0.2861, + "step": 11100 + }, + { + "epoch": 0.5200262331943598, + "grad_norm": 0.5994727388931396, + "learning_rate": 4.337811641235676e-06, + "loss": 0.2811, + "step": 11101 + }, + { + "epoch": 0.5200730781842882, + "grad_norm": 0.5929356795445042, + "learning_rate": 4.337683066050698e-06, + "loss": 0.2667, + "step": 11102 + }, + { + "epoch": 0.5201199231742165, + "grad_norm": 0.6208713708966874, + "learning_rate": 4.3375544802903144e-06, + "loss": 0.3135, + "step": 11103 + }, + { + "epoch": 0.5201667681641449, + "grad_norm": 0.5569919628106705, + "learning_rate": 4.337425883955269e-06, + "loss": 0.2669, + "step": 11104 + }, + { + "epoch": 0.5202136131540732, + "grad_norm": 0.6021219756154674, + "learning_rate": 4.337297277046297e-06, + "loss": 0.2669, + "step": 11105 + }, + { + "epoch": 0.5202604581440015, + "grad_norm": 0.5766657142445404, + "learning_rate": 4.337168659564143e-06, + "loss": 0.283, + "step": 11106 + }, + { + "epoch": 0.5203073031339298, + "grad_norm": 0.5647432356616247, + "learning_rate": 4.337040031509544e-06, + "loss": 0.2645, + "step": 11107 + }, + { + "epoch": 0.5203541481238582, + "grad_norm": 0.659986018937969, + "learning_rate": 4.336911392883241e-06, + "loss": 0.2988, + "step": 11108 + }, + { + "epoch": 0.5204009931137865, + "grad_norm": 0.6149099324654016, + "learning_rate": 4.336782743685976e-06, + "loss": 0.2743, + "step": 11109 + }, + { + "epoch": 0.5204478381037148, + "grad_norm": 0.6841760566749358, + "learning_rate": 4.336654083918487e-06, + "loss": 0.2852, + "step": 11110 + }, + { + "epoch": 0.5204946830936431, + "grad_norm": 0.5895099649577563, + "learning_rate": 4.336525413581515e-06, + "loss": 0.2955, + "step": 11111 + }, + { + "epoch": 0.5205415280835715, + "grad_norm": 0.6344601766683874, + "learning_rate": 4.336396732675802e-06, + "loss": 0.3242, + "step": 11112 + }, + { + "epoch": 0.5205883730734998, + "grad_norm": 0.6149456180147616, + "learning_rate": 4.336268041202087e-06, + "loss": 0.3023, + "step": 11113 + }, + { + "epoch": 0.5206352180634282, + "grad_norm": 0.6047002062721029, + "learning_rate": 4.3361393391611094e-06, + "loss": 0.2825, + "step": 11114 + }, + { + "epoch": 0.5206820630533564, + "grad_norm": 0.6460515518810478, + "learning_rate": 4.336010626553613e-06, + "loss": 0.2976, + "step": 11115 + }, + { + "epoch": 0.5207289080432848, + "grad_norm": 0.6176623187139234, + "learning_rate": 4.335881903380337e-06, + "loss": 0.2897, + "step": 11116 + }, + { + "epoch": 0.5207757530332131, + "grad_norm": 0.63678259941562, + "learning_rate": 4.33575316964202e-06, + "loss": 0.2615, + "step": 11117 + }, + { + "epoch": 0.5208225980231415, + "grad_norm": 0.5677566821748921, + "learning_rate": 4.335624425339407e-06, + "loss": 0.2785, + "step": 11118 + }, + { + "epoch": 0.5208694430130697, + "grad_norm": 0.5685376968261605, + "learning_rate": 4.335495670473237e-06, + "loss": 0.2648, + "step": 11119 + }, + { + "epoch": 0.520916288002998, + "grad_norm": 0.6202865540668495, + "learning_rate": 4.33536690504425e-06, + "loss": 0.3089, + "step": 11120 + }, + { + "epoch": 0.5209631329929264, + "grad_norm": 0.6162822809217452, + "learning_rate": 4.335238129053188e-06, + "loss": 0.3108, + "step": 11121 + }, + { + "epoch": 0.5210099779828548, + "grad_norm": 0.5645379154517977, + "learning_rate": 4.335109342500792e-06, + "loss": 0.2753, + "step": 11122 + }, + { + "epoch": 0.5210568229727831, + "grad_norm": 0.5967602918593906, + "learning_rate": 4.334980545387802e-06, + "loss": 0.2851, + "step": 11123 + }, + { + "epoch": 0.5211036679627113, + "grad_norm": 0.6024150218691483, + "learning_rate": 4.334851737714961e-06, + "loss": 0.2673, + "step": 11124 + }, + { + "epoch": 0.5211505129526397, + "grad_norm": 0.5747780440967651, + "learning_rate": 4.3347229194830095e-06, + "loss": 0.282, + "step": 11125 + }, + { + "epoch": 0.521197357942568, + "grad_norm": 0.5671842510761486, + "learning_rate": 4.334594090692689e-06, + "loss": 0.2696, + "step": 11126 + }, + { + "epoch": 0.5212442029324964, + "grad_norm": 0.524457122620761, + "learning_rate": 4.334465251344739e-06, + "loss": 0.2618, + "step": 11127 + }, + { + "epoch": 0.5212910479224246, + "grad_norm": 0.6524386854318016, + "learning_rate": 4.334336401439904e-06, + "loss": 0.2835, + "step": 11128 + }, + { + "epoch": 0.521337892912353, + "grad_norm": 0.6264405421877305, + "learning_rate": 4.334207540978924e-06, + "loss": 0.2744, + "step": 11129 + }, + { + "epoch": 0.5213847379022813, + "grad_norm": 0.6367494847602089, + "learning_rate": 4.3340786699625405e-06, + "loss": 0.3038, + "step": 11130 + }, + { + "epoch": 0.5214315828922097, + "grad_norm": 0.5803937159678048, + "learning_rate": 4.333949788391495e-06, + "loss": 0.2842, + "step": 11131 + }, + { + "epoch": 0.521478427882138, + "grad_norm": 0.5614792791519654, + "learning_rate": 4.333820896266529e-06, + "loss": 0.2773, + "step": 11132 + }, + { + "epoch": 0.5215252728720663, + "grad_norm": 0.5772995621941769, + "learning_rate": 4.333691993588385e-06, + "loss": 0.2811, + "step": 11133 + }, + { + "epoch": 0.5215721178619946, + "grad_norm": 0.6366040226092267, + "learning_rate": 4.333563080357805e-06, + "loss": 0.3106, + "step": 11134 + }, + { + "epoch": 0.521618962851923, + "grad_norm": 0.6924293035604718, + "learning_rate": 4.33343415657553e-06, + "loss": 0.3136, + "step": 11135 + }, + { + "epoch": 0.5216658078418513, + "grad_norm": 0.6400077720137565, + "learning_rate": 4.3333052222423024e-06, + "loss": 0.2912, + "step": 11136 + }, + { + "epoch": 0.5217126528317796, + "grad_norm": 0.5955845693348228, + "learning_rate": 4.333176277358864e-06, + "loss": 0.3114, + "step": 11137 + }, + { + "epoch": 0.5217594978217079, + "grad_norm": 0.5831059829029404, + "learning_rate": 4.333047321925956e-06, + "loss": 0.3026, + "step": 11138 + }, + { + "epoch": 0.5218063428116363, + "grad_norm": 0.6402354132736229, + "learning_rate": 4.3329183559443226e-06, + "loss": 0.2958, + "step": 11139 + }, + { + "epoch": 0.5218531878015646, + "grad_norm": 0.6162048751344211, + "learning_rate": 4.332789379414704e-06, + "loss": 0.3124, + "step": 11140 + }, + { + "epoch": 0.521900032791493, + "grad_norm": 0.6355054005550503, + "learning_rate": 4.332660392337843e-06, + "loss": 0.2696, + "step": 11141 + }, + { + "epoch": 0.5219468777814212, + "grad_norm": 0.6106197946493224, + "learning_rate": 4.332531394714482e-06, + "loss": 0.2798, + "step": 11142 + }, + { + "epoch": 0.5219937227713496, + "grad_norm": 0.6336691850131995, + "learning_rate": 4.332402386545365e-06, + "loss": 0.3026, + "step": 11143 + }, + { + "epoch": 0.5220405677612779, + "grad_norm": 0.5690508362141015, + "learning_rate": 4.332273367831231e-06, + "loss": 0.2693, + "step": 11144 + }, + { + "epoch": 0.5220874127512063, + "grad_norm": 0.5675484351628478, + "learning_rate": 4.332144338572825e-06, + "loss": 0.2983, + "step": 11145 + }, + { + "epoch": 0.5221342577411345, + "grad_norm": 0.552801376765233, + "learning_rate": 4.3320152987708885e-06, + "loss": 0.2707, + "step": 11146 + }, + { + "epoch": 0.5221811027310629, + "grad_norm": 0.5970721522825074, + "learning_rate": 4.331886248426164e-06, + "loss": 0.2941, + "step": 11147 + }, + { + "epoch": 0.5222279477209912, + "grad_norm": 0.6085513994281997, + "learning_rate": 4.331757187539396e-06, + "loss": 0.2826, + "step": 11148 + }, + { + "epoch": 0.5222747927109196, + "grad_norm": 0.5889752523138961, + "learning_rate": 4.331628116111326e-06, + "loss": 0.2936, + "step": 11149 + }, + { + "epoch": 0.5223216377008479, + "grad_norm": 0.5590676828071188, + "learning_rate": 4.331499034142695e-06, + "loss": 0.266, + "step": 11150 + }, + { + "epoch": 0.5223684826907762, + "grad_norm": 0.6137356720870348, + "learning_rate": 4.331369941634248e-06, + "loss": 0.2876, + "step": 11151 + }, + { + "epoch": 0.5224153276807045, + "grad_norm": 0.6025004788892071, + "learning_rate": 4.331240838586727e-06, + "loss": 0.3014, + "step": 11152 + }, + { + "epoch": 0.5224621726706329, + "grad_norm": 0.5544764127775614, + "learning_rate": 4.331111725000876e-06, + "loss": 0.2875, + "step": 11153 + }, + { + "epoch": 0.5225090176605612, + "grad_norm": 0.6241733339922937, + "learning_rate": 4.3309826008774365e-06, + "loss": 0.3045, + "step": 11154 + }, + { + "epoch": 0.5225558626504895, + "grad_norm": 0.6096403688117996, + "learning_rate": 4.330853466217153e-06, + "loss": 0.3041, + "step": 11155 + }, + { + "epoch": 0.5226027076404178, + "grad_norm": 0.5812958940905015, + "learning_rate": 4.330724321020767e-06, + "loss": 0.2846, + "step": 11156 + }, + { + "epoch": 0.5226495526303462, + "grad_norm": 0.615778521783335, + "learning_rate": 4.3305951652890235e-06, + "loss": 0.2852, + "step": 11157 + }, + { + "epoch": 0.5226963976202745, + "grad_norm": 0.5949445472510068, + "learning_rate": 4.330465999022665e-06, + "loss": 0.2785, + "step": 11158 + }, + { + "epoch": 0.5227432426102029, + "grad_norm": 0.6578038247842574, + "learning_rate": 4.330336822222434e-06, + "loss": 0.3067, + "step": 11159 + }, + { + "epoch": 0.5227900876001311, + "grad_norm": 0.587853906354717, + "learning_rate": 4.330207634889075e-06, + "loss": 0.2954, + "step": 11160 + }, + { + "epoch": 0.5228369325900595, + "grad_norm": 0.6113799066145682, + "learning_rate": 4.330078437023331e-06, + "loss": 0.291, + "step": 11161 + }, + { + "epoch": 0.5228837775799878, + "grad_norm": 0.6143376785590579, + "learning_rate": 4.329949228625946e-06, + "loss": 0.3006, + "step": 11162 + }, + { + "epoch": 0.5229306225699162, + "grad_norm": 0.606968898696699, + "learning_rate": 4.329820009697663e-06, + "loss": 0.2741, + "step": 11163 + }, + { + "epoch": 0.5229774675598444, + "grad_norm": 0.6153974264382114, + "learning_rate": 4.329690780239225e-06, + "loss": 0.2973, + "step": 11164 + }, + { + "epoch": 0.5230243125497728, + "grad_norm": 0.6202157566990727, + "learning_rate": 4.329561540251377e-06, + "loss": 0.2903, + "step": 11165 + }, + { + "epoch": 0.5230711575397011, + "grad_norm": 0.5852564271139712, + "learning_rate": 4.329432289734862e-06, + "loss": 0.2856, + "step": 11166 + }, + { + "epoch": 0.5231180025296295, + "grad_norm": 0.607489401794436, + "learning_rate": 4.329303028690425e-06, + "loss": 0.2979, + "step": 11167 + }, + { + "epoch": 0.5231648475195578, + "grad_norm": 0.6350340317039198, + "learning_rate": 4.329173757118808e-06, + "loss": 0.3144, + "step": 11168 + }, + { + "epoch": 0.5232116925094861, + "grad_norm": 0.5825203744031735, + "learning_rate": 4.329044475020755e-06, + "loss": 0.3021, + "step": 11169 + }, + { + "epoch": 0.5232585374994144, + "grad_norm": 0.6340621915312294, + "learning_rate": 4.328915182397012e-06, + "loss": 0.2989, + "step": 11170 + }, + { + "epoch": 0.5233053824893428, + "grad_norm": 0.5631226444299061, + "learning_rate": 4.328785879248321e-06, + "loss": 0.2706, + "step": 11171 + }, + { + "epoch": 0.5233522274792711, + "grad_norm": 0.597621318908221, + "learning_rate": 4.328656565575427e-06, + "loss": 0.2778, + "step": 11172 + }, + { + "epoch": 0.5233990724691994, + "grad_norm": 0.6181459003720743, + "learning_rate": 4.3285272413790745e-06, + "loss": 0.2761, + "step": 11173 + }, + { + "epoch": 0.5234459174591277, + "grad_norm": 0.645492135616877, + "learning_rate": 4.328397906660007e-06, + "loss": 0.2863, + "step": 11174 + }, + { + "epoch": 0.5234927624490561, + "grad_norm": 0.5386895381524486, + "learning_rate": 4.32826856141897e-06, + "loss": 0.2785, + "step": 11175 + }, + { + "epoch": 0.5235396074389844, + "grad_norm": 0.5452722545270224, + "learning_rate": 4.328139205656705e-06, + "loss": 0.2648, + "step": 11176 + }, + { + "epoch": 0.5235864524289128, + "grad_norm": 0.6245906043137692, + "learning_rate": 4.32800983937396e-06, + "loss": 0.2831, + "step": 11177 + }, + { + "epoch": 0.523633297418841, + "grad_norm": 0.5559201576271079, + "learning_rate": 4.327880462571478e-06, + "loss": 0.2737, + "step": 11178 + }, + { + "epoch": 0.5236801424087694, + "grad_norm": 0.6023751567550435, + "learning_rate": 4.327751075250003e-06, + "loss": 0.2893, + "step": 11179 + }, + { + "epoch": 0.5237269873986977, + "grad_norm": 0.619190679001321, + "learning_rate": 4.3276216774102795e-06, + "loss": 0.2908, + "step": 11180 + }, + { + "epoch": 0.5237738323886261, + "grad_norm": 0.6514077501103835, + "learning_rate": 4.3274922690530525e-06, + "loss": 0.312, + "step": 11181 + }, + { + "epoch": 0.5238206773785543, + "grad_norm": 0.5612996652147356, + "learning_rate": 4.327362850179067e-06, + "loss": 0.2821, + "step": 11182 + }, + { + "epoch": 0.5238675223684827, + "grad_norm": 0.6005174829110926, + "learning_rate": 4.3272334207890675e-06, + "loss": 0.2894, + "step": 11183 + }, + { + "epoch": 0.523914367358411, + "grad_norm": 0.642668833973062, + "learning_rate": 4.3271039808837985e-06, + "loss": 0.3099, + "step": 11184 + }, + { + "epoch": 0.5239612123483394, + "grad_norm": 0.5848491544343796, + "learning_rate": 4.326974530464006e-06, + "loss": 0.3009, + "step": 11185 + }, + { + "epoch": 0.5240080573382677, + "grad_norm": 0.6020490665146434, + "learning_rate": 4.326845069530435e-06, + "loss": 0.2962, + "step": 11186 + }, + { + "epoch": 0.524054902328196, + "grad_norm": 0.5661507929371233, + "learning_rate": 4.326715598083829e-06, + "loss": 0.286, + "step": 11187 + }, + { + "epoch": 0.5241017473181243, + "grad_norm": 0.5634578536447936, + "learning_rate": 4.326586116124933e-06, + "loss": 0.2827, + "step": 11188 + }, + { + "epoch": 0.5241485923080527, + "grad_norm": 0.5443235810413771, + "learning_rate": 4.326456623654495e-06, + "loss": 0.2628, + "step": 11189 + }, + { + "epoch": 0.524195437297981, + "grad_norm": 0.6183546922197048, + "learning_rate": 4.326327120673256e-06, + "loss": 0.2744, + "step": 11190 + }, + { + "epoch": 0.5242422822879093, + "grad_norm": 0.5813307024929032, + "learning_rate": 4.326197607181965e-06, + "loss": 0.2997, + "step": 11191 + }, + { + "epoch": 0.5242891272778376, + "grad_norm": 0.5558420561555407, + "learning_rate": 4.326068083181365e-06, + "loss": 0.2871, + "step": 11192 + }, + { + "epoch": 0.524335972267766, + "grad_norm": 0.519323759634061, + "learning_rate": 4.325938548672203e-06, + "loss": 0.2637, + "step": 11193 + }, + { + "epoch": 0.5243828172576943, + "grad_norm": 0.5737252428737483, + "learning_rate": 4.325809003655222e-06, + "loss": 0.2774, + "step": 11194 + }, + { + "epoch": 0.5244296622476227, + "grad_norm": 0.5998531869625489, + "learning_rate": 4.325679448131171e-06, + "loss": 0.3023, + "step": 11195 + }, + { + "epoch": 0.5244765072375509, + "grad_norm": 0.6210627059957736, + "learning_rate": 4.3255498821007925e-06, + "loss": 0.3017, + "step": 11196 + }, + { + "epoch": 0.5245233522274793, + "grad_norm": 0.545605169114864, + "learning_rate": 4.325420305564833e-06, + "loss": 0.263, + "step": 11197 + }, + { + "epoch": 0.5245701972174076, + "grad_norm": 0.5641712878033072, + "learning_rate": 4.325290718524039e-06, + "loss": 0.2665, + "step": 11198 + }, + { + "epoch": 0.524617042207336, + "grad_norm": 0.5834024627465492, + "learning_rate": 4.325161120979156e-06, + "loss": 0.2855, + "step": 11199 + }, + { + "epoch": 0.5246638871972642, + "grad_norm": 0.5442000997349271, + "learning_rate": 4.325031512930929e-06, + "loss": 0.278, + "step": 11200 + }, + { + "epoch": 0.5247107321871926, + "grad_norm": 0.6681653540052613, + "learning_rate": 4.3249018943801045e-06, + "loss": 0.3031, + "step": 11201 + }, + { + "epoch": 0.5247575771771209, + "grad_norm": 0.6647178855276488, + "learning_rate": 4.324772265327428e-06, + "loss": 0.2989, + "step": 11202 + }, + { + "epoch": 0.5248044221670493, + "grad_norm": 0.624018524224535, + "learning_rate": 4.324642625773646e-06, + "loss": 0.2775, + "step": 11203 + }, + { + "epoch": 0.5248512671569776, + "grad_norm": 0.6173240671450925, + "learning_rate": 4.324512975719504e-06, + "loss": 0.2762, + "step": 11204 + }, + { + "epoch": 0.5248981121469058, + "grad_norm": 0.5926707779905205, + "learning_rate": 4.3243833151657484e-06, + "loss": 0.2917, + "step": 11205 + }, + { + "epoch": 0.5249449571368342, + "grad_norm": 0.604008956707494, + "learning_rate": 4.324253644113126e-06, + "loss": 0.2873, + "step": 11206 + }, + { + "epoch": 0.5249918021267626, + "grad_norm": 0.6123615366961542, + "learning_rate": 4.324123962562382e-06, + "loss": 0.2864, + "step": 11207 + }, + { + "epoch": 0.5250386471166909, + "grad_norm": 0.6749602090468068, + "learning_rate": 4.323994270514263e-06, + "loss": 0.3044, + "step": 11208 + }, + { + "epoch": 0.5250854921066191, + "grad_norm": 0.6440117524476826, + "learning_rate": 4.323864567969516e-06, + "loss": 0.2992, + "step": 11209 + }, + { + "epoch": 0.5251323370965475, + "grad_norm": 0.6238580169359921, + "learning_rate": 4.323734854928886e-06, + "loss": 0.3072, + "step": 11210 + }, + { + "epoch": 0.5251791820864758, + "grad_norm": 0.5552410064971803, + "learning_rate": 4.32360513139312e-06, + "loss": 0.2678, + "step": 11211 + }, + { + "epoch": 0.5252260270764042, + "grad_norm": 0.6035837450348757, + "learning_rate": 4.323475397362966e-06, + "loss": 0.2817, + "step": 11212 + }, + { + "epoch": 0.5252728720663326, + "grad_norm": 0.6089050511406808, + "learning_rate": 4.323345652839168e-06, + "loss": 0.3047, + "step": 11213 + }, + { + "epoch": 0.5253197170562608, + "grad_norm": 0.6198678103858211, + "learning_rate": 4.323215897822476e-06, + "loss": 0.2923, + "step": 11214 + }, + { + "epoch": 0.5253665620461891, + "grad_norm": 0.651090756923342, + "learning_rate": 4.323086132313633e-06, + "loss": 0.304, + "step": 11215 + }, + { + "epoch": 0.5254134070361175, + "grad_norm": 0.5944503201931962, + "learning_rate": 4.322956356313388e-06, + "loss": 0.2879, + "step": 11216 + }, + { + "epoch": 0.5254602520260458, + "grad_norm": 0.5795637817937612, + "learning_rate": 4.322826569822488e-06, + "loss": 0.287, + "step": 11217 + }, + { + "epoch": 0.5255070970159741, + "grad_norm": 0.6333825875486857, + "learning_rate": 4.322696772841679e-06, + "loss": 0.2859, + "step": 11218 + }, + { + "epoch": 0.5255539420059024, + "grad_norm": 0.6428318312589405, + "learning_rate": 4.322566965371707e-06, + "loss": 0.32, + "step": 11219 + }, + { + "epoch": 0.5256007869958308, + "grad_norm": 0.6197250107079925, + "learning_rate": 4.322437147413321e-06, + "loss": 0.303, + "step": 11220 + }, + { + "epoch": 0.5256476319857591, + "grad_norm": 0.6443247206071037, + "learning_rate": 4.322307318967268e-06, + "loss": 0.305, + "step": 11221 + }, + { + "epoch": 0.5256944769756875, + "grad_norm": 0.6301589678890631, + "learning_rate": 4.322177480034293e-06, + "loss": 0.3122, + "step": 11222 + }, + { + "epoch": 0.5257413219656157, + "grad_norm": 0.5809818536070755, + "learning_rate": 4.322047630615146e-06, + "loss": 0.2925, + "step": 11223 + }, + { + "epoch": 0.5257881669555441, + "grad_norm": 0.5905678573177042, + "learning_rate": 4.321917770710572e-06, + "loss": 0.2983, + "step": 11224 + }, + { + "epoch": 0.5258350119454724, + "grad_norm": 0.552183848470397, + "learning_rate": 4.321787900321319e-06, + "loss": 0.2857, + "step": 11225 + }, + { + "epoch": 0.5258818569354008, + "grad_norm": 0.6355753007616421, + "learning_rate": 4.321658019448135e-06, + "loss": 0.2899, + "step": 11226 + }, + { + "epoch": 0.525928701925329, + "grad_norm": 0.6040471430997754, + "learning_rate": 4.321528128091768e-06, + "loss": 0.2962, + "step": 11227 + }, + { + "epoch": 0.5259755469152574, + "grad_norm": 0.5879236961651527, + "learning_rate": 4.321398226252963e-06, + "loss": 0.2816, + "step": 11228 + }, + { + "epoch": 0.5260223919051857, + "grad_norm": 0.5960948399304394, + "learning_rate": 4.321268313932469e-06, + "loss": 0.2676, + "step": 11229 + }, + { + "epoch": 0.5260692368951141, + "grad_norm": 0.6201553339298324, + "learning_rate": 4.3211383911310335e-06, + "loss": 0.2687, + "step": 11230 + }, + { + "epoch": 0.5261160818850424, + "grad_norm": 0.5594483864799087, + "learning_rate": 4.321008457849405e-06, + "loss": 0.2685, + "step": 11231 + }, + { + "epoch": 0.5261629268749707, + "grad_norm": 0.6383183110138034, + "learning_rate": 4.3208785140883305e-06, + "loss": 0.2985, + "step": 11232 + }, + { + "epoch": 0.526209771864899, + "grad_norm": 0.5857403563191396, + "learning_rate": 4.320748559848558e-06, + "loss": 0.2759, + "step": 11233 + }, + { + "epoch": 0.5262566168548274, + "grad_norm": 0.5675172091341794, + "learning_rate": 4.320618595130834e-06, + "loss": 0.2752, + "step": 11234 + }, + { + "epoch": 0.5263034618447557, + "grad_norm": 0.6036301509435799, + "learning_rate": 4.320488619935909e-06, + "loss": 0.2898, + "step": 11235 + }, + { + "epoch": 0.526350306834684, + "grad_norm": 0.6216328004540036, + "learning_rate": 4.320358634264527e-06, + "loss": 0.3174, + "step": 11236 + }, + { + "epoch": 0.5263971518246123, + "grad_norm": 0.5869446151371548, + "learning_rate": 4.320228638117442e-06, + "loss": 0.2699, + "step": 11237 + }, + { + "epoch": 0.5264439968145407, + "grad_norm": 0.6147176771643923, + "learning_rate": 4.320098631495397e-06, + "loss": 0.2904, + "step": 11238 + }, + { + "epoch": 0.526490841804469, + "grad_norm": 0.6007825143172656, + "learning_rate": 4.319968614399141e-06, + "loss": 0.2782, + "step": 11239 + }, + { + "epoch": 0.5265376867943974, + "grad_norm": 0.6142162129144018, + "learning_rate": 4.3198385868294245e-06, + "loss": 0.2869, + "step": 11240 + }, + { + "epoch": 0.5265845317843256, + "grad_norm": 0.5920078283446599, + "learning_rate": 4.319708548786994e-06, + "loss": 0.2793, + "step": 11241 + }, + { + "epoch": 0.526631376774254, + "grad_norm": 0.631644813626642, + "learning_rate": 4.319578500272598e-06, + "loss": 0.297, + "step": 11242 + }, + { + "epoch": 0.5266782217641823, + "grad_norm": 0.5863564964508207, + "learning_rate": 4.3194484412869845e-06, + "loss": 0.2681, + "step": 11243 + }, + { + "epoch": 0.5267250667541107, + "grad_norm": 0.5881293989218185, + "learning_rate": 4.319318371830903e-06, + "loss": 0.2806, + "step": 11244 + }, + { + "epoch": 0.5267719117440389, + "grad_norm": 0.6334908918928498, + "learning_rate": 4.319188291905102e-06, + "loss": 0.301, + "step": 11245 + }, + { + "epoch": 0.5268187567339673, + "grad_norm": 0.5798310257190257, + "learning_rate": 4.319058201510329e-06, + "loss": 0.278, + "step": 11246 + }, + { + "epoch": 0.5268656017238956, + "grad_norm": 0.6168057187331119, + "learning_rate": 4.318928100647334e-06, + "loss": 0.3083, + "step": 11247 + }, + { + "epoch": 0.526912446713824, + "grad_norm": 0.6030897978828261, + "learning_rate": 4.318797989316864e-06, + "loss": 0.3022, + "step": 11248 + }, + { + "epoch": 0.5269592917037523, + "grad_norm": 0.5954481073038266, + "learning_rate": 4.318667867519669e-06, + "loss": 0.2607, + "step": 11249 + }, + { + "epoch": 0.5270061366936806, + "grad_norm": 0.6878411164337394, + "learning_rate": 4.318537735256498e-06, + "loss": 0.305, + "step": 11250 + }, + { + "epoch": 0.5270529816836089, + "grad_norm": 0.5799120147129967, + "learning_rate": 4.318407592528099e-06, + "loss": 0.2881, + "step": 11251 + }, + { + "epoch": 0.5270998266735373, + "grad_norm": 0.57759969070608, + "learning_rate": 4.3182774393352216e-06, + "loss": 0.288, + "step": 11252 + }, + { + "epoch": 0.5271466716634656, + "grad_norm": 0.6182600571846496, + "learning_rate": 4.318147275678615e-06, + "loss": 0.2957, + "step": 11253 + }, + { + "epoch": 0.5271935166533939, + "grad_norm": 0.5664742971585459, + "learning_rate": 4.318017101559027e-06, + "loss": 0.291, + "step": 11254 + }, + { + "epoch": 0.5272403616433222, + "grad_norm": 0.6149664202969206, + "learning_rate": 4.317886916977208e-06, + "loss": 0.2702, + "step": 11255 + }, + { + "epoch": 0.5272872066332506, + "grad_norm": 0.5798044227280318, + "learning_rate": 4.317756721933906e-06, + "loss": 0.2899, + "step": 11256 + }, + { + "epoch": 0.5273340516231789, + "grad_norm": 0.5809952440872196, + "learning_rate": 4.317626516429871e-06, + "loss": 0.2753, + "step": 11257 + }, + { + "epoch": 0.5273808966131073, + "grad_norm": 0.6605740348512601, + "learning_rate": 4.317496300465852e-06, + "loss": 0.3084, + "step": 11258 + }, + { + "epoch": 0.5274277416030355, + "grad_norm": 0.5905848890735769, + "learning_rate": 4.3173660740426e-06, + "loss": 0.286, + "step": 11259 + }, + { + "epoch": 0.5274745865929639, + "grad_norm": 0.6168534964938415, + "learning_rate": 4.317235837160861e-06, + "loss": 0.2679, + "step": 11260 + }, + { + "epoch": 0.5275214315828922, + "grad_norm": 0.5833688190595254, + "learning_rate": 4.317105589821389e-06, + "loss": 0.2857, + "step": 11261 + }, + { + "epoch": 0.5275682765728206, + "grad_norm": 0.5750734849596388, + "learning_rate": 4.316975332024929e-06, + "loss": 0.2975, + "step": 11262 + }, + { + "epoch": 0.5276151215627488, + "grad_norm": 0.5863298911873385, + "learning_rate": 4.316845063772234e-06, + "loss": 0.2799, + "step": 11263 + }, + { + "epoch": 0.5276619665526772, + "grad_norm": 0.5739099167250009, + "learning_rate": 4.316714785064051e-06, + "loss": 0.2825, + "step": 11264 + }, + { + "epoch": 0.5277088115426055, + "grad_norm": 0.6024671289068025, + "learning_rate": 4.316584495901132e-06, + "loss": 0.2952, + "step": 11265 + }, + { + "epoch": 0.5277556565325339, + "grad_norm": 0.6502946608526564, + "learning_rate": 4.316454196284226e-06, + "loss": 0.3096, + "step": 11266 + }, + { + "epoch": 0.5278025015224622, + "grad_norm": 0.5932284549092569, + "learning_rate": 4.316323886214082e-06, + "loss": 0.2903, + "step": 11267 + }, + { + "epoch": 0.5278493465123905, + "grad_norm": 0.6058733365878423, + "learning_rate": 4.3161935656914505e-06, + "loss": 0.2969, + "step": 11268 + }, + { + "epoch": 0.5278961915023188, + "grad_norm": 0.5693324184715548, + "learning_rate": 4.3160632347170815e-06, + "loss": 0.276, + "step": 11269 + }, + { + "epoch": 0.5279430364922472, + "grad_norm": 0.5863394062353212, + "learning_rate": 4.315932893291725e-06, + "loss": 0.2758, + "step": 11270 + }, + { + "epoch": 0.5279898814821755, + "grad_norm": 0.5796132386413186, + "learning_rate": 4.315802541416132e-06, + "loss": 0.287, + "step": 11271 + }, + { + "epoch": 0.5280367264721038, + "grad_norm": 0.59047276855687, + "learning_rate": 4.315672179091051e-06, + "loss": 0.3062, + "step": 11272 + }, + { + "epoch": 0.5280835714620321, + "grad_norm": 0.5561638733427162, + "learning_rate": 4.315541806317233e-06, + "loss": 0.2733, + "step": 11273 + }, + { + "epoch": 0.5281304164519605, + "grad_norm": 0.5674715611833585, + "learning_rate": 4.315411423095428e-06, + "loss": 0.282, + "step": 11274 + }, + { + "epoch": 0.5281772614418888, + "grad_norm": 0.6164861356255212, + "learning_rate": 4.315281029426388e-06, + "loss": 0.2909, + "step": 11275 + }, + { + "epoch": 0.5282241064318172, + "grad_norm": 0.5950736059873349, + "learning_rate": 4.31515062531086e-06, + "loss": 0.2708, + "step": 11276 + }, + { + "epoch": 0.5282709514217454, + "grad_norm": 0.630554853225131, + "learning_rate": 4.3150202107495975e-06, + "loss": 0.3097, + "step": 11277 + }, + { + "epoch": 0.5283177964116738, + "grad_norm": 0.6079355272073889, + "learning_rate": 4.314889785743349e-06, + "loss": 0.2692, + "step": 11278 + }, + { + "epoch": 0.5283646414016021, + "grad_norm": 0.595154144836531, + "learning_rate": 4.314759350292867e-06, + "loss": 0.2831, + "step": 11279 + }, + { + "epoch": 0.5284114863915305, + "grad_norm": 0.5678957248874362, + "learning_rate": 4.3146289043989e-06, + "loss": 0.2952, + "step": 11280 + }, + { + "epoch": 0.5284583313814587, + "grad_norm": 0.5519357331360858, + "learning_rate": 4.314498448062201e-06, + "loss": 0.2664, + "step": 11281 + }, + { + "epoch": 0.528505176371387, + "grad_norm": 0.5926193374083015, + "learning_rate": 4.314367981283518e-06, + "loss": 0.2945, + "step": 11282 + }, + { + "epoch": 0.5285520213613154, + "grad_norm": 0.591284237567691, + "learning_rate": 4.314237504063604e-06, + "loss": 0.2841, + "step": 11283 + }, + { + "epoch": 0.5285988663512438, + "grad_norm": 0.6374228409006919, + "learning_rate": 4.31410701640321e-06, + "loss": 0.2905, + "step": 11284 + }, + { + "epoch": 0.5286457113411721, + "grad_norm": 0.5871945538309367, + "learning_rate": 4.313976518303085e-06, + "loss": 0.299, + "step": 11285 + }, + { + "epoch": 0.5286925563311003, + "grad_norm": 0.598894796201752, + "learning_rate": 4.313846009763981e-06, + "loss": 0.2783, + "step": 11286 + }, + { + "epoch": 0.5287394013210287, + "grad_norm": 0.5834824504076689, + "learning_rate": 4.3137154907866505e-06, + "loss": 0.2841, + "step": 11287 + }, + { + "epoch": 0.528786246310957, + "grad_norm": 0.5273800142393499, + "learning_rate": 4.313584961371842e-06, + "loss": 0.2706, + "step": 11288 + }, + { + "epoch": 0.5288330913008854, + "grad_norm": 0.6813690460009445, + "learning_rate": 4.313454421520308e-06, + "loss": 0.3243, + "step": 11289 + }, + { + "epoch": 0.5288799362908136, + "grad_norm": 0.5679117948897489, + "learning_rate": 4.3133238712328e-06, + "loss": 0.3041, + "step": 11290 + }, + { + "epoch": 0.528926781280742, + "grad_norm": 0.5991442388206435, + "learning_rate": 4.313193310510068e-06, + "loss": 0.3045, + "step": 11291 + }, + { + "epoch": 0.5289736262706703, + "grad_norm": 0.597682301375767, + "learning_rate": 4.3130627393528645e-06, + "loss": 0.2885, + "step": 11292 + }, + { + "epoch": 0.5290204712605987, + "grad_norm": 0.681208511137408, + "learning_rate": 4.312932157761941e-06, + "loss": 0.284, + "step": 11293 + }, + { + "epoch": 0.529067316250527, + "grad_norm": 0.6559497544659332, + "learning_rate": 4.312801565738049e-06, + "loss": 0.3062, + "step": 11294 + }, + { + "epoch": 0.5291141612404553, + "grad_norm": 0.6173524428851636, + "learning_rate": 4.312670963281938e-06, + "loss": 0.2968, + "step": 11295 + }, + { + "epoch": 0.5291610062303836, + "grad_norm": 0.6421693507982457, + "learning_rate": 4.312540350394363e-06, + "loss": 0.3255, + "step": 11296 + }, + { + "epoch": 0.529207851220312, + "grad_norm": 0.5859463821578861, + "learning_rate": 4.3124097270760735e-06, + "loss": 0.2735, + "step": 11297 + }, + { + "epoch": 0.5292546962102403, + "grad_norm": 0.6294564699898239, + "learning_rate": 4.312279093327822e-06, + "loss": 0.2977, + "step": 11298 + }, + { + "epoch": 0.5293015412001686, + "grad_norm": 0.5152388770414839, + "learning_rate": 4.312148449150358e-06, + "loss": 0.2449, + "step": 11299 + }, + { + "epoch": 0.5293483861900969, + "grad_norm": 0.544923304051618, + "learning_rate": 4.312017794544437e-06, + "loss": 0.2632, + "step": 11300 + }, + { + "epoch": 0.5293952311800253, + "grad_norm": 0.5929483532282955, + "learning_rate": 4.311887129510808e-06, + "loss": 0.273, + "step": 11301 + }, + { + "epoch": 0.5294420761699536, + "grad_norm": 0.5769037464221285, + "learning_rate": 4.311756454050224e-06, + "loss": 0.2851, + "step": 11302 + }, + { + "epoch": 0.529488921159882, + "grad_norm": 0.6265418888505405, + "learning_rate": 4.3116257681634365e-06, + "loss": 0.3065, + "step": 11303 + }, + { + "epoch": 0.5295357661498102, + "grad_norm": 0.5616621223125908, + "learning_rate": 4.311495071851199e-06, + "loss": 0.2754, + "step": 11304 + }, + { + "epoch": 0.5295826111397386, + "grad_norm": 0.5452020167515149, + "learning_rate": 4.311364365114262e-06, + "loss": 0.29, + "step": 11305 + }, + { + "epoch": 0.5296294561296669, + "grad_norm": 0.6489677081350045, + "learning_rate": 4.311233647953379e-06, + "loss": 0.2887, + "step": 11306 + }, + { + "epoch": 0.5296763011195953, + "grad_norm": 0.6286663034973345, + "learning_rate": 4.311102920369301e-06, + "loss": 0.2895, + "step": 11307 + }, + { + "epoch": 0.5297231461095235, + "grad_norm": 0.5997464492256149, + "learning_rate": 4.310972182362782e-06, + "loss": 0.2732, + "step": 11308 + }, + { + "epoch": 0.5297699910994519, + "grad_norm": 0.590705892782721, + "learning_rate": 4.310841433934572e-06, + "loss": 0.2807, + "step": 11309 + }, + { + "epoch": 0.5298168360893802, + "grad_norm": 0.6138295205364634, + "learning_rate": 4.310710675085426e-06, + "loss": 0.2987, + "step": 11310 + }, + { + "epoch": 0.5298636810793086, + "grad_norm": 0.651364695086753, + "learning_rate": 4.3105799058160935e-06, + "loss": 0.2844, + "step": 11311 + }, + { + "epoch": 0.5299105260692369, + "grad_norm": 0.6698509901924303, + "learning_rate": 4.310449126127329e-06, + "loss": 0.2973, + "step": 11312 + }, + { + "epoch": 0.5299573710591652, + "grad_norm": 0.6399057561211476, + "learning_rate": 4.310318336019886e-06, + "loss": 0.2929, + "step": 11313 + }, + { + "epoch": 0.5300042160490935, + "grad_norm": 0.6529761209348478, + "learning_rate": 4.310187535494516e-06, + "loss": 0.3297, + "step": 11314 + }, + { + "epoch": 0.5300510610390219, + "grad_norm": 0.617429270526789, + "learning_rate": 4.3100567245519705e-06, + "loss": 0.2932, + "step": 11315 + }, + { + "epoch": 0.5300979060289502, + "grad_norm": 0.6242095200479084, + "learning_rate": 4.309925903193004e-06, + "loss": 0.3067, + "step": 11316 + }, + { + "epoch": 0.5301447510188785, + "grad_norm": 0.6125739640662836, + "learning_rate": 4.309795071418369e-06, + "loss": 0.3273, + "step": 11317 + }, + { + "epoch": 0.5301915960088068, + "grad_norm": 0.5629428787014248, + "learning_rate": 4.3096642292288185e-06, + "loss": 0.2637, + "step": 11318 + }, + { + "epoch": 0.5302384409987352, + "grad_norm": 0.6172769463068702, + "learning_rate": 4.309533376625104e-06, + "loss": 0.2803, + "step": 11319 + }, + { + "epoch": 0.5302852859886635, + "grad_norm": 0.5969420061384545, + "learning_rate": 4.309402513607981e-06, + "loss": 0.2748, + "step": 11320 + }, + { + "epoch": 0.5303321309785919, + "grad_norm": 0.5508704447143699, + "learning_rate": 4.3092716401782005e-06, + "loss": 0.2704, + "step": 11321 + }, + { + "epoch": 0.5303789759685201, + "grad_norm": 0.5886030200484023, + "learning_rate": 4.309140756336518e-06, + "loss": 0.2703, + "step": 11322 + }, + { + "epoch": 0.5304258209584485, + "grad_norm": 0.6064156060448119, + "learning_rate": 4.309009862083683e-06, + "loss": 0.2899, + "step": 11323 + }, + { + "epoch": 0.5304726659483768, + "grad_norm": 0.6384749924393588, + "learning_rate": 4.308878957420451e-06, + "loss": 0.2819, + "step": 11324 + }, + { + "epoch": 0.5305195109383052, + "grad_norm": 0.5842682405137487, + "learning_rate": 4.308748042347577e-06, + "loss": 0.2753, + "step": 11325 + }, + { + "epoch": 0.5305663559282334, + "grad_norm": 0.5809090353012908, + "learning_rate": 4.308617116865812e-06, + "loss": 0.2847, + "step": 11326 + }, + { + "epoch": 0.5306132009181618, + "grad_norm": 0.6387561197445051, + "learning_rate": 4.30848618097591e-06, + "loss": 0.3133, + "step": 11327 + }, + { + "epoch": 0.5306600459080901, + "grad_norm": 0.5906275526272372, + "learning_rate": 4.308355234678624e-06, + "loss": 0.2677, + "step": 11328 + }, + { + "epoch": 0.5307068908980185, + "grad_norm": 0.598615985664105, + "learning_rate": 4.308224277974708e-06, + "loss": 0.2854, + "step": 11329 + }, + { + "epoch": 0.5307537358879468, + "grad_norm": 0.557484838110234, + "learning_rate": 4.308093310864917e-06, + "loss": 0.2742, + "step": 11330 + }, + { + "epoch": 0.5308005808778751, + "grad_norm": 0.6424123296001039, + "learning_rate": 4.307962333350002e-06, + "loss": 0.2794, + "step": 11331 + }, + { + "epoch": 0.5308474258678034, + "grad_norm": 0.5802314721915315, + "learning_rate": 4.307831345430719e-06, + "loss": 0.284, + "step": 11332 + }, + { + "epoch": 0.5308942708577318, + "grad_norm": 0.5907501667427894, + "learning_rate": 4.307700347107821e-06, + "loss": 0.2879, + "step": 11333 + }, + { + "epoch": 0.5309411158476601, + "grad_norm": 0.6365903717184603, + "learning_rate": 4.307569338382061e-06, + "loss": 0.3036, + "step": 11334 + }, + { + "epoch": 0.5309879608375884, + "grad_norm": 0.5895898880103324, + "learning_rate": 4.307438319254196e-06, + "loss": 0.2851, + "step": 11335 + }, + { + "epoch": 0.5310348058275167, + "grad_norm": 0.545559824246059, + "learning_rate": 4.307307289724975e-06, + "loss": 0.2673, + "step": 11336 + }, + { + "epoch": 0.5310816508174451, + "grad_norm": 0.5711252520976919, + "learning_rate": 4.307176249795157e-06, + "loss": 0.2937, + "step": 11337 + }, + { + "epoch": 0.5311284958073734, + "grad_norm": 0.5825139148466763, + "learning_rate": 4.307045199465493e-06, + "loss": 0.2854, + "step": 11338 + }, + { + "epoch": 0.5311753407973018, + "grad_norm": 0.6422298178226802, + "learning_rate": 4.306914138736738e-06, + "loss": 0.311, + "step": 11339 + }, + { + "epoch": 0.53122218578723, + "grad_norm": 0.5941619283233918, + "learning_rate": 4.3067830676096465e-06, + "loss": 0.3009, + "step": 11340 + }, + { + "epoch": 0.5312690307771584, + "grad_norm": 0.6119972817080275, + "learning_rate": 4.306651986084972e-06, + "loss": 0.3009, + "step": 11341 + }, + { + "epoch": 0.5313158757670867, + "grad_norm": 0.5952035176236423, + "learning_rate": 4.30652089416347e-06, + "loss": 0.2796, + "step": 11342 + }, + { + "epoch": 0.5313627207570151, + "grad_norm": 0.5942250182666166, + "learning_rate": 4.306389791845894e-06, + "loss": 0.2807, + "step": 11343 + }, + { + "epoch": 0.5314095657469433, + "grad_norm": 0.6226474085791369, + "learning_rate": 4.306258679132999e-06, + "loss": 0.3149, + "step": 11344 + }, + { + "epoch": 0.5314564107368717, + "grad_norm": 0.6313319787149545, + "learning_rate": 4.306127556025539e-06, + "loss": 0.3284, + "step": 11345 + }, + { + "epoch": 0.5315032557268, + "grad_norm": 0.6048092066473686, + "learning_rate": 4.305996422524269e-06, + "loss": 0.2934, + "step": 11346 + }, + { + "epoch": 0.5315501007167284, + "grad_norm": 0.5794844100581438, + "learning_rate": 4.305865278629944e-06, + "loss": 0.2873, + "step": 11347 + }, + { + "epoch": 0.5315969457066567, + "grad_norm": 0.5960257328663404, + "learning_rate": 4.305734124343317e-06, + "loss": 0.2882, + "step": 11348 + }, + { + "epoch": 0.531643790696585, + "grad_norm": 0.647168890944848, + "learning_rate": 4.305602959665145e-06, + "loss": 0.2977, + "step": 11349 + }, + { + "epoch": 0.5316906356865133, + "grad_norm": 0.5960008705362227, + "learning_rate": 4.3054717845961814e-06, + "loss": 0.2945, + "step": 11350 + }, + { + "epoch": 0.5317374806764417, + "grad_norm": 0.5955119136425254, + "learning_rate": 4.305340599137183e-06, + "loss": 0.2996, + "step": 11351 + }, + { + "epoch": 0.53178432566637, + "grad_norm": 0.6328018669373507, + "learning_rate": 4.305209403288901e-06, + "loss": 0.2926, + "step": 11352 + }, + { + "epoch": 0.5318311706562983, + "grad_norm": 0.579061497139973, + "learning_rate": 4.305078197052093e-06, + "loss": 0.2854, + "step": 11353 + }, + { + "epoch": 0.5318780156462266, + "grad_norm": 0.6414871302962672, + "learning_rate": 4.304946980427514e-06, + "loss": 0.2941, + "step": 11354 + }, + { + "epoch": 0.531924860636155, + "grad_norm": 0.5841405817926082, + "learning_rate": 4.304815753415918e-06, + "loss": 0.299, + "step": 11355 + }, + { + "epoch": 0.5319717056260833, + "grad_norm": 0.5717091134018313, + "learning_rate": 4.304684516018063e-06, + "loss": 0.2649, + "step": 11356 + }, + { + "epoch": 0.5320185506160117, + "grad_norm": 0.6679978284192308, + "learning_rate": 4.304553268234701e-06, + "loss": 0.2904, + "step": 11357 + }, + { + "epoch": 0.5320653956059399, + "grad_norm": 0.6165100263585362, + "learning_rate": 4.304422010066588e-06, + "loss": 0.2834, + "step": 11358 + }, + { + "epoch": 0.5321122405958683, + "grad_norm": 0.6390544283089918, + "learning_rate": 4.304290741514481e-06, + "loss": 0.2868, + "step": 11359 + }, + { + "epoch": 0.5321590855857966, + "grad_norm": 0.5575798229983181, + "learning_rate": 4.3041594625791324e-06, + "loss": 0.2789, + "step": 11360 + }, + { + "epoch": 0.532205930575725, + "grad_norm": 0.5619187225031966, + "learning_rate": 4.3040281732613014e-06, + "loss": 0.2704, + "step": 11361 + }, + { + "epoch": 0.5322527755656532, + "grad_norm": 0.6686157859514064, + "learning_rate": 4.303896873561742e-06, + "loss": 0.3192, + "step": 11362 + }, + { + "epoch": 0.5322996205555816, + "grad_norm": 0.5738412651372552, + "learning_rate": 4.303765563481208e-06, + "loss": 0.2714, + "step": 11363 + }, + { + "epoch": 0.5323464655455099, + "grad_norm": 0.6262998120145054, + "learning_rate": 4.303634243020457e-06, + "loss": 0.2751, + "step": 11364 + }, + { + "epoch": 0.5323933105354383, + "grad_norm": 0.6471801620716114, + "learning_rate": 4.303502912180244e-06, + "loss": 0.2997, + "step": 11365 + }, + { + "epoch": 0.5324401555253666, + "grad_norm": 0.5369364033447693, + "learning_rate": 4.303371570961326e-06, + "loss": 0.2615, + "step": 11366 + }, + { + "epoch": 0.5324870005152949, + "grad_norm": 0.6262140235778382, + "learning_rate": 4.303240219364457e-06, + "loss": 0.3104, + "step": 11367 + }, + { + "epoch": 0.5325338455052232, + "grad_norm": 0.6349285573170685, + "learning_rate": 4.303108857390394e-06, + "loss": 0.2793, + "step": 11368 + }, + { + "epoch": 0.5325806904951516, + "grad_norm": 0.5746929341024775, + "learning_rate": 4.302977485039892e-06, + "loss": 0.2753, + "step": 11369 + }, + { + "epoch": 0.5326275354850799, + "grad_norm": 0.6054583322694195, + "learning_rate": 4.302846102313709e-06, + "loss": 0.2875, + "step": 11370 + }, + { + "epoch": 0.5326743804750081, + "grad_norm": 0.586565608167303, + "learning_rate": 4.3027147092125996e-06, + "loss": 0.2754, + "step": 11371 + }, + { + "epoch": 0.5327212254649365, + "grad_norm": 0.6174172461843961, + "learning_rate": 4.302583305737319e-06, + "loss": 0.2781, + "step": 11372 + }, + { + "epoch": 0.5327680704548649, + "grad_norm": 0.6080426490652256, + "learning_rate": 4.302451891888626e-06, + "loss": 0.2786, + "step": 11373 + }, + { + "epoch": 0.5328149154447932, + "grad_norm": 0.5838824203510917, + "learning_rate": 4.302320467667275e-06, + "loss": 0.2708, + "step": 11374 + }, + { + "epoch": 0.5328617604347216, + "grad_norm": 0.6071208466532764, + "learning_rate": 4.302189033074023e-06, + "loss": 0.2641, + "step": 11375 + }, + { + "epoch": 0.5329086054246498, + "grad_norm": 0.5756736186899585, + "learning_rate": 4.302057588109625e-06, + "loss": 0.2911, + "step": 11376 + }, + { + "epoch": 0.5329554504145781, + "grad_norm": 0.6064188407641148, + "learning_rate": 4.301926132774838e-06, + "loss": 0.2807, + "step": 11377 + }, + { + "epoch": 0.5330022954045065, + "grad_norm": 0.6035302053756196, + "learning_rate": 4.301794667070421e-06, + "loss": 0.2818, + "step": 11378 + }, + { + "epoch": 0.5330491403944349, + "grad_norm": 0.5979021196651829, + "learning_rate": 4.3016631909971275e-06, + "loss": 0.2939, + "step": 11379 + }, + { + "epoch": 0.5330959853843631, + "grad_norm": 0.6358903423730443, + "learning_rate": 4.3015317045557155e-06, + "loss": 0.284, + "step": 11380 + }, + { + "epoch": 0.5331428303742914, + "grad_norm": 0.5946920293253163, + "learning_rate": 4.30140020774694e-06, + "loss": 0.3123, + "step": 11381 + }, + { + "epoch": 0.5331896753642198, + "grad_norm": 0.6693066333608908, + "learning_rate": 4.301268700571561e-06, + "loss": 0.2853, + "step": 11382 + }, + { + "epoch": 0.5332365203541481, + "grad_norm": 0.6147190590456739, + "learning_rate": 4.301137183030333e-06, + "loss": 0.2943, + "step": 11383 + }, + { + "epoch": 0.5332833653440765, + "grad_norm": 0.6255615472506166, + "learning_rate": 4.301005655124012e-06, + "loss": 0.2932, + "step": 11384 + }, + { + "epoch": 0.5333302103340047, + "grad_norm": 0.5203075726302592, + "learning_rate": 4.300874116853357e-06, + "loss": 0.2727, + "step": 11385 + }, + { + "epoch": 0.5333770553239331, + "grad_norm": 0.5917447285733448, + "learning_rate": 4.300742568219124e-06, + "loss": 0.2915, + "step": 11386 + }, + { + "epoch": 0.5334239003138614, + "grad_norm": 0.5896488406822133, + "learning_rate": 4.30061100922207e-06, + "loss": 0.2872, + "step": 11387 + }, + { + "epoch": 0.5334707453037898, + "grad_norm": 0.5963711262638348, + "learning_rate": 4.300479439862952e-06, + "loss": 0.2769, + "step": 11388 + }, + { + "epoch": 0.533517590293718, + "grad_norm": 0.6094926614787939, + "learning_rate": 4.300347860142528e-06, + "loss": 0.2948, + "step": 11389 + }, + { + "epoch": 0.5335644352836464, + "grad_norm": 0.6528320373098491, + "learning_rate": 4.300216270061555e-06, + "loss": 0.3162, + "step": 11390 + }, + { + "epoch": 0.5336112802735747, + "grad_norm": 0.642943681659117, + "learning_rate": 4.300084669620789e-06, + "loss": 0.2954, + "step": 11391 + }, + { + "epoch": 0.5336581252635031, + "grad_norm": 0.5808185198922908, + "learning_rate": 4.299953058820988e-06, + "loss": 0.263, + "step": 11392 + }, + { + "epoch": 0.5337049702534314, + "grad_norm": 0.615431619049176, + "learning_rate": 4.29982143766291e-06, + "loss": 0.2939, + "step": 11393 + }, + { + "epoch": 0.5337518152433597, + "grad_norm": 0.5927510188542722, + "learning_rate": 4.299689806147311e-06, + "loss": 0.2672, + "step": 11394 + }, + { + "epoch": 0.533798660233288, + "grad_norm": 0.664898973359716, + "learning_rate": 4.299558164274951e-06, + "loss": 0.3019, + "step": 11395 + }, + { + "epoch": 0.5338455052232164, + "grad_norm": 0.5527678024915794, + "learning_rate": 4.299426512046585e-06, + "loss": 0.2734, + "step": 11396 + }, + { + "epoch": 0.5338923502131447, + "grad_norm": 0.5754837461450508, + "learning_rate": 4.299294849462971e-06, + "loss": 0.316, + "step": 11397 + }, + { + "epoch": 0.533939195203073, + "grad_norm": 0.5991640153352094, + "learning_rate": 4.299163176524869e-06, + "loss": 0.3017, + "step": 11398 + }, + { + "epoch": 0.5339860401930013, + "grad_norm": 0.5812850698900138, + "learning_rate": 4.2990314932330345e-06, + "loss": 0.3057, + "step": 11399 + }, + { + "epoch": 0.5340328851829297, + "grad_norm": 0.6103429303564384, + "learning_rate": 4.298899799588226e-06, + "loss": 0.2992, + "step": 11400 + }, + { + "epoch": 0.534079730172858, + "grad_norm": 0.590840119339859, + "learning_rate": 4.298768095591201e-06, + "loss": 0.2815, + "step": 11401 + }, + { + "epoch": 0.5341265751627864, + "grad_norm": 0.6314740868810684, + "learning_rate": 4.298636381242718e-06, + "loss": 0.2792, + "step": 11402 + }, + { + "epoch": 0.5341734201527146, + "grad_norm": 0.6311704389315523, + "learning_rate": 4.2985046565435355e-06, + "loss": 0.2732, + "step": 11403 + }, + { + "epoch": 0.534220265142643, + "grad_norm": 0.6081662694162839, + "learning_rate": 4.2983729214944095e-06, + "loss": 0.2876, + "step": 11404 + }, + { + "epoch": 0.5342671101325713, + "grad_norm": 0.6277221496897568, + "learning_rate": 4.298241176096099e-06, + "loss": 0.2982, + "step": 11405 + }, + { + "epoch": 0.5343139551224997, + "grad_norm": 0.6026631451208251, + "learning_rate": 4.298109420349363e-06, + "loss": 0.3, + "step": 11406 + }, + { + "epoch": 0.5343608001124279, + "grad_norm": 0.5683708294884219, + "learning_rate": 4.29797765425496e-06, + "loss": 0.2865, + "step": 11407 + }, + { + "epoch": 0.5344076451023563, + "grad_norm": 0.5905948486901923, + "learning_rate": 4.297845877813646e-06, + "loss": 0.2985, + "step": 11408 + }, + { + "epoch": 0.5344544900922846, + "grad_norm": 0.5762373688665717, + "learning_rate": 4.2977140910261826e-06, + "loss": 0.2745, + "step": 11409 + }, + { + "epoch": 0.534501335082213, + "grad_norm": 0.5722237515607155, + "learning_rate": 4.297582293893326e-06, + "loss": 0.2756, + "step": 11410 + }, + { + "epoch": 0.5345481800721413, + "grad_norm": 0.6526841617303434, + "learning_rate": 4.297450486415834e-06, + "loss": 0.2967, + "step": 11411 + }, + { + "epoch": 0.5345950250620696, + "grad_norm": 0.5427804162082527, + "learning_rate": 4.297318668594468e-06, + "loss": 0.2949, + "step": 11412 + }, + { + "epoch": 0.5346418700519979, + "grad_norm": 0.5722303990959109, + "learning_rate": 4.297186840429983e-06, + "loss": 0.2919, + "step": 11413 + }, + { + "epoch": 0.5346887150419263, + "grad_norm": 0.5773523162133947, + "learning_rate": 4.29705500192314e-06, + "loss": 0.285, + "step": 11414 + }, + { + "epoch": 0.5347355600318546, + "grad_norm": 0.6070491529117119, + "learning_rate": 4.296923153074698e-06, + "loss": 0.302, + "step": 11415 + }, + { + "epoch": 0.5347824050217829, + "grad_norm": 0.5588203498181832, + "learning_rate": 4.296791293885414e-06, + "loss": 0.2784, + "step": 11416 + }, + { + "epoch": 0.5348292500117112, + "grad_norm": 0.5839921050221059, + "learning_rate": 4.296659424356048e-06, + "loss": 0.2854, + "step": 11417 + }, + { + "epoch": 0.5348760950016396, + "grad_norm": 0.6168012311534857, + "learning_rate": 4.2965275444873584e-06, + "loss": 0.2863, + "step": 11418 + }, + { + "epoch": 0.5349229399915679, + "grad_norm": 0.5801850543649936, + "learning_rate": 4.296395654280104e-06, + "loss": 0.2793, + "step": 11419 + }, + { + "epoch": 0.5349697849814963, + "grad_norm": 0.6182499600707058, + "learning_rate": 4.296263753735045e-06, + "loss": 0.2718, + "step": 11420 + }, + { + "epoch": 0.5350166299714245, + "grad_norm": 0.6243879446811565, + "learning_rate": 4.296131842852939e-06, + "loss": 0.2909, + "step": 11421 + }, + { + "epoch": 0.5350634749613529, + "grad_norm": 0.5757987869142331, + "learning_rate": 4.295999921634546e-06, + "loss": 0.2801, + "step": 11422 + }, + { + "epoch": 0.5351103199512812, + "grad_norm": 0.5692357928830372, + "learning_rate": 4.295867990080625e-06, + "loss": 0.2939, + "step": 11423 + }, + { + "epoch": 0.5351571649412096, + "grad_norm": 0.5674700297555985, + "learning_rate": 4.295736048191935e-06, + "loss": 0.285, + "step": 11424 + }, + { + "epoch": 0.5352040099311378, + "grad_norm": 0.591316285463412, + "learning_rate": 4.2956040959692345e-06, + "loss": 0.2883, + "step": 11425 + }, + { + "epoch": 0.5352508549210662, + "grad_norm": 0.6454652648590282, + "learning_rate": 4.2954721334132846e-06, + "loss": 0.3077, + "step": 11426 + }, + { + "epoch": 0.5352976999109945, + "grad_norm": 0.5417235844340065, + "learning_rate": 4.295340160524844e-06, + "loss": 0.2654, + "step": 11427 + }, + { + "epoch": 0.5353445449009229, + "grad_norm": 0.5915079356474181, + "learning_rate": 4.295208177304671e-06, + "loss": 0.3001, + "step": 11428 + }, + { + "epoch": 0.5353913898908512, + "grad_norm": 0.6403852993567286, + "learning_rate": 4.295076183753527e-06, + "loss": 0.3031, + "step": 11429 + }, + { + "epoch": 0.5354382348807795, + "grad_norm": 0.6552660025924434, + "learning_rate": 4.29494417987217e-06, + "loss": 0.3022, + "step": 11430 + }, + { + "epoch": 0.5354850798707078, + "grad_norm": 0.5988164511890456, + "learning_rate": 4.2948121656613616e-06, + "loss": 0.2849, + "step": 11431 + }, + { + "epoch": 0.5355319248606362, + "grad_norm": 0.6161937299309677, + "learning_rate": 4.29468014112186e-06, + "loss": 0.2871, + "step": 11432 + }, + { + "epoch": 0.5355787698505645, + "grad_norm": 0.5807248283113529, + "learning_rate": 4.294548106254425e-06, + "loss": 0.2719, + "step": 11433 + }, + { + "epoch": 0.5356256148404928, + "grad_norm": 0.5693811701152013, + "learning_rate": 4.294416061059816e-06, + "loss": 0.2705, + "step": 11434 + }, + { + "epoch": 0.5356724598304211, + "grad_norm": 0.556881839419766, + "learning_rate": 4.294284005538794e-06, + "loss": 0.2556, + "step": 11435 + }, + { + "epoch": 0.5357193048203495, + "grad_norm": 0.6079962257934488, + "learning_rate": 4.294151939692119e-06, + "loss": 0.3036, + "step": 11436 + }, + { + "epoch": 0.5357661498102778, + "grad_norm": 0.5723042194991153, + "learning_rate": 4.29401986352055e-06, + "loss": 0.259, + "step": 11437 + }, + { + "epoch": 0.5358129948002062, + "grad_norm": 0.6024039693985693, + "learning_rate": 4.293887777024847e-06, + "loss": 0.2766, + "step": 11438 + }, + { + "epoch": 0.5358598397901344, + "grad_norm": 0.6769600845545584, + "learning_rate": 4.293755680205772e-06, + "loss": 0.2815, + "step": 11439 + }, + { + "epoch": 0.5359066847800628, + "grad_norm": 0.5739217586999655, + "learning_rate": 4.293623573064084e-06, + "loss": 0.2958, + "step": 11440 + }, + { + "epoch": 0.5359535297699911, + "grad_norm": 0.5631734412148228, + "learning_rate": 4.293491455600543e-06, + "loss": 0.2926, + "step": 11441 + }, + { + "epoch": 0.5360003747599195, + "grad_norm": 0.606045062940084, + "learning_rate": 4.2933593278159085e-06, + "loss": 0.2996, + "step": 11442 + }, + { + "epoch": 0.5360472197498477, + "grad_norm": 0.6007225520432381, + "learning_rate": 4.293227189710942e-06, + "loss": 0.2615, + "step": 11443 + }, + { + "epoch": 0.536094064739776, + "grad_norm": 0.6133004338370007, + "learning_rate": 4.2930950412864044e-06, + "loss": 0.3138, + "step": 11444 + }, + { + "epoch": 0.5361409097297044, + "grad_norm": 0.6033737614202144, + "learning_rate": 4.292962882543055e-06, + "loss": 0.3063, + "step": 11445 + }, + { + "epoch": 0.5361877547196328, + "grad_norm": 0.5092415196230977, + "learning_rate": 4.292830713481655e-06, + "loss": 0.2798, + "step": 11446 + }, + { + "epoch": 0.5362345997095611, + "grad_norm": 0.5510971936804995, + "learning_rate": 4.292698534102965e-06, + "loss": 0.2896, + "step": 11447 + }, + { + "epoch": 0.5362814446994894, + "grad_norm": 0.594340268868751, + "learning_rate": 4.292566344407746e-06, + "loss": 0.3069, + "step": 11448 + }, + { + "epoch": 0.5363282896894177, + "grad_norm": 0.5757408765324636, + "learning_rate": 4.292434144396757e-06, + "loss": 0.3017, + "step": 11449 + }, + { + "epoch": 0.536375134679346, + "grad_norm": 0.6146379764977888, + "learning_rate": 4.29230193407076e-06, + "loss": 0.2786, + "step": 11450 + }, + { + "epoch": 0.5364219796692744, + "grad_norm": 0.546200769247726, + "learning_rate": 4.292169713430517e-06, + "loss": 0.261, + "step": 11451 + }, + { + "epoch": 0.5364688246592026, + "grad_norm": 0.586574068544976, + "learning_rate": 4.292037482476787e-06, + "loss": 0.2991, + "step": 11452 + }, + { + "epoch": 0.536515669649131, + "grad_norm": 0.5614292447746732, + "learning_rate": 4.291905241210332e-06, + "loss": 0.2663, + "step": 11453 + }, + { + "epoch": 0.5365625146390594, + "grad_norm": 0.6295039815660636, + "learning_rate": 4.291772989631913e-06, + "loss": 0.3026, + "step": 11454 + }, + { + "epoch": 0.5366093596289877, + "grad_norm": 0.5477857819132934, + "learning_rate": 4.29164072774229e-06, + "loss": 0.2847, + "step": 11455 + }, + { + "epoch": 0.536656204618916, + "grad_norm": 0.6062640653359281, + "learning_rate": 4.291508455542226e-06, + "loss": 0.2855, + "step": 11456 + }, + { + "epoch": 0.5367030496088443, + "grad_norm": 0.5861619877239231, + "learning_rate": 4.291376173032481e-06, + "loss": 0.2935, + "step": 11457 + }, + { + "epoch": 0.5367498945987726, + "grad_norm": 0.5536828077342258, + "learning_rate": 4.291243880213815e-06, + "loss": 0.2825, + "step": 11458 + }, + { + "epoch": 0.536796739588701, + "grad_norm": 0.6299350405868566, + "learning_rate": 4.291111577086992e-06, + "loss": 0.2916, + "step": 11459 + }, + { + "epoch": 0.5368435845786294, + "grad_norm": 0.5655474799057287, + "learning_rate": 4.290979263652772e-06, + "loss": 0.2637, + "step": 11460 + }, + { + "epoch": 0.5368904295685576, + "grad_norm": 0.5628761374877658, + "learning_rate": 4.2908469399119154e-06, + "loss": 0.2827, + "step": 11461 + }, + { + "epoch": 0.5369372745584859, + "grad_norm": 0.5851804077161303, + "learning_rate": 4.290714605865186e-06, + "loss": 0.2601, + "step": 11462 + }, + { + "epoch": 0.5369841195484143, + "grad_norm": 0.5974831649068771, + "learning_rate": 4.290582261513343e-06, + "loss": 0.3087, + "step": 11463 + }, + { + "epoch": 0.5370309645383426, + "grad_norm": 0.5985595191373599, + "learning_rate": 4.29044990685715e-06, + "loss": 0.3054, + "step": 11464 + }, + { + "epoch": 0.537077809528271, + "grad_norm": 0.6105673015798393, + "learning_rate": 4.290317541897367e-06, + "loss": 0.2775, + "step": 11465 + }, + { + "epoch": 0.5371246545181992, + "grad_norm": 0.5649808626608448, + "learning_rate": 4.290185166634757e-06, + "loss": 0.2855, + "step": 11466 + }, + { + "epoch": 0.5371714995081276, + "grad_norm": 0.6316254240370165, + "learning_rate": 4.290052781070082e-06, + "loss": 0.2927, + "step": 11467 + }, + { + "epoch": 0.5372183444980559, + "grad_norm": 0.6106454600664013, + "learning_rate": 4.289920385204103e-06, + "loss": 0.2922, + "step": 11468 + }, + { + "epoch": 0.5372651894879843, + "grad_norm": 0.5786839083182875, + "learning_rate": 4.289787979037581e-06, + "loss": 0.2824, + "step": 11469 + }, + { + "epoch": 0.5373120344779125, + "grad_norm": 0.581232652959623, + "learning_rate": 4.289655562571279e-06, + "loss": 0.291, + "step": 11470 + }, + { + "epoch": 0.5373588794678409, + "grad_norm": 0.6136813997609598, + "learning_rate": 4.28952313580596e-06, + "loss": 0.2907, + "step": 11471 + }, + { + "epoch": 0.5374057244577692, + "grad_norm": 0.5809089249753481, + "learning_rate": 4.289390698742385e-06, + "loss": 0.2763, + "step": 11472 + }, + { + "epoch": 0.5374525694476976, + "grad_norm": 0.618053708246491, + "learning_rate": 4.2892582513813165e-06, + "loss": 0.3057, + "step": 11473 + }, + { + "epoch": 0.5374994144376259, + "grad_norm": 0.5855249879958725, + "learning_rate": 4.289125793723515e-06, + "loss": 0.2951, + "step": 11474 + }, + { + "epoch": 0.5375462594275542, + "grad_norm": 0.5914263412181038, + "learning_rate": 4.288993325769746e-06, + "loss": 0.2863, + "step": 11475 + }, + { + "epoch": 0.5375931044174825, + "grad_norm": 0.5767450407570393, + "learning_rate": 4.288860847520769e-06, + "loss": 0.2675, + "step": 11476 + }, + { + "epoch": 0.5376399494074109, + "grad_norm": 0.5862724533362904, + "learning_rate": 4.288728358977348e-06, + "loss": 0.2875, + "step": 11477 + }, + { + "epoch": 0.5376867943973392, + "grad_norm": 0.5821075129156134, + "learning_rate": 4.288595860140245e-06, + "loss": 0.2894, + "step": 11478 + }, + { + "epoch": 0.5377336393872675, + "grad_norm": 0.6202794147169427, + "learning_rate": 4.288463351010221e-06, + "loss": 0.2981, + "step": 11479 + }, + { + "epoch": 0.5377804843771958, + "grad_norm": 0.6112564003429687, + "learning_rate": 4.288330831588041e-06, + "loss": 0.2999, + "step": 11480 + }, + { + "epoch": 0.5378273293671242, + "grad_norm": 0.564009361911459, + "learning_rate": 4.288198301874467e-06, + "loss": 0.2734, + "step": 11481 + }, + { + "epoch": 0.5378741743570525, + "grad_norm": 0.605440395193702, + "learning_rate": 4.2880657618702606e-06, + "loss": 0.2945, + "step": 11482 + }, + { + "epoch": 0.5379210193469809, + "grad_norm": 0.5703863192354761, + "learning_rate": 4.287933211576185e-06, + "loss": 0.2815, + "step": 11483 + }, + { + "epoch": 0.5379678643369091, + "grad_norm": 0.5383277732750498, + "learning_rate": 4.287800650993003e-06, + "loss": 0.2616, + "step": 11484 + }, + { + "epoch": 0.5380147093268375, + "grad_norm": 0.5651702681633196, + "learning_rate": 4.287668080121479e-06, + "loss": 0.3093, + "step": 11485 + }, + { + "epoch": 0.5380615543167658, + "grad_norm": 0.5747009614798562, + "learning_rate": 4.287535498962373e-06, + "loss": 0.2734, + "step": 11486 + }, + { + "epoch": 0.5381083993066942, + "grad_norm": 0.6259219917393574, + "learning_rate": 4.287402907516451e-06, + "loss": 0.2977, + "step": 11487 + }, + { + "epoch": 0.5381552442966224, + "grad_norm": 0.6291945730889474, + "learning_rate": 4.2872703057844726e-06, + "loss": 0.303, + "step": 11488 + }, + { + "epoch": 0.5382020892865508, + "grad_norm": 0.5612063089588164, + "learning_rate": 4.287137693767204e-06, + "loss": 0.2655, + "step": 11489 + }, + { + "epoch": 0.5382489342764791, + "grad_norm": 0.5907288209118201, + "learning_rate": 4.2870050714654066e-06, + "loss": 0.2837, + "step": 11490 + }, + { + "epoch": 0.5382957792664075, + "grad_norm": 0.6164353784726613, + "learning_rate": 4.286872438879844e-06, + "loss": 0.2839, + "step": 11491 + }, + { + "epoch": 0.5383426242563358, + "grad_norm": 0.6088708603735482, + "learning_rate": 4.286739796011281e-06, + "loss": 0.282, + "step": 11492 + }, + { + "epoch": 0.5383894692462641, + "grad_norm": 0.6663561841481135, + "learning_rate": 4.286607142860478e-06, + "loss": 0.3101, + "step": 11493 + }, + { + "epoch": 0.5384363142361924, + "grad_norm": 0.5727227702745593, + "learning_rate": 4.286474479428201e-06, + "loss": 0.274, + "step": 11494 + }, + { + "epoch": 0.5384831592261208, + "grad_norm": 0.5915099435358641, + "learning_rate": 4.286341805715212e-06, + "loss": 0.3036, + "step": 11495 + }, + { + "epoch": 0.5385300042160491, + "grad_norm": 0.6211592822603361, + "learning_rate": 4.286209121722274e-06, + "loss": 0.3098, + "step": 11496 + }, + { + "epoch": 0.5385768492059774, + "grad_norm": 0.5555578291779123, + "learning_rate": 4.286076427450153e-06, + "loss": 0.2791, + "step": 11497 + }, + { + "epoch": 0.5386236941959057, + "grad_norm": 0.596811736505642, + "learning_rate": 4.285943722899611e-06, + "loss": 0.2917, + "step": 11498 + }, + { + "epoch": 0.5386705391858341, + "grad_norm": 0.5723933101147677, + "learning_rate": 4.28581100807141e-06, + "loss": 0.2847, + "step": 11499 + }, + { + "epoch": 0.5387173841757624, + "grad_norm": 0.6112743881832284, + "learning_rate": 4.2856782829663165e-06, + "loss": 0.2879, + "step": 11500 + }, + { + "epoch": 0.5387642291656908, + "grad_norm": 0.5675901339598433, + "learning_rate": 4.285545547585094e-06, + "loss": 0.2744, + "step": 11501 + }, + { + "epoch": 0.538811074155619, + "grad_norm": 0.6058449969684189, + "learning_rate": 4.285412801928505e-06, + "loss": 0.2808, + "step": 11502 + }, + { + "epoch": 0.5388579191455474, + "grad_norm": 0.5789725941397529, + "learning_rate": 4.285280045997313e-06, + "loss": 0.2785, + "step": 11503 + }, + { + "epoch": 0.5389047641354757, + "grad_norm": 0.5831947238619302, + "learning_rate": 4.285147279792285e-06, + "loss": 0.2765, + "step": 11504 + }, + { + "epoch": 0.5389516091254041, + "grad_norm": 0.5723903179431575, + "learning_rate": 4.285014503314182e-06, + "loss": 0.2935, + "step": 11505 + }, + { + "epoch": 0.5389984541153323, + "grad_norm": 0.6029288404172345, + "learning_rate": 4.28488171656377e-06, + "loss": 0.2798, + "step": 11506 + }, + { + "epoch": 0.5390452991052607, + "grad_norm": 0.5437342200176383, + "learning_rate": 4.284748919541811e-06, + "loss": 0.2722, + "step": 11507 + }, + { + "epoch": 0.539092144095189, + "grad_norm": 0.6091449625516553, + "learning_rate": 4.284616112249071e-06, + "loss": 0.2999, + "step": 11508 + }, + { + "epoch": 0.5391389890851174, + "grad_norm": 0.633248468982462, + "learning_rate": 4.284483294686314e-06, + "loss": 0.294, + "step": 11509 + }, + { + "epoch": 0.5391858340750457, + "grad_norm": 0.6082563450283217, + "learning_rate": 4.284350466854305e-06, + "loss": 0.2872, + "step": 11510 + }, + { + "epoch": 0.539232679064974, + "grad_norm": 0.5962815430793414, + "learning_rate": 4.284217628753807e-06, + "loss": 0.2979, + "step": 11511 + }, + { + "epoch": 0.5392795240549023, + "grad_norm": 0.6047945263771167, + "learning_rate": 4.284084780385584e-06, + "loss": 0.2779, + "step": 11512 + }, + { + "epoch": 0.5393263690448307, + "grad_norm": 0.583141435947207, + "learning_rate": 4.283951921750403e-06, + "loss": 0.2745, + "step": 11513 + }, + { + "epoch": 0.539373214034759, + "grad_norm": 0.5887261841129084, + "learning_rate": 4.2838190528490265e-06, + "loss": 0.3055, + "step": 11514 + }, + { + "epoch": 0.5394200590246873, + "grad_norm": 0.5965882760895878, + "learning_rate": 4.28368617368222e-06, + "loss": 0.2769, + "step": 11515 + }, + { + "epoch": 0.5394669040146156, + "grad_norm": 0.5760203595187342, + "learning_rate": 4.283553284250747e-06, + "loss": 0.2835, + "step": 11516 + }, + { + "epoch": 0.539513749004544, + "grad_norm": 0.5773059515476314, + "learning_rate": 4.283420384555374e-06, + "loss": 0.2932, + "step": 11517 + }, + { + "epoch": 0.5395605939944723, + "grad_norm": 0.6625599775793972, + "learning_rate": 4.2832874745968645e-06, + "loss": 0.3163, + "step": 11518 + }, + { + "epoch": 0.5396074389844007, + "grad_norm": 0.6630093684553485, + "learning_rate": 4.2831545543759835e-06, + "loss": 0.2944, + "step": 11519 + }, + { + "epoch": 0.5396542839743289, + "grad_norm": 0.6813338056136711, + "learning_rate": 4.283021623893497e-06, + "loss": 0.2787, + "step": 11520 + }, + { + "epoch": 0.5397011289642573, + "grad_norm": 0.6191838054182739, + "learning_rate": 4.282888683150168e-06, + "loss": 0.2949, + "step": 11521 + }, + { + "epoch": 0.5397479739541856, + "grad_norm": 0.6036151295007349, + "learning_rate": 4.2827557321467635e-06, + "loss": 0.2933, + "step": 11522 + }, + { + "epoch": 0.539794818944114, + "grad_norm": 0.6097624502406729, + "learning_rate": 4.282622770884049e-06, + "loss": 0.3009, + "step": 11523 + }, + { + "epoch": 0.5398416639340422, + "grad_norm": 0.5907084429711538, + "learning_rate": 4.282489799362787e-06, + "loss": 0.2942, + "step": 11524 + }, + { + "epoch": 0.5398885089239706, + "grad_norm": 0.6624185994591122, + "learning_rate": 4.282356817583744e-06, + "loss": 0.2967, + "step": 11525 + }, + { + "epoch": 0.5399353539138989, + "grad_norm": 0.5835308030635463, + "learning_rate": 4.282223825547686e-06, + "loss": 0.2731, + "step": 11526 + }, + { + "epoch": 0.5399821989038273, + "grad_norm": 0.6109747801182461, + "learning_rate": 4.282090823255378e-06, + "loss": 0.2754, + "step": 11527 + }, + { + "epoch": 0.5400290438937556, + "grad_norm": 0.6066046978023258, + "learning_rate": 4.281957810707585e-06, + "loss": 0.297, + "step": 11528 + }, + { + "epoch": 0.5400758888836839, + "grad_norm": 0.65831415817742, + "learning_rate": 4.281824787905073e-06, + "loss": 0.305, + "step": 11529 + }, + { + "epoch": 0.5401227338736122, + "grad_norm": 0.5979639507083694, + "learning_rate": 4.281691754848607e-06, + "loss": 0.2819, + "step": 11530 + }, + { + "epoch": 0.5401695788635406, + "grad_norm": 0.6189533841288419, + "learning_rate": 4.281558711538952e-06, + "loss": 0.284, + "step": 11531 + }, + { + "epoch": 0.5402164238534689, + "grad_norm": 0.5795128269001542, + "learning_rate": 4.281425657976875e-06, + "loss": 0.2891, + "step": 11532 + }, + { + "epoch": 0.5402632688433971, + "grad_norm": 0.6544994442467679, + "learning_rate": 4.281292594163142e-06, + "loss": 0.3145, + "step": 11533 + }, + { + "epoch": 0.5403101138333255, + "grad_norm": 0.5870158368190377, + "learning_rate": 4.281159520098517e-06, + "loss": 0.2692, + "step": 11534 + }, + { + "epoch": 0.5403569588232539, + "grad_norm": 0.6332751726033764, + "learning_rate": 4.281026435783766e-06, + "loss": 0.2882, + "step": 11535 + }, + { + "epoch": 0.5404038038131822, + "grad_norm": 0.5591174916345129, + "learning_rate": 4.280893341219656e-06, + "loss": 0.2897, + "step": 11536 + }, + { + "epoch": 0.5404506488031106, + "grad_norm": 0.5814885041333052, + "learning_rate": 4.2807602364069535e-06, + "loss": 0.3008, + "step": 11537 + }, + { + "epoch": 0.5404974937930388, + "grad_norm": 0.5956510601251621, + "learning_rate": 4.280627121346423e-06, + "loss": 0.2855, + "step": 11538 + }, + { + "epoch": 0.5405443387829671, + "grad_norm": 0.5518597434362029, + "learning_rate": 4.2804939960388294e-06, + "loss": 0.264, + "step": 11539 + }, + { + "epoch": 0.5405911837728955, + "grad_norm": 0.589017394634095, + "learning_rate": 4.280360860484942e-06, + "loss": 0.3011, + "step": 11540 + }, + { + "epoch": 0.5406380287628239, + "grad_norm": 0.5840477075592758, + "learning_rate": 4.280227714685525e-06, + "loss": 0.2997, + "step": 11541 + }, + { + "epoch": 0.5406848737527521, + "grad_norm": 0.5885383272403364, + "learning_rate": 4.280094558641344e-06, + "loss": 0.2904, + "step": 11542 + }, + { + "epoch": 0.5407317187426804, + "grad_norm": 0.5825015430544851, + "learning_rate": 4.279961392353167e-06, + "loss": 0.2807, + "step": 11543 + }, + { + "epoch": 0.5407785637326088, + "grad_norm": 0.5665802752214036, + "learning_rate": 4.27982821582176e-06, + "loss": 0.2649, + "step": 11544 + }, + { + "epoch": 0.5408254087225371, + "grad_norm": 0.6385832751679049, + "learning_rate": 4.279695029047888e-06, + "loss": 0.2709, + "step": 11545 + }, + { + "epoch": 0.5408722537124655, + "grad_norm": 0.581271355881992, + "learning_rate": 4.279561832032319e-06, + "loss": 0.2949, + "step": 11546 + }, + { + "epoch": 0.5409190987023937, + "grad_norm": 0.5960388986270082, + "learning_rate": 4.279428624775818e-06, + "loss": 0.2778, + "step": 11547 + }, + { + "epoch": 0.5409659436923221, + "grad_norm": 0.6755912341950169, + "learning_rate": 4.279295407279154e-06, + "loss": 0.3076, + "step": 11548 + }, + { + "epoch": 0.5410127886822504, + "grad_norm": 0.5428703264726867, + "learning_rate": 4.279162179543092e-06, + "loss": 0.2754, + "step": 11549 + }, + { + "epoch": 0.5410596336721788, + "grad_norm": 0.6199735641526392, + "learning_rate": 4.279028941568398e-06, + "loss": 0.2729, + "step": 11550 + }, + { + "epoch": 0.541106478662107, + "grad_norm": 0.6245840392139496, + "learning_rate": 4.2788956933558396e-06, + "loss": 0.292, + "step": 11551 + }, + { + "epoch": 0.5411533236520354, + "grad_norm": 0.5976664721215968, + "learning_rate": 4.278762434906184e-06, + "loss": 0.288, + "step": 11552 + }, + { + "epoch": 0.5412001686419637, + "grad_norm": 0.6422062930199113, + "learning_rate": 4.278629166220197e-06, + "loss": 0.2933, + "step": 11553 + }, + { + "epoch": 0.5412470136318921, + "grad_norm": 0.5769609777398961, + "learning_rate": 4.278495887298647e-06, + "loss": 0.2791, + "step": 11554 + }, + { + "epoch": 0.5412938586218204, + "grad_norm": 0.5746317715536525, + "learning_rate": 4.2783625981423e-06, + "loss": 0.2819, + "step": 11555 + }, + { + "epoch": 0.5413407036117487, + "grad_norm": 0.6069000012026053, + "learning_rate": 4.278229298751924e-06, + "loss": 0.2923, + "step": 11556 + }, + { + "epoch": 0.541387548601677, + "grad_norm": 0.6206570307670639, + "learning_rate": 4.2780959891282835e-06, + "loss": 0.2745, + "step": 11557 + }, + { + "epoch": 0.5414343935916054, + "grad_norm": 0.6541235899340759, + "learning_rate": 4.277962669272149e-06, + "loss": 0.2902, + "step": 11558 + }, + { + "epoch": 0.5414812385815337, + "grad_norm": 0.605117096733631, + "learning_rate": 4.277829339184285e-06, + "loss": 0.274, + "step": 11559 + }, + { + "epoch": 0.541528083571462, + "grad_norm": 0.5953309222100853, + "learning_rate": 4.277695998865461e-06, + "loss": 0.2791, + "step": 11560 + }, + { + "epoch": 0.5415749285613903, + "grad_norm": 0.5823453518046336, + "learning_rate": 4.2775626483164426e-06, + "loss": 0.2699, + "step": 11561 + }, + { + "epoch": 0.5416217735513187, + "grad_norm": 0.5448965405492708, + "learning_rate": 4.277429287537999e-06, + "loss": 0.2623, + "step": 11562 + }, + { + "epoch": 0.541668618541247, + "grad_norm": 0.602964445188202, + "learning_rate": 4.277295916530896e-06, + "loss": 0.2862, + "step": 11563 + }, + { + "epoch": 0.5417154635311754, + "grad_norm": 0.5843024752013797, + "learning_rate": 4.277162535295901e-06, + "loss": 0.2845, + "step": 11564 + }, + { + "epoch": 0.5417623085211036, + "grad_norm": 0.578234114759847, + "learning_rate": 4.277029143833783e-06, + "loss": 0.28, + "step": 11565 + }, + { + "epoch": 0.541809153511032, + "grad_norm": 0.6147594434507022, + "learning_rate": 4.276895742145309e-06, + "loss": 0.2786, + "step": 11566 + }, + { + "epoch": 0.5418559985009603, + "grad_norm": 0.6140878001127577, + "learning_rate": 4.2767623302312466e-06, + "loss": 0.2883, + "step": 11567 + }, + { + "epoch": 0.5419028434908887, + "grad_norm": 0.6114346289507196, + "learning_rate": 4.276628908092363e-06, + "loss": 0.2773, + "step": 11568 + }, + { + "epoch": 0.5419496884808169, + "grad_norm": 0.5516148242633928, + "learning_rate": 4.276495475729428e-06, + "loss": 0.2843, + "step": 11569 + }, + { + "epoch": 0.5419965334707453, + "grad_norm": 0.5932330238233872, + "learning_rate": 4.276362033143206e-06, + "loss": 0.2985, + "step": 11570 + }, + { + "epoch": 0.5420433784606736, + "grad_norm": 0.6125926041046568, + "learning_rate": 4.2762285803344685e-06, + "loss": 0.3159, + "step": 11571 + }, + { + "epoch": 0.542090223450602, + "grad_norm": 0.5608382865369207, + "learning_rate": 4.276095117303981e-06, + "loss": 0.2829, + "step": 11572 + }, + { + "epoch": 0.5421370684405303, + "grad_norm": 0.5718673496364614, + "learning_rate": 4.275961644052513e-06, + "loss": 0.2857, + "step": 11573 + }, + { + "epoch": 0.5421839134304586, + "grad_norm": 0.6755506765361178, + "learning_rate": 4.275828160580832e-06, + "loss": 0.297, + "step": 11574 + }, + { + "epoch": 0.5422307584203869, + "grad_norm": 0.5838253066090856, + "learning_rate": 4.275694666889706e-06, + "loss": 0.2821, + "step": 11575 + }, + { + "epoch": 0.5422776034103153, + "grad_norm": 0.5640948822198908, + "learning_rate": 4.275561162979904e-06, + "loss": 0.2806, + "step": 11576 + }, + { + "epoch": 0.5423244484002436, + "grad_norm": 0.6028535146064022, + "learning_rate": 4.275427648852193e-06, + "loss": 0.2979, + "step": 11577 + }, + { + "epoch": 0.5423712933901719, + "grad_norm": 0.5793148026203916, + "learning_rate": 4.275294124507343e-06, + "loss": 0.2961, + "step": 11578 + }, + { + "epoch": 0.5424181383801002, + "grad_norm": 0.6077337200141707, + "learning_rate": 4.275160589946122e-06, + "loss": 0.2653, + "step": 11579 + }, + { + "epoch": 0.5424649833700286, + "grad_norm": 0.6352923304300738, + "learning_rate": 4.275027045169297e-06, + "loss": 0.3098, + "step": 11580 + }, + { + "epoch": 0.5425118283599569, + "grad_norm": 0.6025349704989984, + "learning_rate": 4.2748934901776375e-06, + "loss": 0.2931, + "step": 11581 + }, + { + "epoch": 0.5425586733498853, + "grad_norm": 0.5444109148118126, + "learning_rate": 4.274759924971912e-06, + "loss": 0.2906, + "step": 11582 + }, + { + "epoch": 0.5426055183398135, + "grad_norm": 0.6470639251457571, + "learning_rate": 4.274626349552889e-06, + "loss": 0.2944, + "step": 11583 + }, + { + "epoch": 0.5426523633297419, + "grad_norm": 0.5909086303056535, + "learning_rate": 4.2744927639213385e-06, + "loss": 0.2734, + "step": 11584 + }, + { + "epoch": 0.5426992083196702, + "grad_norm": 0.5853362778412303, + "learning_rate": 4.274359168078027e-06, + "loss": 0.282, + "step": 11585 + }, + { + "epoch": 0.5427460533095986, + "grad_norm": 0.576366858361252, + "learning_rate": 4.274225562023725e-06, + "loss": 0.296, + "step": 11586 + }, + { + "epoch": 0.5427928982995268, + "grad_norm": 0.6193538852882906, + "learning_rate": 4.274091945759201e-06, + "loss": 0.282, + "step": 11587 + }, + { + "epoch": 0.5428397432894552, + "grad_norm": 0.6264691486183169, + "learning_rate": 4.273958319285224e-06, + "loss": 0.3048, + "step": 11588 + }, + { + "epoch": 0.5428865882793835, + "grad_norm": 0.6148202292197176, + "learning_rate": 4.273824682602562e-06, + "loss": 0.2719, + "step": 11589 + }, + { + "epoch": 0.5429334332693119, + "grad_norm": 0.6043075688477632, + "learning_rate": 4.2736910357119854e-06, + "loss": 0.3152, + "step": 11590 + }, + { + "epoch": 0.5429802782592402, + "grad_norm": 0.5748659840872277, + "learning_rate": 4.273557378614262e-06, + "loss": 0.2725, + "step": 11591 + }, + { + "epoch": 0.5430271232491685, + "grad_norm": 0.6145759832518181, + "learning_rate": 4.273423711310162e-06, + "loss": 0.2804, + "step": 11592 + }, + { + "epoch": 0.5430739682390968, + "grad_norm": 0.5881800966141698, + "learning_rate": 4.273290033800455e-06, + "loss": 0.2731, + "step": 11593 + }, + { + "epoch": 0.5431208132290252, + "grad_norm": 0.6159662364689613, + "learning_rate": 4.2731563460859085e-06, + "loss": 0.2649, + "step": 11594 + }, + { + "epoch": 0.5431676582189535, + "grad_norm": 0.5771887270654179, + "learning_rate": 4.273022648167293e-06, + "loss": 0.2905, + "step": 11595 + }, + { + "epoch": 0.5432145032088818, + "grad_norm": 0.6516654065130414, + "learning_rate": 4.2728889400453776e-06, + "loss": 0.2995, + "step": 11596 + }, + { + "epoch": 0.5432613481988101, + "grad_norm": 0.5728379918750586, + "learning_rate": 4.272755221720933e-06, + "loss": 0.2854, + "step": 11597 + }, + { + "epoch": 0.5433081931887385, + "grad_norm": 0.5690457919126981, + "learning_rate": 4.2726214931947264e-06, + "loss": 0.2906, + "step": 11598 + }, + { + "epoch": 0.5433550381786668, + "grad_norm": 0.5906656571350063, + "learning_rate": 4.27248775446753e-06, + "loss": 0.291, + "step": 11599 + }, + { + "epoch": 0.5434018831685952, + "grad_norm": 0.6144203561942135, + "learning_rate": 4.2723540055401104e-06, + "loss": 0.288, + "step": 11600 + }, + { + "epoch": 0.5434487281585234, + "grad_norm": 0.5536383431969372, + "learning_rate": 4.27222024641324e-06, + "loss": 0.2715, + "step": 11601 + }, + { + "epoch": 0.5434955731484518, + "grad_norm": 0.6519863798899487, + "learning_rate": 4.272086477087688e-06, + "loss": 0.2869, + "step": 11602 + }, + { + "epoch": 0.5435424181383801, + "grad_norm": 0.557142787666856, + "learning_rate": 4.271952697564223e-06, + "loss": 0.2746, + "step": 11603 + }, + { + "epoch": 0.5435892631283085, + "grad_norm": 0.6758789078023365, + "learning_rate": 4.271818907843616e-06, + "loss": 0.3, + "step": 11604 + }, + { + "epoch": 0.5436361081182367, + "grad_norm": 0.6336719247642899, + "learning_rate": 4.271685107926636e-06, + "loss": 0.2934, + "step": 11605 + }, + { + "epoch": 0.5436829531081651, + "grad_norm": 0.6217875966560271, + "learning_rate": 4.2715512978140534e-06, + "loss": 0.2864, + "step": 11606 + }, + { + "epoch": 0.5437297980980934, + "grad_norm": 0.599798294903596, + "learning_rate": 4.2714174775066384e-06, + "loss": 0.3037, + "step": 11607 + }, + { + "epoch": 0.5437766430880218, + "grad_norm": 0.5826207490235479, + "learning_rate": 4.271283647005162e-06, + "loss": 0.267, + "step": 11608 + }, + { + "epoch": 0.5438234880779501, + "grad_norm": 0.5759452945627495, + "learning_rate": 4.271149806310393e-06, + "loss": 0.3107, + "step": 11609 + }, + { + "epoch": 0.5438703330678784, + "grad_norm": 0.5498119848006019, + "learning_rate": 4.271015955423101e-06, + "loss": 0.2774, + "step": 11610 + }, + { + "epoch": 0.5439171780578067, + "grad_norm": 0.6368249820496539, + "learning_rate": 4.2708820943440585e-06, + "loss": 0.2889, + "step": 11611 + }, + { + "epoch": 0.5439640230477351, + "grad_norm": 0.6461751700967882, + "learning_rate": 4.270748223074034e-06, + "loss": 0.314, + "step": 11612 + }, + { + "epoch": 0.5440108680376634, + "grad_norm": 0.5745696045612224, + "learning_rate": 4.270614341613799e-06, + "loss": 0.2755, + "step": 11613 + }, + { + "epoch": 0.5440577130275916, + "grad_norm": 0.5636462704741713, + "learning_rate": 4.270480449964123e-06, + "loss": 0.2796, + "step": 11614 + }, + { + "epoch": 0.54410455801752, + "grad_norm": 0.6057681647613908, + "learning_rate": 4.270346548125777e-06, + "loss": 0.3163, + "step": 11615 + }, + { + "epoch": 0.5441514030074484, + "grad_norm": 0.6357905267916465, + "learning_rate": 4.270212636099532e-06, + "loss": 0.2974, + "step": 11616 + }, + { + "epoch": 0.5441982479973767, + "grad_norm": 0.6184384812687669, + "learning_rate": 4.270078713886158e-06, + "loss": 0.3084, + "step": 11617 + }, + { + "epoch": 0.5442450929873051, + "grad_norm": 0.5490107154419791, + "learning_rate": 4.269944781486426e-06, + "loss": 0.2688, + "step": 11618 + }, + { + "epoch": 0.5442919379772333, + "grad_norm": 0.5724937345573515, + "learning_rate": 4.269810838901106e-06, + "loss": 0.2887, + "step": 11619 + }, + { + "epoch": 0.5443387829671616, + "grad_norm": 0.6001464762263421, + "learning_rate": 4.269676886130971e-06, + "loss": 0.3007, + "step": 11620 + }, + { + "epoch": 0.54438562795709, + "grad_norm": 0.5515379356620531, + "learning_rate": 4.269542923176789e-06, + "loss": 0.2792, + "step": 11621 + }, + { + "epoch": 0.5444324729470184, + "grad_norm": 0.6001249232082724, + "learning_rate": 4.269408950039332e-06, + "loss": 0.2643, + "step": 11622 + }, + { + "epoch": 0.5444793179369466, + "grad_norm": 0.6252981630493384, + "learning_rate": 4.269274966719372e-06, + "loss": 0.2828, + "step": 11623 + }, + { + "epoch": 0.544526162926875, + "grad_norm": 0.6332370802787766, + "learning_rate": 4.269140973217679e-06, + "loss": 0.29, + "step": 11624 + }, + { + "epoch": 0.5445730079168033, + "grad_norm": 0.6005691003120657, + "learning_rate": 4.2690069695350244e-06, + "loss": 0.2962, + "step": 11625 + }, + { + "epoch": 0.5446198529067316, + "grad_norm": 0.5618581464510803, + "learning_rate": 4.26887295567218e-06, + "loss": 0.2795, + "step": 11626 + }, + { + "epoch": 0.54466669789666, + "grad_norm": 0.6073945369987817, + "learning_rate": 4.268738931629916e-06, + "loss": 0.2718, + "step": 11627 + }, + { + "epoch": 0.5447135428865882, + "grad_norm": 0.591725305129573, + "learning_rate": 4.268604897409003e-06, + "loss": 0.29, + "step": 11628 + }, + { + "epoch": 0.5447603878765166, + "grad_norm": 0.5462715533180225, + "learning_rate": 4.2684708530102134e-06, + "loss": 0.2903, + "step": 11629 + }, + { + "epoch": 0.544807232866445, + "grad_norm": 0.6065028172455551, + "learning_rate": 4.26833679843432e-06, + "loss": 0.2752, + "step": 11630 + }, + { + "epoch": 0.5448540778563733, + "grad_norm": 0.6430642561888551, + "learning_rate": 4.268202733682092e-06, + "loss": 0.3148, + "step": 11631 + }, + { + "epoch": 0.5449009228463015, + "grad_norm": 0.6064330422765397, + "learning_rate": 4.268068658754301e-06, + "loss": 0.3105, + "step": 11632 + }, + { + "epoch": 0.5449477678362299, + "grad_norm": 0.5820178657507462, + "learning_rate": 4.26793457365172e-06, + "loss": 0.2887, + "step": 11633 + }, + { + "epoch": 0.5449946128261582, + "grad_norm": 0.5969032028909151, + "learning_rate": 4.267800478375119e-06, + "loss": 0.2889, + "step": 11634 + }, + { + "epoch": 0.5450414578160866, + "grad_norm": 0.5624186035142743, + "learning_rate": 4.267666372925272e-06, + "loss": 0.2947, + "step": 11635 + }, + { + "epoch": 0.545088302806015, + "grad_norm": 0.6092509286642934, + "learning_rate": 4.267532257302948e-06, + "loss": 0.2841, + "step": 11636 + }, + { + "epoch": 0.5451351477959432, + "grad_norm": 0.5743060010094679, + "learning_rate": 4.267398131508921e-06, + "loss": 0.2728, + "step": 11637 + }, + { + "epoch": 0.5451819927858715, + "grad_norm": 0.5700005990386416, + "learning_rate": 4.267263995543962e-06, + "loss": 0.2751, + "step": 11638 + }, + { + "epoch": 0.5452288377757999, + "grad_norm": 0.587309156574532, + "learning_rate": 4.267129849408842e-06, + "loss": 0.2905, + "step": 11639 + }, + { + "epoch": 0.5452756827657282, + "grad_norm": 0.6181260795703413, + "learning_rate": 4.2669956931043345e-06, + "loss": 0.2808, + "step": 11640 + }, + { + "epoch": 0.5453225277556565, + "grad_norm": 0.6242796314058631, + "learning_rate": 4.266861526631211e-06, + "loss": 0.2765, + "step": 11641 + }, + { + "epoch": 0.5453693727455848, + "grad_norm": 0.5213529277760317, + "learning_rate": 4.266727349990243e-06, + "loss": 0.2772, + "step": 11642 + }, + { + "epoch": 0.5454162177355132, + "grad_norm": 0.580626566668986, + "learning_rate": 4.266593163182203e-06, + "loss": 0.2836, + "step": 11643 + }, + { + "epoch": 0.5454630627254415, + "grad_norm": 0.5848598713959071, + "learning_rate": 4.266458966207865e-06, + "loss": 0.2875, + "step": 11644 + }, + { + "epoch": 0.5455099077153699, + "grad_norm": 0.6193599925356992, + "learning_rate": 4.266324759067998e-06, + "loss": 0.299, + "step": 11645 + }, + { + "epoch": 0.5455567527052981, + "grad_norm": 0.6249066207922455, + "learning_rate": 4.266190541763377e-06, + "loss": 0.2997, + "step": 11646 + }, + { + "epoch": 0.5456035976952265, + "grad_norm": 0.5771230695399084, + "learning_rate": 4.266056314294772e-06, + "loss": 0.2695, + "step": 11647 + }, + { + "epoch": 0.5456504426851548, + "grad_norm": 0.5727178531130666, + "learning_rate": 4.265922076662958e-06, + "loss": 0.277, + "step": 11648 + }, + { + "epoch": 0.5456972876750832, + "grad_norm": 0.5678057344051536, + "learning_rate": 4.265787828868705e-06, + "loss": 0.28, + "step": 11649 + }, + { + "epoch": 0.5457441326650114, + "grad_norm": 0.5591373633722366, + "learning_rate": 4.2656535709127885e-06, + "loss": 0.2828, + "step": 11650 + }, + { + "epoch": 0.5457909776549398, + "grad_norm": 0.6553789591942714, + "learning_rate": 4.265519302795979e-06, + "loss": 0.3134, + "step": 11651 + }, + { + "epoch": 0.5458378226448681, + "grad_norm": 0.5672524533905338, + "learning_rate": 4.265385024519049e-06, + "loss": 0.2725, + "step": 11652 + }, + { + "epoch": 0.5458846676347965, + "grad_norm": 0.5734199999100186, + "learning_rate": 4.2652507360827726e-06, + "loss": 0.2971, + "step": 11653 + }, + { + "epoch": 0.5459315126247248, + "grad_norm": 0.5972547442718568, + "learning_rate": 4.265116437487921e-06, + "loss": 0.2858, + "step": 11654 + }, + { + "epoch": 0.5459783576146531, + "grad_norm": 0.5992078010950813, + "learning_rate": 4.264982128735269e-06, + "loss": 0.2902, + "step": 11655 + }, + { + "epoch": 0.5460252026045814, + "grad_norm": 0.6393364901453896, + "learning_rate": 4.264847809825587e-06, + "loss": 0.3047, + "step": 11656 + }, + { + "epoch": 0.5460720475945098, + "grad_norm": 0.6184506599944446, + "learning_rate": 4.264713480759651e-06, + "loss": 0.2914, + "step": 11657 + }, + { + "epoch": 0.5461188925844381, + "grad_norm": 0.5977944662577737, + "learning_rate": 4.264579141538232e-06, + "loss": 0.2967, + "step": 11658 + }, + { + "epoch": 0.5461657375743664, + "grad_norm": 0.6504437044236967, + "learning_rate": 4.2644447921621035e-06, + "loss": 0.2764, + "step": 11659 + }, + { + "epoch": 0.5462125825642947, + "grad_norm": 0.6306389474926565, + "learning_rate": 4.264310432632039e-06, + "loss": 0.2951, + "step": 11660 + }, + { + "epoch": 0.5462594275542231, + "grad_norm": 0.5922354507249947, + "learning_rate": 4.26417606294881e-06, + "loss": 0.2766, + "step": 11661 + }, + { + "epoch": 0.5463062725441514, + "grad_norm": 0.6141821381013552, + "learning_rate": 4.264041683113192e-06, + "loss": 0.2877, + "step": 11662 + }, + { + "epoch": 0.5463531175340798, + "grad_norm": 0.5947279003400403, + "learning_rate": 4.263907293125958e-06, + "loss": 0.2811, + "step": 11663 + }, + { + "epoch": 0.546399962524008, + "grad_norm": 0.6800759619318053, + "learning_rate": 4.263772892987881e-06, + "loss": 0.2913, + "step": 11664 + }, + { + "epoch": 0.5464468075139364, + "grad_norm": 0.6222817590520275, + "learning_rate": 4.263638482699734e-06, + "loss": 0.2715, + "step": 11665 + }, + { + "epoch": 0.5464936525038647, + "grad_norm": 0.5933360085779813, + "learning_rate": 4.26350406226229e-06, + "loss": 0.2755, + "step": 11666 + }, + { + "epoch": 0.5465404974937931, + "grad_norm": 0.606253522497514, + "learning_rate": 4.263369631676324e-06, + "loss": 0.2803, + "step": 11667 + }, + { + "epoch": 0.5465873424837213, + "grad_norm": 0.6125557358858806, + "learning_rate": 4.26323519094261e-06, + "loss": 0.2983, + "step": 11668 + }, + { + "epoch": 0.5466341874736497, + "grad_norm": 0.5641750714623375, + "learning_rate": 4.2631007400619186e-06, + "loss": 0.293, + "step": 11669 + }, + { + "epoch": 0.546681032463578, + "grad_norm": 0.6615335642136089, + "learning_rate": 4.262966279035027e-06, + "loss": 0.2956, + "step": 11670 + }, + { + "epoch": 0.5467278774535064, + "grad_norm": 0.5912227464304934, + "learning_rate": 4.262831807862707e-06, + "loss": 0.295, + "step": 11671 + }, + { + "epoch": 0.5467747224434347, + "grad_norm": 0.5809669505544514, + "learning_rate": 4.262697326545734e-06, + "loss": 0.2938, + "step": 11672 + }, + { + "epoch": 0.546821567433363, + "grad_norm": 0.5692933155324608, + "learning_rate": 4.262562835084879e-06, + "loss": 0.2732, + "step": 11673 + }, + { + "epoch": 0.5468684124232913, + "grad_norm": 0.6341450146366681, + "learning_rate": 4.262428333480919e-06, + "loss": 0.2851, + "step": 11674 + }, + { + "epoch": 0.5469152574132197, + "grad_norm": 0.6122379414773198, + "learning_rate": 4.262293821734627e-06, + "loss": 0.2693, + "step": 11675 + }, + { + "epoch": 0.546962102403148, + "grad_norm": 0.6039949543809647, + "learning_rate": 4.262159299846777e-06, + "loss": 0.2827, + "step": 11676 + }, + { + "epoch": 0.5470089473930763, + "grad_norm": 0.5678420525693642, + "learning_rate": 4.262024767818144e-06, + "loss": 0.291, + "step": 11677 + }, + { + "epoch": 0.5470557923830046, + "grad_norm": 0.6149000827284502, + "learning_rate": 4.2618902256495e-06, + "loss": 0.2855, + "step": 11678 + }, + { + "epoch": 0.547102637372933, + "grad_norm": 0.5855439777533558, + "learning_rate": 4.26175567334162e-06, + "loss": 0.2613, + "step": 11679 + }, + { + "epoch": 0.5471494823628613, + "grad_norm": 0.6028474002179341, + "learning_rate": 4.261621110895281e-06, + "loss": 0.2802, + "step": 11680 + }, + { + "epoch": 0.5471963273527897, + "grad_norm": 0.5855165650557654, + "learning_rate": 4.261486538311254e-06, + "loss": 0.2694, + "step": 11681 + }, + { + "epoch": 0.5472431723427179, + "grad_norm": 0.6440614947553257, + "learning_rate": 4.261351955590314e-06, + "loss": 0.2961, + "step": 11682 + }, + { + "epoch": 0.5472900173326463, + "grad_norm": 0.589861366360887, + "learning_rate": 4.2612173627332375e-06, + "loss": 0.2867, + "step": 11683 + }, + { + "epoch": 0.5473368623225746, + "grad_norm": 0.6193181541986297, + "learning_rate": 4.261082759740798e-06, + "loss": 0.2824, + "step": 11684 + }, + { + "epoch": 0.547383707312503, + "grad_norm": 0.6138037000667512, + "learning_rate": 4.260948146613768e-06, + "loss": 0.2863, + "step": 11685 + }, + { + "epoch": 0.5474305523024312, + "grad_norm": 0.5642223373298108, + "learning_rate": 4.2608135233529256e-06, + "loss": 0.2975, + "step": 11686 + }, + { + "epoch": 0.5474773972923596, + "grad_norm": 0.5368677588832058, + "learning_rate": 4.260678889959043e-06, + "loss": 0.2737, + "step": 11687 + }, + { + "epoch": 0.5475242422822879, + "grad_norm": 0.631464992873002, + "learning_rate": 4.260544246432896e-06, + "loss": 0.2889, + "step": 11688 + }, + { + "epoch": 0.5475710872722163, + "grad_norm": 0.627579599585543, + "learning_rate": 4.2604095927752604e-06, + "loss": 0.2728, + "step": 11689 + }, + { + "epoch": 0.5476179322621446, + "grad_norm": 0.6198939947291822, + "learning_rate": 4.26027492898691e-06, + "loss": 0.2974, + "step": 11690 + }, + { + "epoch": 0.5476647772520729, + "grad_norm": 0.6079518606650044, + "learning_rate": 4.260140255068619e-06, + "loss": 0.2914, + "step": 11691 + }, + { + "epoch": 0.5477116222420012, + "grad_norm": 0.6308283223293144, + "learning_rate": 4.260005571021163e-06, + "loss": 0.2913, + "step": 11692 + }, + { + "epoch": 0.5477584672319296, + "grad_norm": 0.5640781063571396, + "learning_rate": 4.259870876845318e-06, + "loss": 0.2871, + "step": 11693 + }, + { + "epoch": 0.5478053122218579, + "grad_norm": 0.5689899195061381, + "learning_rate": 4.2597361725418575e-06, + "loss": 0.2911, + "step": 11694 + }, + { + "epoch": 0.5478521572117862, + "grad_norm": 0.6559960465713536, + "learning_rate": 4.259601458111558e-06, + "loss": 0.3118, + "step": 11695 + }, + { + "epoch": 0.5478990022017145, + "grad_norm": 0.5990796837001603, + "learning_rate": 4.259466733555195e-06, + "loss": 0.2884, + "step": 11696 + }, + { + "epoch": 0.5479458471916429, + "grad_norm": 0.5596211176254055, + "learning_rate": 4.259331998873544e-06, + "loss": 0.2795, + "step": 11697 + }, + { + "epoch": 0.5479926921815712, + "grad_norm": 0.6254837082260651, + "learning_rate": 4.2591972540673775e-06, + "loss": 0.2845, + "step": 11698 + }, + { + "epoch": 0.5480395371714996, + "grad_norm": 0.5931101737689322, + "learning_rate": 4.259062499137474e-06, + "loss": 0.2878, + "step": 11699 + }, + { + "epoch": 0.5480863821614278, + "grad_norm": 0.5162396711225367, + "learning_rate": 4.258927734084608e-06, + "loss": 0.2618, + "step": 11700 + }, + { + "epoch": 0.5481332271513562, + "grad_norm": 0.5796501237283245, + "learning_rate": 4.258792958909555e-06, + "loss": 0.2627, + "step": 11701 + }, + { + "epoch": 0.5481800721412845, + "grad_norm": 0.5633618379114402, + "learning_rate": 4.25865817361309e-06, + "loss": 0.2993, + "step": 11702 + }, + { + "epoch": 0.5482269171312129, + "grad_norm": 0.6073598003936899, + "learning_rate": 4.25852337819599e-06, + "loss": 0.2837, + "step": 11703 + }, + { + "epoch": 0.5482737621211411, + "grad_norm": 0.5274837988009657, + "learning_rate": 4.258388572659029e-06, + "loss": 0.2829, + "step": 11704 + }, + { + "epoch": 0.5483206071110694, + "grad_norm": 0.5878939155483457, + "learning_rate": 4.258253757002984e-06, + "loss": 0.2751, + "step": 11705 + }, + { + "epoch": 0.5483674521009978, + "grad_norm": 0.5911360365758741, + "learning_rate": 4.258118931228631e-06, + "loss": 0.2808, + "step": 11706 + }, + { + "epoch": 0.5484142970909262, + "grad_norm": 0.670895685962835, + "learning_rate": 4.257984095336745e-06, + "loss": 0.3168, + "step": 11707 + }, + { + "epoch": 0.5484611420808545, + "grad_norm": 0.5975031574458909, + "learning_rate": 4.257849249328103e-06, + "loss": 0.2802, + "step": 11708 + }, + { + "epoch": 0.5485079870707827, + "grad_norm": 0.6101853420407087, + "learning_rate": 4.25771439320348e-06, + "loss": 0.2987, + "step": 11709 + }, + { + "epoch": 0.5485548320607111, + "grad_norm": 0.6004972588442331, + "learning_rate": 4.257579526963652e-06, + "loss": 0.272, + "step": 11710 + }, + { + "epoch": 0.5486016770506394, + "grad_norm": 0.5251627012902812, + "learning_rate": 4.257444650609396e-06, + "loss": 0.2562, + "step": 11711 + }, + { + "epoch": 0.5486485220405678, + "grad_norm": 0.5916286132742631, + "learning_rate": 4.257309764141488e-06, + "loss": 0.29, + "step": 11712 + }, + { + "epoch": 0.548695367030496, + "grad_norm": 0.607672405831568, + "learning_rate": 4.257174867560704e-06, + "loss": 0.2982, + "step": 11713 + }, + { + "epoch": 0.5487422120204244, + "grad_norm": 0.5732870631023869, + "learning_rate": 4.25703996086782e-06, + "loss": 0.2748, + "step": 11714 + }, + { + "epoch": 0.5487890570103527, + "grad_norm": 0.5868662070985664, + "learning_rate": 4.256905044063613e-06, + "loss": 0.2807, + "step": 11715 + }, + { + "epoch": 0.5488359020002811, + "grad_norm": 0.6668020634033125, + "learning_rate": 4.256770117148859e-06, + "loss": 0.296, + "step": 11716 + }, + { + "epoch": 0.5488827469902094, + "grad_norm": 0.5885818837219615, + "learning_rate": 4.256635180124334e-06, + "loss": 0.2821, + "step": 11717 + }, + { + "epoch": 0.5489295919801377, + "grad_norm": 0.6084829289387562, + "learning_rate": 4.256500232990815e-06, + "loss": 0.2913, + "step": 11718 + }, + { + "epoch": 0.548976436970066, + "grad_norm": 0.6226274438573223, + "learning_rate": 4.2563652757490795e-06, + "loss": 0.2989, + "step": 11719 + }, + { + "epoch": 0.5490232819599944, + "grad_norm": 0.6022630474162024, + "learning_rate": 4.256230308399902e-06, + "loss": 0.3018, + "step": 11720 + }, + { + "epoch": 0.5490701269499227, + "grad_norm": 0.558326934010503, + "learning_rate": 4.256095330944061e-06, + "loss": 0.2587, + "step": 11721 + }, + { + "epoch": 0.549116971939851, + "grad_norm": 0.6276237873604641, + "learning_rate": 4.255960343382333e-06, + "loss": 0.2977, + "step": 11722 + }, + { + "epoch": 0.5491638169297793, + "grad_norm": 0.5543726834241873, + "learning_rate": 4.255825345715495e-06, + "loss": 0.2739, + "step": 11723 + }, + { + "epoch": 0.5492106619197077, + "grad_norm": 0.5918033295619207, + "learning_rate": 4.255690337944323e-06, + "loss": 0.2866, + "step": 11724 + }, + { + "epoch": 0.549257506909636, + "grad_norm": 0.5843406468980031, + "learning_rate": 4.255555320069594e-06, + "loss": 0.2821, + "step": 11725 + }, + { + "epoch": 0.5493043518995644, + "grad_norm": 0.6217486450818129, + "learning_rate": 4.255420292092086e-06, + "loss": 0.263, + "step": 11726 + }, + { + "epoch": 0.5493511968894926, + "grad_norm": 0.6152874514037783, + "learning_rate": 4.255285254012576e-06, + "loss": 0.2848, + "step": 11727 + }, + { + "epoch": 0.549398041879421, + "grad_norm": 0.6102393004648204, + "learning_rate": 4.255150205831839e-06, + "loss": 0.2961, + "step": 11728 + }, + { + "epoch": 0.5494448868693493, + "grad_norm": 0.5877051466296037, + "learning_rate": 4.255015147550654e-06, + "loss": 0.2936, + "step": 11729 + }, + { + "epoch": 0.5494917318592777, + "grad_norm": 0.5764164536309418, + "learning_rate": 4.254880079169799e-06, + "loss": 0.2929, + "step": 11730 + }, + { + "epoch": 0.5495385768492059, + "grad_norm": 0.6329223335843147, + "learning_rate": 4.25474500069005e-06, + "loss": 0.2921, + "step": 11731 + }, + { + "epoch": 0.5495854218391343, + "grad_norm": 0.6067960018468528, + "learning_rate": 4.254609912112183e-06, + "loss": 0.2935, + "step": 11732 + }, + { + "epoch": 0.5496322668290626, + "grad_norm": 0.5594019230036501, + "learning_rate": 4.254474813436978e-06, + "loss": 0.2657, + "step": 11733 + }, + { + "epoch": 0.549679111818991, + "grad_norm": 0.5620215597923394, + "learning_rate": 4.2543397046652115e-06, + "loss": 0.2706, + "step": 11734 + }, + { + "epoch": 0.5497259568089193, + "grad_norm": 0.6033273106977365, + "learning_rate": 4.254204585797661e-06, + "loss": 0.2938, + "step": 11735 + }, + { + "epoch": 0.5497728017988476, + "grad_norm": 0.5882094048781974, + "learning_rate": 4.254069456835104e-06, + "loss": 0.2764, + "step": 11736 + }, + { + "epoch": 0.5498196467887759, + "grad_norm": 0.5808183791907818, + "learning_rate": 4.253934317778318e-06, + "loss": 0.3002, + "step": 11737 + }, + { + "epoch": 0.5498664917787043, + "grad_norm": 0.5813258353928653, + "learning_rate": 4.253799168628081e-06, + "loss": 0.2693, + "step": 11738 + }, + { + "epoch": 0.5499133367686326, + "grad_norm": 0.6508312904854089, + "learning_rate": 4.253664009385171e-06, + "loss": 0.3028, + "step": 11739 + }, + { + "epoch": 0.5499601817585609, + "grad_norm": 0.6062937619126075, + "learning_rate": 4.253528840050365e-06, + "loss": 0.2882, + "step": 11740 + }, + { + "epoch": 0.5500070267484892, + "grad_norm": 0.612967220535609, + "learning_rate": 4.25339366062444e-06, + "loss": 0.2853, + "step": 11741 + }, + { + "epoch": 0.5500538717384176, + "grad_norm": 0.5845423902267031, + "learning_rate": 4.253258471108177e-06, + "loss": 0.292, + "step": 11742 + }, + { + "epoch": 0.5501007167283459, + "grad_norm": 0.641285451643179, + "learning_rate": 4.253123271502352e-06, + "loss": 0.2986, + "step": 11743 + }, + { + "epoch": 0.5501475617182743, + "grad_norm": 0.5915961482166875, + "learning_rate": 4.252988061807742e-06, + "loss": 0.2952, + "step": 11744 + }, + { + "epoch": 0.5501944067082025, + "grad_norm": 0.5905202558756267, + "learning_rate": 4.2528528420251275e-06, + "loss": 0.2852, + "step": 11745 + }, + { + "epoch": 0.5502412516981309, + "grad_norm": 0.5889987992461758, + "learning_rate": 4.252717612155285e-06, + "loss": 0.272, + "step": 11746 + }, + { + "epoch": 0.5502880966880592, + "grad_norm": 0.6075589319179485, + "learning_rate": 4.252582372198993e-06, + "loss": 0.3, + "step": 11747 + }, + { + "epoch": 0.5503349416779876, + "grad_norm": 0.5909466745915504, + "learning_rate": 4.25244712215703e-06, + "loss": 0.2764, + "step": 11748 + }, + { + "epoch": 0.5503817866679158, + "grad_norm": 0.6052358785310732, + "learning_rate": 4.252311862030175e-06, + "loss": 0.2772, + "step": 11749 + }, + { + "epoch": 0.5504286316578442, + "grad_norm": 0.5958512556015368, + "learning_rate": 4.2521765918192046e-06, + "loss": 0.2747, + "step": 11750 + }, + { + "epoch": 0.5504754766477725, + "grad_norm": 0.592303449884679, + "learning_rate": 4.252041311524899e-06, + "loss": 0.2829, + "step": 11751 + }, + { + "epoch": 0.5505223216377009, + "grad_norm": 0.6171470932616911, + "learning_rate": 4.2519060211480356e-06, + "loss": 0.3009, + "step": 11752 + }, + { + "epoch": 0.5505691666276292, + "grad_norm": 0.6060070465885754, + "learning_rate": 4.2517707206893935e-06, + "loss": 0.2786, + "step": 11753 + }, + { + "epoch": 0.5506160116175575, + "grad_norm": 0.557552928504734, + "learning_rate": 4.251635410149752e-06, + "loss": 0.2786, + "step": 11754 + }, + { + "epoch": 0.5506628566074858, + "grad_norm": 0.6219817695604295, + "learning_rate": 4.251500089529887e-06, + "loss": 0.3027, + "step": 11755 + }, + { + "epoch": 0.5507097015974142, + "grad_norm": 0.6165941671208828, + "learning_rate": 4.251364758830581e-06, + "loss": 0.2768, + "step": 11756 + }, + { + "epoch": 0.5507565465873425, + "grad_norm": 0.5885079316541417, + "learning_rate": 4.251229418052611e-06, + "loss": 0.2616, + "step": 11757 + }, + { + "epoch": 0.5508033915772708, + "grad_norm": 0.6135189278330992, + "learning_rate": 4.251094067196754e-06, + "loss": 0.2742, + "step": 11758 + }, + { + "epoch": 0.5508502365671991, + "grad_norm": 0.6696294471329368, + "learning_rate": 4.250958706263793e-06, + "loss": 0.3017, + "step": 11759 + }, + { + "epoch": 0.5508970815571275, + "grad_norm": 0.6190074964273172, + "learning_rate": 4.250823335254503e-06, + "loss": 0.2694, + "step": 11760 + }, + { + "epoch": 0.5509439265470558, + "grad_norm": 0.6078691482177216, + "learning_rate": 4.250687954169666e-06, + "loss": 0.2876, + "step": 11761 + }, + { + "epoch": 0.5509907715369842, + "grad_norm": 0.6153220464442731, + "learning_rate": 4.250552563010059e-06, + "loss": 0.2859, + "step": 11762 + }, + { + "epoch": 0.5510376165269124, + "grad_norm": 0.5727735996484913, + "learning_rate": 4.250417161776462e-06, + "loss": 0.284, + "step": 11763 + }, + { + "epoch": 0.5510844615168408, + "grad_norm": 0.5719086127289769, + "learning_rate": 4.250281750469655e-06, + "loss": 0.2706, + "step": 11764 + }, + { + "epoch": 0.5511313065067691, + "grad_norm": 0.6056102767901219, + "learning_rate": 4.250146329090416e-06, + "loss": 0.2986, + "step": 11765 + }, + { + "epoch": 0.5511781514966975, + "grad_norm": 0.620913909539941, + "learning_rate": 4.250010897639525e-06, + "loss": 0.2869, + "step": 11766 + }, + { + "epoch": 0.5512249964866257, + "grad_norm": 0.6040457708090654, + "learning_rate": 4.24987545611776e-06, + "loss": 0.2943, + "step": 11767 + }, + { + "epoch": 0.5512718414765541, + "grad_norm": 0.56816259285063, + "learning_rate": 4.249740004525903e-06, + "loss": 0.2924, + "step": 11768 + }, + { + "epoch": 0.5513186864664824, + "grad_norm": 0.6200778758755036, + "learning_rate": 4.249604542864731e-06, + "loss": 0.2975, + "step": 11769 + }, + { + "epoch": 0.5513655314564108, + "grad_norm": 0.6399841310629862, + "learning_rate": 4.249469071135025e-06, + "loss": 0.2813, + "step": 11770 + }, + { + "epoch": 0.5514123764463391, + "grad_norm": 0.5780305676937828, + "learning_rate": 4.2493335893375645e-06, + "loss": 0.2805, + "step": 11771 + }, + { + "epoch": 0.5514592214362674, + "grad_norm": 0.5836427559590776, + "learning_rate": 4.249198097473129e-06, + "loss": 0.2922, + "step": 11772 + }, + { + "epoch": 0.5515060664261957, + "grad_norm": 0.6532395960597535, + "learning_rate": 4.249062595542497e-06, + "loss": 0.2935, + "step": 11773 + }, + { + "epoch": 0.5515529114161241, + "grad_norm": 0.5667896690467924, + "learning_rate": 4.24892708354645e-06, + "loss": 0.2799, + "step": 11774 + }, + { + "epoch": 0.5515997564060524, + "grad_norm": 0.6037572483720367, + "learning_rate": 4.2487915614857675e-06, + "loss": 0.2613, + "step": 11775 + }, + { + "epoch": 0.5516466013959807, + "grad_norm": 0.6226284135585737, + "learning_rate": 4.248656029361229e-06, + "loss": 0.3103, + "step": 11776 + }, + { + "epoch": 0.551693446385909, + "grad_norm": 0.6576998335509239, + "learning_rate": 4.248520487173615e-06, + "loss": 0.2877, + "step": 11777 + }, + { + "epoch": 0.5517402913758374, + "grad_norm": 0.588993568956143, + "learning_rate": 4.248384934923704e-06, + "loss": 0.2645, + "step": 11778 + }, + { + "epoch": 0.5517871363657657, + "grad_norm": 0.624701201641651, + "learning_rate": 4.248249372612278e-06, + "loss": 0.2931, + "step": 11779 + }, + { + "epoch": 0.5518339813556941, + "grad_norm": 0.571162827823553, + "learning_rate": 4.248113800240115e-06, + "loss": 0.2675, + "step": 11780 + }, + { + "epoch": 0.5518808263456223, + "grad_norm": 0.6091362664051984, + "learning_rate": 4.247978217807999e-06, + "loss": 0.2741, + "step": 11781 + }, + { + "epoch": 0.5519276713355507, + "grad_norm": 0.6010341953302663, + "learning_rate": 4.247842625316706e-06, + "loss": 0.2923, + "step": 11782 + }, + { + "epoch": 0.551974516325479, + "grad_norm": 0.5724759836330141, + "learning_rate": 4.247707022767017e-06, + "loss": 0.28, + "step": 11783 + }, + { + "epoch": 0.5520213613154074, + "grad_norm": 0.600648676904622, + "learning_rate": 4.247571410159716e-06, + "loss": 0.2965, + "step": 11784 + }, + { + "epoch": 0.5520682063053356, + "grad_norm": 0.5601146603308288, + "learning_rate": 4.247435787495578e-06, + "loss": 0.2862, + "step": 11785 + }, + { + "epoch": 0.552115051295264, + "grad_norm": 0.6008096529623764, + "learning_rate": 4.247300154775388e-06, + "loss": 0.3066, + "step": 11786 + }, + { + "epoch": 0.5521618962851923, + "grad_norm": 0.5181780530917934, + "learning_rate": 4.2471645119999235e-06, + "loss": 0.2547, + "step": 11787 + }, + { + "epoch": 0.5522087412751207, + "grad_norm": 0.6600236501383026, + "learning_rate": 4.247028859169967e-06, + "loss": 0.2974, + "step": 11788 + }, + { + "epoch": 0.552255586265049, + "grad_norm": 0.6259914183176974, + "learning_rate": 4.246893196286299e-06, + "loss": 0.3018, + "step": 11789 + }, + { + "epoch": 0.5523024312549772, + "grad_norm": 0.5864564535125284, + "learning_rate": 4.2467575233497e-06, + "loss": 0.2736, + "step": 11790 + }, + { + "epoch": 0.5523492762449056, + "grad_norm": 0.5483634774965214, + "learning_rate": 4.246621840360949e-06, + "loss": 0.2773, + "step": 11791 + }, + { + "epoch": 0.552396121234834, + "grad_norm": 0.5994606267025662, + "learning_rate": 4.2464861473208296e-06, + "loss": 0.279, + "step": 11792 + }, + { + "epoch": 0.5524429662247623, + "grad_norm": 0.6054001360675931, + "learning_rate": 4.246350444230121e-06, + "loss": 0.3121, + "step": 11793 + }, + { + "epoch": 0.5524898112146905, + "grad_norm": 0.5843353625968613, + "learning_rate": 4.2462147310896045e-06, + "loss": 0.284, + "step": 11794 + }, + { + "epoch": 0.5525366562046189, + "grad_norm": 0.5836027853130115, + "learning_rate": 4.246079007900061e-06, + "loss": 0.2886, + "step": 11795 + }, + { + "epoch": 0.5525835011945472, + "grad_norm": 0.6272019711082212, + "learning_rate": 4.245943274662272e-06, + "loss": 0.2926, + "step": 11796 + }, + { + "epoch": 0.5526303461844756, + "grad_norm": 0.5355178718903468, + "learning_rate": 4.245807531377017e-06, + "loss": 0.2774, + "step": 11797 + }, + { + "epoch": 0.552677191174404, + "grad_norm": 0.6023823699149768, + "learning_rate": 4.2456717780450805e-06, + "loss": 0.29, + "step": 11798 + }, + { + "epoch": 0.5527240361643322, + "grad_norm": 0.5850043195148666, + "learning_rate": 4.245536014667241e-06, + "loss": 0.283, + "step": 11799 + }, + { + "epoch": 0.5527708811542605, + "grad_norm": 0.6361631846871126, + "learning_rate": 4.24540024124428e-06, + "loss": 0.3026, + "step": 11800 + }, + { + "epoch": 0.5528177261441889, + "grad_norm": 0.6974080004164418, + "learning_rate": 4.245264457776978e-06, + "loss": 0.2911, + "step": 11801 + }, + { + "epoch": 0.5528645711341172, + "grad_norm": 0.564059272056094, + "learning_rate": 4.245128664266121e-06, + "loss": 0.2719, + "step": 11802 + }, + { + "epoch": 0.5529114161240455, + "grad_norm": 0.5565920610144767, + "learning_rate": 4.244992860712484e-06, + "loss": 0.2791, + "step": 11803 + }, + { + "epoch": 0.5529582611139738, + "grad_norm": 0.5999622668184771, + "learning_rate": 4.244857047116853e-06, + "loss": 0.2752, + "step": 11804 + }, + { + "epoch": 0.5530051061039022, + "grad_norm": 0.5932771952117023, + "learning_rate": 4.244721223480008e-06, + "loss": 0.2924, + "step": 11805 + }, + { + "epoch": 0.5530519510938305, + "grad_norm": 0.6275284289150753, + "learning_rate": 4.244585389802731e-06, + "loss": 0.2701, + "step": 11806 + }, + { + "epoch": 0.5530987960837589, + "grad_norm": 0.6046393913748757, + "learning_rate": 4.244449546085804e-06, + "loss": 0.2885, + "step": 11807 + }, + { + "epoch": 0.5531456410736871, + "grad_norm": 0.5970674858353576, + "learning_rate": 4.2443136923300086e-06, + "loss": 0.2809, + "step": 11808 + }, + { + "epoch": 0.5531924860636155, + "grad_norm": 0.5634629095169982, + "learning_rate": 4.244177828536125e-06, + "loss": 0.2786, + "step": 11809 + }, + { + "epoch": 0.5532393310535438, + "grad_norm": 0.6125051968776348, + "learning_rate": 4.244041954704937e-06, + "loss": 0.2925, + "step": 11810 + }, + { + "epoch": 0.5532861760434722, + "grad_norm": 0.6047040745561364, + "learning_rate": 4.243906070837226e-06, + "loss": 0.2785, + "step": 11811 + }, + { + "epoch": 0.5533330210334004, + "grad_norm": 0.6087456938192309, + "learning_rate": 4.243770176933774e-06, + "loss": 0.2681, + "step": 11812 + }, + { + "epoch": 0.5533798660233288, + "grad_norm": 0.5984127200612529, + "learning_rate": 4.243634272995363e-06, + "loss": 0.2769, + "step": 11813 + }, + { + "epoch": 0.5534267110132571, + "grad_norm": 0.6255961924378528, + "learning_rate": 4.243498359022775e-06, + "loss": 0.2778, + "step": 11814 + }, + { + "epoch": 0.5534735560031855, + "grad_norm": 0.5769278000525402, + "learning_rate": 4.243362435016791e-06, + "loss": 0.2798, + "step": 11815 + }, + { + "epoch": 0.5535204009931138, + "grad_norm": 0.5714665873429701, + "learning_rate": 4.243226500978196e-06, + "loss": 0.2734, + "step": 11816 + }, + { + "epoch": 0.5535672459830421, + "grad_norm": 0.6306635999205226, + "learning_rate": 4.2430905569077696e-06, + "loss": 0.2938, + "step": 11817 + }, + { + "epoch": 0.5536140909729704, + "grad_norm": 0.6490339607474889, + "learning_rate": 4.242954602806295e-06, + "loss": 0.2918, + "step": 11818 + }, + { + "epoch": 0.5536609359628988, + "grad_norm": 0.6095488674216258, + "learning_rate": 4.242818638674555e-06, + "loss": 0.2875, + "step": 11819 + }, + { + "epoch": 0.5537077809528271, + "grad_norm": 0.6212807660165001, + "learning_rate": 4.242682664513332e-06, + "loss": 0.2738, + "step": 11820 + }, + { + "epoch": 0.5537546259427554, + "grad_norm": 0.5737675918582451, + "learning_rate": 4.242546680323409e-06, + "loss": 0.2608, + "step": 11821 + }, + { + "epoch": 0.5538014709326837, + "grad_norm": 0.6232203401820334, + "learning_rate": 4.242410686105567e-06, + "loss": 0.2797, + "step": 11822 + }, + { + "epoch": 0.5538483159226121, + "grad_norm": 0.5869916615683644, + "learning_rate": 4.242274681860589e-06, + "loss": 0.2756, + "step": 11823 + }, + { + "epoch": 0.5538951609125404, + "grad_norm": 0.6352482033028841, + "learning_rate": 4.242138667589258e-06, + "loss": 0.2915, + "step": 11824 + }, + { + "epoch": 0.5539420059024688, + "grad_norm": 0.6518840173061804, + "learning_rate": 4.242002643292357e-06, + "loss": 0.3071, + "step": 11825 + }, + { + "epoch": 0.553988850892397, + "grad_norm": 0.5813735336988625, + "learning_rate": 4.241866608970671e-06, + "loss": 0.268, + "step": 11826 + }, + { + "epoch": 0.5540356958823254, + "grad_norm": 0.6200251547440749, + "learning_rate": 4.241730564624978e-06, + "loss": 0.2748, + "step": 11827 + }, + { + "epoch": 0.5540825408722537, + "grad_norm": 0.6157052749387547, + "learning_rate": 4.241594510256064e-06, + "loss": 0.29, + "step": 11828 + }, + { + "epoch": 0.5541293858621821, + "grad_norm": 0.6067393285521747, + "learning_rate": 4.241458445864711e-06, + "loss": 0.2951, + "step": 11829 + }, + { + "epoch": 0.5541762308521103, + "grad_norm": 0.5907472263205231, + "learning_rate": 4.241322371451703e-06, + "loss": 0.2756, + "step": 11830 + }, + { + "epoch": 0.5542230758420387, + "grad_norm": 0.5637197742857973, + "learning_rate": 4.241186287017821e-06, + "loss": 0.2703, + "step": 11831 + }, + { + "epoch": 0.554269920831967, + "grad_norm": 0.5966623957915175, + "learning_rate": 4.241050192563851e-06, + "loss": 0.283, + "step": 11832 + }, + { + "epoch": 0.5543167658218954, + "grad_norm": 0.5896727781451361, + "learning_rate": 4.240914088090574e-06, + "loss": 0.2852, + "step": 11833 + }, + { + "epoch": 0.5543636108118237, + "grad_norm": 0.6090647084167041, + "learning_rate": 4.240777973598774e-06, + "loss": 0.2863, + "step": 11834 + }, + { + "epoch": 0.554410455801752, + "grad_norm": 0.6231513260961341, + "learning_rate": 4.240641849089235e-06, + "loss": 0.2743, + "step": 11835 + }, + { + "epoch": 0.5544573007916803, + "grad_norm": 0.617245386289185, + "learning_rate": 4.2405057145627395e-06, + "loss": 0.2767, + "step": 11836 + }, + { + "epoch": 0.5545041457816087, + "grad_norm": 0.6775694275402887, + "learning_rate": 4.240369570020071e-06, + "loss": 0.3304, + "step": 11837 + }, + { + "epoch": 0.554550990771537, + "grad_norm": 0.6118023178336666, + "learning_rate": 4.240233415462013e-06, + "loss": 0.3017, + "step": 11838 + }, + { + "epoch": 0.5545978357614653, + "grad_norm": 0.6013716508835897, + "learning_rate": 4.2400972508893484e-06, + "loss": 0.2853, + "step": 11839 + }, + { + "epoch": 0.5546446807513936, + "grad_norm": 0.5882764569600843, + "learning_rate": 4.239961076302862e-06, + "loss": 0.3047, + "step": 11840 + }, + { + "epoch": 0.554691525741322, + "grad_norm": 0.6098336286070313, + "learning_rate": 4.239824891703337e-06, + "loss": 0.2984, + "step": 11841 + }, + { + "epoch": 0.5547383707312503, + "grad_norm": 0.5899339472487407, + "learning_rate": 4.239688697091557e-06, + "loss": 0.2941, + "step": 11842 + }, + { + "epoch": 0.5547852157211787, + "grad_norm": 0.5957124533906044, + "learning_rate": 4.239552492468305e-06, + "loss": 0.2761, + "step": 11843 + }, + { + "epoch": 0.5548320607111069, + "grad_norm": 0.5923834181382134, + "learning_rate": 4.2394162778343665e-06, + "loss": 0.2676, + "step": 11844 + }, + { + "epoch": 0.5548789057010353, + "grad_norm": 0.5744958533414678, + "learning_rate": 4.2392800531905246e-06, + "loss": 0.2801, + "step": 11845 + }, + { + "epoch": 0.5549257506909636, + "grad_norm": 0.5833510323221107, + "learning_rate": 4.239143818537564e-06, + "loss": 0.2798, + "step": 11846 + }, + { + "epoch": 0.554972595680892, + "grad_norm": 0.5732038373399011, + "learning_rate": 4.2390075738762656e-06, + "loss": 0.282, + "step": 11847 + }, + { + "epoch": 0.5550194406708202, + "grad_norm": 0.6454729069450401, + "learning_rate": 4.238871319207417e-06, + "loss": 0.2997, + "step": 11848 + }, + { + "epoch": 0.5550662856607486, + "grad_norm": 0.6011018350513138, + "learning_rate": 4.238735054531801e-06, + "loss": 0.267, + "step": 11849 + }, + { + "epoch": 0.5551131306506769, + "grad_norm": 0.5303059404491207, + "learning_rate": 4.238598779850201e-06, + "loss": 0.2601, + "step": 11850 + }, + { + "epoch": 0.5551599756406053, + "grad_norm": 0.5448569495564154, + "learning_rate": 4.238462495163404e-06, + "loss": 0.2634, + "step": 11851 + }, + { + "epoch": 0.5552068206305336, + "grad_norm": 0.6197126586651076, + "learning_rate": 4.23832620047219e-06, + "loss": 0.2823, + "step": 11852 + }, + { + "epoch": 0.5552536656204619, + "grad_norm": 0.5659396130211484, + "learning_rate": 4.238189895777347e-06, + "loss": 0.2665, + "step": 11853 + }, + { + "epoch": 0.5553005106103902, + "grad_norm": 0.584763944931214, + "learning_rate": 4.238053581079658e-06, + "loss": 0.2775, + "step": 11854 + }, + { + "epoch": 0.5553473556003186, + "grad_norm": 0.5452569332175357, + "learning_rate": 4.237917256379907e-06, + "loss": 0.2597, + "step": 11855 + }, + { + "epoch": 0.5553942005902469, + "grad_norm": 0.6020759840433922, + "learning_rate": 4.237780921678878e-06, + "loss": 0.2995, + "step": 11856 + }, + { + "epoch": 0.5554410455801752, + "grad_norm": 0.5666215402572602, + "learning_rate": 4.237644576977358e-06, + "loss": 0.2794, + "step": 11857 + }, + { + "epoch": 0.5554878905701035, + "grad_norm": 0.6218922607983042, + "learning_rate": 4.23750822227613e-06, + "loss": 0.2887, + "step": 11858 + }, + { + "epoch": 0.5555347355600319, + "grad_norm": 0.6214767297191897, + "learning_rate": 4.237371857575979e-06, + "loss": 0.3033, + "step": 11859 + }, + { + "epoch": 0.5555815805499602, + "grad_norm": 0.5952050041999836, + "learning_rate": 4.237235482877689e-06, + "loss": 0.3011, + "step": 11860 + }, + { + "epoch": 0.5556284255398886, + "grad_norm": 0.5908992269528512, + "learning_rate": 4.2370990981820456e-06, + "loss": 0.289, + "step": 11861 + }, + { + "epoch": 0.5556752705298168, + "grad_norm": 0.5818476884701473, + "learning_rate": 4.236962703489834e-06, + "loss": 0.2963, + "step": 11862 + }, + { + "epoch": 0.5557221155197452, + "grad_norm": 0.5442320252770414, + "learning_rate": 4.236826298801838e-06, + "loss": 0.2776, + "step": 11863 + }, + { + "epoch": 0.5557689605096735, + "grad_norm": 0.6051555927699375, + "learning_rate": 4.236689884118844e-06, + "loss": 0.2754, + "step": 11864 + }, + { + "epoch": 0.5558158054996019, + "grad_norm": 0.6404687814039729, + "learning_rate": 4.2365534594416354e-06, + "loss": 0.2981, + "step": 11865 + }, + { + "epoch": 0.5558626504895301, + "grad_norm": 0.6077079790044163, + "learning_rate": 4.236417024770999e-06, + "loss": 0.2872, + "step": 11866 + }, + { + "epoch": 0.5559094954794584, + "grad_norm": 0.5599597472254959, + "learning_rate": 4.236280580107718e-06, + "loss": 0.2736, + "step": 11867 + }, + { + "epoch": 0.5559563404693868, + "grad_norm": 0.6656348570099866, + "learning_rate": 4.236144125452579e-06, + "loss": 0.2905, + "step": 11868 + }, + { + "epoch": 0.5560031854593152, + "grad_norm": 0.5563384013665786, + "learning_rate": 4.236007660806367e-06, + "loss": 0.277, + "step": 11869 + }, + { + "epoch": 0.5560500304492435, + "grad_norm": 0.5730636520093183, + "learning_rate": 4.235871186169867e-06, + "loss": 0.273, + "step": 11870 + }, + { + "epoch": 0.5560968754391717, + "grad_norm": 0.5843537082888417, + "learning_rate": 4.235734701543866e-06, + "loss": 0.2924, + "step": 11871 + }, + { + "epoch": 0.5561437204291001, + "grad_norm": 0.5612740841745125, + "learning_rate": 4.235598206929147e-06, + "loss": 0.2923, + "step": 11872 + }, + { + "epoch": 0.5561905654190284, + "grad_norm": 0.5796424073811602, + "learning_rate": 4.235461702326496e-06, + "loss": 0.3023, + "step": 11873 + }, + { + "epoch": 0.5562374104089568, + "grad_norm": 0.5727949316187883, + "learning_rate": 4.2353251877367e-06, + "loss": 0.2788, + "step": 11874 + }, + { + "epoch": 0.556284255398885, + "grad_norm": 0.6446261242977457, + "learning_rate": 4.235188663160543e-06, + "loss": 0.2895, + "step": 11875 + }, + { + "epoch": 0.5563311003888134, + "grad_norm": 0.5810142125667082, + "learning_rate": 4.235052128598812e-06, + "loss": 0.3053, + "step": 11876 + }, + { + "epoch": 0.5563779453787417, + "grad_norm": 0.5542947834061638, + "learning_rate": 4.234915584052291e-06, + "loss": 0.2657, + "step": 11877 + }, + { + "epoch": 0.5564247903686701, + "grad_norm": 0.6101616498173441, + "learning_rate": 4.234779029521769e-06, + "loss": 0.2987, + "step": 11878 + }, + { + "epoch": 0.5564716353585984, + "grad_norm": 0.6149517968696664, + "learning_rate": 4.234642465008028e-06, + "loss": 0.2858, + "step": 11879 + }, + { + "epoch": 0.5565184803485267, + "grad_norm": 0.552184103845372, + "learning_rate": 4.234505890511855e-06, + "loss": 0.2741, + "step": 11880 + }, + { + "epoch": 0.556565325338455, + "grad_norm": 0.6007937754425978, + "learning_rate": 4.234369306034038e-06, + "loss": 0.3015, + "step": 11881 + }, + { + "epoch": 0.5566121703283834, + "grad_norm": 0.5971799428798477, + "learning_rate": 4.234232711575361e-06, + "loss": 0.285, + "step": 11882 + }, + { + "epoch": 0.5566590153183117, + "grad_norm": 0.6518969190463558, + "learning_rate": 4.2340961071366105e-06, + "loss": 0.2894, + "step": 11883 + }, + { + "epoch": 0.55670586030824, + "grad_norm": 0.6053536793136195, + "learning_rate": 4.233959492718573e-06, + "loss": 0.2702, + "step": 11884 + }, + { + "epoch": 0.5567527052981683, + "grad_norm": 0.6170268031170519, + "learning_rate": 4.233822868322034e-06, + "loss": 0.2907, + "step": 11885 + }, + { + "epoch": 0.5567995502880967, + "grad_norm": 0.593822283549883, + "learning_rate": 4.233686233947781e-06, + "loss": 0.284, + "step": 11886 + }, + { + "epoch": 0.556846395278025, + "grad_norm": 0.5769392576515358, + "learning_rate": 4.233549589596599e-06, + "loss": 0.2814, + "step": 11887 + }, + { + "epoch": 0.5568932402679534, + "grad_norm": 0.6180446243638209, + "learning_rate": 4.233412935269274e-06, + "loss": 0.286, + "step": 11888 + }, + { + "epoch": 0.5569400852578816, + "grad_norm": 0.7128756741566508, + "learning_rate": 4.2332762709665945e-06, + "loss": 0.2884, + "step": 11889 + }, + { + "epoch": 0.55698693024781, + "grad_norm": 0.6172152976192902, + "learning_rate": 4.233139596689345e-06, + "loss": 0.3007, + "step": 11890 + }, + { + "epoch": 0.5570337752377383, + "grad_norm": 0.5626913836390857, + "learning_rate": 4.233002912438313e-06, + "loss": 0.2579, + "step": 11891 + }, + { + "epoch": 0.5570806202276667, + "grad_norm": 0.5559901619947264, + "learning_rate": 4.232866218214286e-06, + "loss": 0.2699, + "step": 11892 + }, + { + "epoch": 0.5571274652175949, + "grad_norm": 0.6398087060066285, + "learning_rate": 4.232729514018047e-06, + "loss": 0.2937, + "step": 11893 + }, + { + "epoch": 0.5571743102075233, + "grad_norm": 0.6720125617490268, + "learning_rate": 4.232592799850387e-06, + "loss": 0.3067, + "step": 11894 + }, + { + "epoch": 0.5572211551974516, + "grad_norm": 0.6141517130340443, + "learning_rate": 4.23245607571209e-06, + "loss": 0.2762, + "step": 11895 + }, + { + "epoch": 0.55726800018738, + "grad_norm": 0.6307361408101106, + "learning_rate": 4.2323193416039445e-06, + "loss": 0.3015, + "step": 11896 + }, + { + "epoch": 0.5573148451773083, + "grad_norm": 0.5762740609783026, + "learning_rate": 4.232182597526736e-06, + "loss": 0.2792, + "step": 11897 + }, + { + "epoch": 0.5573616901672366, + "grad_norm": 0.5613491854072882, + "learning_rate": 4.232045843481252e-06, + "loss": 0.2613, + "step": 11898 + }, + { + "epoch": 0.5574085351571649, + "grad_norm": 0.5955385423510069, + "learning_rate": 4.231909079468279e-06, + "loss": 0.2947, + "step": 11899 + }, + { + "epoch": 0.5574553801470933, + "grad_norm": 0.6997260402080057, + "learning_rate": 4.231772305488606e-06, + "loss": 0.3164, + "step": 11900 + }, + { + "epoch": 0.5575022251370216, + "grad_norm": 0.6366000362849479, + "learning_rate": 4.2316355215430174e-06, + "loss": 0.2789, + "step": 11901 + }, + { + "epoch": 0.5575490701269499, + "grad_norm": 0.6078872412508052, + "learning_rate": 4.231498727632302e-06, + "loss": 0.2833, + "step": 11902 + }, + { + "epoch": 0.5575959151168782, + "grad_norm": 0.6049540885986665, + "learning_rate": 4.2313619237572465e-06, + "loss": 0.2792, + "step": 11903 + }, + { + "epoch": 0.5576427601068066, + "grad_norm": 0.5983697309013865, + "learning_rate": 4.231225109918639e-06, + "loss": 0.2907, + "step": 11904 + }, + { + "epoch": 0.5576896050967349, + "grad_norm": 0.5779116433950667, + "learning_rate": 4.231088286117265e-06, + "loss": 0.2699, + "step": 11905 + }, + { + "epoch": 0.5577364500866633, + "grad_norm": 0.5818237132990997, + "learning_rate": 4.230951452353914e-06, + "loss": 0.2892, + "step": 11906 + }, + { + "epoch": 0.5577832950765915, + "grad_norm": 0.59462160776867, + "learning_rate": 4.2308146086293724e-06, + "loss": 0.2889, + "step": 11907 + }, + { + "epoch": 0.5578301400665199, + "grad_norm": 0.5711443178896521, + "learning_rate": 4.230677754944427e-06, + "loss": 0.2652, + "step": 11908 + }, + { + "epoch": 0.5578769850564482, + "grad_norm": 0.554338268219613, + "learning_rate": 4.230540891299867e-06, + "loss": 0.2871, + "step": 11909 + }, + { + "epoch": 0.5579238300463766, + "grad_norm": 0.6387357101681456, + "learning_rate": 4.230404017696479e-06, + "loss": 0.3027, + "step": 11910 + }, + { + "epoch": 0.5579706750363048, + "grad_norm": 0.6310467171737274, + "learning_rate": 4.23026713413505e-06, + "loss": 0.3081, + "step": 11911 + }, + { + "epoch": 0.5580175200262332, + "grad_norm": 0.5934151163653232, + "learning_rate": 4.230130240616369e-06, + "loss": 0.2874, + "step": 11912 + }, + { + "epoch": 0.5580643650161615, + "grad_norm": 0.609967133669874, + "learning_rate": 4.2299933371412224e-06, + "loss": 0.2829, + "step": 11913 + }, + { + "epoch": 0.5581112100060899, + "grad_norm": 0.5642491512513048, + "learning_rate": 4.2298564237104e-06, + "loss": 0.2641, + "step": 11914 + }, + { + "epoch": 0.5581580549960182, + "grad_norm": 0.5995452188102686, + "learning_rate": 4.229719500324689e-06, + "loss": 0.3181, + "step": 11915 + }, + { + "epoch": 0.5582048999859465, + "grad_norm": 0.6152505399715511, + "learning_rate": 4.229582566984876e-06, + "loss": 0.3032, + "step": 11916 + }, + { + "epoch": 0.5582517449758748, + "grad_norm": 0.6246044961336177, + "learning_rate": 4.229445623691751e-06, + "loss": 0.2937, + "step": 11917 + }, + { + "epoch": 0.5582985899658032, + "grad_norm": 0.6459942244894948, + "learning_rate": 4.229308670446101e-06, + "loss": 0.2776, + "step": 11918 + }, + { + "epoch": 0.5583454349557315, + "grad_norm": 0.7229316391511911, + "learning_rate": 4.2291717072487135e-06, + "loss": 0.2818, + "step": 11919 + }, + { + "epoch": 0.5583922799456598, + "grad_norm": 0.633935001701916, + "learning_rate": 4.229034734100378e-06, + "loss": 0.3135, + "step": 11920 + }, + { + "epoch": 0.5584391249355881, + "grad_norm": 0.6230699546467635, + "learning_rate": 4.228897751001882e-06, + "loss": 0.2918, + "step": 11921 + }, + { + "epoch": 0.5584859699255165, + "grad_norm": 0.5701113555027454, + "learning_rate": 4.228760757954014e-06, + "loss": 0.2912, + "step": 11922 + }, + { + "epoch": 0.5585328149154448, + "grad_norm": 0.6574145085633566, + "learning_rate": 4.228623754957562e-06, + "loss": 0.3109, + "step": 11923 + }, + { + "epoch": 0.5585796599053732, + "grad_norm": 0.6255858638766344, + "learning_rate": 4.228486742013316e-06, + "loss": 0.2955, + "step": 11924 + }, + { + "epoch": 0.5586265048953014, + "grad_norm": 0.5753747467692971, + "learning_rate": 4.228349719122062e-06, + "loss": 0.254, + "step": 11925 + }, + { + "epoch": 0.5586733498852298, + "grad_norm": 0.6235507449425839, + "learning_rate": 4.2282126862845905e-06, + "loss": 0.2956, + "step": 11926 + }, + { + "epoch": 0.5587201948751581, + "grad_norm": 0.617513647934462, + "learning_rate": 4.228075643501689e-06, + "loss": 0.2757, + "step": 11927 + }, + { + "epoch": 0.5587670398650865, + "grad_norm": 0.642010690157537, + "learning_rate": 4.227938590774146e-06, + "loss": 0.307, + "step": 11928 + }, + { + "epoch": 0.5588138848550147, + "grad_norm": 0.5847883486152392, + "learning_rate": 4.227801528102751e-06, + "loss": 0.3168, + "step": 11929 + }, + { + "epoch": 0.5588607298449431, + "grad_norm": 0.593658930357267, + "learning_rate": 4.227664455488293e-06, + "loss": 0.3013, + "step": 11930 + }, + { + "epoch": 0.5589075748348714, + "grad_norm": 0.5972001450220603, + "learning_rate": 4.2275273729315604e-06, + "loss": 0.2811, + "step": 11931 + }, + { + "epoch": 0.5589544198247998, + "grad_norm": 0.668366827982659, + "learning_rate": 4.227390280433341e-06, + "loss": 0.3036, + "step": 11932 + }, + { + "epoch": 0.5590012648147281, + "grad_norm": 0.7011445335976491, + "learning_rate": 4.227253177994426e-06, + "loss": 0.3386, + "step": 11933 + }, + { + "epoch": 0.5590481098046564, + "grad_norm": 0.6523018935124035, + "learning_rate": 4.227116065615602e-06, + "loss": 0.2924, + "step": 11934 + }, + { + "epoch": 0.5590949547945847, + "grad_norm": 0.5686669331822641, + "learning_rate": 4.226978943297659e-06, + "loss": 0.2595, + "step": 11935 + }, + { + "epoch": 0.5591417997845131, + "grad_norm": 0.5970614855955912, + "learning_rate": 4.226841811041387e-06, + "loss": 0.2676, + "step": 11936 + }, + { + "epoch": 0.5591886447744414, + "grad_norm": 0.5626296315135386, + "learning_rate": 4.226704668847574e-06, + "loss": 0.2836, + "step": 11937 + }, + { + "epoch": 0.5592354897643697, + "grad_norm": 0.6080493461880928, + "learning_rate": 4.22656751671701e-06, + "loss": 0.2877, + "step": 11938 + }, + { + "epoch": 0.559282334754298, + "grad_norm": 0.6397511820299551, + "learning_rate": 4.226430354650484e-06, + "loss": 0.2973, + "step": 11939 + }, + { + "epoch": 0.5593291797442264, + "grad_norm": 0.6134845971063406, + "learning_rate": 4.226293182648784e-06, + "loss": 0.2828, + "step": 11940 + }, + { + "epoch": 0.5593760247341547, + "grad_norm": 0.5972515201664699, + "learning_rate": 4.226156000712702e-06, + "loss": 0.2959, + "step": 11941 + }, + { + "epoch": 0.5594228697240831, + "grad_norm": 0.5881076595184361, + "learning_rate": 4.226018808843025e-06, + "loss": 0.2843, + "step": 11942 + }, + { + "epoch": 0.5594697147140113, + "grad_norm": 0.6103456021427123, + "learning_rate": 4.2258816070405444e-06, + "loss": 0.2907, + "step": 11943 + }, + { + "epoch": 0.5595165597039397, + "grad_norm": 0.6341326435186856, + "learning_rate": 4.2257443953060485e-06, + "loss": 0.2739, + "step": 11944 + }, + { + "epoch": 0.559563404693868, + "grad_norm": 0.6273866919652814, + "learning_rate": 4.2256071736403284e-06, + "loss": 0.2872, + "step": 11945 + }, + { + "epoch": 0.5596102496837964, + "grad_norm": 0.6401646796462434, + "learning_rate": 4.225469942044171e-06, + "loss": 0.2797, + "step": 11946 + }, + { + "epoch": 0.5596570946737246, + "grad_norm": 0.6024969873716852, + "learning_rate": 4.2253327005183685e-06, + "loss": 0.3039, + "step": 11947 + }, + { + "epoch": 0.559703939663653, + "grad_norm": 0.6077746604406404, + "learning_rate": 4.22519544906371e-06, + "loss": 0.2934, + "step": 11948 + }, + { + "epoch": 0.5597507846535813, + "grad_norm": 0.598254464668365, + "learning_rate": 4.225058187680986e-06, + "loss": 0.2984, + "step": 11949 + }, + { + "epoch": 0.5597976296435097, + "grad_norm": 0.5706401696968798, + "learning_rate": 4.224920916370984e-06, + "loss": 0.2811, + "step": 11950 + }, + { + "epoch": 0.559844474633438, + "grad_norm": 0.6179322539611192, + "learning_rate": 4.224783635134498e-06, + "loss": 0.3033, + "step": 11951 + }, + { + "epoch": 0.5598913196233662, + "grad_norm": 0.6199838900924823, + "learning_rate": 4.224646343972314e-06, + "loss": 0.2952, + "step": 11952 + }, + { + "epoch": 0.5599381646132946, + "grad_norm": 0.609437082082529, + "learning_rate": 4.224509042885224e-06, + "loss": 0.2846, + "step": 11953 + }, + { + "epoch": 0.559985009603223, + "grad_norm": 0.5786079296191826, + "learning_rate": 4.224371731874018e-06, + "loss": 0.3055, + "step": 11954 + }, + { + "epoch": 0.5600318545931513, + "grad_norm": 0.5610781947168174, + "learning_rate": 4.224234410939487e-06, + "loss": 0.2922, + "step": 11955 + }, + { + "epoch": 0.5600786995830795, + "grad_norm": 0.5934409996204052, + "learning_rate": 4.224097080082419e-06, + "loss": 0.2776, + "step": 11956 + }, + { + "epoch": 0.5601255445730079, + "grad_norm": 0.5833921558604808, + "learning_rate": 4.2239597393036066e-06, + "loss": 0.2915, + "step": 11957 + }, + { + "epoch": 0.5601723895629362, + "grad_norm": 0.5973170584376389, + "learning_rate": 4.223822388603838e-06, + "loss": 0.2692, + "step": 11958 + }, + { + "epoch": 0.5602192345528646, + "grad_norm": 0.6069300017266644, + "learning_rate": 4.223685027983906e-06, + "loss": 0.2821, + "step": 11959 + }, + { + "epoch": 0.560266079542793, + "grad_norm": 0.6021627520118875, + "learning_rate": 4.223547657444599e-06, + "loss": 0.297, + "step": 11960 + }, + { + "epoch": 0.5603129245327212, + "grad_norm": 0.57766788359473, + "learning_rate": 4.22341027698671e-06, + "loss": 0.2891, + "step": 11961 + }, + { + "epoch": 0.5603597695226495, + "grad_norm": 0.5412855825725342, + "learning_rate": 4.223272886611026e-06, + "loss": 0.2639, + "step": 11962 + }, + { + "epoch": 0.5604066145125779, + "grad_norm": 0.6531981041387999, + "learning_rate": 4.223135486318341e-06, + "loss": 0.284, + "step": 11963 + }, + { + "epoch": 0.5604534595025062, + "grad_norm": 0.6146815312582291, + "learning_rate": 4.222998076109444e-06, + "loss": 0.3009, + "step": 11964 + }, + { + "epoch": 0.5605003044924345, + "grad_norm": 0.5224972298740419, + "learning_rate": 4.222860655985126e-06, + "loss": 0.2744, + "step": 11965 + }, + { + "epoch": 0.5605471494823628, + "grad_norm": 0.6525463709530108, + "learning_rate": 4.222723225946178e-06, + "loss": 0.3017, + "step": 11966 + }, + { + "epoch": 0.5605939944722912, + "grad_norm": 0.6945171298347609, + "learning_rate": 4.222585785993391e-06, + "loss": 0.2997, + "step": 11967 + }, + { + "epoch": 0.5606408394622195, + "grad_norm": 0.6006273631640078, + "learning_rate": 4.222448336127556e-06, + "loss": 0.2797, + "step": 11968 + }, + { + "epoch": 0.5606876844521479, + "grad_norm": 0.6500840900600944, + "learning_rate": 4.222310876349463e-06, + "loss": 0.2954, + "step": 11969 + }, + { + "epoch": 0.5607345294420761, + "grad_norm": 0.567164596586867, + "learning_rate": 4.222173406659904e-06, + "loss": 0.2935, + "step": 11970 + }, + { + "epoch": 0.5607813744320045, + "grad_norm": 0.5994245844846093, + "learning_rate": 4.22203592705967e-06, + "loss": 0.2905, + "step": 11971 + }, + { + "epoch": 0.5608282194219328, + "grad_norm": 0.6265229673910119, + "learning_rate": 4.221898437549552e-06, + "loss": 0.2967, + "step": 11972 + }, + { + "epoch": 0.5608750644118612, + "grad_norm": 0.5661779091329345, + "learning_rate": 4.221760938130342e-06, + "loss": 0.2667, + "step": 11973 + }, + { + "epoch": 0.5609219094017894, + "grad_norm": 0.6392210108103556, + "learning_rate": 4.22162342880283e-06, + "loss": 0.3008, + "step": 11974 + }, + { + "epoch": 0.5609687543917178, + "grad_norm": 0.6266675179649811, + "learning_rate": 4.2214859095678074e-06, + "loss": 0.2863, + "step": 11975 + }, + { + "epoch": 0.5610155993816461, + "grad_norm": 0.6411461137145228, + "learning_rate": 4.221348380426067e-06, + "loss": 0.3061, + "step": 11976 + }, + { + "epoch": 0.5610624443715745, + "grad_norm": 0.5920166191616966, + "learning_rate": 4.221210841378398e-06, + "loss": 0.2881, + "step": 11977 + }, + { + "epoch": 0.5611092893615028, + "grad_norm": 0.641989323700067, + "learning_rate": 4.2210732924255945e-06, + "loss": 0.3057, + "step": 11978 + }, + { + "epoch": 0.5611561343514311, + "grad_norm": 0.5559755452719346, + "learning_rate": 4.220935733568447e-06, + "loss": 0.2594, + "step": 11979 + }, + { + "epoch": 0.5612029793413594, + "grad_norm": 0.6105402913565718, + "learning_rate": 4.220798164807746e-06, + "loss": 0.2676, + "step": 11980 + }, + { + "epoch": 0.5612498243312878, + "grad_norm": 0.6135333991583743, + "learning_rate": 4.220660586144284e-06, + "loss": 0.2869, + "step": 11981 + }, + { + "epoch": 0.5612966693212161, + "grad_norm": 0.5836143513057199, + "learning_rate": 4.220522997578853e-06, + "loss": 0.2989, + "step": 11982 + }, + { + "epoch": 0.5613435143111444, + "grad_norm": 0.5845103153427172, + "learning_rate": 4.220385399112245e-06, + "loss": 0.29, + "step": 11983 + }, + { + "epoch": 0.5613903593010727, + "grad_norm": 0.5997586819459877, + "learning_rate": 4.220247790745251e-06, + "loss": 0.2947, + "step": 11984 + }, + { + "epoch": 0.5614372042910011, + "grad_norm": 0.5861750581147823, + "learning_rate": 4.220110172478663e-06, + "loss": 0.2976, + "step": 11985 + }, + { + "epoch": 0.5614840492809294, + "grad_norm": 0.5946481436702559, + "learning_rate": 4.219972544313274e-06, + "loss": 0.2824, + "step": 11986 + }, + { + "epoch": 0.5615308942708578, + "grad_norm": 0.6050138720829259, + "learning_rate": 4.219834906249875e-06, + "loss": 0.2797, + "step": 11987 + }, + { + "epoch": 0.561577739260786, + "grad_norm": 0.5687210814001565, + "learning_rate": 4.219697258289258e-06, + "loss": 0.28, + "step": 11988 + }, + { + "epoch": 0.5616245842507144, + "grad_norm": 0.5683110812533422, + "learning_rate": 4.219559600432216e-06, + "loss": 0.2777, + "step": 11989 + }, + { + "epoch": 0.5616714292406427, + "grad_norm": 0.5732518607824266, + "learning_rate": 4.219421932679541e-06, + "loss": 0.2865, + "step": 11990 + }, + { + "epoch": 0.5617182742305711, + "grad_norm": 0.6320656802604289, + "learning_rate": 4.219284255032024e-06, + "loss": 0.2989, + "step": 11991 + }, + { + "epoch": 0.5617651192204993, + "grad_norm": 0.6091686872046504, + "learning_rate": 4.219146567490458e-06, + "loss": 0.281, + "step": 11992 + }, + { + "epoch": 0.5618119642104277, + "grad_norm": 0.5841590329805864, + "learning_rate": 4.219008870055637e-06, + "loss": 0.2773, + "step": 11993 + }, + { + "epoch": 0.561858809200356, + "grad_norm": 0.5803339174708252, + "learning_rate": 4.2188711627283515e-06, + "loss": 0.2934, + "step": 11994 + }, + { + "epoch": 0.5619056541902844, + "grad_norm": 0.5971576293553191, + "learning_rate": 4.218733445509395e-06, + "loss": 0.3023, + "step": 11995 + }, + { + "epoch": 0.5619524991802127, + "grad_norm": 0.577488167880642, + "learning_rate": 4.218595718399559e-06, + "loss": 0.2727, + "step": 11996 + }, + { + "epoch": 0.561999344170141, + "grad_norm": 0.6114455478561973, + "learning_rate": 4.218457981399636e-06, + "loss": 0.3078, + "step": 11997 + }, + { + "epoch": 0.5620461891600693, + "grad_norm": 0.5847902898350618, + "learning_rate": 4.21832023451042e-06, + "loss": 0.282, + "step": 11998 + }, + { + "epoch": 0.5620930341499977, + "grad_norm": 0.5616782271182397, + "learning_rate": 4.218182477732702e-06, + "loss": 0.2743, + "step": 11999 + }, + { + "epoch": 0.562139879139926, + "grad_norm": 0.5901228580108312, + "learning_rate": 4.218044711067277e-06, + "loss": 0.2672, + "step": 12000 + }, + { + "epoch": 0.5621867241298543, + "grad_norm": 0.564402255897131, + "learning_rate": 4.217906934514936e-06, + "loss": 0.2796, + "step": 12001 + }, + { + "epoch": 0.5622335691197826, + "grad_norm": 0.592724074020255, + "learning_rate": 4.217769148076472e-06, + "loss": 0.2915, + "step": 12002 + }, + { + "epoch": 0.562280414109711, + "grad_norm": 0.5909220361111236, + "learning_rate": 4.217631351752678e-06, + "loss": 0.2802, + "step": 12003 + }, + { + "epoch": 0.5623272590996393, + "grad_norm": 0.5387945048973379, + "learning_rate": 4.217493545544348e-06, + "loss": 0.2804, + "step": 12004 + }, + { + "epoch": 0.5623741040895677, + "grad_norm": 0.602725910387869, + "learning_rate": 4.2173557294522735e-06, + "loss": 0.2742, + "step": 12005 + }, + { + "epoch": 0.5624209490794959, + "grad_norm": 0.6723925644328083, + "learning_rate": 4.21721790347725e-06, + "loss": 0.3083, + "step": 12006 + }, + { + "epoch": 0.5624677940694243, + "grad_norm": 0.5492712370109497, + "learning_rate": 4.2170800676200675e-06, + "loss": 0.2737, + "step": 12007 + }, + { + "epoch": 0.5625146390593526, + "grad_norm": 0.6384228856056583, + "learning_rate": 4.2169422218815216e-06, + "loss": 0.3071, + "step": 12008 + }, + { + "epoch": 0.562561484049281, + "grad_norm": 0.6425689828125777, + "learning_rate": 4.216804366262405e-06, + "loss": 0.2882, + "step": 12009 + }, + { + "epoch": 0.5626083290392092, + "grad_norm": 0.6416508261760712, + "learning_rate": 4.21666650076351e-06, + "loss": 0.2889, + "step": 12010 + }, + { + "epoch": 0.5626551740291376, + "grad_norm": 0.595805397623262, + "learning_rate": 4.2165286253856306e-06, + "loss": 0.27, + "step": 12011 + }, + { + "epoch": 0.5627020190190659, + "grad_norm": 0.5991620158014055, + "learning_rate": 4.216390740129561e-06, + "loss": 0.2814, + "step": 12012 + }, + { + "epoch": 0.5627488640089943, + "grad_norm": 0.6216009424468765, + "learning_rate": 4.216252844996094e-06, + "loss": 0.2916, + "step": 12013 + }, + { + "epoch": 0.5627957089989226, + "grad_norm": 0.6440905300348823, + "learning_rate": 4.216114939986023e-06, + "loss": 0.2986, + "step": 12014 + }, + { + "epoch": 0.5628425539888509, + "grad_norm": 0.5830935994575152, + "learning_rate": 4.215977025100142e-06, + "loss": 0.2831, + "step": 12015 + }, + { + "epoch": 0.5628893989787792, + "grad_norm": 0.5736729004234672, + "learning_rate": 4.215839100339245e-06, + "loss": 0.2903, + "step": 12016 + }, + { + "epoch": 0.5629362439687076, + "grad_norm": 0.5946189791136816, + "learning_rate": 4.215701165704125e-06, + "loss": 0.2934, + "step": 12017 + }, + { + "epoch": 0.5629830889586359, + "grad_norm": 0.582258919701526, + "learning_rate": 4.215563221195576e-06, + "loss": 0.2723, + "step": 12018 + }, + { + "epoch": 0.5630299339485642, + "grad_norm": 0.6117541903423513, + "learning_rate": 4.215425266814391e-06, + "loss": 0.2825, + "step": 12019 + }, + { + "epoch": 0.5630767789384925, + "grad_norm": 0.5998199135157658, + "learning_rate": 4.215287302561365e-06, + "loss": 0.2665, + "step": 12020 + }, + { + "epoch": 0.5631236239284209, + "grad_norm": 0.609119193842258, + "learning_rate": 4.2151493284372925e-06, + "loss": 0.2954, + "step": 12021 + }, + { + "epoch": 0.5631704689183492, + "grad_norm": 0.5655761016473982, + "learning_rate": 4.215011344442966e-06, + "loss": 0.2835, + "step": 12022 + }, + { + "epoch": 0.5632173139082776, + "grad_norm": 0.583889690324393, + "learning_rate": 4.214873350579181e-06, + "loss": 0.2757, + "step": 12023 + }, + { + "epoch": 0.5632641588982058, + "grad_norm": 0.5838301787869944, + "learning_rate": 4.21473534684673e-06, + "loss": 0.2752, + "step": 12024 + }, + { + "epoch": 0.5633110038881342, + "grad_norm": 0.5937730734219502, + "learning_rate": 4.2145973332464085e-06, + "loss": 0.2968, + "step": 12025 + }, + { + "epoch": 0.5633578488780625, + "grad_norm": 0.554454643449964, + "learning_rate": 4.21445930977901e-06, + "loss": 0.2765, + "step": 12026 + }, + { + "epoch": 0.5634046938679909, + "grad_norm": 0.6035034591655462, + "learning_rate": 4.214321276445329e-06, + "loss": 0.2897, + "step": 12027 + }, + { + "epoch": 0.5634515388579191, + "grad_norm": 0.6248830584132856, + "learning_rate": 4.21418323324616e-06, + "loss": 0.3039, + "step": 12028 + }, + { + "epoch": 0.5634983838478475, + "grad_norm": 0.6095982056959829, + "learning_rate": 4.214045180182299e-06, + "loss": 0.2906, + "step": 12029 + }, + { + "epoch": 0.5635452288377758, + "grad_norm": 0.6503106640591034, + "learning_rate": 4.213907117254537e-06, + "loss": 0.2957, + "step": 12030 + }, + { + "epoch": 0.5635920738277042, + "grad_norm": 0.5910404909729781, + "learning_rate": 4.213769044463671e-06, + "loss": 0.271, + "step": 12031 + }, + { + "epoch": 0.5636389188176325, + "grad_norm": 0.6078195237143245, + "learning_rate": 4.213630961810494e-06, + "loss": 0.3039, + "step": 12032 + }, + { + "epoch": 0.5636857638075607, + "grad_norm": 0.6441318450077093, + "learning_rate": 4.213492869295802e-06, + "loss": 0.3043, + "step": 12033 + }, + { + "epoch": 0.5637326087974891, + "grad_norm": 0.5461900857365841, + "learning_rate": 4.21335476692039e-06, + "loss": 0.2804, + "step": 12034 + }, + { + "epoch": 0.5637794537874175, + "grad_norm": 0.6859747978276001, + "learning_rate": 4.213216654685052e-06, + "loss": 0.2956, + "step": 12035 + }, + { + "epoch": 0.5638262987773458, + "grad_norm": 0.5698243367715301, + "learning_rate": 4.213078532590581e-06, + "loss": 0.2636, + "step": 12036 + }, + { + "epoch": 0.563873143767274, + "grad_norm": 0.6264863139609081, + "learning_rate": 4.212940400637775e-06, + "loss": 0.2907, + "step": 12037 + }, + { + "epoch": 0.5639199887572024, + "grad_norm": 0.6163809236753535, + "learning_rate": 4.212802258827428e-06, + "loss": 0.2868, + "step": 12038 + }, + { + "epoch": 0.5639668337471307, + "grad_norm": 0.5788000562412272, + "learning_rate": 4.212664107160333e-06, + "loss": 0.2952, + "step": 12039 + }, + { + "epoch": 0.5640136787370591, + "grad_norm": 0.5836887250911216, + "learning_rate": 4.2125259456372876e-06, + "loss": 0.2764, + "step": 12040 + }, + { + "epoch": 0.5640605237269875, + "grad_norm": 0.6090861951822553, + "learning_rate": 4.212387774259085e-06, + "loss": 0.2803, + "step": 12041 + }, + { + "epoch": 0.5641073687169157, + "grad_norm": 0.626574043370508, + "learning_rate": 4.212249593026521e-06, + "loss": 0.2766, + "step": 12042 + }, + { + "epoch": 0.564154213706844, + "grad_norm": 0.5997053810450047, + "learning_rate": 4.212111401940392e-06, + "loss": 0.2732, + "step": 12043 + }, + { + "epoch": 0.5642010586967724, + "grad_norm": 0.6273023501059816, + "learning_rate": 4.211973201001492e-06, + "loss": 0.2859, + "step": 12044 + }, + { + "epoch": 0.5642479036867007, + "grad_norm": 0.5825599116849476, + "learning_rate": 4.211834990210616e-06, + "loss": 0.2864, + "step": 12045 + }, + { + "epoch": 0.564294748676629, + "grad_norm": 0.6643339162991189, + "learning_rate": 4.211696769568561e-06, + "loss": 0.2832, + "step": 12046 + }, + { + "epoch": 0.5643415936665573, + "grad_norm": 0.5565085836949232, + "learning_rate": 4.2115585390761196e-06, + "loss": 0.2726, + "step": 12047 + }, + { + "epoch": 0.5643884386564857, + "grad_norm": 0.6041571696034077, + "learning_rate": 4.21142029873409e-06, + "loss": 0.2932, + "step": 12048 + }, + { + "epoch": 0.564435283646414, + "grad_norm": 0.5621747621357598, + "learning_rate": 4.211282048543267e-06, + "loss": 0.2919, + "step": 12049 + }, + { + "epoch": 0.5644821286363424, + "grad_norm": 0.6320428220943003, + "learning_rate": 4.211143788504446e-06, + "loss": 0.2887, + "step": 12050 + }, + { + "epoch": 0.5645289736262706, + "grad_norm": 0.5593120629309108, + "learning_rate": 4.211005518618421e-06, + "loss": 0.2865, + "step": 12051 + }, + { + "epoch": 0.564575818616199, + "grad_norm": 0.6510647318633547, + "learning_rate": 4.21086723888599e-06, + "loss": 0.3179, + "step": 12052 + }, + { + "epoch": 0.5646226636061273, + "grad_norm": 0.572541117731483, + "learning_rate": 4.21072894930795e-06, + "loss": 0.2787, + "step": 12053 + }, + { + "epoch": 0.5646695085960557, + "grad_norm": 0.6116875344739365, + "learning_rate": 4.210590649885092e-06, + "loss": 0.2962, + "step": 12054 + }, + { + "epoch": 0.5647163535859839, + "grad_norm": 0.639259473475011, + "learning_rate": 4.210452340618216e-06, + "loss": 0.2951, + "step": 12055 + }, + { + "epoch": 0.5647631985759123, + "grad_norm": 0.5626816333500627, + "learning_rate": 4.210314021508117e-06, + "loss": 0.2718, + "step": 12056 + }, + { + "epoch": 0.5648100435658406, + "grad_norm": 0.6260886495098912, + "learning_rate": 4.21017569255559e-06, + "loss": 0.2876, + "step": 12057 + }, + { + "epoch": 0.564856888555769, + "grad_norm": 0.6128127251454728, + "learning_rate": 4.2100373537614324e-06, + "loss": 0.2753, + "step": 12058 + }, + { + "epoch": 0.5649037335456973, + "grad_norm": 0.5995574886604305, + "learning_rate": 4.20989900512644e-06, + "loss": 0.2842, + "step": 12059 + }, + { + "epoch": 0.5649505785356256, + "grad_norm": 0.5758584357289974, + "learning_rate": 4.209760646651407e-06, + "loss": 0.2775, + "step": 12060 + }, + { + "epoch": 0.5649974235255539, + "grad_norm": 0.5978764671741394, + "learning_rate": 4.2096222783371335e-06, + "loss": 0.2939, + "step": 12061 + }, + { + "epoch": 0.5650442685154823, + "grad_norm": 0.6221609965196179, + "learning_rate": 4.2094839001844115e-06, + "loss": 0.2925, + "step": 12062 + }, + { + "epoch": 0.5650911135054106, + "grad_norm": 0.6176767388946308, + "learning_rate": 4.20934551219404e-06, + "loss": 0.2971, + "step": 12063 + }, + { + "epoch": 0.5651379584953389, + "grad_norm": 0.6265445771802897, + "learning_rate": 4.209207114366815e-06, + "loss": 0.2962, + "step": 12064 + }, + { + "epoch": 0.5651848034852672, + "grad_norm": 0.5802077530910655, + "learning_rate": 4.209068706703532e-06, + "loss": 0.2894, + "step": 12065 + }, + { + "epoch": 0.5652316484751956, + "grad_norm": 0.6362313719411967, + "learning_rate": 4.2089302892049885e-06, + "loss": 0.2969, + "step": 12066 + }, + { + "epoch": 0.5652784934651239, + "grad_norm": 0.6174720044158343, + "learning_rate": 4.208791861871981e-06, + "loss": 0.2927, + "step": 12067 + }, + { + "epoch": 0.5653253384550523, + "grad_norm": 0.6194491284899802, + "learning_rate": 4.208653424705305e-06, + "loss": 0.3014, + "step": 12068 + }, + { + "epoch": 0.5653721834449805, + "grad_norm": 0.5677639709222443, + "learning_rate": 4.208514977705759e-06, + "loss": 0.2782, + "step": 12069 + }, + { + "epoch": 0.5654190284349089, + "grad_norm": 0.5600458816228026, + "learning_rate": 4.208376520874138e-06, + "loss": 0.2652, + "step": 12070 + }, + { + "epoch": 0.5654658734248372, + "grad_norm": 0.6331038038135578, + "learning_rate": 4.20823805421124e-06, + "loss": 0.3056, + "step": 12071 + }, + { + "epoch": 0.5655127184147656, + "grad_norm": 0.5388502407858813, + "learning_rate": 4.2080995777178605e-06, + "loss": 0.274, + "step": 12072 + }, + { + "epoch": 0.5655595634046938, + "grad_norm": 0.5623159038530586, + "learning_rate": 4.2079610913947984e-06, + "loss": 0.2772, + "step": 12073 + }, + { + "epoch": 0.5656064083946222, + "grad_norm": 0.5957882022742068, + "learning_rate": 4.207822595242848e-06, + "loss": 0.2951, + "step": 12074 + }, + { + "epoch": 0.5656532533845505, + "grad_norm": 0.581627769231528, + "learning_rate": 4.207684089262809e-06, + "loss": 0.2734, + "step": 12075 + }, + { + "epoch": 0.5657000983744789, + "grad_norm": 0.5821564404408845, + "learning_rate": 4.207545573455477e-06, + "loss": 0.298, + "step": 12076 + }, + { + "epoch": 0.5657469433644072, + "grad_norm": 0.6153507168799582, + "learning_rate": 4.20740704782165e-06, + "loss": 0.2918, + "step": 12077 + }, + { + "epoch": 0.5657937883543355, + "grad_norm": 0.5850510343750432, + "learning_rate": 4.207268512362124e-06, + "loss": 0.2838, + "step": 12078 + }, + { + "epoch": 0.5658406333442638, + "grad_norm": 0.5826269871240513, + "learning_rate": 4.2071299670776965e-06, + "loss": 0.293, + "step": 12079 + }, + { + "epoch": 0.5658874783341922, + "grad_norm": 0.5834323594187943, + "learning_rate": 4.206991411969166e-06, + "loss": 0.2757, + "step": 12080 + }, + { + "epoch": 0.5659343233241205, + "grad_norm": 0.5740187040661489, + "learning_rate": 4.206852847037328e-06, + "loss": 0.2765, + "step": 12081 + }, + { + "epoch": 0.5659811683140488, + "grad_norm": 0.6072846334714361, + "learning_rate": 4.206714272282981e-06, + "loss": 0.2505, + "step": 12082 + }, + { + "epoch": 0.5660280133039771, + "grad_norm": 0.6379793261064401, + "learning_rate": 4.206575687706923e-06, + "loss": 0.2915, + "step": 12083 + }, + { + "epoch": 0.5660748582939055, + "grad_norm": 0.5453809912746578, + "learning_rate": 4.20643709330995e-06, + "loss": 0.2742, + "step": 12084 + }, + { + "epoch": 0.5661217032838338, + "grad_norm": 0.5746227302845585, + "learning_rate": 4.206298489092861e-06, + "loss": 0.2877, + "step": 12085 + }, + { + "epoch": 0.5661685482737622, + "grad_norm": 0.5747933964044107, + "learning_rate": 4.206159875056453e-06, + "loss": 0.277, + "step": 12086 + }, + { + "epoch": 0.5662153932636904, + "grad_norm": 0.612149297312845, + "learning_rate": 4.206021251201524e-06, + "loss": 0.2878, + "step": 12087 + }, + { + "epoch": 0.5662622382536188, + "grad_norm": 0.6168909128008423, + "learning_rate": 4.205882617528871e-06, + "loss": 0.2558, + "step": 12088 + }, + { + "epoch": 0.5663090832435471, + "grad_norm": 0.5873451552402406, + "learning_rate": 4.205743974039293e-06, + "loss": 0.2917, + "step": 12089 + }, + { + "epoch": 0.5663559282334755, + "grad_norm": 0.558237087148972, + "learning_rate": 4.2056053207335854e-06, + "loss": 0.2595, + "step": 12090 + }, + { + "epoch": 0.5664027732234037, + "grad_norm": 0.6593196577640975, + "learning_rate": 4.20546665761255e-06, + "loss": 0.2928, + "step": 12091 + }, + { + "epoch": 0.5664496182133321, + "grad_norm": 0.5817910008144227, + "learning_rate": 4.205327984676981e-06, + "loss": 0.263, + "step": 12092 + }, + { + "epoch": 0.5664964632032604, + "grad_norm": 0.5690418797536279, + "learning_rate": 4.205189301927679e-06, + "loss": 0.2745, + "step": 12093 + }, + { + "epoch": 0.5665433081931888, + "grad_norm": 0.623837726233399, + "learning_rate": 4.205050609365441e-06, + "loss": 0.2996, + "step": 12094 + }, + { + "epoch": 0.5665901531831171, + "grad_norm": 0.5730523803019897, + "learning_rate": 4.204911906991065e-06, + "loss": 0.2699, + "step": 12095 + }, + { + "epoch": 0.5666369981730454, + "grad_norm": 0.5706847091460241, + "learning_rate": 4.2047731948053495e-06, + "loss": 0.2796, + "step": 12096 + }, + { + "epoch": 0.5666838431629737, + "grad_norm": 0.5560643191214965, + "learning_rate": 4.2046344728090935e-06, + "loss": 0.2742, + "step": 12097 + }, + { + "epoch": 0.5667306881529021, + "grad_norm": 0.5799613507555297, + "learning_rate": 4.204495741003094e-06, + "loss": 0.259, + "step": 12098 + }, + { + "epoch": 0.5667775331428304, + "grad_norm": 0.6527792297863908, + "learning_rate": 4.20435699938815e-06, + "loss": 0.314, + "step": 12099 + }, + { + "epoch": 0.5668243781327587, + "grad_norm": 0.5560854106488304, + "learning_rate": 4.2042182479650595e-06, + "loss": 0.2669, + "step": 12100 + }, + { + "epoch": 0.566871223122687, + "grad_norm": 0.5799297498708154, + "learning_rate": 4.204079486734621e-06, + "loss": 0.294, + "step": 12101 + }, + { + "epoch": 0.5669180681126154, + "grad_norm": 0.5847948511530854, + "learning_rate": 4.203940715697634e-06, + "loss": 0.28, + "step": 12102 + }, + { + "epoch": 0.5669649131025437, + "grad_norm": 0.6310572346329509, + "learning_rate": 4.203801934854897e-06, + "loss": 0.3047, + "step": 12103 + }, + { + "epoch": 0.5670117580924721, + "grad_norm": 0.5718615645437191, + "learning_rate": 4.203663144207207e-06, + "loss": 0.2847, + "step": 12104 + }, + { + "epoch": 0.5670586030824003, + "grad_norm": 0.6271245848801077, + "learning_rate": 4.203524343755364e-06, + "loss": 0.3068, + "step": 12105 + }, + { + "epoch": 0.5671054480723287, + "grad_norm": 0.5246337667356075, + "learning_rate": 4.203385533500167e-06, + "loss": 0.2557, + "step": 12106 + }, + { + "epoch": 0.567152293062257, + "grad_norm": 0.5476727405673213, + "learning_rate": 4.203246713442415e-06, + "loss": 0.2683, + "step": 12107 + }, + { + "epoch": 0.5671991380521854, + "grad_norm": 0.6613307597611414, + "learning_rate": 4.2031078835829056e-06, + "loss": 0.2776, + "step": 12108 + }, + { + "epoch": 0.5672459830421136, + "grad_norm": 0.5916556005191546, + "learning_rate": 4.202969043922438e-06, + "loss": 0.2758, + "step": 12109 + }, + { + "epoch": 0.567292828032042, + "grad_norm": 0.638086405587034, + "learning_rate": 4.2028301944618115e-06, + "loss": 0.2915, + "step": 12110 + }, + { + "epoch": 0.5673396730219703, + "grad_norm": 0.6368331521469022, + "learning_rate": 4.202691335201826e-06, + "loss": 0.3028, + "step": 12111 + }, + { + "epoch": 0.5673865180118987, + "grad_norm": 0.6431741653699571, + "learning_rate": 4.202552466143279e-06, + "loss": 0.2857, + "step": 12112 + }, + { + "epoch": 0.567433363001827, + "grad_norm": 0.5717658666554929, + "learning_rate": 4.2024135872869706e-06, + "loss": 0.2863, + "step": 12113 + }, + { + "epoch": 0.5674802079917552, + "grad_norm": 0.6670990475386579, + "learning_rate": 4.202274698633701e-06, + "loss": 0.3126, + "step": 12114 + }, + { + "epoch": 0.5675270529816836, + "grad_norm": 0.5507198584015284, + "learning_rate": 4.202135800184267e-06, + "loss": 0.2929, + "step": 12115 + }, + { + "epoch": 0.567573897971612, + "grad_norm": 0.6166406842294039, + "learning_rate": 4.20199689193947e-06, + "loss": 0.2765, + "step": 12116 + }, + { + "epoch": 0.5676207429615403, + "grad_norm": 0.5897944392320444, + "learning_rate": 4.201857973900108e-06, + "loss": 0.3109, + "step": 12117 + }, + { + "epoch": 0.5676675879514685, + "grad_norm": 0.5728666066750548, + "learning_rate": 4.201719046066982e-06, + "loss": 0.2865, + "step": 12118 + }, + { + "epoch": 0.5677144329413969, + "grad_norm": 0.5908223311833627, + "learning_rate": 4.20158010844089e-06, + "loss": 0.2989, + "step": 12119 + }, + { + "epoch": 0.5677612779313252, + "grad_norm": 0.620501452025624, + "learning_rate": 4.2014411610226326e-06, + "loss": 0.2856, + "step": 12120 + }, + { + "epoch": 0.5678081229212536, + "grad_norm": 0.5719897801568399, + "learning_rate": 4.201302203813009e-06, + "loss": 0.2706, + "step": 12121 + }, + { + "epoch": 0.567854967911182, + "grad_norm": 0.6347882399907402, + "learning_rate": 4.2011632368128185e-06, + "loss": 0.2833, + "step": 12122 + }, + { + "epoch": 0.5679018129011102, + "grad_norm": 0.6067138699526179, + "learning_rate": 4.201024260022861e-06, + "loss": 0.2774, + "step": 12123 + }, + { + "epoch": 0.5679486578910385, + "grad_norm": 0.5392985298470457, + "learning_rate": 4.200885273443937e-06, + "loss": 0.2677, + "step": 12124 + }, + { + "epoch": 0.5679955028809669, + "grad_norm": 0.6491893848253489, + "learning_rate": 4.200746277076845e-06, + "loss": 0.3127, + "step": 12125 + }, + { + "epoch": 0.5680423478708952, + "grad_norm": 0.6085659461509111, + "learning_rate": 4.200607270922385e-06, + "loss": 0.293, + "step": 12126 + }, + { + "epoch": 0.5680891928608235, + "grad_norm": 0.6926070668117367, + "learning_rate": 4.200468254981359e-06, + "loss": 0.2763, + "step": 12127 + }, + { + "epoch": 0.5681360378507518, + "grad_norm": 0.5746813119867202, + "learning_rate": 4.200329229254566e-06, + "loss": 0.2793, + "step": 12128 + }, + { + "epoch": 0.5681828828406802, + "grad_norm": 0.5820991838060257, + "learning_rate": 4.200190193742805e-06, + "loss": 0.2681, + "step": 12129 + }, + { + "epoch": 0.5682297278306085, + "grad_norm": 0.5781211164491721, + "learning_rate": 4.200051148446876e-06, + "loss": 0.2663, + "step": 12130 + }, + { + "epoch": 0.5682765728205369, + "grad_norm": 0.6452971354349627, + "learning_rate": 4.199912093367581e-06, + "loss": 0.3055, + "step": 12131 + }, + { + "epoch": 0.5683234178104651, + "grad_norm": 0.6327011640204372, + "learning_rate": 4.1997730285057185e-06, + "loss": 0.2803, + "step": 12132 + }, + { + "epoch": 0.5683702628003935, + "grad_norm": 0.5658256426589708, + "learning_rate": 4.199633953862089e-06, + "loss": 0.2878, + "step": 12133 + }, + { + "epoch": 0.5684171077903218, + "grad_norm": 0.6644343622596185, + "learning_rate": 4.199494869437494e-06, + "loss": 0.2939, + "step": 12134 + }, + { + "epoch": 0.5684639527802502, + "grad_norm": 0.6099692227104113, + "learning_rate": 4.199355775232733e-06, + "loss": 0.293, + "step": 12135 + }, + { + "epoch": 0.5685107977701784, + "grad_norm": 0.5808041954046552, + "learning_rate": 4.199216671248607e-06, + "loss": 0.2773, + "step": 12136 + }, + { + "epoch": 0.5685576427601068, + "grad_norm": 0.6210681938044145, + "learning_rate": 4.199077557485916e-06, + "loss": 0.305, + "step": 12137 + }, + { + "epoch": 0.5686044877500351, + "grad_norm": 0.5795095011970964, + "learning_rate": 4.19893843394546e-06, + "loss": 0.2738, + "step": 12138 + }, + { + "epoch": 0.5686513327399635, + "grad_norm": 0.6033940208384041, + "learning_rate": 4.198799300628042e-06, + "loss": 0.2882, + "step": 12139 + }, + { + "epoch": 0.5686981777298918, + "grad_norm": 0.6002073321092848, + "learning_rate": 4.1986601575344595e-06, + "loss": 0.2872, + "step": 12140 + }, + { + "epoch": 0.5687450227198201, + "grad_norm": 0.6172136805216718, + "learning_rate": 4.198521004665515e-06, + "loss": 0.306, + "step": 12141 + }, + { + "epoch": 0.5687918677097484, + "grad_norm": 0.6153446703688619, + "learning_rate": 4.198381842022009e-06, + "loss": 0.2955, + "step": 12142 + }, + { + "epoch": 0.5688387126996768, + "grad_norm": 0.6333709978285221, + "learning_rate": 4.198242669604743e-06, + "loss": 0.3147, + "step": 12143 + }, + { + "epoch": 0.5688855576896051, + "grad_norm": 0.6056950804416076, + "learning_rate": 4.198103487414516e-06, + "loss": 0.2823, + "step": 12144 + }, + { + "epoch": 0.5689324026795334, + "grad_norm": 0.6089008939859392, + "learning_rate": 4.197964295452131e-06, + "loss": 0.3077, + "step": 12145 + }, + { + "epoch": 0.5689792476694617, + "grad_norm": 0.5627640141649807, + "learning_rate": 4.197825093718388e-06, + "loss": 0.2683, + "step": 12146 + }, + { + "epoch": 0.5690260926593901, + "grad_norm": 0.5615359848802792, + "learning_rate": 4.197685882214088e-06, + "loss": 0.2737, + "step": 12147 + }, + { + "epoch": 0.5690729376493184, + "grad_norm": 0.5867507960046912, + "learning_rate": 4.197546660940034e-06, + "loss": 0.2789, + "step": 12148 + }, + { + "epoch": 0.5691197826392468, + "grad_norm": 0.5724677161594606, + "learning_rate": 4.197407429897024e-06, + "loss": 0.2834, + "step": 12149 + }, + { + "epoch": 0.569166627629175, + "grad_norm": 0.6485147803737575, + "learning_rate": 4.197268189085862e-06, + "loss": 0.2969, + "step": 12150 + }, + { + "epoch": 0.5692134726191034, + "grad_norm": 0.5875419590770491, + "learning_rate": 4.197128938507348e-06, + "loss": 0.2942, + "step": 12151 + }, + { + "epoch": 0.5692603176090317, + "grad_norm": 0.5382604404535002, + "learning_rate": 4.196989678162283e-06, + "loss": 0.2812, + "step": 12152 + }, + { + "epoch": 0.5693071625989601, + "grad_norm": 0.5971339279795764, + "learning_rate": 4.196850408051469e-06, + "loss": 0.2856, + "step": 12153 + }, + { + "epoch": 0.5693540075888883, + "grad_norm": 0.6108759136193537, + "learning_rate": 4.196711128175708e-06, + "loss": 0.29, + "step": 12154 + }, + { + "epoch": 0.5694008525788167, + "grad_norm": 0.6728590283569094, + "learning_rate": 4.1965718385358e-06, + "loss": 0.2861, + "step": 12155 + }, + { + "epoch": 0.569447697568745, + "grad_norm": 0.5719825197980631, + "learning_rate": 4.196432539132548e-06, + "loss": 0.2806, + "step": 12156 + }, + { + "epoch": 0.5694945425586734, + "grad_norm": 0.5226705654689578, + "learning_rate": 4.196293229966753e-06, + "loss": 0.2559, + "step": 12157 + }, + { + "epoch": 0.5695413875486017, + "grad_norm": 0.6103049781592903, + "learning_rate": 4.196153911039217e-06, + "loss": 0.2958, + "step": 12158 + }, + { + "epoch": 0.56958823253853, + "grad_norm": 0.5857178782182143, + "learning_rate": 4.196014582350742e-06, + "loss": 0.2672, + "step": 12159 + }, + { + "epoch": 0.5696350775284583, + "grad_norm": 0.6490730095154984, + "learning_rate": 4.195875243902127e-06, + "loss": 0.3098, + "step": 12160 + }, + { + "epoch": 0.5696819225183867, + "grad_norm": 0.5667423112117971, + "learning_rate": 4.195735895694179e-06, + "loss": 0.2773, + "step": 12161 + }, + { + "epoch": 0.569728767508315, + "grad_norm": 0.5574926885109937, + "learning_rate": 4.195596537727697e-06, + "loss": 0.261, + "step": 12162 + }, + { + "epoch": 0.5697756124982433, + "grad_norm": 0.5903004441896195, + "learning_rate": 4.195457170003481e-06, + "loss": 0.2919, + "step": 12163 + }, + { + "epoch": 0.5698224574881716, + "grad_norm": 0.5698564916909928, + "learning_rate": 4.195317792522337e-06, + "loss": 0.2515, + "step": 12164 + }, + { + "epoch": 0.5698693024781, + "grad_norm": 0.5821450793156527, + "learning_rate": 4.195178405285064e-06, + "loss": 0.311, + "step": 12165 + }, + { + "epoch": 0.5699161474680283, + "grad_norm": 0.6044171903415401, + "learning_rate": 4.195039008292466e-06, + "loss": 0.28, + "step": 12166 + }, + { + "epoch": 0.5699629924579567, + "grad_norm": 0.5777712501604273, + "learning_rate": 4.194899601545344e-06, + "loss": 0.2768, + "step": 12167 + }, + { + "epoch": 0.5700098374478849, + "grad_norm": 0.6546379206052473, + "learning_rate": 4.194760185044502e-06, + "loss": 0.2808, + "step": 12168 + }, + { + "epoch": 0.5700566824378133, + "grad_norm": 0.5956205142355476, + "learning_rate": 4.19462075879074e-06, + "loss": 0.2651, + "step": 12169 + }, + { + "epoch": 0.5701035274277416, + "grad_norm": 0.6144184943799337, + "learning_rate": 4.194481322784862e-06, + "loss": 0.3059, + "step": 12170 + }, + { + "epoch": 0.57015037241767, + "grad_norm": 0.5779297594548857, + "learning_rate": 4.194341877027669e-06, + "loss": 0.2887, + "step": 12171 + }, + { + "epoch": 0.5701972174075982, + "grad_norm": 0.6259740415644143, + "learning_rate": 4.194202421519965e-06, + "loss": 0.2923, + "step": 12172 + }, + { + "epoch": 0.5702440623975266, + "grad_norm": 0.5813722809918637, + "learning_rate": 4.1940629562625515e-06, + "loss": 0.2917, + "step": 12173 + }, + { + "epoch": 0.5702909073874549, + "grad_norm": 0.622196223329422, + "learning_rate": 4.193923481256232e-06, + "loss": 0.3065, + "step": 12174 + }, + { + "epoch": 0.5703377523773833, + "grad_norm": 0.5391291801907695, + "learning_rate": 4.193783996501809e-06, + "loss": 0.291, + "step": 12175 + }, + { + "epoch": 0.5703845973673116, + "grad_norm": 0.6530568807670522, + "learning_rate": 4.193644502000084e-06, + "loss": 0.3, + "step": 12176 + }, + { + "epoch": 0.5704314423572399, + "grad_norm": 0.5948422796688337, + "learning_rate": 4.19350499775186e-06, + "loss": 0.2991, + "step": 12177 + }, + { + "epoch": 0.5704782873471682, + "grad_norm": 0.6185615563798083, + "learning_rate": 4.193365483757942e-06, + "loss": 0.2932, + "step": 12178 + }, + { + "epoch": 0.5705251323370966, + "grad_norm": 0.6216766833344571, + "learning_rate": 4.19322596001913e-06, + "loss": 0.2988, + "step": 12179 + }, + { + "epoch": 0.5705719773270249, + "grad_norm": 0.5936377126281882, + "learning_rate": 4.193086426536229e-06, + "loss": 0.2765, + "step": 12180 + }, + { + "epoch": 0.5706188223169532, + "grad_norm": 0.581862521199703, + "learning_rate": 4.192946883310041e-06, + "loss": 0.284, + "step": 12181 + }, + { + "epoch": 0.5706656673068815, + "grad_norm": 0.5650053203103256, + "learning_rate": 4.192807330341369e-06, + "loss": 0.2778, + "step": 12182 + }, + { + "epoch": 0.5707125122968099, + "grad_norm": 0.5784955985632835, + "learning_rate": 4.192667767631017e-06, + "loss": 0.2918, + "step": 12183 + }, + { + "epoch": 0.5707593572867382, + "grad_norm": 0.6233968276129108, + "learning_rate": 4.192528195179786e-06, + "loss": 0.2912, + "step": 12184 + }, + { + "epoch": 0.5708062022766666, + "grad_norm": 0.6633468414042425, + "learning_rate": 4.192388612988482e-06, + "loss": 0.2968, + "step": 12185 + }, + { + "epoch": 0.5708530472665948, + "grad_norm": 0.5947576723465584, + "learning_rate": 4.192249021057906e-06, + "loss": 0.2704, + "step": 12186 + }, + { + "epoch": 0.5708998922565232, + "grad_norm": 0.6279459352138358, + "learning_rate": 4.192109419388862e-06, + "loss": 0.2814, + "step": 12187 + }, + { + "epoch": 0.5709467372464515, + "grad_norm": 0.607982844313163, + "learning_rate": 4.191969807982154e-06, + "loss": 0.293, + "step": 12188 + }, + { + "epoch": 0.5709935822363799, + "grad_norm": 0.6162708394805896, + "learning_rate": 4.191830186838586e-06, + "loss": 0.3073, + "step": 12189 + }, + { + "epoch": 0.5710404272263081, + "grad_norm": 0.5901029662398803, + "learning_rate": 4.191690555958959e-06, + "loss": 0.3, + "step": 12190 + }, + { + "epoch": 0.5710872722162365, + "grad_norm": 0.5380410123722498, + "learning_rate": 4.191550915344078e-06, + "loss": 0.2686, + "step": 12191 + }, + { + "epoch": 0.5711341172061648, + "grad_norm": 0.5615954190644821, + "learning_rate": 4.191411264994748e-06, + "loss": 0.2725, + "step": 12192 + }, + { + "epoch": 0.5711809621960932, + "grad_norm": 0.5941103459484233, + "learning_rate": 4.19127160491177e-06, + "loss": 0.2926, + "step": 12193 + }, + { + "epoch": 0.5712278071860215, + "grad_norm": 0.6031919216735016, + "learning_rate": 4.19113193509595e-06, + "loss": 0.2906, + "step": 12194 + }, + { + "epoch": 0.5712746521759497, + "grad_norm": 0.5728734965400968, + "learning_rate": 4.19099225554809e-06, + "loss": 0.2741, + "step": 12195 + }, + { + "epoch": 0.5713214971658781, + "grad_norm": 0.5590253624039329, + "learning_rate": 4.190852566268995e-06, + "loss": 0.2838, + "step": 12196 + }, + { + "epoch": 0.5713683421558065, + "grad_norm": 0.5717877750248231, + "learning_rate": 4.190712867259468e-06, + "loss": 0.2804, + "step": 12197 + }, + { + "epoch": 0.5714151871457348, + "grad_norm": 0.5631205796110533, + "learning_rate": 4.190573158520314e-06, + "loss": 0.2711, + "step": 12198 + }, + { + "epoch": 0.571462032135663, + "grad_norm": 0.6468311734122253, + "learning_rate": 4.190433440052336e-06, + "loss": 0.2865, + "step": 12199 + }, + { + "epoch": 0.5715088771255914, + "grad_norm": 0.6146440637251733, + "learning_rate": 4.1902937118563385e-06, + "loss": 0.2844, + "step": 12200 + }, + { + "epoch": 0.5715557221155197, + "grad_norm": 0.5742165790048261, + "learning_rate": 4.190153973933126e-06, + "loss": 0.2919, + "step": 12201 + }, + { + "epoch": 0.5716025671054481, + "grad_norm": 0.637948332738444, + "learning_rate": 4.190014226283502e-06, + "loss": 0.3039, + "step": 12202 + }, + { + "epoch": 0.5716494120953765, + "grad_norm": 0.6127269354055496, + "learning_rate": 4.1898744689082705e-06, + "loss": 0.2763, + "step": 12203 + }, + { + "epoch": 0.5716962570853047, + "grad_norm": 0.6052249014713297, + "learning_rate": 4.189734701808237e-06, + "loss": 0.3072, + "step": 12204 + }, + { + "epoch": 0.571743102075233, + "grad_norm": 0.5942748019330836, + "learning_rate": 4.1895949249842035e-06, + "loss": 0.282, + "step": 12205 + }, + { + "epoch": 0.5717899470651614, + "grad_norm": 0.5972859315957532, + "learning_rate": 4.189455138436977e-06, + "loss": 0.2858, + "step": 12206 + }, + { + "epoch": 0.5718367920550897, + "grad_norm": 0.5365629685542559, + "learning_rate": 4.189315342167361e-06, + "loss": 0.2663, + "step": 12207 + }, + { + "epoch": 0.571883637045018, + "grad_norm": 0.6178888197428276, + "learning_rate": 4.189175536176159e-06, + "loss": 0.2886, + "step": 12208 + }, + { + "epoch": 0.5719304820349463, + "grad_norm": 0.6165166555496249, + "learning_rate": 4.189035720464177e-06, + "loss": 0.2953, + "step": 12209 + }, + { + "epoch": 0.5719773270248747, + "grad_norm": 0.5996324050150575, + "learning_rate": 4.188895895032218e-06, + "loss": 0.2836, + "step": 12210 + }, + { + "epoch": 0.572024172014803, + "grad_norm": 0.6504350997298192, + "learning_rate": 4.188756059881088e-06, + "loss": 0.279, + "step": 12211 + }, + { + "epoch": 0.5720710170047314, + "grad_norm": 0.6003314892302861, + "learning_rate": 4.188616215011592e-06, + "loss": 0.2793, + "step": 12212 + }, + { + "epoch": 0.5721178619946596, + "grad_norm": 0.5883616612789203, + "learning_rate": 4.188476360424533e-06, + "loss": 0.283, + "step": 12213 + }, + { + "epoch": 0.572164706984588, + "grad_norm": 0.6063906725559665, + "learning_rate": 4.188336496120717e-06, + "loss": 0.2758, + "step": 12214 + }, + { + "epoch": 0.5722115519745163, + "grad_norm": 0.6097615612429876, + "learning_rate": 4.188196622100949e-06, + "loss": 0.2752, + "step": 12215 + }, + { + "epoch": 0.5722583969644447, + "grad_norm": 0.6183142846587015, + "learning_rate": 4.188056738366035e-06, + "loss": 0.3003, + "step": 12216 + }, + { + "epoch": 0.5723052419543729, + "grad_norm": 0.5749828045494952, + "learning_rate": 4.187916844916778e-06, + "loss": 0.2943, + "step": 12217 + }, + { + "epoch": 0.5723520869443013, + "grad_norm": 0.5782881364916825, + "learning_rate": 4.187776941753984e-06, + "loss": 0.2747, + "step": 12218 + }, + { + "epoch": 0.5723989319342296, + "grad_norm": 0.5865090072096604, + "learning_rate": 4.1876370288784574e-06, + "loss": 0.2826, + "step": 12219 + }, + { + "epoch": 0.572445776924158, + "grad_norm": 0.5635599332161264, + "learning_rate": 4.1874971062910045e-06, + "loss": 0.2618, + "step": 12220 + }, + { + "epoch": 0.5724926219140863, + "grad_norm": 0.5545133795022235, + "learning_rate": 4.187357173992429e-06, + "loss": 0.273, + "step": 12221 + }, + { + "epoch": 0.5725394669040146, + "grad_norm": 0.5662408679443668, + "learning_rate": 4.187217231983538e-06, + "loss": 0.2404, + "step": 12222 + }, + { + "epoch": 0.5725863118939429, + "grad_norm": 0.6198018223823079, + "learning_rate": 4.187077280265135e-06, + "loss": 0.299, + "step": 12223 + }, + { + "epoch": 0.5726331568838713, + "grad_norm": 0.5871604422535976, + "learning_rate": 4.186937318838026e-06, + "loss": 0.2895, + "step": 12224 + }, + { + "epoch": 0.5726800018737996, + "grad_norm": 0.5396784467507603, + "learning_rate": 4.186797347703018e-06, + "loss": 0.2564, + "step": 12225 + }, + { + "epoch": 0.5727268468637279, + "grad_norm": 0.5840836962497429, + "learning_rate": 4.186657366860915e-06, + "loss": 0.2771, + "step": 12226 + }, + { + "epoch": 0.5727736918536562, + "grad_norm": 0.6399443965791362, + "learning_rate": 4.186517376312522e-06, + "loss": 0.2966, + "step": 12227 + }, + { + "epoch": 0.5728205368435846, + "grad_norm": 0.5773379382484377, + "learning_rate": 4.1863773760586455e-06, + "loss": 0.2878, + "step": 12228 + }, + { + "epoch": 0.5728673818335129, + "grad_norm": 0.6219565153451319, + "learning_rate": 4.186237366100092e-06, + "loss": 0.2772, + "step": 12229 + }, + { + "epoch": 0.5729142268234413, + "grad_norm": 0.553572422941865, + "learning_rate": 4.186097346437665e-06, + "loss": 0.2661, + "step": 12230 + }, + { + "epoch": 0.5729610718133695, + "grad_norm": 0.6342442220844593, + "learning_rate": 4.185957317072173e-06, + "loss": 0.3076, + "step": 12231 + }, + { + "epoch": 0.5730079168032979, + "grad_norm": 0.6191964283803596, + "learning_rate": 4.185817278004419e-06, + "loss": 0.288, + "step": 12232 + }, + { + "epoch": 0.5730547617932262, + "grad_norm": 0.6064308727245915, + "learning_rate": 4.185677229235211e-06, + "loss": 0.2848, + "step": 12233 + }, + { + "epoch": 0.5731016067831546, + "grad_norm": 0.5657239349732894, + "learning_rate": 4.1855371707653535e-06, + "loss": 0.2757, + "step": 12234 + }, + { + "epoch": 0.5731484517730828, + "grad_norm": 0.628234491034497, + "learning_rate": 4.185397102595654e-06, + "loss": 0.309, + "step": 12235 + }, + { + "epoch": 0.5731952967630112, + "grad_norm": 0.6162354077844263, + "learning_rate": 4.185257024726918e-06, + "loss": 0.2897, + "step": 12236 + }, + { + "epoch": 0.5732421417529395, + "grad_norm": 0.7119880737667041, + "learning_rate": 4.185116937159951e-06, + "loss": 0.2966, + "step": 12237 + }, + { + "epoch": 0.5732889867428679, + "grad_norm": 0.6307019696924661, + "learning_rate": 4.1849768398955596e-06, + "loss": 0.2644, + "step": 12238 + }, + { + "epoch": 0.5733358317327962, + "grad_norm": 0.5925407247010843, + "learning_rate": 4.184836732934549e-06, + "loss": 0.2817, + "step": 12239 + }, + { + "epoch": 0.5733826767227245, + "grad_norm": 0.5817738143299794, + "learning_rate": 4.184696616277728e-06, + "loss": 0.2795, + "step": 12240 + }, + { + "epoch": 0.5734295217126528, + "grad_norm": 0.5904563843088738, + "learning_rate": 4.184556489925902e-06, + "loss": 0.2861, + "step": 12241 + }, + { + "epoch": 0.5734763667025812, + "grad_norm": 0.6153017289580228, + "learning_rate": 4.1844163538798746e-06, + "loss": 0.2773, + "step": 12242 + }, + { + "epoch": 0.5735232116925095, + "grad_norm": 0.5587675815365946, + "learning_rate": 4.184276208140455e-06, + "loss": 0.2735, + "step": 12243 + }, + { + "epoch": 0.5735700566824378, + "grad_norm": 0.6245744773336842, + "learning_rate": 4.184136052708451e-06, + "loss": 0.2864, + "step": 12244 + }, + { + "epoch": 0.5736169016723661, + "grad_norm": 0.626100618391699, + "learning_rate": 4.1839958875846654e-06, + "loss": 0.3035, + "step": 12245 + }, + { + "epoch": 0.5736637466622945, + "grad_norm": 0.6279934160175925, + "learning_rate": 4.183855712769907e-06, + "loss": 0.2984, + "step": 12246 + }, + { + "epoch": 0.5737105916522228, + "grad_norm": 0.6096371487471072, + "learning_rate": 4.183715528264982e-06, + "loss": 0.2897, + "step": 12247 + }, + { + "epoch": 0.5737574366421512, + "grad_norm": 0.5810886046961292, + "learning_rate": 4.183575334070698e-06, + "loss": 0.2726, + "step": 12248 + }, + { + "epoch": 0.5738042816320794, + "grad_norm": 0.6050795387688868, + "learning_rate": 4.1834351301878615e-06, + "loss": 0.2595, + "step": 12249 + }, + { + "epoch": 0.5738511266220078, + "grad_norm": 0.6208204621412341, + "learning_rate": 4.183294916617278e-06, + "loss": 0.3017, + "step": 12250 + }, + { + "epoch": 0.5738979716119361, + "grad_norm": 0.5912881557338631, + "learning_rate": 4.183154693359756e-06, + "loss": 0.2913, + "step": 12251 + }, + { + "epoch": 0.5739448166018645, + "grad_norm": 0.618098824281691, + "learning_rate": 4.183014460416101e-06, + "loss": 0.2906, + "step": 12252 + }, + { + "epoch": 0.5739916615917927, + "grad_norm": 0.5917444989090563, + "learning_rate": 4.182874217787122e-06, + "loss": 0.2922, + "step": 12253 + }, + { + "epoch": 0.5740385065817211, + "grad_norm": 0.5787710399853668, + "learning_rate": 4.182733965473624e-06, + "loss": 0.2844, + "step": 12254 + }, + { + "epoch": 0.5740853515716494, + "grad_norm": 0.5640334480095379, + "learning_rate": 4.182593703476414e-06, + "loss": 0.2725, + "step": 12255 + }, + { + "epoch": 0.5741321965615778, + "grad_norm": 0.6069620131100794, + "learning_rate": 4.182453431796302e-06, + "loss": 0.2929, + "step": 12256 + }, + { + "epoch": 0.5741790415515061, + "grad_norm": 0.6377524886686081, + "learning_rate": 4.182313150434092e-06, + "loss": 0.2807, + "step": 12257 + }, + { + "epoch": 0.5742258865414344, + "grad_norm": 0.6972733510346613, + "learning_rate": 4.1821728593905934e-06, + "loss": 0.2817, + "step": 12258 + }, + { + "epoch": 0.5742727315313627, + "grad_norm": 0.5452950573934034, + "learning_rate": 4.182032558666613e-06, + "loss": 0.2702, + "step": 12259 + }, + { + "epoch": 0.5743195765212911, + "grad_norm": 0.5527189235986654, + "learning_rate": 4.181892248262957e-06, + "loss": 0.262, + "step": 12260 + }, + { + "epoch": 0.5743664215112194, + "grad_norm": 0.6094650326349361, + "learning_rate": 4.1817519281804346e-06, + "loss": 0.3032, + "step": 12261 + }, + { + "epoch": 0.5744132665011477, + "grad_norm": 0.5487838758109893, + "learning_rate": 4.181611598419852e-06, + "loss": 0.2683, + "step": 12262 + }, + { + "epoch": 0.574460111491076, + "grad_norm": 0.589361213639348, + "learning_rate": 4.181471258982018e-06, + "loss": 0.2866, + "step": 12263 + }, + { + "epoch": 0.5745069564810044, + "grad_norm": 0.6007974408943987, + "learning_rate": 4.181330909867739e-06, + "loss": 0.279, + "step": 12264 + }, + { + "epoch": 0.5745538014709327, + "grad_norm": 0.5731023036323541, + "learning_rate": 4.181190551077824e-06, + "loss": 0.2896, + "step": 12265 + }, + { + "epoch": 0.5746006464608611, + "grad_norm": 0.6255539980368898, + "learning_rate": 4.181050182613079e-06, + "loss": 0.2829, + "step": 12266 + }, + { + "epoch": 0.5746474914507893, + "grad_norm": 0.5863408344898912, + "learning_rate": 4.180909804474313e-06, + "loss": 0.2931, + "step": 12267 + }, + { + "epoch": 0.5746943364407177, + "grad_norm": 0.5883240115665168, + "learning_rate": 4.180769416662334e-06, + "loss": 0.2883, + "step": 12268 + }, + { + "epoch": 0.574741181430646, + "grad_norm": 0.6263679100373963, + "learning_rate": 4.180629019177949e-06, + "loss": 0.3009, + "step": 12269 + }, + { + "epoch": 0.5747880264205744, + "grad_norm": 0.5775508914989803, + "learning_rate": 4.180488612021966e-06, + "loss": 0.2833, + "step": 12270 + }, + { + "epoch": 0.5748348714105026, + "grad_norm": 0.6146391397493594, + "learning_rate": 4.180348195195194e-06, + "loss": 0.2828, + "step": 12271 + }, + { + "epoch": 0.574881716400431, + "grad_norm": 0.6109605474698702, + "learning_rate": 4.18020776869844e-06, + "loss": 0.2868, + "step": 12272 + }, + { + "epoch": 0.5749285613903593, + "grad_norm": 0.6162904525857034, + "learning_rate": 4.180067332532513e-06, + "loss": 0.2761, + "step": 12273 + }, + { + "epoch": 0.5749754063802877, + "grad_norm": 0.6157929755713064, + "learning_rate": 4.179926886698221e-06, + "loss": 0.2995, + "step": 12274 + }, + { + "epoch": 0.575022251370216, + "grad_norm": 0.6146702125510074, + "learning_rate": 4.179786431196372e-06, + "loss": 0.294, + "step": 12275 + }, + { + "epoch": 0.5750690963601442, + "grad_norm": 0.5907756836507757, + "learning_rate": 4.179645966027773e-06, + "loss": 0.2742, + "step": 12276 + }, + { + "epoch": 0.5751159413500726, + "grad_norm": 0.5804015801271445, + "learning_rate": 4.179505491193235e-06, + "loss": 0.2939, + "step": 12277 + }, + { + "epoch": 0.575162786340001, + "grad_norm": 0.6065470110543195, + "learning_rate": 4.1793650066935655e-06, + "loss": 0.2824, + "step": 12278 + }, + { + "epoch": 0.5752096313299293, + "grad_norm": 0.6378994502111778, + "learning_rate": 4.1792245125295715e-06, + "loss": 0.2855, + "step": 12279 + }, + { + "epoch": 0.5752564763198575, + "grad_norm": 0.6241861338003233, + "learning_rate": 4.179084008702062e-06, + "loss": 0.2968, + "step": 12280 + }, + { + "epoch": 0.5753033213097859, + "grad_norm": 0.6567706738772788, + "learning_rate": 4.178943495211847e-06, + "loss": 0.2974, + "step": 12281 + }, + { + "epoch": 0.5753501662997142, + "grad_norm": 0.598860337246414, + "learning_rate": 4.1788029720597335e-06, + "loss": 0.3029, + "step": 12282 + }, + { + "epoch": 0.5753970112896426, + "grad_norm": 0.5709333144785899, + "learning_rate": 4.178662439246532e-06, + "loss": 0.2846, + "step": 12283 + }, + { + "epoch": 0.575443856279571, + "grad_norm": 0.5548273167155654, + "learning_rate": 4.178521896773049e-06, + "loss": 0.2569, + "step": 12284 + }, + { + "epoch": 0.5754907012694992, + "grad_norm": 0.6405813797482173, + "learning_rate": 4.178381344640094e-06, + "loss": 0.2851, + "step": 12285 + }, + { + "epoch": 0.5755375462594275, + "grad_norm": 0.5610407271439173, + "learning_rate": 4.178240782848477e-06, + "loss": 0.2689, + "step": 12286 + }, + { + "epoch": 0.5755843912493559, + "grad_norm": 0.6147960460104103, + "learning_rate": 4.178100211399007e-06, + "loss": 0.2904, + "step": 12287 + }, + { + "epoch": 0.5756312362392842, + "grad_norm": 0.5554098463147895, + "learning_rate": 4.177959630292491e-06, + "loss": 0.2798, + "step": 12288 + }, + { + "epoch": 0.5756780812292125, + "grad_norm": 0.6037432582130671, + "learning_rate": 4.177819039529738e-06, + "loss": 0.2867, + "step": 12289 + }, + { + "epoch": 0.5757249262191408, + "grad_norm": 0.5949037732735888, + "learning_rate": 4.17767843911156e-06, + "loss": 0.2877, + "step": 12290 + }, + { + "epoch": 0.5757717712090692, + "grad_norm": 0.6309458732673968, + "learning_rate": 4.177537829038763e-06, + "loss": 0.2959, + "step": 12291 + }, + { + "epoch": 0.5758186161989975, + "grad_norm": 0.5593780624780685, + "learning_rate": 4.177397209312158e-06, + "loss": 0.2789, + "step": 12292 + }, + { + "epoch": 0.5758654611889259, + "grad_norm": 0.6073861086439554, + "learning_rate": 4.1772565799325536e-06, + "loss": 0.2975, + "step": 12293 + }, + { + "epoch": 0.5759123061788541, + "grad_norm": 0.5972637681631505, + "learning_rate": 4.1771159409007586e-06, + "loss": 0.2985, + "step": 12294 + }, + { + "epoch": 0.5759591511687825, + "grad_norm": 0.5765663264184197, + "learning_rate": 4.176975292217584e-06, + "loss": 0.274, + "step": 12295 + }, + { + "epoch": 0.5760059961587108, + "grad_norm": 0.5831994753560252, + "learning_rate": 4.1768346338838375e-06, + "loss": 0.2915, + "step": 12296 + }, + { + "epoch": 0.5760528411486392, + "grad_norm": 0.6300291763314971, + "learning_rate": 4.1766939659003285e-06, + "loss": 0.2826, + "step": 12297 + }, + { + "epoch": 0.5760996861385674, + "grad_norm": 0.6113518787173907, + "learning_rate": 4.176553288267868e-06, + "loss": 0.2817, + "step": 12298 + }, + { + "epoch": 0.5761465311284958, + "grad_norm": 0.552603641076066, + "learning_rate": 4.176412600987264e-06, + "loss": 0.2854, + "step": 12299 + }, + { + "epoch": 0.5761933761184241, + "grad_norm": 0.5469179225274038, + "learning_rate": 4.176271904059328e-06, + "loss": 0.2828, + "step": 12300 + }, + { + "epoch": 0.5762402211083525, + "grad_norm": 0.6013316489375216, + "learning_rate": 4.176131197484867e-06, + "loss": 0.2858, + "step": 12301 + }, + { + "epoch": 0.5762870660982808, + "grad_norm": 0.631805404872598, + "learning_rate": 4.1759904812646936e-06, + "loss": 0.2826, + "step": 12302 + }, + { + "epoch": 0.5763339110882091, + "grad_norm": 0.5784003219337817, + "learning_rate": 4.175849755399616e-06, + "loss": 0.2661, + "step": 12303 + }, + { + "epoch": 0.5763807560781374, + "grad_norm": 0.5604798836808339, + "learning_rate": 4.175709019890443e-06, + "loss": 0.2605, + "step": 12304 + }, + { + "epoch": 0.5764276010680658, + "grad_norm": 0.5663021013201622, + "learning_rate": 4.175568274737987e-06, + "loss": 0.2776, + "step": 12305 + }, + { + "epoch": 0.5764744460579941, + "grad_norm": 0.6756246159358157, + "learning_rate": 4.175427519943056e-06, + "loss": 0.2869, + "step": 12306 + }, + { + "epoch": 0.5765212910479224, + "grad_norm": 0.5779877472469536, + "learning_rate": 4.175286755506461e-06, + "loss": 0.2912, + "step": 12307 + }, + { + "epoch": 0.5765681360378507, + "grad_norm": 0.633851573335314, + "learning_rate": 4.175145981429013e-06, + "loss": 0.2955, + "step": 12308 + }, + { + "epoch": 0.5766149810277791, + "grad_norm": 0.617806968294691, + "learning_rate": 4.1750051977115195e-06, + "loss": 0.2911, + "step": 12309 + }, + { + "epoch": 0.5766618260177074, + "grad_norm": 0.6044908227263164, + "learning_rate": 4.174864404354793e-06, + "loss": 0.3083, + "step": 12310 + }, + { + "epoch": 0.5767086710076358, + "grad_norm": 0.5853660838007981, + "learning_rate": 4.174723601359641e-06, + "loss": 0.294, + "step": 12311 + }, + { + "epoch": 0.576755515997564, + "grad_norm": 0.6148431478124623, + "learning_rate": 4.1745827887268776e-06, + "loss": 0.2955, + "step": 12312 + }, + { + "epoch": 0.5768023609874924, + "grad_norm": 0.6098561206695322, + "learning_rate": 4.17444196645731e-06, + "loss": 0.2899, + "step": 12313 + }, + { + "epoch": 0.5768492059774207, + "grad_norm": 0.6180046661020762, + "learning_rate": 4.1743011345517506e-06, + "loss": 0.2857, + "step": 12314 + }, + { + "epoch": 0.5768960509673491, + "grad_norm": 0.6133444676530397, + "learning_rate": 4.1741602930110085e-06, + "loss": 0.2876, + "step": 12315 + }, + { + "epoch": 0.5769428959572773, + "grad_norm": 0.5982410100439891, + "learning_rate": 4.174019441835894e-06, + "loss": 0.2737, + "step": 12316 + }, + { + "epoch": 0.5769897409472057, + "grad_norm": 0.6147661713506877, + "learning_rate": 4.1738785810272195e-06, + "loss": 0.3005, + "step": 12317 + }, + { + "epoch": 0.577036585937134, + "grad_norm": 0.5666500096437758, + "learning_rate": 4.173737710585794e-06, + "loss": 0.2769, + "step": 12318 + }, + { + "epoch": 0.5770834309270624, + "grad_norm": 0.6199387948226316, + "learning_rate": 4.1735968305124286e-06, + "loss": 0.2899, + "step": 12319 + }, + { + "epoch": 0.5771302759169907, + "grad_norm": 0.6667151967967978, + "learning_rate": 4.1734559408079345e-06, + "loss": 0.2974, + "step": 12320 + }, + { + "epoch": 0.577177120906919, + "grad_norm": 0.6743448157839084, + "learning_rate": 4.1733150414731214e-06, + "loss": 0.3106, + "step": 12321 + }, + { + "epoch": 0.5772239658968473, + "grad_norm": 0.5691187103153459, + "learning_rate": 4.1731741325088016e-06, + "loss": 0.2791, + "step": 12322 + }, + { + "epoch": 0.5772708108867757, + "grad_norm": 0.5966418851037193, + "learning_rate": 4.173033213915785e-06, + "loss": 0.3061, + "step": 12323 + }, + { + "epoch": 0.577317655876704, + "grad_norm": 0.5743478898239822, + "learning_rate": 4.1728922856948814e-06, + "loss": 0.2826, + "step": 12324 + }, + { + "epoch": 0.5773645008666323, + "grad_norm": 0.5755790902065464, + "learning_rate": 4.172751347846905e-06, + "loss": 0.2816, + "step": 12325 + }, + { + "epoch": 0.5774113458565606, + "grad_norm": 0.6320334383797599, + "learning_rate": 4.172610400372664e-06, + "loss": 0.2955, + "step": 12326 + }, + { + "epoch": 0.577458190846489, + "grad_norm": 0.6123325517928007, + "learning_rate": 4.1724694432729704e-06, + "loss": 0.2668, + "step": 12327 + }, + { + "epoch": 0.5775050358364173, + "grad_norm": 0.588336507847417, + "learning_rate": 4.172328476548636e-06, + "loss": 0.2813, + "step": 12328 + }, + { + "epoch": 0.5775518808263457, + "grad_norm": 0.5952047556867308, + "learning_rate": 4.172187500200472e-06, + "loss": 0.2789, + "step": 12329 + }, + { + "epoch": 0.5775987258162739, + "grad_norm": 0.552072571013452, + "learning_rate": 4.1720465142292884e-06, + "loss": 0.2659, + "step": 12330 + }, + { + "epoch": 0.5776455708062023, + "grad_norm": 0.5307792710141264, + "learning_rate": 4.171905518635898e-06, + "loss": 0.254, + "step": 12331 + }, + { + "epoch": 0.5776924157961306, + "grad_norm": 0.6054189247964148, + "learning_rate": 4.171764513421112e-06, + "loss": 0.2844, + "step": 12332 + }, + { + "epoch": 0.577739260786059, + "grad_norm": 0.555583296774657, + "learning_rate": 4.17162349858574e-06, + "loss": 0.2788, + "step": 12333 + }, + { + "epoch": 0.5777861057759872, + "grad_norm": 0.6098986493537518, + "learning_rate": 4.171482474130595e-06, + "loss": 0.2753, + "step": 12334 + }, + { + "epoch": 0.5778329507659156, + "grad_norm": 0.639104604118574, + "learning_rate": 4.171341440056489e-06, + "loss": 0.3093, + "step": 12335 + }, + { + "epoch": 0.5778797957558439, + "grad_norm": 0.5966781237622923, + "learning_rate": 4.171200396364234e-06, + "loss": 0.2636, + "step": 12336 + }, + { + "epoch": 0.5779266407457723, + "grad_norm": 0.6381128681835615, + "learning_rate": 4.17105934305464e-06, + "loss": 0.3154, + "step": 12337 + }, + { + "epoch": 0.5779734857357006, + "grad_norm": 0.6098747673758518, + "learning_rate": 4.170918280128519e-06, + "loss": 0.296, + "step": 12338 + }, + { + "epoch": 0.5780203307256289, + "grad_norm": 0.6151179906096298, + "learning_rate": 4.170777207586684e-06, + "loss": 0.2874, + "step": 12339 + }, + { + "epoch": 0.5780671757155572, + "grad_norm": 0.6131451485573498, + "learning_rate": 4.1706361254299455e-06, + "loss": 0.2957, + "step": 12340 + }, + { + "epoch": 0.5781140207054856, + "grad_norm": 0.5738731121717446, + "learning_rate": 4.170495033659116e-06, + "loss": 0.2674, + "step": 12341 + }, + { + "epoch": 0.5781608656954139, + "grad_norm": 0.606051683208936, + "learning_rate": 4.170353932275007e-06, + "loss": 0.2905, + "step": 12342 + }, + { + "epoch": 0.5782077106853422, + "grad_norm": 0.6125025331280677, + "learning_rate": 4.170212821278432e-06, + "loss": 0.2814, + "step": 12343 + }, + { + "epoch": 0.5782545556752705, + "grad_norm": 0.6073286654747417, + "learning_rate": 4.170071700670202e-06, + "loss": 0.2619, + "step": 12344 + }, + { + "epoch": 0.5783014006651989, + "grad_norm": 0.6687548452641581, + "learning_rate": 4.169930570451128e-06, + "loss": 0.2848, + "step": 12345 + }, + { + "epoch": 0.5783482456551272, + "grad_norm": 0.6107060674497802, + "learning_rate": 4.169789430622024e-06, + "loss": 0.2758, + "step": 12346 + }, + { + "epoch": 0.5783950906450556, + "grad_norm": 0.5871766145816605, + "learning_rate": 4.169648281183703e-06, + "loss": 0.2696, + "step": 12347 + }, + { + "epoch": 0.5784419356349838, + "grad_norm": 0.5913448395196182, + "learning_rate": 4.1695071221369735e-06, + "loss": 0.2976, + "step": 12348 + }, + { + "epoch": 0.5784887806249122, + "grad_norm": 0.5733539028753603, + "learning_rate": 4.169365953482651e-06, + "loss": 0.2798, + "step": 12349 + }, + { + "epoch": 0.5785356256148405, + "grad_norm": 0.6075738939545099, + "learning_rate": 4.169224775221548e-06, + "loss": 0.287, + "step": 12350 + }, + { + "epoch": 0.5785824706047689, + "grad_norm": 0.5522115092754935, + "learning_rate": 4.169083587354474e-06, + "loss": 0.2869, + "step": 12351 + }, + { + "epoch": 0.5786293155946971, + "grad_norm": 0.6533370084129598, + "learning_rate": 4.1689423898822445e-06, + "loss": 0.3096, + "step": 12352 + }, + { + "epoch": 0.5786761605846255, + "grad_norm": 0.6059614302059321, + "learning_rate": 4.168801182805671e-06, + "loss": 0.2742, + "step": 12353 + }, + { + "epoch": 0.5787230055745538, + "grad_norm": 0.6350471085831453, + "learning_rate": 4.168659966125565e-06, + "loss": 0.3054, + "step": 12354 + }, + { + "epoch": 0.5787698505644822, + "grad_norm": 0.5825817917573884, + "learning_rate": 4.168518739842742e-06, + "loss": 0.3042, + "step": 12355 + }, + { + "epoch": 0.5788166955544105, + "grad_norm": 0.6107118015335238, + "learning_rate": 4.168377503958013e-06, + "loss": 0.28, + "step": 12356 + }, + { + "epoch": 0.5788635405443388, + "grad_norm": 0.6755693373608461, + "learning_rate": 4.1682362584721906e-06, + "loss": 0.3054, + "step": 12357 + }, + { + "epoch": 0.5789103855342671, + "grad_norm": 0.7065772691388208, + "learning_rate": 4.168095003386087e-06, + "loss": 0.3156, + "step": 12358 + }, + { + "epoch": 0.5789572305241955, + "grad_norm": 0.5663421814831262, + "learning_rate": 4.167953738700517e-06, + "loss": 0.2886, + "step": 12359 + }, + { + "epoch": 0.5790040755141238, + "grad_norm": 0.6161932144869936, + "learning_rate": 4.167812464416291e-06, + "loss": 0.2876, + "step": 12360 + }, + { + "epoch": 0.579050920504052, + "grad_norm": 0.6157651565957282, + "learning_rate": 4.167671180534224e-06, + "loss": 0.3024, + "step": 12361 + }, + { + "epoch": 0.5790977654939804, + "grad_norm": 0.6153385488923188, + "learning_rate": 4.167529887055129e-06, + "loss": 0.2733, + "step": 12362 + }, + { + "epoch": 0.5791446104839088, + "grad_norm": 0.5586086635018425, + "learning_rate": 4.167388583979818e-06, + "loss": 0.2621, + "step": 12363 + }, + { + "epoch": 0.5791914554738371, + "grad_norm": 0.5974374919353579, + "learning_rate": 4.167247271309105e-06, + "loss": 0.2803, + "step": 12364 + }, + { + "epoch": 0.5792383004637655, + "grad_norm": 0.6213269022647265, + "learning_rate": 4.167105949043804e-06, + "loss": 0.2784, + "step": 12365 + }, + { + "epoch": 0.5792851454536937, + "grad_norm": 0.6245876140923287, + "learning_rate": 4.166964617184726e-06, + "loss": 0.2952, + "step": 12366 + }, + { + "epoch": 0.579331990443622, + "grad_norm": 0.562457046388376, + "learning_rate": 4.1668232757326855e-06, + "loss": 0.2933, + "step": 12367 + }, + { + "epoch": 0.5793788354335504, + "grad_norm": 0.6088488775531226, + "learning_rate": 4.166681924688497e-06, + "loss": 0.2914, + "step": 12368 + }, + { + "epoch": 0.5794256804234788, + "grad_norm": 0.636068894334873, + "learning_rate": 4.166540564052972e-06, + "loss": 0.2822, + "step": 12369 + }, + { + "epoch": 0.579472525413407, + "grad_norm": 0.6165493351031119, + "learning_rate": 4.166399193826926e-06, + "loss": 0.2918, + "step": 12370 + }, + { + "epoch": 0.5795193704033353, + "grad_norm": 0.5622543961219905, + "learning_rate": 4.1662578140111706e-06, + "loss": 0.2675, + "step": 12371 + }, + { + "epoch": 0.5795662153932637, + "grad_norm": 0.6109569825093482, + "learning_rate": 4.1661164246065206e-06, + "loss": 0.2912, + "step": 12372 + }, + { + "epoch": 0.579613060383192, + "grad_norm": 0.6294336040871231, + "learning_rate": 4.165975025613789e-06, + "loss": 0.2813, + "step": 12373 + }, + { + "epoch": 0.5796599053731204, + "grad_norm": 0.5509347324549708, + "learning_rate": 4.16583361703379e-06, + "loss": 0.257, + "step": 12374 + }, + { + "epoch": 0.5797067503630486, + "grad_norm": 0.5632629574269333, + "learning_rate": 4.165692198867337e-06, + "loss": 0.2668, + "step": 12375 + }, + { + "epoch": 0.579753595352977, + "grad_norm": 0.6277111889443939, + "learning_rate": 4.1655507711152446e-06, + "loss": 0.2995, + "step": 12376 + }, + { + "epoch": 0.5798004403429053, + "grad_norm": 0.5791354803766662, + "learning_rate": 4.1654093337783265e-06, + "loss": 0.2745, + "step": 12377 + }, + { + "epoch": 0.5798472853328337, + "grad_norm": 0.63056670574803, + "learning_rate": 4.165267886857395e-06, + "loss": 0.293, + "step": 12378 + }, + { + "epoch": 0.5798941303227619, + "grad_norm": 0.5845944041511819, + "learning_rate": 4.165126430353267e-06, + "loss": 0.2814, + "step": 12379 + }, + { + "epoch": 0.5799409753126903, + "grad_norm": 0.5814307193398793, + "learning_rate": 4.164984964266753e-06, + "loss": 0.2793, + "step": 12380 + }, + { + "epoch": 0.5799878203026186, + "grad_norm": 0.9231553660283898, + "learning_rate": 4.164843488598671e-06, + "loss": 0.2826, + "step": 12381 + }, + { + "epoch": 0.580034665292547, + "grad_norm": 0.5819719462569142, + "learning_rate": 4.164702003349831e-06, + "loss": 0.3082, + "step": 12382 + }, + { + "epoch": 0.5800815102824753, + "grad_norm": 0.5533561601995426, + "learning_rate": 4.164560508521051e-06, + "loss": 0.2795, + "step": 12383 + }, + { + "epoch": 0.5801283552724036, + "grad_norm": 0.6097971370699783, + "learning_rate": 4.164419004113143e-06, + "loss": 0.2928, + "step": 12384 + }, + { + "epoch": 0.5801752002623319, + "grad_norm": 0.5833479227331971, + "learning_rate": 4.164277490126922e-06, + "loss": 0.2817, + "step": 12385 + }, + { + "epoch": 0.5802220452522603, + "grad_norm": 0.560378447138068, + "learning_rate": 4.164135966563202e-06, + "loss": 0.2645, + "step": 12386 + }, + { + "epoch": 0.5802688902421886, + "grad_norm": 0.600777315533433, + "learning_rate": 4.163994433422799e-06, + "loss": 0.2878, + "step": 12387 + }, + { + "epoch": 0.5803157352321169, + "grad_norm": 0.5188280196652434, + "learning_rate": 4.1638528907065255e-06, + "loss": 0.2497, + "step": 12388 + }, + { + "epoch": 0.5803625802220452, + "grad_norm": 0.6356212864748079, + "learning_rate": 4.163711338415197e-06, + "loss": 0.292, + "step": 12389 + }, + { + "epoch": 0.5804094252119736, + "grad_norm": 0.5658256513557236, + "learning_rate": 4.163569776549628e-06, + "loss": 0.2767, + "step": 12390 + }, + { + "epoch": 0.5804562702019019, + "grad_norm": 0.5849090177090761, + "learning_rate": 4.163428205110632e-06, + "loss": 0.2888, + "step": 12391 + }, + { + "epoch": 0.5805031151918303, + "grad_norm": 0.6018546343711697, + "learning_rate": 4.163286624099025e-06, + "loss": 0.3034, + "step": 12392 + }, + { + "epoch": 0.5805499601817585, + "grad_norm": 0.5752286533378115, + "learning_rate": 4.163145033515623e-06, + "loss": 0.2969, + "step": 12393 + }, + { + "epoch": 0.5805968051716869, + "grad_norm": 0.5859915741166817, + "learning_rate": 4.163003433361238e-06, + "loss": 0.2929, + "step": 12394 + }, + { + "epoch": 0.5806436501616152, + "grad_norm": 0.5669164469922068, + "learning_rate": 4.162861823636687e-06, + "loss": 0.2639, + "step": 12395 + }, + { + "epoch": 0.5806904951515436, + "grad_norm": 0.6339772986818818, + "learning_rate": 4.162720204342784e-06, + "loss": 0.2992, + "step": 12396 + }, + { + "epoch": 0.5807373401414718, + "grad_norm": 0.6224391608300426, + "learning_rate": 4.1625785754803436e-06, + "loss": 0.2812, + "step": 12397 + }, + { + "epoch": 0.5807841851314002, + "grad_norm": 0.5888833475545262, + "learning_rate": 4.162436937050181e-06, + "loss": 0.3023, + "step": 12398 + }, + { + "epoch": 0.5808310301213285, + "grad_norm": 0.6196093170849664, + "learning_rate": 4.162295289053113e-06, + "loss": 0.2872, + "step": 12399 + }, + { + "epoch": 0.5808778751112569, + "grad_norm": 0.5751805221845423, + "learning_rate": 4.162153631489952e-06, + "loss": 0.2732, + "step": 12400 + }, + { + "epoch": 0.5809247201011852, + "grad_norm": 0.596804207668192, + "learning_rate": 4.162011964361516e-06, + "loss": 0.3021, + "step": 12401 + }, + { + "epoch": 0.5809715650911135, + "grad_norm": 0.5947331821316276, + "learning_rate": 4.161870287668619e-06, + "loss": 0.2958, + "step": 12402 + }, + { + "epoch": 0.5810184100810418, + "grad_norm": 0.7169805118438015, + "learning_rate": 4.161728601412075e-06, + "loss": 0.3063, + "step": 12403 + }, + { + "epoch": 0.5810652550709702, + "grad_norm": 0.6318722970685232, + "learning_rate": 4.161586905592701e-06, + "loss": 0.2925, + "step": 12404 + }, + { + "epoch": 0.5811121000608985, + "grad_norm": 0.5764168886565574, + "learning_rate": 4.161445200211312e-06, + "loss": 0.269, + "step": 12405 + }, + { + "epoch": 0.5811589450508268, + "grad_norm": 0.6027565282517282, + "learning_rate": 4.161303485268723e-06, + "loss": 0.3096, + "step": 12406 + }, + { + "epoch": 0.5812057900407551, + "grad_norm": 0.6401308725773962, + "learning_rate": 4.16116176076575e-06, + "loss": 0.2814, + "step": 12407 + }, + { + "epoch": 0.5812526350306835, + "grad_norm": 0.573670057596585, + "learning_rate": 4.161020026703209e-06, + "loss": 0.3065, + "step": 12408 + }, + { + "epoch": 0.5812994800206118, + "grad_norm": 0.5978872038489367, + "learning_rate": 4.160878283081916e-06, + "loss": 0.2886, + "step": 12409 + }, + { + "epoch": 0.5813463250105402, + "grad_norm": 0.5990444462028325, + "learning_rate": 4.160736529902684e-06, + "loss": 0.2725, + "step": 12410 + }, + { + "epoch": 0.5813931700004684, + "grad_norm": 0.5774621631048432, + "learning_rate": 4.160594767166333e-06, + "loss": 0.2756, + "step": 12411 + }, + { + "epoch": 0.5814400149903968, + "grad_norm": 0.6095892622303991, + "learning_rate": 4.160452994873675e-06, + "loss": 0.2889, + "step": 12412 + }, + { + "epoch": 0.5814868599803251, + "grad_norm": 0.581160949109297, + "learning_rate": 4.1603112130255284e-06, + "loss": 0.2811, + "step": 12413 + }, + { + "epoch": 0.5815337049702535, + "grad_norm": 0.6039751091636198, + "learning_rate": 4.160169421622708e-06, + "loss": 0.2871, + "step": 12414 + }, + { + "epoch": 0.5815805499601817, + "grad_norm": 0.6154287656538548, + "learning_rate": 4.160027620666029e-06, + "loss": 0.2853, + "step": 12415 + }, + { + "epoch": 0.5816273949501101, + "grad_norm": 0.5424546502679178, + "learning_rate": 4.159885810156308e-06, + "loss": 0.2382, + "step": 12416 + }, + { + "epoch": 0.5816742399400384, + "grad_norm": 0.6377258644063023, + "learning_rate": 4.159743990094362e-06, + "loss": 0.301, + "step": 12417 + }, + { + "epoch": 0.5817210849299668, + "grad_norm": 0.6164106499337235, + "learning_rate": 4.1596021604810065e-06, + "loss": 0.2862, + "step": 12418 + }, + { + "epoch": 0.5817679299198951, + "grad_norm": 0.6656804793799586, + "learning_rate": 4.159460321317057e-06, + "loss": 0.2955, + "step": 12419 + }, + { + "epoch": 0.5818147749098234, + "grad_norm": 0.571721725827776, + "learning_rate": 4.159318472603332e-06, + "loss": 0.2782, + "step": 12420 + }, + { + "epoch": 0.5818616198997517, + "grad_norm": 0.5690858602533706, + "learning_rate": 4.1591766143406445e-06, + "loss": 0.2862, + "step": 12421 + }, + { + "epoch": 0.5819084648896801, + "grad_norm": 0.6062922117470593, + "learning_rate": 4.159034746529813e-06, + "loss": 0.293, + "step": 12422 + }, + { + "epoch": 0.5819553098796084, + "grad_norm": 0.5502949821925363, + "learning_rate": 4.158892869171654e-06, + "loss": 0.2703, + "step": 12423 + }, + { + "epoch": 0.5820021548695367, + "grad_norm": 0.5847464605167095, + "learning_rate": 4.158750982266983e-06, + "loss": 0.2708, + "step": 12424 + }, + { + "epoch": 0.582048999859465, + "grad_norm": 0.6432644959071138, + "learning_rate": 4.158609085816618e-06, + "loss": 0.3012, + "step": 12425 + }, + { + "epoch": 0.5820958448493934, + "grad_norm": 0.6752568734666029, + "learning_rate": 4.1584671798213735e-06, + "loss": 0.3139, + "step": 12426 + }, + { + "epoch": 0.5821426898393217, + "grad_norm": 0.6237697214656538, + "learning_rate": 4.1583252642820686e-06, + "loss": 0.2727, + "step": 12427 + }, + { + "epoch": 0.5821895348292501, + "grad_norm": 0.6127300734702179, + "learning_rate": 4.158183339199518e-06, + "loss": 0.2832, + "step": 12428 + }, + { + "epoch": 0.5822363798191783, + "grad_norm": 0.6013403036126307, + "learning_rate": 4.158041404574538e-06, + "loss": 0.2764, + "step": 12429 + }, + { + "epoch": 0.5822832248091067, + "grad_norm": 0.5601896039113262, + "learning_rate": 4.157899460407947e-06, + "loss": 0.2633, + "step": 12430 + }, + { + "epoch": 0.582330069799035, + "grad_norm": 0.5726379349135332, + "learning_rate": 4.1577575067005615e-06, + "loss": 0.2803, + "step": 12431 + }, + { + "epoch": 0.5823769147889634, + "grad_norm": 0.5726730697444059, + "learning_rate": 4.157615543453198e-06, + "loss": 0.2853, + "step": 12432 + }, + { + "epoch": 0.5824237597788916, + "grad_norm": 0.6249810458682621, + "learning_rate": 4.157473570666674e-06, + "loss": 0.2785, + "step": 12433 + }, + { + "epoch": 0.58247060476882, + "grad_norm": 0.6036130146203623, + "learning_rate": 4.157331588341806e-06, + "loss": 0.2882, + "step": 12434 + }, + { + "epoch": 0.5825174497587483, + "grad_norm": 0.5576184173264614, + "learning_rate": 4.157189596479412e-06, + "loss": 0.2538, + "step": 12435 + }, + { + "epoch": 0.5825642947486767, + "grad_norm": 0.617488035091027, + "learning_rate": 4.1570475950803065e-06, + "loss": 0.2777, + "step": 12436 + }, + { + "epoch": 0.582611139738605, + "grad_norm": 0.5889299727732878, + "learning_rate": 4.15690558414531e-06, + "loss": 0.2759, + "step": 12437 + }, + { + "epoch": 0.5826579847285333, + "grad_norm": 0.5832600727616046, + "learning_rate": 4.156763563675238e-06, + "loss": 0.2789, + "step": 12438 + }, + { + "epoch": 0.5827048297184616, + "grad_norm": 0.6264558371686272, + "learning_rate": 4.156621533670909e-06, + "loss": 0.2875, + "step": 12439 + }, + { + "epoch": 0.58275167470839, + "grad_norm": 0.6290647904412556, + "learning_rate": 4.156479494133139e-06, + "loss": 0.2937, + "step": 12440 + }, + { + "epoch": 0.5827985196983183, + "grad_norm": 0.6040981571760295, + "learning_rate": 4.1563374450627445e-06, + "loss": 0.2982, + "step": 12441 + }, + { + "epoch": 0.5828453646882465, + "grad_norm": 0.6306900562395055, + "learning_rate": 4.1561953864605455e-06, + "loss": 0.2887, + "step": 12442 + }, + { + "epoch": 0.5828922096781749, + "grad_norm": 0.6627274100479484, + "learning_rate": 4.156053318327358e-06, + "loss": 0.2993, + "step": 12443 + }, + { + "epoch": 0.5829390546681033, + "grad_norm": 0.569121470254923, + "learning_rate": 4.155911240664e-06, + "loss": 0.2942, + "step": 12444 + }, + { + "epoch": 0.5829858996580316, + "grad_norm": 0.6306382118272449, + "learning_rate": 4.1557691534712894e-06, + "loss": 0.2829, + "step": 12445 + }, + { + "epoch": 0.58303274464796, + "grad_norm": 0.5736500064540353, + "learning_rate": 4.155627056750044e-06, + "loss": 0.2809, + "step": 12446 + }, + { + "epoch": 0.5830795896378882, + "grad_norm": 0.6299912486341056, + "learning_rate": 4.155484950501079e-06, + "loss": 0.2864, + "step": 12447 + }, + { + "epoch": 0.5831264346278165, + "grad_norm": 0.6430247072495462, + "learning_rate": 4.155342834725215e-06, + "loss": 0.2957, + "step": 12448 + }, + { + "epoch": 0.5831732796177449, + "grad_norm": 0.640543076190279, + "learning_rate": 4.1552007094232695e-06, + "loss": 0.2751, + "step": 12449 + }, + { + "epoch": 0.5832201246076733, + "grad_norm": 0.6519891350016472, + "learning_rate": 4.155058574596061e-06, + "loss": 0.2833, + "step": 12450 + }, + { + "epoch": 0.5832669695976015, + "grad_norm": 0.5928997297411147, + "learning_rate": 4.154916430244405e-06, + "loss": 0.2643, + "step": 12451 + }, + { + "epoch": 0.5833138145875298, + "grad_norm": 0.6244760911367382, + "learning_rate": 4.15477427636912e-06, + "loss": 0.2896, + "step": 12452 + }, + { + "epoch": 0.5833606595774582, + "grad_norm": 0.5925861829800728, + "learning_rate": 4.154632112971026e-06, + "loss": 0.2994, + "step": 12453 + }, + { + "epoch": 0.5834075045673865, + "grad_norm": 0.5933603184125322, + "learning_rate": 4.154489940050941e-06, + "loss": 0.2818, + "step": 12454 + }, + { + "epoch": 0.5834543495573149, + "grad_norm": 0.5900714765532579, + "learning_rate": 4.1543477576096804e-06, + "loss": 0.2871, + "step": 12455 + }, + { + "epoch": 0.5835011945472431, + "grad_norm": 0.6275764007814905, + "learning_rate": 4.1542055656480655e-06, + "loss": 0.2739, + "step": 12456 + }, + { + "epoch": 0.5835480395371715, + "grad_norm": 0.5553660429063182, + "learning_rate": 4.154063364166913e-06, + "loss": 0.2917, + "step": 12457 + }, + { + "epoch": 0.5835948845270998, + "grad_norm": 0.5797619144186328, + "learning_rate": 4.153921153167042e-06, + "loss": 0.2786, + "step": 12458 + }, + { + "epoch": 0.5836417295170282, + "grad_norm": 0.6000277536498045, + "learning_rate": 4.1537789326492696e-06, + "loss": 0.283, + "step": 12459 + }, + { + "epoch": 0.5836885745069564, + "grad_norm": 0.598192737932619, + "learning_rate": 4.1536367026144155e-06, + "loss": 0.2852, + "step": 12460 + }, + { + "epoch": 0.5837354194968848, + "grad_norm": 0.6425538022445332, + "learning_rate": 4.153494463063298e-06, + "loss": 0.3044, + "step": 12461 + }, + { + "epoch": 0.5837822644868131, + "grad_norm": 0.5573338440862236, + "learning_rate": 4.153352213996735e-06, + "loss": 0.2726, + "step": 12462 + }, + { + "epoch": 0.5838291094767415, + "grad_norm": 0.6179211449855435, + "learning_rate": 4.153209955415547e-06, + "loss": 0.2903, + "step": 12463 + }, + { + "epoch": 0.5838759544666698, + "grad_norm": 0.6031663073977314, + "learning_rate": 4.15306768732055e-06, + "loss": 0.2793, + "step": 12464 + }, + { + "epoch": 0.5839227994565981, + "grad_norm": 0.6248497061453393, + "learning_rate": 4.152925409712564e-06, + "loss": 0.2781, + "step": 12465 + }, + { + "epoch": 0.5839696444465264, + "grad_norm": 0.5519833704333253, + "learning_rate": 4.152783122592408e-06, + "loss": 0.2679, + "step": 12466 + }, + { + "epoch": 0.5840164894364548, + "grad_norm": 0.584949703365863, + "learning_rate": 4.1526408259609e-06, + "loss": 0.2964, + "step": 12467 + }, + { + "epoch": 0.5840633344263831, + "grad_norm": 0.5909267974135665, + "learning_rate": 4.15249851981886e-06, + "loss": 0.3074, + "step": 12468 + }, + { + "epoch": 0.5841101794163114, + "grad_norm": 0.564793635948752, + "learning_rate": 4.152356204167105e-06, + "loss": 0.2811, + "step": 12469 + }, + { + "epoch": 0.5841570244062397, + "grad_norm": 0.5654654115055922, + "learning_rate": 4.152213879006457e-06, + "loss": 0.2738, + "step": 12470 + }, + { + "epoch": 0.5842038693961681, + "grad_norm": 0.5977102681730362, + "learning_rate": 4.152071544337732e-06, + "loss": 0.2798, + "step": 12471 + }, + { + "epoch": 0.5842507143860964, + "grad_norm": 0.5974262669265236, + "learning_rate": 4.151929200161752e-06, + "loss": 0.2811, + "step": 12472 + }, + { + "epoch": 0.5842975593760248, + "grad_norm": 0.6103147699946933, + "learning_rate": 4.151786846479334e-06, + "loss": 0.2992, + "step": 12473 + }, + { + "epoch": 0.584344404365953, + "grad_norm": 0.613278959656116, + "learning_rate": 4.151644483291298e-06, + "loss": 0.2684, + "step": 12474 + }, + { + "epoch": 0.5843912493558814, + "grad_norm": 0.6225712140962956, + "learning_rate": 4.151502110598463e-06, + "loss": 0.277, + "step": 12475 + }, + { + "epoch": 0.5844380943458097, + "grad_norm": 0.6147211874202505, + "learning_rate": 4.151359728401648e-06, + "loss": 0.2843, + "step": 12476 + }, + { + "epoch": 0.5844849393357381, + "grad_norm": 0.5609710891174154, + "learning_rate": 4.151217336701673e-06, + "loss": 0.2756, + "step": 12477 + }, + { + "epoch": 0.5845317843256663, + "grad_norm": 0.5958618605660561, + "learning_rate": 4.151074935499358e-06, + "loss": 0.29, + "step": 12478 + }, + { + "epoch": 0.5845786293155947, + "grad_norm": 0.5430699124931638, + "learning_rate": 4.150932524795521e-06, + "loss": 0.293, + "step": 12479 + }, + { + "epoch": 0.584625474305523, + "grad_norm": 0.601385110396626, + "learning_rate": 4.150790104590982e-06, + "loss": 0.2728, + "step": 12480 + }, + { + "epoch": 0.5846723192954514, + "grad_norm": 0.5508952559446499, + "learning_rate": 4.150647674886562e-06, + "loss": 0.2701, + "step": 12481 + }, + { + "epoch": 0.5847191642853797, + "grad_norm": 0.5551224302756989, + "learning_rate": 4.150505235683079e-06, + "loss": 0.2735, + "step": 12482 + }, + { + "epoch": 0.584766009275308, + "grad_norm": 0.6256779740709546, + "learning_rate": 4.1503627869813525e-06, + "loss": 0.2911, + "step": 12483 + }, + { + "epoch": 0.5848128542652363, + "grad_norm": 0.6050285852345285, + "learning_rate": 4.150220328782204e-06, + "loss": 0.3085, + "step": 12484 + }, + { + "epoch": 0.5848596992551647, + "grad_norm": 0.6016547390111949, + "learning_rate": 4.150077861086451e-06, + "loss": 0.3126, + "step": 12485 + }, + { + "epoch": 0.584906544245093, + "grad_norm": 0.5984562225660578, + "learning_rate": 4.149935383894916e-06, + "loss": 0.2844, + "step": 12486 + }, + { + "epoch": 0.5849533892350213, + "grad_norm": 0.5819508926001669, + "learning_rate": 4.149792897208416e-06, + "loss": 0.2792, + "step": 12487 + }, + { + "epoch": 0.5850002342249496, + "grad_norm": 0.5984935244585068, + "learning_rate": 4.1496504010277744e-06, + "loss": 0.2867, + "step": 12488 + }, + { + "epoch": 0.585047079214878, + "grad_norm": 0.6422858253728045, + "learning_rate": 4.149507895353808e-06, + "loss": 0.2937, + "step": 12489 + }, + { + "epoch": 0.5850939242048063, + "grad_norm": 0.5867522780133256, + "learning_rate": 4.149365380187339e-06, + "loss": 0.2778, + "step": 12490 + }, + { + "epoch": 0.5851407691947347, + "grad_norm": 0.6466224590324979, + "learning_rate": 4.149222855529187e-06, + "loss": 0.2747, + "step": 12491 + }, + { + "epoch": 0.5851876141846629, + "grad_norm": 0.6174424104132623, + "learning_rate": 4.149080321380171e-06, + "loss": 0.2858, + "step": 12492 + }, + { + "epoch": 0.5852344591745913, + "grad_norm": 0.5820130002095287, + "learning_rate": 4.1489377777411135e-06, + "loss": 0.2772, + "step": 12493 + }, + { + "epoch": 0.5852813041645196, + "grad_norm": 0.569220102166059, + "learning_rate": 4.148795224612832e-06, + "loss": 0.2941, + "step": 12494 + }, + { + "epoch": 0.585328149154448, + "grad_norm": 0.5778591157666397, + "learning_rate": 4.14865266199615e-06, + "loss": 0.275, + "step": 12495 + }, + { + "epoch": 0.5853749941443762, + "grad_norm": 0.6069274932958506, + "learning_rate": 4.148510089891885e-06, + "loss": 0.2904, + "step": 12496 + }, + { + "epoch": 0.5854218391343046, + "grad_norm": 0.547275274041275, + "learning_rate": 4.148367508300859e-06, + "loss": 0.2948, + "step": 12497 + }, + { + "epoch": 0.5854686841242329, + "grad_norm": 0.6565776553304653, + "learning_rate": 4.148224917223893e-06, + "loss": 0.2944, + "step": 12498 + }, + { + "epoch": 0.5855155291141613, + "grad_norm": 0.622364061817516, + "learning_rate": 4.1480823166618054e-06, + "loss": 0.2866, + "step": 12499 + }, + { + "epoch": 0.5855623741040896, + "grad_norm": 0.5807632808882444, + "learning_rate": 4.14793970661542e-06, + "loss": 0.2745, + "step": 12500 + }, + { + "epoch": 0.5856092190940179, + "grad_norm": 0.6526087311082196, + "learning_rate": 4.147797087085554e-06, + "loss": 0.2984, + "step": 12501 + }, + { + "epoch": 0.5856560640839462, + "grad_norm": 0.5784627366457461, + "learning_rate": 4.147654458073032e-06, + "loss": 0.2875, + "step": 12502 + }, + { + "epoch": 0.5857029090738746, + "grad_norm": 0.5896801720062251, + "learning_rate": 4.147511819578671e-06, + "loss": 0.2884, + "step": 12503 + }, + { + "epoch": 0.5857497540638029, + "grad_norm": 0.5693707059480692, + "learning_rate": 4.1473691716032945e-06, + "loss": 0.2649, + "step": 12504 + }, + { + "epoch": 0.5857965990537312, + "grad_norm": 0.5937291237712599, + "learning_rate": 4.147226514147722e-06, + "loss": 0.2977, + "step": 12505 + }, + { + "epoch": 0.5858434440436595, + "grad_norm": 0.6221799957846572, + "learning_rate": 4.1470838472127756e-06, + "loss": 0.3006, + "step": 12506 + }, + { + "epoch": 0.5858902890335879, + "grad_norm": 0.5732710998416141, + "learning_rate": 4.146941170799275e-06, + "loss": 0.2788, + "step": 12507 + }, + { + "epoch": 0.5859371340235162, + "grad_norm": 0.6067702166575744, + "learning_rate": 4.146798484908042e-06, + "loss": 0.2842, + "step": 12508 + }, + { + "epoch": 0.5859839790134446, + "grad_norm": 0.598549676794907, + "learning_rate": 4.146655789539898e-06, + "loss": 0.2903, + "step": 12509 + }, + { + "epoch": 0.5860308240033728, + "grad_norm": 0.5881214545314015, + "learning_rate": 4.1465130846956636e-06, + "loss": 0.2733, + "step": 12510 + }, + { + "epoch": 0.5860776689933012, + "grad_norm": 0.5948655567595635, + "learning_rate": 4.14637037037616e-06, + "loss": 0.3024, + "step": 12511 + }, + { + "epoch": 0.5861245139832295, + "grad_norm": 0.6033958561315622, + "learning_rate": 4.14622764658221e-06, + "loss": 0.2991, + "step": 12512 + }, + { + "epoch": 0.5861713589731579, + "grad_norm": 0.6590164978387074, + "learning_rate": 4.146084913314631e-06, + "loss": 0.3138, + "step": 12513 + }, + { + "epoch": 0.5862182039630861, + "grad_norm": 0.5960801855909442, + "learning_rate": 4.145942170574248e-06, + "loss": 0.2789, + "step": 12514 + }, + { + "epoch": 0.5862650489530145, + "grad_norm": 0.6100050374204097, + "learning_rate": 4.145799418361883e-06, + "loss": 0.2896, + "step": 12515 + }, + { + "epoch": 0.5863118939429428, + "grad_norm": 0.5792125405905547, + "learning_rate": 4.145656656678355e-06, + "loss": 0.269, + "step": 12516 + }, + { + "epoch": 0.5863587389328712, + "grad_norm": 0.5665936866627245, + "learning_rate": 4.145513885524487e-06, + "loss": 0.2764, + "step": 12517 + }, + { + "epoch": 0.5864055839227995, + "grad_norm": 0.5649982658250104, + "learning_rate": 4.1453711049011e-06, + "loss": 0.2843, + "step": 12518 + }, + { + "epoch": 0.5864524289127278, + "grad_norm": 0.5833434007802183, + "learning_rate": 4.145228314809015e-06, + "loss": 0.2851, + "step": 12519 + }, + { + "epoch": 0.5864992739026561, + "grad_norm": 0.6099599412061981, + "learning_rate": 4.145085515249055e-06, + "loss": 0.2814, + "step": 12520 + }, + { + "epoch": 0.5865461188925845, + "grad_norm": 0.6326661903266554, + "learning_rate": 4.1449427062220425e-06, + "loss": 0.2946, + "step": 12521 + }, + { + "epoch": 0.5865929638825128, + "grad_norm": 0.5497180591001688, + "learning_rate": 4.144799887728797e-06, + "loss": 0.2796, + "step": 12522 + }, + { + "epoch": 0.586639808872441, + "grad_norm": 0.6019399756606406, + "learning_rate": 4.1446570597701415e-06, + "loss": 0.2794, + "step": 12523 + }, + { + "epoch": 0.5866866538623694, + "grad_norm": 0.5870610734590836, + "learning_rate": 4.144514222346899e-06, + "loss": 0.3047, + "step": 12524 + }, + { + "epoch": 0.5867334988522978, + "grad_norm": 0.5920534187413525, + "learning_rate": 4.1443713754598894e-06, + "loss": 0.2856, + "step": 12525 + }, + { + "epoch": 0.5867803438422261, + "grad_norm": 0.6218749635298277, + "learning_rate": 4.144228519109936e-06, + "loss": 0.2942, + "step": 12526 + }, + { + "epoch": 0.5868271888321545, + "grad_norm": 0.6451224729752457, + "learning_rate": 4.144085653297861e-06, + "loss": 0.3235, + "step": 12527 + }, + { + "epoch": 0.5868740338220827, + "grad_norm": 0.5316013990909454, + "learning_rate": 4.143942778024487e-06, + "loss": 0.28, + "step": 12528 + }, + { + "epoch": 0.586920878812011, + "grad_norm": 0.6161301260330735, + "learning_rate": 4.143799893290634e-06, + "loss": 0.3123, + "step": 12529 + }, + { + "epoch": 0.5869677238019394, + "grad_norm": 0.5930673724380936, + "learning_rate": 4.143656999097126e-06, + "loss": 0.2816, + "step": 12530 + }, + { + "epoch": 0.5870145687918678, + "grad_norm": 0.5287236016386263, + "learning_rate": 4.1435140954447865e-06, + "loss": 0.2585, + "step": 12531 + }, + { + "epoch": 0.587061413781796, + "grad_norm": 0.5839616093052564, + "learning_rate": 4.143371182334435e-06, + "loss": 0.2888, + "step": 12532 + }, + { + "epoch": 0.5871082587717243, + "grad_norm": 0.6026228150390792, + "learning_rate": 4.143228259766896e-06, + "loss": 0.2765, + "step": 12533 + }, + { + "epoch": 0.5871551037616527, + "grad_norm": 0.6007804286074976, + "learning_rate": 4.143085327742992e-06, + "loss": 0.295, + "step": 12534 + }, + { + "epoch": 0.587201948751581, + "grad_norm": 0.6122659041223836, + "learning_rate": 4.142942386263543e-06, + "loss": 0.2917, + "step": 12535 + }, + { + "epoch": 0.5872487937415094, + "grad_norm": 0.578750455132207, + "learning_rate": 4.142799435329376e-06, + "loss": 0.2716, + "step": 12536 + }, + { + "epoch": 0.5872956387314376, + "grad_norm": 0.5309079746068756, + "learning_rate": 4.142656474941309e-06, + "loss": 0.2527, + "step": 12537 + }, + { + "epoch": 0.587342483721366, + "grad_norm": 0.574689355273957, + "learning_rate": 4.142513505100168e-06, + "loss": 0.2779, + "step": 12538 + }, + { + "epoch": 0.5873893287112943, + "grad_norm": 0.5626075746586698, + "learning_rate": 4.142370525806774e-06, + "loss": 0.2781, + "step": 12539 + }, + { + "epoch": 0.5874361737012227, + "grad_norm": 0.5861615499603062, + "learning_rate": 4.142227537061951e-06, + "loss": 0.2779, + "step": 12540 + }, + { + "epoch": 0.5874830186911509, + "grad_norm": 0.592706583252793, + "learning_rate": 4.142084538866521e-06, + "loss": 0.2619, + "step": 12541 + }, + { + "epoch": 0.5875298636810793, + "grad_norm": 0.5929871429423778, + "learning_rate": 4.141941531221308e-06, + "loss": 0.3078, + "step": 12542 + }, + { + "epoch": 0.5875767086710076, + "grad_norm": 0.5496900275594004, + "learning_rate": 4.141798514127133e-06, + "loss": 0.2749, + "step": 12543 + }, + { + "epoch": 0.587623553660936, + "grad_norm": 0.5909169053018627, + "learning_rate": 4.14165548758482e-06, + "loss": 0.2917, + "step": 12544 + }, + { + "epoch": 0.5876703986508643, + "grad_norm": 0.6134282307825507, + "learning_rate": 4.1415124515951936e-06, + "loss": 0.2876, + "step": 12545 + }, + { + "epoch": 0.5877172436407926, + "grad_norm": 0.578372276215589, + "learning_rate": 4.1413694061590745e-06, + "loss": 0.2867, + "step": 12546 + }, + { + "epoch": 0.5877640886307209, + "grad_norm": 0.5777839899041158, + "learning_rate": 4.141226351277288e-06, + "loss": 0.2838, + "step": 12547 + }, + { + "epoch": 0.5878109336206493, + "grad_norm": 0.6482298805885945, + "learning_rate": 4.141083286950655e-06, + "loss": 0.3003, + "step": 12548 + }, + { + "epoch": 0.5878577786105776, + "grad_norm": 0.5862772978550169, + "learning_rate": 4.140940213180002e-06, + "loss": 0.2698, + "step": 12549 + }, + { + "epoch": 0.5879046236005059, + "grad_norm": 0.5861003569205299, + "learning_rate": 4.140797129966149e-06, + "loss": 0.2769, + "step": 12550 + }, + { + "epoch": 0.5879514685904342, + "grad_norm": 0.6195395825233964, + "learning_rate": 4.140654037309921e-06, + "loss": 0.3086, + "step": 12551 + }, + { + "epoch": 0.5879983135803626, + "grad_norm": 0.5627919624521959, + "learning_rate": 4.140510935212142e-06, + "loss": 0.3024, + "step": 12552 + }, + { + "epoch": 0.5880451585702909, + "grad_norm": 0.6019997450129395, + "learning_rate": 4.140367823673634e-06, + "loss": 0.2627, + "step": 12553 + }, + { + "epoch": 0.5880920035602193, + "grad_norm": 0.5319566548769928, + "learning_rate": 4.140224702695222e-06, + "loss": 0.2731, + "step": 12554 + }, + { + "epoch": 0.5881388485501475, + "grad_norm": 0.5760931752695475, + "learning_rate": 4.14008157227773e-06, + "loss": 0.2914, + "step": 12555 + }, + { + "epoch": 0.5881856935400759, + "grad_norm": 0.639535874798972, + "learning_rate": 4.1399384324219796e-06, + "loss": 0.2657, + "step": 12556 + }, + { + "epoch": 0.5882325385300042, + "grad_norm": 0.632511777083672, + "learning_rate": 4.139795283128796e-06, + "loss": 0.2905, + "step": 12557 + }, + { + "epoch": 0.5882793835199326, + "grad_norm": 0.6705308131749241, + "learning_rate": 4.139652124399003e-06, + "loss": 0.2944, + "step": 12558 + }, + { + "epoch": 0.5883262285098608, + "grad_norm": 0.5941406339082971, + "learning_rate": 4.1395089562334234e-06, + "loss": 0.2945, + "step": 12559 + }, + { + "epoch": 0.5883730734997892, + "grad_norm": 0.5845926361097544, + "learning_rate": 4.1393657786328826e-06, + "loss": 0.2788, + "step": 12560 + }, + { + "epoch": 0.5884199184897175, + "grad_norm": 0.5415334218217581, + "learning_rate": 4.139222591598204e-06, + "loss": 0.2728, + "step": 12561 + }, + { + "epoch": 0.5884667634796459, + "grad_norm": 0.6481939948831918, + "learning_rate": 4.13907939513021e-06, + "loss": 0.2777, + "step": 12562 + }, + { + "epoch": 0.5885136084695742, + "grad_norm": 0.5704607598400219, + "learning_rate": 4.138936189229727e-06, + "loss": 0.2716, + "step": 12563 + }, + { + "epoch": 0.5885604534595025, + "grad_norm": 0.6719425597036748, + "learning_rate": 4.138792973897579e-06, + "loss": 0.288, + "step": 12564 + }, + { + "epoch": 0.5886072984494308, + "grad_norm": 0.5866467063299365, + "learning_rate": 4.138649749134588e-06, + "loss": 0.272, + "step": 12565 + }, + { + "epoch": 0.5886541434393592, + "grad_norm": 0.5999040843455448, + "learning_rate": 4.1385065149415804e-06, + "loss": 0.2751, + "step": 12566 + }, + { + "epoch": 0.5887009884292875, + "grad_norm": 0.562990005681485, + "learning_rate": 4.138363271319379e-06, + "loss": 0.2927, + "step": 12567 + }, + { + "epoch": 0.5887478334192158, + "grad_norm": 0.5732232814906831, + "learning_rate": 4.1382200182688085e-06, + "loss": 0.2699, + "step": 12568 + }, + { + "epoch": 0.5887946784091441, + "grad_norm": 0.6182729975813869, + "learning_rate": 4.138076755790694e-06, + "loss": 0.2971, + "step": 12569 + }, + { + "epoch": 0.5888415233990725, + "grad_norm": 0.5492076497198268, + "learning_rate": 4.1379334838858595e-06, + "loss": 0.2646, + "step": 12570 + }, + { + "epoch": 0.5888883683890008, + "grad_norm": 0.66745307962436, + "learning_rate": 4.1377902025551296e-06, + "loss": 0.2922, + "step": 12571 + }, + { + "epoch": 0.5889352133789292, + "grad_norm": 0.6096680316727818, + "learning_rate": 4.137646911799329e-06, + "loss": 0.2909, + "step": 12572 + }, + { + "epoch": 0.5889820583688574, + "grad_norm": 0.5537287709241577, + "learning_rate": 4.137503611619281e-06, + "loss": 0.2705, + "step": 12573 + }, + { + "epoch": 0.5890289033587858, + "grad_norm": 0.5492859943523078, + "learning_rate": 4.137360302015813e-06, + "loss": 0.2801, + "step": 12574 + }, + { + "epoch": 0.5890757483487141, + "grad_norm": 0.5735420700767094, + "learning_rate": 4.137216982989746e-06, + "loss": 0.2883, + "step": 12575 + }, + { + "epoch": 0.5891225933386425, + "grad_norm": 0.5867226282100378, + "learning_rate": 4.137073654541908e-06, + "loss": 0.2758, + "step": 12576 + }, + { + "epoch": 0.5891694383285707, + "grad_norm": 0.6039663187273412, + "learning_rate": 4.136930316673122e-06, + "loss": 0.2857, + "step": 12577 + }, + { + "epoch": 0.5892162833184991, + "grad_norm": 0.637553958590401, + "learning_rate": 4.136786969384214e-06, + "loss": 0.2861, + "step": 12578 + }, + { + "epoch": 0.5892631283084274, + "grad_norm": 0.5908089867392979, + "learning_rate": 4.136643612676008e-06, + "loss": 0.2876, + "step": 12579 + }, + { + "epoch": 0.5893099732983558, + "grad_norm": 0.6202904255906175, + "learning_rate": 4.13650024654933e-06, + "loss": 0.2898, + "step": 12580 + }, + { + "epoch": 0.5893568182882841, + "grad_norm": 0.5618362858247677, + "learning_rate": 4.136356871005003e-06, + "loss": 0.2869, + "step": 12581 + }, + { + "epoch": 0.5894036632782124, + "grad_norm": 0.6103578122252806, + "learning_rate": 4.1362134860438554e-06, + "loss": 0.28, + "step": 12582 + }, + { + "epoch": 0.5894505082681407, + "grad_norm": 0.6189457382538965, + "learning_rate": 4.136070091666708e-06, + "loss": 0.2914, + "step": 12583 + }, + { + "epoch": 0.5894973532580691, + "grad_norm": 0.5840623398067141, + "learning_rate": 4.135926687874391e-06, + "loss": 0.2929, + "step": 12584 + }, + { + "epoch": 0.5895441982479974, + "grad_norm": 0.5818953420633541, + "learning_rate": 4.135783274667726e-06, + "loss": 0.2857, + "step": 12585 + }, + { + "epoch": 0.5895910432379257, + "grad_norm": 0.5500320242928995, + "learning_rate": 4.135639852047539e-06, + "loss": 0.2808, + "step": 12586 + }, + { + "epoch": 0.589637888227854, + "grad_norm": 0.6375067045993942, + "learning_rate": 4.135496420014656e-06, + "loss": 0.2992, + "step": 12587 + }, + { + "epoch": 0.5896847332177824, + "grad_norm": 0.6207116325015802, + "learning_rate": 4.135352978569902e-06, + "loss": 0.2875, + "step": 12588 + }, + { + "epoch": 0.5897315782077107, + "grad_norm": 0.5504298161732917, + "learning_rate": 4.135209527714103e-06, + "loss": 0.2676, + "step": 12589 + }, + { + "epoch": 0.5897784231976391, + "grad_norm": 0.6551970530979965, + "learning_rate": 4.135066067448083e-06, + "loss": 0.3048, + "step": 12590 + }, + { + "epoch": 0.5898252681875673, + "grad_norm": 0.6235847119503912, + "learning_rate": 4.134922597772671e-06, + "loss": 0.2843, + "step": 12591 + }, + { + "epoch": 0.5898721131774957, + "grad_norm": 0.5747362208883464, + "learning_rate": 4.134779118688689e-06, + "loss": 0.2792, + "step": 12592 + }, + { + "epoch": 0.589918958167424, + "grad_norm": 0.6014469233756595, + "learning_rate": 4.134635630196964e-06, + "loss": 0.2733, + "step": 12593 + }, + { + "epoch": 0.5899658031573524, + "grad_norm": 0.5914727373160398, + "learning_rate": 4.134492132298322e-06, + "loss": 0.2833, + "step": 12594 + }, + { + "epoch": 0.5900126481472806, + "grad_norm": 0.5502791604714518, + "learning_rate": 4.134348624993589e-06, + "loss": 0.282, + "step": 12595 + }, + { + "epoch": 0.590059493137209, + "grad_norm": 0.6488728485126377, + "learning_rate": 4.13420510828359e-06, + "loss": 0.3163, + "step": 12596 + }, + { + "epoch": 0.5901063381271373, + "grad_norm": 0.6076221775748148, + "learning_rate": 4.134061582169151e-06, + "loss": 0.2812, + "step": 12597 + }, + { + "epoch": 0.5901531831170657, + "grad_norm": 0.5986792327391065, + "learning_rate": 4.133918046651099e-06, + "loss": 0.274, + "step": 12598 + }, + { + "epoch": 0.590200028106994, + "grad_norm": 0.6341403928962324, + "learning_rate": 4.133774501730259e-06, + "loss": 0.305, + "step": 12599 + }, + { + "epoch": 0.5902468730969223, + "grad_norm": 0.6351833824275993, + "learning_rate": 4.133630947407458e-06, + "loss": 0.3033, + "step": 12600 + }, + { + "epoch": 0.5902937180868506, + "grad_norm": 0.6226196640071587, + "learning_rate": 4.13348738368352e-06, + "loss": 0.2829, + "step": 12601 + }, + { + "epoch": 0.590340563076779, + "grad_norm": 0.6223097836660537, + "learning_rate": 4.133343810559274e-06, + "loss": 0.2837, + "step": 12602 + }, + { + "epoch": 0.5903874080667073, + "grad_norm": 0.6000775391620109, + "learning_rate": 4.133200228035544e-06, + "loss": 0.2826, + "step": 12603 + }, + { + "epoch": 0.5904342530566355, + "grad_norm": 0.581129097962562, + "learning_rate": 4.133056636113158e-06, + "loss": 0.2912, + "step": 12604 + }, + { + "epoch": 0.5904810980465639, + "grad_norm": 0.5529604444562399, + "learning_rate": 4.132913034792941e-06, + "loss": 0.2678, + "step": 12605 + }, + { + "epoch": 0.5905279430364923, + "grad_norm": 0.6608487896863867, + "learning_rate": 4.13276942407572e-06, + "loss": 0.3101, + "step": 12606 + }, + { + "epoch": 0.5905747880264206, + "grad_norm": 0.5941366307930259, + "learning_rate": 4.1326258039623215e-06, + "loss": 0.2937, + "step": 12607 + }, + { + "epoch": 0.590621633016349, + "grad_norm": 0.5540906371997696, + "learning_rate": 4.1324821744535715e-06, + "loss": 0.2896, + "step": 12608 + }, + { + "epoch": 0.5906684780062772, + "grad_norm": 0.6056449404978738, + "learning_rate": 4.132338535550297e-06, + "loss": 0.2737, + "step": 12609 + }, + { + "epoch": 0.5907153229962055, + "grad_norm": 0.6081693919751877, + "learning_rate": 4.1321948872533245e-06, + "loss": 0.2793, + "step": 12610 + }, + { + "epoch": 0.5907621679861339, + "grad_norm": 0.6005980797041617, + "learning_rate": 4.132051229563481e-06, + "loss": 0.2869, + "step": 12611 + }, + { + "epoch": 0.5908090129760623, + "grad_norm": 0.6010144151582693, + "learning_rate": 4.131907562481593e-06, + "loss": 0.2641, + "step": 12612 + }, + { + "epoch": 0.5908558579659905, + "grad_norm": 0.5750647122140353, + "learning_rate": 4.131763886008486e-06, + "loss": 0.2605, + "step": 12613 + }, + { + "epoch": 0.5909027029559188, + "grad_norm": 0.5233792310416541, + "learning_rate": 4.131620200144989e-06, + "loss": 0.2567, + "step": 12614 + }, + { + "epoch": 0.5909495479458472, + "grad_norm": 0.6511834413471361, + "learning_rate": 4.131476504891928e-06, + "loss": 0.3034, + "step": 12615 + }, + { + "epoch": 0.5909963929357755, + "grad_norm": 0.6466668212136275, + "learning_rate": 4.131332800250129e-06, + "loss": 0.2711, + "step": 12616 + }, + { + "epoch": 0.5910432379257038, + "grad_norm": 0.5876367354921634, + "learning_rate": 4.13118908622042e-06, + "loss": 0.2767, + "step": 12617 + }, + { + "epoch": 0.5910900829156321, + "grad_norm": 0.6081976619790364, + "learning_rate": 4.131045362803628e-06, + "loss": 0.2738, + "step": 12618 + }, + { + "epoch": 0.5911369279055605, + "grad_norm": 0.618511352585323, + "learning_rate": 4.13090163000058e-06, + "loss": 0.2786, + "step": 12619 + }, + { + "epoch": 0.5911837728954888, + "grad_norm": 0.5757944586337282, + "learning_rate": 4.130757887812103e-06, + "loss": 0.2813, + "step": 12620 + }, + { + "epoch": 0.5912306178854172, + "grad_norm": 0.5897103801080115, + "learning_rate": 4.130614136239024e-06, + "loss": 0.2712, + "step": 12621 + }, + { + "epoch": 0.5912774628753454, + "grad_norm": 0.5963799983711088, + "learning_rate": 4.130470375282171e-06, + "loss": 0.3247, + "step": 12622 + }, + { + "epoch": 0.5913243078652738, + "grad_norm": 0.6186856442939076, + "learning_rate": 4.1303266049423695e-06, + "loss": 0.2823, + "step": 12623 + }, + { + "epoch": 0.5913711528552021, + "grad_norm": 0.6139593872413239, + "learning_rate": 4.130182825220449e-06, + "loss": 0.296, + "step": 12624 + }, + { + "epoch": 0.5914179978451305, + "grad_norm": 0.6570770814127295, + "learning_rate": 4.130039036117236e-06, + "loss": 0.2938, + "step": 12625 + }, + { + "epoch": 0.5914648428350587, + "grad_norm": 0.5637151751462626, + "learning_rate": 4.129895237633558e-06, + "loss": 0.2705, + "step": 12626 + }, + { + "epoch": 0.5915116878249871, + "grad_norm": 0.6364788177595793, + "learning_rate": 4.129751429770243e-06, + "loss": 0.3092, + "step": 12627 + }, + { + "epoch": 0.5915585328149154, + "grad_norm": 0.6288464962407742, + "learning_rate": 4.129607612528118e-06, + "loss": 0.2946, + "step": 12628 + }, + { + "epoch": 0.5916053778048438, + "grad_norm": 0.575706125378947, + "learning_rate": 4.12946378590801e-06, + "loss": 0.2925, + "step": 12629 + }, + { + "epoch": 0.5916522227947721, + "grad_norm": 0.6158645362201753, + "learning_rate": 4.129319949910748e-06, + "loss": 0.3034, + "step": 12630 + }, + { + "epoch": 0.5916990677847004, + "grad_norm": 0.6209636027568872, + "learning_rate": 4.129176104537159e-06, + "loss": 0.2977, + "step": 12631 + }, + { + "epoch": 0.5917459127746287, + "grad_norm": 0.5859314475542413, + "learning_rate": 4.129032249788072e-06, + "loss": 0.2745, + "step": 12632 + }, + { + "epoch": 0.5917927577645571, + "grad_norm": 0.6356162208820919, + "learning_rate": 4.128888385664314e-06, + "loss": 0.3147, + "step": 12633 + }, + { + "epoch": 0.5918396027544854, + "grad_norm": 0.5880380721181002, + "learning_rate": 4.128744512166711e-06, + "loss": 0.2796, + "step": 12634 + }, + { + "epoch": 0.5918864477444137, + "grad_norm": 0.5555628227750726, + "learning_rate": 4.128600629296093e-06, + "loss": 0.2889, + "step": 12635 + }, + { + "epoch": 0.591933292734342, + "grad_norm": 0.5780170265230037, + "learning_rate": 4.128456737053289e-06, + "loss": 0.2699, + "step": 12636 + }, + { + "epoch": 0.5919801377242704, + "grad_norm": 0.609689744098702, + "learning_rate": 4.128312835439125e-06, + "loss": 0.299, + "step": 12637 + }, + { + "epoch": 0.5920269827141987, + "grad_norm": 0.5998522699432405, + "learning_rate": 4.12816892445443e-06, + "loss": 0.2742, + "step": 12638 + }, + { + "epoch": 0.5920738277041271, + "grad_norm": 0.5902176529355144, + "learning_rate": 4.128025004100031e-06, + "loss": 0.2814, + "step": 12639 + }, + { + "epoch": 0.5921206726940553, + "grad_norm": 0.5524974966222097, + "learning_rate": 4.127881074376759e-06, + "loss": 0.262, + "step": 12640 + }, + { + "epoch": 0.5921675176839837, + "grad_norm": 0.5857100126663686, + "learning_rate": 4.127737135285439e-06, + "loss": 0.2862, + "step": 12641 + }, + { + "epoch": 0.592214362673912, + "grad_norm": 0.6087308448733529, + "learning_rate": 4.127593186826903e-06, + "loss": 0.2853, + "step": 12642 + }, + { + "epoch": 0.5922612076638404, + "grad_norm": 0.5683385587467263, + "learning_rate": 4.1274492290019755e-06, + "loss": 0.2738, + "step": 12643 + }, + { + "epoch": 0.5923080526537686, + "grad_norm": 0.5273974976569524, + "learning_rate": 4.127305261811487e-06, + "loss": 0.2481, + "step": 12644 + }, + { + "epoch": 0.592354897643697, + "grad_norm": 0.5886808359613276, + "learning_rate": 4.127161285256266e-06, + "loss": 0.305, + "step": 12645 + }, + { + "epoch": 0.5924017426336253, + "grad_norm": 0.6118853418587391, + "learning_rate": 4.127017299337141e-06, + "loss": 0.27, + "step": 12646 + }, + { + "epoch": 0.5924485876235537, + "grad_norm": 0.5729037576540367, + "learning_rate": 4.12687330405494e-06, + "loss": 0.2734, + "step": 12647 + }, + { + "epoch": 0.592495432613482, + "grad_norm": 0.6142241120809372, + "learning_rate": 4.126729299410492e-06, + "loss": 0.2786, + "step": 12648 + }, + { + "epoch": 0.5925422776034103, + "grad_norm": 0.6207972890191258, + "learning_rate": 4.126585285404626e-06, + "loss": 0.3037, + "step": 12649 + }, + { + "epoch": 0.5925891225933386, + "grad_norm": 0.5926317776561053, + "learning_rate": 4.1264412620381715e-06, + "loss": 0.2954, + "step": 12650 + }, + { + "epoch": 0.592635967583267, + "grad_norm": 0.6141418367237248, + "learning_rate": 4.126297229311954e-06, + "loss": 0.3013, + "step": 12651 + }, + { + "epoch": 0.5926828125731953, + "grad_norm": 0.5804345482247845, + "learning_rate": 4.126153187226807e-06, + "loss": 0.2627, + "step": 12652 + }, + { + "epoch": 0.5927296575631236, + "grad_norm": 0.6105047746726925, + "learning_rate": 4.126009135783555e-06, + "loss": 0.2793, + "step": 12653 + }, + { + "epoch": 0.5927765025530519, + "grad_norm": 0.656745159832816, + "learning_rate": 4.125865074983031e-06, + "loss": 0.3187, + "step": 12654 + }, + { + "epoch": 0.5928233475429803, + "grad_norm": 0.6330577139969803, + "learning_rate": 4.125721004826061e-06, + "loss": 0.3013, + "step": 12655 + }, + { + "epoch": 0.5928701925329086, + "grad_norm": 0.6340964263479555, + "learning_rate": 4.125576925313476e-06, + "loss": 0.335, + "step": 12656 + }, + { + "epoch": 0.592917037522837, + "grad_norm": 0.550311118771174, + "learning_rate": 4.125432836446104e-06, + "loss": 0.2797, + "step": 12657 + }, + { + "epoch": 0.5929638825127652, + "grad_norm": 0.620480607586871, + "learning_rate": 4.125288738224774e-06, + "loss": 0.2926, + "step": 12658 + }, + { + "epoch": 0.5930107275026936, + "grad_norm": 0.6491402015929432, + "learning_rate": 4.125144630650316e-06, + "loss": 0.2762, + "step": 12659 + }, + { + "epoch": 0.5930575724926219, + "grad_norm": 0.6494012615239878, + "learning_rate": 4.125000513723559e-06, + "loss": 0.3046, + "step": 12660 + }, + { + "epoch": 0.5931044174825503, + "grad_norm": 0.6464838308229025, + "learning_rate": 4.124856387445334e-06, + "loss": 0.301, + "step": 12661 + }, + { + "epoch": 0.5931512624724785, + "grad_norm": 0.6385345598965084, + "learning_rate": 4.124712251816467e-06, + "loss": 0.2792, + "step": 12662 + }, + { + "epoch": 0.5931981074624069, + "grad_norm": 0.6220996187326707, + "learning_rate": 4.1245681068377905e-06, + "loss": 0.2917, + "step": 12663 + }, + { + "epoch": 0.5932449524523352, + "grad_norm": 0.6079644545592001, + "learning_rate": 4.124423952510133e-06, + "loss": 0.3086, + "step": 12664 + }, + { + "epoch": 0.5932917974422636, + "grad_norm": 0.6716789320689165, + "learning_rate": 4.124279788834324e-06, + "loss": 0.3165, + "step": 12665 + }, + { + "epoch": 0.5933386424321919, + "grad_norm": 0.6328415039456582, + "learning_rate": 4.124135615811191e-06, + "loss": 0.2872, + "step": 12666 + }, + { + "epoch": 0.5933854874221202, + "grad_norm": 0.5484947883896242, + "learning_rate": 4.123991433441568e-06, + "loss": 0.2543, + "step": 12667 + }, + { + "epoch": 0.5934323324120485, + "grad_norm": 0.5926072877170673, + "learning_rate": 4.123847241726282e-06, + "loss": 0.2941, + "step": 12668 + }, + { + "epoch": 0.5934791774019769, + "grad_norm": 0.6291541817808763, + "learning_rate": 4.123703040666163e-06, + "loss": 0.2885, + "step": 12669 + }, + { + "epoch": 0.5935260223919052, + "grad_norm": 0.5915104859180074, + "learning_rate": 4.123558830262041e-06, + "loss": 0.2969, + "step": 12670 + }, + { + "epoch": 0.5935728673818335, + "grad_norm": 0.580380099808745, + "learning_rate": 4.123414610514746e-06, + "loss": 0.2837, + "step": 12671 + }, + { + "epoch": 0.5936197123717618, + "grad_norm": 0.5523173190522106, + "learning_rate": 4.123270381425107e-06, + "loss": 0.2777, + "step": 12672 + }, + { + "epoch": 0.5936665573616902, + "grad_norm": 0.5465509774918538, + "learning_rate": 4.1231261429939565e-06, + "loss": 0.2665, + "step": 12673 + }, + { + "epoch": 0.5937134023516185, + "grad_norm": 0.5751544759572175, + "learning_rate": 4.122981895222122e-06, + "loss": 0.276, + "step": 12674 + }, + { + "epoch": 0.5937602473415469, + "grad_norm": 0.563746450424061, + "learning_rate": 4.122837638110435e-06, + "loss": 0.2671, + "step": 12675 + }, + { + "epoch": 0.5938070923314751, + "grad_norm": 0.5598695802466229, + "learning_rate": 4.122693371659726e-06, + "loss": 0.3041, + "step": 12676 + }, + { + "epoch": 0.5938539373214035, + "grad_norm": 0.5769161745767598, + "learning_rate": 4.122549095870823e-06, + "loss": 0.2827, + "step": 12677 + }, + { + "epoch": 0.5939007823113318, + "grad_norm": 0.6013686949298015, + "learning_rate": 4.12240481074456e-06, + "loss": 0.2849, + "step": 12678 + }, + { + "epoch": 0.5939476273012602, + "grad_norm": 0.5451976442565828, + "learning_rate": 4.122260516281763e-06, + "loss": 0.2583, + "step": 12679 + }, + { + "epoch": 0.5939944722911884, + "grad_norm": 0.555422006570731, + "learning_rate": 4.122116212483266e-06, + "loss": 0.2543, + "step": 12680 + }, + { + "epoch": 0.5940413172811168, + "grad_norm": 0.6414892531342027, + "learning_rate": 4.121971899349896e-06, + "loss": 0.3173, + "step": 12681 + }, + { + "epoch": 0.5940881622710451, + "grad_norm": 0.6291400739408926, + "learning_rate": 4.121827576882487e-06, + "loss": 0.2973, + "step": 12682 + }, + { + "epoch": 0.5941350072609735, + "grad_norm": 0.6066041965578455, + "learning_rate": 4.121683245081867e-06, + "loss": 0.2664, + "step": 12683 + }, + { + "epoch": 0.5941818522509018, + "grad_norm": 0.6001642883096857, + "learning_rate": 4.121538903948869e-06, + "loss": 0.2917, + "step": 12684 + }, + { + "epoch": 0.59422869724083, + "grad_norm": 0.6138938058186543, + "learning_rate": 4.12139455348432e-06, + "loss": 0.2882, + "step": 12685 + }, + { + "epoch": 0.5942755422307584, + "grad_norm": 0.5768451367154478, + "learning_rate": 4.121250193689054e-06, + "loss": 0.2895, + "step": 12686 + }, + { + "epoch": 0.5943223872206868, + "grad_norm": 0.5954406579254196, + "learning_rate": 4.121105824563901e-06, + "loss": 0.308, + "step": 12687 + }, + { + "epoch": 0.5943692322106151, + "grad_norm": 0.5838920832527832, + "learning_rate": 4.120961446109692e-06, + "loss": 0.299, + "step": 12688 + }, + { + "epoch": 0.5944160772005433, + "grad_norm": 0.5583123392302242, + "learning_rate": 4.120817058327256e-06, + "loss": 0.258, + "step": 12689 + }, + { + "epoch": 0.5944629221904717, + "grad_norm": 0.6376408772527324, + "learning_rate": 4.120672661217424e-06, + "loss": 0.3152, + "step": 12690 + }, + { + "epoch": 0.5945097671804, + "grad_norm": 0.6739070099960806, + "learning_rate": 4.1205282547810304e-06, + "loss": 0.2799, + "step": 12691 + }, + { + "epoch": 0.5945566121703284, + "grad_norm": 0.6036158703842773, + "learning_rate": 4.120383839018903e-06, + "loss": 0.2788, + "step": 12692 + }, + { + "epoch": 0.5946034571602568, + "grad_norm": 0.6073899056039118, + "learning_rate": 4.120239413931875e-06, + "loss": 0.2806, + "step": 12693 + }, + { + "epoch": 0.594650302150185, + "grad_norm": 0.5781356631274512, + "learning_rate": 4.120094979520775e-06, + "loss": 0.3071, + "step": 12694 + }, + { + "epoch": 0.5946971471401133, + "grad_norm": 0.5287162589764598, + "learning_rate": 4.119950535786436e-06, + "loss": 0.2649, + "step": 12695 + }, + { + "epoch": 0.5947439921300417, + "grad_norm": 0.560234149203671, + "learning_rate": 4.119806082729689e-06, + "loss": 0.2702, + "step": 12696 + }, + { + "epoch": 0.59479083711997, + "grad_norm": 0.6452314723303211, + "learning_rate": 4.119661620351365e-06, + "loss": 0.3044, + "step": 12697 + }, + { + "epoch": 0.5948376821098983, + "grad_norm": 0.6467620174339536, + "learning_rate": 4.119517148652296e-06, + "loss": 0.2958, + "step": 12698 + }, + { + "epoch": 0.5948845270998266, + "grad_norm": 0.6334632422055094, + "learning_rate": 4.119372667633312e-06, + "loss": 0.303, + "step": 12699 + }, + { + "epoch": 0.594931372089755, + "grad_norm": 0.6210346077624657, + "learning_rate": 4.1192281772952455e-06, + "loss": 0.2952, + "step": 12700 + }, + { + "epoch": 0.5949782170796833, + "grad_norm": 0.5677261156622364, + "learning_rate": 4.119083677638929e-06, + "loss": 0.2827, + "step": 12701 + }, + { + "epoch": 0.5950250620696117, + "grad_norm": 0.5433538930502809, + "learning_rate": 4.118939168665191e-06, + "loss": 0.2747, + "step": 12702 + }, + { + "epoch": 0.5950719070595399, + "grad_norm": 0.5764840809988517, + "learning_rate": 4.1187946503748664e-06, + "loss": 0.2971, + "step": 12703 + }, + { + "epoch": 0.5951187520494683, + "grad_norm": 0.5861232060377319, + "learning_rate": 4.118650122768785e-06, + "loss": 0.2725, + "step": 12704 + }, + { + "epoch": 0.5951655970393966, + "grad_norm": 0.6199686326130563, + "learning_rate": 4.118505585847778e-06, + "loss": 0.3178, + "step": 12705 + }, + { + "epoch": 0.595212442029325, + "grad_norm": 0.5524273207458773, + "learning_rate": 4.118361039612679e-06, + "loss": 0.2717, + "step": 12706 + }, + { + "epoch": 0.5952592870192532, + "grad_norm": 0.5327480389336641, + "learning_rate": 4.11821648406432e-06, + "loss": 0.2693, + "step": 12707 + }, + { + "epoch": 0.5953061320091816, + "grad_norm": 0.5999603274880099, + "learning_rate": 4.118071919203531e-06, + "loss": 0.2886, + "step": 12708 + }, + { + "epoch": 0.5953529769991099, + "grad_norm": 0.6045292806415565, + "learning_rate": 4.117927345031144e-06, + "loss": 0.2952, + "step": 12709 + }, + { + "epoch": 0.5953998219890383, + "grad_norm": 0.5945725607450417, + "learning_rate": 4.1177827615479924e-06, + "loss": 0.2976, + "step": 12710 + }, + { + "epoch": 0.5954466669789666, + "grad_norm": 0.5842732839234936, + "learning_rate": 4.1176381687549085e-06, + "loss": 0.2883, + "step": 12711 + }, + { + "epoch": 0.5954935119688949, + "grad_norm": 0.5905518705613915, + "learning_rate": 4.1174935666527224e-06, + "loss": 0.2754, + "step": 12712 + }, + { + "epoch": 0.5955403569588232, + "grad_norm": 0.5523857211268335, + "learning_rate": 4.117348955242268e-06, + "loss": 0.2787, + "step": 12713 + }, + { + "epoch": 0.5955872019487516, + "grad_norm": 0.5652760890841373, + "learning_rate": 4.117204334524376e-06, + "loss": 0.2716, + "step": 12714 + }, + { + "epoch": 0.5956340469386799, + "grad_norm": 0.6048221210681779, + "learning_rate": 4.11705970449988e-06, + "loss": 0.2944, + "step": 12715 + }, + { + "epoch": 0.5956808919286082, + "grad_norm": 0.6427065666240657, + "learning_rate": 4.116915065169612e-06, + "loss": 0.2999, + "step": 12716 + }, + { + "epoch": 0.5957277369185365, + "grad_norm": 0.5713216265252554, + "learning_rate": 4.1167704165344045e-06, + "loss": 0.3017, + "step": 12717 + }, + { + "epoch": 0.5957745819084649, + "grad_norm": 0.5858582878506062, + "learning_rate": 4.116625758595088e-06, + "loss": 0.2698, + "step": 12718 + }, + { + "epoch": 0.5958214268983932, + "grad_norm": 0.5651647480585338, + "learning_rate": 4.116481091352499e-06, + "loss": 0.2959, + "step": 12719 + }, + { + "epoch": 0.5958682718883216, + "grad_norm": 0.5665587256965287, + "learning_rate": 4.116336414807466e-06, + "loss": 0.2891, + "step": 12720 + }, + { + "epoch": 0.5959151168782498, + "grad_norm": 0.5673907078767373, + "learning_rate": 4.1161917289608235e-06, + "loss": 0.2777, + "step": 12721 + }, + { + "epoch": 0.5959619618681782, + "grad_norm": 0.630668375939894, + "learning_rate": 4.116047033813405e-06, + "loss": 0.2871, + "step": 12722 + }, + { + "epoch": 0.5960088068581065, + "grad_norm": 0.6000535132905467, + "learning_rate": 4.11590232936604e-06, + "loss": 0.3162, + "step": 12723 + }, + { + "epoch": 0.5960556518480349, + "grad_norm": 0.5678888630039708, + "learning_rate": 4.1157576156195645e-06, + "loss": 0.261, + "step": 12724 + }, + { + "epoch": 0.5961024968379631, + "grad_norm": 0.6243923071089302, + "learning_rate": 4.11561289257481e-06, + "loss": 0.292, + "step": 12725 + }, + { + "epoch": 0.5961493418278915, + "grad_norm": 0.5630312783133065, + "learning_rate": 4.115468160232608e-06, + "loss": 0.267, + "step": 12726 + }, + { + "epoch": 0.5961961868178198, + "grad_norm": 0.5950046413995218, + "learning_rate": 4.115323418593794e-06, + "loss": 0.2945, + "step": 12727 + }, + { + "epoch": 0.5962430318077482, + "grad_norm": 0.573742009586318, + "learning_rate": 4.1151786676592e-06, + "loss": 0.2986, + "step": 12728 + }, + { + "epoch": 0.5962898767976765, + "grad_norm": 0.6849819206150719, + "learning_rate": 4.115033907429658e-06, + "loss": 0.3297, + "step": 12729 + }, + { + "epoch": 0.5963367217876048, + "grad_norm": 0.6028869237285194, + "learning_rate": 4.114889137906002e-06, + "loss": 0.2644, + "step": 12730 + }, + { + "epoch": 0.5963835667775331, + "grad_norm": 0.6177422415208479, + "learning_rate": 4.114744359089066e-06, + "loss": 0.2646, + "step": 12731 + }, + { + "epoch": 0.5964304117674615, + "grad_norm": 0.6340526156806371, + "learning_rate": 4.11459957097968e-06, + "loss": 0.2982, + "step": 12732 + }, + { + "epoch": 0.5964772567573898, + "grad_norm": 0.5890213854049106, + "learning_rate": 4.11445477357868e-06, + "loss": 0.3041, + "step": 12733 + }, + { + "epoch": 0.5965241017473181, + "grad_norm": 0.6213955397816205, + "learning_rate": 4.114309966886899e-06, + "loss": 0.2657, + "step": 12734 + }, + { + "epoch": 0.5965709467372464, + "grad_norm": 0.5875563070867484, + "learning_rate": 4.11416515090517e-06, + "loss": 0.278, + "step": 12735 + }, + { + "epoch": 0.5966177917271748, + "grad_norm": 0.6176299101023848, + "learning_rate": 4.114020325634326e-06, + "loss": 0.3032, + "step": 12736 + }, + { + "epoch": 0.5966646367171031, + "grad_norm": 0.6055928918503325, + "learning_rate": 4.113875491075201e-06, + "loss": 0.2973, + "step": 12737 + }, + { + "epoch": 0.5967114817070315, + "grad_norm": 0.6515127141419388, + "learning_rate": 4.113730647228628e-06, + "loss": 0.2729, + "step": 12738 + }, + { + "epoch": 0.5967583266969597, + "grad_norm": 0.6334022327490897, + "learning_rate": 4.11358579409544e-06, + "loss": 0.2976, + "step": 12739 + }, + { + "epoch": 0.5968051716868881, + "grad_norm": 0.5885656729138485, + "learning_rate": 4.1134409316764725e-06, + "loss": 0.2833, + "step": 12740 + }, + { + "epoch": 0.5968520166768164, + "grad_norm": 0.6770383030512379, + "learning_rate": 4.113296059972557e-06, + "loss": 0.29, + "step": 12741 + }, + { + "epoch": 0.5968988616667448, + "grad_norm": 0.659640113890844, + "learning_rate": 4.113151178984528e-06, + "loss": 0.2791, + "step": 12742 + }, + { + "epoch": 0.596945706656673, + "grad_norm": 0.5947950824147332, + "learning_rate": 4.113006288713221e-06, + "loss": 0.3084, + "step": 12743 + }, + { + "epoch": 0.5969925516466014, + "grad_norm": 0.635649767747002, + "learning_rate": 4.112861389159466e-06, + "loss": 0.265, + "step": 12744 + }, + { + "epoch": 0.5970393966365297, + "grad_norm": 0.6136502390633156, + "learning_rate": 4.1127164803241e-06, + "loss": 0.2912, + "step": 12745 + }, + { + "epoch": 0.5970862416264581, + "grad_norm": 0.6359711350712508, + "learning_rate": 4.112571562207958e-06, + "loss": 0.2758, + "step": 12746 + }, + { + "epoch": 0.5971330866163864, + "grad_norm": 0.569408525762244, + "learning_rate": 4.112426634811869e-06, + "loss": 0.2762, + "step": 12747 + }, + { + "epoch": 0.5971799316063147, + "grad_norm": 0.6025239007425245, + "learning_rate": 4.112281698136671e-06, + "loss": 0.2945, + "step": 12748 + }, + { + "epoch": 0.597226776596243, + "grad_norm": 0.6104603673675323, + "learning_rate": 4.1121367521831974e-06, + "loss": 0.2943, + "step": 12749 + }, + { + "epoch": 0.5972736215861714, + "grad_norm": 0.6751374812063743, + "learning_rate": 4.111991796952283e-06, + "loss": 0.3222, + "step": 12750 + }, + { + "epoch": 0.5973204665760997, + "grad_norm": 0.5694904276789262, + "learning_rate": 4.111846832444759e-06, + "loss": 0.277, + "step": 12751 + }, + { + "epoch": 0.597367311566028, + "grad_norm": 0.5981364859881382, + "learning_rate": 4.111701858661463e-06, + "loss": 0.278, + "step": 12752 + }, + { + "epoch": 0.5974141565559563, + "grad_norm": 0.6072597634039185, + "learning_rate": 4.1115568756032265e-06, + "loss": 0.2919, + "step": 12753 + }, + { + "epoch": 0.5974610015458847, + "grad_norm": 0.6157122866729089, + "learning_rate": 4.111411883270886e-06, + "loss": 0.2881, + "step": 12754 + }, + { + "epoch": 0.597507846535813, + "grad_norm": 0.560663968015161, + "learning_rate": 4.111266881665275e-06, + "loss": 0.2699, + "step": 12755 + }, + { + "epoch": 0.5975546915257414, + "grad_norm": 0.5583113799468631, + "learning_rate": 4.111121870787229e-06, + "loss": 0.2796, + "step": 12756 + }, + { + "epoch": 0.5976015365156696, + "grad_norm": 0.5566496451055276, + "learning_rate": 4.11097685063758e-06, + "loss": 0.2773, + "step": 12757 + }, + { + "epoch": 0.597648381505598, + "grad_norm": 0.6258761075137608, + "learning_rate": 4.1108318212171655e-06, + "loss": 0.2832, + "step": 12758 + }, + { + "epoch": 0.5976952264955263, + "grad_norm": 0.6091001879555319, + "learning_rate": 4.110686782526818e-06, + "loss": 0.2548, + "step": 12759 + }, + { + "epoch": 0.5977420714854547, + "grad_norm": 0.6481099674062016, + "learning_rate": 4.110541734567373e-06, + "loss": 0.2897, + "step": 12760 + }, + { + "epoch": 0.5977889164753829, + "grad_norm": 0.6098545919342991, + "learning_rate": 4.1103966773396655e-06, + "loss": 0.3075, + "step": 12761 + }, + { + "epoch": 0.5978357614653113, + "grad_norm": 0.6167687549697934, + "learning_rate": 4.11025161084453e-06, + "loss": 0.2882, + "step": 12762 + }, + { + "epoch": 0.5978826064552396, + "grad_norm": 0.5954806112917701, + "learning_rate": 4.1101065350828e-06, + "loss": 0.2656, + "step": 12763 + }, + { + "epoch": 0.597929451445168, + "grad_norm": 0.5835847682890996, + "learning_rate": 4.109961450055312e-06, + "loss": 0.253, + "step": 12764 + }, + { + "epoch": 0.5979762964350963, + "grad_norm": 0.6671276167218978, + "learning_rate": 4.1098163557629016e-06, + "loss": 0.2913, + "step": 12765 + }, + { + "epoch": 0.5980231414250246, + "grad_norm": 0.613609078941796, + "learning_rate": 4.109671252206402e-06, + "loss": 0.2866, + "step": 12766 + }, + { + "epoch": 0.5980699864149529, + "grad_norm": 0.6067507835010154, + "learning_rate": 4.109526139386649e-06, + "loss": 0.2787, + "step": 12767 + }, + { + "epoch": 0.5981168314048813, + "grad_norm": 0.6793744772183123, + "learning_rate": 4.109381017304477e-06, + "loss": 0.2997, + "step": 12768 + }, + { + "epoch": 0.5981636763948096, + "grad_norm": 0.603952693700679, + "learning_rate": 4.109235885960723e-06, + "loss": 0.2922, + "step": 12769 + }, + { + "epoch": 0.5982105213847378, + "grad_norm": 0.5868709117335696, + "learning_rate": 4.10909074535622e-06, + "loss": 0.2716, + "step": 12770 + }, + { + "epoch": 0.5982573663746662, + "grad_norm": 0.6385496521937669, + "learning_rate": 4.108945595491804e-06, + "loss": 0.3058, + "step": 12771 + }, + { + "epoch": 0.5983042113645946, + "grad_norm": 0.6376466785931955, + "learning_rate": 4.10880043636831e-06, + "loss": 0.3327, + "step": 12772 + }, + { + "epoch": 0.5983510563545229, + "grad_norm": 0.6956546819799273, + "learning_rate": 4.1086552679865755e-06, + "loss": 0.3076, + "step": 12773 + }, + { + "epoch": 0.5983979013444513, + "grad_norm": 0.6731406058184036, + "learning_rate": 4.108510090347433e-06, + "loss": 0.3157, + "step": 12774 + }, + { + "epoch": 0.5984447463343795, + "grad_norm": 0.7294463667088378, + "learning_rate": 4.10836490345172e-06, + "loss": 0.3051, + "step": 12775 + }, + { + "epoch": 0.5984915913243078, + "grad_norm": 0.676945788293545, + "learning_rate": 4.10821970730027e-06, + "loss": 0.2604, + "step": 12776 + }, + { + "epoch": 0.5985384363142362, + "grad_norm": 0.5979122830431235, + "learning_rate": 4.1080745018939216e-06, + "loss": 0.2929, + "step": 12777 + }, + { + "epoch": 0.5985852813041646, + "grad_norm": 0.577302172010393, + "learning_rate": 4.107929287233508e-06, + "loss": 0.2782, + "step": 12778 + }, + { + "epoch": 0.5986321262940928, + "grad_norm": 0.5694278596575025, + "learning_rate": 4.107784063319864e-06, + "loss": 0.2845, + "step": 12779 + }, + { + "epoch": 0.5986789712840211, + "grad_norm": 0.5751942055470052, + "learning_rate": 4.107638830153829e-06, + "loss": 0.2709, + "step": 12780 + }, + { + "epoch": 0.5987258162739495, + "grad_norm": 0.5956895328154934, + "learning_rate": 4.107493587736235e-06, + "loss": 0.2771, + "step": 12781 + }, + { + "epoch": 0.5987726612638778, + "grad_norm": 0.6206891816773277, + "learning_rate": 4.10734833606792e-06, + "loss": 0.2875, + "step": 12782 + }, + { + "epoch": 0.5988195062538062, + "grad_norm": 0.6253342329993355, + "learning_rate": 4.1072030751497195e-06, + "loss": 0.2792, + "step": 12783 + }, + { + "epoch": 0.5988663512437344, + "grad_norm": 0.5626303418017922, + "learning_rate": 4.107057804982469e-06, + "loss": 0.2562, + "step": 12784 + }, + { + "epoch": 0.5989131962336628, + "grad_norm": 0.6413427922689983, + "learning_rate": 4.106912525567005e-06, + "loss": 0.2791, + "step": 12785 + }, + { + "epoch": 0.5989600412235911, + "grad_norm": 0.627819727442218, + "learning_rate": 4.106767236904163e-06, + "loss": 0.287, + "step": 12786 + }, + { + "epoch": 0.5990068862135195, + "grad_norm": 0.5796299636954235, + "learning_rate": 4.10662193899478e-06, + "loss": 0.2556, + "step": 12787 + }, + { + "epoch": 0.5990537312034477, + "grad_norm": 0.6270371900335271, + "learning_rate": 4.10647663183969e-06, + "loss": 0.2896, + "step": 12788 + }, + { + "epoch": 0.5991005761933761, + "grad_norm": 0.655250231568074, + "learning_rate": 4.106331315439732e-06, + "loss": 0.3175, + "step": 12789 + }, + { + "epoch": 0.5991474211833044, + "grad_norm": 0.5713807617534722, + "learning_rate": 4.106185989795741e-06, + "loss": 0.2899, + "step": 12790 + }, + { + "epoch": 0.5991942661732328, + "grad_norm": 0.5676394514397, + "learning_rate": 4.1060406549085526e-06, + "loss": 0.2842, + "step": 12791 + }, + { + "epoch": 0.5992411111631611, + "grad_norm": 0.5817837217017146, + "learning_rate": 4.105895310779005e-06, + "loss": 0.2684, + "step": 12792 + }, + { + "epoch": 0.5992879561530894, + "grad_norm": 0.6127213342826312, + "learning_rate": 4.105749957407933e-06, + "loss": 0.2831, + "step": 12793 + }, + { + "epoch": 0.5993348011430177, + "grad_norm": 0.6635925818982712, + "learning_rate": 4.105604594796173e-06, + "loss": 0.2989, + "step": 12794 + }, + { + "epoch": 0.5993816461329461, + "grad_norm": 0.6167478943895996, + "learning_rate": 4.105459222944563e-06, + "loss": 0.2785, + "step": 12795 + }, + { + "epoch": 0.5994284911228744, + "grad_norm": 0.5519808021720041, + "learning_rate": 4.105313841853939e-06, + "loss": 0.2637, + "step": 12796 + }, + { + "epoch": 0.5994753361128027, + "grad_norm": 0.5724960040455674, + "learning_rate": 4.105168451525137e-06, + "loss": 0.2886, + "step": 12797 + }, + { + "epoch": 0.599522181102731, + "grad_norm": 0.6201818839785387, + "learning_rate": 4.105023051958993e-06, + "loss": 0.2961, + "step": 12798 + }, + { + "epoch": 0.5995690260926594, + "grad_norm": 0.6151208999711663, + "learning_rate": 4.104877643156346e-06, + "loss": 0.2807, + "step": 12799 + }, + { + "epoch": 0.5996158710825877, + "grad_norm": 0.625599945486676, + "learning_rate": 4.104732225118031e-06, + "loss": 0.2824, + "step": 12800 + }, + { + "epoch": 0.5996627160725161, + "grad_norm": 0.6192003490042203, + "learning_rate": 4.104586797844885e-06, + "loss": 0.3105, + "step": 12801 + }, + { + "epoch": 0.5997095610624443, + "grad_norm": 0.5563486164679218, + "learning_rate": 4.1044413613377455e-06, + "loss": 0.2582, + "step": 12802 + }, + { + "epoch": 0.5997564060523727, + "grad_norm": 0.6133107148463485, + "learning_rate": 4.104295915597449e-06, + "loss": 0.2987, + "step": 12803 + }, + { + "epoch": 0.599803251042301, + "grad_norm": 0.5803923539105992, + "learning_rate": 4.104150460624834e-06, + "loss": 0.2828, + "step": 12804 + }, + { + "epoch": 0.5998500960322294, + "grad_norm": 0.5754128147336552, + "learning_rate": 4.104004996420735e-06, + "loss": 0.2882, + "step": 12805 + }, + { + "epoch": 0.5998969410221576, + "grad_norm": 0.5636205709080614, + "learning_rate": 4.1038595229859905e-06, + "loss": 0.2954, + "step": 12806 + }, + { + "epoch": 0.599943786012086, + "grad_norm": 0.5842750006087125, + "learning_rate": 4.103714040321438e-06, + "loss": 0.3011, + "step": 12807 + }, + { + "epoch": 0.5999906310020143, + "grad_norm": 0.5940032752949752, + "learning_rate": 4.103568548427914e-06, + "loss": 0.2593, + "step": 12808 + }, + { + "epoch": 0.6000374759919427, + "grad_norm": 0.6123111708433826, + "learning_rate": 4.1034230473062564e-06, + "loss": 0.3157, + "step": 12809 + }, + { + "epoch": 0.600084320981871, + "grad_norm": 0.5882820108525233, + "learning_rate": 4.103277536957302e-06, + "loss": 0.2841, + "step": 12810 + }, + { + "epoch": 0.6001311659717993, + "grad_norm": 0.6169466521701462, + "learning_rate": 4.103132017381888e-06, + "loss": 0.2956, + "step": 12811 + }, + { + "epoch": 0.6001780109617276, + "grad_norm": 0.5659573424396264, + "learning_rate": 4.1029864885808525e-06, + "loss": 0.2783, + "step": 12812 + }, + { + "epoch": 0.600224855951656, + "grad_norm": 0.6005833753295761, + "learning_rate": 4.102840950555032e-06, + "loss": 0.2975, + "step": 12813 + }, + { + "epoch": 0.6002717009415843, + "grad_norm": 0.5827540430290125, + "learning_rate": 4.102695403305266e-06, + "loss": 0.3036, + "step": 12814 + }, + { + "epoch": 0.6003185459315126, + "grad_norm": 0.5792144094053401, + "learning_rate": 4.10254984683239e-06, + "loss": 0.2958, + "step": 12815 + }, + { + "epoch": 0.6003653909214409, + "grad_norm": 0.581601615577912, + "learning_rate": 4.102404281137243e-06, + "loss": 0.2835, + "step": 12816 + }, + { + "epoch": 0.6004122359113693, + "grad_norm": 0.5982106000764381, + "learning_rate": 4.102258706220661e-06, + "loss": 0.2776, + "step": 12817 + }, + { + "epoch": 0.6004590809012976, + "grad_norm": 0.5867829966886617, + "learning_rate": 4.102113122083484e-06, + "loss": 0.2843, + "step": 12818 + }, + { + "epoch": 0.600505925891226, + "grad_norm": 0.5760375231158262, + "learning_rate": 4.101967528726548e-06, + "loss": 0.2803, + "step": 12819 + }, + { + "epoch": 0.6005527708811542, + "grad_norm": 0.5774981957117489, + "learning_rate": 4.101821926150692e-06, + "loss": 0.285, + "step": 12820 + }, + { + "epoch": 0.6005996158710826, + "grad_norm": 0.5666439207477085, + "learning_rate": 4.101676314356752e-06, + "loss": 0.271, + "step": 12821 + }, + { + "epoch": 0.6006464608610109, + "grad_norm": 0.6050675595298911, + "learning_rate": 4.101530693345569e-06, + "loss": 0.2666, + "step": 12822 + }, + { + "epoch": 0.6006933058509393, + "grad_norm": 0.6184545118233513, + "learning_rate": 4.101385063117979e-06, + "loss": 0.2961, + "step": 12823 + }, + { + "epoch": 0.6007401508408675, + "grad_norm": 0.6493926700290644, + "learning_rate": 4.1012394236748195e-06, + "loss": 0.3022, + "step": 12824 + }, + { + "epoch": 0.6007869958307959, + "grad_norm": 0.597894618690812, + "learning_rate": 4.101093775016931e-06, + "loss": 0.2785, + "step": 12825 + }, + { + "epoch": 0.6008338408207242, + "grad_norm": 0.57434510174103, + "learning_rate": 4.100948117145149e-06, + "loss": 0.2594, + "step": 12826 + }, + { + "epoch": 0.6008806858106526, + "grad_norm": 0.6156184685101475, + "learning_rate": 4.100802450060314e-06, + "loss": 0.256, + "step": 12827 + }, + { + "epoch": 0.6009275308005809, + "grad_norm": 0.6059164629582374, + "learning_rate": 4.100656773763263e-06, + "loss": 0.2725, + "step": 12828 + }, + { + "epoch": 0.6009743757905092, + "grad_norm": 0.5530125222366458, + "learning_rate": 4.100511088254834e-06, + "loss": 0.2591, + "step": 12829 + }, + { + "epoch": 0.6010212207804375, + "grad_norm": 0.6458497792448256, + "learning_rate": 4.100365393535866e-06, + "loss": 0.3031, + "step": 12830 + }, + { + "epoch": 0.6010680657703659, + "grad_norm": 0.59553685014254, + "learning_rate": 4.100219689607198e-06, + "loss": 0.2853, + "step": 12831 + }, + { + "epoch": 0.6011149107602942, + "grad_norm": 0.5604804291748907, + "learning_rate": 4.1000739764696675e-06, + "loss": 0.2666, + "step": 12832 + }, + { + "epoch": 0.6011617557502225, + "grad_norm": 0.6266355122185252, + "learning_rate": 4.099928254124114e-06, + "loss": 0.2871, + "step": 12833 + }, + { + "epoch": 0.6012086007401508, + "grad_norm": 0.6113716735738562, + "learning_rate": 4.0997825225713746e-06, + "loss": 0.2794, + "step": 12834 + }, + { + "epoch": 0.6012554457300792, + "grad_norm": 0.5755925884849292, + "learning_rate": 4.099636781812289e-06, + "loss": 0.2915, + "step": 12835 + }, + { + "epoch": 0.6013022907200075, + "grad_norm": 0.5531972645106042, + "learning_rate": 4.099491031847697e-06, + "loss": 0.2804, + "step": 12836 + }, + { + "epoch": 0.6013491357099359, + "grad_norm": 0.5975565796885632, + "learning_rate": 4.099345272678435e-06, + "loss": 0.2769, + "step": 12837 + }, + { + "epoch": 0.6013959806998641, + "grad_norm": 0.5482139466695711, + "learning_rate": 4.099199504305343e-06, + "loss": 0.2722, + "step": 12838 + }, + { + "epoch": 0.6014428256897925, + "grad_norm": 0.6855087044781755, + "learning_rate": 4.099053726729259e-06, + "loss": 0.3199, + "step": 12839 + }, + { + "epoch": 0.6014896706797208, + "grad_norm": 0.6294002958648104, + "learning_rate": 4.098907939951025e-06, + "loss": 0.3, + "step": 12840 + }, + { + "epoch": 0.6015365156696492, + "grad_norm": 0.5741696868247383, + "learning_rate": 4.0987621439714765e-06, + "loss": 0.2628, + "step": 12841 + }, + { + "epoch": 0.6015833606595774, + "grad_norm": 0.6028726192358038, + "learning_rate": 4.098616338791453e-06, + "loss": 0.2987, + "step": 12842 + }, + { + "epoch": 0.6016302056495058, + "grad_norm": 0.5750315653675037, + "learning_rate": 4.098470524411795e-06, + "loss": 0.2676, + "step": 12843 + }, + { + "epoch": 0.6016770506394341, + "grad_norm": 0.6242030224856466, + "learning_rate": 4.09832470083334e-06, + "loss": 0.2809, + "step": 12844 + }, + { + "epoch": 0.6017238956293625, + "grad_norm": 0.6003289211497157, + "learning_rate": 4.098178868056929e-06, + "loss": 0.282, + "step": 12845 + }, + { + "epoch": 0.6017707406192908, + "grad_norm": 0.572145287735151, + "learning_rate": 4.0980330260834e-06, + "loss": 0.2959, + "step": 12846 + }, + { + "epoch": 0.601817585609219, + "grad_norm": 0.5831932669274147, + "learning_rate": 4.097887174913593e-06, + "loss": 0.2863, + "step": 12847 + }, + { + "epoch": 0.6018644305991474, + "grad_norm": 0.5574857577461154, + "learning_rate": 4.097741314548347e-06, + "loss": 0.2932, + "step": 12848 + }, + { + "epoch": 0.6019112755890758, + "grad_norm": 0.587812583715547, + "learning_rate": 4.0975954449885005e-06, + "loss": 0.2829, + "step": 12849 + }, + { + "epoch": 0.6019581205790041, + "grad_norm": 0.6050047703834209, + "learning_rate": 4.0974495662348945e-06, + "loss": 0.2743, + "step": 12850 + }, + { + "epoch": 0.6020049655689323, + "grad_norm": 0.5941128559899588, + "learning_rate": 4.097303678288367e-06, + "loss": 0.2929, + "step": 12851 + }, + { + "epoch": 0.6020518105588607, + "grad_norm": 0.5849150517234348, + "learning_rate": 4.097157781149758e-06, + "loss": 0.2867, + "step": 12852 + }, + { + "epoch": 0.602098655548789, + "grad_norm": 0.5658676934794058, + "learning_rate": 4.097011874819909e-06, + "loss": 0.2704, + "step": 12853 + }, + { + "epoch": 0.6021455005387174, + "grad_norm": 0.5539808733723246, + "learning_rate": 4.096865959299657e-06, + "loss": 0.2749, + "step": 12854 + }, + { + "epoch": 0.6021923455286458, + "grad_norm": 0.6007479380957276, + "learning_rate": 4.096720034589843e-06, + "loss": 0.305, + "step": 12855 + }, + { + "epoch": 0.602239190518574, + "grad_norm": 0.573625584308433, + "learning_rate": 4.096574100691306e-06, + "loss": 0.2927, + "step": 12856 + }, + { + "epoch": 0.6022860355085023, + "grad_norm": 0.5932249295365647, + "learning_rate": 4.096428157604887e-06, + "loss": 0.3223, + "step": 12857 + }, + { + "epoch": 0.6023328804984307, + "grad_norm": 0.601534540366976, + "learning_rate": 4.096282205331425e-06, + "loss": 0.2845, + "step": 12858 + }, + { + "epoch": 0.602379725488359, + "grad_norm": 0.5591850032499498, + "learning_rate": 4.09613624387176e-06, + "loss": 0.2951, + "step": 12859 + }, + { + "epoch": 0.6024265704782873, + "grad_norm": 0.5860899516849625, + "learning_rate": 4.095990273226733e-06, + "loss": 0.2809, + "step": 12860 + }, + { + "epoch": 0.6024734154682156, + "grad_norm": 0.6005891611093122, + "learning_rate": 4.095844293397182e-06, + "loss": 0.2811, + "step": 12861 + }, + { + "epoch": 0.602520260458144, + "grad_norm": 0.5739317537755153, + "learning_rate": 4.095698304383948e-06, + "loss": 0.2913, + "step": 12862 + }, + { + "epoch": 0.6025671054480723, + "grad_norm": 0.5744187067649692, + "learning_rate": 4.0955523061878726e-06, + "loss": 0.2673, + "step": 12863 + }, + { + "epoch": 0.6026139504380007, + "grad_norm": 0.6341423063224779, + "learning_rate": 4.0954062988097935e-06, + "loss": 0.2932, + "step": 12864 + }, + { + "epoch": 0.6026607954279289, + "grad_norm": 0.5568104333003462, + "learning_rate": 4.0952602822505525e-06, + "loss": 0.2717, + "step": 12865 + }, + { + "epoch": 0.6027076404178573, + "grad_norm": 0.6002986058359369, + "learning_rate": 4.095114256510989e-06, + "loss": 0.2879, + "step": 12866 + }, + { + "epoch": 0.6027544854077856, + "grad_norm": 0.5894202524226898, + "learning_rate": 4.094968221591945e-06, + "loss": 0.2943, + "step": 12867 + }, + { + "epoch": 0.602801330397714, + "grad_norm": 0.5691151298682824, + "learning_rate": 4.0948221774942594e-06, + "loss": 0.2848, + "step": 12868 + }, + { + "epoch": 0.6028481753876422, + "grad_norm": 0.5327484993803543, + "learning_rate": 4.094676124218773e-06, + "loss": 0.2684, + "step": 12869 + }, + { + "epoch": 0.6028950203775706, + "grad_norm": 0.6188675977090363, + "learning_rate": 4.094530061766325e-06, + "loss": 0.2898, + "step": 12870 + }, + { + "epoch": 0.6029418653674989, + "grad_norm": 0.6069577508810126, + "learning_rate": 4.094383990137759e-06, + "loss": 0.2917, + "step": 12871 + }, + { + "epoch": 0.6029887103574273, + "grad_norm": 0.5447756312375006, + "learning_rate": 4.094237909333914e-06, + "loss": 0.2664, + "step": 12872 + }, + { + "epoch": 0.6030355553473556, + "grad_norm": 0.5763540670747767, + "learning_rate": 4.094091819355629e-06, + "loss": 0.2728, + "step": 12873 + }, + { + "epoch": 0.6030824003372839, + "grad_norm": 0.6357257079905648, + "learning_rate": 4.093945720203747e-06, + "loss": 0.303, + "step": 12874 + }, + { + "epoch": 0.6031292453272122, + "grad_norm": 0.6196445170644319, + "learning_rate": 4.0937996118791084e-06, + "loss": 0.2856, + "step": 12875 + }, + { + "epoch": 0.6031760903171406, + "grad_norm": 0.6331835678312738, + "learning_rate": 4.0936534943825535e-06, + "loss": 0.28, + "step": 12876 + }, + { + "epoch": 0.6032229353070689, + "grad_norm": 0.6117425664822096, + "learning_rate": 4.093507367714923e-06, + "loss": 0.2822, + "step": 12877 + }, + { + "epoch": 0.6032697802969972, + "grad_norm": 0.5982661379098242, + "learning_rate": 4.093361231877058e-06, + "loss": 0.3065, + "step": 12878 + }, + { + "epoch": 0.6033166252869255, + "grad_norm": 0.580972486706783, + "learning_rate": 4.093215086869799e-06, + "loss": 0.274, + "step": 12879 + }, + { + "epoch": 0.6033634702768539, + "grad_norm": 0.607137309312099, + "learning_rate": 4.0930689326939895e-06, + "loss": 0.2895, + "step": 12880 + }, + { + "epoch": 0.6034103152667822, + "grad_norm": 0.6038139597213893, + "learning_rate": 4.092922769350467e-06, + "loss": 0.2947, + "step": 12881 + }, + { + "epoch": 0.6034571602567106, + "grad_norm": 0.6523026109852249, + "learning_rate": 4.092776596840075e-06, + "loss": 0.2957, + "step": 12882 + }, + { + "epoch": 0.6035040052466388, + "grad_norm": 0.5688346141384064, + "learning_rate": 4.092630415163654e-06, + "loss": 0.2895, + "step": 12883 + }, + { + "epoch": 0.6035508502365672, + "grad_norm": 0.6057879356320992, + "learning_rate": 4.092484224322045e-06, + "loss": 0.2931, + "step": 12884 + }, + { + "epoch": 0.6035976952264955, + "grad_norm": 0.63385552003771, + "learning_rate": 4.0923380243160895e-06, + "loss": 0.3014, + "step": 12885 + }, + { + "epoch": 0.6036445402164239, + "grad_norm": 0.610113266168926, + "learning_rate": 4.092191815146629e-06, + "loss": 0.2884, + "step": 12886 + }, + { + "epoch": 0.6036913852063521, + "grad_norm": 0.6239452063001591, + "learning_rate": 4.092045596814506e-06, + "loss": 0.2804, + "step": 12887 + }, + { + "epoch": 0.6037382301962805, + "grad_norm": 0.5934292044678128, + "learning_rate": 4.091899369320559e-06, + "loss": 0.2968, + "step": 12888 + }, + { + "epoch": 0.6037850751862088, + "grad_norm": 0.6274817413075499, + "learning_rate": 4.0917531326656326e-06, + "loss": 0.2715, + "step": 12889 + }, + { + "epoch": 0.6038319201761372, + "grad_norm": 0.6053563684675379, + "learning_rate": 4.091606886850566e-06, + "loss": 0.2836, + "step": 12890 + }, + { + "epoch": 0.6038787651660655, + "grad_norm": 0.6353786396069077, + "learning_rate": 4.091460631876202e-06, + "loss": 0.2823, + "step": 12891 + }, + { + "epoch": 0.6039256101559938, + "grad_norm": 0.5599784048324444, + "learning_rate": 4.091314367743382e-06, + "loss": 0.2712, + "step": 12892 + }, + { + "epoch": 0.6039724551459221, + "grad_norm": 0.6175760880792729, + "learning_rate": 4.091168094452948e-06, + "loss": 0.2996, + "step": 12893 + }, + { + "epoch": 0.6040193001358505, + "grad_norm": 0.5352879157086721, + "learning_rate": 4.091021812005741e-06, + "loss": 0.2585, + "step": 12894 + }, + { + "epoch": 0.6040661451257788, + "grad_norm": 0.6018723571554019, + "learning_rate": 4.090875520402604e-06, + "loss": 0.2894, + "step": 12895 + }, + { + "epoch": 0.6041129901157071, + "grad_norm": 0.6538027902803832, + "learning_rate": 4.0907292196443785e-06, + "loss": 0.2823, + "step": 12896 + }, + { + "epoch": 0.6041598351056354, + "grad_norm": 0.6338195325408845, + "learning_rate": 4.090582909731905e-06, + "loss": 0.2989, + "step": 12897 + }, + { + "epoch": 0.6042066800955638, + "grad_norm": 0.6100158739158535, + "learning_rate": 4.090436590666028e-06, + "loss": 0.2939, + "step": 12898 + }, + { + "epoch": 0.6042535250854921, + "grad_norm": 0.5649949644059353, + "learning_rate": 4.090290262447587e-06, + "loss": 0.2724, + "step": 12899 + }, + { + "epoch": 0.6043003700754205, + "grad_norm": 0.5864124528163107, + "learning_rate": 4.090143925077426e-06, + "loss": 0.2747, + "step": 12900 + }, + { + "epoch": 0.6043472150653487, + "grad_norm": 0.5483011623835272, + "learning_rate": 4.089997578556386e-06, + "loss": 0.2598, + "step": 12901 + }, + { + "epoch": 0.6043940600552771, + "grad_norm": 0.6549561265486077, + "learning_rate": 4.089851222885309e-06, + "loss": 0.2968, + "step": 12902 + }, + { + "epoch": 0.6044409050452054, + "grad_norm": 0.639468648048857, + "learning_rate": 4.089704858065039e-06, + "loss": 0.2909, + "step": 12903 + }, + { + "epoch": 0.6044877500351338, + "grad_norm": 0.5753456391932866, + "learning_rate": 4.0895584840964164e-06, + "loss": 0.278, + "step": 12904 + }, + { + "epoch": 0.604534595025062, + "grad_norm": 0.5606450586384827, + "learning_rate": 4.089412100980284e-06, + "loss": 0.2789, + "step": 12905 + }, + { + "epoch": 0.6045814400149904, + "grad_norm": 0.616630812920757, + "learning_rate": 4.089265708717486e-06, + "loss": 0.2985, + "step": 12906 + }, + { + "epoch": 0.6046282850049187, + "grad_norm": 0.6042917278778243, + "learning_rate": 4.0891193073088615e-06, + "loss": 0.2909, + "step": 12907 + }, + { + "epoch": 0.6046751299948471, + "grad_norm": 0.6032289086914693, + "learning_rate": 4.088972896755255e-06, + "loss": 0.27, + "step": 12908 + }, + { + "epoch": 0.6047219749847754, + "grad_norm": 0.6043265296041939, + "learning_rate": 4.08882647705751e-06, + "loss": 0.2717, + "step": 12909 + }, + { + "epoch": 0.6047688199747037, + "grad_norm": 0.6220033877928116, + "learning_rate": 4.088680048216467e-06, + "loss": 0.3007, + "step": 12910 + }, + { + "epoch": 0.604815664964632, + "grad_norm": 0.5780204116772186, + "learning_rate": 4.08853361023297e-06, + "loss": 0.2972, + "step": 12911 + }, + { + "epoch": 0.6048625099545604, + "grad_norm": 0.668348170068088, + "learning_rate": 4.088387163107861e-06, + "loss": 0.296, + "step": 12912 + }, + { + "epoch": 0.6049093549444887, + "grad_norm": 0.607319187476503, + "learning_rate": 4.088240706841984e-06, + "loss": 0.2907, + "step": 12913 + }, + { + "epoch": 0.604956199934417, + "grad_norm": 0.6374310016769136, + "learning_rate": 4.088094241436179e-06, + "loss": 0.2901, + "step": 12914 + }, + { + "epoch": 0.6050030449243453, + "grad_norm": 0.6328232591935424, + "learning_rate": 4.087947766891292e-06, + "loss": 0.2813, + "step": 12915 + }, + { + "epoch": 0.6050498899142737, + "grad_norm": 0.6400911290010276, + "learning_rate": 4.087801283208166e-06, + "loss": 0.304, + "step": 12916 + }, + { + "epoch": 0.605096734904202, + "grad_norm": 0.641570581316346, + "learning_rate": 4.087654790387641e-06, + "loss": 0.2988, + "step": 12917 + }, + { + "epoch": 0.6051435798941304, + "grad_norm": 0.6224883003437341, + "learning_rate": 4.087508288430562e-06, + "loss": 0.3073, + "step": 12918 + }, + { + "epoch": 0.6051904248840586, + "grad_norm": 0.6158794301827925, + "learning_rate": 4.087361777337772e-06, + "loss": 0.2822, + "step": 12919 + }, + { + "epoch": 0.605237269873987, + "grad_norm": 0.6247474290170001, + "learning_rate": 4.0872152571101145e-06, + "loss": 0.306, + "step": 12920 + }, + { + "epoch": 0.6052841148639153, + "grad_norm": 0.6621883077639766, + "learning_rate": 4.0870687277484315e-06, + "loss": 0.3037, + "step": 12921 + }, + { + "epoch": 0.6053309598538437, + "grad_norm": 0.5934815546104981, + "learning_rate": 4.086922189253566e-06, + "loss": 0.2761, + "step": 12922 + }, + { + "epoch": 0.6053778048437719, + "grad_norm": 0.5505680566892146, + "learning_rate": 4.0867756416263635e-06, + "loss": 0.294, + "step": 12923 + }, + { + "epoch": 0.6054246498337003, + "grad_norm": 0.5774993715032448, + "learning_rate": 4.086629084867665e-06, + "loss": 0.2852, + "step": 12924 + }, + { + "epoch": 0.6054714948236286, + "grad_norm": 0.6066315173274744, + "learning_rate": 4.0864825189783155e-06, + "loss": 0.3012, + "step": 12925 + }, + { + "epoch": 0.605518339813557, + "grad_norm": 0.6422134326641279, + "learning_rate": 4.086335943959159e-06, + "loss": 0.2711, + "step": 12926 + }, + { + "epoch": 0.6055651848034853, + "grad_norm": 0.6094937608586966, + "learning_rate": 4.086189359811036e-06, + "loss": 0.2809, + "step": 12927 + }, + { + "epoch": 0.6056120297934136, + "grad_norm": 0.5838236093082971, + "learning_rate": 4.086042766534793e-06, + "loss": 0.2932, + "step": 12928 + }, + { + "epoch": 0.6056588747833419, + "grad_norm": 0.5972953092933455, + "learning_rate": 4.0858961641312725e-06, + "loss": 0.2907, + "step": 12929 + }, + { + "epoch": 0.6057057197732703, + "grad_norm": 0.6117389333552359, + "learning_rate": 4.085749552601317e-06, + "loss": 0.2969, + "step": 12930 + }, + { + "epoch": 0.6057525647631986, + "grad_norm": 0.5571425113097479, + "learning_rate": 4.085602931945772e-06, + "loss": 0.284, + "step": 12931 + }, + { + "epoch": 0.6057994097531268, + "grad_norm": 0.5702173459609045, + "learning_rate": 4.085456302165482e-06, + "loss": 0.2933, + "step": 12932 + }, + { + "epoch": 0.6058462547430552, + "grad_norm": 0.6361970954436563, + "learning_rate": 4.085309663261287e-06, + "loss": 0.2859, + "step": 12933 + }, + { + "epoch": 0.6058930997329836, + "grad_norm": 0.6012042992902162, + "learning_rate": 4.085163015234035e-06, + "loss": 0.2708, + "step": 12934 + }, + { + "epoch": 0.6059399447229119, + "grad_norm": 0.5917833586895256, + "learning_rate": 4.085016358084568e-06, + "loss": 0.2626, + "step": 12935 + }, + { + "epoch": 0.6059867897128403, + "grad_norm": 0.5916181390583236, + "learning_rate": 4.08486969181373e-06, + "loss": 0.2712, + "step": 12936 + }, + { + "epoch": 0.6060336347027685, + "grad_norm": 0.6336987927884081, + "learning_rate": 4.084723016422365e-06, + "loss": 0.3031, + "step": 12937 + }, + { + "epoch": 0.6060804796926968, + "grad_norm": 0.5687933043733778, + "learning_rate": 4.084576331911318e-06, + "loss": 0.2682, + "step": 12938 + }, + { + "epoch": 0.6061273246826252, + "grad_norm": 0.5739443015577823, + "learning_rate": 4.084429638281432e-06, + "loss": 0.2783, + "step": 12939 + }, + { + "epoch": 0.6061741696725536, + "grad_norm": 0.6600702388206591, + "learning_rate": 4.0842829355335525e-06, + "loss": 0.3144, + "step": 12940 + }, + { + "epoch": 0.6062210146624818, + "grad_norm": 0.6173398550080282, + "learning_rate": 4.084136223668523e-06, + "loss": 0.2718, + "step": 12941 + }, + { + "epoch": 0.6062678596524101, + "grad_norm": 0.6105173558203396, + "learning_rate": 4.0839895026871856e-06, + "loss": 0.313, + "step": 12942 + }, + { + "epoch": 0.6063147046423385, + "grad_norm": 0.5677860841384872, + "learning_rate": 4.083842772590388e-06, + "loss": 0.2979, + "step": 12943 + }, + { + "epoch": 0.6063615496322668, + "grad_norm": 0.6059770661387719, + "learning_rate": 4.083696033378973e-06, + "loss": 0.2702, + "step": 12944 + }, + { + "epoch": 0.6064083946221952, + "grad_norm": 0.743183359318538, + "learning_rate": 4.0835492850537865e-06, + "loss": 0.3174, + "step": 12945 + }, + { + "epoch": 0.6064552396121234, + "grad_norm": 0.5865675339569728, + "learning_rate": 4.083402527615671e-06, + "loss": 0.2674, + "step": 12946 + }, + { + "epoch": 0.6065020846020518, + "grad_norm": 0.6497224263383506, + "learning_rate": 4.0832557610654725e-06, + "loss": 0.304, + "step": 12947 + }, + { + "epoch": 0.6065489295919801, + "grad_norm": 0.6417335149263977, + "learning_rate": 4.0831089854040345e-06, + "loss": 0.2929, + "step": 12948 + }, + { + "epoch": 0.6065957745819085, + "grad_norm": 0.5833220771921268, + "learning_rate": 4.082962200632202e-06, + "loss": 0.2719, + "step": 12949 + }, + { + "epoch": 0.6066426195718367, + "grad_norm": 0.6480344562397246, + "learning_rate": 4.0828154067508205e-06, + "loss": 0.2934, + "step": 12950 + }, + { + "epoch": 0.6066894645617651, + "grad_norm": 0.6567881948194618, + "learning_rate": 4.082668603760734e-06, + "loss": 0.2985, + "step": 12951 + }, + { + "epoch": 0.6067363095516934, + "grad_norm": 0.5724046834007623, + "learning_rate": 4.0825217916627874e-06, + "loss": 0.2844, + "step": 12952 + }, + { + "epoch": 0.6067831545416218, + "grad_norm": 0.5604744839346678, + "learning_rate": 4.082374970457826e-06, + "loss": 0.2987, + "step": 12953 + }, + { + "epoch": 0.6068299995315501, + "grad_norm": 0.607460281233767, + "learning_rate": 4.082228140146693e-06, + "loss": 0.2835, + "step": 12954 + }, + { + "epoch": 0.6068768445214784, + "grad_norm": 0.5620778185853276, + "learning_rate": 4.082081300730236e-06, + "loss": 0.2735, + "step": 12955 + }, + { + "epoch": 0.6069236895114067, + "grad_norm": 0.6046768241108899, + "learning_rate": 4.081934452209298e-06, + "loss": 0.264, + "step": 12956 + }, + { + "epoch": 0.6069705345013351, + "grad_norm": 0.5873560799980875, + "learning_rate": 4.0817875945847265e-06, + "loss": 0.2789, + "step": 12957 + }, + { + "epoch": 0.6070173794912634, + "grad_norm": 0.6245081553574853, + "learning_rate": 4.081640727857363e-06, + "loss": 0.2707, + "step": 12958 + }, + { + "epoch": 0.6070642244811917, + "grad_norm": 0.5721280581959625, + "learning_rate": 4.081493852028055e-06, + "loss": 0.2843, + "step": 12959 + }, + { + "epoch": 0.60711106947112, + "grad_norm": 0.6421485358746113, + "learning_rate": 4.0813469670976485e-06, + "loss": 0.3014, + "step": 12960 + }, + { + "epoch": 0.6071579144610484, + "grad_norm": 0.5710080584415818, + "learning_rate": 4.0812000730669865e-06, + "loss": 0.2751, + "step": 12961 + }, + { + "epoch": 0.6072047594509767, + "grad_norm": 0.6182355205150114, + "learning_rate": 4.081053169936916e-06, + "loss": 0.2894, + "step": 12962 + }, + { + "epoch": 0.6072516044409051, + "grad_norm": 0.5751800146439953, + "learning_rate": 4.080906257708282e-06, + "loss": 0.277, + "step": 12963 + }, + { + "epoch": 0.6072984494308333, + "grad_norm": 0.6156780481884284, + "learning_rate": 4.0807593363819295e-06, + "loss": 0.2977, + "step": 12964 + }, + { + "epoch": 0.6073452944207617, + "grad_norm": 0.5807877256310756, + "learning_rate": 4.080612405958705e-06, + "loss": 0.3, + "step": 12965 + }, + { + "epoch": 0.60739213941069, + "grad_norm": 0.616353018158756, + "learning_rate": 4.080465466439453e-06, + "loss": 0.2952, + "step": 12966 + }, + { + "epoch": 0.6074389844006184, + "grad_norm": 0.6093097023486995, + "learning_rate": 4.080318517825018e-06, + "loss": 0.2884, + "step": 12967 + }, + { + "epoch": 0.6074858293905466, + "grad_norm": 0.5748520774266005, + "learning_rate": 4.08017156011625e-06, + "loss": 0.2659, + "step": 12968 + }, + { + "epoch": 0.607532674380475, + "grad_norm": 0.5619110644435759, + "learning_rate": 4.08002459331399e-06, + "loss": 0.2781, + "step": 12969 + }, + { + "epoch": 0.6075795193704033, + "grad_norm": 0.6399216651422902, + "learning_rate": 4.079877617419086e-06, + "loss": 0.3195, + "step": 12970 + }, + { + "epoch": 0.6076263643603317, + "grad_norm": 0.559885009922311, + "learning_rate": 4.079730632432383e-06, + "loss": 0.3078, + "step": 12971 + }, + { + "epoch": 0.60767320935026, + "grad_norm": 0.6012425018504023, + "learning_rate": 4.079583638354727e-06, + "loss": 0.301, + "step": 12972 + }, + { + "epoch": 0.6077200543401883, + "grad_norm": 0.5412871169103112, + "learning_rate": 4.079436635186965e-06, + "loss": 0.2768, + "step": 12973 + }, + { + "epoch": 0.6077668993301166, + "grad_norm": 0.5987169458259035, + "learning_rate": 4.079289622929941e-06, + "loss": 0.2708, + "step": 12974 + }, + { + "epoch": 0.607813744320045, + "grad_norm": 0.6050590018698475, + "learning_rate": 4.079142601584504e-06, + "loss": 0.2946, + "step": 12975 + }, + { + "epoch": 0.6078605893099733, + "grad_norm": 0.56900370627852, + "learning_rate": 4.078995571151497e-06, + "loss": 0.2747, + "step": 12976 + }, + { + "epoch": 0.6079074342999016, + "grad_norm": 0.5248024348640699, + "learning_rate": 4.078848531631768e-06, + "loss": 0.2574, + "step": 12977 + }, + { + "epoch": 0.6079542792898299, + "grad_norm": 0.5750339422240732, + "learning_rate": 4.0787014830261615e-06, + "loss": 0.2806, + "step": 12978 + }, + { + "epoch": 0.6080011242797583, + "grad_norm": 0.5677105851657018, + "learning_rate": 4.078554425335526e-06, + "loss": 0.2754, + "step": 12979 + }, + { + "epoch": 0.6080479692696866, + "grad_norm": 0.5547010501451496, + "learning_rate": 4.078407358560706e-06, + "loss": 0.2657, + "step": 12980 + }, + { + "epoch": 0.608094814259615, + "grad_norm": 0.6059540747262341, + "learning_rate": 4.078260282702548e-06, + "loss": 0.3092, + "step": 12981 + }, + { + "epoch": 0.6081416592495432, + "grad_norm": 0.5768995261488193, + "learning_rate": 4.078113197761899e-06, + "loss": 0.2807, + "step": 12982 + }, + { + "epoch": 0.6081885042394716, + "grad_norm": 0.615328739114309, + "learning_rate": 4.077966103739605e-06, + "loss": 0.2993, + "step": 12983 + }, + { + "epoch": 0.6082353492293999, + "grad_norm": 0.5734211771177945, + "learning_rate": 4.0778190006365135e-06, + "loss": 0.2852, + "step": 12984 + }, + { + "epoch": 0.6082821942193283, + "grad_norm": 0.6023365036510383, + "learning_rate": 4.07767188845347e-06, + "loss": 0.2911, + "step": 12985 + }, + { + "epoch": 0.6083290392092565, + "grad_norm": 0.5770425587006951, + "learning_rate": 4.07752476719132e-06, + "loss": 0.2877, + "step": 12986 + }, + { + "epoch": 0.6083758841991849, + "grad_norm": 0.5804685233997233, + "learning_rate": 4.077377636850913e-06, + "loss": 0.2877, + "step": 12987 + }, + { + "epoch": 0.6084227291891132, + "grad_norm": 0.5681846260524889, + "learning_rate": 4.0772304974330935e-06, + "loss": 0.2762, + "step": 12988 + }, + { + "epoch": 0.6084695741790416, + "grad_norm": 0.6274408075814599, + "learning_rate": 4.077083348938709e-06, + "loss": 0.2865, + "step": 12989 + }, + { + "epoch": 0.6085164191689699, + "grad_norm": 0.5997565328745902, + "learning_rate": 4.076936191368605e-06, + "loss": 0.2843, + "step": 12990 + }, + { + "epoch": 0.6085632641588982, + "grad_norm": 0.5842889265367074, + "learning_rate": 4.076789024723632e-06, + "loss": 0.2926, + "step": 12991 + }, + { + "epoch": 0.6086101091488265, + "grad_norm": 0.6518515745548422, + "learning_rate": 4.076641849004632e-06, + "loss": 0.281, + "step": 12992 + }, + { + "epoch": 0.6086569541387549, + "grad_norm": 0.537924980510903, + "learning_rate": 4.076494664212456e-06, + "loss": 0.2924, + "step": 12993 + }, + { + "epoch": 0.6087037991286832, + "grad_norm": 0.5614307040868325, + "learning_rate": 4.076347470347949e-06, + "loss": 0.2714, + "step": 12994 + }, + { + "epoch": 0.6087506441186115, + "grad_norm": 0.5865902836471789, + "learning_rate": 4.07620026741196e-06, + "loss": 0.2662, + "step": 12995 + }, + { + "epoch": 0.6087974891085398, + "grad_norm": 0.5610924477155826, + "learning_rate": 4.076053055405333e-06, + "loss": 0.2742, + "step": 12996 + }, + { + "epoch": 0.6088443340984682, + "grad_norm": 0.6080251848315971, + "learning_rate": 4.0759058343289164e-06, + "loss": 0.2886, + "step": 12997 + }, + { + "epoch": 0.6088911790883965, + "grad_norm": 0.5839258686150237, + "learning_rate": 4.075758604183558e-06, + "loss": 0.2854, + "step": 12998 + }, + { + "epoch": 0.6089380240783249, + "grad_norm": 0.6242713344216563, + "learning_rate": 4.075611364970106e-06, + "loss": 0.3031, + "step": 12999 + }, + { + "epoch": 0.6089848690682531, + "grad_norm": 0.6695359687864348, + "learning_rate": 4.075464116689406e-06, + "loss": 0.2993, + "step": 13000 + }, + { + "epoch": 0.6090317140581815, + "grad_norm": 0.5796102185977996, + "learning_rate": 4.0753168593423055e-06, + "loss": 0.2785, + "step": 13001 + }, + { + "epoch": 0.6090785590481098, + "grad_norm": 0.6036653959677455, + "learning_rate": 4.075169592929653e-06, + "loss": 0.278, + "step": 13002 + }, + { + "epoch": 0.6091254040380382, + "grad_norm": 0.5746704450878308, + "learning_rate": 4.075022317452295e-06, + "loss": 0.2685, + "step": 13003 + }, + { + "epoch": 0.6091722490279664, + "grad_norm": 0.5998947410151416, + "learning_rate": 4.0748750329110794e-06, + "loss": 0.2897, + "step": 13004 + }, + { + "epoch": 0.6092190940178948, + "grad_norm": 0.5545083946711923, + "learning_rate": 4.074727739306855e-06, + "loss": 0.294, + "step": 13005 + }, + { + "epoch": 0.6092659390078231, + "grad_norm": 0.563577456482818, + "learning_rate": 4.074580436640467e-06, + "loss": 0.2736, + "step": 13006 + }, + { + "epoch": 0.6093127839977515, + "grad_norm": 0.6187351483056831, + "learning_rate": 4.074433124912764e-06, + "loss": 0.2669, + "step": 13007 + }, + { + "epoch": 0.6093596289876798, + "grad_norm": 0.5670836542561564, + "learning_rate": 4.074285804124596e-06, + "loss": 0.27, + "step": 13008 + }, + { + "epoch": 0.609406473977608, + "grad_norm": 0.56554318417496, + "learning_rate": 4.074138474276807e-06, + "loss": 0.2829, + "step": 13009 + }, + { + "epoch": 0.6094533189675364, + "grad_norm": 0.5693070312701989, + "learning_rate": 4.0739911353702474e-06, + "loss": 0.2788, + "step": 13010 + }, + { + "epoch": 0.6095001639574648, + "grad_norm": 0.6300338257584143, + "learning_rate": 4.073843787405765e-06, + "loss": 0.2905, + "step": 13011 + }, + { + "epoch": 0.6095470089473931, + "grad_norm": 0.5823669605236848, + "learning_rate": 4.073696430384206e-06, + "loss": 0.2935, + "step": 13012 + }, + { + "epoch": 0.6095938539373214, + "grad_norm": 0.5738736022599816, + "learning_rate": 4.07354906430642e-06, + "loss": 0.2909, + "step": 13013 + }, + { + "epoch": 0.6096406989272497, + "grad_norm": 0.6048691505976317, + "learning_rate": 4.073401689173255e-06, + "loss": 0.2748, + "step": 13014 + }, + { + "epoch": 0.609687543917178, + "grad_norm": 0.5941871914619556, + "learning_rate": 4.073254304985558e-06, + "loss": 0.2822, + "step": 13015 + }, + { + "epoch": 0.6097343889071064, + "grad_norm": 0.6486948177923882, + "learning_rate": 4.073106911744179e-06, + "loss": 0.2888, + "step": 13016 + }, + { + "epoch": 0.6097812338970348, + "grad_norm": 0.5730062734500464, + "learning_rate": 4.0729595094499644e-06, + "loss": 0.2627, + "step": 13017 + }, + { + "epoch": 0.609828078886963, + "grad_norm": 0.6352650076492883, + "learning_rate": 4.072812098103763e-06, + "loss": 0.2972, + "step": 13018 + }, + { + "epoch": 0.6098749238768914, + "grad_norm": 0.6679215941677684, + "learning_rate": 4.0726646777064234e-06, + "loss": 0.3023, + "step": 13019 + }, + { + "epoch": 0.6099217688668197, + "grad_norm": 0.6250570454916825, + "learning_rate": 4.072517248258795e-06, + "loss": 0.3059, + "step": 13020 + }, + { + "epoch": 0.609968613856748, + "grad_norm": 0.5626537542468033, + "learning_rate": 4.072369809761723e-06, + "loss": 0.2886, + "step": 13021 + }, + { + "epoch": 0.6100154588466763, + "grad_norm": 0.5834590984554014, + "learning_rate": 4.07222236221606e-06, + "loss": 0.2784, + "step": 13022 + }, + { + "epoch": 0.6100623038366046, + "grad_norm": 0.5956792142711329, + "learning_rate": 4.072074905622652e-06, + "loss": 0.2922, + "step": 13023 + }, + { + "epoch": 0.610109148826533, + "grad_norm": 0.6073502699757497, + "learning_rate": 4.0719274399823465e-06, + "loss": 0.2764, + "step": 13024 + }, + { + "epoch": 0.6101559938164614, + "grad_norm": 0.6244638794234905, + "learning_rate": 4.071779965295995e-06, + "loss": 0.2972, + "step": 13025 + }, + { + "epoch": 0.6102028388063897, + "grad_norm": 0.5630724748603567, + "learning_rate": 4.071632481564445e-06, + "loss": 0.2723, + "step": 13026 + }, + { + "epoch": 0.6102496837963179, + "grad_norm": 0.6232603106664465, + "learning_rate": 4.0714849887885445e-06, + "loss": 0.2783, + "step": 13027 + }, + { + "epoch": 0.6102965287862463, + "grad_norm": 0.6368517596825224, + "learning_rate": 4.071337486969142e-06, + "loss": 0.2842, + "step": 13028 + }, + { + "epoch": 0.6103433737761746, + "grad_norm": 0.6162715617408302, + "learning_rate": 4.071189976107089e-06, + "loss": 0.2662, + "step": 13029 + }, + { + "epoch": 0.610390218766103, + "grad_norm": 0.6289064019200066, + "learning_rate": 4.0710424562032315e-06, + "loss": 0.2695, + "step": 13030 + }, + { + "epoch": 0.6104370637560312, + "grad_norm": 0.5514916342448294, + "learning_rate": 4.07089492725842e-06, + "loss": 0.2709, + "step": 13031 + }, + { + "epoch": 0.6104839087459596, + "grad_norm": 0.5759183298946358, + "learning_rate": 4.070747389273502e-06, + "loss": 0.3109, + "step": 13032 + }, + { + "epoch": 0.6105307537358879, + "grad_norm": 0.5686679741950664, + "learning_rate": 4.070599842249329e-06, + "loss": 0.2672, + "step": 13033 + }, + { + "epoch": 0.6105775987258163, + "grad_norm": 0.6470238103204379, + "learning_rate": 4.070452286186748e-06, + "loss": 0.3189, + "step": 13034 + }, + { + "epoch": 0.6106244437157446, + "grad_norm": 0.5968221934371792, + "learning_rate": 4.0703047210866094e-06, + "loss": 0.2838, + "step": 13035 + }, + { + "epoch": 0.6106712887056729, + "grad_norm": 0.5668023343780747, + "learning_rate": 4.070157146949762e-06, + "loss": 0.2775, + "step": 13036 + }, + { + "epoch": 0.6107181336956012, + "grad_norm": 0.6045133662585169, + "learning_rate": 4.070009563777053e-06, + "loss": 0.2922, + "step": 13037 + }, + { + "epoch": 0.6107649786855296, + "grad_norm": 0.5813285010659786, + "learning_rate": 4.069861971569336e-06, + "loss": 0.29, + "step": 13038 + }, + { + "epoch": 0.6108118236754579, + "grad_norm": 0.5969256949970728, + "learning_rate": 4.069714370327456e-06, + "loss": 0.2957, + "step": 13039 + }, + { + "epoch": 0.6108586686653862, + "grad_norm": 0.5679936449489577, + "learning_rate": 4.069566760052265e-06, + "loss": 0.2611, + "step": 13040 + }, + { + "epoch": 0.6109055136553145, + "grad_norm": 0.5396844180589166, + "learning_rate": 4.069419140744612e-06, + "loss": 0.2731, + "step": 13041 + }, + { + "epoch": 0.6109523586452429, + "grad_norm": 0.6277253516475008, + "learning_rate": 4.069271512405346e-06, + "loss": 0.3062, + "step": 13042 + }, + { + "epoch": 0.6109992036351712, + "grad_norm": 0.5670048174701521, + "learning_rate": 4.069123875035317e-06, + "loss": 0.2797, + "step": 13043 + }, + { + "epoch": 0.6110460486250996, + "grad_norm": 0.5634108931990408, + "learning_rate": 4.068976228635375e-06, + "loss": 0.269, + "step": 13044 + }, + { + "epoch": 0.6110928936150278, + "grad_norm": 0.5912296343376583, + "learning_rate": 4.068828573206368e-06, + "loss": 0.2874, + "step": 13045 + }, + { + "epoch": 0.6111397386049562, + "grad_norm": 0.646624202378189, + "learning_rate": 4.068680908749148e-06, + "loss": 0.2737, + "step": 13046 + }, + { + "epoch": 0.6111865835948845, + "grad_norm": 0.6337192790663752, + "learning_rate": 4.068533235264563e-06, + "loss": 0.2721, + "step": 13047 + }, + { + "epoch": 0.6112334285848129, + "grad_norm": 0.6325906901379404, + "learning_rate": 4.068385552753463e-06, + "loss": 0.3033, + "step": 13048 + }, + { + "epoch": 0.6112802735747411, + "grad_norm": 0.5899366980045646, + "learning_rate": 4.068237861216699e-06, + "loss": 0.2763, + "step": 13049 + }, + { + "epoch": 0.6113271185646695, + "grad_norm": 0.605922444492718, + "learning_rate": 4.068090160655121e-06, + "loss": 0.2619, + "step": 13050 + }, + { + "epoch": 0.6113739635545978, + "grad_norm": 0.6098839931803979, + "learning_rate": 4.067942451069577e-06, + "loss": 0.2574, + "step": 13051 + }, + { + "epoch": 0.6114208085445262, + "grad_norm": 0.5827988587759503, + "learning_rate": 4.067794732460918e-06, + "loss": 0.2739, + "step": 13052 + }, + { + "epoch": 0.6114676535344545, + "grad_norm": 0.6069814735937454, + "learning_rate": 4.067647004829995e-06, + "loss": 0.2896, + "step": 13053 + }, + { + "epoch": 0.6115144985243828, + "grad_norm": 0.5991879759978469, + "learning_rate": 4.067499268177658e-06, + "loss": 0.2834, + "step": 13054 + }, + { + "epoch": 0.6115613435143111, + "grad_norm": 0.5611900836485624, + "learning_rate": 4.067351522504757e-06, + "loss": 0.2515, + "step": 13055 + }, + { + "epoch": 0.6116081885042395, + "grad_norm": 0.6602555560476052, + "learning_rate": 4.0672037678121415e-06, + "loss": 0.3202, + "step": 13056 + }, + { + "epoch": 0.6116550334941678, + "grad_norm": 0.5748776775909406, + "learning_rate": 4.067056004100661e-06, + "loss": 0.2912, + "step": 13057 + }, + { + "epoch": 0.6117018784840961, + "grad_norm": 0.5773318822742524, + "learning_rate": 4.066908231371168e-06, + "loss": 0.2931, + "step": 13058 + }, + { + "epoch": 0.6117487234740244, + "grad_norm": 0.5634194942782159, + "learning_rate": 4.066760449624512e-06, + "loss": 0.2865, + "step": 13059 + }, + { + "epoch": 0.6117955684639528, + "grad_norm": 0.5863607960945341, + "learning_rate": 4.066612658861544e-06, + "loss": 0.2842, + "step": 13060 + }, + { + "epoch": 0.6118424134538811, + "grad_norm": 0.5698903135566927, + "learning_rate": 4.066464859083113e-06, + "loss": 0.2757, + "step": 13061 + }, + { + "epoch": 0.6118892584438095, + "grad_norm": 0.6023290965840501, + "learning_rate": 4.066317050290071e-06, + "loss": 0.2918, + "step": 13062 + }, + { + "epoch": 0.6119361034337377, + "grad_norm": 0.5791562928570316, + "learning_rate": 4.066169232483267e-06, + "loss": 0.2641, + "step": 13063 + }, + { + "epoch": 0.6119829484236661, + "grad_norm": 0.579432949209508, + "learning_rate": 4.066021405663554e-06, + "loss": 0.2529, + "step": 13064 + }, + { + "epoch": 0.6120297934135944, + "grad_norm": 0.602780699712448, + "learning_rate": 4.065873569831781e-06, + "loss": 0.2813, + "step": 13065 + }, + { + "epoch": 0.6120766384035228, + "grad_norm": 0.6045773225665275, + "learning_rate": 4.0657257249888e-06, + "loss": 0.2987, + "step": 13066 + }, + { + "epoch": 0.612123483393451, + "grad_norm": 0.5908870032449085, + "learning_rate": 4.065577871135459e-06, + "loss": 0.2757, + "step": 13067 + }, + { + "epoch": 0.6121703283833794, + "grad_norm": 0.5998276485754491, + "learning_rate": 4.065430008272611e-06, + "loss": 0.2854, + "step": 13068 + }, + { + "epoch": 0.6122171733733077, + "grad_norm": 0.5585640155300937, + "learning_rate": 4.0652821364011085e-06, + "loss": 0.2828, + "step": 13069 + }, + { + "epoch": 0.6122640183632361, + "grad_norm": 0.5967207607730767, + "learning_rate": 4.0651342555218e-06, + "loss": 0.3002, + "step": 13070 + }, + { + "epoch": 0.6123108633531644, + "grad_norm": 0.553304313686214, + "learning_rate": 4.064986365635537e-06, + "loss": 0.2832, + "step": 13071 + }, + { + "epoch": 0.6123577083430927, + "grad_norm": 0.5952523356769596, + "learning_rate": 4.0648384667431715e-06, + "loss": 0.2883, + "step": 13072 + }, + { + "epoch": 0.612404553333021, + "grad_norm": 0.5844685007100127, + "learning_rate": 4.064690558845553e-06, + "loss": 0.2943, + "step": 13073 + }, + { + "epoch": 0.6124513983229494, + "grad_norm": 0.5917482839126091, + "learning_rate": 4.064542641943534e-06, + "loss": 0.2633, + "step": 13074 + }, + { + "epoch": 0.6124982433128777, + "grad_norm": 0.5923914295004695, + "learning_rate": 4.064394716037965e-06, + "loss": 0.2821, + "step": 13075 + }, + { + "epoch": 0.612545088302806, + "grad_norm": 0.6074581372476252, + "learning_rate": 4.064246781129698e-06, + "loss": 0.2656, + "step": 13076 + }, + { + "epoch": 0.6125919332927343, + "grad_norm": 0.6338595353264059, + "learning_rate": 4.064098837219584e-06, + "loss": 0.2932, + "step": 13077 + }, + { + "epoch": 0.6126387782826627, + "grad_norm": 0.6484692722616779, + "learning_rate": 4.063950884308474e-06, + "loss": 0.2676, + "step": 13078 + }, + { + "epoch": 0.612685623272591, + "grad_norm": 0.55345298809484, + "learning_rate": 4.06380292239722e-06, + "loss": 0.2692, + "step": 13079 + }, + { + "epoch": 0.6127324682625194, + "grad_norm": 0.5885720086652988, + "learning_rate": 4.063654951486673e-06, + "loss": 0.2803, + "step": 13080 + }, + { + "epoch": 0.6127793132524476, + "grad_norm": 0.6729004150877516, + "learning_rate": 4.063506971577685e-06, + "loss": 0.2761, + "step": 13081 + }, + { + "epoch": 0.612826158242376, + "grad_norm": 0.5901382849988308, + "learning_rate": 4.063358982671107e-06, + "loss": 0.3081, + "step": 13082 + }, + { + "epoch": 0.6128730032323043, + "grad_norm": 0.5603498695143312, + "learning_rate": 4.063210984767792e-06, + "loss": 0.2776, + "step": 13083 + }, + { + "epoch": 0.6129198482222327, + "grad_norm": 0.6235407042268135, + "learning_rate": 4.063062977868589e-06, + "loss": 0.2989, + "step": 13084 + }, + { + "epoch": 0.6129666932121609, + "grad_norm": 0.6229136469533053, + "learning_rate": 4.062914961974352e-06, + "loss": 0.2886, + "step": 13085 + }, + { + "epoch": 0.6130135382020893, + "grad_norm": 0.626437476811794, + "learning_rate": 4.062766937085933e-06, + "loss": 0.3032, + "step": 13086 + }, + { + "epoch": 0.6130603831920176, + "grad_norm": 0.5852539237741115, + "learning_rate": 4.062618903204183e-06, + "loss": 0.2774, + "step": 13087 + }, + { + "epoch": 0.613107228181946, + "grad_norm": 0.568408110212546, + "learning_rate": 4.0624708603299535e-06, + "loss": 0.2805, + "step": 13088 + }, + { + "epoch": 0.6131540731718743, + "grad_norm": 0.6073580150751007, + "learning_rate": 4.062322808464097e-06, + "loss": 0.2734, + "step": 13089 + }, + { + "epoch": 0.6132009181618026, + "grad_norm": 0.6050606864147969, + "learning_rate": 4.062174747607466e-06, + "loss": 0.2543, + "step": 13090 + }, + { + "epoch": 0.6132477631517309, + "grad_norm": 0.5593023471185745, + "learning_rate": 4.06202667776091e-06, + "loss": 0.2626, + "step": 13091 + }, + { + "epoch": 0.6132946081416593, + "grad_norm": 0.6388158489560269, + "learning_rate": 4.061878598925285e-06, + "loss": 0.304, + "step": 13092 + }, + { + "epoch": 0.6133414531315876, + "grad_norm": 0.6308588616695481, + "learning_rate": 4.061730511101441e-06, + "loss": 0.2909, + "step": 13093 + }, + { + "epoch": 0.6133882981215159, + "grad_norm": 0.6111757265189935, + "learning_rate": 4.06158241429023e-06, + "loss": 0.2815, + "step": 13094 + }, + { + "epoch": 0.6134351431114442, + "grad_norm": 0.6380613764758886, + "learning_rate": 4.061434308492505e-06, + "loss": 0.2967, + "step": 13095 + }, + { + "epoch": 0.6134819881013726, + "grad_norm": 0.5905518025118454, + "learning_rate": 4.061286193709117e-06, + "loss": 0.2852, + "step": 13096 + }, + { + "epoch": 0.6135288330913009, + "grad_norm": 0.5961969970664257, + "learning_rate": 4.0611380699409205e-06, + "loss": 0.2774, + "step": 13097 + }, + { + "epoch": 0.6135756780812293, + "grad_norm": 0.6214116454211642, + "learning_rate": 4.060989937188766e-06, + "loss": 0.3009, + "step": 13098 + }, + { + "epoch": 0.6136225230711575, + "grad_norm": 0.58130205929219, + "learning_rate": 4.060841795453508e-06, + "loss": 0.2889, + "step": 13099 + }, + { + "epoch": 0.6136693680610859, + "grad_norm": 0.6028993854456686, + "learning_rate": 4.060693644735996e-06, + "loss": 0.2933, + "step": 13100 + }, + { + "epoch": 0.6137162130510142, + "grad_norm": 0.5889910686470107, + "learning_rate": 4.0605454850370866e-06, + "loss": 0.2667, + "step": 13101 + }, + { + "epoch": 0.6137630580409426, + "grad_norm": 0.6065342760247908, + "learning_rate": 4.0603973163576284e-06, + "loss": 0.2786, + "step": 13102 + }, + { + "epoch": 0.6138099030308708, + "grad_norm": 0.6584988160293884, + "learning_rate": 4.060249138698477e-06, + "loss": 0.3042, + "step": 13103 + }, + { + "epoch": 0.6138567480207991, + "grad_norm": 0.6315906918101415, + "learning_rate": 4.060100952060483e-06, + "loss": 0.3069, + "step": 13104 + }, + { + "epoch": 0.6139035930107275, + "grad_norm": 0.573242952936321, + "learning_rate": 4.059952756444499e-06, + "loss": 0.275, + "step": 13105 + }, + { + "epoch": 0.6139504380006559, + "grad_norm": 0.5935512268918307, + "learning_rate": 4.059804551851381e-06, + "loss": 0.2658, + "step": 13106 + }, + { + "epoch": 0.6139972829905842, + "grad_norm": 0.5378821552136505, + "learning_rate": 4.05965633828198e-06, + "loss": 0.2654, + "step": 13107 + }, + { + "epoch": 0.6140441279805124, + "grad_norm": 0.5910841998772566, + "learning_rate": 4.059508115737147e-06, + "loss": 0.2881, + "step": 13108 + }, + { + "epoch": 0.6140909729704408, + "grad_norm": 0.6062342952609346, + "learning_rate": 4.0593598842177385e-06, + "loss": 0.2709, + "step": 13109 + }, + { + "epoch": 0.6141378179603691, + "grad_norm": 0.6539345684112634, + "learning_rate": 4.0592116437246045e-06, + "loss": 0.2986, + "step": 13110 + }, + { + "epoch": 0.6141846629502975, + "grad_norm": 0.5760095821817329, + "learning_rate": 4.059063394258601e-06, + "loss": 0.2852, + "step": 13111 + }, + { + "epoch": 0.6142315079402257, + "grad_norm": 0.6225202698913437, + "learning_rate": 4.058915135820578e-06, + "loss": 0.3099, + "step": 13112 + }, + { + "epoch": 0.6142783529301541, + "grad_norm": 0.6326705691559416, + "learning_rate": 4.05876686841139e-06, + "loss": 0.2886, + "step": 13113 + }, + { + "epoch": 0.6143251979200824, + "grad_norm": 0.5804247023473059, + "learning_rate": 4.058618592031892e-06, + "loss": 0.2697, + "step": 13114 + }, + { + "epoch": 0.6143720429100108, + "grad_norm": 0.5894059835483142, + "learning_rate": 4.0584703066829346e-06, + "loss": 0.2958, + "step": 13115 + }, + { + "epoch": 0.6144188878999391, + "grad_norm": 0.550754110974534, + "learning_rate": 4.058322012365372e-06, + "loss": 0.2685, + "step": 13116 + }, + { + "epoch": 0.6144657328898674, + "grad_norm": 0.6508224495857076, + "learning_rate": 4.058173709080059e-06, + "loss": 0.3023, + "step": 13117 + }, + { + "epoch": 0.6145125778797957, + "grad_norm": 0.5654641088094539, + "learning_rate": 4.058025396827847e-06, + "loss": 0.2726, + "step": 13118 + }, + { + "epoch": 0.6145594228697241, + "grad_norm": 0.5452426774422408, + "learning_rate": 4.057877075609591e-06, + "loss": 0.292, + "step": 13119 + }, + { + "epoch": 0.6146062678596524, + "grad_norm": 0.6489149810279923, + "learning_rate": 4.057728745426144e-06, + "loss": 0.2745, + "step": 13120 + }, + { + "epoch": 0.6146531128495807, + "grad_norm": 0.5860512268099445, + "learning_rate": 4.05758040627836e-06, + "loss": 0.3075, + "step": 13121 + }, + { + "epoch": 0.614699957839509, + "grad_norm": 0.5808367119170821, + "learning_rate": 4.057432058167091e-06, + "loss": 0.2872, + "step": 13122 + }, + { + "epoch": 0.6147468028294374, + "grad_norm": 0.612463850742571, + "learning_rate": 4.057283701093192e-06, + "loss": 0.2743, + "step": 13123 + }, + { + "epoch": 0.6147936478193657, + "grad_norm": 0.5634818903882499, + "learning_rate": 4.057135335057518e-06, + "loss": 0.2873, + "step": 13124 + }, + { + "epoch": 0.6148404928092941, + "grad_norm": 0.5585003477481413, + "learning_rate": 4.056986960060921e-06, + "loss": 0.2727, + "step": 13125 + }, + { + "epoch": 0.6148873377992223, + "grad_norm": 0.5465259154061176, + "learning_rate": 4.056838576104255e-06, + "loss": 0.2843, + "step": 13126 + }, + { + "epoch": 0.6149341827891507, + "grad_norm": 0.5811166855448198, + "learning_rate": 4.056690183188374e-06, + "loss": 0.2673, + "step": 13127 + }, + { + "epoch": 0.614981027779079, + "grad_norm": 0.5731599781545649, + "learning_rate": 4.056541781314133e-06, + "loss": 0.2734, + "step": 13128 + }, + { + "epoch": 0.6150278727690074, + "grad_norm": 0.6293458740201123, + "learning_rate": 4.0563933704823845e-06, + "loss": 0.2967, + "step": 13129 + }, + { + "epoch": 0.6150747177589356, + "grad_norm": 0.5784125922665166, + "learning_rate": 4.056244950693985e-06, + "loss": 0.2745, + "step": 13130 + }, + { + "epoch": 0.615121562748864, + "grad_norm": 0.5422976607305449, + "learning_rate": 4.056096521949785e-06, + "loss": 0.2766, + "step": 13131 + }, + { + "epoch": 0.6151684077387923, + "grad_norm": 0.5669101832758573, + "learning_rate": 4.055948084250641e-06, + "loss": 0.2704, + "step": 13132 + }, + { + "epoch": 0.6152152527287207, + "grad_norm": 0.6113726770586912, + "learning_rate": 4.0557996375974086e-06, + "loss": 0.2819, + "step": 13133 + }, + { + "epoch": 0.615262097718649, + "grad_norm": 0.5855649187768116, + "learning_rate": 4.0556511819909386e-06, + "loss": 0.2791, + "step": 13134 + }, + { + "epoch": 0.6153089427085773, + "grad_norm": 0.6127305217511161, + "learning_rate": 4.055502717432087e-06, + "loss": 0.2933, + "step": 13135 + }, + { + "epoch": 0.6153557876985056, + "grad_norm": 0.5520643688338251, + "learning_rate": 4.055354243921709e-06, + "loss": 0.2667, + "step": 13136 + }, + { + "epoch": 0.615402632688434, + "grad_norm": 0.5500480720470187, + "learning_rate": 4.055205761460658e-06, + "loss": 0.2462, + "step": 13137 + }, + { + "epoch": 0.6154494776783623, + "grad_norm": 0.5564454542522564, + "learning_rate": 4.055057270049788e-06, + "loss": 0.2741, + "step": 13138 + }, + { + "epoch": 0.6154963226682906, + "grad_norm": 0.6084066388429227, + "learning_rate": 4.054908769689956e-06, + "loss": 0.2989, + "step": 13139 + }, + { + "epoch": 0.6155431676582189, + "grad_norm": 0.5949193462762835, + "learning_rate": 4.0547602603820135e-06, + "loss": 0.2985, + "step": 13140 + }, + { + "epoch": 0.6155900126481473, + "grad_norm": 0.5935442927709771, + "learning_rate": 4.054611742126816e-06, + "loss": 0.2682, + "step": 13141 + }, + { + "epoch": 0.6156368576380756, + "grad_norm": 0.6353531413145915, + "learning_rate": 4.0544632149252205e-06, + "loss": 0.2877, + "step": 13142 + }, + { + "epoch": 0.615683702628004, + "grad_norm": 0.5268753621962375, + "learning_rate": 4.054314678778078e-06, + "loss": 0.2798, + "step": 13143 + }, + { + "epoch": 0.6157305476179322, + "grad_norm": 0.5723275293082721, + "learning_rate": 4.054166133686246e-06, + "loss": 0.2683, + "step": 13144 + }, + { + "epoch": 0.6157773926078606, + "grad_norm": 0.6175508937269673, + "learning_rate": 4.054017579650579e-06, + "loss": 0.2779, + "step": 13145 + }, + { + "epoch": 0.6158242375977889, + "grad_norm": 0.6694984207333802, + "learning_rate": 4.053869016671931e-06, + "loss": 0.2784, + "step": 13146 + }, + { + "epoch": 0.6158710825877173, + "grad_norm": 0.6262916390196217, + "learning_rate": 4.053720444751157e-06, + "loss": 0.2973, + "step": 13147 + }, + { + "epoch": 0.6159179275776455, + "grad_norm": 0.5651945089193916, + "learning_rate": 4.053571863889113e-06, + "loss": 0.2696, + "step": 13148 + }, + { + "epoch": 0.6159647725675739, + "grad_norm": 0.6139454084906069, + "learning_rate": 4.053423274086653e-06, + "loss": 0.2772, + "step": 13149 + }, + { + "epoch": 0.6160116175575022, + "grad_norm": 0.610118783154947, + "learning_rate": 4.053274675344632e-06, + "loss": 0.2749, + "step": 13150 + }, + { + "epoch": 0.6160584625474306, + "grad_norm": 0.6251685183717997, + "learning_rate": 4.053126067663907e-06, + "loss": 0.2943, + "step": 13151 + }, + { + "epoch": 0.6161053075373589, + "grad_norm": 0.5667668357745084, + "learning_rate": 4.05297745104533e-06, + "loss": 0.2691, + "step": 13152 + }, + { + "epoch": 0.6161521525272872, + "grad_norm": 0.6416077341190849, + "learning_rate": 4.05282882548976e-06, + "loss": 0.2843, + "step": 13153 + }, + { + "epoch": 0.6161989975172155, + "grad_norm": 0.6114419227565597, + "learning_rate": 4.052680190998049e-06, + "loss": 0.2984, + "step": 13154 + }, + { + "epoch": 0.6162458425071439, + "grad_norm": 0.654523383627141, + "learning_rate": 4.052531547571054e-06, + "loss": 0.2889, + "step": 13155 + }, + { + "epoch": 0.6162926874970722, + "grad_norm": 0.610829354934864, + "learning_rate": 4.052382895209631e-06, + "loss": 0.295, + "step": 13156 + }, + { + "epoch": 0.6163395324870005, + "grad_norm": 0.5703427818366418, + "learning_rate": 4.052234233914634e-06, + "loss": 0.2813, + "step": 13157 + }, + { + "epoch": 0.6163863774769288, + "grad_norm": 0.5905639097506717, + "learning_rate": 4.052085563686919e-06, + "loss": 0.2919, + "step": 13158 + }, + { + "epoch": 0.6164332224668572, + "grad_norm": 0.6136097638969532, + "learning_rate": 4.051936884527342e-06, + "loss": 0.2902, + "step": 13159 + }, + { + "epoch": 0.6164800674567855, + "grad_norm": 0.6259669224917656, + "learning_rate": 4.051788196436758e-06, + "loss": 0.2842, + "step": 13160 + }, + { + "epoch": 0.6165269124467139, + "grad_norm": 0.5573812754629027, + "learning_rate": 4.051639499416023e-06, + "loss": 0.2795, + "step": 13161 + }, + { + "epoch": 0.6165737574366421, + "grad_norm": 0.6005473321514582, + "learning_rate": 4.051490793465993e-06, + "loss": 0.2869, + "step": 13162 + }, + { + "epoch": 0.6166206024265705, + "grad_norm": 0.6126198177553541, + "learning_rate": 4.051342078587523e-06, + "loss": 0.2831, + "step": 13163 + }, + { + "epoch": 0.6166674474164988, + "grad_norm": 0.625599106182029, + "learning_rate": 4.05119335478147e-06, + "loss": 0.275, + "step": 13164 + }, + { + "epoch": 0.6167142924064272, + "grad_norm": 0.516749269236419, + "learning_rate": 4.051044622048688e-06, + "loss": 0.2367, + "step": 13165 + }, + { + "epoch": 0.6167611373963554, + "grad_norm": 0.5778202817386666, + "learning_rate": 4.050895880390034e-06, + "loss": 0.2717, + "step": 13166 + }, + { + "epoch": 0.6168079823862838, + "grad_norm": 0.5874041410148149, + "learning_rate": 4.050747129806365e-06, + "loss": 0.2824, + "step": 13167 + }, + { + "epoch": 0.6168548273762121, + "grad_norm": 0.6907967479162668, + "learning_rate": 4.050598370298536e-06, + "loss": 0.293, + "step": 13168 + }, + { + "epoch": 0.6169016723661405, + "grad_norm": 0.628596545159779, + "learning_rate": 4.050449601867403e-06, + "loss": 0.2597, + "step": 13169 + }, + { + "epoch": 0.6169485173560688, + "grad_norm": 0.623034331448415, + "learning_rate": 4.050300824513822e-06, + "loss": 0.2798, + "step": 13170 + }, + { + "epoch": 0.6169953623459971, + "grad_norm": 0.6070886684161432, + "learning_rate": 4.0501520382386495e-06, + "loss": 0.2946, + "step": 13171 + }, + { + "epoch": 0.6170422073359254, + "grad_norm": 0.6226265221873097, + "learning_rate": 4.0500032430427415e-06, + "loss": 0.2557, + "step": 13172 + }, + { + "epoch": 0.6170890523258538, + "grad_norm": 0.5917257677687215, + "learning_rate": 4.049854438926955e-06, + "loss": 0.2733, + "step": 13173 + }, + { + "epoch": 0.6171358973157821, + "grad_norm": 0.6057651281799741, + "learning_rate": 4.049705625892144e-06, + "loss": 0.2909, + "step": 13174 + }, + { + "epoch": 0.6171827423057104, + "grad_norm": 0.5779699319492485, + "learning_rate": 4.049556803939168e-06, + "loss": 0.2727, + "step": 13175 + }, + { + "epoch": 0.6172295872956387, + "grad_norm": 0.5677759218065007, + "learning_rate": 4.049407973068882e-06, + "loss": 0.2683, + "step": 13176 + }, + { + "epoch": 0.6172764322855671, + "grad_norm": 0.5516423204079359, + "learning_rate": 4.049259133282143e-06, + "loss": 0.2849, + "step": 13177 + }, + { + "epoch": 0.6173232772754954, + "grad_norm": 0.66091453741401, + "learning_rate": 4.049110284579806e-06, + "loss": 0.2834, + "step": 13178 + }, + { + "epoch": 0.6173701222654238, + "grad_norm": 0.5730817838967276, + "learning_rate": 4.048961426962729e-06, + "loss": 0.2481, + "step": 13179 + }, + { + "epoch": 0.617416967255352, + "grad_norm": 0.6070705466706768, + "learning_rate": 4.048812560431768e-06, + "loss": 0.2757, + "step": 13180 + }, + { + "epoch": 0.6174638122452804, + "grad_norm": 0.6267361114538579, + "learning_rate": 4.048663684987782e-06, + "loss": 0.2777, + "step": 13181 + }, + { + "epoch": 0.6175106572352087, + "grad_norm": 0.5659589141226591, + "learning_rate": 4.048514800631623e-06, + "loss": 0.2664, + "step": 13182 + }, + { + "epoch": 0.6175575022251371, + "grad_norm": 0.5914610968299775, + "learning_rate": 4.048365907364152e-06, + "loss": 0.2903, + "step": 13183 + }, + { + "epoch": 0.6176043472150653, + "grad_norm": 0.6150076732226972, + "learning_rate": 4.048217005186225e-06, + "loss": 0.284, + "step": 13184 + }, + { + "epoch": 0.6176511922049936, + "grad_norm": 0.5774474619779092, + "learning_rate": 4.048068094098697e-06, + "loss": 0.2784, + "step": 13185 + }, + { + "epoch": 0.617698037194922, + "grad_norm": 0.544632883998311, + "learning_rate": 4.047919174102426e-06, + "loss": 0.2616, + "step": 13186 + }, + { + "epoch": 0.6177448821848504, + "grad_norm": 0.6389931726004094, + "learning_rate": 4.04777024519827e-06, + "loss": 0.2819, + "step": 13187 + }, + { + "epoch": 0.6177917271747787, + "grad_norm": 0.5782132116966588, + "learning_rate": 4.047621307387085e-06, + "loss": 0.293, + "step": 13188 + }, + { + "epoch": 0.617838572164707, + "grad_norm": 0.5645695866037247, + "learning_rate": 4.047472360669729e-06, + "loss": 0.2713, + "step": 13189 + }, + { + "epoch": 0.6178854171546353, + "grad_norm": 0.5928392988137012, + "learning_rate": 4.047323405047057e-06, + "loss": 0.2908, + "step": 13190 + }, + { + "epoch": 0.6179322621445636, + "grad_norm": 0.5782844215823466, + "learning_rate": 4.04717444051993e-06, + "loss": 0.2985, + "step": 13191 + }, + { + "epoch": 0.617979107134492, + "grad_norm": 0.555847763365611, + "learning_rate": 4.047025467089201e-06, + "loss": 0.2729, + "step": 13192 + }, + { + "epoch": 0.6180259521244202, + "grad_norm": 0.5660544349709572, + "learning_rate": 4.0468764847557296e-06, + "loss": 0.2849, + "step": 13193 + }, + { + "epoch": 0.6180727971143486, + "grad_norm": 0.5835328569908291, + "learning_rate": 4.046727493520373e-06, + "loss": 0.2752, + "step": 13194 + }, + { + "epoch": 0.618119642104277, + "grad_norm": 0.5694454997310537, + "learning_rate": 4.046578493383988e-06, + "loss": 0.2756, + "step": 13195 + }, + { + "epoch": 0.6181664870942053, + "grad_norm": 0.5904368728727739, + "learning_rate": 4.046429484347434e-06, + "loss": 0.2831, + "step": 13196 + }, + { + "epoch": 0.6182133320841336, + "grad_norm": 0.5598642739341495, + "learning_rate": 4.0462804664115655e-06, + "loss": 0.2742, + "step": 13197 + }, + { + "epoch": 0.6182601770740619, + "grad_norm": 0.6069543220676024, + "learning_rate": 4.046131439577242e-06, + "loss": 0.2982, + "step": 13198 + }, + { + "epoch": 0.6183070220639902, + "grad_norm": 0.6365011220409182, + "learning_rate": 4.045982403845321e-06, + "loss": 0.2925, + "step": 13199 + }, + { + "epoch": 0.6183538670539186, + "grad_norm": 0.6199844600113881, + "learning_rate": 4.04583335921666e-06, + "loss": 0.2872, + "step": 13200 + }, + { + "epoch": 0.618400712043847, + "grad_norm": 0.6378146145571534, + "learning_rate": 4.045684305692116e-06, + "loss": 0.2873, + "step": 13201 + }, + { + "epoch": 0.6184475570337752, + "grad_norm": 0.6170289803411838, + "learning_rate": 4.045535243272547e-06, + "loss": 0.2889, + "step": 13202 + }, + { + "epoch": 0.6184944020237035, + "grad_norm": 0.5654421320954804, + "learning_rate": 4.045386171958812e-06, + "loss": 0.2808, + "step": 13203 + }, + { + "epoch": 0.6185412470136319, + "grad_norm": 0.5431531405803762, + "learning_rate": 4.045237091751767e-06, + "loss": 0.2777, + "step": 13204 + }, + { + "epoch": 0.6185880920035602, + "grad_norm": 0.6095110059088817, + "learning_rate": 4.045088002652271e-06, + "loss": 0.3049, + "step": 13205 + }, + { + "epoch": 0.6186349369934886, + "grad_norm": 0.5876581611190079, + "learning_rate": 4.0449389046611834e-06, + "loss": 0.2892, + "step": 13206 + }, + { + "epoch": 0.6186817819834168, + "grad_norm": 0.6014360756459092, + "learning_rate": 4.044789797779359e-06, + "loss": 0.2853, + "step": 13207 + }, + { + "epoch": 0.6187286269733452, + "grad_norm": 0.5996393764249405, + "learning_rate": 4.044640682007658e-06, + "loss": 0.3007, + "step": 13208 + }, + { + "epoch": 0.6187754719632735, + "grad_norm": 0.6100171228145032, + "learning_rate": 4.044491557346939e-06, + "loss": 0.2978, + "step": 13209 + }, + { + "epoch": 0.6188223169532019, + "grad_norm": 0.6215272258507588, + "learning_rate": 4.044342423798059e-06, + "loss": 0.2814, + "step": 13210 + }, + { + "epoch": 0.6188691619431301, + "grad_norm": 0.6399610155116849, + "learning_rate": 4.044193281361876e-06, + "loss": 0.2771, + "step": 13211 + }, + { + "epoch": 0.6189160069330585, + "grad_norm": 0.6311160589692192, + "learning_rate": 4.0440441300392485e-06, + "loss": 0.3019, + "step": 13212 + }, + { + "epoch": 0.6189628519229868, + "grad_norm": 0.5992835616643115, + "learning_rate": 4.043894969831036e-06, + "loss": 0.2873, + "step": 13213 + }, + { + "epoch": 0.6190096969129152, + "grad_norm": 0.5995786962732271, + "learning_rate": 4.043745800738094e-06, + "loss": 0.2859, + "step": 13214 + }, + { + "epoch": 0.6190565419028435, + "grad_norm": 0.6101477822569827, + "learning_rate": 4.043596622761286e-06, + "loss": 0.2931, + "step": 13215 + }, + { + "epoch": 0.6191033868927718, + "grad_norm": 0.5499091476809774, + "learning_rate": 4.043447435901465e-06, + "loss": 0.2745, + "step": 13216 + }, + { + "epoch": 0.6191502318827001, + "grad_norm": 0.5970409982207417, + "learning_rate": 4.043298240159493e-06, + "loss": 0.2703, + "step": 13217 + }, + { + "epoch": 0.6191970768726285, + "grad_norm": 0.586092858636032, + "learning_rate": 4.043149035536228e-06, + "loss": 0.2616, + "step": 13218 + }, + { + "epoch": 0.6192439218625568, + "grad_norm": 0.5702608248598806, + "learning_rate": 4.042999822032527e-06, + "loss": 0.2494, + "step": 13219 + }, + { + "epoch": 0.6192907668524851, + "grad_norm": 0.5791605052148523, + "learning_rate": 4.04285059964925e-06, + "loss": 0.3002, + "step": 13220 + }, + { + "epoch": 0.6193376118424134, + "grad_norm": 0.5911741496706963, + "learning_rate": 4.042701368387256e-06, + "loss": 0.2701, + "step": 13221 + }, + { + "epoch": 0.6193844568323418, + "grad_norm": 0.6521935066773877, + "learning_rate": 4.042552128247403e-06, + "loss": 0.3066, + "step": 13222 + }, + { + "epoch": 0.6194313018222701, + "grad_norm": 0.5731883487793089, + "learning_rate": 4.042402879230551e-06, + "loss": 0.2904, + "step": 13223 + }, + { + "epoch": 0.6194781468121985, + "grad_norm": 0.5512355200219353, + "learning_rate": 4.0422536213375575e-06, + "loss": 0.2518, + "step": 13224 + }, + { + "epoch": 0.6195249918021267, + "grad_norm": 0.646275688916999, + "learning_rate": 4.0421043545692825e-06, + "loss": 0.3137, + "step": 13225 + }, + { + "epoch": 0.6195718367920551, + "grad_norm": 0.6063952933595378, + "learning_rate": 4.041955078926584e-06, + "loss": 0.2762, + "step": 13226 + }, + { + "epoch": 0.6196186817819834, + "grad_norm": 0.5868032850550389, + "learning_rate": 4.041805794410322e-06, + "loss": 0.3001, + "step": 13227 + }, + { + "epoch": 0.6196655267719118, + "grad_norm": 0.6268318695288476, + "learning_rate": 4.041656501021355e-06, + "loss": 0.2913, + "step": 13228 + }, + { + "epoch": 0.61971237176184, + "grad_norm": 0.591814494178162, + "learning_rate": 4.041507198760542e-06, + "loss": 0.2864, + "step": 13229 + }, + { + "epoch": 0.6197592167517684, + "grad_norm": 0.623757370529589, + "learning_rate": 4.041357887628743e-06, + "loss": 0.261, + "step": 13230 + }, + { + "epoch": 0.6198060617416967, + "grad_norm": 0.5719728443631423, + "learning_rate": 4.041208567626816e-06, + "loss": 0.2736, + "step": 13231 + }, + { + "epoch": 0.6198529067316251, + "grad_norm": 0.6414792935682343, + "learning_rate": 4.041059238755621e-06, + "loss": 0.2906, + "step": 13232 + }, + { + "epoch": 0.6198997517215534, + "grad_norm": 0.6177007727741435, + "learning_rate": 4.040909901016018e-06, + "loss": 0.2891, + "step": 13233 + }, + { + "epoch": 0.6199465967114817, + "grad_norm": 0.6150763052523521, + "learning_rate": 4.0407605544088655e-06, + "loss": 0.3194, + "step": 13234 + }, + { + "epoch": 0.61999344170141, + "grad_norm": 0.5681171406873387, + "learning_rate": 4.040611198935024e-06, + "loss": 0.2893, + "step": 13235 + }, + { + "epoch": 0.6200402866913384, + "grad_norm": 0.5647701935329178, + "learning_rate": 4.0404618345953514e-06, + "loss": 0.2828, + "step": 13236 + }, + { + "epoch": 0.6200871316812667, + "grad_norm": 0.6476755996304028, + "learning_rate": 4.0403124613907095e-06, + "loss": 0.3047, + "step": 13237 + }, + { + "epoch": 0.620133976671195, + "grad_norm": 0.6059742611181734, + "learning_rate": 4.040163079321955e-06, + "loss": 0.2732, + "step": 13238 + }, + { + "epoch": 0.6201808216611233, + "grad_norm": 0.5493829876490997, + "learning_rate": 4.04001368838995e-06, + "loss": 0.2666, + "step": 13239 + }, + { + "epoch": 0.6202276666510517, + "grad_norm": 0.5863847396784103, + "learning_rate": 4.039864288595553e-06, + "loss": 0.2853, + "step": 13240 + }, + { + "epoch": 0.62027451164098, + "grad_norm": 0.5811468927710494, + "learning_rate": 4.039714879939624e-06, + "loss": 0.2692, + "step": 13241 + }, + { + "epoch": 0.6203213566309084, + "grad_norm": 0.6501472519323865, + "learning_rate": 4.039565462423023e-06, + "loss": 0.2997, + "step": 13242 + }, + { + "epoch": 0.6203682016208366, + "grad_norm": 0.6048248436128041, + "learning_rate": 4.03941603604661e-06, + "loss": 0.2964, + "step": 13243 + }, + { + "epoch": 0.620415046610765, + "grad_norm": 0.5884526947516, + "learning_rate": 4.039266600811244e-06, + "loss": 0.2846, + "step": 13244 + }, + { + "epoch": 0.6204618916006933, + "grad_norm": 0.5962222010538933, + "learning_rate": 4.039117156717786e-06, + "loss": 0.287, + "step": 13245 + }, + { + "epoch": 0.6205087365906217, + "grad_norm": 0.5694716512018283, + "learning_rate": 4.038967703767095e-06, + "loss": 0.2744, + "step": 13246 + }, + { + "epoch": 0.6205555815805499, + "grad_norm": 0.6629633895015735, + "learning_rate": 4.038818241960032e-06, + "loss": 0.3086, + "step": 13247 + }, + { + "epoch": 0.6206024265704783, + "grad_norm": 0.5798062461650462, + "learning_rate": 4.038668771297456e-06, + "loss": 0.2709, + "step": 13248 + }, + { + "epoch": 0.6206492715604066, + "grad_norm": 0.5824406894424182, + "learning_rate": 4.038519291780229e-06, + "loss": 0.2682, + "step": 13249 + }, + { + "epoch": 0.620696116550335, + "grad_norm": 0.5801160033500203, + "learning_rate": 4.03836980340921e-06, + "loss": 0.2827, + "step": 13250 + }, + { + "epoch": 0.6207429615402633, + "grad_norm": 0.6361905614452694, + "learning_rate": 4.03822030618526e-06, + "loss": 0.2942, + "step": 13251 + }, + { + "epoch": 0.6207898065301916, + "grad_norm": 0.6206368705895141, + "learning_rate": 4.038070800109237e-06, + "loss": 0.2815, + "step": 13252 + }, + { + "epoch": 0.6208366515201199, + "grad_norm": 0.6398354724061395, + "learning_rate": 4.037921285182005e-06, + "loss": 0.2862, + "step": 13253 + }, + { + "epoch": 0.6208834965100483, + "grad_norm": 0.6768901880268648, + "learning_rate": 4.037771761404422e-06, + "loss": 0.3062, + "step": 13254 + }, + { + "epoch": 0.6209303414999766, + "grad_norm": 0.5713956057764803, + "learning_rate": 4.037622228777349e-06, + "loss": 0.2849, + "step": 13255 + }, + { + "epoch": 0.6209771864899049, + "grad_norm": 0.643130948614081, + "learning_rate": 4.037472687301646e-06, + "loss": 0.3138, + "step": 13256 + }, + { + "epoch": 0.6210240314798332, + "grad_norm": 0.6259390308546399, + "learning_rate": 4.037323136978174e-06, + "loss": 0.3019, + "step": 13257 + }, + { + "epoch": 0.6210708764697616, + "grad_norm": 0.5668583925545715, + "learning_rate": 4.037173577807794e-06, + "loss": 0.2739, + "step": 13258 + }, + { + "epoch": 0.6211177214596899, + "grad_norm": 0.6020141573439189, + "learning_rate": 4.0370240097913674e-06, + "loss": 0.2792, + "step": 13259 + }, + { + "epoch": 0.6211645664496183, + "grad_norm": 0.5925612971776925, + "learning_rate": 4.036874432929752e-06, + "loss": 0.274, + "step": 13260 + }, + { + "epoch": 0.6212114114395465, + "grad_norm": 0.6651124460381679, + "learning_rate": 4.036724847223813e-06, + "loss": 0.2947, + "step": 13261 + }, + { + "epoch": 0.6212582564294749, + "grad_norm": 0.6022630314670033, + "learning_rate": 4.036575252674408e-06, + "loss": 0.2889, + "step": 13262 + }, + { + "epoch": 0.6213051014194032, + "grad_norm": 0.6282341404721953, + "learning_rate": 4.036425649282398e-06, + "loss": 0.2933, + "step": 13263 + }, + { + "epoch": 0.6213519464093316, + "grad_norm": 0.566132630811011, + "learning_rate": 4.036276037048644e-06, + "loss": 0.296, + "step": 13264 + }, + { + "epoch": 0.6213987913992598, + "grad_norm": 0.613006137454145, + "learning_rate": 4.036126415974008e-06, + "loss": 0.302, + "step": 13265 + }, + { + "epoch": 0.6214456363891881, + "grad_norm": 0.6175046745190216, + "learning_rate": 4.035976786059351e-06, + "loss": 0.2704, + "step": 13266 + }, + { + "epoch": 0.6214924813791165, + "grad_norm": 0.5419081003196949, + "learning_rate": 4.035827147305533e-06, + "loss": 0.259, + "step": 13267 + }, + { + "epoch": 0.6215393263690449, + "grad_norm": 0.6721563641051311, + "learning_rate": 4.035677499713416e-06, + "loss": 0.3105, + "step": 13268 + }, + { + "epoch": 0.6215861713589732, + "grad_norm": 0.6191072739688703, + "learning_rate": 4.035527843283861e-06, + "loss": 0.2929, + "step": 13269 + }, + { + "epoch": 0.6216330163489014, + "grad_norm": 0.5624959461437895, + "learning_rate": 4.035378178017731e-06, + "loss": 0.2904, + "step": 13270 + }, + { + "epoch": 0.6216798613388298, + "grad_norm": 0.6412288668608954, + "learning_rate": 4.035228503915882e-06, + "loss": 0.2871, + "step": 13271 + }, + { + "epoch": 0.6217267063287581, + "grad_norm": 0.5279634526334848, + "learning_rate": 4.0350788209791815e-06, + "loss": 0.2638, + "step": 13272 + }, + { + "epoch": 0.6217735513186865, + "grad_norm": 0.5688997156961703, + "learning_rate": 4.034929129208487e-06, + "loss": 0.2843, + "step": 13273 + }, + { + "epoch": 0.6218203963086147, + "grad_norm": 0.6164980853263795, + "learning_rate": 4.0347794286046615e-06, + "loss": 0.2997, + "step": 13274 + }, + { + "epoch": 0.6218672412985431, + "grad_norm": 0.568439509310745, + "learning_rate": 4.0346297191685665e-06, + "loss": 0.2892, + "step": 13275 + }, + { + "epoch": 0.6219140862884714, + "grad_norm": 0.5634063676790544, + "learning_rate": 4.034480000901063e-06, + "loss": 0.2702, + "step": 13276 + }, + { + "epoch": 0.6219609312783998, + "grad_norm": 0.5505184596820392, + "learning_rate": 4.034330273803012e-06, + "loss": 0.2612, + "step": 13277 + }, + { + "epoch": 0.6220077762683281, + "grad_norm": 0.6005706749546472, + "learning_rate": 4.034180537875277e-06, + "loss": 0.2812, + "step": 13278 + }, + { + "epoch": 0.6220546212582564, + "grad_norm": 0.5492809317827659, + "learning_rate": 4.0340307931187185e-06, + "loss": 0.2552, + "step": 13279 + }, + { + "epoch": 0.6221014662481847, + "grad_norm": 0.6504173091700931, + "learning_rate": 4.033881039534197e-06, + "loss": 0.2826, + "step": 13280 + }, + { + "epoch": 0.6221483112381131, + "grad_norm": 0.5741158435122712, + "learning_rate": 4.0337312771225765e-06, + "loss": 0.2957, + "step": 13281 + }, + { + "epoch": 0.6221951562280414, + "grad_norm": 0.6246315535910725, + "learning_rate": 4.033581505884719e-06, + "loss": 0.2978, + "step": 13282 + }, + { + "epoch": 0.6222420012179697, + "grad_norm": 0.5519750765805076, + "learning_rate": 4.033431725821484e-06, + "loss": 0.2695, + "step": 13283 + }, + { + "epoch": 0.622288846207898, + "grad_norm": 0.5773507176995251, + "learning_rate": 4.033281936933735e-06, + "loss": 0.2797, + "step": 13284 + }, + { + "epoch": 0.6223356911978264, + "grad_norm": 0.5666961928475952, + "learning_rate": 4.0331321392223345e-06, + "loss": 0.2694, + "step": 13285 + }, + { + "epoch": 0.6223825361877547, + "grad_norm": 0.5297534764553697, + "learning_rate": 4.032982332688143e-06, + "loss": 0.2725, + "step": 13286 + }, + { + "epoch": 0.6224293811776831, + "grad_norm": 0.5598290989637442, + "learning_rate": 4.032832517332025e-06, + "loss": 0.2795, + "step": 13287 + }, + { + "epoch": 0.6224762261676113, + "grad_norm": 0.5608158261624988, + "learning_rate": 4.03268269315484e-06, + "loss": 0.2855, + "step": 13288 + }, + { + "epoch": 0.6225230711575397, + "grad_norm": 0.6345026399432997, + "learning_rate": 4.032532860157452e-06, + "loss": 0.2761, + "step": 13289 + }, + { + "epoch": 0.622569916147468, + "grad_norm": 0.6130272821682468, + "learning_rate": 4.032383018340721e-06, + "loss": 0.2954, + "step": 13290 + }, + { + "epoch": 0.6226167611373964, + "grad_norm": 0.5766194187037184, + "learning_rate": 4.032233167705513e-06, + "loss": 0.2668, + "step": 13291 + }, + { + "epoch": 0.6226636061273246, + "grad_norm": 0.5613422676147886, + "learning_rate": 4.032083308252687e-06, + "loss": 0.2823, + "step": 13292 + }, + { + "epoch": 0.622710451117253, + "grad_norm": 0.64812797162174, + "learning_rate": 4.031933439983106e-06, + "loss": 0.301, + "step": 13293 + }, + { + "epoch": 0.6227572961071813, + "grad_norm": 0.576112497914643, + "learning_rate": 4.031783562897634e-06, + "loss": 0.2879, + "step": 13294 + }, + { + "epoch": 0.6228041410971097, + "grad_norm": 0.5990644942505313, + "learning_rate": 4.0316336769971325e-06, + "loss": 0.2696, + "step": 13295 + }, + { + "epoch": 0.622850986087038, + "grad_norm": 0.6108309478641751, + "learning_rate": 4.0314837822824636e-06, + "loss": 0.2842, + "step": 13296 + }, + { + "epoch": 0.6228978310769663, + "grad_norm": 0.5742872926230252, + "learning_rate": 4.031333878754492e-06, + "loss": 0.2769, + "step": 13297 + }, + { + "epoch": 0.6229446760668946, + "grad_norm": 0.6427017236547228, + "learning_rate": 4.031183966414077e-06, + "loss": 0.304, + "step": 13298 + }, + { + "epoch": 0.622991521056823, + "grad_norm": 0.6441400771663387, + "learning_rate": 4.0310340452620845e-06, + "loss": 0.2987, + "step": 13299 + }, + { + "epoch": 0.6230383660467513, + "grad_norm": 0.6136633314941818, + "learning_rate": 4.030884115299375e-06, + "loss": 0.2714, + "step": 13300 + }, + { + "epoch": 0.6230852110366796, + "grad_norm": 0.6383268604731301, + "learning_rate": 4.030734176526813e-06, + "loss": 0.298, + "step": 13301 + }, + { + "epoch": 0.6231320560266079, + "grad_norm": 0.592658744249545, + "learning_rate": 4.030584228945259e-06, + "loss": 0.3023, + "step": 13302 + }, + { + "epoch": 0.6231789010165363, + "grad_norm": 0.5389137625581257, + "learning_rate": 4.030434272555579e-06, + "loss": 0.2709, + "step": 13303 + }, + { + "epoch": 0.6232257460064646, + "grad_norm": 0.6182308760419436, + "learning_rate": 4.030284307358634e-06, + "loss": 0.2943, + "step": 13304 + }, + { + "epoch": 0.623272590996393, + "grad_norm": 0.564991183679247, + "learning_rate": 4.030134333355288e-06, + "loss": 0.291, + "step": 13305 + }, + { + "epoch": 0.6233194359863212, + "grad_norm": 0.5330077084850356, + "learning_rate": 4.029984350546402e-06, + "loss": 0.2674, + "step": 13306 + }, + { + "epoch": 0.6233662809762496, + "grad_norm": 0.5716781451063515, + "learning_rate": 4.029834358932842e-06, + "loss": 0.2619, + "step": 13307 + }, + { + "epoch": 0.6234131259661779, + "grad_norm": 0.5644832699728477, + "learning_rate": 4.029684358515469e-06, + "loss": 0.2573, + "step": 13308 + }, + { + "epoch": 0.6234599709561063, + "grad_norm": 0.6019209899133962, + "learning_rate": 4.029534349295147e-06, + "loss": 0.2854, + "step": 13309 + }, + { + "epoch": 0.6235068159460345, + "grad_norm": 0.5647645912245574, + "learning_rate": 4.0293843312727395e-06, + "loss": 0.2699, + "step": 13310 + }, + { + "epoch": 0.6235536609359629, + "grad_norm": 0.5867522174187286, + "learning_rate": 4.02923430444911e-06, + "loss": 0.2667, + "step": 13311 + }, + { + "epoch": 0.6236005059258912, + "grad_norm": 0.6024838018902539, + "learning_rate": 4.02908426882512e-06, + "loss": 0.3071, + "step": 13312 + }, + { + "epoch": 0.6236473509158196, + "grad_norm": 0.6065894487594331, + "learning_rate": 4.028934224401636e-06, + "loss": 0.2764, + "step": 13313 + }, + { + "epoch": 0.6236941959057479, + "grad_norm": 0.5931758793124116, + "learning_rate": 4.028784171179519e-06, + "loss": 0.286, + "step": 13314 + }, + { + "epoch": 0.6237410408956762, + "grad_norm": 0.542495596590036, + "learning_rate": 4.028634109159633e-06, + "loss": 0.2593, + "step": 13315 + }, + { + "epoch": 0.6237878858856045, + "grad_norm": 0.61187124623432, + "learning_rate": 4.028484038342843e-06, + "loss": 0.2722, + "step": 13316 + }, + { + "epoch": 0.6238347308755329, + "grad_norm": 0.5820741964374426, + "learning_rate": 4.028333958730011e-06, + "loss": 0.2963, + "step": 13317 + }, + { + "epoch": 0.6238815758654612, + "grad_norm": 0.5613431100845045, + "learning_rate": 4.028183870322001e-06, + "loss": 0.2652, + "step": 13318 + }, + { + "epoch": 0.6239284208553895, + "grad_norm": 0.6002042903964807, + "learning_rate": 4.028033773119677e-06, + "loss": 0.2764, + "step": 13319 + }, + { + "epoch": 0.6239752658453178, + "grad_norm": 0.5748301600825814, + "learning_rate": 4.027883667123903e-06, + "loss": 0.2775, + "step": 13320 + }, + { + "epoch": 0.6240221108352462, + "grad_norm": 0.5850055978145978, + "learning_rate": 4.027733552335542e-06, + "loss": 0.2949, + "step": 13321 + }, + { + "epoch": 0.6240689558251745, + "grad_norm": 0.6051213240865809, + "learning_rate": 4.027583428755458e-06, + "loss": 0.2694, + "step": 13322 + }, + { + "epoch": 0.6241158008151029, + "grad_norm": 0.6093789250593458, + "learning_rate": 4.027433296384517e-06, + "loss": 0.2806, + "step": 13323 + }, + { + "epoch": 0.6241626458050311, + "grad_norm": 0.5800228010290809, + "learning_rate": 4.02728315522358e-06, + "loss": 0.2746, + "step": 13324 + }, + { + "epoch": 0.6242094907949595, + "grad_norm": 0.6043944062088609, + "learning_rate": 4.027133005273512e-06, + "loss": 0.3018, + "step": 13325 + }, + { + "epoch": 0.6242563357848878, + "grad_norm": 0.5904318070136518, + "learning_rate": 4.026982846535178e-06, + "loss": 0.2651, + "step": 13326 + }, + { + "epoch": 0.6243031807748162, + "grad_norm": 0.5744055952059945, + "learning_rate": 4.026832679009442e-06, + "loss": 0.2735, + "step": 13327 + }, + { + "epoch": 0.6243500257647444, + "grad_norm": 0.554910773653985, + "learning_rate": 4.026682502697167e-06, + "loss": 0.2711, + "step": 13328 + }, + { + "epoch": 0.6243968707546728, + "grad_norm": 0.6121726111945962, + "learning_rate": 4.026532317599218e-06, + "loss": 0.2784, + "step": 13329 + }, + { + "epoch": 0.6244437157446011, + "grad_norm": 0.5757277077119078, + "learning_rate": 4.026382123716459e-06, + "loss": 0.2673, + "step": 13330 + }, + { + "epoch": 0.6244905607345295, + "grad_norm": 0.5458266936477346, + "learning_rate": 4.0262319210497545e-06, + "loss": 0.262, + "step": 13331 + }, + { + "epoch": 0.6245374057244578, + "grad_norm": 0.5661451102489227, + "learning_rate": 4.0260817095999694e-06, + "loss": 0.26, + "step": 13332 + }, + { + "epoch": 0.6245842507143861, + "grad_norm": 0.595905919395124, + "learning_rate": 4.025931489367967e-06, + "loss": 0.2813, + "step": 13333 + }, + { + "epoch": 0.6246310957043144, + "grad_norm": 0.6196333899411591, + "learning_rate": 4.025781260354613e-06, + "loss": 0.2923, + "step": 13334 + }, + { + "epoch": 0.6246779406942428, + "grad_norm": 0.6124466182194819, + "learning_rate": 4.0256310225607715e-06, + "loss": 0.2904, + "step": 13335 + }, + { + "epoch": 0.6247247856841711, + "grad_norm": 0.6058082528564256, + "learning_rate": 4.025480775987306e-06, + "loss": 0.2918, + "step": 13336 + }, + { + "epoch": 0.6247716306740994, + "grad_norm": 0.5942124087367546, + "learning_rate": 4.025330520635083e-06, + "loss": 0.2831, + "step": 13337 + }, + { + "epoch": 0.6248184756640277, + "grad_norm": 0.5883885898905995, + "learning_rate": 4.025180256504966e-06, + "loss": 0.2792, + "step": 13338 + }, + { + "epoch": 0.6248653206539561, + "grad_norm": 0.6653722244410049, + "learning_rate": 4.025029983597819e-06, + "loss": 0.2914, + "step": 13339 + }, + { + "epoch": 0.6249121656438844, + "grad_norm": 0.612160388748058, + "learning_rate": 4.0248797019145094e-06, + "loss": 0.2834, + "step": 13340 + }, + { + "epoch": 0.6249590106338128, + "grad_norm": 0.6198564720794346, + "learning_rate": 4.024729411455899e-06, + "loss": 0.2985, + "step": 13341 + }, + { + "epoch": 0.625005855623741, + "grad_norm": 0.5331203364877484, + "learning_rate": 4.024579112222855e-06, + "loss": 0.2635, + "step": 13342 + }, + { + "epoch": 0.6250527006136694, + "grad_norm": 0.6015409665126286, + "learning_rate": 4.024428804216241e-06, + "loss": 0.3, + "step": 13343 + }, + { + "epoch": 0.6250995456035977, + "grad_norm": 0.5558176737129606, + "learning_rate": 4.024278487436923e-06, + "loss": 0.2563, + "step": 13344 + }, + { + "epoch": 0.6251463905935261, + "grad_norm": 0.6374094490075596, + "learning_rate": 4.0241281618857645e-06, + "loss": 0.3008, + "step": 13345 + }, + { + "epoch": 0.6251932355834543, + "grad_norm": 0.6243158018562961, + "learning_rate": 4.023977827563632e-06, + "loss": 0.2981, + "step": 13346 + }, + { + "epoch": 0.6252400805733827, + "grad_norm": 0.6180683243939403, + "learning_rate": 4.02382748447139e-06, + "loss": 0.2825, + "step": 13347 + }, + { + "epoch": 0.625286925563311, + "grad_norm": 0.6079043984155801, + "learning_rate": 4.023677132609903e-06, + "loss": 0.2895, + "step": 13348 + }, + { + "epoch": 0.6253337705532394, + "grad_norm": 0.633026165886864, + "learning_rate": 4.023526771980038e-06, + "loss": 0.2983, + "step": 13349 + }, + { + "epoch": 0.6253806155431677, + "grad_norm": 0.5831790347353599, + "learning_rate": 4.02337640258266e-06, + "loss": 0.2903, + "step": 13350 + }, + { + "epoch": 0.625427460533096, + "grad_norm": 0.5727173236613776, + "learning_rate": 4.023226024418632e-06, + "loss": 0.2926, + "step": 13351 + }, + { + "epoch": 0.6254743055230243, + "grad_norm": 0.5837290559631985, + "learning_rate": 4.023075637488822e-06, + "loss": 0.2954, + "step": 13352 + }, + { + "epoch": 0.6255211505129527, + "grad_norm": 0.5905511303655862, + "learning_rate": 4.022925241794095e-06, + "loss": 0.2837, + "step": 13353 + }, + { + "epoch": 0.625567995502881, + "grad_norm": 0.5918550882070289, + "learning_rate": 4.022774837335315e-06, + "loss": 0.282, + "step": 13354 + }, + { + "epoch": 0.6256148404928092, + "grad_norm": 0.664569637867571, + "learning_rate": 4.022624424113349e-06, + "loss": 0.2819, + "step": 13355 + }, + { + "epoch": 0.6256616854827376, + "grad_norm": 0.5938406183043105, + "learning_rate": 4.022474002129062e-06, + "loss": 0.2702, + "step": 13356 + }, + { + "epoch": 0.625708530472666, + "grad_norm": 0.6397240717996313, + "learning_rate": 4.022323571383319e-06, + "loss": 0.2982, + "step": 13357 + }, + { + "epoch": 0.6257553754625943, + "grad_norm": 0.5438808168271856, + "learning_rate": 4.0221731318769885e-06, + "loss": 0.2669, + "step": 13358 + }, + { + "epoch": 0.6258022204525227, + "grad_norm": 0.5645472940961265, + "learning_rate": 4.022022683610933e-06, + "loss": 0.2962, + "step": 13359 + }, + { + "epoch": 0.6258490654424509, + "grad_norm": 0.5636484835057843, + "learning_rate": 4.021872226586019e-06, + "loss": 0.2738, + "step": 13360 + }, + { + "epoch": 0.6258959104323792, + "grad_norm": 0.5883372270365413, + "learning_rate": 4.0217217608031136e-06, + "loss": 0.2802, + "step": 13361 + }, + { + "epoch": 0.6259427554223076, + "grad_norm": 0.6410256418456799, + "learning_rate": 4.021571286263082e-06, + "loss": 0.2864, + "step": 13362 + }, + { + "epoch": 0.625989600412236, + "grad_norm": 0.5776715241527063, + "learning_rate": 4.02142080296679e-06, + "loss": 0.2686, + "step": 13363 + }, + { + "epoch": 0.6260364454021642, + "grad_norm": 0.5885439959511644, + "learning_rate": 4.021270310915103e-06, + "loss": 0.2913, + "step": 13364 + }, + { + "epoch": 0.6260832903920925, + "grad_norm": 0.5816562017318406, + "learning_rate": 4.021119810108889e-06, + "loss": 0.2981, + "step": 13365 + }, + { + "epoch": 0.6261301353820209, + "grad_norm": 0.547818999412411, + "learning_rate": 4.020969300549011e-06, + "loss": 0.2641, + "step": 13366 + }, + { + "epoch": 0.6261769803719492, + "grad_norm": 0.6092746224525943, + "learning_rate": 4.0208187822363386e-06, + "loss": 0.2745, + "step": 13367 + }, + { + "epoch": 0.6262238253618776, + "grad_norm": 0.5746480143957737, + "learning_rate": 4.020668255171736e-06, + "loss": 0.2922, + "step": 13368 + }, + { + "epoch": 0.6262706703518058, + "grad_norm": 0.6317140707371712, + "learning_rate": 4.020517719356069e-06, + "loss": 0.2624, + "step": 13369 + }, + { + "epoch": 0.6263175153417342, + "grad_norm": 0.5371579863429701, + "learning_rate": 4.020367174790207e-06, + "loss": 0.2609, + "step": 13370 + }, + { + "epoch": 0.6263643603316625, + "grad_norm": 0.5916685180857764, + "learning_rate": 4.020216621475011e-06, + "loss": 0.2915, + "step": 13371 + }, + { + "epoch": 0.6264112053215909, + "grad_norm": 0.5882291954375266, + "learning_rate": 4.020066059411352e-06, + "loss": 0.2915, + "step": 13372 + }, + { + "epoch": 0.6264580503115191, + "grad_norm": 0.5853160730335802, + "learning_rate": 4.019915488600095e-06, + "loss": 0.279, + "step": 13373 + }, + { + "epoch": 0.6265048953014475, + "grad_norm": 0.6089308183119325, + "learning_rate": 4.019764909042106e-06, + "loss": 0.2946, + "step": 13374 + }, + { + "epoch": 0.6265517402913758, + "grad_norm": 0.616250050376528, + "learning_rate": 4.019614320738251e-06, + "loss": 0.2929, + "step": 13375 + }, + { + "epoch": 0.6265985852813042, + "grad_norm": 0.6007237519244517, + "learning_rate": 4.0194637236893984e-06, + "loss": 0.2972, + "step": 13376 + }, + { + "epoch": 0.6266454302712325, + "grad_norm": 0.5831626944676784, + "learning_rate": 4.019313117896414e-06, + "loss": 0.2759, + "step": 13377 + }, + { + "epoch": 0.6266922752611608, + "grad_norm": 0.5967845032242236, + "learning_rate": 4.019162503360165e-06, + "loss": 0.2663, + "step": 13378 + }, + { + "epoch": 0.6267391202510891, + "grad_norm": 0.6350376886415156, + "learning_rate": 4.019011880081517e-06, + "loss": 0.2965, + "step": 13379 + }, + { + "epoch": 0.6267859652410175, + "grad_norm": 0.6108880857292627, + "learning_rate": 4.018861248061338e-06, + "loss": 0.3111, + "step": 13380 + }, + { + "epoch": 0.6268328102309458, + "grad_norm": 0.6467318830217871, + "learning_rate": 4.018710607300494e-06, + "loss": 0.2893, + "step": 13381 + }, + { + "epoch": 0.6268796552208741, + "grad_norm": 0.5921160375712868, + "learning_rate": 4.018559957799851e-06, + "loss": 0.2803, + "step": 13382 + }, + { + "epoch": 0.6269265002108024, + "grad_norm": 0.5930329967460541, + "learning_rate": 4.018409299560279e-06, + "loss": 0.2841, + "step": 13383 + }, + { + "epoch": 0.6269733452007308, + "grad_norm": 0.5913240032308705, + "learning_rate": 4.018258632582641e-06, + "loss": 0.2626, + "step": 13384 + }, + { + "epoch": 0.6270201901906591, + "grad_norm": 0.6212670240665388, + "learning_rate": 4.018107956867808e-06, + "loss": 0.2772, + "step": 13385 + }, + { + "epoch": 0.6270670351805875, + "grad_norm": 0.6029098339712363, + "learning_rate": 4.017957272416645e-06, + "loss": 0.2729, + "step": 13386 + }, + { + "epoch": 0.6271138801705157, + "grad_norm": 0.6547058524904555, + "learning_rate": 4.017806579230019e-06, + "loss": 0.2947, + "step": 13387 + }, + { + "epoch": 0.6271607251604441, + "grad_norm": 0.5827044360341661, + "learning_rate": 4.017655877308797e-06, + "loss": 0.2814, + "step": 13388 + }, + { + "epoch": 0.6272075701503724, + "grad_norm": 0.5967776832107701, + "learning_rate": 4.017505166653848e-06, + "loss": 0.28, + "step": 13389 + }, + { + "epoch": 0.6272544151403008, + "grad_norm": 0.5682619906788402, + "learning_rate": 4.017354447266037e-06, + "loss": 0.2804, + "step": 13390 + }, + { + "epoch": 0.627301260130229, + "grad_norm": 0.5718545616231673, + "learning_rate": 4.017203719146234e-06, + "loss": 0.282, + "step": 13391 + }, + { + "epoch": 0.6273481051201574, + "grad_norm": 0.616515803126268, + "learning_rate": 4.0170529822953044e-06, + "loss": 0.2935, + "step": 13392 + }, + { + "epoch": 0.6273949501100857, + "grad_norm": 0.5955151333921938, + "learning_rate": 4.016902236714115e-06, + "loss": 0.2813, + "step": 13393 + }, + { + "epoch": 0.6274417951000141, + "grad_norm": 0.700337490148713, + "learning_rate": 4.016751482403536e-06, + "loss": 0.2859, + "step": 13394 + }, + { + "epoch": 0.6274886400899424, + "grad_norm": 0.5727998112777919, + "learning_rate": 4.016600719364433e-06, + "loss": 0.2953, + "step": 13395 + }, + { + "epoch": 0.6275354850798707, + "grad_norm": 0.62565346025595, + "learning_rate": 4.0164499475976735e-06, + "loss": 0.2797, + "step": 13396 + }, + { + "epoch": 0.627582330069799, + "grad_norm": 0.5820906251218072, + "learning_rate": 4.016299167104127e-06, + "loss": 0.2812, + "step": 13397 + }, + { + "epoch": 0.6276291750597274, + "grad_norm": 0.6595592584066262, + "learning_rate": 4.016148377884659e-06, + "loss": 0.3085, + "step": 13398 + }, + { + "epoch": 0.6276760200496557, + "grad_norm": 0.5751463332684742, + "learning_rate": 4.015997579940138e-06, + "loss": 0.2604, + "step": 13399 + }, + { + "epoch": 0.627722865039584, + "grad_norm": 0.5846286582867157, + "learning_rate": 4.015846773271432e-06, + "loss": 0.3012, + "step": 13400 + }, + { + "epoch": 0.6277697100295123, + "grad_norm": 0.592751501798231, + "learning_rate": 4.015695957879409e-06, + "loss": 0.2751, + "step": 13401 + }, + { + "epoch": 0.6278165550194407, + "grad_norm": 0.6754506638440773, + "learning_rate": 4.015545133764937e-06, + "loss": 0.2924, + "step": 13402 + }, + { + "epoch": 0.627863400009369, + "grad_norm": 0.5861875109504777, + "learning_rate": 4.015394300928883e-06, + "loss": 0.2734, + "step": 13403 + }, + { + "epoch": 0.6279102449992974, + "grad_norm": 0.5908292853325734, + "learning_rate": 4.015243459372116e-06, + "loss": 0.2923, + "step": 13404 + }, + { + "epoch": 0.6279570899892256, + "grad_norm": 0.5965575218804902, + "learning_rate": 4.015092609095504e-06, + "loss": 0.2806, + "step": 13405 + }, + { + "epoch": 0.628003934979154, + "grad_norm": 0.6026995783750434, + "learning_rate": 4.014941750099915e-06, + "loss": 0.3046, + "step": 13406 + }, + { + "epoch": 0.6280507799690823, + "grad_norm": 0.5922587290380575, + "learning_rate": 4.0147908823862155e-06, + "loss": 0.2671, + "step": 13407 + }, + { + "epoch": 0.6280976249590107, + "grad_norm": 0.555945783607279, + "learning_rate": 4.014640005955277e-06, + "loss": 0.2706, + "step": 13408 + }, + { + "epoch": 0.6281444699489389, + "grad_norm": 0.6653421288657361, + "learning_rate": 4.014489120807965e-06, + "loss": 0.3108, + "step": 13409 + }, + { + "epoch": 0.6281913149388673, + "grad_norm": 0.6405175760162262, + "learning_rate": 4.014338226945149e-06, + "loss": 0.2836, + "step": 13410 + }, + { + "epoch": 0.6282381599287956, + "grad_norm": 0.5555732123877516, + "learning_rate": 4.014187324367698e-06, + "loss": 0.2765, + "step": 13411 + }, + { + "epoch": 0.628285004918724, + "grad_norm": 0.5611347560314626, + "learning_rate": 4.014036413076478e-06, + "loss": 0.2701, + "step": 13412 + }, + { + "epoch": 0.6283318499086523, + "grad_norm": 0.5584476226511447, + "learning_rate": 4.01388549307236e-06, + "loss": 0.2643, + "step": 13413 + }, + { + "epoch": 0.6283786948985806, + "grad_norm": 0.558799767140567, + "learning_rate": 4.013734564356211e-06, + "loss": 0.2814, + "step": 13414 + }, + { + "epoch": 0.6284255398885089, + "grad_norm": 0.5577944353718758, + "learning_rate": 4.0135836269289e-06, + "loss": 0.2707, + "step": 13415 + }, + { + "epoch": 0.6284723848784373, + "grad_norm": 0.56434817867159, + "learning_rate": 4.013432680791296e-06, + "loss": 0.2855, + "step": 13416 + }, + { + "epoch": 0.6285192298683656, + "grad_norm": 0.6104211931554142, + "learning_rate": 4.013281725944267e-06, + "loss": 0.2807, + "step": 13417 + }, + { + "epoch": 0.6285660748582939, + "grad_norm": 0.6072082741446629, + "learning_rate": 4.013130762388682e-06, + "loss": 0.2805, + "step": 13418 + }, + { + "epoch": 0.6286129198482222, + "grad_norm": 0.6138185807626451, + "learning_rate": 4.01297979012541e-06, + "loss": 0.2927, + "step": 13419 + }, + { + "epoch": 0.6286597648381506, + "grad_norm": 0.6110203821702926, + "learning_rate": 4.012828809155319e-06, + "loss": 0.2952, + "step": 13420 + }, + { + "epoch": 0.6287066098280789, + "grad_norm": 0.5778103224674105, + "learning_rate": 4.012677819479279e-06, + "loss": 0.2742, + "step": 13421 + }, + { + "epoch": 0.6287534548180073, + "grad_norm": 0.5762713322593542, + "learning_rate": 4.012526821098159e-06, + "loss": 0.2801, + "step": 13422 + }, + { + "epoch": 0.6288002998079355, + "grad_norm": 0.5917224195358715, + "learning_rate": 4.012375814012826e-06, + "loss": 0.2854, + "step": 13423 + }, + { + "epoch": 0.6288471447978639, + "grad_norm": 0.5525129547573797, + "learning_rate": 4.012224798224151e-06, + "loss": 0.2593, + "step": 13424 + }, + { + "epoch": 0.6288939897877922, + "grad_norm": 0.6714369656516362, + "learning_rate": 4.0120737737330025e-06, + "loss": 0.2947, + "step": 13425 + }, + { + "epoch": 0.6289408347777206, + "grad_norm": 0.5499358727113299, + "learning_rate": 4.011922740540249e-06, + "loss": 0.2821, + "step": 13426 + }, + { + "epoch": 0.6289876797676488, + "grad_norm": 0.5998997879651853, + "learning_rate": 4.011771698646761e-06, + "loss": 0.2769, + "step": 13427 + }, + { + "epoch": 0.6290345247575772, + "grad_norm": 0.5346831236812327, + "learning_rate": 4.0116206480534065e-06, + "loss": 0.2708, + "step": 13428 + }, + { + "epoch": 0.6290813697475055, + "grad_norm": 0.5958754534723898, + "learning_rate": 4.011469588761055e-06, + "loss": 0.2852, + "step": 13429 + }, + { + "epoch": 0.6291282147374339, + "grad_norm": 0.6090362701817845, + "learning_rate": 4.011318520770575e-06, + "loss": 0.3081, + "step": 13430 + }, + { + "epoch": 0.6291750597273622, + "grad_norm": 0.6031617409842273, + "learning_rate": 4.011167444082838e-06, + "loss": 0.2624, + "step": 13431 + }, + { + "epoch": 0.6292219047172904, + "grad_norm": 0.5914781746131558, + "learning_rate": 4.011016358698712e-06, + "loss": 0.2984, + "step": 13432 + }, + { + "epoch": 0.6292687497072188, + "grad_norm": 0.574215447288072, + "learning_rate": 4.010865264619066e-06, + "loss": 0.267, + "step": 13433 + }, + { + "epoch": 0.6293155946971472, + "grad_norm": 0.644722105679829, + "learning_rate": 4.01071416184477e-06, + "loss": 0.3015, + "step": 13434 + }, + { + "epoch": 0.6293624396870755, + "grad_norm": 0.5639435364524175, + "learning_rate": 4.010563050376695e-06, + "loss": 0.2617, + "step": 13435 + }, + { + "epoch": 0.6294092846770037, + "grad_norm": 0.6175174636243964, + "learning_rate": 4.0104119302157076e-06, + "loss": 0.2932, + "step": 13436 + }, + { + "epoch": 0.6294561296669321, + "grad_norm": 0.5799331766592039, + "learning_rate": 4.010260801362681e-06, + "loss": 0.2851, + "step": 13437 + }, + { + "epoch": 0.6295029746568604, + "grad_norm": 0.5639789527366849, + "learning_rate": 4.010109663818482e-06, + "loss": 0.2646, + "step": 13438 + }, + { + "epoch": 0.6295498196467888, + "grad_norm": 0.6436501496857111, + "learning_rate": 4.009958517583982e-06, + "loss": 0.2974, + "step": 13439 + }, + { + "epoch": 0.6295966646367172, + "grad_norm": 0.5941812523963078, + "learning_rate": 4.009807362660049e-06, + "loss": 0.3012, + "step": 13440 + }, + { + "epoch": 0.6296435096266454, + "grad_norm": 0.5620824428036286, + "learning_rate": 4.009656199047555e-06, + "loss": 0.2766, + "step": 13441 + }, + { + "epoch": 0.6296903546165737, + "grad_norm": 0.5363511585116527, + "learning_rate": 4.009505026747369e-06, + "loss": 0.2746, + "step": 13442 + }, + { + "epoch": 0.6297371996065021, + "grad_norm": 0.5769076245073632, + "learning_rate": 4.009353845760361e-06, + "loss": 0.2843, + "step": 13443 + }, + { + "epoch": 0.6297840445964304, + "grad_norm": 0.5637890827741661, + "learning_rate": 4.009202656087401e-06, + "loss": 0.2774, + "step": 13444 + }, + { + "epoch": 0.6298308895863587, + "grad_norm": 0.702623373720875, + "learning_rate": 4.009051457729359e-06, + "loss": 0.2731, + "step": 13445 + }, + { + "epoch": 0.629877734576287, + "grad_norm": 0.5377547152895874, + "learning_rate": 4.008900250687105e-06, + "loss": 0.2748, + "step": 13446 + }, + { + "epoch": 0.6299245795662154, + "grad_norm": 0.631667198092289, + "learning_rate": 4.0087490349615095e-06, + "loss": 0.2894, + "step": 13447 + }, + { + "epoch": 0.6299714245561437, + "grad_norm": 0.6276669719963137, + "learning_rate": 4.0085978105534415e-06, + "loss": 0.2967, + "step": 13448 + }, + { + "epoch": 0.6300182695460721, + "grad_norm": 0.6417431219908301, + "learning_rate": 4.008446577463774e-06, + "loss": 0.2965, + "step": 13449 + }, + { + "epoch": 0.6300651145360003, + "grad_norm": 0.6070392980618646, + "learning_rate": 4.008295335693374e-06, + "loss": 0.2946, + "step": 13450 + }, + { + "epoch": 0.6301119595259287, + "grad_norm": 0.5905343646207566, + "learning_rate": 4.008144085243115e-06, + "loss": 0.2832, + "step": 13451 + }, + { + "epoch": 0.630158804515857, + "grad_norm": 0.6049353658192176, + "learning_rate": 4.007992826113865e-06, + "loss": 0.2765, + "step": 13452 + }, + { + "epoch": 0.6302056495057854, + "grad_norm": 0.6314673874301299, + "learning_rate": 4.007841558306495e-06, + "loss": 0.3039, + "step": 13453 + }, + { + "epoch": 0.6302524944957136, + "grad_norm": 0.6266966869895335, + "learning_rate": 4.0076902818218765e-06, + "loss": 0.2784, + "step": 13454 + }, + { + "epoch": 0.630299339485642, + "grad_norm": 0.58895449215932, + "learning_rate": 4.007538996660879e-06, + "loss": 0.2593, + "step": 13455 + }, + { + "epoch": 0.6303461844755703, + "grad_norm": 0.5861918626502244, + "learning_rate": 4.0073877028243736e-06, + "loss": 0.2948, + "step": 13456 + }, + { + "epoch": 0.6303930294654987, + "grad_norm": 0.5581139779107799, + "learning_rate": 4.0072364003132305e-06, + "loss": 0.2667, + "step": 13457 + }, + { + "epoch": 0.630439874455427, + "grad_norm": 0.5967124816346581, + "learning_rate": 4.007085089128321e-06, + "loss": 0.2994, + "step": 13458 + }, + { + "epoch": 0.6304867194453553, + "grad_norm": 0.5609532325477534, + "learning_rate": 4.006933769270516e-06, + "loss": 0.2724, + "step": 13459 + }, + { + "epoch": 0.6305335644352836, + "grad_norm": 0.5883169952341772, + "learning_rate": 4.006782440740686e-06, + "loss": 0.2917, + "step": 13460 + }, + { + "epoch": 0.630580409425212, + "grad_norm": 0.6052695121679714, + "learning_rate": 4.0066311035396995e-06, + "loss": 0.2973, + "step": 13461 + }, + { + "epoch": 0.6306272544151403, + "grad_norm": 0.6144474532022537, + "learning_rate": 4.0064797576684325e-06, + "loss": 0.296, + "step": 13462 + }, + { + "epoch": 0.6306740994050686, + "grad_norm": 0.6662859626492847, + "learning_rate": 4.006328403127752e-06, + "loss": 0.311, + "step": 13463 + }, + { + "epoch": 0.6307209443949969, + "grad_norm": 0.6087945463064695, + "learning_rate": 4.00617703991853e-06, + "loss": 0.2671, + "step": 13464 + }, + { + "epoch": 0.6307677893849253, + "grad_norm": 0.6472807260033346, + "learning_rate": 4.006025668041638e-06, + "loss": 0.2786, + "step": 13465 + }, + { + "epoch": 0.6308146343748536, + "grad_norm": 0.5656837528188728, + "learning_rate": 4.005874287497947e-06, + "loss": 0.2784, + "step": 13466 + }, + { + "epoch": 0.630861479364782, + "grad_norm": 0.5609509035864307, + "learning_rate": 4.005722898288328e-06, + "loss": 0.2945, + "step": 13467 + }, + { + "epoch": 0.6309083243547102, + "grad_norm": 0.5887713967300103, + "learning_rate": 4.005571500413652e-06, + "loss": 0.2981, + "step": 13468 + }, + { + "epoch": 0.6309551693446386, + "grad_norm": 0.5767550377695851, + "learning_rate": 4.00542009387479e-06, + "loss": 0.2903, + "step": 13469 + }, + { + "epoch": 0.6310020143345669, + "grad_norm": 0.6921117881514277, + "learning_rate": 4.005268678672613e-06, + "loss": 0.299, + "step": 13470 + }, + { + "epoch": 0.6310488593244953, + "grad_norm": 0.5812314345251801, + "learning_rate": 4.005117254807995e-06, + "loss": 0.2628, + "step": 13471 + }, + { + "epoch": 0.6310957043144235, + "grad_norm": 0.593269403414059, + "learning_rate": 4.004965822281804e-06, + "loss": 0.2802, + "step": 13472 + }, + { + "epoch": 0.6311425493043519, + "grad_norm": 0.6495264173189186, + "learning_rate": 4.004814381094914e-06, + "loss": 0.3047, + "step": 13473 + }, + { + "epoch": 0.6311893942942802, + "grad_norm": 0.6218579208158038, + "learning_rate": 4.004662931248195e-06, + "loss": 0.285, + "step": 13474 + }, + { + "epoch": 0.6312362392842086, + "grad_norm": 0.5288811386720272, + "learning_rate": 4.00451147274252e-06, + "loss": 0.2642, + "step": 13475 + }, + { + "epoch": 0.6312830842741369, + "grad_norm": 0.5994426568197762, + "learning_rate": 4.004360005578758e-06, + "loss": 0.3002, + "step": 13476 + }, + { + "epoch": 0.6313299292640652, + "grad_norm": 0.619739043459857, + "learning_rate": 4.004208529757782e-06, + "loss": 0.2779, + "step": 13477 + }, + { + "epoch": 0.6313767742539935, + "grad_norm": 0.6049252987629801, + "learning_rate": 4.004057045280466e-06, + "loss": 0.2683, + "step": 13478 + }, + { + "epoch": 0.6314236192439219, + "grad_norm": 0.6145223671982163, + "learning_rate": 4.003905552147677e-06, + "loss": 0.2673, + "step": 13479 + }, + { + "epoch": 0.6314704642338502, + "grad_norm": 0.6557106182885144, + "learning_rate": 4.003754050360292e-06, + "loss": 0.2865, + "step": 13480 + }, + { + "epoch": 0.6315173092237785, + "grad_norm": 0.7651876923156373, + "learning_rate": 4.00360253991918e-06, + "loss": 0.3309, + "step": 13481 + }, + { + "epoch": 0.6315641542137068, + "grad_norm": 0.5382834846275567, + "learning_rate": 4.003451020825212e-06, + "loss": 0.2767, + "step": 13482 + }, + { + "epoch": 0.6316109992036352, + "grad_norm": 0.5910135476807667, + "learning_rate": 4.003299493079263e-06, + "loss": 0.2755, + "step": 13483 + }, + { + "epoch": 0.6316578441935635, + "grad_norm": 0.6016467167885721, + "learning_rate": 4.003147956682203e-06, + "loss": 0.2977, + "step": 13484 + }, + { + "epoch": 0.6317046891834919, + "grad_norm": 0.6282632915224092, + "learning_rate": 4.002996411634903e-06, + "loss": 0.2752, + "step": 13485 + }, + { + "epoch": 0.6317515341734201, + "grad_norm": 0.6262806727910842, + "learning_rate": 4.002844857938238e-06, + "loss": 0.2801, + "step": 13486 + }, + { + "epoch": 0.6317983791633485, + "grad_norm": 0.6160040918183494, + "learning_rate": 4.002693295593077e-06, + "loss": 0.282, + "step": 13487 + }, + { + "epoch": 0.6318452241532768, + "grad_norm": 0.5819747326590022, + "learning_rate": 4.002541724600294e-06, + "loss": 0.2739, + "step": 13488 + }, + { + "epoch": 0.6318920691432052, + "grad_norm": 0.5805584587130513, + "learning_rate": 4.002390144960761e-06, + "loss": 0.2739, + "step": 13489 + }, + { + "epoch": 0.6319389141331334, + "grad_norm": 0.6501929753131633, + "learning_rate": 4.002238556675352e-06, + "loss": 0.3072, + "step": 13490 + }, + { + "epoch": 0.6319857591230618, + "grad_norm": 0.6140044121412301, + "learning_rate": 4.002086959744936e-06, + "loss": 0.2697, + "step": 13491 + }, + { + "epoch": 0.6320326041129901, + "grad_norm": 0.5755297930802975, + "learning_rate": 4.001935354170387e-06, + "loss": 0.2704, + "step": 13492 + }, + { + "epoch": 0.6320794491029185, + "grad_norm": 0.591494286938117, + "learning_rate": 4.001783739952578e-06, + "loss": 0.3035, + "step": 13493 + }, + { + "epoch": 0.6321262940928468, + "grad_norm": 0.6396492816670062, + "learning_rate": 4.001632117092382e-06, + "loss": 0.2951, + "step": 13494 + }, + { + "epoch": 0.6321731390827751, + "grad_norm": 0.6034903788153965, + "learning_rate": 4.00148048559067e-06, + "loss": 0.2921, + "step": 13495 + }, + { + "epoch": 0.6322199840727034, + "grad_norm": 0.6692015766663686, + "learning_rate": 4.001328845448315e-06, + "loss": 0.2774, + "step": 13496 + }, + { + "epoch": 0.6322668290626318, + "grad_norm": 0.6062334792523063, + "learning_rate": 4.0011771966661895e-06, + "loss": 0.2708, + "step": 13497 + }, + { + "epoch": 0.6323136740525601, + "grad_norm": 0.65313229183533, + "learning_rate": 4.001025539245167e-06, + "loss": 0.3151, + "step": 13498 + }, + { + "epoch": 0.6323605190424884, + "grad_norm": 0.5882059363841519, + "learning_rate": 4.000873873186119e-06, + "loss": 0.2829, + "step": 13499 + }, + { + "epoch": 0.6324073640324167, + "grad_norm": 0.5706511095231241, + "learning_rate": 4.0007221984899195e-06, + "loss": 0.2852, + "step": 13500 + }, + { + "epoch": 0.6324542090223451, + "grad_norm": 0.6014758619776526, + "learning_rate": 4.0005705151574416e-06, + "loss": 0.2809, + "step": 13501 + }, + { + "epoch": 0.6325010540122734, + "grad_norm": 0.56756271714323, + "learning_rate": 4.000418823189557e-06, + "loss": 0.2519, + "step": 13502 + }, + { + "epoch": 0.6325478990022018, + "grad_norm": 0.6191295488398842, + "learning_rate": 4.000267122587139e-06, + "loss": 0.2841, + "step": 13503 + }, + { + "epoch": 0.63259474399213, + "grad_norm": 0.636666098168621, + "learning_rate": 4.000115413351061e-06, + "loss": 0.2815, + "step": 13504 + }, + { + "epoch": 0.6326415889820584, + "grad_norm": 0.5781063281048648, + "learning_rate": 3.999963695482196e-06, + "loss": 0.2827, + "step": 13505 + }, + { + "epoch": 0.6326884339719867, + "grad_norm": 0.6022296438040456, + "learning_rate": 3.999811968981416e-06, + "loss": 0.2921, + "step": 13506 + }, + { + "epoch": 0.6327352789619151, + "grad_norm": 0.543754659453636, + "learning_rate": 3.999660233849597e-06, + "loss": 0.2618, + "step": 13507 + }, + { + "epoch": 0.6327821239518433, + "grad_norm": 0.576135839708898, + "learning_rate": 3.999508490087609e-06, + "loss": 0.302, + "step": 13508 + }, + { + "epoch": 0.6328289689417717, + "grad_norm": 0.5920010721083568, + "learning_rate": 3.999356737696327e-06, + "loss": 0.2785, + "step": 13509 + }, + { + "epoch": 0.6328758139317, + "grad_norm": 0.5848178259920711, + "learning_rate": 3.999204976676623e-06, + "loss": 0.2807, + "step": 13510 + }, + { + "epoch": 0.6329226589216284, + "grad_norm": 0.5738941650220426, + "learning_rate": 3.999053207029372e-06, + "loss": 0.2827, + "step": 13511 + }, + { + "epoch": 0.6329695039115567, + "grad_norm": 0.6127696757281833, + "learning_rate": 3.9989014287554454e-06, + "loss": 0.2749, + "step": 13512 + }, + { + "epoch": 0.633016348901485, + "grad_norm": 0.6606518039813009, + "learning_rate": 3.99874964185572e-06, + "loss": 0.3002, + "step": 13513 + }, + { + "epoch": 0.6330631938914133, + "grad_norm": 0.605210942916301, + "learning_rate": 3.998597846331065e-06, + "loss": 0.2862, + "step": 13514 + }, + { + "epoch": 0.6331100388813417, + "grad_norm": 0.5406980756496359, + "learning_rate": 3.9984460421823565e-06, + "loss": 0.2515, + "step": 13515 + }, + { + "epoch": 0.63315688387127, + "grad_norm": 0.6366887388404707, + "learning_rate": 3.998294229410468e-06, + "loss": 0.2809, + "step": 13516 + }, + { + "epoch": 0.6332037288611982, + "grad_norm": 0.6353427344427809, + "learning_rate": 3.998142408016274e-06, + "loss": 0.2804, + "step": 13517 + }, + { + "epoch": 0.6332505738511266, + "grad_norm": 0.5369749244423937, + "learning_rate": 3.997990578000645e-06, + "loss": 0.2726, + "step": 13518 + }, + { + "epoch": 0.633297418841055, + "grad_norm": 0.5824430802941932, + "learning_rate": 3.997838739364457e-06, + "loss": 0.2943, + "step": 13519 + }, + { + "epoch": 0.6333442638309833, + "grad_norm": 0.5567743568610243, + "learning_rate": 3.997686892108585e-06, + "loss": 0.2696, + "step": 13520 + }, + { + "epoch": 0.6333911088209117, + "grad_norm": 0.6296529169000633, + "learning_rate": 3.9975350362338996e-06, + "loss": 0.2868, + "step": 13521 + }, + { + "epoch": 0.6334379538108399, + "grad_norm": 0.5842509694897335, + "learning_rate": 3.997383171741277e-06, + "loss": 0.269, + "step": 13522 + }, + { + "epoch": 0.6334847988007682, + "grad_norm": 0.5978432188182831, + "learning_rate": 3.997231298631591e-06, + "loss": 0.2709, + "step": 13523 + }, + { + "epoch": 0.6335316437906966, + "grad_norm": 0.6197767270988975, + "learning_rate": 3.997079416905715e-06, + "loss": 0.3135, + "step": 13524 + }, + { + "epoch": 0.633578488780625, + "grad_norm": 0.5745882829159692, + "learning_rate": 3.996927526564524e-06, + "loss": 0.2572, + "step": 13525 + }, + { + "epoch": 0.6336253337705532, + "grad_norm": 0.6364022881551684, + "learning_rate": 3.99677562760889e-06, + "loss": 0.2857, + "step": 13526 + }, + { + "epoch": 0.6336721787604815, + "grad_norm": 0.6278842717826054, + "learning_rate": 3.996623720039688e-06, + "loss": 0.2921, + "step": 13527 + }, + { + "epoch": 0.6337190237504099, + "grad_norm": 0.609554452689521, + "learning_rate": 3.996471803857795e-06, + "loss": 0.2928, + "step": 13528 + }, + { + "epoch": 0.6337658687403382, + "grad_norm": 0.6115148535139294, + "learning_rate": 3.996319879064081e-06, + "loss": 0.2843, + "step": 13529 + }, + { + "epoch": 0.6338127137302666, + "grad_norm": 0.6257028028782068, + "learning_rate": 3.996167945659423e-06, + "loss": 0.2828, + "step": 13530 + }, + { + "epoch": 0.6338595587201948, + "grad_norm": 0.5939651911837528, + "learning_rate": 3.996016003644694e-06, + "loss": 0.2674, + "step": 13531 + }, + { + "epoch": 0.6339064037101232, + "grad_norm": 0.6137104433390635, + "learning_rate": 3.995864053020769e-06, + "loss": 0.2844, + "step": 13532 + }, + { + "epoch": 0.6339532487000515, + "grad_norm": 0.5568560556566087, + "learning_rate": 3.995712093788523e-06, + "loss": 0.2792, + "step": 13533 + }, + { + "epoch": 0.6340000936899799, + "grad_norm": 0.5985845686783818, + "learning_rate": 3.995560125948829e-06, + "loss": 0.2883, + "step": 13534 + }, + { + "epoch": 0.6340469386799081, + "grad_norm": 0.6381041940180352, + "learning_rate": 3.995408149502563e-06, + "loss": 0.2988, + "step": 13535 + }, + { + "epoch": 0.6340937836698365, + "grad_norm": 0.5455653659954997, + "learning_rate": 3.995256164450598e-06, + "loss": 0.2811, + "step": 13536 + }, + { + "epoch": 0.6341406286597648, + "grad_norm": 0.6031598529906301, + "learning_rate": 3.995104170793811e-06, + "loss": 0.2768, + "step": 13537 + }, + { + "epoch": 0.6341874736496932, + "grad_norm": 0.6611464821491059, + "learning_rate": 3.994952168533074e-06, + "loss": 0.302, + "step": 13538 + }, + { + "epoch": 0.6342343186396215, + "grad_norm": 0.6072411716175559, + "learning_rate": 3.994800157669264e-06, + "loss": 0.2833, + "step": 13539 + }, + { + "epoch": 0.6342811636295498, + "grad_norm": 0.6331759998189366, + "learning_rate": 3.994648138203255e-06, + "loss": 0.2878, + "step": 13540 + }, + { + "epoch": 0.6343280086194781, + "grad_norm": 0.6030567101613081, + "learning_rate": 3.99449611013592e-06, + "loss": 0.2975, + "step": 13541 + }, + { + "epoch": 0.6343748536094065, + "grad_norm": 0.6443621987990763, + "learning_rate": 3.9943440734681375e-06, + "loss": 0.3029, + "step": 13542 + }, + { + "epoch": 0.6344216985993348, + "grad_norm": 0.5486182323356932, + "learning_rate": 3.994192028200778e-06, + "loss": 0.279, + "step": 13543 + }, + { + "epoch": 0.6344685435892631, + "grad_norm": 0.5813270012987176, + "learning_rate": 3.994039974334722e-06, + "loss": 0.2753, + "step": 13544 + }, + { + "epoch": 0.6345153885791914, + "grad_norm": 0.5665300614046657, + "learning_rate": 3.993887911870839e-06, + "loss": 0.2849, + "step": 13545 + }, + { + "epoch": 0.6345622335691198, + "grad_norm": 0.6119445485141667, + "learning_rate": 3.993735840810007e-06, + "loss": 0.2863, + "step": 13546 + }, + { + "epoch": 0.6346090785590481, + "grad_norm": 0.5707755878019761, + "learning_rate": 3.993583761153101e-06, + "loss": 0.2733, + "step": 13547 + }, + { + "epoch": 0.6346559235489765, + "grad_norm": 0.5922130276071528, + "learning_rate": 3.993431672900996e-06, + "loss": 0.2807, + "step": 13548 + }, + { + "epoch": 0.6347027685389047, + "grad_norm": 0.6307811670652228, + "learning_rate": 3.9932795760545675e-06, + "loss": 0.3154, + "step": 13549 + }, + { + "epoch": 0.6347496135288331, + "grad_norm": 0.638807858073789, + "learning_rate": 3.99312747061469e-06, + "loss": 0.2957, + "step": 13550 + }, + { + "epoch": 0.6347964585187614, + "grad_norm": 0.5738049307506546, + "learning_rate": 3.992975356582239e-06, + "loss": 0.2815, + "step": 13551 + }, + { + "epoch": 0.6348433035086898, + "grad_norm": 0.6264068414762141, + "learning_rate": 3.99282323395809e-06, + "loss": 0.2956, + "step": 13552 + }, + { + "epoch": 0.634890148498618, + "grad_norm": 0.5761108786898174, + "learning_rate": 3.992671102743118e-06, + "loss": 0.294, + "step": 13553 + }, + { + "epoch": 0.6349369934885464, + "grad_norm": 0.6785170625919764, + "learning_rate": 3.9925189629382e-06, + "loss": 0.2996, + "step": 13554 + }, + { + "epoch": 0.6349838384784747, + "grad_norm": 0.5757293301610611, + "learning_rate": 3.99236681454421e-06, + "loss": 0.2762, + "step": 13555 + }, + { + "epoch": 0.6350306834684031, + "grad_norm": 0.6170884775272842, + "learning_rate": 3.992214657562025e-06, + "loss": 0.277, + "step": 13556 + }, + { + "epoch": 0.6350775284583314, + "grad_norm": 0.5585421223219208, + "learning_rate": 3.992062491992518e-06, + "loss": 0.2858, + "step": 13557 + }, + { + "epoch": 0.6351243734482597, + "grad_norm": 0.6005658111626759, + "learning_rate": 3.991910317836568e-06, + "loss": 0.2782, + "step": 13558 + }, + { + "epoch": 0.635171218438188, + "grad_norm": 0.6648126639535975, + "learning_rate": 3.991758135095048e-06, + "loss": 0.302, + "step": 13559 + }, + { + "epoch": 0.6352180634281164, + "grad_norm": 0.5769718867102429, + "learning_rate": 3.991605943768835e-06, + "loss": 0.2664, + "step": 13560 + }, + { + "epoch": 0.6352649084180447, + "grad_norm": 0.6351432825322214, + "learning_rate": 3.9914537438588044e-06, + "loss": 0.2774, + "step": 13561 + }, + { + "epoch": 0.635311753407973, + "grad_norm": 0.6292124468603228, + "learning_rate": 3.991301535365834e-06, + "loss": 0.3015, + "step": 13562 + }, + { + "epoch": 0.6353585983979013, + "grad_norm": 0.6055895844561452, + "learning_rate": 3.991149318290797e-06, + "loss": 0.2921, + "step": 13563 + }, + { + "epoch": 0.6354054433878297, + "grad_norm": 0.5832691290184688, + "learning_rate": 3.99099709263457e-06, + "loss": 0.2882, + "step": 13564 + }, + { + "epoch": 0.635452288377758, + "grad_norm": 0.704301681106654, + "learning_rate": 3.99084485839803e-06, + "loss": 0.2941, + "step": 13565 + }, + { + "epoch": 0.6354991333676864, + "grad_norm": 0.5815159935180889, + "learning_rate": 3.990692615582053e-06, + "loss": 0.2872, + "step": 13566 + }, + { + "epoch": 0.6355459783576146, + "grad_norm": 0.5853209590967245, + "learning_rate": 3.990540364187513e-06, + "loss": 0.2862, + "step": 13567 + }, + { + "epoch": 0.635592823347543, + "grad_norm": 0.5665036536131398, + "learning_rate": 3.9903881042152895e-06, + "loss": 0.2785, + "step": 13568 + }, + { + "epoch": 0.6356396683374713, + "grad_norm": 0.6141837217152616, + "learning_rate": 3.990235835666257e-06, + "loss": 0.2898, + "step": 13569 + }, + { + "epoch": 0.6356865133273997, + "grad_norm": 0.615822722756833, + "learning_rate": 3.99008355854129e-06, + "loss": 0.2748, + "step": 13570 + }, + { + "epoch": 0.6357333583173279, + "grad_norm": 0.5345078563995632, + "learning_rate": 3.9899312728412685e-06, + "loss": 0.2602, + "step": 13571 + }, + { + "epoch": 0.6357802033072563, + "grad_norm": 0.5816726871344752, + "learning_rate": 3.989778978567067e-06, + "loss": 0.2886, + "step": 13572 + }, + { + "epoch": 0.6358270482971846, + "grad_norm": 0.6187811211002611, + "learning_rate": 3.9896266757195615e-06, + "loss": 0.3019, + "step": 13573 + }, + { + "epoch": 0.635873893287113, + "grad_norm": 0.6396998844826765, + "learning_rate": 3.989474364299628e-06, + "loss": 0.3132, + "step": 13574 + }, + { + "epoch": 0.6359207382770413, + "grad_norm": 0.5505776372040209, + "learning_rate": 3.989322044308145e-06, + "loss": 0.2757, + "step": 13575 + }, + { + "epoch": 0.6359675832669696, + "grad_norm": 0.613226114642687, + "learning_rate": 3.989169715745987e-06, + "loss": 0.2848, + "step": 13576 + }, + { + "epoch": 0.6360144282568979, + "grad_norm": 0.6115770720502782, + "learning_rate": 3.989017378614033e-06, + "loss": 0.2877, + "step": 13577 + }, + { + "epoch": 0.6360612732468263, + "grad_norm": 0.6053576172644188, + "learning_rate": 3.988865032913158e-06, + "loss": 0.2948, + "step": 13578 + }, + { + "epoch": 0.6361081182367546, + "grad_norm": 0.561562172844347, + "learning_rate": 3.988712678644237e-06, + "loss": 0.2747, + "step": 13579 + }, + { + "epoch": 0.6361549632266829, + "grad_norm": 0.6065668059148476, + "learning_rate": 3.988560315808151e-06, + "loss": 0.2821, + "step": 13580 + }, + { + "epoch": 0.6362018082166112, + "grad_norm": 0.5884083726246417, + "learning_rate": 3.9884079444057735e-06, + "loss": 0.298, + "step": 13581 + }, + { + "epoch": 0.6362486532065396, + "grad_norm": 0.5687490825624991, + "learning_rate": 3.988255564437982e-06, + "loss": 0.2764, + "step": 13582 + }, + { + "epoch": 0.6362954981964679, + "grad_norm": 0.6095161081145508, + "learning_rate": 3.9881031759056546e-06, + "loss": 0.2976, + "step": 13583 + }, + { + "epoch": 0.6363423431863963, + "grad_norm": 0.6501318349567687, + "learning_rate": 3.9879507788096675e-06, + "loss": 0.3048, + "step": 13584 + }, + { + "epoch": 0.6363891881763245, + "grad_norm": 0.618312718153338, + "learning_rate": 3.9877983731508965e-06, + "loss": 0.2763, + "step": 13585 + }, + { + "epoch": 0.6364360331662529, + "grad_norm": 0.5715652699178854, + "learning_rate": 3.987645958930221e-06, + "loss": 0.2787, + "step": 13586 + }, + { + "epoch": 0.6364828781561812, + "grad_norm": 0.5882242142947945, + "learning_rate": 3.987493536148517e-06, + "loss": 0.2662, + "step": 13587 + }, + { + "epoch": 0.6365297231461096, + "grad_norm": 0.6539191643991138, + "learning_rate": 3.98734110480666e-06, + "loss": 0.2844, + "step": 13588 + }, + { + "epoch": 0.6365765681360378, + "grad_norm": 0.6391356542244098, + "learning_rate": 3.98718866490553e-06, + "loss": 0.2915, + "step": 13589 + }, + { + "epoch": 0.6366234131259662, + "grad_norm": 0.5825142163373332, + "learning_rate": 3.987036216446003e-06, + "loss": 0.2899, + "step": 13590 + }, + { + "epoch": 0.6366702581158945, + "grad_norm": 0.6190776544111217, + "learning_rate": 3.986883759428957e-06, + "loss": 0.3075, + "step": 13591 + }, + { + "epoch": 0.6367171031058229, + "grad_norm": 0.6335015528257926, + "learning_rate": 3.986731293855267e-06, + "loss": 0.3007, + "step": 13592 + }, + { + "epoch": 0.6367639480957512, + "grad_norm": 0.5633017248782368, + "learning_rate": 3.986578819725813e-06, + "loss": 0.2738, + "step": 13593 + }, + { + "epoch": 0.6368107930856794, + "grad_norm": 0.6296944776261448, + "learning_rate": 3.986426337041471e-06, + "loss": 0.298, + "step": 13594 + }, + { + "epoch": 0.6368576380756078, + "grad_norm": 0.5717624567109663, + "learning_rate": 3.98627384580312e-06, + "loss": 0.278, + "step": 13595 + }, + { + "epoch": 0.6369044830655362, + "grad_norm": 0.6042967603637942, + "learning_rate": 3.9861213460116365e-06, + "loss": 0.2791, + "step": 13596 + }, + { + "epoch": 0.6369513280554645, + "grad_norm": 0.6032032268104573, + "learning_rate": 3.985968837667897e-06, + "loss": 0.3045, + "step": 13597 + }, + { + "epoch": 0.6369981730453927, + "grad_norm": 0.5326502992442262, + "learning_rate": 3.985816320772782e-06, + "loss": 0.2656, + "step": 13598 + }, + { + "epoch": 0.6370450180353211, + "grad_norm": 0.5854694275641019, + "learning_rate": 3.985663795327166e-06, + "loss": 0.2745, + "step": 13599 + }, + { + "epoch": 0.6370918630252494, + "grad_norm": 0.6247614720255713, + "learning_rate": 3.985511261331929e-06, + "loss": 0.2827, + "step": 13600 + }, + { + "epoch": 0.6371387080151778, + "grad_norm": 0.5846005885721878, + "learning_rate": 3.985358718787948e-06, + "loss": 0.2888, + "step": 13601 + }, + { + "epoch": 0.6371855530051062, + "grad_norm": 0.6460815775028379, + "learning_rate": 3.985206167696101e-06, + "loss": 0.2925, + "step": 13602 + }, + { + "epoch": 0.6372323979950344, + "grad_norm": 0.5385764994893824, + "learning_rate": 3.985053608057266e-06, + "loss": 0.2646, + "step": 13603 + }, + { + "epoch": 0.6372792429849627, + "grad_norm": 0.580543730882628, + "learning_rate": 3.98490103987232e-06, + "loss": 0.2752, + "step": 13604 + }, + { + "epoch": 0.6373260879748911, + "grad_norm": 0.6312413586671831, + "learning_rate": 3.984748463142142e-06, + "loss": 0.2601, + "step": 13605 + }, + { + "epoch": 0.6373729329648194, + "grad_norm": 0.5815167436524775, + "learning_rate": 3.98459587786761e-06, + "loss": 0.2813, + "step": 13606 + }, + { + "epoch": 0.6374197779547477, + "grad_norm": 0.6208479383064052, + "learning_rate": 3.984443284049602e-06, + "loss": 0.3056, + "step": 13607 + }, + { + "epoch": 0.637466622944676, + "grad_norm": 0.6420602153652144, + "learning_rate": 3.984290681688996e-06, + "loss": 0.2786, + "step": 13608 + }, + { + "epoch": 0.6375134679346044, + "grad_norm": 0.5603479666263114, + "learning_rate": 3.984138070786669e-06, + "loss": 0.2669, + "step": 13609 + }, + { + "epoch": 0.6375603129245327, + "grad_norm": 0.5905519573045559, + "learning_rate": 3.983985451343502e-06, + "loss": 0.2846, + "step": 13610 + }, + { + "epoch": 0.6376071579144611, + "grad_norm": 0.6519628089295708, + "learning_rate": 3.983832823360371e-06, + "loss": 0.2886, + "step": 13611 + }, + { + "epoch": 0.6376540029043893, + "grad_norm": 0.6005739998353733, + "learning_rate": 3.983680186838155e-06, + "loss": 0.2951, + "step": 13612 + }, + { + "epoch": 0.6377008478943177, + "grad_norm": 0.5541778239079382, + "learning_rate": 3.983527541777732e-06, + "loss": 0.2743, + "step": 13613 + }, + { + "epoch": 0.637747692884246, + "grad_norm": 0.5924870680947313, + "learning_rate": 3.983374888179982e-06, + "loss": 0.2817, + "step": 13614 + }, + { + "epoch": 0.6377945378741744, + "grad_norm": 0.5966453170581588, + "learning_rate": 3.9832222260457815e-06, + "loss": 0.2969, + "step": 13615 + }, + { + "epoch": 0.6378413828641026, + "grad_norm": 0.5710685627049447, + "learning_rate": 3.9830695553760104e-06, + "loss": 0.2754, + "step": 13616 + }, + { + "epoch": 0.637888227854031, + "grad_norm": 0.6206273850935746, + "learning_rate": 3.9829168761715464e-06, + "loss": 0.2897, + "step": 13617 + }, + { + "epoch": 0.6379350728439593, + "grad_norm": 0.630201475480145, + "learning_rate": 3.982764188433269e-06, + "loss": 0.3074, + "step": 13618 + }, + { + "epoch": 0.6379819178338877, + "grad_norm": 0.6261807829332343, + "learning_rate": 3.982611492162055e-06, + "loss": 0.3018, + "step": 13619 + }, + { + "epoch": 0.638028762823816, + "grad_norm": 0.5899178200069201, + "learning_rate": 3.982458787358786e-06, + "loss": 0.2681, + "step": 13620 + }, + { + "epoch": 0.6380756078137443, + "grad_norm": 0.6745903205916834, + "learning_rate": 3.982306074024338e-06, + "loss": 0.2949, + "step": 13621 + }, + { + "epoch": 0.6381224528036726, + "grad_norm": 0.5690724313495984, + "learning_rate": 3.982153352159592e-06, + "loss": 0.2819, + "step": 13622 + }, + { + "epoch": 0.638169297793601, + "grad_norm": 0.5747438351250539, + "learning_rate": 3.982000621765426e-06, + "loss": 0.288, + "step": 13623 + }, + { + "epoch": 0.6382161427835293, + "grad_norm": 0.657726904453845, + "learning_rate": 3.981847882842719e-06, + "loss": 0.2841, + "step": 13624 + }, + { + "epoch": 0.6382629877734576, + "grad_norm": 0.6077501709268471, + "learning_rate": 3.981695135392349e-06, + "loss": 0.2889, + "step": 13625 + }, + { + "epoch": 0.6383098327633859, + "grad_norm": 0.5837134630181094, + "learning_rate": 3.981542379415197e-06, + "loss": 0.2864, + "step": 13626 + }, + { + "epoch": 0.6383566777533143, + "grad_norm": 0.589621002854239, + "learning_rate": 3.981389614912141e-06, + "loss": 0.2674, + "step": 13627 + }, + { + "epoch": 0.6384035227432426, + "grad_norm": 0.5700641592631721, + "learning_rate": 3.9812368418840595e-06, + "loss": 0.2766, + "step": 13628 + }, + { + "epoch": 0.638450367733171, + "grad_norm": 0.5802733882208887, + "learning_rate": 3.981084060331832e-06, + "loss": 0.2755, + "step": 13629 + }, + { + "epoch": 0.6384972127230992, + "grad_norm": 0.5858901385564104, + "learning_rate": 3.980931270256339e-06, + "loss": 0.281, + "step": 13630 + }, + { + "epoch": 0.6385440577130276, + "grad_norm": 0.5709524403618045, + "learning_rate": 3.980778471658458e-06, + "loss": 0.2916, + "step": 13631 + }, + { + "epoch": 0.6385909027029559, + "grad_norm": 0.5842238508282528, + "learning_rate": 3.980625664539069e-06, + "loss": 0.2906, + "step": 13632 + }, + { + "epoch": 0.6386377476928843, + "grad_norm": 0.5797049067854314, + "learning_rate": 3.980472848899052e-06, + "loss": 0.2882, + "step": 13633 + }, + { + "epoch": 0.6386845926828125, + "grad_norm": 0.6287872855464957, + "learning_rate": 3.980320024739286e-06, + "loss": 0.3055, + "step": 13634 + }, + { + "epoch": 0.6387314376727409, + "grad_norm": 0.6466342703266006, + "learning_rate": 3.980167192060649e-06, + "loss": 0.303, + "step": 13635 + }, + { + "epoch": 0.6387782826626692, + "grad_norm": 0.594339631124179, + "learning_rate": 3.980014350864023e-06, + "loss": 0.2861, + "step": 13636 + }, + { + "epoch": 0.6388251276525976, + "grad_norm": 0.5743789526412927, + "learning_rate": 3.979861501150286e-06, + "loss": 0.2896, + "step": 13637 + }, + { + "epoch": 0.6388719726425259, + "grad_norm": 0.6048711678760611, + "learning_rate": 3.979708642920318e-06, + "loss": 0.2758, + "step": 13638 + }, + { + "epoch": 0.6389188176324542, + "grad_norm": 0.6093949585548386, + "learning_rate": 3.979555776174999e-06, + "loss": 0.2917, + "step": 13639 + }, + { + "epoch": 0.6389656626223825, + "grad_norm": 0.5849740151740979, + "learning_rate": 3.9794029009152076e-06, + "loss": 0.2851, + "step": 13640 + }, + { + "epoch": 0.6390125076123109, + "grad_norm": 0.578698075641313, + "learning_rate": 3.979250017141826e-06, + "loss": 0.2734, + "step": 13641 + }, + { + "epoch": 0.6390593526022392, + "grad_norm": 0.5625413554231692, + "learning_rate": 3.979097124855731e-06, + "loss": 0.2821, + "step": 13642 + }, + { + "epoch": 0.6391061975921675, + "grad_norm": 0.6163962992562717, + "learning_rate": 3.978944224057804e-06, + "loss": 0.2786, + "step": 13643 + }, + { + "epoch": 0.6391530425820958, + "grad_norm": 0.6657007966780579, + "learning_rate": 3.978791314748924e-06, + "loss": 0.2954, + "step": 13644 + }, + { + "epoch": 0.6391998875720242, + "grad_norm": 0.6528007092019686, + "learning_rate": 3.978638396929972e-06, + "loss": 0.2956, + "step": 13645 + }, + { + "epoch": 0.6392467325619525, + "grad_norm": 0.5501880437251718, + "learning_rate": 3.978485470601829e-06, + "loss": 0.2715, + "step": 13646 + }, + { + "epoch": 0.6392935775518809, + "grad_norm": 0.6117232701810529, + "learning_rate": 3.978332535765373e-06, + "loss": 0.2909, + "step": 13647 + }, + { + "epoch": 0.6393404225418091, + "grad_norm": 0.5610408594500108, + "learning_rate": 3.978179592421484e-06, + "loss": 0.2932, + "step": 13648 + }, + { + "epoch": 0.6393872675317375, + "grad_norm": 0.6719499429226643, + "learning_rate": 3.978026640571044e-06, + "loss": 0.2972, + "step": 13649 + }, + { + "epoch": 0.6394341125216658, + "grad_norm": 0.5831635808560472, + "learning_rate": 3.977873680214932e-06, + "loss": 0.2779, + "step": 13650 + }, + { + "epoch": 0.6394809575115942, + "grad_norm": 0.598715700121829, + "learning_rate": 3.977720711354028e-06, + "loss": 0.2905, + "step": 13651 + }, + { + "epoch": 0.6395278025015224, + "grad_norm": 0.5946495618338751, + "learning_rate": 3.977567733989213e-06, + "loss": 0.2679, + "step": 13652 + }, + { + "epoch": 0.6395746474914508, + "grad_norm": 0.5815021836691635, + "learning_rate": 3.977414748121366e-06, + "loss": 0.2931, + "step": 13653 + }, + { + "epoch": 0.6396214924813791, + "grad_norm": 0.6575442336557421, + "learning_rate": 3.97726175375137e-06, + "loss": 0.296, + "step": 13654 + }, + { + "epoch": 0.6396683374713075, + "grad_norm": 0.5757815350729231, + "learning_rate": 3.977108750880103e-06, + "loss": 0.3053, + "step": 13655 + }, + { + "epoch": 0.6397151824612358, + "grad_norm": 0.542822242762406, + "learning_rate": 3.976955739508447e-06, + "loss": 0.2862, + "step": 13656 + }, + { + "epoch": 0.6397620274511641, + "grad_norm": 0.5834916216115447, + "learning_rate": 3.976802719637281e-06, + "loss": 0.2717, + "step": 13657 + }, + { + "epoch": 0.6398088724410924, + "grad_norm": 0.5851475451818385, + "learning_rate": 3.976649691267488e-06, + "loss": 0.2821, + "step": 13658 + }, + { + "epoch": 0.6398557174310208, + "grad_norm": 0.5698762841171761, + "learning_rate": 3.976496654399946e-06, + "loss": 0.2588, + "step": 13659 + }, + { + "epoch": 0.6399025624209491, + "grad_norm": 0.5554976934799786, + "learning_rate": 3.976343609035538e-06, + "loss": 0.2808, + "step": 13660 + }, + { + "epoch": 0.6399494074108774, + "grad_norm": 0.6245752781155823, + "learning_rate": 3.9761905551751426e-06, + "loss": 0.301, + "step": 13661 + }, + { + "epoch": 0.6399962524008057, + "grad_norm": 0.5583419543300091, + "learning_rate": 3.976037492819641e-06, + "loss": 0.2846, + "step": 13662 + }, + { + "epoch": 0.6400430973907341, + "grad_norm": 0.5618262558730209, + "learning_rate": 3.975884421969916e-06, + "loss": 0.2885, + "step": 13663 + }, + { + "epoch": 0.6400899423806624, + "grad_norm": 0.6287890089687284, + "learning_rate": 3.975731342626846e-06, + "loss": 0.2814, + "step": 13664 + }, + { + "epoch": 0.6401367873705908, + "grad_norm": 0.6272716831307515, + "learning_rate": 3.975578254791313e-06, + "loss": 0.2871, + "step": 13665 + }, + { + "epoch": 0.640183632360519, + "grad_norm": 0.5809951813068265, + "learning_rate": 3.9754251584641995e-06, + "loss": 0.2914, + "step": 13666 + }, + { + "epoch": 0.6402304773504474, + "grad_norm": 0.59014507525766, + "learning_rate": 3.9752720536463844e-06, + "loss": 0.2857, + "step": 13667 + }, + { + "epoch": 0.6402773223403757, + "grad_norm": 0.6030166678780612, + "learning_rate": 3.975118940338749e-06, + "loss": 0.3054, + "step": 13668 + }, + { + "epoch": 0.6403241673303041, + "grad_norm": 0.5858172485617921, + "learning_rate": 3.974965818542174e-06, + "loss": 0.2934, + "step": 13669 + }, + { + "epoch": 0.6403710123202323, + "grad_norm": 0.641068149664545, + "learning_rate": 3.974812688257544e-06, + "loss": 0.2818, + "step": 13670 + }, + { + "epoch": 0.6404178573101607, + "grad_norm": 0.622363262948413, + "learning_rate": 3.974659549485735e-06, + "loss": 0.296, + "step": 13671 + }, + { + "epoch": 0.640464702300089, + "grad_norm": 0.6546039685849948, + "learning_rate": 3.9745064022276324e-06, + "loss": 0.2776, + "step": 13672 + }, + { + "epoch": 0.6405115472900174, + "grad_norm": 0.6534865250438423, + "learning_rate": 3.974353246484115e-06, + "loss": 0.2764, + "step": 13673 + }, + { + "epoch": 0.6405583922799457, + "grad_norm": 0.6168987313181397, + "learning_rate": 3.974200082256067e-06, + "loss": 0.2866, + "step": 13674 + }, + { + "epoch": 0.640605237269874, + "grad_norm": 0.6106053061193233, + "learning_rate": 3.974046909544367e-06, + "loss": 0.2797, + "step": 13675 + }, + { + "epoch": 0.6406520822598023, + "grad_norm": 0.5847175650671136, + "learning_rate": 3.973893728349897e-06, + "loss": 0.2766, + "step": 13676 + }, + { + "epoch": 0.6406989272497307, + "grad_norm": 0.5695889560964699, + "learning_rate": 3.9737405386735395e-06, + "loss": 0.2608, + "step": 13677 + }, + { + "epoch": 0.640745772239659, + "grad_norm": 0.6136368593204978, + "learning_rate": 3.973587340516176e-06, + "loss": 0.3061, + "step": 13678 + }, + { + "epoch": 0.6407926172295872, + "grad_norm": 0.5611614803477947, + "learning_rate": 3.9734341338786866e-06, + "loss": 0.284, + "step": 13679 + }, + { + "epoch": 0.6408394622195156, + "grad_norm": 0.5764324750186617, + "learning_rate": 3.973280918761955e-06, + "loss": 0.2842, + "step": 13680 + }, + { + "epoch": 0.640886307209444, + "grad_norm": 0.5685151567238342, + "learning_rate": 3.973127695166862e-06, + "loss": 0.2769, + "step": 13681 + }, + { + "epoch": 0.6409331521993723, + "grad_norm": 0.8105748156084511, + "learning_rate": 3.972974463094289e-06, + "loss": 0.2871, + "step": 13682 + }, + { + "epoch": 0.6409799971893007, + "grad_norm": 0.5643489460906184, + "learning_rate": 3.972821222545119e-06, + "loss": 0.2727, + "step": 13683 + }, + { + "epoch": 0.6410268421792289, + "grad_norm": 0.6458639266730776, + "learning_rate": 3.972667973520232e-06, + "loss": 0.3171, + "step": 13684 + }, + { + "epoch": 0.6410736871691572, + "grad_norm": 0.5661335283559711, + "learning_rate": 3.972514716020511e-06, + "loss": 0.2754, + "step": 13685 + }, + { + "epoch": 0.6411205321590856, + "grad_norm": 0.5497925122841858, + "learning_rate": 3.972361450046839e-06, + "loss": 0.2789, + "step": 13686 + }, + { + "epoch": 0.641167377149014, + "grad_norm": 0.6207108880871169, + "learning_rate": 3.972208175600096e-06, + "loss": 0.3034, + "step": 13687 + }, + { + "epoch": 0.6412142221389422, + "grad_norm": 0.6178671543251691, + "learning_rate": 3.972054892681165e-06, + "loss": 0.2875, + "step": 13688 + }, + { + "epoch": 0.6412610671288705, + "grad_norm": 0.6044026323256386, + "learning_rate": 3.971901601290928e-06, + "loss": 0.281, + "step": 13689 + }, + { + "epoch": 0.6413079121187989, + "grad_norm": 0.6458624071925886, + "learning_rate": 3.971748301430268e-06, + "loss": 0.3049, + "step": 13690 + }, + { + "epoch": 0.6413547571087272, + "grad_norm": 0.6207426623212942, + "learning_rate": 3.971594993100066e-06, + "loss": 0.2943, + "step": 13691 + }, + { + "epoch": 0.6414016020986556, + "grad_norm": 0.5928521823225321, + "learning_rate": 3.971441676301204e-06, + "loss": 0.2701, + "step": 13692 + }, + { + "epoch": 0.6414484470885838, + "grad_norm": 0.5835489927267791, + "learning_rate": 3.971288351034567e-06, + "loss": 0.2726, + "step": 13693 + }, + { + "epoch": 0.6414952920785122, + "grad_norm": 0.5860900269980874, + "learning_rate": 3.971135017301033e-06, + "loss": 0.2705, + "step": 13694 + }, + { + "epoch": 0.6415421370684405, + "grad_norm": 0.5773113827869021, + "learning_rate": 3.9709816751014876e-06, + "loss": 0.2699, + "step": 13695 + }, + { + "epoch": 0.6415889820583689, + "grad_norm": 0.5742850615489169, + "learning_rate": 3.970828324436813e-06, + "loss": 0.2753, + "step": 13696 + }, + { + "epoch": 0.6416358270482971, + "grad_norm": 0.5307134930795973, + "learning_rate": 3.97067496530789e-06, + "loss": 0.2609, + "step": 13697 + }, + { + "epoch": 0.6416826720382255, + "grad_norm": 0.6156614350853614, + "learning_rate": 3.970521597715603e-06, + "loss": 0.2878, + "step": 13698 + }, + { + "epoch": 0.6417295170281538, + "grad_norm": 0.6418675452495778, + "learning_rate": 3.970368221660834e-06, + "loss": 0.272, + "step": 13699 + }, + { + "epoch": 0.6417763620180822, + "grad_norm": 0.5542390990114148, + "learning_rate": 3.970214837144465e-06, + "loss": 0.2849, + "step": 13700 + }, + { + "epoch": 0.6418232070080105, + "grad_norm": 0.5646795867885114, + "learning_rate": 3.9700614441673794e-06, + "loss": 0.2754, + "step": 13701 + }, + { + "epoch": 0.6418700519979388, + "grad_norm": 0.5976681439893242, + "learning_rate": 3.969908042730459e-06, + "loss": 0.2972, + "step": 13702 + }, + { + "epoch": 0.6419168969878671, + "grad_norm": 0.5867010473994497, + "learning_rate": 3.969754632834588e-06, + "loss": 0.2834, + "step": 13703 + }, + { + "epoch": 0.6419637419777955, + "grad_norm": 0.5551952538504826, + "learning_rate": 3.969601214480648e-06, + "loss": 0.2649, + "step": 13704 + }, + { + "epoch": 0.6420105869677238, + "grad_norm": 0.5771265859893533, + "learning_rate": 3.969447787669522e-06, + "loss": 0.2999, + "step": 13705 + }, + { + "epoch": 0.6420574319576521, + "grad_norm": 0.6074502777102534, + "learning_rate": 3.969294352402094e-06, + "loss": 0.2764, + "step": 13706 + }, + { + "epoch": 0.6421042769475804, + "grad_norm": 0.6009835748108796, + "learning_rate": 3.969140908679246e-06, + "loss": 0.2628, + "step": 13707 + }, + { + "epoch": 0.6421511219375088, + "grad_norm": 0.6037522807106274, + "learning_rate": 3.968987456501862e-06, + "loss": 0.2789, + "step": 13708 + }, + { + "epoch": 0.6421979669274371, + "grad_norm": 0.5660408397497716, + "learning_rate": 3.968833995870824e-06, + "loss": 0.2699, + "step": 13709 + }, + { + "epoch": 0.6422448119173655, + "grad_norm": 0.5922390288603402, + "learning_rate": 3.968680526787015e-06, + "loss": 0.3032, + "step": 13710 + }, + { + "epoch": 0.6422916569072937, + "grad_norm": 0.5815974037287462, + "learning_rate": 3.968527049251319e-06, + "loss": 0.2739, + "step": 13711 + }, + { + "epoch": 0.6423385018972221, + "grad_norm": 0.6257597539841684, + "learning_rate": 3.968373563264619e-06, + "loss": 0.2803, + "step": 13712 + }, + { + "epoch": 0.6423853468871504, + "grad_norm": 0.6525816731483384, + "learning_rate": 3.9682200688277984e-06, + "loss": 0.288, + "step": 13713 + }, + { + "epoch": 0.6424321918770788, + "grad_norm": 0.5353119739223096, + "learning_rate": 3.9680665659417405e-06, + "loss": 0.2696, + "step": 13714 + }, + { + "epoch": 0.642479036867007, + "grad_norm": 0.574572098865155, + "learning_rate": 3.967913054607328e-06, + "loss": 0.2818, + "step": 13715 + }, + { + "epoch": 0.6425258818569354, + "grad_norm": 0.6058766919191022, + "learning_rate": 3.967759534825446e-06, + "loss": 0.2742, + "step": 13716 + }, + { + "epoch": 0.6425727268468637, + "grad_norm": 0.6058316372378244, + "learning_rate": 3.967606006596976e-06, + "loss": 0.2963, + "step": 13717 + }, + { + "epoch": 0.6426195718367921, + "grad_norm": 0.5933065365667295, + "learning_rate": 3.967452469922801e-06, + "loss": 0.2881, + "step": 13718 + }, + { + "epoch": 0.6426664168267204, + "grad_norm": 0.6273534783878277, + "learning_rate": 3.967298924803807e-06, + "loss": 0.306, + "step": 13719 + }, + { + "epoch": 0.6427132618166487, + "grad_norm": 0.5465646080433685, + "learning_rate": 3.967145371240877e-06, + "loss": 0.2729, + "step": 13720 + }, + { + "epoch": 0.642760106806577, + "grad_norm": 0.6542001586595054, + "learning_rate": 3.966991809234894e-06, + "loss": 0.3074, + "step": 13721 + }, + { + "epoch": 0.6428069517965054, + "grad_norm": 0.5775143520849287, + "learning_rate": 3.966838238786741e-06, + "loss": 0.2846, + "step": 13722 + }, + { + "epoch": 0.6428537967864337, + "grad_norm": 0.607256579415405, + "learning_rate": 3.966684659897303e-06, + "loss": 0.2888, + "step": 13723 + }, + { + "epoch": 0.642900641776362, + "grad_norm": 0.6008881298589833, + "learning_rate": 3.966531072567463e-06, + "loss": 0.2884, + "step": 13724 + }, + { + "epoch": 0.6429474867662903, + "grad_norm": 0.6085082042403537, + "learning_rate": 3.9663774767981065e-06, + "loss": 0.3007, + "step": 13725 + }, + { + "epoch": 0.6429943317562187, + "grad_norm": 0.5782062119628953, + "learning_rate": 3.966223872590116e-06, + "loss": 0.2634, + "step": 13726 + }, + { + "epoch": 0.643041176746147, + "grad_norm": 0.59834194603078, + "learning_rate": 3.966070259944375e-06, + "loss": 0.2885, + "step": 13727 + }, + { + "epoch": 0.6430880217360754, + "grad_norm": 0.6085148599578977, + "learning_rate": 3.965916638861768e-06, + "loss": 0.2919, + "step": 13728 + }, + { + "epoch": 0.6431348667260036, + "grad_norm": 0.5768789941581058, + "learning_rate": 3.965763009343179e-06, + "loss": 0.2909, + "step": 13729 + }, + { + "epoch": 0.643181711715932, + "grad_norm": 0.5607862437697082, + "learning_rate": 3.965609371389493e-06, + "loss": 0.2871, + "step": 13730 + }, + { + "epoch": 0.6432285567058603, + "grad_norm": 0.5370197039878193, + "learning_rate": 3.965455725001592e-06, + "loss": 0.2471, + "step": 13731 + }, + { + "epoch": 0.6432754016957887, + "grad_norm": 0.5894725975199426, + "learning_rate": 3.965302070180364e-06, + "loss": 0.2866, + "step": 13732 + }, + { + "epoch": 0.6433222466857169, + "grad_norm": 0.5877153918087462, + "learning_rate": 3.965148406926689e-06, + "loss": 0.2932, + "step": 13733 + }, + { + "epoch": 0.6433690916756453, + "grad_norm": 0.5756621861265867, + "learning_rate": 3.964994735241454e-06, + "loss": 0.2845, + "step": 13734 + }, + { + "epoch": 0.6434159366655736, + "grad_norm": 0.7164766460069718, + "learning_rate": 3.964841055125542e-06, + "loss": 0.3057, + "step": 13735 + }, + { + "epoch": 0.643462781655502, + "grad_norm": 0.5949122159646004, + "learning_rate": 3.964687366579838e-06, + "loss": 0.2755, + "step": 13736 + }, + { + "epoch": 0.6435096266454303, + "grad_norm": 0.6100296283885394, + "learning_rate": 3.964533669605227e-06, + "loss": 0.2817, + "step": 13737 + }, + { + "epoch": 0.6435564716353586, + "grad_norm": 0.5672892383195073, + "learning_rate": 3.964379964202593e-06, + "loss": 0.2714, + "step": 13738 + }, + { + "epoch": 0.6436033166252869, + "grad_norm": 0.5997496603339375, + "learning_rate": 3.964226250372819e-06, + "loss": 0.2978, + "step": 13739 + }, + { + "epoch": 0.6436501616152153, + "grad_norm": 0.5866646089577395, + "learning_rate": 3.964072528116792e-06, + "loss": 0.3096, + "step": 13740 + }, + { + "epoch": 0.6436970066051436, + "grad_norm": 0.5635622236017213, + "learning_rate": 3.963918797435395e-06, + "loss": 0.2673, + "step": 13741 + }, + { + "epoch": 0.6437438515950719, + "grad_norm": 0.5893750525226691, + "learning_rate": 3.963765058329514e-06, + "loss": 0.2636, + "step": 13742 + }, + { + "epoch": 0.6437906965850002, + "grad_norm": 0.5493523175999735, + "learning_rate": 3.963611310800032e-06, + "loss": 0.2738, + "step": 13743 + }, + { + "epoch": 0.6438375415749286, + "grad_norm": 0.6269128157276967, + "learning_rate": 3.963457554847835e-06, + "loss": 0.3039, + "step": 13744 + }, + { + "epoch": 0.6438843865648569, + "grad_norm": 0.5523566770920441, + "learning_rate": 3.9633037904738084e-06, + "loss": 0.2837, + "step": 13745 + }, + { + "epoch": 0.6439312315547853, + "grad_norm": 0.5859052635782604, + "learning_rate": 3.963150017678836e-06, + "loss": 0.2843, + "step": 13746 + }, + { + "epoch": 0.6439780765447135, + "grad_norm": 0.5786724295928382, + "learning_rate": 3.962996236463802e-06, + "loss": 0.2952, + "step": 13747 + }, + { + "epoch": 0.6440249215346419, + "grad_norm": 0.5341394858192114, + "learning_rate": 3.962842446829594e-06, + "loss": 0.2782, + "step": 13748 + }, + { + "epoch": 0.6440717665245702, + "grad_norm": 0.6430054229624946, + "learning_rate": 3.962688648777094e-06, + "loss": 0.2978, + "step": 13749 + }, + { + "epoch": 0.6441186115144986, + "grad_norm": 0.6051679965972674, + "learning_rate": 3.962534842307189e-06, + "loss": 0.2916, + "step": 13750 + }, + { + "epoch": 0.6441654565044268, + "grad_norm": 0.5617997922499699, + "learning_rate": 3.962381027420764e-06, + "loss": 0.2776, + "step": 13751 + }, + { + "epoch": 0.6442123014943552, + "grad_norm": 0.5690950436376916, + "learning_rate": 3.962227204118702e-06, + "loss": 0.2772, + "step": 13752 + }, + { + "epoch": 0.6442591464842835, + "grad_norm": 0.5796588255797347, + "learning_rate": 3.962073372401893e-06, + "loss": 0.2584, + "step": 13753 + }, + { + "epoch": 0.6443059914742119, + "grad_norm": 0.5620998287060566, + "learning_rate": 3.961919532271217e-06, + "loss": 0.2854, + "step": 13754 + }, + { + "epoch": 0.6443528364641402, + "grad_norm": 0.5754451505411388, + "learning_rate": 3.961765683727562e-06, + "loss": 0.2718, + "step": 13755 + }, + { + "epoch": 0.6443996814540685, + "grad_norm": 0.5827675460368266, + "learning_rate": 3.961611826771813e-06, + "loss": 0.2778, + "step": 13756 + }, + { + "epoch": 0.6444465264439968, + "grad_norm": 0.5754851337435177, + "learning_rate": 3.961457961404855e-06, + "loss": 0.2775, + "step": 13757 + }, + { + "epoch": 0.6444933714339252, + "grad_norm": 0.6270085804006551, + "learning_rate": 3.961304087627574e-06, + "loss": 0.3017, + "step": 13758 + }, + { + "epoch": 0.6445402164238535, + "grad_norm": 0.6174408142801037, + "learning_rate": 3.961150205440854e-06, + "loss": 0.2836, + "step": 13759 + }, + { + "epoch": 0.6445870614137817, + "grad_norm": 0.6103657498947168, + "learning_rate": 3.960996314845584e-06, + "loss": 0.2843, + "step": 13760 + }, + { + "epoch": 0.6446339064037101, + "grad_norm": 0.6074051891140889, + "learning_rate": 3.960842415842646e-06, + "loss": 0.2705, + "step": 13761 + }, + { + "epoch": 0.6446807513936385, + "grad_norm": 0.5733857383925117, + "learning_rate": 3.960688508432928e-06, + "loss": 0.2526, + "step": 13762 + }, + { + "epoch": 0.6447275963835668, + "grad_norm": 0.6929095504214725, + "learning_rate": 3.960534592617315e-06, + "loss": 0.3253, + "step": 13763 + }, + { + "epoch": 0.6447744413734952, + "grad_norm": 0.6442462885240633, + "learning_rate": 3.960380668396691e-06, + "loss": 0.2794, + "step": 13764 + }, + { + "epoch": 0.6448212863634234, + "grad_norm": 0.6443074913588004, + "learning_rate": 3.960226735771943e-06, + "loss": 0.3028, + "step": 13765 + }, + { + "epoch": 0.6448681313533517, + "grad_norm": 0.5912375313167865, + "learning_rate": 3.96007279474396e-06, + "loss": 0.3079, + "step": 13766 + }, + { + "epoch": 0.6449149763432801, + "grad_norm": 0.5864877124155982, + "learning_rate": 3.959918845313623e-06, + "loss": 0.2649, + "step": 13767 + }, + { + "epoch": 0.6449618213332085, + "grad_norm": 0.5621023675309644, + "learning_rate": 3.95976488748182e-06, + "loss": 0.2553, + "step": 13768 + }, + { + "epoch": 0.6450086663231367, + "grad_norm": 0.619494231384424, + "learning_rate": 3.959610921249437e-06, + "loss": 0.2979, + "step": 13769 + }, + { + "epoch": 0.645055511313065, + "grad_norm": 0.6234437715856117, + "learning_rate": 3.95945694661736e-06, + "loss": 0.2815, + "step": 13770 + }, + { + "epoch": 0.6451023563029934, + "grad_norm": 0.6173283185728367, + "learning_rate": 3.959302963586476e-06, + "loss": 0.2976, + "step": 13771 + }, + { + "epoch": 0.6451492012929217, + "grad_norm": 0.6031360304070778, + "learning_rate": 3.959148972157669e-06, + "loss": 0.2604, + "step": 13772 + }, + { + "epoch": 0.6451960462828501, + "grad_norm": 0.5640911188802378, + "learning_rate": 3.958994972331827e-06, + "loss": 0.2792, + "step": 13773 + }, + { + "epoch": 0.6452428912727783, + "grad_norm": 0.5680570976134236, + "learning_rate": 3.958840964109836e-06, + "loss": 0.2969, + "step": 13774 + }, + { + "epoch": 0.6452897362627067, + "grad_norm": 0.6216966095053764, + "learning_rate": 3.958686947492582e-06, + "loss": 0.2973, + "step": 13775 + }, + { + "epoch": 0.645336581252635, + "grad_norm": 0.5725927734796615, + "learning_rate": 3.9585329224809505e-06, + "loss": 0.2774, + "step": 13776 + }, + { + "epoch": 0.6453834262425634, + "grad_norm": 0.5612656179701131, + "learning_rate": 3.958378889075829e-06, + "loss": 0.2806, + "step": 13777 + }, + { + "epoch": 0.6454302712324916, + "grad_norm": 0.5713121975181924, + "learning_rate": 3.9582248472781035e-06, + "loss": 0.2631, + "step": 13778 + }, + { + "epoch": 0.64547711622242, + "grad_norm": 0.5323746874462213, + "learning_rate": 3.95807079708866e-06, + "loss": 0.2714, + "step": 13779 + }, + { + "epoch": 0.6455239612123483, + "grad_norm": 0.6083174459174214, + "learning_rate": 3.957916738508386e-06, + "loss": 0.2912, + "step": 13780 + }, + { + "epoch": 0.6455708062022767, + "grad_norm": 0.6194986538880283, + "learning_rate": 3.957762671538168e-06, + "loss": 0.2972, + "step": 13781 + }, + { + "epoch": 0.645617651192205, + "grad_norm": 0.6145198931725171, + "learning_rate": 3.957608596178891e-06, + "loss": 0.2887, + "step": 13782 + }, + { + "epoch": 0.6456644961821333, + "grad_norm": 0.5688237216490135, + "learning_rate": 3.957454512431445e-06, + "loss": 0.3027, + "step": 13783 + }, + { + "epoch": 0.6457113411720616, + "grad_norm": 0.5805704911918916, + "learning_rate": 3.957300420296713e-06, + "loss": 0.2847, + "step": 13784 + }, + { + "epoch": 0.64575818616199, + "grad_norm": 0.5444000658836886, + "learning_rate": 3.957146319775583e-06, + "loss": 0.2792, + "step": 13785 + }, + { + "epoch": 0.6458050311519183, + "grad_norm": 0.5740580072661847, + "learning_rate": 3.956992210868943e-06, + "loss": 0.2976, + "step": 13786 + }, + { + "epoch": 0.6458518761418466, + "grad_norm": 0.6341919302585713, + "learning_rate": 3.956838093577679e-06, + "loss": 0.28, + "step": 13787 + }, + { + "epoch": 0.6458987211317749, + "grad_norm": 0.5589173061097568, + "learning_rate": 3.956683967902678e-06, + "loss": 0.2737, + "step": 13788 + }, + { + "epoch": 0.6459455661217033, + "grad_norm": 0.602788728526704, + "learning_rate": 3.956529833844827e-06, + "loss": 0.2914, + "step": 13789 + }, + { + "epoch": 0.6459924111116316, + "grad_norm": 0.5924080548006032, + "learning_rate": 3.956375691405012e-06, + "loss": 0.2944, + "step": 13790 + }, + { + "epoch": 0.64603925610156, + "grad_norm": 0.5446391707154924, + "learning_rate": 3.956221540584122e-06, + "loss": 0.2725, + "step": 13791 + }, + { + "epoch": 0.6460861010914882, + "grad_norm": 0.6031563935422198, + "learning_rate": 3.956067381383042e-06, + "loss": 0.2867, + "step": 13792 + }, + { + "epoch": 0.6461329460814166, + "grad_norm": 0.5545743496179194, + "learning_rate": 3.955913213802661e-06, + "loss": 0.278, + "step": 13793 + }, + { + "epoch": 0.6461797910713449, + "grad_norm": 0.6169668176072926, + "learning_rate": 3.955759037843865e-06, + "loss": 0.305, + "step": 13794 + }, + { + "epoch": 0.6462266360612733, + "grad_norm": 0.5797026651429625, + "learning_rate": 3.955604853507542e-06, + "loss": 0.2761, + "step": 13795 + }, + { + "epoch": 0.6462734810512015, + "grad_norm": 0.605502880611484, + "learning_rate": 3.9554506607945786e-06, + "loss": 0.2851, + "step": 13796 + }, + { + "epoch": 0.6463203260411299, + "grad_norm": 0.619264425966528, + "learning_rate": 3.955296459705862e-06, + "loss": 0.2759, + "step": 13797 + }, + { + "epoch": 0.6463671710310582, + "grad_norm": 0.6611457248952445, + "learning_rate": 3.955142250242281e-06, + "loss": 0.2946, + "step": 13798 + }, + { + "epoch": 0.6464140160209866, + "grad_norm": 0.5519781728414852, + "learning_rate": 3.954988032404722e-06, + "loss": 0.2843, + "step": 13799 + }, + { + "epoch": 0.6464608610109149, + "grad_norm": 0.5710857100457343, + "learning_rate": 3.954833806194072e-06, + "loss": 0.275, + "step": 13800 + }, + { + "epoch": 0.6465077060008432, + "grad_norm": 0.5661111343380697, + "learning_rate": 3.954679571611219e-06, + "loss": 0.2738, + "step": 13801 + }, + { + "epoch": 0.6465545509907715, + "grad_norm": 0.5946106682722774, + "learning_rate": 3.954525328657051e-06, + "loss": 0.285, + "step": 13802 + }, + { + "epoch": 0.6466013959806999, + "grad_norm": 0.5381024894209583, + "learning_rate": 3.954371077332456e-06, + "loss": 0.2686, + "step": 13803 + }, + { + "epoch": 0.6466482409706282, + "grad_norm": 0.610225141966667, + "learning_rate": 3.95421681763832e-06, + "loss": 0.2827, + "step": 13804 + }, + { + "epoch": 0.6466950859605565, + "grad_norm": 0.5778104665373857, + "learning_rate": 3.954062549575532e-06, + "loss": 0.2855, + "step": 13805 + }, + { + "epoch": 0.6467419309504848, + "grad_norm": 0.5915715138342753, + "learning_rate": 3.953908273144979e-06, + "loss": 0.2789, + "step": 13806 + }, + { + "epoch": 0.6467887759404132, + "grad_norm": 0.5496467259212017, + "learning_rate": 3.953753988347551e-06, + "loss": 0.2872, + "step": 13807 + }, + { + "epoch": 0.6468356209303415, + "grad_norm": 0.609360831812123, + "learning_rate": 3.953599695184132e-06, + "loss": 0.2752, + "step": 13808 + }, + { + "epoch": 0.6468824659202699, + "grad_norm": 0.5974218151398752, + "learning_rate": 3.953445393655614e-06, + "loss": 0.2823, + "step": 13809 + }, + { + "epoch": 0.6469293109101981, + "grad_norm": 0.59358574496529, + "learning_rate": 3.953291083762882e-06, + "loss": 0.2895, + "step": 13810 + }, + { + "epoch": 0.6469761559001265, + "grad_norm": 0.580060381701432, + "learning_rate": 3.9531367655068245e-06, + "loss": 0.2705, + "step": 13811 + }, + { + "epoch": 0.6470230008900548, + "grad_norm": 0.5746360863558982, + "learning_rate": 3.952982438888332e-06, + "loss": 0.2604, + "step": 13812 + }, + { + "epoch": 0.6470698458799832, + "grad_norm": 0.6355650951494118, + "learning_rate": 3.952828103908289e-06, + "loss": 0.2892, + "step": 13813 + }, + { + "epoch": 0.6471166908699114, + "grad_norm": 0.545555353183869, + "learning_rate": 3.952673760567587e-06, + "loss": 0.2834, + "step": 13814 + }, + { + "epoch": 0.6471635358598398, + "grad_norm": 0.6197554106697788, + "learning_rate": 3.952519408867112e-06, + "loss": 0.2759, + "step": 13815 + }, + { + "epoch": 0.6472103808497681, + "grad_norm": 0.5956228110661409, + "learning_rate": 3.952365048807752e-06, + "loss": 0.2891, + "step": 13816 + }, + { + "epoch": 0.6472572258396965, + "grad_norm": 0.5993919440705116, + "learning_rate": 3.9522106803903965e-06, + "loss": 0.2926, + "step": 13817 + }, + { + "epoch": 0.6473040708296248, + "grad_norm": 0.5693660348544072, + "learning_rate": 3.952056303615935e-06, + "loss": 0.2833, + "step": 13818 + }, + { + "epoch": 0.6473509158195531, + "grad_norm": 0.5788233227912862, + "learning_rate": 3.951901918485252e-06, + "loss": 0.2734, + "step": 13819 + }, + { + "epoch": 0.6473977608094814, + "grad_norm": 0.6044575820835194, + "learning_rate": 3.9517475249992395e-06, + "loss": 0.2805, + "step": 13820 + }, + { + "epoch": 0.6474446057994098, + "grad_norm": 0.5878736637712263, + "learning_rate": 3.951593123158786e-06, + "loss": 0.282, + "step": 13821 + }, + { + "epoch": 0.6474914507893381, + "grad_norm": 0.5606420822962983, + "learning_rate": 3.951438712964777e-06, + "loss": 0.2783, + "step": 13822 + }, + { + "epoch": 0.6475382957792664, + "grad_norm": 0.6080182442141487, + "learning_rate": 3.951284294418104e-06, + "loss": 0.2734, + "step": 13823 + }, + { + "epoch": 0.6475851407691947, + "grad_norm": 0.6340813673620468, + "learning_rate": 3.951129867519655e-06, + "loss": 0.2869, + "step": 13824 + }, + { + "epoch": 0.6476319857591231, + "grad_norm": 0.618066649405146, + "learning_rate": 3.950975432270318e-06, + "loss": 0.2979, + "step": 13825 + }, + { + "epoch": 0.6476788307490514, + "grad_norm": 0.6371859060121773, + "learning_rate": 3.950820988670982e-06, + "loss": 0.2909, + "step": 13826 + }, + { + "epoch": 0.6477256757389798, + "grad_norm": 0.5327673140810265, + "learning_rate": 3.950666536722536e-06, + "loss": 0.2499, + "step": 13827 + }, + { + "epoch": 0.647772520728908, + "grad_norm": 0.5761664738191195, + "learning_rate": 3.9505120764258686e-06, + "loss": 0.2677, + "step": 13828 + }, + { + "epoch": 0.6478193657188364, + "grad_norm": 0.6626952104848968, + "learning_rate": 3.9503576077818686e-06, + "loss": 0.2649, + "step": 13829 + }, + { + "epoch": 0.6478662107087647, + "grad_norm": 0.615856477692264, + "learning_rate": 3.950203130791426e-06, + "loss": 0.2738, + "step": 13830 + }, + { + "epoch": 0.6479130556986931, + "grad_norm": 0.5775009258855265, + "learning_rate": 3.950048645455428e-06, + "loss": 0.2641, + "step": 13831 + }, + { + "epoch": 0.6479599006886213, + "grad_norm": 0.5789102840160334, + "learning_rate": 3.949894151774764e-06, + "loss": 0.2825, + "step": 13832 + }, + { + "epoch": 0.6480067456785497, + "grad_norm": 0.5783706244231325, + "learning_rate": 3.9497396497503244e-06, + "loss": 0.2743, + "step": 13833 + }, + { + "epoch": 0.648053590668478, + "grad_norm": 0.6202923145946374, + "learning_rate": 3.949585139382998e-06, + "loss": 0.2873, + "step": 13834 + }, + { + "epoch": 0.6481004356584064, + "grad_norm": 0.6273032636174083, + "learning_rate": 3.949430620673672e-06, + "loss": 0.2976, + "step": 13835 + }, + { + "epoch": 0.6481472806483347, + "grad_norm": 0.6016265244166351, + "learning_rate": 3.9492760936232376e-06, + "loss": 0.2931, + "step": 13836 + }, + { + "epoch": 0.648194125638263, + "grad_norm": 0.6025643068051739, + "learning_rate": 3.949121558232584e-06, + "loss": 0.2719, + "step": 13837 + }, + { + "epoch": 0.6482409706281913, + "grad_norm": 0.5921950577542816, + "learning_rate": 3.9489670145025995e-06, + "loss": 0.2856, + "step": 13838 + }, + { + "epoch": 0.6482878156181197, + "grad_norm": 0.6703403021733246, + "learning_rate": 3.948812462434175e-06, + "loss": 0.2785, + "step": 13839 + }, + { + "epoch": 0.648334660608048, + "grad_norm": 0.6601222869199941, + "learning_rate": 3.948657902028198e-06, + "loss": 0.3026, + "step": 13840 + }, + { + "epoch": 0.6483815055979762, + "grad_norm": 0.6011193195098893, + "learning_rate": 3.948503333285559e-06, + "loss": 0.3028, + "step": 13841 + }, + { + "epoch": 0.6484283505879046, + "grad_norm": 0.5194612111999413, + "learning_rate": 3.948348756207148e-06, + "loss": 0.2456, + "step": 13842 + }, + { + "epoch": 0.648475195577833, + "grad_norm": 0.5920330893160678, + "learning_rate": 3.948194170793853e-06, + "loss": 0.2625, + "step": 13843 + }, + { + "epoch": 0.6485220405677613, + "grad_norm": 0.6198970157744007, + "learning_rate": 3.948039577046565e-06, + "loss": 0.2904, + "step": 13844 + }, + { + "epoch": 0.6485688855576897, + "grad_norm": 0.6188371651281801, + "learning_rate": 3.947884974966174e-06, + "loss": 0.2652, + "step": 13845 + }, + { + "epoch": 0.6486157305476179, + "grad_norm": 0.5749126104753848, + "learning_rate": 3.947730364553568e-06, + "loss": 0.2754, + "step": 13846 + }, + { + "epoch": 0.6486625755375462, + "grad_norm": 0.6331723321085001, + "learning_rate": 3.947575745809638e-06, + "loss": 0.2725, + "step": 13847 + }, + { + "epoch": 0.6487094205274746, + "grad_norm": 0.5897070965757362, + "learning_rate": 3.947421118735273e-06, + "loss": 0.2857, + "step": 13848 + }, + { + "epoch": 0.648756265517403, + "grad_norm": 0.5291870077420833, + "learning_rate": 3.947266483331363e-06, + "loss": 0.2723, + "step": 13849 + }, + { + "epoch": 0.6488031105073312, + "grad_norm": 0.5940999229832034, + "learning_rate": 3.947111839598798e-06, + "loss": 0.2762, + "step": 13850 + }, + { + "epoch": 0.6488499554972595, + "grad_norm": 0.5781864620464741, + "learning_rate": 3.946957187538469e-06, + "loss": 0.2779, + "step": 13851 + }, + { + "epoch": 0.6488968004871879, + "grad_norm": 0.5917747252243066, + "learning_rate": 3.9468025271512644e-06, + "loss": 0.2912, + "step": 13852 + }, + { + "epoch": 0.6489436454771162, + "grad_norm": 0.5805813573377855, + "learning_rate": 3.9466478584380755e-06, + "loss": 0.3011, + "step": 13853 + }, + { + "epoch": 0.6489904904670446, + "grad_norm": 0.5551323525325463, + "learning_rate": 3.946493181399791e-06, + "loss": 0.2839, + "step": 13854 + }, + { + "epoch": 0.6490373354569728, + "grad_norm": 0.5988700491950811, + "learning_rate": 3.946338496037303e-06, + "loss": 0.2817, + "step": 13855 + }, + { + "epoch": 0.6490841804469012, + "grad_norm": 0.5706057684484818, + "learning_rate": 3.9461838023515e-06, + "loss": 0.285, + "step": 13856 + }, + { + "epoch": 0.6491310254368295, + "grad_norm": 0.6075532569812192, + "learning_rate": 3.946029100343272e-06, + "loss": 0.2825, + "step": 13857 + }, + { + "epoch": 0.6491778704267579, + "grad_norm": 0.5789658218651134, + "learning_rate": 3.9458743900135104e-06, + "loss": 0.2781, + "step": 13858 + }, + { + "epoch": 0.6492247154166861, + "grad_norm": 0.608365611654585, + "learning_rate": 3.945719671363105e-06, + "loss": 0.2845, + "step": 13859 + }, + { + "epoch": 0.6492715604066145, + "grad_norm": 0.6733793543726716, + "learning_rate": 3.945564944392947e-06, + "loss": 0.2986, + "step": 13860 + }, + { + "epoch": 0.6493184053965428, + "grad_norm": 0.5479854755073608, + "learning_rate": 3.945410209103925e-06, + "loss": 0.2772, + "step": 13861 + }, + { + "epoch": 0.6493652503864712, + "grad_norm": 0.6347946182786129, + "learning_rate": 3.945255465496931e-06, + "loss": 0.3163, + "step": 13862 + }, + { + "epoch": 0.6494120953763995, + "grad_norm": 0.5993720698527395, + "learning_rate": 3.945100713572855e-06, + "loss": 0.2938, + "step": 13863 + }, + { + "epoch": 0.6494589403663278, + "grad_norm": 0.6112633488460721, + "learning_rate": 3.944945953332588e-06, + "loss": 0.2875, + "step": 13864 + }, + { + "epoch": 0.6495057853562561, + "grad_norm": 0.55048670764339, + "learning_rate": 3.944791184777019e-06, + "loss": 0.2606, + "step": 13865 + }, + { + "epoch": 0.6495526303461845, + "grad_norm": 0.6252617733305688, + "learning_rate": 3.944636407907041e-06, + "loss": 0.2844, + "step": 13866 + }, + { + "epoch": 0.6495994753361128, + "grad_norm": 0.573434517443023, + "learning_rate": 3.944481622723544e-06, + "loss": 0.2607, + "step": 13867 + }, + { + "epoch": 0.6496463203260411, + "grad_norm": 0.6351262962440395, + "learning_rate": 3.944326829227418e-06, + "loss": 0.2775, + "step": 13868 + }, + { + "epoch": 0.6496931653159694, + "grad_norm": 0.5860218883778255, + "learning_rate": 3.944172027419553e-06, + "loss": 0.2852, + "step": 13869 + }, + { + "epoch": 0.6497400103058978, + "grad_norm": 0.5977060232389916, + "learning_rate": 3.9440172173008415e-06, + "loss": 0.2616, + "step": 13870 + }, + { + "epoch": 0.6497868552958261, + "grad_norm": 0.5584509379663354, + "learning_rate": 3.9438623988721735e-06, + "loss": 0.287, + "step": 13871 + }, + { + "epoch": 0.6498337002857545, + "grad_norm": 0.5763194069760168, + "learning_rate": 3.943707572134441e-06, + "loss": 0.2915, + "step": 13872 + }, + { + "epoch": 0.6498805452756827, + "grad_norm": 0.6082787098283935, + "learning_rate": 3.943552737088534e-06, + "loss": 0.2746, + "step": 13873 + }, + { + "epoch": 0.6499273902656111, + "grad_norm": 0.6516206253975741, + "learning_rate": 3.943397893735344e-06, + "loss": 0.2907, + "step": 13874 + }, + { + "epoch": 0.6499742352555394, + "grad_norm": 0.6395394279423103, + "learning_rate": 3.943243042075762e-06, + "loss": 0.2887, + "step": 13875 + }, + { + "epoch": 0.6500210802454678, + "grad_norm": 0.6172480715252022, + "learning_rate": 3.943088182110679e-06, + "loss": 0.2923, + "step": 13876 + }, + { + "epoch": 0.650067925235396, + "grad_norm": 0.5761866022360699, + "learning_rate": 3.942933313840987e-06, + "loss": 0.3016, + "step": 13877 + }, + { + "epoch": 0.6501147702253244, + "grad_norm": 0.6692486120384648, + "learning_rate": 3.9427784372675745e-06, + "loss": 0.2715, + "step": 13878 + }, + { + "epoch": 0.6501616152152527, + "grad_norm": 0.5925420138404952, + "learning_rate": 3.942623552391336e-06, + "loss": 0.2804, + "step": 13879 + }, + { + "epoch": 0.6502084602051811, + "grad_norm": 0.5739461841522875, + "learning_rate": 3.942468659213161e-06, + "loss": 0.2858, + "step": 13880 + }, + { + "epoch": 0.6502553051951094, + "grad_norm": 0.6337630899419755, + "learning_rate": 3.942313757733942e-06, + "loss": 0.2868, + "step": 13881 + }, + { + "epoch": 0.6503021501850377, + "grad_norm": 0.6002262428773345, + "learning_rate": 3.942158847954569e-06, + "loss": 0.2779, + "step": 13882 + }, + { + "epoch": 0.650348995174966, + "grad_norm": 0.5160338845067495, + "learning_rate": 3.942003929875935e-06, + "loss": 0.259, + "step": 13883 + }, + { + "epoch": 0.6503958401648944, + "grad_norm": 0.5518795034437538, + "learning_rate": 3.941849003498931e-06, + "loss": 0.2879, + "step": 13884 + }, + { + "epoch": 0.6504426851548227, + "grad_norm": 0.5813311763806408, + "learning_rate": 3.9416940688244485e-06, + "loss": 0.2917, + "step": 13885 + }, + { + "epoch": 0.650489530144751, + "grad_norm": 0.5749701886008046, + "learning_rate": 3.941539125853378e-06, + "loss": 0.2867, + "step": 13886 + }, + { + "epoch": 0.6505363751346793, + "grad_norm": 0.5886809327261588, + "learning_rate": 3.941384174586612e-06, + "loss": 0.2807, + "step": 13887 + }, + { + "epoch": 0.6505832201246077, + "grad_norm": 0.6106859677910915, + "learning_rate": 3.9412292150250445e-06, + "loss": 0.2735, + "step": 13888 + }, + { + "epoch": 0.650630065114536, + "grad_norm": 0.5932916370632259, + "learning_rate": 3.941074247169563e-06, + "loss": 0.2864, + "step": 13889 + }, + { + "epoch": 0.6506769101044644, + "grad_norm": 0.6398921383667474, + "learning_rate": 3.940919271021062e-06, + "loss": 0.2879, + "step": 13890 + }, + { + "epoch": 0.6507237550943926, + "grad_norm": 0.6399573847960348, + "learning_rate": 3.940764286580433e-06, + "loss": 0.3065, + "step": 13891 + }, + { + "epoch": 0.650770600084321, + "grad_norm": 0.6495636789967867, + "learning_rate": 3.9406092938485675e-06, + "loss": 0.2977, + "step": 13892 + }, + { + "epoch": 0.6508174450742493, + "grad_norm": 0.5849408603693889, + "learning_rate": 3.940454292826357e-06, + "loss": 0.2878, + "step": 13893 + }, + { + "epoch": 0.6508642900641777, + "grad_norm": 0.6056535065756267, + "learning_rate": 3.940299283514696e-06, + "loss": 0.2886, + "step": 13894 + }, + { + "epoch": 0.6509111350541059, + "grad_norm": 0.5898438709400929, + "learning_rate": 3.940144265914473e-06, + "loss": 0.2753, + "step": 13895 + }, + { + "epoch": 0.6509579800440343, + "grad_norm": 0.5814380719421468, + "learning_rate": 3.939989240026582e-06, + "loss": 0.2693, + "step": 13896 + }, + { + "epoch": 0.6510048250339626, + "grad_norm": 0.577408790688595, + "learning_rate": 3.939834205851915e-06, + "loss": 0.2923, + "step": 13897 + }, + { + "epoch": 0.651051670023891, + "grad_norm": 0.6102909421120642, + "learning_rate": 3.939679163391364e-06, + "loss": 0.2964, + "step": 13898 + }, + { + "epoch": 0.6510985150138193, + "grad_norm": 0.5642518305124018, + "learning_rate": 3.93952411264582e-06, + "loss": 0.2959, + "step": 13899 + }, + { + "epoch": 0.6511453600037476, + "grad_norm": 0.5554406820238044, + "learning_rate": 3.9393690536161786e-06, + "loss": 0.2776, + "step": 13900 + }, + { + "epoch": 0.6511922049936759, + "grad_norm": 0.5624021966776941, + "learning_rate": 3.9392139863033285e-06, + "loss": 0.2769, + "step": 13901 + }, + { + "epoch": 0.6512390499836043, + "grad_norm": 0.5527520463313538, + "learning_rate": 3.939058910708165e-06, + "loss": 0.2604, + "step": 13902 + }, + { + "epoch": 0.6512858949735326, + "grad_norm": 0.5774095590399998, + "learning_rate": 3.938903826831579e-06, + "loss": 0.2762, + "step": 13903 + }, + { + "epoch": 0.6513327399634609, + "grad_norm": 0.6493113149190471, + "learning_rate": 3.938748734674461e-06, + "loss": 0.2953, + "step": 13904 + }, + { + "epoch": 0.6513795849533892, + "grad_norm": 0.5829146019325488, + "learning_rate": 3.938593634237707e-06, + "loss": 0.2927, + "step": 13905 + }, + { + "epoch": 0.6514264299433176, + "grad_norm": 0.6057820738001265, + "learning_rate": 3.938438525522209e-06, + "loss": 0.2881, + "step": 13906 + }, + { + "epoch": 0.6514732749332459, + "grad_norm": 0.5462359484067486, + "learning_rate": 3.938283408528859e-06, + "loss": 0.2833, + "step": 13907 + }, + { + "epoch": 0.6515201199231743, + "grad_norm": 0.6076336954187659, + "learning_rate": 3.938128283258548e-06, + "loss": 0.2667, + "step": 13908 + }, + { + "epoch": 0.6515669649131025, + "grad_norm": 0.5849392125159955, + "learning_rate": 3.937973149712172e-06, + "loss": 0.2867, + "step": 13909 + }, + { + "epoch": 0.6516138099030309, + "grad_norm": 0.6314946386716926, + "learning_rate": 3.93781800789062e-06, + "loss": 0.2904, + "step": 13910 + }, + { + "epoch": 0.6516606548929592, + "grad_norm": 0.607959570862781, + "learning_rate": 3.937662857794787e-06, + "loss": 0.2808, + "step": 13911 + }, + { + "epoch": 0.6517074998828876, + "grad_norm": 0.596972626056457, + "learning_rate": 3.937507699425567e-06, + "loss": 0.2886, + "step": 13912 + }, + { + "epoch": 0.6517543448728158, + "grad_norm": 0.5504379838376392, + "learning_rate": 3.937352532783851e-06, + "loss": 0.2688, + "step": 13913 + }, + { + "epoch": 0.6518011898627442, + "grad_norm": 0.5606953552075613, + "learning_rate": 3.937197357870532e-06, + "loss": 0.2721, + "step": 13914 + }, + { + "epoch": 0.6518480348526725, + "grad_norm": 0.5727292876195028, + "learning_rate": 3.937042174686504e-06, + "loss": 0.2837, + "step": 13915 + }, + { + "epoch": 0.6518948798426009, + "grad_norm": 0.6123639740366673, + "learning_rate": 3.936886983232658e-06, + "loss": 0.2764, + "step": 13916 + }, + { + "epoch": 0.6519417248325292, + "grad_norm": 0.5890341098483695, + "learning_rate": 3.936731783509891e-06, + "loss": 0.2906, + "step": 13917 + }, + { + "epoch": 0.6519885698224575, + "grad_norm": 0.5744226080032089, + "learning_rate": 3.936576575519092e-06, + "loss": 0.2882, + "step": 13918 + }, + { + "epoch": 0.6520354148123858, + "grad_norm": 0.6475334524105384, + "learning_rate": 3.936421359261156e-06, + "loss": 0.2738, + "step": 13919 + }, + { + "epoch": 0.6520822598023142, + "grad_norm": 0.5343084590202888, + "learning_rate": 3.9362661347369755e-06, + "loss": 0.2639, + "step": 13920 + }, + { + "epoch": 0.6521291047922425, + "grad_norm": 0.6120581755243704, + "learning_rate": 3.936110901947446e-06, + "loss": 0.2924, + "step": 13921 + }, + { + "epoch": 0.6521759497821707, + "grad_norm": 0.5744263361257064, + "learning_rate": 3.935955660893459e-06, + "loss": 0.2931, + "step": 13922 + }, + { + "epoch": 0.6522227947720991, + "grad_norm": 0.6095770237412752, + "learning_rate": 3.9358004115759066e-06, + "loss": 0.2795, + "step": 13923 + }, + { + "epoch": 0.6522696397620275, + "grad_norm": 0.6184029799395301, + "learning_rate": 3.935645153995685e-06, + "loss": 0.2921, + "step": 13924 + }, + { + "epoch": 0.6523164847519558, + "grad_norm": 0.6648156967544391, + "learning_rate": 3.935489888153686e-06, + "loss": 0.3061, + "step": 13925 + }, + { + "epoch": 0.6523633297418842, + "grad_norm": 0.6471248882235473, + "learning_rate": 3.935334614050803e-06, + "loss": 0.282, + "step": 13926 + }, + { + "epoch": 0.6524101747318124, + "grad_norm": 0.6672394363623576, + "learning_rate": 3.935179331687931e-06, + "loss": 0.3175, + "step": 13927 + }, + { + "epoch": 0.6524570197217407, + "grad_norm": 0.5888479356618675, + "learning_rate": 3.9350240410659626e-06, + "loss": 0.2831, + "step": 13928 + }, + { + "epoch": 0.6525038647116691, + "grad_norm": 0.6285088244834919, + "learning_rate": 3.9348687421857904e-06, + "loss": 0.2895, + "step": 13929 + }, + { + "epoch": 0.6525507097015975, + "grad_norm": 0.6124408025137853, + "learning_rate": 3.934713435048311e-06, + "loss": 0.2925, + "step": 13930 + }, + { + "epoch": 0.6525975546915257, + "grad_norm": 0.5880138993280806, + "learning_rate": 3.934558119654415e-06, + "loss": 0.3084, + "step": 13931 + }, + { + "epoch": 0.652644399681454, + "grad_norm": 0.5926896624502003, + "learning_rate": 3.934402796004997e-06, + "loss": 0.2729, + "step": 13932 + }, + { + "epoch": 0.6526912446713824, + "grad_norm": 0.6218634390943515, + "learning_rate": 3.934247464100953e-06, + "loss": 0.2824, + "step": 13933 + }, + { + "epoch": 0.6527380896613107, + "grad_norm": 0.6268849308750235, + "learning_rate": 3.934092123943174e-06, + "loss": 0.2907, + "step": 13934 + }, + { + "epoch": 0.6527849346512391, + "grad_norm": 0.5600898206915437, + "learning_rate": 3.9339367755325565e-06, + "loss": 0.2554, + "step": 13935 + }, + { + "epoch": 0.6528317796411673, + "grad_norm": 0.6382837158359481, + "learning_rate": 3.933781418869993e-06, + "loss": 0.2993, + "step": 13936 + }, + { + "epoch": 0.6528786246310957, + "grad_norm": 0.6198402513939675, + "learning_rate": 3.9336260539563776e-06, + "loss": 0.2847, + "step": 13937 + }, + { + "epoch": 0.652925469621024, + "grad_norm": 0.5871851008210367, + "learning_rate": 3.9334706807926035e-06, + "loss": 0.2767, + "step": 13938 + }, + { + "epoch": 0.6529723146109524, + "grad_norm": 0.5500522044451678, + "learning_rate": 3.933315299379568e-06, + "loss": 0.27, + "step": 13939 + }, + { + "epoch": 0.6530191596008806, + "grad_norm": 0.5550480045554457, + "learning_rate": 3.933159909718162e-06, + "loss": 0.263, + "step": 13940 + }, + { + "epoch": 0.653066004590809, + "grad_norm": 0.6129968243899626, + "learning_rate": 3.933004511809281e-06, + "loss": 0.2851, + "step": 13941 + }, + { + "epoch": 0.6531128495807373, + "grad_norm": 0.6143491334141584, + "learning_rate": 3.932849105653818e-06, + "loss": 0.2849, + "step": 13942 + }, + { + "epoch": 0.6531596945706657, + "grad_norm": 0.6029324654982322, + "learning_rate": 3.9326936912526705e-06, + "loss": 0.2818, + "step": 13943 + }, + { + "epoch": 0.653206539560594, + "grad_norm": 0.552450676826493, + "learning_rate": 3.93253826860673e-06, + "loss": 0.2635, + "step": 13944 + }, + { + "epoch": 0.6532533845505223, + "grad_norm": 0.5611549840507781, + "learning_rate": 3.932382837716892e-06, + "loss": 0.2682, + "step": 13945 + }, + { + "epoch": 0.6533002295404506, + "grad_norm": 0.6371102724932354, + "learning_rate": 3.932227398584051e-06, + "loss": 0.3067, + "step": 13946 + }, + { + "epoch": 0.653347074530379, + "grad_norm": 0.5924663854851897, + "learning_rate": 3.9320719512091015e-06, + "loss": 0.2934, + "step": 13947 + }, + { + "epoch": 0.6533939195203073, + "grad_norm": 0.5843942477582144, + "learning_rate": 3.931916495592937e-06, + "loss": 0.2909, + "step": 13948 + }, + { + "epoch": 0.6534407645102356, + "grad_norm": 0.5368003089669977, + "learning_rate": 3.9317610317364545e-06, + "loss": 0.2645, + "step": 13949 + }, + { + "epoch": 0.6534876095001639, + "grad_norm": 0.6130702130156913, + "learning_rate": 3.931605559640546e-06, + "loss": 0.2939, + "step": 13950 + }, + { + "epoch": 0.6535344544900923, + "grad_norm": 0.566019059773071, + "learning_rate": 3.931450079306107e-06, + "loss": 0.2787, + "step": 13951 + }, + { + "epoch": 0.6535812994800206, + "grad_norm": 0.6518541002515991, + "learning_rate": 3.931294590734033e-06, + "loss": 0.2961, + "step": 13952 + }, + { + "epoch": 0.653628144469949, + "grad_norm": 0.5887766197202356, + "learning_rate": 3.931139093925218e-06, + "loss": 0.2814, + "step": 13953 + }, + { + "epoch": 0.6536749894598772, + "grad_norm": 0.5560104480148776, + "learning_rate": 3.930983588880557e-06, + "loss": 0.2633, + "step": 13954 + }, + { + "epoch": 0.6537218344498056, + "grad_norm": 0.5792092763457788, + "learning_rate": 3.930828075600946e-06, + "loss": 0.2575, + "step": 13955 + }, + { + "epoch": 0.6537686794397339, + "grad_norm": 0.5321376112818479, + "learning_rate": 3.93067255408728e-06, + "loss": 0.2608, + "step": 13956 + }, + { + "epoch": 0.6538155244296623, + "grad_norm": 0.6722242247699078, + "learning_rate": 3.93051702434045e-06, + "loss": 0.3011, + "step": 13957 + }, + { + "epoch": 0.6538623694195905, + "grad_norm": 0.571137811112114, + "learning_rate": 3.930361486361357e-06, + "loss": 0.2861, + "step": 13958 + }, + { + "epoch": 0.6539092144095189, + "grad_norm": 0.5583755819796345, + "learning_rate": 3.930205940150892e-06, + "loss": 0.2791, + "step": 13959 + }, + { + "epoch": 0.6539560593994472, + "grad_norm": 0.5780639767494009, + "learning_rate": 3.930050385709951e-06, + "loss": 0.2919, + "step": 13960 + }, + { + "epoch": 0.6540029043893756, + "grad_norm": 0.6113807083325472, + "learning_rate": 3.929894823039429e-06, + "loss": 0.2814, + "step": 13961 + }, + { + "epoch": 0.6540497493793039, + "grad_norm": 0.6090573489271162, + "learning_rate": 3.929739252140222e-06, + "loss": 0.2973, + "step": 13962 + }, + { + "epoch": 0.6540965943692322, + "grad_norm": 0.589276303042313, + "learning_rate": 3.9295836730132256e-06, + "loss": 0.2869, + "step": 13963 + }, + { + "epoch": 0.6541434393591605, + "grad_norm": 0.5677390301850728, + "learning_rate": 3.929428085659335e-06, + "loss": 0.2573, + "step": 13964 + }, + { + "epoch": 0.6541902843490889, + "grad_norm": 0.6635886714472546, + "learning_rate": 3.929272490079445e-06, + "loss": 0.2846, + "step": 13965 + }, + { + "epoch": 0.6542371293390172, + "grad_norm": 0.5998249182473453, + "learning_rate": 3.929116886274449e-06, + "loss": 0.2829, + "step": 13966 + }, + { + "epoch": 0.6542839743289455, + "grad_norm": 0.5416718767638348, + "learning_rate": 3.9289612742452464e-06, + "loss": 0.2804, + "step": 13967 + }, + { + "epoch": 0.6543308193188738, + "grad_norm": 0.6209598669331534, + "learning_rate": 3.92880565399273e-06, + "loss": 0.3003, + "step": 13968 + }, + { + "epoch": 0.6543776643088022, + "grad_norm": 0.6247094456875152, + "learning_rate": 3.928650025517796e-06, + "loss": 0.2957, + "step": 13969 + }, + { + "epoch": 0.6544245092987305, + "grad_norm": 0.5198925052380673, + "learning_rate": 3.928494388821341e-06, + "loss": 0.2615, + "step": 13970 + }, + { + "epoch": 0.6544713542886589, + "grad_norm": 0.5892686564718328, + "learning_rate": 3.92833874390426e-06, + "loss": 0.2825, + "step": 13971 + }, + { + "epoch": 0.6545181992785871, + "grad_norm": 0.6137904339679672, + "learning_rate": 3.928183090767448e-06, + "loss": 0.2818, + "step": 13972 + }, + { + "epoch": 0.6545650442685155, + "grad_norm": 0.6981163651871714, + "learning_rate": 3.9280274294118e-06, + "loss": 0.311, + "step": 13973 + }, + { + "epoch": 0.6546118892584438, + "grad_norm": 0.6106048832589119, + "learning_rate": 3.927871759838215e-06, + "loss": 0.2958, + "step": 13974 + }, + { + "epoch": 0.6546587342483722, + "grad_norm": 0.5806150946297866, + "learning_rate": 3.927716082047586e-06, + "loss": 0.295, + "step": 13975 + }, + { + "epoch": 0.6547055792383004, + "grad_norm": 0.619047401506748, + "learning_rate": 3.92756039604081e-06, + "loss": 0.3017, + "step": 13976 + }, + { + "epoch": 0.6547524242282288, + "grad_norm": 0.6112622830849266, + "learning_rate": 3.9274047018187834e-06, + "loss": 0.2802, + "step": 13977 + }, + { + "epoch": 0.6547992692181571, + "grad_norm": 0.5442830902542837, + "learning_rate": 3.9272489993824005e-06, + "loss": 0.2718, + "step": 13978 + }, + { + "epoch": 0.6548461142080855, + "grad_norm": 0.6594762749220444, + "learning_rate": 3.927093288732558e-06, + "loss": 0.2642, + "step": 13979 + }, + { + "epoch": 0.6548929591980138, + "grad_norm": 0.5695242513580763, + "learning_rate": 3.926937569870154e-06, + "loss": 0.2884, + "step": 13980 + }, + { + "epoch": 0.6549398041879421, + "grad_norm": 0.6064530246556012, + "learning_rate": 3.926781842796082e-06, + "loss": 0.303, + "step": 13981 + }, + { + "epoch": 0.6549866491778704, + "grad_norm": 0.6055919398575921, + "learning_rate": 3.926626107511239e-06, + "loss": 0.2772, + "step": 13982 + }, + { + "epoch": 0.6550334941677988, + "grad_norm": 0.537440715218361, + "learning_rate": 3.926470364016521e-06, + "loss": 0.2434, + "step": 13983 + }, + { + "epoch": 0.6550803391577271, + "grad_norm": 0.6117422599325979, + "learning_rate": 3.926314612312825e-06, + "loss": 0.2896, + "step": 13984 + }, + { + "epoch": 0.6551271841476554, + "grad_norm": 0.6142227397160092, + "learning_rate": 3.926158852401046e-06, + "loss": 0.2987, + "step": 13985 + }, + { + "epoch": 0.6551740291375837, + "grad_norm": 0.5619759678343161, + "learning_rate": 3.926003084282083e-06, + "loss": 0.284, + "step": 13986 + }, + { + "epoch": 0.6552208741275121, + "grad_norm": 0.6437705154430073, + "learning_rate": 3.925847307956829e-06, + "loss": 0.2749, + "step": 13987 + }, + { + "epoch": 0.6552677191174404, + "grad_norm": 0.5633079336509897, + "learning_rate": 3.925691523426183e-06, + "loss": 0.2818, + "step": 13988 + }, + { + "epoch": 0.6553145641073688, + "grad_norm": 0.5615742294473638, + "learning_rate": 3.9255357306910406e-06, + "loss": 0.2859, + "step": 13989 + }, + { + "epoch": 0.655361409097297, + "grad_norm": 0.5741163831654491, + "learning_rate": 3.9253799297522975e-06, + "loss": 0.2846, + "step": 13990 + }, + { + "epoch": 0.6554082540872254, + "grad_norm": 0.6125187858598028, + "learning_rate": 3.925224120610853e-06, + "loss": 0.313, + "step": 13991 + }, + { + "epoch": 0.6554550990771537, + "grad_norm": 0.5642792117680856, + "learning_rate": 3.9250683032676e-06, + "loss": 0.2873, + "step": 13992 + }, + { + "epoch": 0.6555019440670821, + "grad_norm": 0.5439960823826883, + "learning_rate": 3.924912477723438e-06, + "loss": 0.2663, + "step": 13993 + }, + { + "epoch": 0.6555487890570103, + "grad_norm": 0.6472762977958093, + "learning_rate": 3.924756643979263e-06, + "loss": 0.2813, + "step": 13994 + }, + { + "epoch": 0.6555956340469387, + "grad_norm": 0.5673381153170958, + "learning_rate": 3.924600802035972e-06, + "loss": 0.275, + "step": 13995 + }, + { + "epoch": 0.655642479036867, + "grad_norm": 0.5696560184650036, + "learning_rate": 3.92444495189446e-06, + "loss": 0.2621, + "step": 13996 + }, + { + "epoch": 0.6556893240267954, + "grad_norm": 0.6274737291442514, + "learning_rate": 3.9242890935556265e-06, + "loss": 0.2745, + "step": 13997 + }, + { + "epoch": 0.6557361690167237, + "grad_norm": 0.5985207944263096, + "learning_rate": 3.924133227020367e-06, + "loss": 0.2855, + "step": 13998 + }, + { + "epoch": 0.655783014006652, + "grad_norm": 0.5947213128778716, + "learning_rate": 3.923977352289578e-06, + "loss": 0.2955, + "step": 13999 + }, + { + "epoch": 0.6558298589965803, + "grad_norm": 0.5848490752711988, + "learning_rate": 3.923821469364158e-06, + "loss": 0.283, + "step": 14000 + }, + { + "epoch": 0.6558767039865087, + "grad_norm": 0.605055042553698, + "learning_rate": 3.923665578245003e-06, + "loss": 0.2928, + "step": 14001 + }, + { + "epoch": 0.655923548976437, + "grad_norm": 0.5899117537973563, + "learning_rate": 3.9235096789330106e-06, + "loss": 0.2843, + "step": 14002 + }, + { + "epoch": 0.6559703939663653, + "grad_norm": 0.6093247491435807, + "learning_rate": 3.923353771429078e-06, + "loss": 0.2899, + "step": 14003 + }, + { + "epoch": 0.6560172389562936, + "grad_norm": 0.6062750064999788, + "learning_rate": 3.923197855734102e-06, + "loss": 0.2844, + "step": 14004 + }, + { + "epoch": 0.656064083946222, + "grad_norm": 0.612309752360483, + "learning_rate": 3.9230419318489796e-06, + "loss": 0.2986, + "step": 14005 + }, + { + "epoch": 0.6561109289361503, + "grad_norm": 0.5611697041901319, + "learning_rate": 3.922885999774608e-06, + "loss": 0.2685, + "step": 14006 + }, + { + "epoch": 0.6561577739260787, + "grad_norm": 0.5567042530331947, + "learning_rate": 3.922730059511887e-06, + "loss": 0.2737, + "step": 14007 + }, + { + "epoch": 0.6562046189160069, + "grad_norm": 0.6264469166520117, + "learning_rate": 3.922574111061711e-06, + "loss": 0.2851, + "step": 14008 + }, + { + "epoch": 0.6562514639059353, + "grad_norm": 0.5580113395524092, + "learning_rate": 3.922418154424979e-06, + "loss": 0.2781, + "step": 14009 + }, + { + "epoch": 0.6562983088958636, + "grad_norm": 0.6356902473593184, + "learning_rate": 3.922262189602587e-06, + "loss": 0.3027, + "step": 14010 + }, + { + "epoch": 0.656345153885792, + "grad_norm": 0.610922048369862, + "learning_rate": 3.922106216595434e-06, + "loss": 0.2854, + "step": 14011 + }, + { + "epoch": 0.6563919988757202, + "grad_norm": 0.5970149054156749, + "learning_rate": 3.9219502354044175e-06, + "loss": 0.2681, + "step": 14012 + }, + { + "epoch": 0.6564388438656485, + "grad_norm": 0.5984585213164686, + "learning_rate": 3.921794246030435e-06, + "loss": 0.3015, + "step": 14013 + }, + { + "epoch": 0.6564856888555769, + "grad_norm": 0.5850452244968836, + "learning_rate": 3.921638248474384e-06, + "loss": 0.2824, + "step": 14014 + }, + { + "epoch": 0.6565325338455053, + "grad_norm": 0.5739447895907583, + "learning_rate": 3.921482242737161e-06, + "loss": 0.2814, + "step": 14015 + }, + { + "epoch": 0.6565793788354336, + "grad_norm": 0.5880605419543828, + "learning_rate": 3.921326228819666e-06, + "loss": 0.2788, + "step": 14016 + }, + { + "epoch": 0.6566262238253618, + "grad_norm": 0.5642470180913884, + "learning_rate": 3.9211702067227965e-06, + "loss": 0.2926, + "step": 14017 + }, + { + "epoch": 0.6566730688152902, + "grad_norm": 0.5682184782197293, + "learning_rate": 3.9210141764474485e-06, + "loss": 0.2688, + "step": 14018 + }, + { + "epoch": 0.6567199138052185, + "grad_norm": 0.566048666915977, + "learning_rate": 3.920858137994521e-06, + "loss": 0.2849, + "step": 14019 + }, + { + "epoch": 0.6567667587951469, + "grad_norm": 0.6729474582856297, + "learning_rate": 3.920702091364913e-06, + "loss": 0.2864, + "step": 14020 + }, + { + "epoch": 0.6568136037850751, + "grad_norm": 0.5756812200651987, + "learning_rate": 3.920546036559521e-06, + "loss": 0.2858, + "step": 14021 + }, + { + "epoch": 0.6568604487750035, + "grad_norm": 0.5803595396390439, + "learning_rate": 3.920389973579243e-06, + "loss": 0.272, + "step": 14022 + }, + { + "epoch": 0.6569072937649318, + "grad_norm": 0.5928606808503761, + "learning_rate": 3.920233902424978e-06, + "loss": 0.2801, + "step": 14023 + }, + { + "epoch": 0.6569541387548602, + "grad_norm": 0.6029239598683622, + "learning_rate": 3.9200778230976244e-06, + "loss": 0.2795, + "step": 14024 + }, + { + "epoch": 0.6570009837447885, + "grad_norm": 0.6186676192201166, + "learning_rate": 3.91992173559808e-06, + "loss": 0.2918, + "step": 14025 + }, + { + "epoch": 0.6570478287347168, + "grad_norm": 0.6262930549033001, + "learning_rate": 3.9197656399272424e-06, + "loss": 0.292, + "step": 14026 + }, + { + "epoch": 0.6570946737246451, + "grad_norm": 0.6140565361410617, + "learning_rate": 3.91960953608601e-06, + "loss": 0.3134, + "step": 14027 + }, + { + "epoch": 0.6571415187145735, + "grad_norm": 0.6232142680756925, + "learning_rate": 3.919453424075282e-06, + "loss": 0.2968, + "step": 14028 + }, + { + "epoch": 0.6571883637045018, + "grad_norm": 0.6016922405337564, + "learning_rate": 3.919297303895956e-06, + "loss": 0.2925, + "step": 14029 + }, + { + "epoch": 0.6572352086944301, + "grad_norm": 0.5794096940407449, + "learning_rate": 3.919141175548931e-06, + "loss": 0.2963, + "step": 14030 + }, + { + "epoch": 0.6572820536843584, + "grad_norm": 0.5786829265731723, + "learning_rate": 3.918985039035106e-06, + "loss": 0.2864, + "step": 14031 + }, + { + "epoch": 0.6573288986742868, + "grad_norm": 0.5883765851955337, + "learning_rate": 3.918828894355377e-06, + "loss": 0.2829, + "step": 14032 + }, + { + "epoch": 0.6573757436642151, + "grad_norm": 0.5868693305435437, + "learning_rate": 3.918672741510645e-06, + "loss": 0.3002, + "step": 14033 + }, + { + "epoch": 0.6574225886541435, + "grad_norm": 0.5289824853839172, + "learning_rate": 3.918516580501809e-06, + "loss": 0.2545, + "step": 14034 + }, + { + "epoch": 0.6574694336440717, + "grad_norm": 0.5941081070888575, + "learning_rate": 3.918360411329766e-06, + "loss": 0.2852, + "step": 14035 + }, + { + "epoch": 0.6575162786340001, + "grad_norm": 0.5951258649418519, + "learning_rate": 3.918204233995415e-06, + "loss": 0.2892, + "step": 14036 + }, + { + "epoch": 0.6575631236239284, + "grad_norm": 0.589027549777887, + "learning_rate": 3.918048048499656e-06, + "loss": 0.309, + "step": 14037 + }, + { + "epoch": 0.6576099686138568, + "grad_norm": 0.6548184579641746, + "learning_rate": 3.917891854843385e-06, + "loss": 0.3028, + "step": 14038 + }, + { + "epoch": 0.657656813603785, + "grad_norm": 0.588082590131201, + "learning_rate": 3.917735653027503e-06, + "loss": 0.2696, + "step": 14039 + }, + { + "epoch": 0.6577036585937134, + "grad_norm": 0.5923923090547313, + "learning_rate": 3.91757944305291e-06, + "loss": 0.2935, + "step": 14040 + }, + { + "epoch": 0.6577505035836417, + "grad_norm": 0.5590593609233473, + "learning_rate": 3.917423224920503e-06, + "loss": 0.2673, + "step": 14041 + }, + { + "epoch": 0.6577973485735701, + "grad_norm": 0.5661372401115805, + "learning_rate": 3.917266998631182e-06, + "loss": 0.2799, + "step": 14042 + }, + { + "epoch": 0.6578441935634984, + "grad_norm": 0.5739423543187341, + "learning_rate": 3.9171107641858465e-06, + "loss": 0.2772, + "step": 14043 + }, + { + "epoch": 0.6578910385534267, + "grad_norm": 0.5964922227765695, + "learning_rate": 3.916954521585393e-06, + "loss": 0.2865, + "step": 14044 + }, + { + "epoch": 0.657937883543355, + "grad_norm": 0.5320727481609755, + "learning_rate": 3.916798270830723e-06, + "loss": 0.2737, + "step": 14045 + }, + { + "epoch": 0.6579847285332834, + "grad_norm": 0.5983059840492903, + "learning_rate": 3.916642011922736e-06, + "loss": 0.28, + "step": 14046 + }, + { + "epoch": 0.6580315735232117, + "grad_norm": 0.5844144176753105, + "learning_rate": 3.916485744862328e-06, + "loss": 0.2971, + "step": 14047 + }, + { + "epoch": 0.65807841851314, + "grad_norm": 0.5849605374265348, + "learning_rate": 3.916329469650402e-06, + "loss": 0.2983, + "step": 14048 + }, + { + "epoch": 0.6581252635030683, + "grad_norm": 0.6192004616572052, + "learning_rate": 3.916173186287856e-06, + "loss": 0.2903, + "step": 14049 + }, + { + "epoch": 0.6581721084929967, + "grad_norm": 0.5843266190967396, + "learning_rate": 3.916016894775589e-06, + "loss": 0.2859, + "step": 14050 + }, + { + "epoch": 0.658218953482925, + "grad_norm": 0.5634344147919448, + "learning_rate": 3.9158605951145e-06, + "loss": 0.2655, + "step": 14051 + }, + { + "epoch": 0.6582657984728534, + "grad_norm": 0.6295585463586322, + "learning_rate": 3.9157042873054905e-06, + "loss": 0.2836, + "step": 14052 + }, + { + "epoch": 0.6583126434627816, + "grad_norm": 0.5938838061249135, + "learning_rate": 3.915547971349458e-06, + "loss": 0.2712, + "step": 14053 + }, + { + "epoch": 0.65835948845271, + "grad_norm": 0.5390892829169223, + "learning_rate": 3.915391647247303e-06, + "loss": 0.2667, + "step": 14054 + }, + { + "epoch": 0.6584063334426383, + "grad_norm": 0.5536726564557587, + "learning_rate": 3.915235314999924e-06, + "loss": 0.2788, + "step": 14055 + }, + { + "epoch": 0.6584531784325667, + "grad_norm": 0.5687766830445463, + "learning_rate": 3.9150789746082215e-06, + "loss": 0.2831, + "step": 14056 + }, + { + "epoch": 0.6585000234224949, + "grad_norm": 0.5663546226497004, + "learning_rate": 3.9149226260730965e-06, + "loss": 0.2742, + "step": 14057 + }, + { + "epoch": 0.6585468684124233, + "grad_norm": 0.6774376451078687, + "learning_rate": 3.914766269395446e-06, + "loss": 0.3033, + "step": 14058 + }, + { + "epoch": 0.6585937134023516, + "grad_norm": 0.6060699856888542, + "learning_rate": 3.914609904576172e-06, + "loss": 0.2742, + "step": 14059 + }, + { + "epoch": 0.65864055839228, + "grad_norm": 0.5685402432995371, + "learning_rate": 3.914453531616173e-06, + "loss": 0.2758, + "step": 14060 + }, + { + "epoch": 0.6586874033822083, + "grad_norm": 0.5542792088103725, + "learning_rate": 3.91429715051635e-06, + "loss": 0.2787, + "step": 14061 + }, + { + "epoch": 0.6587342483721366, + "grad_norm": 0.5701991839590503, + "learning_rate": 3.914140761277603e-06, + "loss": 0.2691, + "step": 14062 + }, + { + "epoch": 0.6587810933620649, + "grad_norm": 0.611598151996131, + "learning_rate": 3.91398436390083e-06, + "loss": 0.2924, + "step": 14063 + }, + { + "epoch": 0.6588279383519933, + "grad_norm": 0.642520094061918, + "learning_rate": 3.913827958386933e-06, + "loss": 0.2874, + "step": 14064 + }, + { + "epoch": 0.6588747833419216, + "grad_norm": 0.5915639394684162, + "learning_rate": 3.913671544736811e-06, + "loss": 0.276, + "step": 14065 + }, + { + "epoch": 0.6589216283318499, + "grad_norm": 0.5768552742954225, + "learning_rate": 3.913515122951365e-06, + "loss": 0.2705, + "step": 14066 + }, + { + "epoch": 0.6589684733217782, + "grad_norm": 0.6309660902465963, + "learning_rate": 3.913358693031494e-06, + "loss": 0.2948, + "step": 14067 + }, + { + "epoch": 0.6590153183117066, + "grad_norm": 0.6034534915042664, + "learning_rate": 3.9132022549781e-06, + "loss": 0.285, + "step": 14068 + }, + { + "epoch": 0.6590621633016349, + "grad_norm": 0.5702351219917215, + "learning_rate": 3.913045808792082e-06, + "loss": 0.2941, + "step": 14069 + }, + { + "epoch": 0.6591090082915633, + "grad_norm": 0.5583889237858464, + "learning_rate": 3.9128893544743405e-06, + "loss": 0.2517, + "step": 14070 + }, + { + "epoch": 0.6591558532814915, + "grad_norm": 0.5807640622517479, + "learning_rate": 3.912732892025775e-06, + "loss": 0.2708, + "step": 14071 + }, + { + "epoch": 0.6592026982714199, + "grad_norm": 0.6171648982002026, + "learning_rate": 3.912576421447287e-06, + "loss": 0.2802, + "step": 14072 + }, + { + "epoch": 0.6592495432613482, + "grad_norm": 0.6416756286493258, + "learning_rate": 3.912419942739778e-06, + "loss": 0.2694, + "step": 14073 + }, + { + "epoch": 0.6592963882512766, + "grad_norm": 0.6387518997001614, + "learning_rate": 3.912263455904146e-06, + "loss": 0.3026, + "step": 14074 + }, + { + "epoch": 0.6593432332412048, + "grad_norm": 0.6322096447889847, + "learning_rate": 3.912106960941293e-06, + "loss": 0.2703, + "step": 14075 + }, + { + "epoch": 0.6593900782311332, + "grad_norm": 0.644451827475426, + "learning_rate": 3.9119504578521195e-06, + "loss": 0.292, + "step": 14076 + }, + { + "epoch": 0.6594369232210615, + "grad_norm": 0.6575966253833213, + "learning_rate": 3.911793946637526e-06, + "loss": 0.3007, + "step": 14077 + }, + { + "epoch": 0.6594837682109899, + "grad_norm": 0.5994360615059143, + "learning_rate": 3.911637427298413e-06, + "loss": 0.2772, + "step": 14078 + }, + { + "epoch": 0.6595306132009182, + "grad_norm": 0.5571694237344315, + "learning_rate": 3.911480899835683e-06, + "loss": 0.2716, + "step": 14079 + }, + { + "epoch": 0.6595774581908465, + "grad_norm": 0.6130390168382159, + "learning_rate": 3.911324364250233e-06, + "loss": 0.2795, + "step": 14080 + }, + { + "epoch": 0.6596243031807748, + "grad_norm": 0.576954215851852, + "learning_rate": 3.911167820542967e-06, + "loss": 0.2854, + "step": 14081 + }, + { + "epoch": 0.6596711481707032, + "grad_norm": 0.5586698918869536, + "learning_rate": 3.911011268714784e-06, + "loss": 0.2794, + "step": 14082 + }, + { + "epoch": 0.6597179931606315, + "grad_norm": 0.5745114656340491, + "learning_rate": 3.910854708766587e-06, + "loss": 0.273, + "step": 14083 + }, + { + "epoch": 0.6597648381505598, + "grad_norm": 0.5386573885977446, + "learning_rate": 3.910698140699275e-06, + "loss": 0.2712, + "step": 14084 + }, + { + "epoch": 0.6598116831404881, + "grad_norm": 0.6118185637036756, + "learning_rate": 3.91054156451375e-06, + "loss": 0.2942, + "step": 14085 + }, + { + "epoch": 0.6598585281304165, + "grad_norm": 0.5900382436437674, + "learning_rate": 3.910384980210913e-06, + "loss": 0.2771, + "step": 14086 + }, + { + "epoch": 0.6599053731203448, + "grad_norm": 0.5700740154128305, + "learning_rate": 3.910228387791665e-06, + "loss": 0.2805, + "step": 14087 + }, + { + "epoch": 0.6599522181102732, + "grad_norm": 0.5681472936761012, + "learning_rate": 3.910071787256906e-06, + "loss": 0.2914, + "step": 14088 + }, + { + "epoch": 0.6599990631002014, + "grad_norm": 0.5984774256076721, + "learning_rate": 3.90991517860754e-06, + "loss": 0.2767, + "step": 14089 + }, + { + "epoch": 0.6600459080901298, + "grad_norm": 0.5963018630716577, + "learning_rate": 3.909758561844466e-06, + "loss": 0.272, + "step": 14090 + }, + { + "epoch": 0.6600927530800581, + "grad_norm": 0.5872222712518347, + "learning_rate": 3.909601936968585e-06, + "loss": 0.3023, + "step": 14091 + }, + { + "epoch": 0.6601395980699865, + "grad_norm": 0.6795404764776775, + "learning_rate": 3.9094453039808e-06, + "loss": 0.3025, + "step": 14092 + }, + { + "epoch": 0.6601864430599147, + "grad_norm": 0.5882416742992336, + "learning_rate": 3.90928866288201e-06, + "loss": 0.2916, + "step": 14093 + }, + { + "epoch": 0.660233288049843, + "grad_norm": 0.628084006496596, + "learning_rate": 3.909132013673119e-06, + "loss": 0.3064, + "step": 14094 + }, + { + "epoch": 0.6602801330397714, + "grad_norm": 0.6310838634319286, + "learning_rate": 3.908975356355028e-06, + "loss": 0.2835, + "step": 14095 + }, + { + "epoch": 0.6603269780296998, + "grad_norm": 0.6582779637868054, + "learning_rate": 3.908818690928637e-06, + "loss": 0.2863, + "step": 14096 + }, + { + "epoch": 0.6603738230196281, + "grad_norm": 0.6334022946249758, + "learning_rate": 3.90866201739485e-06, + "loss": 0.3019, + "step": 14097 + }, + { + "epoch": 0.6604206680095563, + "grad_norm": 0.5887483938871122, + "learning_rate": 3.908505335754565e-06, + "loss": 0.284, + "step": 14098 + }, + { + "epoch": 0.6604675129994847, + "grad_norm": 0.5810135817843369, + "learning_rate": 3.908348646008688e-06, + "loss": 0.2955, + "step": 14099 + }, + { + "epoch": 0.660514357989413, + "grad_norm": 0.5769394878752389, + "learning_rate": 3.908191948158117e-06, + "loss": 0.2743, + "step": 14100 + }, + { + "epoch": 0.6605612029793414, + "grad_norm": 0.6391627356924849, + "learning_rate": 3.908035242203756e-06, + "loss": 0.3102, + "step": 14101 + }, + { + "epoch": 0.6606080479692696, + "grad_norm": 0.5760471318328745, + "learning_rate": 3.907878528146506e-06, + "loss": 0.2784, + "step": 14102 + }, + { + "epoch": 0.660654892959198, + "grad_norm": 0.5810203789687907, + "learning_rate": 3.907721805987268e-06, + "loss": 0.2766, + "step": 14103 + }, + { + "epoch": 0.6607017379491263, + "grad_norm": 0.653252099206587, + "learning_rate": 3.907565075726946e-06, + "loss": 0.2836, + "step": 14104 + }, + { + "epoch": 0.6607485829390547, + "grad_norm": 0.5996134264826878, + "learning_rate": 3.90740833736644e-06, + "loss": 0.282, + "step": 14105 + }, + { + "epoch": 0.660795427928983, + "grad_norm": 0.5816611208777801, + "learning_rate": 3.907251590906654e-06, + "loss": 0.2852, + "step": 14106 + }, + { + "epoch": 0.6608422729189113, + "grad_norm": 0.6163131147541024, + "learning_rate": 3.907094836348488e-06, + "loss": 0.2746, + "step": 14107 + }, + { + "epoch": 0.6608891179088396, + "grad_norm": 0.5732860004082639, + "learning_rate": 3.906938073692844e-06, + "loss": 0.2656, + "step": 14108 + }, + { + "epoch": 0.660935962898768, + "grad_norm": 0.5891948060312577, + "learning_rate": 3.906781302940626e-06, + "loss": 0.2784, + "step": 14109 + }, + { + "epoch": 0.6609828078886963, + "grad_norm": 0.6001046889576509, + "learning_rate": 3.9066245240927345e-06, + "loss": 0.2801, + "step": 14110 + }, + { + "epoch": 0.6610296528786246, + "grad_norm": 0.6544887900290527, + "learning_rate": 3.906467737150072e-06, + "loss": 0.3025, + "step": 14111 + }, + { + "epoch": 0.6610764978685529, + "grad_norm": 0.5775461649995681, + "learning_rate": 3.9063109421135425e-06, + "loss": 0.2433, + "step": 14112 + }, + { + "epoch": 0.6611233428584813, + "grad_norm": 0.5660675416179616, + "learning_rate": 3.906154138984047e-06, + "loss": 0.2764, + "step": 14113 + }, + { + "epoch": 0.6611701878484096, + "grad_norm": 0.5962718748553801, + "learning_rate": 3.905997327762488e-06, + "loss": 0.279, + "step": 14114 + }, + { + "epoch": 0.661217032838338, + "grad_norm": 0.5800475649452856, + "learning_rate": 3.905840508449766e-06, + "loss": 0.2689, + "step": 14115 + }, + { + "epoch": 0.6612638778282662, + "grad_norm": 0.6068712052765529, + "learning_rate": 3.905683681046787e-06, + "loss": 0.2879, + "step": 14116 + }, + { + "epoch": 0.6613107228181946, + "grad_norm": 0.5940793772308243, + "learning_rate": 3.905526845554451e-06, + "loss": 0.2874, + "step": 14117 + }, + { + "epoch": 0.6613575678081229, + "grad_norm": 0.5691365546789333, + "learning_rate": 3.905370001973661e-06, + "loss": 0.2826, + "step": 14118 + }, + { + "epoch": 0.6614044127980513, + "grad_norm": 0.6106047287027948, + "learning_rate": 3.905213150305321e-06, + "loss": 0.3084, + "step": 14119 + }, + { + "epoch": 0.6614512577879795, + "grad_norm": 0.5480992959234281, + "learning_rate": 3.905056290550331e-06, + "loss": 0.2734, + "step": 14120 + }, + { + "epoch": 0.6614981027779079, + "grad_norm": 0.6387588384845988, + "learning_rate": 3.904899422709596e-06, + "loss": 0.2802, + "step": 14121 + }, + { + "epoch": 0.6615449477678362, + "grad_norm": 0.6111713984308286, + "learning_rate": 3.904742546784018e-06, + "loss": 0.2744, + "step": 14122 + }, + { + "epoch": 0.6615917927577646, + "grad_norm": 0.5458081949739988, + "learning_rate": 3.9045856627744995e-06, + "loss": 0.2609, + "step": 14123 + }, + { + "epoch": 0.6616386377476929, + "grad_norm": 0.6355732457757863, + "learning_rate": 3.904428770681943e-06, + "loss": 0.3004, + "step": 14124 + }, + { + "epoch": 0.6616854827376212, + "grad_norm": 0.5644327491140788, + "learning_rate": 3.904271870507253e-06, + "loss": 0.2804, + "step": 14125 + }, + { + "epoch": 0.6617323277275495, + "grad_norm": 0.5826513754918556, + "learning_rate": 3.904114962251331e-06, + "loss": 0.2693, + "step": 14126 + }, + { + "epoch": 0.6617791727174779, + "grad_norm": 0.5969689027018293, + "learning_rate": 3.90395804591508e-06, + "loss": 0.2728, + "step": 14127 + }, + { + "epoch": 0.6618260177074062, + "grad_norm": 0.6531592880285442, + "learning_rate": 3.903801121499403e-06, + "loss": 0.2947, + "step": 14128 + }, + { + "epoch": 0.6618728626973345, + "grad_norm": 0.6559966125530919, + "learning_rate": 3.9036441890052045e-06, + "loss": 0.3006, + "step": 14129 + }, + { + "epoch": 0.6619197076872628, + "grad_norm": 0.6029242093235346, + "learning_rate": 3.9034872484333856e-06, + "loss": 0.2785, + "step": 14130 + }, + { + "epoch": 0.6619665526771912, + "grad_norm": 0.5688122674577583, + "learning_rate": 3.903330299784851e-06, + "loss": 0.2525, + "step": 14131 + }, + { + "epoch": 0.6620133976671195, + "grad_norm": 0.6349932998476678, + "learning_rate": 3.9031733430605024e-06, + "loss": 0.2878, + "step": 14132 + }, + { + "epoch": 0.6620602426570479, + "grad_norm": 0.6068326412859278, + "learning_rate": 3.903016378261244e-06, + "loss": 0.2613, + "step": 14133 + }, + { + "epoch": 0.6621070876469761, + "grad_norm": 0.568467520566826, + "learning_rate": 3.90285940538798e-06, + "loss": 0.2859, + "step": 14134 + }, + { + "epoch": 0.6621539326369045, + "grad_norm": 0.5804384614130841, + "learning_rate": 3.902702424441611e-06, + "loss": 0.2761, + "step": 14135 + }, + { + "epoch": 0.6622007776268328, + "grad_norm": 0.6061214320327103, + "learning_rate": 3.902545435423043e-06, + "loss": 0.2836, + "step": 14136 + }, + { + "epoch": 0.6622476226167612, + "grad_norm": 0.5969807663836106, + "learning_rate": 3.902388438333179e-06, + "loss": 0.2888, + "step": 14137 + }, + { + "epoch": 0.6622944676066894, + "grad_norm": 0.5364942357228659, + "learning_rate": 3.9022314331729214e-06, + "loss": 0.2549, + "step": 14138 + }, + { + "epoch": 0.6623413125966178, + "grad_norm": 0.5924946527573571, + "learning_rate": 3.9020744199431745e-06, + "loss": 0.279, + "step": 14139 + }, + { + "epoch": 0.6623881575865461, + "grad_norm": 0.5561099424168445, + "learning_rate": 3.9019173986448425e-06, + "loss": 0.268, + "step": 14140 + }, + { + "epoch": 0.6624350025764745, + "grad_norm": 0.582995530605209, + "learning_rate": 3.901760369278827e-06, + "loss": 0.2747, + "step": 14141 + }, + { + "epoch": 0.6624818475664028, + "grad_norm": 0.5922886165420888, + "learning_rate": 3.901603331846033e-06, + "loss": 0.2857, + "step": 14142 + }, + { + "epoch": 0.6625286925563311, + "grad_norm": 0.6060051801711552, + "learning_rate": 3.901446286347365e-06, + "loss": 0.2897, + "step": 14143 + }, + { + "epoch": 0.6625755375462594, + "grad_norm": 0.5441481212706017, + "learning_rate": 3.901289232783725e-06, + "loss": 0.2686, + "step": 14144 + }, + { + "epoch": 0.6626223825361878, + "grad_norm": 0.6140560346266752, + "learning_rate": 3.901132171156018e-06, + "loss": 0.289, + "step": 14145 + }, + { + "epoch": 0.6626692275261161, + "grad_norm": 0.6294684327293256, + "learning_rate": 3.900975101465148e-06, + "loss": 0.2914, + "step": 14146 + }, + { + "epoch": 0.6627160725160444, + "grad_norm": 0.5909182244865427, + "learning_rate": 3.900818023712018e-06, + "loss": 0.3, + "step": 14147 + }, + { + "epoch": 0.6627629175059727, + "grad_norm": 0.5407344974792009, + "learning_rate": 3.900660937897532e-06, + "loss": 0.2793, + "step": 14148 + }, + { + "epoch": 0.6628097624959011, + "grad_norm": 0.5762861525032739, + "learning_rate": 3.900503844022595e-06, + "loss": 0.2898, + "step": 14149 + }, + { + "epoch": 0.6628566074858294, + "grad_norm": 0.5804110102511786, + "learning_rate": 3.90034674208811e-06, + "loss": 0.2793, + "step": 14150 + }, + { + "epoch": 0.6629034524757578, + "grad_norm": 0.593856960813908, + "learning_rate": 3.900189632094982e-06, + "loss": 0.291, + "step": 14151 + }, + { + "epoch": 0.662950297465686, + "grad_norm": 0.5749498620198145, + "learning_rate": 3.900032514044113e-06, + "loss": 0.2823, + "step": 14152 + }, + { + "epoch": 0.6629971424556144, + "grad_norm": 0.6406255292113933, + "learning_rate": 3.8998753879364105e-06, + "loss": 0.2839, + "step": 14153 + }, + { + "epoch": 0.6630439874455427, + "grad_norm": 0.6177571742034292, + "learning_rate": 3.899718253772776e-06, + "loss": 0.2921, + "step": 14154 + }, + { + "epoch": 0.6630908324354711, + "grad_norm": 0.6222462302247224, + "learning_rate": 3.899561111554115e-06, + "loss": 0.3086, + "step": 14155 + }, + { + "epoch": 0.6631376774253993, + "grad_norm": 0.5947045868819015, + "learning_rate": 3.899403961281332e-06, + "loss": 0.2768, + "step": 14156 + }, + { + "epoch": 0.6631845224153277, + "grad_norm": 0.6177804656146599, + "learning_rate": 3.899246802955331e-06, + "loss": 0.281, + "step": 14157 + }, + { + "epoch": 0.663231367405256, + "grad_norm": 0.5888530230810475, + "learning_rate": 3.8990896365770155e-06, + "loss": 0.2808, + "step": 14158 + }, + { + "epoch": 0.6632782123951844, + "grad_norm": 0.561659018326354, + "learning_rate": 3.898932462147291e-06, + "loss": 0.2649, + "step": 14159 + }, + { + "epoch": 0.6633250573851127, + "grad_norm": 0.5759083817957871, + "learning_rate": 3.898775279667063e-06, + "loss": 0.2917, + "step": 14160 + }, + { + "epoch": 0.663371902375041, + "grad_norm": 0.624418661540489, + "learning_rate": 3.898618089137233e-06, + "loss": 0.2876, + "step": 14161 + }, + { + "epoch": 0.6634187473649693, + "grad_norm": 0.6351455595768837, + "learning_rate": 3.898460890558708e-06, + "loss": 0.3169, + "step": 14162 + }, + { + "epoch": 0.6634655923548977, + "grad_norm": 0.563481708170388, + "learning_rate": 3.898303683932393e-06, + "loss": 0.2876, + "step": 14163 + }, + { + "epoch": 0.663512437344826, + "grad_norm": 0.6208821707069666, + "learning_rate": 3.898146469259191e-06, + "loss": 0.2996, + "step": 14164 + }, + { + "epoch": 0.6635592823347543, + "grad_norm": 0.5688794844959837, + "learning_rate": 3.897989246540008e-06, + "loss": 0.2691, + "step": 14165 + }, + { + "epoch": 0.6636061273246826, + "grad_norm": 0.6554533750897794, + "learning_rate": 3.8978320157757475e-06, + "loss": 0.2771, + "step": 14166 + }, + { + "epoch": 0.663652972314611, + "grad_norm": 0.5365040993991299, + "learning_rate": 3.897674776967315e-06, + "loss": 0.2477, + "step": 14167 + }, + { + "epoch": 0.6636998173045393, + "grad_norm": 0.5774058864886685, + "learning_rate": 3.897517530115617e-06, + "loss": 0.2647, + "step": 14168 + }, + { + "epoch": 0.6637466622944677, + "grad_norm": 0.6020723681461948, + "learning_rate": 3.897360275221555e-06, + "loss": 0.2889, + "step": 14169 + }, + { + "epoch": 0.6637935072843959, + "grad_norm": 0.5672170885663885, + "learning_rate": 3.897203012286036e-06, + "loss": 0.2762, + "step": 14170 + }, + { + "epoch": 0.6638403522743243, + "grad_norm": 0.5504213489445472, + "learning_rate": 3.897045741309966e-06, + "loss": 0.2801, + "step": 14171 + }, + { + "epoch": 0.6638871972642526, + "grad_norm": 0.6304584420145325, + "learning_rate": 3.896888462294248e-06, + "loss": 0.2838, + "step": 14172 + }, + { + "epoch": 0.663934042254181, + "grad_norm": 0.550380528137928, + "learning_rate": 3.896731175239789e-06, + "loss": 0.2639, + "step": 14173 + }, + { + "epoch": 0.6639808872441092, + "grad_norm": 0.597470247284432, + "learning_rate": 3.8965738801474915e-06, + "loss": 0.2817, + "step": 14174 + }, + { + "epoch": 0.6640277322340375, + "grad_norm": 0.6296940693339473, + "learning_rate": 3.896416577018264e-06, + "loss": 0.2896, + "step": 14175 + }, + { + "epoch": 0.6640745772239659, + "grad_norm": 0.6107398992370384, + "learning_rate": 3.896259265853009e-06, + "loss": 0.2865, + "step": 14176 + }, + { + "epoch": 0.6641214222138943, + "grad_norm": 0.5835113241934919, + "learning_rate": 3.896101946652634e-06, + "loss": 0.2554, + "step": 14177 + }, + { + "epoch": 0.6641682672038226, + "grad_norm": 0.5683854596981323, + "learning_rate": 3.895944619418042e-06, + "loss": 0.2651, + "step": 14178 + }, + { + "epoch": 0.6642151121937508, + "grad_norm": 0.5876590022371984, + "learning_rate": 3.895787284150139e-06, + "loss": 0.2905, + "step": 14179 + }, + { + "epoch": 0.6642619571836792, + "grad_norm": 0.5780573755500663, + "learning_rate": 3.895629940849833e-06, + "loss": 0.2745, + "step": 14180 + }, + { + "epoch": 0.6643088021736075, + "grad_norm": 0.6263602131549398, + "learning_rate": 3.895472589518026e-06, + "loss": 0.2921, + "step": 14181 + }, + { + "epoch": 0.6643556471635359, + "grad_norm": 0.5923919461303689, + "learning_rate": 3.8953152301556256e-06, + "loss": 0.2728, + "step": 14182 + }, + { + "epoch": 0.6644024921534641, + "grad_norm": 0.6320244218682313, + "learning_rate": 3.895157862763537e-06, + "loss": 0.2916, + "step": 14183 + }, + { + "epoch": 0.6644493371433925, + "grad_norm": 0.5885339690340821, + "learning_rate": 3.895000487342664e-06, + "loss": 0.2937, + "step": 14184 + }, + { + "epoch": 0.6644961821333208, + "grad_norm": 0.59981102660286, + "learning_rate": 3.894843103893915e-06, + "loss": 0.2948, + "step": 14185 + }, + { + "epoch": 0.6645430271232492, + "grad_norm": 0.5820993404186098, + "learning_rate": 3.8946857124181946e-06, + "loss": 0.2743, + "step": 14186 + }, + { + "epoch": 0.6645898721131775, + "grad_norm": 0.5977011196283613, + "learning_rate": 3.894528312916409e-06, + "loss": 0.2877, + "step": 14187 + }, + { + "epoch": 0.6646367171031058, + "grad_norm": 0.6331931489055743, + "learning_rate": 3.8943709053894625e-06, + "loss": 0.2842, + "step": 14188 + }, + { + "epoch": 0.6646835620930341, + "grad_norm": 0.5425915325917569, + "learning_rate": 3.894213489838262e-06, + "loss": 0.2667, + "step": 14189 + }, + { + "epoch": 0.6647304070829625, + "grad_norm": 0.6087718496388875, + "learning_rate": 3.894056066263714e-06, + "loss": 0.2659, + "step": 14190 + }, + { + "epoch": 0.6647772520728908, + "grad_norm": 0.5922542115728782, + "learning_rate": 3.8938986346667225e-06, + "loss": 0.2658, + "step": 14191 + }, + { + "epoch": 0.6648240970628191, + "grad_norm": 0.6473361322678931, + "learning_rate": 3.893741195048196e-06, + "loss": 0.2958, + "step": 14192 + }, + { + "epoch": 0.6648709420527474, + "grad_norm": 0.5825591868593831, + "learning_rate": 3.893583747409039e-06, + "loss": 0.2843, + "step": 14193 + }, + { + "epoch": 0.6649177870426758, + "grad_norm": 0.615908517100428, + "learning_rate": 3.8934262917501566e-06, + "loss": 0.302, + "step": 14194 + }, + { + "epoch": 0.6649646320326041, + "grad_norm": 0.6117882467654446, + "learning_rate": 3.893268828072457e-06, + "loss": 0.2838, + "step": 14195 + }, + { + "epoch": 0.6650114770225325, + "grad_norm": 0.6016760036913346, + "learning_rate": 3.893111356376845e-06, + "loss": 0.2813, + "step": 14196 + }, + { + "epoch": 0.6650583220124607, + "grad_norm": 0.5881332289955896, + "learning_rate": 3.892953876664229e-06, + "loss": 0.3081, + "step": 14197 + }, + { + "epoch": 0.6651051670023891, + "grad_norm": 0.585685662284055, + "learning_rate": 3.892796388935512e-06, + "loss": 0.2874, + "step": 14198 + }, + { + "epoch": 0.6651520119923174, + "grad_norm": 0.587435289042469, + "learning_rate": 3.8926388931916016e-06, + "loss": 0.2768, + "step": 14199 + }, + { + "epoch": 0.6651988569822458, + "grad_norm": 0.5858032437817062, + "learning_rate": 3.8924813894334045e-06, + "loss": 0.2829, + "step": 14200 + }, + { + "epoch": 0.665245701972174, + "grad_norm": 0.5899676129242584, + "learning_rate": 3.892323877661828e-06, + "loss": 0.2954, + "step": 14201 + }, + { + "epoch": 0.6652925469621024, + "grad_norm": 0.6271963447159812, + "learning_rate": 3.8921663578777764e-06, + "loss": 0.2981, + "step": 14202 + }, + { + "epoch": 0.6653393919520307, + "grad_norm": 0.580683704476539, + "learning_rate": 3.892008830082158e-06, + "loss": 0.2792, + "step": 14203 + }, + { + "epoch": 0.6653862369419591, + "grad_norm": 0.6078029112958337, + "learning_rate": 3.891851294275879e-06, + "loss": 0.2802, + "step": 14204 + }, + { + "epoch": 0.6654330819318874, + "grad_norm": 0.6220042335006922, + "learning_rate": 3.891693750459845e-06, + "loss": 0.3076, + "step": 14205 + }, + { + "epoch": 0.6654799269218157, + "grad_norm": 0.5949692788573926, + "learning_rate": 3.891536198634963e-06, + "loss": 0.2844, + "step": 14206 + }, + { + "epoch": 0.665526771911744, + "grad_norm": 0.6045523707151099, + "learning_rate": 3.8913786388021406e-06, + "loss": 0.303, + "step": 14207 + }, + { + "epoch": 0.6655736169016724, + "grad_norm": 0.5753770390850098, + "learning_rate": 3.891221070962283e-06, + "loss": 0.2757, + "step": 14208 + }, + { + "epoch": 0.6656204618916007, + "grad_norm": 0.5380035244233045, + "learning_rate": 3.891063495116299e-06, + "loss": 0.2728, + "step": 14209 + }, + { + "epoch": 0.665667306881529, + "grad_norm": 0.6077607610084063, + "learning_rate": 3.890905911265094e-06, + "loss": 0.2778, + "step": 14210 + }, + { + "epoch": 0.6657141518714573, + "grad_norm": 0.5859866080986361, + "learning_rate": 3.890748319409574e-06, + "loss": 0.2635, + "step": 14211 + }, + { + "epoch": 0.6657609968613857, + "grad_norm": 0.5741615853253712, + "learning_rate": 3.890590719550647e-06, + "loss": 0.2791, + "step": 14212 + }, + { + "epoch": 0.665807841851314, + "grad_norm": 0.6601582047338166, + "learning_rate": 3.890433111689221e-06, + "loss": 0.3108, + "step": 14213 + }, + { + "epoch": 0.6658546868412424, + "grad_norm": 0.6092837155195104, + "learning_rate": 3.8902754958262014e-06, + "loss": 0.2974, + "step": 14214 + }, + { + "epoch": 0.6659015318311706, + "grad_norm": 0.5950995180925538, + "learning_rate": 3.890117871962496e-06, + "loss": 0.2729, + "step": 14215 + }, + { + "epoch": 0.665948376821099, + "grad_norm": 0.6053839801567807, + "learning_rate": 3.889960240099011e-06, + "loss": 0.3094, + "step": 14216 + }, + { + "epoch": 0.6659952218110273, + "grad_norm": 0.5684053371267318, + "learning_rate": 3.889802600236655e-06, + "loss": 0.2836, + "step": 14217 + }, + { + "epoch": 0.6660420668009557, + "grad_norm": 0.6391407975305136, + "learning_rate": 3.889644952376334e-06, + "loss": 0.3119, + "step": 14218 + }, + { + "epoch": 0.6660889117908839, + "grad_norm": 0.5513007820113615, + "learning_rate": 3.889487296518955e-06, + "loss": 0.2657, + "step": 14219 + }, + { + "epoch": 0.6661357567808123, + "grad_norm": 0.6144482156648169, + "learning_rate": 3.8893296326654275e-06, + "loss": 0.3068, + "step": 14220 + }, + { + "epoch": 0.6661826017707406, + "grad_norm": 0.5059805315788595, + "learning_rate": 3.889171960816656e-06, + "loss": 0.2475, + "step": 14221 + }, + { + "epoch": 0.666229446760669, + "grad_norm": 0.5759378993672962, + "learning_rate": 3.889014280973549e-06, + "loss": 0.2727, + "step": 14222 + }, + { + "epoch": 0.6662762917505973, + "grad_norm": 0.6587055919314984, + "learning_rate": 3.888856593137014e-06, + "loss": 0.2987, + "step": 14223 + }, + { + "epoch": 0.6663231367405256, + "grad_norm": 0.6897034383761862, + "learning_rate": 3.888698897307958e-06, + "loss": 0.3057, + "step": 14224 + }, + { + "epoch": 0.6663699817304539, + "grad_norm": 0.5790811630758006, + "learning_rate": 3.888541193487289e-06, + "loss": 0.293, + "step": 14225 + }, + { + "epoch": 0.6664168267203823, + "grad_norm": 0.6586627447904186, + "learning_rate": 3.888383481675915e-06, + "loss": 0.2976, + "step": 14226 + }, + { + "epoch": 0.6664636717103106, + "grad_norm": 0.6266245892757124, + "learning_rate": 3.888225761874743e-06, + "loss": 0.2687, + "step": 14227 + }, + { + "epoch": 0.6665105167002389, + "grad_norm": 0.5948171874825768, + "learning_rate": 3.8880680340846795e-06, + "loss": 0.3047, + "step": 14228 + }, + { + "epoch": 0.6665573616901672, + "grad_norm": 0.6120616054914926, + "learning_rate": 3.887910298306636e-06, + "loss": 0.2718, + "step": 14229 + }, + { + "epoch": 0.6666042066800956, + "grad_norm": 0.6242350366370916, + "learning_rate": 3.887752554541515e-06, + "loss": 0.2908, + "step": 14230 + }, + { + "epoch": 0.6666510516700239, + "grad_norm": 0.5772411395066191, + "learning_rate": 3.887594802790227e-06, + "loss": 0.2679, + "step": 14231 + }, + { + "epoch": 0.6666978966599523, + "grad_norm": 0.61556044244119, + "learning_rate": 3.887437043053681e-06, + "loss": 0.2887, + "step": 14232 + }, + { + "epoch": 0.6667447416498805, + "grad_norm": 0.6484235623879444, + "learning_rate": 3.887279275332783e-06, + "loss": 0.3119, + "step": 14233 + }, + { + "epoch": 0.6667915866398089, + "grad_norm": 0.5601221907118831, + "learning_rate": 3.88712149962844e-06, + "loss": 0.2689, + "step": 14234 + }, + { + "epoch": 0.6668384316297372, + "grad_norm": 0.5857115345608231, + "learning_rate": 3.886963715941563e-06, + "loss": 0.2626, + "step": 14235 + }, + { + "epoch": 0.6668852766196656, + "grad_norm": 0.5753839377749641, + "learning_rate": 3.8868059242730585e-06, + "loss": 0.2876, + "step": 14236 + }, + { + "epoch": 0.6669321216095938, + "grad_norm": 0.5759880029515414, + "learning_rate": 3.8866481246238345e-06, + "loss": 0.2881, + "step": 14237 + }, + { + "epoch": 0.6669789665995222, + "grad_norm": 0.6039203777592201, + "learning_rate": 3.886490316994798e-06, + "loss": 0.2776, + "step": 14238 + }, + { + "epoch": 0.6670258115894505, + "grad_norm": 0.6669100044709235, + "learning_rate": 3.886332501386859e-06, + "loss": 0.3008, + "step": 14239 + }, + { + "epoch": 0.6670726565793789, + "grad_norm": 0.5887905762862758, + "learning_rate": 3.886174677800924e-06, + "loss": 0.2669, + "step": 14240 + }, + { + "epoch": 0.6671195015693072, + "grad_norm": 0.5769860832986029, + "learning_rate": 3.886016846237904e-06, + "loss": 0.2815, + "step": 14241 + }, + { + "epoch": 0.6671663465592355, + "grad_norm": 0.573895470301137, + "learning_rate": 3.885859006698703e-06, + "loss": 0.295, + "step": 14242 + }, + { + "epoch": 0.6672131915491638, + "grad_norm": 0.5785872320419778, + "learning_rate": 3.8857011591842335e-06, + "loss": 0.2873, + "step": 14243 + }, + { + "epoch": 0.6672600365390922, + "grad_norm": 0.6129879007935716, + "learning_rate": 3.885543303695402e-06, + "loss": 0.2805, + "step": 14244 + }, + { + "epoch": 0.6673068815290205, + "grad_norm": 0.5605000263314596, + "learning_rate": 3.885385440233117e-06, + "loss": 0.266, + "step": 14245 + }, + { + "epoch": 0.6673537265189488, + "grad_norm": 0.5924164144897874, + "learning_rate": 3.885227568798287e-06, + "loss": 0.2637, + "step": 14246 + }, + { + "epoch": 0.6674005715088771, + "grad_norm": 0.6187402318887191, + "learning_rate": 3.88506968939182e-06, + "loss": 0.2935, + "step": 14247 + }, + { + "epoch": 0.6674474164988055, + "grad_norm": 0.5888023954103839, + "learning_rate": 3.884911802014625e-06, + "loss": 0.2876, + "step": 14248 + }, + { + "epoch": 0.6674942614887338, + "grad_norm": 0.6822322508038832, + "learning_rate": 3.88475390666761e-06, + "loss": 0.3048, + "step": 14249 + }, + { + "epoch": 0.6675411064786622, + "grad_norm": 0.5766084567740378, + "learning_rate": 3.8845960033516864e-06, + "loss": 0.263, + "step": 14250 + }, + { + "epoch": 0.6675879514685904, + "grad_norm": 0.6049575457107492, + "learning_rate": 3.8844380920677585e-06, + "loss": 0.2914, + "step": 14251 + }, + { + "epoch": 0.6676347964585188, + "grad_norm": 0.5583288197547249, + "learning_rate": 3.884280172816739e-06, + "loss": 0.2766, + "step": 14252 + }, + { + "epoch": 0.6676816414484471, + "grad_norm": 0.6191536598939176, + "learning_rate": 3.884122245599534e-06, + "loss": 0.2808, + "step": 14253 + }, + { + "epoch": 0.6677284864383755, + "grad_norm": 0.6089697575044982, + "learning_rate": 3.883964310417054e-06, + "loss": 0.2844, + "step": 14254 + }, + { + "epoch": 0.6677753314283037, + "grad_norm": 0.5927915416949366, + "learning_rate": 3.883806367270208e-06, + "loss": 0.3049, + "step": 14255 + }, + { + "epoch": 0.667822176418232, + "grad_norm": 0.585310030287585, + "learning_rate": 3.883648416159903e-06, + "loss": 0.2875, + "step": 14256 + }, + { + "epoch": 0.6678690214081604, + "grad_norm": 0.5484717935286976, + "learning_rate": 3.883490457087049e-06, + "loss": 0.2746, + "step": 14257 + }, + { + "epoch": 0.6679158663980888, + "grad_norm": 0.5794136167374774, + "learning_rate": 3.8833324900525555e-06, + "loss": 0.2876, + "step": 14258 + }, + { + "epoch": 0.6679627113880171, + "grad_norm": 0.5368694581293, + "learning_rate": 3.883174515057331e-06, + "loss": 0.2816, + "step": 14259 + }, + { + "epoch": 0.6680095563779453, + "grad_norm": 0.623101996413954, + "learning_rate": 3.883016532102285e-06, + "loss": 0.2927, + "step": 14260 + }, + { + "epoch": 0.6680564013678737, + "grad_norm": 0.5703538538802049, + "learning_rate": 3.882858541188327e-06, + "loss": 0.2817, + "step": 14261 + }, + { + "epoch": 0.668103246357802, + "grad_norm": 0.599313413243638, + "learning_rate": 3.882700542316366e-06, + "loss": 0.2835, + "step": 14262 + }, + { + "epoch": 0.6681500913477304, + "grad_norm": 0.5563649795656205, + "learning_rate": 3.882542535487309e-06, + "loss": 0.277, + "step": 14263 + }, + { + "epoch": 0.6681969363376586, + "grad_norm": 0.5801660601380624, + "learning_rate": 3.882384520702068e-06, + "loss": 0.2843, + "step": 14264 + }, + { + "epoch": 0.668243781327587, + "grad_norm": 0.5734764722792742, + "learning_rate": 3.882226497961552e-06, + "loss": 0.2869, + "step": 14265 + }, + { + "epoch": 0.6682906263175153, + "grad_norm": 0.6478209013553473, + "learning_rate": 3.882068467266669e-06, + "loss": 0.2823, + "step": 14266 + }, + { + "epoch": 0.6683374713074437, + "grad_norm": 0.578351348265855, + "learning_rate": 3.881910428618331e-06, + "loss": 0.2794, + "step": 14267 + }, + { + "epoch": 0.668384316297372, + "grad_norm": 0.6257431743410079, + "learning_rate": 3.881752382017446e-06, + "loss": 0.2741, + "step": 14268 + }, + { + "epoch": 0.6684311612873003, + "grad_norm": 0.5765542207563559, + "learning_rate": 3.881594327464922e-06, + "loss": 0.2812, + "step": 14269 + }, + { + "epoch": 0.6684780062772286, + "grad_norm": 0.5755439886766217, + "learning_rate": 3.881436264961669e-06, + "loss": 0.2657, + "step": 14270 + }, + { + "epoch": 0.668524851267157, + "grad_norm": 0.607452793108574, + "learning_rate": 3.881278194508598e-06, + "loss": 0.2757, + "step": 14271 + }, + { + "epoch": 0.6685716962570853, + "grad_norm": 0.6020717704966173, + "learning_rate": 3.881120116106619e-06, + "loss": 0.286, + "step": 14272 + }, + { + "epoch": 0.6686185412470136, + "grad_norm": 0.5741344883929754, + "learning_rate": 3.880962029756641e-06, + "loss": 0.2849, + "step": 14273 + }, + { + "epoch": 0.6686653862369419, + "grad_norm": 0.587577382512349, + "learning_rate": 3.8808039354595736e-06, + "loss": 0.2869, + "step": 14274 + }, + { + "epoch": 0.6687122312268703, + "grad_norm": 0.5603802725725627, + "learning_rate": 3.880645833216326e-06, + "loss": 0.27, + "step": 14275 + }, + { + "epoch": 0.6687590762167986, + "grad_norm": 0.5634027335350626, + "learning_rate": 3.880487723027809e-06, + "loss": 0.2753, + "step": 14276 + }, + { + "epoch": 0.668805921206727, + "grad_norm": 0.6225947899222632, + "learning_rate": 3.880329604894932e-06, + "loss": 0.2969, + "step": 14277 + }, + { + "epoch": 0.6688527661966552, + "grad_norm": 0.5965130341012961, + "learning_rate": 3.8801714788186055e-06, + "loss": 0.2605, + "step": 14278 + }, + { + "epoch": 0.6688996111865836, + "grad_norm": 0.5898276522299242, + "learning_rate": 3.880013344799738e-06, + "loss": 0.2874, + "step": 14279 + }, + { + "epoch": 0.6689464561765119, + "grad_norm": 0.6195958596365183, + "learning_rate": 3.879855202839241e-06, + "loss": 0.2815, + "step": 14280 + }, + { + "epoch": 0.6689933011664403, + "grad_norm": 0.7092281229675972, + "learning_rate": 3.879697052938025e-06, + "loss": 0.2875, + "step": 14281 + }, + { + "epoch": 0.6690401461563685, + "grad_norm": 0.553984084351429, + "learning_rate": 3.879538895096998e-06, + "loss": 0.2727, + "step": 14282 + }, + { + "epoch": 0.6690869911462969, + "grad_norm": 0.5585009048336105, + "learning_rate": 3.879380729317072e-06, + "loss": 0.2645, + "step": 14283 + }, + { + "epoch": 0.6691338361362252, + "grad_norm": 0.5972377804067451, + "learning_rate": 3.879222555599157e-06, + "loss": 0.2925, + "step": 14284 + }, + { + "epoch": 0.6691806811261536, + "grad_norm": 0.6050603633851697, + "learning_rate": 3.879064373944162e-06, + "loss": 0.28, + "step": 14285 + }, + { + "epoch": 0.6692275261160819, + "grad_norm": 0.6679054965421799, + "learning_rate": 3.878906184352998e-06, + "loss": 0.2838, + "step": 14286 + }, + { + "epoch": 0.6692743711060102, + "grad_norm": 0.5953696936638455, + "learning_rate": 3.878747986826576e-06, + "loss": 0.2698, + "step": 14287 + }, + { + "epoch": 0.6693212160959385, + "grad_norm": 0.6083884349086207, + "learning_rate": 3.878589781365806e-06, + "loss": 0.2739, + "step": 14288 + }, + { + "epoch": 0.6693680610858669, + "grad_norm": 0.6148109223295015, + "learning_rate": 3.878431567971598e-06, + "loss": 0.2766, + "step": 14289 + }, + { + "epoch": 0.6694149060757952, + "grad_norm": 0.5808657290563908, + "learning_rate": 3.878273346644863e-06, + "loss": 0.2623, + "step": 14290 + }, + { + "epoch": 0.6694617510657235, + "grad_norm": 0.5718662352313122, + "learning_rate": 3.878115117386512e-06, + "loss": 0.2765, + "step": 14291 + }, + { + "epoch": 0.6695085960556518, + "grad_norm": 0.6548560621902862, + "learning_rate": 3.877956880197454e-06, + "loss": 0.3005, + "step": 14292 + }, + { + "epoch": 0.6695554410455802, + "grad_norm": 0.5451274923608451, + "learning_rate": 3.8777986350786e-06, + "loss": 0.2733, + "step": 14293 + }, + { + "epoch": 0.6696022860355085, + "grad_norm": 0.5632353787141019, + "learning_rate": 3.877640382030863e-06, + "loss": 0.2688, + "step": 14294 + }, + { + "epoch": 0.6696491310254369, + "grad_norm": 0.5823910271276898, + "learning_rate": 3.877482121055149e-06, + "loss": 0.2719, + "step": 14295 + }, + { + "epoch": 0.6696959760153651, + "grad_norm": 0.5797105229725101, + "learning_rate": 3.877323852152374e-06, + "loss": 0.2883, + "step": 14296 + }, + { + "epoch": 0.6697428210052935, + "grad_norm": 0.604768886315764, + "learning_rate": 3.877165575323446e-06, + "loss": 0.2957, + "step": 14297 + }, + { + "epoch": 0.6697896659952218, + "grad_norm": 0.6134141986938907, + "learning_rate": 3.877007290569276e-06, + "loss": 0.2847, + "step": 14298 + }, + { + "epoch": 0.6698365109851502, + "grad_norm": 0.6105975147633705, + "learning_rate": 3.876848997890775e-06, + "loss": 0.2639, + "step": 14299 + }, + { + "epoch": 0.6698833559750784, + "grad_norm": 0.5611728120966571, + "learning_rate": 3.8766906972888544e-06, + "loss": 0.2718, + "step": 14300 + }, + { + "epoch": 0.6699302009650068, + "grad_norm": 0.6327271839825894, + "learning_rate": 3.876532388764424e-06, + "loss": 0.2727, + "step": 14301 + }, + { + "epoch": 0.6699770459549351, + "grad_norm": 0.683616532907205, + "learning_rate": 3.876374072318396e-06, + "loss": 0.3045, + "step": 14302 + }, + { + "epoch": 0.6700238909448635, + "grad_norm": 0.5766355477261241, + "learning_rate": 3.876215747951681e-06, + "loss": 0.2727, + "step": 14303 + }, + { + "epoch": 0.6700707359347918, + "grad_norm": 0.5516652806627004, + "learning_rate": 3.8760574156651905e-06, + "loss": 0.2607, + "step": 14304 + }, + { + "epoch": 0.6701175809247201, + "grad_norm": 0.6038861227557347, + "learning_rate": 3.875899075459836e-06, + "loss": 0.2869, + "step": 14305 + }, + { + "epoch": 0.6701644259146484, + "grad_norm": 0.6702954965278755, + "learning_rate": 3.875740727336528e-06, + "loss": 0.3047, + "step": 14306 + }, + { + "epoch": 0.6702112709045768, + "grad_norm": 0.5585049799031593, + "learning_rate": 3.875582371296177e-06, + "loss": 0.29, + "step": 14307 + }, + { + "epoch": 0.6702581158945051, + "grad_norm": 0.6347956090388276, + "learning_rate": 3.875424007339696e-06, + "loss": 0.279, + "step": 14308 + }, + { + "epoch": 0.6703049608844334, + "grad_norm": 0.6252085907603531, + "learning_rate": 3.875265635467997e-06, + "loss": 0.2983, + "step": 14309 + }, + { + "epoch": 0.6703518058743617, + "grad_norm": 0.5309054953013526, + "learning_rate": 3.875107255681987e-06, + "loss": 0.2874, + "step": 14310 + }, + { + "epoch": 0.6703986508642901, + "grad_norm": 0.6069830208887708, + "learning_rate": 3.874948867982582e-06, + "loss": 0.3216, + "step": 14311 + }, + { + "epoch": 0.6704454958542184, + "grad_norm": 0.539494174998389, + "learning_rate": 3.874790472370691e-06, + "loss": 0.2753, + "step": 14312 + }, + { + "epoch": 0.6704923408441468, + "grad_norm": 0.5605472251436083, + "learning_rate": 3.874632068847227e-06, + "loss": 0.2836, + "step": 14313 + }, + { + "epoch": 0.670539185834075, + "grad_norm": 0.5881729715649732, + "learning_rate": 3.874473657413102e-06, + "loss": 0.2875, + "step": 14314 + }, + { + "epoch": 0.6705860308240034, + "grad_norm": 0.5993483377458715, + "learning_rate": 3.8743152380692245e-06, + "loss": 0.2989, + "step": 14315 + }, + { + "epoch": 0.6706328758139317, + "grad_norm": 0.595785479526614, + "learning_rate": 3.874156810816509e-06, + "loss": 0.2902, + "step": 14316 + }, + { + "epoch": 0.6706797208038601, + "grad_norm": 0.5423785547542996, + "learning_rate": 3.873998375655867e-06, + "loss": 0.2664, + "step": 14317 + }, + { + "epoch": 0.6707265657937883, + "grad_norm": 0.5514216280867401, + "learning_rate": 3.873839932588209e-06, + "loss": 0.2508, + "step": 14318 + }, + { + "epoch": 0.6707734107837167, + "grad_norm": 0.5605170741899373, + "learning_rate": 3.873681481614448e-06, + "loss": 0.2741, + "step": 14319 + }, + { + "epoch": 0.670820255773645, + "grad_norm": 0.5661371983598462, + "learning_rate": 3.873523022735495e-06, + "loss": 0.2749, + "step": 14320 + }, + { + "epoch": 0.6708671007635734, + "grad_norm": 0.5614065510391267, + "learning_rate": 3.873364555952262e-06, + "loss": 0.2712, + "step": 14321 + }, + { + "epoch": 0.6709139457535017, + "grad_norm": 0.5567243615953195, + "learning_rate": 3.873206081265662e-06, + "loss": 0.2791, + "step": 14322 + }, + { + "epoch": 0.67096079074343, + "grad_norm": 0.5703750458862021, + "learning_rate": 3.873047598676605e-06, + "loss": 0.2825, + "step": 14323 + }, + { + "epoch": 0.6710076357333583, + "grad_norm": 0.5549524662412358, + "learning_rate": 3.872889108186005e-06, + "loss": 0.2731, + "step": 14324 + }, + { + "epoch": 0.6710544807232867, + "grad_norm": 0.6070348321039469, + "learning_rate": 3.8727306097947725e-06, + "loss": 0.2894, + "step": 14325 + }, + { + "epoch": 0.671101325713215, + "grad_norm": 0.6228674632212857, + "learning_rate": 3.872572103503821e-06, + "loss": 0.2876, + "step": 14326 + }, + { + "epoch": 0.6711481707031433, + "grad_norm": 0.6208225249200736, + "learning_rate": 3.8724135893140625e-06, + "loss": 0.2867, + "step": 14327 + }, + { + "epoch": 0.6711950156930716, + "grad_norm": 0.5510850423508428, + "learning_rate": 3.8722550672264084e-06, + "loss": 0.2578, + "step": 14328 + }, + { + "epoch": 0.671241860683, + "grad_norm": 0.5985176499229691, + "learning_rate": 3.8720965372417705e-06, + "loss": 0.2872, + "step": 14329 + }, + { + "epoch": 0.6712887056729283, + "grad_norm": 0.54874285219302, + "learning_rate": 3.871937999361063e-06, + "loss": 0.26, + "step": 14330 + }, + { + "epoch": 0.6713355506628567, + "grad_norm": 0.5835108693716738, + "learning_rate": 3.871779453585196e-06, + "loss": 0.2539, + "step": 14331 + }, + { + "epoch": 0.6713823956527849, + "grad_norm": 0.654338102071438, + "learning_rate": 3.871620899915084e-06, + "loss": 0.2825, + "step": 14332 + }, + { + "epoch": 0.6714292406427133, + "grad_norm": 0.6417250137353715, + "learning_rate": 3.871462338351638e-06, + "loss": 0.2868, + "step": 14333 + }, + { + "epoch": 0.6714760856326416, + "grad_norm": 0.6352177302521155, + "learning_rate": 3.87130376889577e-06, + "loss": 0.2734, + "step": 14334 + }, + { + "epoch": 0.67152293062257, + "grad_norm": 0.5613142024196234, + "learning_rate": 3.871145191548395e-06, + "loss": 0.2744, + "step": 14335 + }, + { + "epoch": 0.6715697756124982, + "grad_norm": 0.6414542654507814, + "learning_rate": 3.870986606310424e-06, + "loss": 0.3013, + "step": 14336 + }, + { + "epoch": 0.6716166206024266, + "grad_norm": 0.5611568860470113, + "learning_rate": 3.870828013182769e-06, + "loss": 0.2699, + "step": 14337 + }, + { + "epoch": 0.6716634655923549, + "grad_norm": 0.6075582895519718, + "learning_rate": 3.870669412166343e-06, + "loss": 0.2889, + "step": 14338 + }, + { + "epoch": 0.6717103105822833, + "grad_norm": 0.6080476372983578, + "learning_rate": 3.87051080326206e-06, + "loss": 0.2728, + "step": 14339 + }, + { + "epoch": 0.6717571555722116, + "grad_norm": 0.6055587010171718, + "learning_rate": 3.870352186470831e-06, + "loss": 0.262, + "step": 14340 + }, + { + "epoch": 0.6718040005621398, + "grad_norm": 0.5983861520824401, + "learning_rate": 3.87019356179357e-06, + "loss": 0.293, + "step": 14341 + }, + { + "epoch": 0.6718508455520682, + "grad_norm": 0.5988582688467495, + "learning_rate": 3.870034929231189e-06, + "loss": 0.2855, + "step": 14342 + }, + { + "epoch": 0.6718976905419966, + "grad_norm": 0.5892491788224553, + "learning_rate": 3.869876288784602e-06, + "loss": 0.2795, + "step": 14343 + }, + { + "epoch": 0.6719445355319249, + "grad_norm": 0.5703997272899887, + "learning_rate": 3.869717640454721e-06, + "loss": 0.2887, + "step": 14344 + }, + { + "epoch": 0.6719913805218531, + "grad_norm": 0.6088461038189749, + "learning_rate": 3.8695589842424594e-06, + "loss": 0.2984, + "step": 14345 + }, + { + "epoch": 0.6720382255117815, + "grad_norm": 0.5576640247474787, + "learning_rate": 3.86940032014873e-06, + "loss": 0.2687, + "step": 14346 + }, + { + "epoch": 0.6720850705017098, + "grad_norm": 0.5379521477350717, + "learning_rate": 3.869241648174445e-06, + "loss": 0.2552, + "step": 14347 + }, + { + "epoch": 0.6721319154916382, + "grad_norm": 0.5543334985560963, + "learning_rate": 3.86908296832052e-06, + "loss": 0.2884, + "step": 14348 + }, + { + "epoch": 0.6721787604815666, + "grad_norm": 0.6072218524755142, + "learning_rate": 3.868924280587866e-06, + "loss": 0.2619, + "step": 14349 + }, + { + "epoch": 0.6722256054714948, + "grad_norm": 0.5835681641176276, + "learning_rate": 3.868765584977396e-06, + "loss": 0.2941, + "step": 14350 + }, + { + "epoch": 0.6722724504614231, + "grad_norm": 0.5848880121612453, + "learning_rate": 3.868606881490026e-06, + "loss": 0.2942, + "step": 14351 + }, + { + "epoch": 0.6723192954513515, + "grad_norm": 0.6103589573173077, + "learning_rate": 3.8684481701266654e-06, + "loss": 0.2864, + "step": 14352 + }, + { + "epoch": 0.6723661404412798, + "grad_norm": 0.5956786867963454, + "learning_rate": 3.8682894508882304e-06, + "loss": 0.262, + "step": 14353 + }, + { + "epoch": 0.6724129854312081, + "grad_norm": 0.6110842401342156, + "learning_rate": 3.868130723775634e-06, + "loss": 0.2795, + "step": 14354 + }, + { + "epoch": 0.6724598304211364, + "grad_norm": 0.6916737829240169, + "learning_rate": 3.8679719887897884e-06, + "loss": 0.3176, + "step": 14355 + }, + { + "epoch": 0.6725066754110648, + "grad_norm": 0.6086167320190539, + "learning_rate": 3.867813245931608e-06, + "loss": 0.2924, + "step": 14356 + }, + { + "epoch": 0.6725535204009931, + "grad_norm": 0.6624421147751238, + "learning_rate": 3.8676544952020055e-06, + "loss": 0.2919, + "step": 14357 + }, + { + "epoch": 0.6726003653909215, + "grad_norm": 0.5784568542400702, + "learning_rate": 3.867495736601896e-06, + "loss": 0.2754, + "step": 14358 + }, + { + "epoch": 0.6726472103808497, + "grad_norm": 0.5793641093987658, + "learning_rate": 3.867336970132192e-06, + "loss": 0.2775, + "step": 14359 + }, + { + "epoch": 0.6726940553707781, + "grad_norm": 0.6097283389308615, + "learning_rate": 3.8671781957938086e-06, + "loss": 0.3009, + "step": 14360 + }, + { + "epoch": 0.6727409003607064, + "grad_norm": 0.5745705975737941, + "learning_rate": 3.867019413587656e-06, + "loss": 0.2917, + "step": 14361 + }, + { + "epoch": 0.6727877453506348, + "grad_norm": 0.6114398120889638, + "learning_rate": 3.8668606235146515e-06, + "loss": 0.2776, + "step": 14362 + }, + { + "epoch": 0.672834590340563, + "grad_norm": 0.6557846638129692, + "learning_rate": 3.866701825575708e-06, + "loss": 0.2814, + "step": 14363 + }, + { + "epoch": 0.6728814353304914, + "grad_norm": 0.6224105147349853, + "learning_rate": 3.866543019771738e-06, + "loss": 0.3167, + "step": 14364 + }, + { + "epoch": 0.6729282803204197, + "grad_norm": 0.6897319119466027, + "learning_rate": 3.866384206103657e-06, + "loss": 0.3145, + "step": 14365 + }, + { + "epoch": 0.6729751253103481, + "grad_norm": 0.6153342231229678, + "learning_rate": 3.866225384572378e-06, + "loss": 0.3041, + "step": 14366 + }, + { + "epoch": 0.6730219703002764, + "grad_norm": 0.5444932808678878, + "learning_rate": 3.866066555178817e-06, + "loss": 0.2594, + "step": 14367 + }, + { + "epoch": 0.6730688152902047, + "grad_norm": 0.5617231611650289, + "learning_rate": 3.865907717923885e-06, + "loss": 0.2814, + "step": 14368 + }, + { + "epoch": 0.673115660280133, + "grad_norm": 0.6103181734381319, + "learning_rate": 3.865748872808497e-06, + "loss": 0.29, + "step": 14369 + }, + { + "epoch": 0.6731625052700614, + "grad_norm": 0.5869312350812759, + "learning_rate": 3.865590019833567e-06, + "loss": 0.2818, + "step": 14370 + }, + { + "epoch": 0.6732093502599897, + "grad_norm": 0.6331686133511646, + "learning_rate": 3.8654311590000105e-06, + "loss": 0.2716, + "step": 14371 + }, + { + "epoch": 0.673256195249918, + "grad_norm": 0.5914492356091583, + "learning_rate": 3.865272290308741e-06, + "loss": 0.2662, + "step": 14372 + }, + { + "epoch": 0.6733030402398463, + "grad_norm": 0.5551194727074189, + "learning_rate": 3.865113413760673e-06, + "loss": 0.274, + "step": 14373 + }, + { + "epoch": 0.6733498852297747, + "grad_norm": 0.625985550606182, + "learning_rate": 3.864954529356719e-06, + "loss": 0.2823, + "step": 14374 + }, + { + "epoch": 0.673396730219703, + "grad_norm": 0.6441430692915908, + "learning_rate": 3.864795637097796e-06, + "loss": 0.2864, + "step": 14375 + }, + { + "epoch": 0.6734435752096314, + "grad_norm": 0.6093723198362878, + "learning_rate": 3.864636736984817e-06, + "loss": 0.2821, + "step": 14376 + }, + { + "epoch": 0.6734904201995596, + "grad_norm": 0.5798361476390935, + "learning_rate": 3.8644778290186965e-06, + "loss": 0.2754, + "step": 14377 + }, + { + "epoch": 0.673537265189488, + "grad_norm": 0.642340589693481, + "learning_rate": 3.864318913200349e-06, + "loss": 0.294, + "step": 14378 + }, + { + "epoch": 0.6735841101794163, + "grad_norm": 0.5761090575361038, + "learning_rate": 3.864159989530689e-06, + "loss": 0.2624, + "step": 14379 + }, + { + "epoch": 0.6736309551693447, + "grad_norm": 0.6161149847546649, + "learning_rate": 3.864001058010631e-06, + "loss": 0.3011, + "step": 14380 + }, + { + "epoch": 0.6736778001592729, + "grad_norm": 0.5710150026186057, + "learning_rate": 3.86384211864109e-06, + "loss": 0.2743, + "step": 14381 + }, + { + "epoch": 0.6737246451492013, + "grad_norm": 0.6075589153740568, + "learning_rate": 3.8636831714229795e-06, + "loss": 0.2754, + "step": 14382 + }, + { + "epoch": 0.6737714901391296, + "grad_norm": 0.5750884339182426, + "learning_rate": 3.863524216357216e-06, + "loss": 0.2821, + "step": 14383 + }, + { + "epoch": 0.673818335129058, + "grad_norm": 0.6025558030864017, + "learning_rate": 3.863365253444713e-06, + "loss": 0.3203, + "step": 14384 + }, + { + "epoch": 0.6738651801189863, + "grad_norm": 0.6353414971039537, + "learning_rate": 3.863206282686386e-06, + "loss": 0.2948, + "step": 14385 + }, + { + "epoch": 0.6739120251089146, + "grad_norm": 0.6955329328264972, + "learning_rate": 3.86304730408315e-06, + "loss": 0.304, + "step": 14386 + }, + { + "epoch": 0.6739588700988429, + "grad_norm": 0.5860810019120053, + "learning_rate": 3.862888317635917e-06, + "loss": 0.2907, + "step": 14387 + }, + { + "epoch": 0.6740057150887713, + "grad_norm": 0.6093217720520683, + "learning_rate": 3.862729323345607e-06, + "loss": 0.2751, + "step": 14388 + }, + { + "epoch": 0.6740525600786996, + "grad_norm": 0.6404385311250937, + "learning_rate": 3.862570321213131e-06, + "loss": 0.2726, + "step": 14389 + }, + { + "epoch": 0.6740994050686279, + "grad_norm": 0.5351598240005176, + "learning_rate": 3.862411311239406e-06, + "loss": 0.2575, + "step": 14390 + }, + { + "epoch": 0.6741462500585562, + "grad_norm": 0.579210413694586, + "learning_rate": 3.862252293425345e-06, + "loss": 0.2825, + "step": 14391 + }, + { + "epoch": 0.6741930950484846, + "grad_norm": 0.6043476237844438, + "learning_rate": 3.862093267771866e-06, + "loss": 0.2696, + "step": 14392 + }, + { + "epoch": 0.6742399400384129, + "grad_norm": 0.5624378518823954, + "learning_rate": 3.861934234279881e-06, + "loss": 0.2934, + "step": 14393 + }, + { + "epoch": 0.6742867850283413, + "grad_norm": 0.5570371796186955, + "learning_rate": 3.861775192950308e-06, + "loss": 0.2606, + "step": 14394 + }, + { + "epoch": 0.6743336300182695, + "grad_norm": 0.589524945277054, + "learning_rate": 3.8616161437840605e-06, + "loss": 0.2791, + "step": 14395 + }, + { + "epoch": 0.6743804750081979, + "grad_norm": 0.5830256452315585, + "learning_rate": 3.861457086782054e-06, + "loss": 0.2784, + "step": 14396 + }, + { + "epoch": 0.6744273199981262, + "grad_norm": 0.5866692323939303, + "learning_rate": 3.861298021945205e-06, + "loss": 0.2949, + "step": 14397 + }, + { + "epoch": 0.6744741649880546, + "grad_norm": 0.6635211677475165, + "learning_rate": 3.8611389492744276e-06, + "loss": 0.3317, + "step": 14398 + }, + { + "epoch": 0.6745210099779828, + "grad_norm": 0.608786204396792, + "learning_rate": 3.8609798687706376e-06, + "loss": 0.297, + "step": 14399 + }, + { + "epoch": 0.6745678549679112, + "grad_norm": 0.5773570491052296, + "learning_rate": 3.860820780434751e-06, + "loss": 0.2742, + "step": 14400 + }, + { + "epoch": 0.6746146999578395, + "grad_norm": 0.6495344452755175, + "learning_rate": 3.8606616842676815e-06, + "loss": 0.2686, + "step": 14401 + }, + { + "epoch": 0.6746615449477679, + "grad_norm": 0.5765381917267377, + "learning_rate": 3.860502580270348e-06, + "loss": 0.2863, + "step": 14402 + }, + { + "epoch": 0.6747083899376962, + "grad_norm": 0.5690895844491778, + "learning_rate": 3.860343468443662e-06, + "loss": 0.2695, + "step": 14403 + }, + { + "epoch": 0.6747552349276245, + "grad_norm": 0.6651969356199406, + "learning_rate": 3.860184348788542e-06, + "loss": 0.2959, + "step": 14404 + }, + { + "epoch": 0.6748020799175528, + "grad_norm": 0.5946536855000796, + "learning_rate": 3.8600252213059045e-06, + "loss": 0.2888, + "step": 14405 + }, + { + "epoch": 0.6748489249074812, + "grad_norm": 0.5396356890911563, + "learning_rate": 3.859866085996662e-06, + "loss": 0.271, + "step": 14406 + }, + { + "epoch": 0.6748957698974095, + "grad_norm": 0.5909383251706142, + "learning_rate": 3.859706942861732e-06, + "loss": 0.3124, + "step": 14407 + }, + { + "epoch": 0.6749426148873378, + "grad_norm": 0.5582151253874209, + "learning_rate": 3.859547791902031e-06, + "loss": 0.2856, + "step": 14408 + }, + { + "epoch": 0.6749894598772661, + "grad_norm": 0.6181587972739526, + "learning_rate": 3.859388633118474e-06, + "loss": 0.2865, + "step": 14409 + }, + { + "epoch": 0.6750363048671945, + "grad_norm": 0.6405715435209771, + "learning_rate": 3.8592294665119766e-06, + "loss": 0.3056, + "step": 14410 + }, + { + "epoch": 0.6750831498571228, + "grad_norm": 0.6269614515023295, + "learning_rate": 3.859070292083456e-06, + "loss": 0.3107, + "step": 14411 + }, + { + "epoch": 0.6751299948470512, + "grad_norm": 0.5977641305259309, + "learning_rate": 3.858911109833827e-06, + "loss": 0.288, + "step": 14412 + }, + { + "epoch": 0.6751768398369794, + "grad_norm": 0.6244955134991632, + "learning_rate": 3.858751919764006e-06, + "loss": 0.3097, + "step": 14413 + }, + { + "epoch": 0.6752236848269078, + "grad_norm": 0.6574920641354667, + "learning_rate": 3.85859272187491e-06, + "loss": 0.2955, + "step": 14414 + }, + { + "epoch": 0.6752705298168361, + "grad_norm": 0.5733658304217036, + "learning_rate": 3.8584335161674536e-06, + "loss": 0.2723, + "step": 14415 + }, + { + "epoch": 0.6753173748067645, + "grad_norm": 0.6425375000491595, + "learning_rate": 3.858274302642553e-06, + "loss": 0.2916, + "step": 14416 + }, + { + "epoch": 0.6753642197966927, + "grad_norm": 0.6057598726413083, + "learning_rate": 3.8581150813011265e-06, + "loss": 0.2852, + "step": 14417 + }, + { + "epoch": 0.675411064786621, + "grad_norm": 0.6068378278269106, + "learning_rate": 3.8579558521440885e-06, + "loss": 0.3065, + "step": 14418 + }, + { + "epoch": 0.6754579097765494, + "grad_norm": 0.6246697067990582, + "learning_rate": 3.857796615172356e-06, + "loss": 0.3068, + "step": 14419 + }, + { + "epoch": 0.6755047547664778, + "grad_norm": 0.5861676285638466, + "learning_rate": 3.857637370386845e-06, + "loss": 0.2635, + "step": 14420 + }, + { + "epoch": 0.6755515997564061, + "grad_norm": 0.577904626109325, + "learning_rate": 3.8574781177884725e-06, + "loss": 0.2891, + "step": 14421 + }, + { + "epoch": 0.6755984447463343, + "grad_norm": 0.6465314171651141, + "learning_rate": 3.857318857378154e-06, + "loss": 0.3124, + "step": 14422 + }, + { + "epoch": 0.6756452897362627, + "grad_norm": 0.5915563531650051, + "learning_rate": 3.857159589156808e-06, + "loss": 0.2832, + "step": 14423 + }, + { + "epoch": 0.675692134726191, + "grad_norm": 0.6423349546201005, + "learning_rate": 3.8570003131253484e-06, + "loss": 0.307, + "step": 14424 + }, + { + "epoch": 0.6757389797161194, + "grad_norm": 0.5558452232516781, + "learning_rate": 3.8568410292846934e-06, + "loss": 0.2843, + "step": 14425 + }, + { + "epoch": 0.6757858247060476, + "grad_norm": 0.586121659244117, + "learning_rate": 3.856681737635759e-06, + "loss": 0.295, + "step": 14426 + }, + { + "epoch": 0.675832669695976, + "grad_norm": 0.5640573861717452, + "learning_rate": 3.856522438179463e-06, + "loss": 0.2827, + "step": 14427 + }, + { + "epoch": 0.6758795146859043, + "grad_norm": 0.5975864946302821, + "learning_rate": 3.856363130916721e-06, + "loss": 0.2882, + "step": 14428 + }, + { + "epoch": 0.6759263596758327, + "grad_norm": 0.621729962840586, + "learning_rate": 3.85620381584845e-06, + "loss": 0.2971, + "step": 14429 + }, + { + "epoch": 0.675973204665761, + "grad_norm": 0.5530607147636222, + "learning_rate": 3.856044492975567e-06, + "loss": 0.2673, + "step": 14430 + }, + { + "epoch": 0.6760200496556893, + "grad_norm": 0.578140332849237, + "learning_rate": 3.855885162298988e-06, + "loss": 0.2961, + "step": 14431 + }, + { + "epoch": 0.6760668946456176, + "grad_norm": 0.5727403070884566, + "learning_rate": 3.855725823819632e-06, + "loss": 0.2826, + "step": 14432 + }, + { + "epoch": 0.676113739635546, + "grad_norm": 0.6083184727568278, + "learning_rate": 3.855566477538414e-06, + "loss": 0.2965, + "step": 14433 + }, + { + "epoch": 0.6761605846254743, + "grad_norm": 0.5840985500611202, + "learning_rate": 3.855407123456251e-06, + "loss": 0.2821, + "step": 14434 + }, + { + "epoch": 0.6762074296154026, + "grad_norm": 0.6164444112471777, + "learning_rate": 3.855247761574061e-06, + "loss": 0.2992, + "step": 14435 + }, + { + "epoch": 0.6762542746053309, + "grad_norm": 0.5927371278324707, + "learning_rate": 3.855088391892761e-06, + "loss": 0.2803, + "step": 14436 + }, + { + "epoch": 0.6763011195952593, + "grad_norm": 0.6181207218533669, + "learning_rate": 3.854929014413269e-06, + "loss": 0.2761, + "step": 14437 + }, + { + "epoch": 0.6763479645851876, + "grad_norm": 0.6903783408749659, + "learning_rate": 3.854769629136499e-06, + "loss": 0.3097, + "step": 14438 + }, + { + "epoch": 0.676394809575116, + "grad_norm": 0.6327821223733923, + "learning_rate": 3.854610236063372e-06, + "loss": 0.2754, + "step": 14439 + }, + { + "epoch": 0.6764416545650442, + "grad_norm": 0.644709120415438, + "learning_rate": 3.854450835194803e-06, + "loss": 0.3025, + "step": 14440 + }, + { + "epoch": 0.6764884995549726, + "grad_norm": 0.5767251248437197, + "learning_rate": 3.8542914265317085e-06, + "loss": 0.2902, + "step": 14441 + }, + { + "epoch": 0.6765353445449009, + "grad_norm": 0.6564817158368479, + "learning_rate": 3.85413201007501e-06, + "loss": 0.3187, + "step": 14442 + }, + { + "epoch": 0.6765821895348293, + "grad_norm": 0.563054286685035, + "learning_rate": 3.85397258582562e-06, + "loss": 0.271, + "step": 14443 + }, + { + "epoch": 0.6766290345247575, + "grad_norm": 0.6121799864877948, + "learning_rate": 3.853813153784458e-06, + "loss": 0.2726, + "step": 14444 + }, + { + "epoch": 0.6766758795146859, + "grad_norm": 0.5199364771490876, + "learning_rate": 3.853653713952443e-06, + "loss": 0.2538, + "step": 14445 + }, + { + "epoch": 0.6767227245046142, + "grad_norm": 0.5627346120588342, + "learning_rate": 3.85349426633049e-06, + "loss": 0.2587, + "step": 14446 + }, + { + "epoch": 0.6767695694945426, + "grad_norm": 0.651649730179807, + "learning_rate": 3.853334810919518e-06, + "loss": 0.3013, + "step": 14447 + }, + { + "epoch": 0.6768164144844709, + "grad_norm": 0.579718678591752, + "learning_rate": 3.853175347720445e-06, + "loss": 0.2755, + "step": 14448 + }, + { + "epoch": 0.6768632594743992, + "grad_norm": 0.5570677822041017, + "learning_rate": 3.853015876734187e-06, + "loss": 0.2615, + "step": 14449 + }, + { + "epoch": 0.6769101044643275, + "grad_norm": 0.6183562498223565, + "learning_rate": 3.852856397961663e-06, + "loss": 0.2768, + "step": 14450 + }, + { + "epoch": 0.6769569494542559, + "grad_norm": 0.6067119432290606, + "learning_rate": 3.852696911403791e-06, + "loss": 0.2714, + "step": 14451 + }, + { + "epoch": 0.6770037944441842, + "grad_norm": 0.5809452224557126, + "learning_rate": 3.8525374170614875e-06, + "loss": 0.2952, + "step": 14452 + }, + { + "epoch": 0.6770506394341125, + "grad_norm": 0.5481492321638481, + "learning_rate": 3.8523779149356714e-06, + "loss": 0.2798, + "step": 14453 + }, + { + "epoch": 0.6770974844240408, + "grad_norm": 0.6116638373576584, + "learning_rate": 3.852218405027259e-06, + "loss": 0.294, + "step": 14454 + }, + { + "epoch": 0.6771443294139692, + "grad_norm": 0.677278800150084, + "learning_rate": 3.852058887337171e-06, + "loss": 0.2928, + "step": 14455 + }, + { + "epoch": 0.6771911744038975, + "grad_norm": 0.5639961706485968, + "learning_rate": 3.851899361866324e-06, + "loss": 0.2658, + "step": 14456 + }, + { + "epoch": 0.6772380193938259, + "grad_norm": 0.5776128731833425, + "learning_rate": 3.851739828615636e-06, + "loss": 0.2815, + "step": 14457 + }, + { + "epoch": 0.6772848643837541, + "grad_norm": 0.5858904562603383, + "learning_rate": 3.851580287586024e-06, + "loss": 0.2684, + "step": 14458 + }, + { + "epoch": 0.6773317093736825, + "grad_norm": 0.5366490049770974, + "learning_rate": 3.851420738778408e-06, + "loss": 0.2637, + "step": 14459 + }, + { + "epoch": 0.6773785543636108, + "grad_norm": 0.6181902012722473, + "learning_rate": 3.851261182193705e-06, + "loss": 0.2726, + "step": 14460 + }, + { + "epoch": 0.6774253993535392, + "grad_norm": 0.6039383336331607, + "learning_rate": 3.851101617832834e-06, + "loss": 0.3003, + "step": 14461 + }, + { + "epoch": 0.6774722443434674, + "grad_norm": 0.61799337135355, + "learning_rate": 3.850942045696713e-06, + "loss": 0.2867, + "step": 14462 + }, + { + "epoch": 0.6775190893333958, + "grad_norm": 0.5450971091179927, + "learning_rate": 3.850782465786259e-06, + "loss": 0.2831, + "step": 14463 + }, + { + "epoch": 0.6775659343233241, + "grad_norm": 0.5931244024607858, + "learning_rate": 3.850622878102392e-06, + "loss": 0.2902, + "step": 14464 + }, + { + "epoch": 0.6776127793132525, + "grad_norm": 0.6126929345505654, + "learning_rate": 3.85046328264603e-06, + "loss": 0.2819, + "step": 14465 + }, + { + "epoch": 0.6776596243031808, + "grad_norm": 0.5947669238394723, + "learning_rate": 3.850303679418092e-06, + "loss": 0.301, + "step": 14466 + }, + { + "epoch": 0.6777064692931091, + "grad_norm": 0.6140961048527476, + "learning_rate": 3.850144068419494e-06, + "loss": 0.2959, + "step": 14467 + }, + { + "epoch": 0.6777533142830374, + "grad_norm": 0.6016219689194837, + "learning_rate": 3.849984449651156e-06, + "loss": 0.2779, + "step": 14468 + }, + { + "epoch": 0.6778001592729658, + "grad_norm": 0.6177196674208643, + "learning_rate": 3.849824823113999e-06, + "loss": 0.2936, + "step": 14469 + }, + { + "epoch": 0.6778470042628941, + "grad_norm": 0.6003634545044776, + "learning_rate": 3.849665188808938e-06, + "loss": 0.2868, + "step": 14470 + }, + { + "epoch": 0.6778938492528224, + "grad_norm": 0.5729799310122903, + "learning_rate": 3.849505546736892e-06, + "loss": 0.2702, + "step": 14471 + }, + { + "epoch": 0.6779406942427507, + "grad_norm": 0.598265815623702, + "learning_rate": 3.849345896898783e-06, + "loss": 0.287, + "step": 14472 + }, + { + "epoch": 0.6779875392326791, + "grad_norm": 0.5735100705611419, + "learning_rate": 3.8491862392955265e-06, + "loss": 0.2646, + "step": 14473 + }, + { + "epoch": 0.6780343842226074, + "grad_norm": 0.5659604669664705, + "learning_rate": 3.849026573928042e-06, + "loss": 0.2707, + "step": 14474 + }, + { + "epoch": 0.6780812292125358, + "grad_norm": 0.6089358447154722, + "learning_rate": 3.848866900797249e-06, + "loss": 0.2871, + "step": 14475 + }, + { + "epoch": 0.678128074202464, + "grad_norm": 0.5904163273943162, + "learning_rate": 3.848707219904066e-06, + "loss": 0.2919, + "step": 14476 + }, + { + "epoch": 0.6781749191923924, + "grad_norm": 0.585629862143514, + "learning_rate": 3.848547531249412e-06, + "loss": 0.2845, + "step": 14477 + }, + { + "epoch": 0.6782217641823207, + "grad_norm": 0.5657290032863136, + "learning_rate": 3.8483878348342055e-06, + "loss": 0.2746, + "step": 14478 + }, + { + "epoch": 0.6782686091722491, + "grad_norm": 0.6114266289662286, + "learning_rate": 3.848228130659366e-06, + "loss": 0.2896, + "step": 14479 + }, + { + "epoch": 0.6783154541621773, + "grad_norm": 0.6026868513569545, + "learning_rate": 3.848068418725813e-06, + "loss": 0.2855, + "step": 14480 + }, + { + "epoch": 0.6783622991521057, + "grad_norm": 0.5555641776861274, + "learning_rate": 3.847908699034465e-06, + "loss": 0.2783, + "step": 14481 + }, + { + "epoch": 0.678409144142034, + "grad_norm": 0.645692901083203, + "learning_rate": 3.84774897158624e-06, + "loss": 0.2984, + "step": 14482 + }, + { + "epoch": 0.6784559891319624, + "grad_norm": 0.5708192337495571, + "learning_rate": 3.84758923638206e-06, + "loss": 0.2705, + "step": 14483 + }, + { + "epoch": 0.6785028341218907, + "grad_norm": 0.598757941590671, + "learning_rate": 3.847429493422842e-06, + "loss": 0.3016, + "step": 14484 + }, + { + "epoch": 0.678549679111819, + "grad_norm": 0.5843672877151961, + "learning_rate": 3.847269742709506e-06, + "loss": 0.3047, + "step": 14485 + }, + { + "epoch": 0.6785965241017473, + "grad_norm": 0.5424523949908603, + "learning_rate": 3.84710998424297e-06, + "loss": 0.2816, + "step": 14486 + }, + { + "epoch": 0.6786433690916757, + "grad_norm": 0.5254031917548422, + "learning_rate": 3.846950218024157e-06, + "loss": 0.2715, + "step": 14487 + }, + { + "epoch": 0.678690214081604, + "grad_norm": 0.6155771377866711, + "learning_rate": 3.846790444053983e-06, + "loss": 0.2996, + "step": 14488 + }, + { + "epoch": 0.6787370590715323, + "grad_norm": 0.5487882665863776, + "learning_rate": 3.846630662333369e-06, + "loss": 0.2657, + "step": 14489 + }, + { + "epoch": 0.6787839040614606, + "grad_norm": 0.6246657972065722, + "learning_rate": 3.846470872863233e-06, + "loss": 0.2861, + "step": 14490 + }, + { + "epoch": 0.678830749051389, + "grad_norm": 0.5388975538088625, + "learning_rate": 3.846311075644495e-06, + "loss": 0.2619, + "step": 14491 + }, + { + "epoch": 0.6788775940413173, + "grad_norm": 0.6474907010642634, + "learning_rate": 3.846151270678076e-06, + "loss": 0.3002, + "step": 14492 + }, + { + "epoch": 0.6789244390312457, + "grad_norm": 0.582954291663068, + "learning_rate": 3.845991457964895e-06, + "loss": 0.2624, + "step": 14493 + }, + { + "epoch": 0.6789712840211739, + "grad_norm": 0.5975328470859367, + "learning_rate": 3.845831637505872e-06, + "loss": 0.2807, + "step": 14494 + }, + { + "epoch": 0.6790181290111023, + "grad_norm": 0.5610885276236607, + "learning_rate": 3.845671809301925e-06, + "loss": 0.2854, + "step": 14495 + }, + { + "epoch": 0.6790649740010306, + "grad_norm": 0.5603919755849555, + "learning_rate": 3.845511973353975e-06, + "loss": 0.2642, + "step": 14496 + }, + { + "epoch": 0.679111818990959, + "grad_norm": 0.5977139481072254, + "learning_rate": 3.845352129662943e-06, + "loss": 0.2724, + "step": 14497 + }, + { + "epoch": 0.6791586639808872, + "grad_norm": 0.5833507479233907, + "learning_rate": 3.845192278229747e-06, + "loss": 0.2729, + "step": 14498 + }, + { + "epoch": 0.6792055089708156, + "grad_norm": 0.5236222818422631, + "learning_rate": 3.845032419055307e-06, + "loss": 0.2808, + "step": 14499 + }, + { + "epoch": 0.6792523539607439, + "grad_norm": 0.585930680602558, + "learning_rate": 3.844872552140544e-06, + "loss": 0.2878, + "step": 14500 + }, + { + "epoch": 0.6792991989506723, + "grad_norm": 0.5673437835241609, + "learning_rate": 3.844712677486377e-06, + "loss": 0.2709, + "step": 14501 + }, + { + "epoch": 0.6793460439406006, + "grad_norm": 0.5951440684030598, + "learning_rate": 3.844552795093726e-06, + "loss": 0.2903, + "step": 14502 + }, + { + "epoch": 0.6793928889305288, + "grad_norm": 0.5795805277298238, + "learning_rate": 3.844392904963513e-06, + "loss": 0.2716, + "step": 14503 + }, + { + "epoch": 0.6794397339204572, + "grad_norm": 0.5688842697357728, + "learning_rate": 3.844233007096657e-06, + "loss": 0.2707, + "step": 14504 + }, + { + "epoch": 0.6794865789103856, + "grad_norm": 0.6128616071333776, + "learning_rate": 3.844073101494077e-06, + "loss": 0.3105, + "step": 14505 + }, + { + "epoch": 0.6795334239003139, + "grad_norm": 0.603412381146089, + "learning_rate": 3.843913188156694e-06, + "loss": 0.2873, + "step": 14506 + }, + { + "epoch": 0.6795802688902421, + "grad_norm": 0.5791385991712585, + "learning_rate": 3.843753267085428e-06, + "loss": 0.2761, + "step": 14507 + }, + { + "epoch": 0.6796271138801705, + "grad_norm": 0.5492107341773403, + "learning_rate": 3.843593338281201e-06, + "loss": 0.2722, + "step": 14508 + }, + { + "epoch": 0.6796739588700988, + "grad_norm": 0.5730039168493215, + "learning_rate": 3.8434334017449314e-06, + "loss": 0.2814, + "step": 14509 + }, + { + "epoch": 0.6797208038600272, + "grad_norm": 0.5730445161913422, + "learning_rate": 3.84327345747754e-06, + "loss": 0.275, + "step": 14510 + }, + { + "epoch": 0.6797676488499556, + "grad_norm": 0.5772194223907933, + "learning_rate": 3.843113505479948e-06, + "loss": 0.2736, + "step": 14511 + }, + { + "epoch": 0.6798144938398838, + "grad_norm": 0.6082675996306007, + "learning_rate": 3.842953545753076e-06, + "loss": 0.2868, + "step": 14512 + }, + { + "epoch": 0.6798613388298121, + "grad_norm": 0.6418250000634249, + "learning_rate": 3.842793578297842e-06, + "loss": 0.2937, + "step": 14513 + }, + { + "epoch": 0.6799081838197405, + "grad_norm": 0.5636783134636544, + "learning_rate": 3.842633603115169e-06, + "loss": 0.2786, + "step": 14514 + }, + { + "epoch": 0.6799550288096688, + "grad_norm": 0.6070644532571625, + "learning_rate": 3.842473620205978e-06, + "loss": 0.2882, + "step": 14515 + }, + { + "epoch": 0.6800018737995971, + "grad_norm": 0.5614284686110182, + "learning_rate": 3.842313629571188e-06, + "loss": 0.2787, + "step": 14516 + }, + { + "epoch": 0.6800487187895254, + "grad_norm": 0.6250235686103871, + "learning_rate": 3.84215363121172e-06, + "loss": 0.3084, + "step": 14517 + }, + { + "epoch": 0.6800955637794538, + "grad_norm": 0.6402789238629492, + "learning_rate": 3.8419936251284965e-06, + "loss": 0.2911, + "step": 14518 + }, + { + "epoch": 0.6801424087693821, + "grad_norm": 0.6003292992381796, + "learning_rate": 3.841833611322436e-06, + "loss": 0.2875, + "step": 14519 + }, + { + "epoch": 0.6801892537593105, + "grad_norm": 0.53400388019951, + "learning_rate": 3.84167358979446e-06, + "loss": 0.2761, + "step": 14520 + }, + { + "epoch": 0.6802360987492387, + "grad_norm": 0.5993044428033723, + "learning_rate": 3.8415135605454905e-06, + "loss": 0.3023, + "step": 14521 + }, + { + "epoch": 0.6802829437391671, + "grad_norm": 0.5723028677394311, + "learning_rate": 3.841353523576448e-06, + "loss": 0.276, + "step": 14522 + }, + { + "epoch": 0.6803297887290954, + "grad_norm": 0.5911888016890824, + "learning_rate": 3.841193478888252e-06, + "loss": 0.286, + "step": 14523 + }, + { + "epoch": 0.6803766337190238, + "grad_norm": 0.5777848047090793, + "learning_rate": 3.8410334264818254e-06, + "loss": 0.2777, + "step": 14524 + }, + { + "epoch": 0.680423478708952, + "grad_norm": 0.615154368644923, + "learning_rate": 3.840873366358089e-06, + "loss": 0.2678, + "step": 14525 + }, + { + "epoch": 0.6804703236988804, + "grad_norm": 0.5687962711188413, + "learning_rate": 3.840713298517962e-06, + "loss": 0.2846, + "step": 14526 + }, + { + "epoch": 0.6805171686888087, + "grad_norm": 0.648775625595139, + "learning_rate": 3.8405532229623676e-06, + "loss": 0.2785, + "step": 14527 + }, + { + "epoch": 0.6805640136787371, + "grad_norm": 0.5896047483923816, + "learning_rate": 3.840393139692225e-06, + "loss": 0.2793, + "step": 14528 + }, + { + "epoch": 0.6806108586686654, + "grad_norm": 0.6199635134884384, + "learning_rate": 3.84023304870846e-06, + "loss": 0.2644, + "step": 14529 + }, + { + "epoch": 0.6806577036585937, + "grad_norm": 0.62986708869857, + "learning_rate": 3.840072950011988e-06, + "loss": 0.3001, + "step": 14530 + }, + { + "epoch": 0.680704548648522, + "grad_norm": 0.5705957641767971, + "learning_rate": 3.839912843603733e-06, + "loss": 0.2643, + "step": 14531 + }, + { + "epoch": 0.6807513936384504, + "grad_norm": 0.6456908822441481, + "learning_rate": 3.839752729484618e-06, + "loss": 0.2829, + "step": 14532 + }, + { + "epoch": 0.6807982386283787, + "grad_norm": 0.6306221743416583, + "learning_rate": 3.8395926076555624e-06, + "loss": 0.2775, + "step": 14533 + }, + { + "epoch": 0.680845083618307, + "grad_norm": 0.5831438840417194, + "learning_rate": 3.839432478117487e-06, + "loss": 0.2876, + "step": 14534 + }, + { + "epoch": 0.6808919286082353, + "grad_norm": 0.6068993091740116, + "learning_rate": 3.839272340871315e-06, + "loss": 0.2992, + "step": 14535 + }, + { + "epoch": 0.6809387735981637, + "grad_norm": 0.6780720635742509, + "learning_rate": 3.839112195917967e-06, + "loss": 0.2979, + "step": 14536 + }, + { + "epoch": 0.680985618588092, + "grad_norm": 0.606182243552605, + "learning_rate": 3.838952043258365e-06, + "loss": 0.2659, + "step": 14537 + }, + { + "epoch": 0.6810324635780204, + "grad_norm": 0.5916952531298044, + "learning_rate": 3.8387918828934304e-06, + "loss": 0.2872, + "step": 14538 + }, + { + "epoch": 0.6810793085679486, + "grad_norm": 0.5742560535150477, + "learning_rate": 3.838631714824086e-06, + "loss": 0.2803, + "step": 14539 + }, + { + "epoch": 0.681126153557877, + "grad_norm": 0.6369163826251083, + "learning_rate": 3.838471539051251e-06, + "loss": 0.316, + "step": 14540 + }, + { + "epoch": 0.6811729985478053, + "grad_norm": 0.5858823016698518, + "learning_rate": 3.838311355575849e-06, + "loss": 0.2818, + "step": 14541 + }, + { + "epoch": 0.6812198435377337, + "grad_norm": 0.6386656675133757, + "learning_rate": 3.838151164398802e-06, + "loss": 0.2897, + "step": 14542 + }, + { + "epoch": 0.6812666885276619, + "grad_norm": 0.5622746450069819, + "learning_rate": 3.837990965521032e-06, + "loss": 0.2663, + "step": 14543 + }, + { + "epoch": 0.6813135335175903, + "grad_norm": 0.5724902962329792, + "learning_rate": 3.837830758943459e-06, + "loss": 0.2776, + "step": 14544 + }, + { + "epoch": 0.6813603785075186, + "grad_norm": 0.572636686836756, + "learning_rate": 3.837670544667007e-06, + "loss": 0.2673, + "step": 14545 + }, + { + "epoch": 0.681407223497447, + "grad_norm": 0.6018460418754786, + "learning_rate": 3.837510322692598e-06, + "loss": 0.2814, + "step": 14546 + }, + { + "epoch": 0.6814540684873753, + "grad_norm": 0.5617856107149133, + "learning_rate": 3.837350093021151e-06, + "loss": 0.2767, + "step": 14547 + }, + { + "epoch": 0.6815009134773036, + "grad_norm": 0.5705433775519071, + "learning_rate": 3.837189855653592e-06, + "loss": 0.2976, + "step": 14548 + }, + { + "epoch": 0.6815477584672319, + "grad_norm": 0.5952184132036594, + "learning_rate": 3.837029610590841e-06, + "loss": 0.2893, + "step": 14549 + }, + { + "epoch": 0.6815946034571603, + "grad_norm": 0.6029061724847248, + "learning_rate": 3.836869357833821e-06, + "loss": 0.2775, + "step": 14550 + }, + { + "epoch": 0.6816414484470886, + "grad_norm": 0.5808007488036154, + "learning_rate": 3.836709097383454e-06, + "loss": 0.2809, + "step": 14551 + }, + { + "epoch": 0.6816882934370169, + "grad_norm": 0.6224006927355907, + "learning_rate": 3.836548829240662e-06, + "loss": 0.3051, + "step": 14552 + }, + { + "epoch": 0.6817351384269452, + "grad_norm": 0.6330035946885912, + "learning_rate": 3.836388553406367e-06, + "loss": 0.3026, + "step": 14553 + }, + { + "epoch": 0.6817819834168736, + "grad_norm": 0.5568549103258166, + "learning_rate": 3.836228269881491e-06, + "loss": 0.2825, + "step": 14554 + }, + { + "epoch": 0.6818288284068019, + "grad_norm": 0.637130558805172, + "learning_rate": 3.836067978666959e-06, + "loss": 0.3008, + "step": 14555 + }, + { + "epoch": 0.6818756733967302, + "grad_norm": 0.5975234055836375, + "learning_rate": 3.83590767976369e-06, + "loss": 0.2843, + "step": 14556 + }, + { + "epoch": 0.6819225183866585, + "grad_norm": 0.614777056287611, + "learning_rate": 3.835747373172609e-06, + "loss": 0.2934, + "step": 14557 + }, + { + "epoch": 0.6819693633765869, + "grad_norm": 0.5949747438104663, + "learning_rate": 3.8355870588946375e-06, + "loss": 0.2643, + "step": 14558 + }, + { + "epoch": 0.6820162083665152, + "grad_norm": 0.583517490201397, + "learning_rate": 3.835426736930697e-06, + "loss": 0.2859, + "step": 14559 + }, + { + "epoch": 0.6820630533564436, + "grad_norm": 0.629265673705006, + "learning_rate": 3.835266407281714e-06, + "loss": 0.277, + "step": 14560 + }, + { + "epoch": 0.6821098983463718, + "grad_norm": 0.6026638036228686, + "learning_rate": 3.835106069948606e-06, + "loss": 0.2669, + "step": 14561 + }, + { + "epoch": 0.6821567433363002, + "grad_norm": 0.5290013947709872, + "learning_rate": 3.8349457249322984e-06, + "loss": 0.2468, + "step": 14562 + }, + { + "epoch": 0.6822035883262285, + "grad_norm": 0.5979684841671671, + "learning_rate": 3.834785372233715e-06, + "loss": 0.2875, + "step": 14563 + }, + { + "epoch": 0.6822504333161569, + "grad_norm": 0.5776247471228525, + "learning_rate": 3.834625011853776e-06, + "loss": 0.2754, + "step": 14564 + }, + { + "epoch": 0.6822972783060851, + "grad_norm": 0.5928218227749518, + "learning_rate": 3.834464643793406e-06, + "loss": 0.2833, + "step": 14565 + }, + { + "epoch": 0.6823441232960135, + "grad_norm": 0.5836102551767071, + "learning_rate": 3.834304268053527e-06, + "loss": 0.2725, + "step": 14566 + }, + { + "epoch": 0.6823909682859418, + "grad_norm": 0.6128318316563979, + "learning_rate": 3.834143884635063e-06, + "loss": 0.2872, + "step": 14567 + }, + { + "epoch": 0.6824378132758702, + "grad_norm": 0.5880581028437357, + "learning_rate": 3.833983493538935e-06, + "loss": 0.3015, + "step": 14568 + }, + { + "epoch": 0.6824846582657985, + "grad_norm": 0.5694567839936732, + "learning_rate": 3.833823094766068e-06, + "loss": 0.2728, + "step": 14569 + }, + { + "epoch": 0.6825315032557268, + "grad_norm": 0.5422024989049324, + "learning_rate": 3.833662688317385e-06, + "loss": 0.2599, + "step": 14570 + }, + { + "epoch": 0.6825783482456551, + "grad_norm": 0.5726498387420191, + "learning_rate": 3.833502274193808e-06, + "loss": 0.2691, + "step": 14571 + }, + { + "epoch": 0.6826251932355835, + "grad_norm": 0.5858986045669637, + "learning_rate": 3.833341852396261e-06, + "loss": 0.2746, + "step": 14572 + }, + { + "epoch": 0.6826720382255118, + "grad_norm": 0.5686000894969261, + "learning_rate": 3.833181422925666e-06, + "loss": 0.2841, + "step": 14573 + }, + { + "epoch": 0.68271888321544, + "grad_norm": 0.5884771892698174, + "learning_rate": 3.833020985782947e-06, + "loss": 0.2746, + "step": 14574 + }, + { + "epoch": 0.6827657282053684, + "grad_norm": 0.5047096415580842, + "learning_rate": 3.8328605409690275e-06, + "loss": 0.2504, + "step": 14575 + }, + { + "epoch": 0.6828125731952968, + "grad_norm": 0.6728205035944983, + "learning_rate": 3.832700088484831e-06, + "loss": 0.3039, + "step": 14576 + }, + { + "epoch": 0.6828594181852251, + "grad_norm": 0.5616321747953925, + "learning_rate": 3.832539628331279e-06, + "loss": 0.286, + "step": 14577 + }, + { + "epoch": 0.6829062631751535, + "grad_norm": 0.6026977028653411, + "learning_rate": 3.832379160509298e-06, + "loss": 0.2953, + "step": 14578 + }, + { + "epoch": 0.6829531081650817, + "grad_norm": 0.5915755401298161, + "learning_rate": 3.83221868501981e-06, + "loss": 0.2772, + "step": 14579 + }, + { + "epoch": 0.68299995315501, + "grad_norm": 0.5776096345866439, + "learning_rate": 3.832058201863737e-06, + "loss": 0.2745, + "step": 14580 + }, + { + "epoch": 0.6830467981449384, + "grad_norm": 0.5552185442718925, + "learning_rate": 3.831897711042004e-06, + "loss": 0.2765, + "step": 14581 + }, + { + "epoch": 0.6830936431348668, + "grad_norm": 0.5926420922577275, + "learning_rate": 3.831737212555535e-06, + "loss": 0.2573, + "step": 14582 + }, + { + "epoch": 0.683140488124795, + "grad_norm": 0.6054695838220913, + "learning_rate": 3.831576706405253e-06, + "loss": 0.2864, + "step": 14583 + }, + { + "epoch": 0.6831873331147233, + "grad_norm": 0.6014768303581828, + "learning_rate": 3.831416192592081e-06, + "loss": 0.2748, + "step": 14584 + }, + { + "epoch": 0.6832341781046517, + "grad_norm": 0.5828882121940716, + "learning_rate": 3.831255671116945e-06, + "loss": 0.2966, + "step": 14585 + }, + { + "epoch": 0.68328102309458, + "grad_norm": 0.6072251716157933, + "learning_rate": 3.8310951419807656e-06, + "loss": 0.2753, + "step": 14586 + }, + { + "epoch": 0.6833278680845084, + "grad_norm": 0.5989736414443602, + "learning_rate": 3.830934605184469e-06, + "loss": 0.279, + "step": 14587 + }, + { + "epoch": 0.6833747130744366, + "grad_norm": 0.621303813411414, + "learning_rate": 3.830774060728978e-06, + "loss": 0.2873, + "step": 14588 + }, + { + "epoch": 0.683421558064365, + "grad_norm": 0.5903322788502225, + "learning_rate": 3.830613508615217e-06, + "loss": 0.3003, + "step": 14589 + }, + { + "epoch": 0.6834684030542933, + "grad_norm": 0.6026528892172478, + "learning_rate": 3.830452948844109e-06, + "loss": 0.2927, + "step": 14590 + }, + { + "epoch": 0.6835152480442217, + "grad_norm": 0.5833307225803852, + "learning_rate": 3.830292381416579e-06, + "loss": 0.279, + "step": 14591 + }, + { + "epoch": 0.6835620930341499, + "grad_norm": 0.6380900784171882, + "learning_rate": 3.830131806333552e-06, + "loss": 0.2986, + "step": 14592 + }, + { + "epoch": 0.6836089380240783, + "grad_norm": 0.5680357294322169, + "learning_rate": 3.829971223595949e-06, + "loss": 0.2874, + "step": 14593 + }, + { + "epoch": 0.6836557830140066, + "grad_norm": 0.5870477503201971, + "learning_rate": 3.829810633204697e-06, + "loss": 0.2656, + "step": 14594 + }, + { + "epoch": 0.683702628003935, + "grad_norm": 0.5747381900988324, + "learning_rate": 3.8296500351607195e-06, + "loss": 0.26, + "step": 14595 + }, + { + "epoch": 0.6837494729938633, + "grad_norm": 0.5656780726821262, + "learning_rate": 3.829489429464939e-06, + "loss": 0.2682, + "step": 14596 + }, + { + "epoch": 0.6837963179837916, + "grad_norm": 0.5291822512028657, + "learning_rate": 3.829328816118281e-06, + "loss": 0.2718, + "step": 14597 + }, + { + "epoch": 0.6838431629737199, + "grad_norm": 0.59318575391777, + "learning_rate": 3.82916819512167e-06, + "loss": 0.2775, + "step": 14598 + }, + { + "epoch": 0.6838900079636483, + "grad_norm": 0.5333114894967792, + "learning_rate": 3.829007566476031e-06, + "loss": 0.2639, + "step": 14599 + }, + { + "epoch": 0.6839368529535766, + "grad_norm": 0.5513851412951457, + "learning_rate": 3.828846930182286e-06, + "loss": 0.2763, + "step": 14600 + }, + { + "epoch": 0.6839836979435049, + "grad_norm": 0.6039703749738897, + "learning_rate": 3.828686286241362e-06, + "loss": 0.2708, + "step": 14601 + }, + { + "epoch": 0.6840305429334332, + "grad_norm": 0.57331627580514, + "learning_rate": 3.8285256346541825e-06, + "loss": 0.2657, + "step": 14602 + }, + { + "epoch": 0.6840773879233616, + "grad_norm": 0.6757365337561324, + "learning_rate": 3.828364975421671e-06, + "loss": 0.3114, + "step": 14603 + }, + { + "epoch": 0.6841242329132899, + "grad_norm": 0.6119036654631727, + "learning_rate": 3.828204308544753e-06, + "loss": 0.2908, + "step": 14604 + }, + { + "epoch": 0.6841710779032183, + "grad_norm": 0.601896454815759, + "learning_rate": 3.828043634024353e-06, + "loss": 0.2872, + "step": 14605 + }, + { + "epoch": 0.6842179228931465, + "grad_norm": 0.5638529970277397, + "learning_rate": 3.827882951861397e-06, + "loss": 0.2829, + "step": 14606 + }, + { + "epoch": 0.6842647678830749, + "grad_norm": 0.5481609268061005, + "learning_rate": 3.827722262056807e-06, + "loss": 0.2703, + "step": 14607 + }, + { + "epoch": 0.6843116128730032, + "grad_norm": 0.7082391003571159, + "learning_rate": 3.827561564611509e-06, + "loss": 0.2874, + "step": 14608 + }, + { + "epoch": 0.6843584578629316, + "grad_norm": 0.629230360956409, + "learning_rate": 3.827400859526429e-06, + "loss": 0.2884, + "step": 14609 + }, + { + "epoch": 0.6844053028528598, + "grad_norm": 0.5781617558372537, + "learning_rate": 3.82724014680249e-06, + "loss": 0.2827, + "step": 14610 + }, + { + "epoch": 0.6844521478427882, + "grad_norm": 0.6392720236436393, + "learning_rate": 3.827079426440617e-06, + "loss": 0.2996, + "step": 14611 + }, + { + "epoch": 0.6844989928327165, + "grad_norm": 0.583620321274968, + "learning_rate": 3.8269186984417374e-06, + "loss": 0.2892, + "step": 14612 + }, + { + "epoch": 0.6845458378226449, + "grad_norm": 0.5953137275574888, + "learning_rate": 3.826757962806773e-06, + "loss": 0.2758, + "step": 14613 + }, + { + "epoch": 0.6845926828125732, + "grad_norm": 0.6593687513497364, + "learning_rate": 3.826597219536649e-06, + "loss": 0.305, + "step": 14614 + }, + { + "epoch": 0.6846395278025015, + "grad_norm": 0.5954554950737129, + "learning_rate": 3.826436468632293e-06, + "loss": 0.2739, + "step": 14615 + }, + { + "epoch": 0.6846863727924298, + "grad_norm": 0.5653459806751964, + "learning_rate": 3.826275710094628e-06, + "loss": 0.2674, + "step": 14616 + }, + { + "epoch": 0.6847332177823582, + "grad_norm": 0.6192840780240773, + "learning_rate": 3.826114943924579e-06, + "loss": 0.2568, + "step": 14617 + }, + { + "epoch": 0.6847800627722865, + "grad_norm": 0.5398756314435368, + "learning_rate": 3.825954170123072e-06, + "loss": 0.2799, + "step": 14618 + }, + { + "epoch": 0.6848269077622148, + "grad_norm": 0.6263796554341281, + "learning_rate": 3.825793388691032e-06, + "loss": 0.2944, + "step": 14619 + }, + { + "epoch": 0.6848737527521431, + "grad_norm": 0.5457328766234528, + "learning_rate": 3.825632599629385e-06, + "loss": 0.276, + "step": 14620 + }, + { + "epoch": 0.6849205977420715, + "grad_norm": 0.5998398233923568, + "learning_rate": 3.825471802939055e-06, + "loss": 0.2836, + "step": 14621 + }, + { + "epoch": 0.6849674427319998, + "grad_norm": 0.5408575084309638, + "learning_rate": 3.825310998620968e-06, + "loss": 0.28, + "step": 14622 + }, + { + "epoch": 0.6850142877219282, + "grad_norm": 0.6014660792580594, + "learning_rate": 3.82515018667605e-06, + "loss": 0.3058, + "step": 14623 + }, + { + "epoch": 0.6850611327118564, + "grad_norm": 0.6119644410060339, + "learning_rate": 3.824989367105224e-06, + "loss": 0.2814, + "step": 14624 + }, + { + "epoch": 0.6851079777017848, + "grad_norm": 0.6158946494033298, + "learning_rate": 3.824828539909419e-06, + "loss": 0.3009, + "step": 14625 + }, + { + "epoch": 0.6851548226917131, + "grad_norm": 0.5644245973398376, + "learning_rate": 3.824667705089558e-06, + "loss": 0.2744, + "step": 14626 + }, + { + "epoch": 0.6852016676816415, + "grad_norm": 0.5719868929260833, + "learning_rate": 3.824506862646567e-06, + "loss": 0.2854, + "step": 14627 + }, + { + "epoch": 0.6852485126715697, + "grad_norm": 0.5953076334844946, + "learning_rate": 3.8243460125813725e-06, + "loss": 0.2779, + "step": 14628 + }, + { + "epoch": 0.6852953576614981, + "grad_norm": 0.5849790363294876, + "learning_rate": 3.8241851548948995e-06, + "loss": 0.2987, + "step": 14629 + }, + { + "epoch": 0.6853422026514264, + "grad_norm": 0.5989711416720942, + "learning_rate": 3.8240242895880734e-06, + "loss": 0.286, + "step": 14630 + }, + { + "epoch": 0.6853890476413548, + "grad_norm": 0.5650221640870442, + "learning_rate": 3.82386341666182e-06, + "loss": 0.2743, + "step": 14631 + }, + { + "epoch": 0.6854358926312831, + "grad_norm": 0.5723182815200023, + "learning_rate": 3.823702536117066e-06, + "loss": 0.2783, + "step": 14632 + }, + { + "epoch": 0.6854827376212114, + "grad_norm": 0.5520034955411595, + "learning_rate": 3.8235416479547365e-06, + "loss": 0.2785, + "step": 14633 + }, + { + "epoch": 0.6855295826111397, + "grad_norm": 0.6169580290002409, + "learning_rate": 3.823380752175758e-06, + "loss": 0.2905, + "step": 14634 + }, + { + "epoch": 0.6855764276010681, + "grad_norm": 0.5549380159911796, + "learning_rate": 3.823219848781054e-06, + "loss": 0.2616, + "step": 14635 + }, + { + "epoch": 0.6856232725909964, + "grad_norm": 0.6118482278605231, + "learning_rate": 3.823058937771554e-06, + "loss": 0.294, + "step": 14636 + }, + { + "epoch": 0.6856701175809247, + "grad_norm": 0.6509395676315438, + "learning_rate": 3.8228980191481825e-06, + "loss": 0.2933, + "step": 14637 + }, + { + "epoch": 0.685716962570853, + "grad_norm": 0.6316330728692795, + "learning_rate": 3.822737092911865e-06, + "loss": 0.2905, + "step": 14638 + }, + { + "epoch": 0.6857638075607814, + "grad_norm": 0.6199246309255869, + "learning_rate": 3.8225761590635275e-06, + "loss": 0.2879, + "step": 14639 + }, + { + "epoch": 0.6858106525507097, + "grad_norm": 0.6517785115185287, + "learning_rate": 3.822415217604098e-06, + "loss": 0.289, + "step": 14640 + }, + { + "epoch": 0.6858574975406381, + "grad_norm": 0.6243598999060285, + "learning_rate": 3.8222542685345e-06, + "loss": 0.2958, + "step": 14641 + }, + { + "epoch": 0.6859043425305663, + "grad_norm": 0.6129066678131758, + "learning_rate": 3.822093311855661e-06, + "loss": 0.2719, + "step": 14642 + }, + { + "epoch": 0.6859511875204947, + "grad_norm": 0.6156115788079173, + "learning_rate": 3.821932347568508e-06, + "loss": 0.2893, + "step": 14643 + }, + { + "epoch": 0.685998032510423, + "grad_norm": 0.5965340042528132, + "learning_rate": 3.821771375673966e-06, + "loss": 0.269, + "step": 14644 + }, + { + "epoch": 0.6860448775003514, + "grad_norm": 0.6434558887732443, + "learning_rate": 3.8216103961729616e-06, + "loss": 0.2748, + "step": 14645 + }, + { + "epoch": 0.6860917224902796, + "grad_norm": 0.62169051203493, + "learning_rate": 3.821449409066422e-06, + "loss": 0.2692, + "step": 14646 + }, + { + "epoch": 0.686138567480208, + "grad_norm": 0.5838017498140244, + "learning_rate": 3.821288414355273e-06, + "loss": 0.2718, + "step": 14647 + }, + { + "epoch": 0.6861854124701363, + "grad_norm": 0.6251900066506468, + "learning_rate": 3.821127412040442e-06, + "loss": 0.3049, + "step": 14648 + }, + { + "epoch": 0.6862322574600647, + "grad_norm": 0.6419313722163428, + "learning_rate": 3.820966402122855e-06, + "loss": 0.2809, + "step": 14649 + }, + { + "epoch": 0.686279102449993, + "grad_norm": 0.599063634654441, + "learning_rate": 3.820805384603437e-06, + "loss": 0.2654, + "step": 14650 + }, + { + "epoch": 0.6863259474399213, + "grad_norm": 0.6222188249457977, + "learning_rate": 3.820644359483117e-06, + "loss": 0.2953, + "step": 14651 + }, + { + "epoch": 0.6863727924298496, + "grad_norm": 0.6768881720016404, + "learning_rate": 3.820483326762821e-06, + "loss": 0.3012, + "step": 14652 + }, + { + "epoch": 0.686419637419778, + "grad_norm": 0.6228057644257303, + "learning_rate": 3.8203222864434745e-06, + "loss": 0.3068, + "step": 14653 + }, + { + "epoch": 0.6864664824097063, + "grad_norm": 0.6294170467875332, + "learning_rate": 3.8201612385260046e-06, + "loss": 0.2994, + "step": 14654 + }, + { + "epoch": 0.6865133273996346, + "grad_norm": 0.5420604542399233, + "learning_rate": 3.8200001830113394e-06, + "loss": 0.2691, + "step": 14655 + }, + { + "epoch": 0.6865601723895629, + "grad_norm": 0.5975934342948437, + "learning_rate": 3.819839119900405e-06, + "loss": 0.2835, + "step": 14656 + }, + { + "epoch": 0.6866070173794913, + "grad_norm": 0.7164513766536368, + "learning_rate": 3.819678049194128e-06, + "loss": 0.2877, + "step": 14657 + }, + { + "epoch": 0.6866538623694196, + "grad_norm": 0.6368332339626123, + "learning_rate": 3.819516970893435e-06, + "loss": 0.2803, + "step": 14658 + }, + { + "epoch": 0.686700707359348, + "grad_norm": 0.6467590673140937, + "learning_rate": 3.819355884999255e-06, + "loss": 0.2904, + "step": 14659 + }, + { + "epoch": 0.6867475523492762, + "grad_norm": 0.593812281650282, + "learning_rate": 3.819194791512512e-06, + "loss": 0.2878, + "step": 14660 + }, + { + "epoch": 0.6867943973392046, + "grad_norm": 0.6342473170130937, + "learning_rate": 3.819033690434135e-06, + "loss": 0.2796, + "step": 14661 + }, + { + "epoch": 0.6868412423291329, + "grad_norm": 0.5852081384813177, + "learning_rate": 3.818872581765051e-06, + "loss": 0.2557, + "step": 14662 + }, + { + "epoch": 0.6868880873190613, + "grad_norm": 0.6599275581389228, + "learning_rate": 3.818711465506186e-06, + "loss": 0.3191, + "step": 14663 + }, + { + "epoch": 0.6869349323089895, + "grad_norm": 0.554410432572325, + "learning_rate": 3.8185503416584685e-06, + "loss": 0.27, + "step": 14664 + }, + { + "epoch": 0.6869817772989179, + "grad_norm": 0.5915553155230474, + "learning_rate": 3.818389210222825e-06, + "loss": 0.2834, + "step": 14665 + }, + { + "epoch": 0.6870286222888462, + "grad_norm": 0.5780739346899837, + "learning_rate": 3.818228071200183e-06, + "loss": 0.274, + "step": 14666 + }, + { + "epoch": 0.6870754672787746, + "grad_norm": 0.593351596145944, + "learning_rate": 3.81806692459147e-06, + "loss": 0.293, + "step": 14667 + }, + { + "epoch": 0.6871223122687029, + "grad_norm": 0.6178200045814768, + "learning_rate": 3.817905770397612e-06, + "loss": 0.2949, + "step": 14668 + }, + { + "epoch": 0.6871691572586311, + "grad_norm": 0.6022459671941527, + "learning_rate": 3.817744608619539e-06, + "loss": 0.2897, + "step": 14669 + }, + { + "epoch": 0.6872160022485595, + "grad_norm": 0.5719447192545584, + "learning_rate": 3.817583439258177e-06, + "loss": 0.2674, + "step": 14670 + }, + { + "epoch": 0.6872628472384879, + "grad_norm": 0.5447842108795481, + "learning_rate": 3.817422262314452e-06, + "loss": 0.2716, + "step": 14671 + }, + { + "epoch": 0.6873096922284162, + "grad_norm": 0.6143085156332411, + "learning_rate": 3.817261077789295e-06, + "loss": 0.2913, + "step": 14672 + }, + { + "epoch": 0.6873565372183444, + "grad_norm": 0.5714600173142229, + "learning_rate": 3.817099885683631e-06, + "loss": 0.2727, + "step": 14673 + }, + { + "epoch": 0.6874033822082728, + "grad_norm": 0.5933878525263064, + "learning_rate": 3.816938685998387e-06, + "loss": 0.2866, + "step": 14674 + }, + { + "epoch": 0.6874502271982011, + "grad_norm": 0.6172395071436367, + "learning_rate": 3.816777478734492e-06, + "loss": 0.2921, + "step": 14675 + }, + { + "epoch": 0.6874970721881295, + "grad_norm": 0.5962304886142856, + "learning_rate": 3.816616263892875e-06, + "loss": 0.278, + "step": 14676 + }, + { + "epoch": 0.6875439171780579, + "grad_norm": 0.6642549184894224, + "learning_rate": 3.816455041474461e-06, + "loss": 0.2841, + "step": 14677 + }, + { + "epoch": 0.6875907621679861, + "grad_norm": 0.5480757994384365, + "learning_rate": 3.8162938114801795e-06, + "loss": 0.2807, + "step": 14678 + }, + { + "epoch": 0.6876376071579144, + "grad_norm": 0.5553164989111719, + "learning_rate": 3.816132573910958e-06, + "loss": 0.2667, + "step": 14679 + }, + { + "epoch": 0.6876844521478428, + "grad_norm": 0.5526681216392653, + "learning_rate": 3.815971328767725e-06, + "loss": 0.2729, + "step": 14680 + }, + { + "epoch": 0.6877312971377711, + "grad_norm": 0.5705293563002232, + "learning_rate": 3.815810076051406e-06, + "loss": 0.2706, + "step": 14681 + }, + { + "epoch": 0.6877781421276994, + "grad_norm": 0.5765283306377414, + "learning_rate": 3.815648815762932e-06, + "loss": 0.2643, + "step": 14682 + }, + { + "epoch": 0.6878249871176277, + "grad_norm": 0.666543574234102, + "learning_rate": 3.81548754790323e-06, + "loss": 0.2909, + "step": 14683 + }, + { + "epoch": 0.6878718321075561, + "grad_norm": 0.6073202862907845, + "learning_rate": 3.815326272473227e-06, + "loss": 0.2689, + "step": 14684 + }, + { + "epoch": 0.6879186770974844, + "grad_norm": 0.588230567670807, + "learning_rate": 3.8151649894738525e-06, + "loss": 0.2877, + "step": 14685 + }, + { + "epoch": 0.6879655220874128, + "grad_norm": 0.5696962290428983, + "learning_rate": 3.815003698906034e-06, + "loss": 0.2864, + "step": 14686 + }, + { + "epoch": 0.688012367077341, + "grad_norm": 0.6044565656364707, + "learning_rate": 3.8148424007707e-06, + "loss": 0.2776, + "step": 14687 + }, + { + "epoch": 0.6880592120672694, + "grad_norm": 0.6471494906885559, + "learning_rate": 3.814681095068777e-06, + "loss": 0.3083, + "step": 14688 + }, + { + "epoch": 0.6881060570571977, + "grad_norm": 0.5103862363652848, + "learning_rate": 3.814519781801196e-06, + "loss": 0.2559, + "step": 14689 + }, + { + "epoch": 0.6881529020471261, + "grad_norm": 0.562274446880649, + "learning_rate": 3.8143584609688837e-06, + "loss": 0.2786, + "step": 14690 + }, + { + "epoch": 0.6881997470370543, + "grad_norm": 0.5616520267108126, + "learning_rate": 3.8141971325727688e-06, + "loss": 0.2775, + "step": 14691 + }, + { + "epoch": 0.6882465920269827, + "grad_norm": 0.5440464266877508, + "learning_rate": 3.8140357966137796e-06, + "loss": 0.272, + "step": 14692 + }, + { + "epoch": 0.688293437016911, + "grad_norm": 0.6406806435623106, + "learning_rate": 3.813874453092845e-06, + "loss": 0.3029, + "step": 14693 + }, + { + "epoch": 0.6883402820068394, + "grad_norm": 0.6282309695691701, + "learning_rate": 3.813713102010893e-06, + "loss": 0.3134, + "step": 14694 + }, + { + "epoch": 0.6883871269967677, + "grad_norm": 0.5975442025770912, + "learning_rate": 3.813551743368853e-06, + "loss": 0.2778, + "step": 14695 + }, + { + "epoch": 0.688433971986696, + "grad_norm": 0.6350325561279758, + "learning_rate": 3.813390377167652e-06, + "loss": 0.2803, + "step": 14696 + }, + { + "epoch": 0.6884808169766243, + "grad_norm": 0.5758528925795752, + "learning_rate": 3.81322900340822e-06, + "loss": 0.2732, + "step": 14697 + }, + { + "epoch": 0.6885276619665527, + "grad_norm": 0.5856793465206195, + "learning_rate": 3.8130676220914843e-06, + "loss": 0.2496, + "step": 14698 + }, + { + "epoch": 0.688574506956481, + "grad_norm": 0.5784772048269443, + "learning_rate": 3.812906233218375e-06, + "loss": 0.2788, + "step": 14699 + }, + { + "epoch": 0.6886213519464093, + "grad_norm": 0.5440788355082733, + "learning_rate": 3.81274483678982e-06, + "loss": 0.2704, + "step": 14700 + }, + { + "epoch": 0.6886681969363376, + "grad_norm": 0.610963407277692, + "learning_rate": 3.812583432806749e-06, + "loss": 0.2807, + "step": 14701 + }, + { + "epoch": 0.688715041926266, + "grad_norm": 0.5680697648762345, + "learning_rate": 3.8124220212700903e-06, + "loss": 0.2433, + "step": 14702 + }, + { + "epoch": 0.6887618869161943, + "grad_norm": 0.6024900879919279, + "learning_rate": 3.8122606021807716e-06, + "loss": 0.2934, + "step": 14703 + }, + { + "epoch": 0.6888087319061227, + "grad_norm": 0.5701894861210526, + "learning_rate": 3.8120991755397243e-06, + "loss": 0.2691, + "step": 14704 + }, + { + "epoch": 0.6888555768960509, + "grad_norm": 0.5740831891376227, + "learning_rate": 3.8119377413478755e-06, + "loss": 0.2921, + "step": 14705 + }, + { + "epoch": 0.6889024218859793, + "grad_norm": 0.6697308276139605, + "learning_rate": 3.8117762996061543e-06, + "loss": 0.2898, + "step": 14706 + }, + { + "epoch": 0.6889492668759076, + "grad_norm": 0.650212655940473, + "learning_rate": 3.8116148503154905e-06, + "loss": 0.2805, + "step": 14707 + }, + { + "epoch": 0.688996111865836, + "grad_norm": 0.5868975302038891, + "learning_rate": 3.811453393476813e-06, + "loss": 0.2788, + "step": 14708 + }, + { + "epoch": 0.6890429568557642, + "grad_norm": 0.5822409771904692, + "learning_rate": 3.811291929091051e-06, + "loss": 0.281, + "step": 14709 + }, + { + "epoch": 0.6890898018456926, + "grad_norm": 0.5762493273953923, + "learning_rate": 3.8111304571591334e-06, + "loss": 0.2767, + "step": 14710 + }, + { + "epoch": 0.6891366468356209, + "grad_norm": 0.5605503688835002, + "learning_rate": 3.81096897768199e-06, + "loss": 0.2593, + "step": 14711 + }, + { + "epoch": 0.6891834918255493, + "grad_norm": 0.5793782149147945, + "learning_rate": 3.8108074906605488e-06, + "loss": 0.2689, + "step": 14712 + }, + { + "epoch": 0.6892303368154776, + "grad_norm": 0.590111803484475, + "learning_rate": 3.8106459960957405e-06, + "loss": 0.301, + "step": 14713 + }, + { + "epoch": 0.6892771818054059, + "grad_norm": 0.5369329197537727, + "learning_rate": 3.810484493988494e-06, + "loss": 0.269, + "step": 14714 + }, + { + "epoch": 0.6893240267953342, + "grad_norm": 0.5988531639702097, + "learning_rate": 3.8103229843397383e-06, + "loss": 0.2865, + "step": 14715 + }, + { + "epoch": 0.6893708717852626, + "grad_norm": 0.6031197383379582, + "learning_rate": 3.8101614671504026e-06, + "loss": 0.3077, + "step": 14716 + }, + { + "epoch": 0.6894177167751909, + "grad_norm": 0.5731797912119354, + "learning_rate": 3.8099999424214175e-06, + "loss": 0.2801, + "step": 14717 + }, + { + "epoch": 0.6894645617651192, + "grad_norm": 0.6034638658537619, + "learning_rate": 3.809838410153712e-06, + "loss": 0.2798, + "step": 14718 + }, + { + "epoch": 0.6895114067550475, + "grad_norm": 0.5634282797029719, + "learning_rate": 3.8096768703482164e-06, + "loss": 0.2804, + "step": 14719 + }, + { + "epoch": 0.6895582517449759, + "grad_norm": 0.643153998978431, + "learning_rate": 3.8095153230058584e-06, + "loss": 0.3042, + "step": 14720 + }, + { + "epoch": 0.6896050967349042, + "grad_norm": 0.5939375994493185, + "learning_rate": 3.8093537681275694e-06, + "loss": 0.29, + "step": 14721 + }, + { + "epoch": 0.6896519417248326, + "grad_norm": 0.6166888366454685, + "learning_rate": 3.809192205714279e-06, + "loss": 0.2878, + "step": 14722 + }, + { + "epoch": 0.6896987867147608, + "grad_norm": 0.586130978454405, + "learning_rate": 3.809030635766916e-06, + "loss": 0.2735, + "step": 14723 + }, + { + "epoch": 0.6897456317046892, + "grad_norm": 0.6157839739680433, + "learning_rate": 3.80886905828641e-06, + "loss": 0.2989, + "step": 14724 + }, + { + "epoch": 0.6897924766946175, + "grad_norm": 0.63302644307388, + "learning_rate": 3.808707473273693e-06, + "loss": 0.2909, + "step": 14725 + }, + { + "epoch": 0.6898393216845459, + "grad_norm": 0.5793476938141695, + "learning_rate": 3.8085458807296927e-06, + "loss": 0.2691, + "step": 14726 + }, + { + "epoch": 0.6898861666744741, + "grad_norm": 0.5923605098185295, + "learning_rate": 3.8083842806553396e-06, + "loss": 0.266, + "step": 14727 + }, + { + "epoch": 0.6899330116644025, + "grad_norm": 0.5785344843574197, + "learning_rate": 3.8082226730515638e-06, + "loss": 0.2735, + "step": 14728 + }, + { + "epoch": 0.6899798566543308, + "grad_norm": 0.5960916810835506, + "learning_rate": 3.8080610579192955e-06, + "loss": 0.2833, + "step": 14729 + }, + { + "epoch": 0.6900267016442592, + "grad_norm": 0.6274166586472185, + "learning_rate": 3.8078994352594646e-06, + "loss": 0.2966, + "step": 14730 + }, + { + "epoch": 0.6900735466341875, + "grad_norm": 0.5644902458058086, + "learning_rate": 3.807737805073001e-06, + "loss": 0.2623, + "step": 14731 + }, + { + "epoch": 0.6901203916241158, + "grad_norm": 0.6076088407877627, + "learning_rate": 3.8075761673608354e-06, + "loss": 0.2812, + "step": 14732 + }, + { + "epoch": 0.6901672366140441, + "grad_norm": 0.5457755335610862, + "learning_rate": 3.807414522123897e-06, + "loss": 0.2635, + "step": 14733 + }, + { + "epoch": 0.6902140816039725, + "grad_norm": 0.6042634908280597, + "learning_rate": 3.807252869363117e-06, + "loss": 0.2739, + "step": 14734 + }, + { + "epoch": 0.6902609265939008, + "grad_norm": 0.6045202963446574, + "learning_rate": 3.8070912090794256e-06, + "loss": 0.2819, + "step": 14735 + }, + { + "epoch": 0.690307771583829, + "grad_norm": 0.5936563821501412, + "learning_rate": 3.8069295412737524e-06, + "loss": 0.2938, + "step": 14736 + }, + { + "epoch": 0.6903546165737574, + "grad_norm": 0.5349615742275329, + "learning_rate": 3.8067678659470285e-06, + "loss": 0.2644, + "step": 14737 + }, + { + "epoch": 0.6904014615636858, + "grad_norm": 0.5423599765727861, + "learning_rate": 3.8066061831001834e-06, + "loss": 0.259, + "step": 14738 + }, + { + "epoch": 0.6904483065536141, + "grad_norm": 0.5995287223331077, + "learning_rate": 3.806444492734148e-06, + "loss": 0.2679, + "step": 14739 + }, + { + "epoch": 0.6904951515435425, + "grad_norm": 0.5961496924315449, + "learning_rate": 3.806282794849854e-06, + "loss": 0.2899, + "step": 14740 + }, + { + "epoch": 0.6905419965334707, + "grad_norm": 0.5814744347474667, + "learning_rate": 3.8061210894482304e-06, + "loss": 0.2861, + "step": 14741 + }, + { + "epoch": 0.690588841523399, + "grad_norm": 0.5897491108333907, + "learning_rate": 3.8059593765302076e-06, + "loss": 0.2648, + "step": 14742 + }, + { + "epoch": 0.6906356865133274, + "grad_norm": 0.5840398491005099, + "learning_rate": 3.805797656096718e-06, + "loss": 0.3106, + "step": 14743 + }, + { + "epoch": 0.6906825315032558, + "grad_norm": 0.5847734931237435, + "learning_rate": 3.8056359281486898e-06, + "loss": 0.3006, + "step": 14744 + }, + { + "epoch": 0.690729376493184, + "grad_norm": 0.5961037965609259, + "learning_rate": 3.8054741926870553e-06, + "loss": 0.2806, + "step": 14745 + }, + { + "epoch": 0.6907762214831124, + "grad_norm": 0.5816055123416366, + "learning_rate": 3.805312449712746e-06, + "loss": 0.2802, + "step": 14746 + }, + { + "epoch": 0.6908230664730407, + "grad_norm": 0.5939778116865173, + "learning_rate": 3.8051506992266906e-06, + "loss": 0.2965, + "step": 14747 + }, + { + "epoch": 0.690869911462969, + "grad_norm": 0.590695278047719, + "learning_rate": 3.8049889412298204e-06, + "loss": 0.2551, + "step": 14748 + }, + { + "epoch": 0.6909167564528974, + "grad_norm": 0.5794886181480305, + "learning_rate": 3.804827175723068e-06, + "loss": 0.2929, + "step": 14749 + }, + { + "epoch": 0.6909636014428256, + "grad_norm": 0.6040432102570763, + "learning_rate": 3.8046654027073625e-06, + "loss": 0.2884, + "step": 14750 + }, + { + "epoch": 0.691010446432754, + "grad_norm": 0.6095359193534879, + "learning_rate": 3.8045036221836363e-06, + "loss": 0.2888, + "step": 14751 + }, + { + "epoch": 0.6910572914226824, + "grad_norm": 0.5768435661603026, + "learning_rate": 3.8043418341528192e-06, + "loss": 0.2719, + "step": 14752 + }, + { + "epoch": 0.6911041364126107, + "grad_norm": 0.5704668313067932, + "learning_rate": 3.8041800386158422e-06, + "loss": 0.2928, + "step": 14753 + }, + { + "epoch": 0.6911509814025389, + "grad_norm": 0.5764347855303616, + "learning_rate": 3.8040182355736375e-06, + "loss": 0.2793, + "step": 14754 + }, + { + "epoch": 0.6911978263924673, + "grad_norm": 0.5366601430113269, + "learning_rate": 3.8038564250271355e-06, + "loss": 0.2861, + "step": 14755 + }, + { + "epoch": 0.6912446713823956, + "grad_norm": 0.5906837647894928, + "learning_rate": 3.8036946069772675e-06, + "loss": 0.2994, + "step": 14756 + }, + { + "epoch": 0.691291516372324, + "grad_norm": 0.5925322856251873, + "learning_rate": 3.8035327814249644e-06, + "loss": 0.2767, + "step": 14757 + }, + { + "epoch": 0.6913383613622524, + "grad_norm": 0.5707687394583574, + "learning_rate": 3.803370948371158e-06, + "loss": 0.2847, + "step": 14758 + }, + { + "epoch": 0.6913852063521806, + "grad_norm": 0.6232123482510072, + "learning_rate": 3.8032091078167786e-06, + "loss": 0.2905, + "step": 14759 + }, + { + "epoch": 0.6914320513421089, + "grad_norm": 0.562939070947084, + "learning_rate": 3.80304725976276e-06, + "loss": 0.2707, + "step": 14760 + }, + { + "epoch": 0.6914788963320373, + "grad_norm": 0.5943386155836771, + "learning_rate": 3.8028854042100303e-06, + "loss": 0.2613, + "step": 14761 + }, + { + "epoch": 0.6915257413219656, + "grad_norm": 0.6105968130771292, + "learning_rate": 3.802723541159523e-06, + "loss": 0.2866, + "step": 14762 + }, + { + "epoch": 0.6915725863118939, + "grad_norm": 0.545575124904537, + "learning_rate": 3.8025616706121705e-06, + "loss": 0.2683, + "step": 14763 + }, + { + "epoch": 0.6916194313018222, + "grad_norm": 0.5769863998004175, + "learning_rate": 3.802399792568901e-06, + "loss": 0.2774, + "step": 14764 + }, + { + "epoch": 0.6916662762917506, + "grad_norm": 0.5674868870928927, + "learning_rate": 3.8022379070306492e-06, + "loss": 0.2868, + "step": 14765 + }, + { + "epoch": 0.6917131212816789, + "grad_norm": 0.6269786231060309, + "learning_rate": 3.802076013998345e-06, + "loss": 0.2926, + "step": 14766 + }, + { + "epoch": 0.6917599662716073, + "grad_norm": 0.6244624503193763, + "learning_rate": 3.8019141134729208e-06, + "loss": 0.2923, + "step": 14767 + }, + { + "epoch": 0.6918068112615355, + "grad_norm": 0.5526953067083329, + "learning_rate": 3.8017522054553085e-06, + "loss": 0.2732, + "step": 14768 + }, + { + "epoch": 0.6918536562514639, + "grad_norm": 0.5630936977891237, + "learning_rate": 3.801590289946439e-06, + "loss": 0.2584, + "step": 14769 + }, + { + "epoch": 0.6919005012413922, + "grad_norm": 0.5563585934302541, + "learning_rate": 3.8014283669472453e-06, + "loss": 0.262, + "step": 14770 + }, + { + "epoch": 0.6919473462313206, + "grad_norm": 0.5673656562297711, + "learning_rate": 3.801266436458657e-06, + "loss": 0.271, + "step": 14771 + }, + { + "epoch": 0.6919941912212488, + "grad_norm": 0.5700248924040994, + "learning_rate": 3.801104498481608e-06, + "loss": 0.2719, + "step": 14772 + }, + { + "epoch": 0.6920410362111772, + "grad_norm": 0.6148090916760125, + "learning_rate": 3.800942553017031e-06, + "loss": 0.2688, + "step": 14773 + }, + { + "epoch": 0.6920878812011055, + "grad_norm": 0.6184883403035749, + "learning_rate": 3.8007806000658556e-06, + "loss": 0.2879, + "step": 14774 + }, + { + "epoch": 0.6921347261910339, + "grad_norm": 0.5767152616421638, + "learning_rate": 3.8006186396290135e-06, + "loss": 0.2695, + "step": 14775 + }, + { + "epoch": 0.6921815711809622, + "grad_norm": 0.5908291713170243, + "learning_rate": 3.800456671707439e-06, + "loss": 0.2768, + "step": 14776 + }, + { + "epoch": 0.6922284161708905, + "grad_norm": 0.5955788144487374, + "learning_rate": 3.800294696302064e-06, + "loss": 0.2776, + "step": 14777 + }, + { + "epoch": 0.6922752611608188, + "grad_norm": 0.5867979478325044, + "learning_rate": 3.800132713413819e-06, + "loss": 0.2732, + "step": 14778 + }, + { + "epoch": 0.6923221061507472, + "grad_norm": 0.5444860284455597, + "learning_rate": 3.799970723043637e-06, + "loss": 0.2683, + "step": 14779 + }, + { + "epoch": 0.6923689511406755, + "grad_norm": 0.603797691671667, + "learning_rate": 3.7998087251924508e-06, + "loss": 0.2845, + "step": 14780 + }, + { + "epoch": 0.6924157961306038, + "grad_norm": 0.5895306195293829, + "learning_rate": 3.799646719861192e-06, + "loss": 0.275, + "step": 14781 + }, + { + "epoch": 0.6924626411205321, + "grad_norm": 0.6206535206736368, + "learning_rate": 3.7994847070507922e-06, + "loss": 0.2983, + "step": 14782 + }, + { + "epoch": 0.6925094861104605, + "grad_norm": 0.6054417871837481, + "learning_rate": 3.7993226867621845e-06, + "loss": 0.2944, + "step": 14783 + }, + { + "epoch": 0.6925563311003888, + "grad_norm": 0.5363975615550055, + "learning_rate": 3.799160658996303e-06, + "loss": 0.274, + "step": 14784 + }, + { + "epoch": 0.6926031760903172, + "grad_norm": 0.6242761129247045, + "learning_rate": 3.7989986237540767e-06, + "loss": 0.2766, + "step": 14785 + }, + { + "epoch": 0.6926500210802454, + "grad_norm": 0.579465379584607, + "learning_rate": 3.79883658103644e-06, + "loss": 0.289, + "step": 14786 + }, + { + "epoch": 0.6926968660701738, + "grad_norm": 0.5660776523331809, + "learning_rate": 3.7986745308443257e-06, + "loss": 0.2889, + "step": 14787 + }, + { + "epoch": 0.6927437110601021, + "grad_norm": 0.5850821732676966, + "learning_rate": 3.798512473178666e-06, + "loss": 0.3029, + "step": 14788 + }, + { + "epoch": 0.6927905560500305, + "grad_norm": 0.5762797950644084, + "learning_rate": 3.7983504080403933e-06, + "loss": 0.287, + "step": 14789 + }, + { + "epoch": 0.6928374010399587, + "grad_norm": 0.5419354000688976, + "learning_rate": 3.79818833543044e-06, + "loss": 0.2607, + "step": 14790 + }, + { + "epoch": 0.6928842460298871, + "grad_norm": 0.5927417328198635, + "learning_rate": 3.7980262553497394e-06, + "loss": 0.2831, + "step": 14791 + }, + { + "epoch": 0.6929310910198154, + "grad_norm": 0.6029656804606303, + "learning_rate": 3.797864167799224e-06, + "loss": 0.2972, + "step": 14792 + }, + { + "epoch": 0.6929779360097438, + "grad_norm": 0.5912231367888016, + "learning_rate": 3.7977020727798264e-06, + "loss": 0.3019, + "step": 14793 + }, + { + "epoch": 0.6930247809996721, + "grad_norm": 0.5847138660132772, + "learning_rate": 3.7975399702924788e-06, + "loss": 0.281, + "step": 14794 + }, + { + "epoch": 0.6930716259896004, + "grad_norm": 0.6403245006073143, + "learning_rate": 3.797377860338116e-06, + "loss": 0.3097, + "step": 14795 + }, + { + "epoch": 0.6931184709795287, + "grad_norm": 0.5842372355829544, + "learning_rate": 3.797215742917669e-06, + "loss": 0.2952, + "step": 14796 + }, + { + "epoch": 0.6931653159694571, + "grad_norm": 0.5540693172778699, + "learning_rate": 3.7970536180320718e-06, + "loss": 0.2859, + "step": 14797 + }, + { + "epoch": 0.6932121609593854, + "grad_norm": 0.6236618863207765, + "learning_rate": 3.796891485682257e-06, + "loss": 0.2802, + "step": 14798 + }, + { + "epoch": 0.6932590059493137, + "grad_norm": 0.5767403214457645, + "learning_rate": 3.796729345869158e-06, + "loss": 0.2956, + "step": 14799 + }, + { + "epoch": 0.693305850939242, + "grad_norm": 0.5684652163982404, + "learning_rate": 3.796567198593706e-06, + "loss": 0.277, + "step": 14800 + }, + { + "epoch": 0.6933526959291704, + "grad_norm": 0.6066705922119524, + "learning_rate": 3.7964050438568373e-06, + "loss": 0.2705, + "step": 14801 + }, + { + "epoch": 0.6933995409190987, + "grad_norm": 0.6148547997180955, + "learning_rate": 3.796242881659483e-06, + "loss": 0.2784, + "step": 14802 + }, + { + "epoch": 0.6934463859090271, + "grad_norm": 0.5644143229500553, + "learning_rate": 3.7960807120025765e-06, + "loss": 0.2852, + "step": 14803 + }, + { + "epoch": 0.6934932308989553, + "grad_norm": 0.5395825552579264, + "learning_rate": 3.7959185348870515e-06, + "loss": 0.2568, + "step": 14804 + }, + { + "epoch": 0.6935400758888837, + "grad_norm": 0.6112241474984879, + "learning_rate": 3.7957563503138405e-06, + "loss": 0.2846, + "step": 14805 + }, + { + "epoch": 0.693586920878812, + "grad_norm": 0.6209427034960122, + "learning_rate": 3.795594158283878e-06, + "loss": 0.2769, + "step": 14806 + }, + { + "epoch": 0.6936337658687404, + "grad_norm": 0.5740145720789106, + "learning_rate": 3.7954319587980963e-06, + "loss": 0.2705, + "step": 14807 + }, + { + "epoch": 0.6936806108586686, + "grad_norm": 0.6099239931153955, + "learning_rate": 3.7952697518574294e-06, + "loss": 0.2681, + "step": 14808 + }, + { + "epoch": 0.693727455848597, + "grad_norm": 0.6068461009956838, + "learning_rate": 3.795107537462811e-06, + "loss": 0.2934, + "step": 14809 + }, + { + "epoch": 0.6937743008385253, + "grad_norm": 0.550125956161878, + "learning_rate": 3.794945315615174e-06, + "loss": 0.2713, + "step": 14810 + }, + { + "epoch": 0.6938211458284537, + "grad_norm": 0.5598767087201518, + "learning_rate": 3.7947830863154523e-06, + "loss": 0.2778, + "step": 14811 + }, + { + "epoch": 0.693867990818382, + "grad_norm": 0.6607359758604641, + "learning_rate": 3.7946208495645786e-06, + "loss": 0.2801, + "step": 14812 + }, + { + "epoch": 0.6939148358083103, + "grad_norm": 0.5530135340576592, + "learning_rate": 3.7944586053634884e-06, + "loss": 0.2714, + "step": 14813 + }, + { + "epoch": 0.6939616807982386, + "grad_norm": 0.6070845706503154, + "learning_rate": 3.7942963537131137e-06, + "loss": 0.2828, + "step": 14814 + }, + { + "epoch": 0.694008525788167, + "grad_norm": 0.5897185129033802, + "learning_rate": 3.7941340946143886e-06, + "loss": 0.3058, + "step": 14815 + }, + { + "epoch": 0.6940553707780953, + "grad_norm": 0.5872338088680336, + "learning_rate": 3.793971828068248e-06, + "loss": 0.2642, + "step": 14816 + }, + { + "epoch": 0.6941022157680236, + "grad_norm": 0.535831472942768, + "learning_rate": 3.7938095540756237e-06, + "loss": 0.2543, + "step": 14817 + }, + { + "epoch": 0.6941490607579519, + "grad_norm": 0.6088549630468083, + "learning_rate": 3.7936472726374507e-06, + "loss": 0.2865, + "step": 14818 + }, + { + "epoch": 0.6941959057478803, + "grad_norm": 0.5662082149913003, + "learning_rate": 3.7934849837546628e-06, + "loss": 0.2881, + "step": 14819 + }, + { + "epoch": 0.6942427507378086, + "grad_norm": 0.5953985914404492, + "learning_rate": 3.7933226874281946e-06, + "loss": 0.2899, + "step": 14820 + }, + { + "epoch": 0.694289595727737, + "grad_norm": 0.7105744770846236, + "learning_rate": 3.793160383658979e-06, + "loss": 0.2933, + "step": 14821 + }, + { + "epoch": 0.6943364407176652, + "grad_norm": 0.5940086219902284, + "learning_rate": 3.7929980724479506e-06, + "loss": 0.2756, + "step": 14822 + }, + { + "epoch": 0.6943832857075936, + "grad_norm": 0.5782585552857381, + "learning_rate": 3.792835753796043e-06, + "loss": 0.2689, + "step": 14823 + }, + { + "epoch": 0.6944301306975219, + "grad_norm": 0.6234808693136076, + "learning_rate": 3.7926734277041906e-06, + "loss": 0.2766, + "step": 14824 + }, + { + "epoch": 0.6944769756874503, + "grad_norm": 0.5637970690423099, + "learning_rate": 3.7925110941733266e-06, + "loss": 0.2727, + "step": 14825 + }, + { + "epoch": 0.6945238206773785, + "grad_norm": 0.5760930644092889, + "learning_rate": 3.792348753204388e-06, + "loss": 0.275, + "step": 14826 + }, + { + "epoch": 0.6945706656673069, + "grad_norm": 0.5635819857639321, + "learning_rate": 3.792186404798306e-06, + "loss": 0.277, + "step": 14827 + }, + { + "epoch": 0.6946175106572352, + "grad_norm": 0.5751785343183831, + "learning_rate": 3.7920240489560163e-06, + "loss": 0.2858, + "step": 14828 + }, + { + "epoch": 0.6946643556471636, + "grad_norm": 0.5626954551035253, + "learning_rate": 3.791861685678453e-06, + "loss": 0.2779, + "step": 14829 + }, + { + "epoch": 0.6947112006370919, + "grad_norm": 0.574409725288256, + "learning_rate": 3.7916993149665505e-06, + "loss": 0.2715, + "step": 14830 + }, + { + "epoch": 0.6947580456270201, + "grad_norm": 0.5963126052101837, + "learning_rate": 3.791536936821243e-06, + "loss": 0.2791, + "step": 14831 + }, + { + "epoch": 0.6948048906169485, + "grad_norm": 0.5396645400885763, + "learning_rate": 3.7913745512434647e-06, + "loss": 0.2566, + "step": 14832 + }, + { + "epoch": 0.6948517356068769, + "grad_norm": 0.6136592844597932, + "learning_rate": 3.791212158234151e-06, + "loss": 0.306, + "step": 14833 + }, + { + "epoch": 0.6948985805968052, + "grad_norm": 0.661119864020262, + "learning_rate": 3.791049757794235e-06, + "loss": 0.2844, + "step": 14834 + }, + { + "epoch": 0.6949454255867334, + "grad_norm": 0.6035805241391257, + "learning_rate": 3.790887349924653e-06, + "loss": 0.2797, + "step": 14835 + }, + { + "epoch": 0.6949922705766618, + "grad_norm": 0.5581957043029663, + "learning_rate": 3.790724934626338e-06, + "loss": 0.3033, + "step": 14836 + }, + { + "epoch": 0.6950391155665901, + "grad_norm": 0.6311302731529932, + "learning_rate": 3.7905625119002264e-06, + "loss": 0.2918, + "step": 14837 + }, + { + "epoch": 0.6950859605565185, + "grad_norm": 0.6160436023063277, + "learning_rate": 3.7904000817472507e-06, + "loss": 0.2916, + "step": 14838 + }, + { + "epoch": 0.6951328055464469, + "grad_norm": 0.5734862420096148, + "learning_rate": 3.7902376441683477e-06, + "loss": 0.274, + "step": 14839 + }, + { + "epoch": 0.6951796505363751, + "grad_norm": 0.5328232941718816, + "learning_rate": 3.7900751991644513e-06, + "loss": 0.2701, + "step": 14840 + }, + { + "epoch": 0.6952264955263034, + "grad_norm": 0.6492506963131284, + "learning_rate": 3.7899127467364966e-06, + "loss": 0.2853, + "step": 14841 + }, + { + "epoch": 0.6952733405162318, + "grad_norm": 0.6077580783938547, + "learning_rate": 3.7897502868854175e-06, + "loss": 0.2871, + "step": 14842 + }, + { + "epoch": 0.6953201855061601, + "grad_norm": 0.5434791498040236, + "learning_rate": 3.7895878196121504e-06, + "loss": 0.2998, + "step": 14843 + }, + { + "epoch": 0.6953670304960884, + "grad_norm": 0.6465030978227514, + "learning_rate": 3.7894253449176286e-06, + "loss": 0.2856, + "step": 14844 + }, + { + "epoch": 0.6954138754860167, + "grad_norm": 0.6897943923531082, + "learning_rate": 3.789262862802789e-06, + "loss": 0.286, + "step": 14845 + }, + { + "epoch": 0.6954607204759451, + "grad_norm": 0.6322661996179786, + "learning_rate": 3.7891003732685648e-06, + "loss": 0.2791, + "step": 14846 + }, + { + "epoch": 0.6955075654658734, + "grad_norm": 0.6612814918640593, + "learning_rate": 3.7889378763158933e-06, + "loss": 0.3146, + "step": 14847 + }, + { + "epoch": 0.6955544104558018, + "grad_norm": 0.5880791753524454, + "learning_rate": 3.788775371945707e-06, + "loss": 0.2851, + "step": 14848 + }, + { + "epoch": 0.69560125544573, + "grad_norm": 0.6191286171774482, + "learning_rate": 3.7886128601589424e-06, + "loss": 0.284, + "step": 14849 + }, + { + "epoch": 0.6956481004356584, + "grad_norm": 0.5286346150996211, + "learning_rate": 3.7884503409565353e-06, + "loss": 0.2618, + "step": 14850 + }, + { + "epoch": 0.6956949454255867, + "grad_norm": 0.5793037844082941, + "learning_rate": 3.7882878143394204e-06, + "loss": 0.2588, + "step": 14851 + }, + { + "epoch": 0.6957417904155151, + "grad_norm": 0.5523295704100643, + "learning_rate": 3.788125280308532e-06, + "loss": 0.275, + "step": 14852 + }, + { + "epoch": 0.6957886354054433, + "grad_norm": 0.6614930540113052, + "learning_rate": 3.7879627388648076e-06, + "loss": 0.3202, + "step": 14853 + }, + { + "epoch": 0.6958354803953717, + "grad_norm": 0.6091175985726417, + "learning_rate": 3.787800190009181e-06, + "loss": 0.2871, + "step": 14854 + }, + { + "epoch": 0.6958823253853, + "grad_norm": 0.6333181762824183, + "learning_rate": 3.787637633742587e-06, + "loss": 0.2813, + "step": 14855 + }, + { + "epoch": 0.6959291703752284, + "grad_norm": 0.6277926324257082, + "learning_rate": 3.787475070065963e-06, + "loss": 0.2891, + "step": 14856 + }, + { + "epoch": 0.6959760153651567, + "grad_norm": 0.6329706381375477, + "learning_rate": 3.787312498980243e-06, + "loss": 0.2935, + "step": 14857 + }, + { + "epoch": 0.696022860355085, + "grad_norm": 0.6244834914690548, + "learning_rate": 3.7871499204863637e-06, + "loss": 0.3013, + "step": 14858 + }, + { + "epoch": 0.6960697053450133, + "grad_norm": 0.5980897089038119, + "learning_rate": 3.7869873345852603e-06, + "loss": 0.2921, + "step": 14859 + }, + { + "epoch": 0.6961165503349417, + "grad_norm": 0.6110447607590825, + "learning_rate": 3.786824741277867e-06, + "loss": 0.2784, + "step": 14860 + }, + { + "epoch": 0.69616339532487, + "grad_norm": 0.6145639162808861, + "learning_rate": 3.7866621405651217e-06, + "loss": 0.2661, + "step": 14861 + }, + { + "epoch": 0.6962102403147983, + "grad_norm": 0.5552659699107249, + "learning_rate": 3.7864995324479594e-06, + "loss": 0.2663, + "step": 14862 + }, + { + "epoch": 0.6962570853047266, + "grad_norm": 0.5849802955316723, + "learning_rate": 3.786336916927315e-06, + "loss": 0.3026, + "step": 14863 + }, + { + "epoch": 0.696303930294655, + "grad_norm": 0.6131048623118197, + "learning_rate": 3.7861742940041246e-06, + "loss": 0.2756, + "step": 14864 + }, + { + "epoch": 0.6963507752845833, + "grad_norm": 0.5900946735487118, + "learning_rate": 3.7860116636793267e-06, + "loss": 0.2953, + "step": 14865 + }, + { + "epoch": 0.6963976202745117, + "grad_norm": 0.633128615080831, + "learning_rate": 3.785849025953853e-06, + "loss": 0.2937, + "step": 14866 + }, + { + "epoch": 0.6964444652644399, + "grad_norm": 0.6554069210791824, + "learning_rate": 3.7856863808286402e-06, + "loss": 0.2867, + "step": 14867 + }, + { + "epoch": 0.6964913102543683, + "grad_norm": 0.5851197895467618, + "learning_rate": 3.7855237283046275e-06, + "loss": 0.2901, + "step": 14868 + }, + { + "epoch": 0.6965381552442966, + "grad_norm": 0.6130831990568567, + "learning_rate": 3.785361068382748e-06, + "loss": 0.2701, + "step": 14869 + }, + { + "epoch": 0.696585000234225, + "grad_norm": 0.6341004116082113, + "learning_rate": 3.7851984010639385e-06, + "loss": 0.2953, + "step": 14870 + }, + { + "epoch": 0.6966318452241532, + "grad_norm": 0.5906819604560636, + "learning_rate": 3.7850357263491355e-06, + "loss": 0.2827, + "step": 14871 + }, + { + "epoch": 0.6966786902140816, + "grad_norm": 0.6226499702026783, + "learning_rate": 3.7848730442392754e-06, + "loss": 0.3085, + "step": 14872 + }, + { + "epoch": 0.6967255352040099, + "grad_norm": 0.5631571177481028, + "learning_rate": 3.7847103547352932e-06, + "loss": 0.2718, + "step": 14873 + }, + { + "epoch": 0.6967723801939383, + "grad_norm": 0.5860087249159416, + "learning_rate": 3.784547657838126e-06, + "loss": 0.2591, + "step": 14874 + }, + { + "epoch": 0.6968192251838666, + "grad_norm": 0.6458065623247269, + "learning_rate": 3.7843849535487097e-06, + "loss": 0.2741, + "step": 14875 + }, + { + "epoch": 0.6968660701737949, + "grad_norm": 0.5740700146992848, + "learning_rate": 3.7842222418679807e-06, + "loss": 0.2729, + "step": 14876 + }, + { + "epoch": 0.6969129151637232, + "grad_norm": 0.6042703799396303, + "learning_rate": 3.784059522796876e-06, + "loss": 0.2902, + "step": 14877 + }, + { + "epoch": 0.6969597601536516, + "grad_norm": 0.5887037382961089, + "learning_rate": 3.783896796336331e-06, + "loss": 0.2647, + "step": 14878 + }, + { + "epoch": 0.6970066051435799, + "grad_norm": 0.5618068029232949, + "learning_rate": 3.783734062487283e-06, + "loss": 0.2651, + "step": 14879 + }, + { + "epoch": 0.6970534501335082, + "grad_norm": 0.5583418568151411, + "learning_rate": 3.7835713212506685e-06, + "loss": 0.2627, + "step": 14880 + }, + { + "epoch": 0.6971002951234365, + "grad_norm": 0.623667299138535, + "learning_rate": 3.783408572627423e-06, + "loss": 0.2947, + "step": 14881 + }, + { + "epoch": 0.6971471401133649, + "grad_norm": 0.6517160582839474, + "learning_rate": 3.7832458166184838e-06, + "loss": 0.2636, + "step": 14882 + }, + { + "epoch": 0.6971939851032932, + "grad_norm": 0.6391840635784217, + "learning_rate": 3.7830830532247874e-06, + "loss": 0.2697, + "step": 14883 + }, + { + "epoch": 0.6972408300932216, + "grad_norm": 0.5771711363827798, + "learning_rate": 3.7829202824472706e-06, + "loss": 0.293, + "step": 14884 + }, + { + "epoch": 0.6972876750831498, + "grad_norm": 0.5980493247579522, + "learning_rate": 3.7827575042868698e-06, + "loss": 0.2648, + "step": 14885 + }, + { + "epoch": 0.6973345200730782, + "grad_norm": 0.6198601119272766, + "learning_rate": 3.7825947187445226e-06, + "loss": 0.2857, + "step": 14886 + }, + { + "epoch": 0.6973813650630065, + "grad_norm": 0.597920682970382, + "learning_rate": 3.782431925821165e-06, + "loss": 0.2714, + "step": 14887 + }, + { + "epoch": 0.6974282100529349, + "grad_norm": 0.5661617573964386, + "learning_rate": 3.7822691255177335e-06, + "loss": 0.2789, + "step": 14888 + }, + { + "epoch": 0.6974750550428631, + "grad_norm": 0.5992309725736211, + "learning_rate": 3.7821063178351657e-06, + "loss": 0.2694, + "step": 14889 + }, + { + "epoch": 0.6975219000327915, + "grad_norm": 0.6479156890122233, + "learning_rate": 3.7819435027743984e-06, + "loss": 0.284, + "step": 14890 + }, + { + "epoch": 0.6975687450227198, + "grad_norm": 0.5747354822917411, + "learning_rate": 3.7817806803363684e-06, + "loss": 0.2621, + "step": 14891 + }, + { + "epoch": 0.6976155900126482, + "grad_norm": 0.5397710117912657, + "learning_rate": 3.7816178505220125e-06, + "loss": 0.2531, + "step": 14892 + }, + { + "epoch": 0.6976624350025765, + "grad_norm": 0.5501272737771495, + "learning_rate": 3.7814550133322685e-06, + "loss": 0.2668, + "step": 14893 + }, + { + "epoch": 0.6977092799925048, + "grad_norm": 0.5368667338815217, + "learning_rate": 3.7812921687680726e-06, + "loss": 0.2722, + "step": 14894 + }, + { + "epoch": 0.6977561249824331, + "grad_norm": 0.6123894088475016, + "learning_rate": 3.7811293168303618e-06, + "loss": 0.292, + "step": 14895 + }, + { + "epoch": 0.6978029699723615, + "grad_norm": 0.5484125314228341, + "learning_rate": 3.7809664575200753e-06, + "loss": 0.2661, + "step": 14896 + }, + { + "epoch": 0.6978498149622898, + "grad_norm": 0.5830560571781784, + "learning_rate": 3.780803590838148e-06, + "loss": 0.2746, + "step": 14897 + }, + { + "epoch": 0.6978966599522181, + "grad_norm": 0.5843555290694002, + "learning_rate": 3.7806407167855173e-06, + "loss": 0.2797, + "step": 14898 + }, + { + "epoch": 0.6979435049421464, + "grad_norm": 0.585317061906005, + "learning_rate": 3.7804778353631216e-06, + "loss": 0.2864, + "step": 14899 + }, + { + "epoch": 0.6979903499320748, + "grad_norm": 0.639599014502365, + "learning_rate": 3.780314946571898e-06, + "loss": 0.2838, + "step": 14900 + }, + { + "epoch": 0.6980371949220031, + "grad_norm": 0.5748334286825847, + "learning_rate": 3.780152050412783e-06, + "loss": 0.2573, + "step": 14901 + }, + { + "epoch": 0.6980840399119315, + "grad_norm": 0.5394219949127885, + "learning_rate": 3.7799891468867156e-06, + "loss": 0.2722, + "step": 14902 + }, + { + "epoch": 0.6981308849018597, + "grad_norm": 0.599424347533483, + "learning_rate": 3.7798262359946324e-06, + "loss": 0.2888, + "step": 14903 + }, + { + "epoch": 0.6981777298917881, + "grad_norm": 0.5395573405889671, + "learning_rate": 3.7796633177374703e-06, + "loss": 0.2813, + "step": 14904 + }, + { + "epoch": 0.6982245748817164, + "grad_norm": 0.6207658030482973, + "learning_rate": 3.7795003921161677e-06, + "loss": 0.2916, + "step": 14905 + }, + { + "epoch": 0.6982714198716448, + "grad_norm": 0.5413984694532248, + "learning_rate": 3.7793374591316624e-06, + "loss": 0.2753, + "step": 14906 + }, + { + "epoch": 0.698318264861573, + "grad_norm": 0.552882505455665, + "learning_rate": 3.7791745187848906e-06, + "loss": 0.2857, + "step": 14907 + }, + { + "epoch": 0.6983651098515014, + "grad_norm": 0.5796433692083175, + "learning_rate": 3.7790115710767915e-06, + "loss": 0.2887, + "step": 14908 + }, + { + "epoch": 0.6984119548414297, + "grad_norm": 0.5774950843119953, + "learning_rate": 3.778848616008302e-06, + "loss": 0.2688, + "step": 14909 + }, + { + "epoch": 0.6984587998313581, + "grad_norm": 0.5883856925204081, + "learning_rate": 3.7786856535803606e-06, + "loss": 0.2858, + "step": 14910 + }, + { + "epoch": 0.6985056448212864, + "grad_norm": 0.5846356421765339, + "learning_rate": 3.778522683793905e-06, + "loss": 0.274, + "step": 14911 + }, + { + "epoch": 0.6985524898112146, + "grad_norm": 0.5584686694393407, + "learning_rate": 3.778359706649871e-06, + "loss": 0.2661, + "step": 14912 + }, + { + "epoch": 0.698599334801143, + "grad_norm": 0.6183997314843954, + "learning_rate": 3.7781967221492e-06, + "loss": 0.2835, + "step": 14913 + }, + { + "epoch": 0.6986461797910714, + "grad_norm": 0.5900373403957896, + "learning_rate": 3.778033730292827e-06, + "loss": 0.2853, + "step": 14914 + }, + { + "epoch": 0.6986930247809997, + "grad_norm": 0.5112420085630541, + "learning_rate": 3.777870731081692e-06, + "loss": 0.2675, + "step": 14915 + }, + { + "epoch": 0.698739869770928, + "grad_norm": 0.5758763567720606, + "learning_rate": 3.7777077245167315e-06, + "loss": 0.2719, + "step": 14916 + }, + { + "epoch": 0.6987867147608563, + "grad_norm": 0.5656290964419334, + "learning_rate": 3.777544710598885e-06, + "loss": 0.2839, + "step": 14917 + }, + { + "epoch": 0.6988335597507846, + "grad_norm": 0.5915390838732845, + "learning_rate": 3.7773816893290884e-06, + "loss": 0.2901, + "step": 14918 + }, + { + "epoch": 0.698880404740713, + "grad_norm": 0.60947287868944, + "learning_rate": 3.7772186607082817e-06, + "loss": 0.304, + "step": 14919 + }, + { + "epoch": 0.6989272497306414, + "grad_norm": 0.6260673445928332, + "learning_rate": 3.777055624737403e-06, + "loss": 0.286, + "step": 14920 + }, + { + "epoch": 0.6989740947205696, + "grad_norm": 0.5326666502414854, + "learning_rate": 3.77689258141739e-06, + "loss": 0.2728, + "step": 14921 + }, + { + "epoch": 0.699020939710498, + "grad_norm": 0.5711968439501355, + "learning_rate": 3.7767295307491815e-06, + "loss": 0.2821, + "step": 14922 + }, + { + "epoch": 0.6990677847004263, + "grad_norm": 0.6088382173863685, + "learning_rate": 3.7765664727337147e-06, + "loss": 0.2726, + "step": 14923 + }, + { + "epoch": 0.6991146296903546, + "grad_norm": 0.5771962709327012, + "learning_rate": 3.776403407371929e-06, + "loss": 0.2756, + "step": 14924 + }, + { + "epoch": 0.6991614746802829, + "grad_norm": 0.586708576728897, + "learning_rate": 3.7762403346647624e-06, + "loss": 0.2754, + "step": 14925 + }, + { + "epoch": 0.6992083196702112, + "grad_norm": 0.5782223816712335, + "learning_rate": 3.776077254613154e-06, + "loss": 0.2626, + "step": 14926 + }, + { + "epoch": 0.6992551646601396, + "grad_norm": 0.5594995209051151, + "learning_rate": 3.775914167218041e-06, + "loss": 0.2757, + "step": 14927 + }, + { + "epoch": 0.699302009650068, + "grad_norm": 0.5622235142038698, + "learning_rate": 3.7757510724803637e-06, + "loss": 0.2775, + "step": 14928 + }, + { + "epoch": 0.6993488546399963, + "grad_norm": 0.5768566217905116, + "learning_rate": 3.7755879704010588e-06, + "loss": 0.2637, + "step": 14929 + }, + { + "epoch": 0.6993956996299245, + "grad_norm": 0.5776122280840101, + "learning_rate": 3.775424860981066e-06, + "loss": 0.2783, + "step": 14930 + }, + { + "epoch": 0.6994425446198529, + "grad_norm": 0.5437190206886998, + "learning_rate": 3.7752617442213236e-06, + "loss": 0.2603, + "step": 14931 + }, + { + "epoch": 0.6994893896097812, + "grad_norm": 0.5805109135740104, + "learning_rate": 3.7750986201227704e-06, + "loss": 0.2655, + "step": 14932 + }, + { + "epoch": 0.6995362345997096, + "grad_norm": 0.6139932214562611, + "learning_rate": 3.774935488686345e-06, + "loss": 0.2871, + "step": 14933 + }, + { + "epoch": 0.6995830795896378, + "grad_norm": 0.6476006369699978, + "learning_rate": 3.774772349912986e-06, + "loss": 0.2806, + "step": 14934 + }, + { + "epoch": 0.6996299245795662, + "grad_norm": 0.6778786248425731, + "learning_rate": 3.774609203803634e-06, + "loss": 0.2867, + "step": 14935 + }, + { + "epoch": 0.6996767695694945, + "grad_norm": 0.5966736434717927, + "learning_rate": 3.774446050359225e-06, + "loss": 0.3052, + "step": 14936 + }, + { + "epoch": 0.6997236145594229, + "grad_norm": 0.5824074354213554, + "learning_rate": 3.774282889580699e-06, + "loss": 0.2761, + "step": 14937 + }, + { + "epoch": 0.6997704595493512, + "grad_norm": 0.5895132715034392, + "learning_rate": 3.774119721468996e-06, + "loss": 0.2787, + "step": 14938 + }, + { + "epoch": 0.6998173045392795, + "grad_norm": 0.5877793485289602, + "learning_rate": 3.7739565460250543e-06, + "loss": 0.2975, + "step": 14939 + }, + { + "epoch": 0.6998641495292078, + "grad_norm": 0.591025317371045, + "learning_rate": 3.7737933632498124e-06, + "loss": 0.2714, + "step": 14940 + }, + { + "epoch": 0.6999109945191362, + "grad_norm": 0.5956960036431799, + "learning_rate": 3.7736301731442106e-06, + "loss": 0.2845, + "step": 14941 + }, + { + "epoch": 0.6999578395090645, + "grad_norm": 0.5485661630450488, + "learning_rate": 3.773466975709187e-06, + "loss": 0.2693, + "step": 14942 + }, + { + "epoch": 0.7000046844989928, + "grad_norm": 0.5745346771424469, + "learning_rate": 3.7733037709456804e-06, + "loss": 0.29, + "step": 14943 + }, + { + "epoch": 0.7000515294889211, + "grad_norm": 0.5879677645317154, + "learning_rate": 3.773140558854631e-06, + "loss": 0.2812, + "step": 14944 + }, + { + "epoch": 0.7000983744788495, + "grad_norm": 0.5715754068477505, + "learning_rate": 3.7729773394369786e-06, + "loss": 0.2771, + "step": 14945 + }, + { + "epoch": 0.7001452194687778, + "grad_norm": 0.5987444691771083, + "learning_rate": 3.7728141126936603e-06, + "loss": 0.2716, + "step": 14946 + }, + { + "epoch": 0.7001920644587062, + "grad_norm": 0.5450155020240142, + "learning_rate": 3.772650878625617e-06, + "loss": 0.2816, + "step": 14947 + }, + { + "epoch": 0.7002389094486344, + "grad_norm": 0.5876814351633161, + "learning_rate": 3.7724876372337877e-06, + "loss": 0.2841, + "step": 14948 + }, + { + "epoch": 0.7002857544385628, + "grad_norm": 0.6018396617716991, + "learning_rate": 3.772324388519112e-06, + "loss": 0.3139, + "step": 14949 + }, + { + "epoch": 0.7003325994284911, + "grad_norm": 0.5448703012541644, + "learning_rate": 3.77216113248253e-06, + "loss": 0.2769, + "step": 14950 + }, + { + "epoch": 0.7003794444184195, + "grad_norm": 0.6327948112422526, + "learning_rate": 3.771997869124979e-06, + "loss": 0.2957, + "step": 14951 + }, + { + "epoch": 0.7004262894083477, + "grad_norm": 0.639808095106606, + "learning_rate": 3.7718345984474007e-06, + "loss": 0.2952, + "step": 14952 + }, + { + "epoch": 0.7004731343982761, + "grad_norm": 0.5684732163806246, + "learning_rate": 3.7716713204507346e-06, + "loss": 0.2855, + "step": 14953 + }, + { + "epoch": 0.7005199793882044, + "grad_norm": 0.5551487803395292, + "learning_rate": 3.7715080351359186e-06, + "loss": 0.2786, + "step": 14954 + }, + { + "epoch": 0.7005668243781328, + "grad_norm": 0.5925083857451366, + "learning_rate": 3.771344742503893e-06, + "loss": 0.3093, + "step": 14955 + }, + { + "epoch": 0.7006136693680611, + "grad_norm": 0.5783113125343429, + "learning_rate": 3.7711814425555996e-06, + "loss": 0.2953, + "step": 14956 + }, + { + "epoch": 0.7006605143579894, + "grad_norm": 0.5251033596303264, + "learning_rate": 3.7710181352919754e-06, + "loss": 0.2591, + "step": 14957 + }, + { + "epoch": 0.7007073593479177, + "grad_norm": 0.596195454806211, + "learning_rate": 3.7708548207139617e-06, + "loss": 0.2897, + "step": 14958 + }, + { + "epoch": 0.7007542043378461, + "grad_norm": 0.5635833826997075, + "learning_rate": 3.7706914988224974e-06, + "loss": 0.2852, + "step": 14959 + }, + { + "epoch": 0.7008010493277744, + "grad_norm": 0.5435371943789961, + "learning_rate": 3.7705281696185235e-06, + "loss": 0.2715, + "step": 14960 + }, + { + "epoch": 0.7008478943177027, + "grad_norm": 0.6134530578369526, + "learning_rate": 3.770364833102979e-06, + "loss": 0.2936, + "step": 14961 + }, + { + "epoch": 0.700894739307631, + "grad_norm": 0.5829159986865601, + "learning_rate": 3.7702014892768046e-06, + "loss": 0.2697, + "step": 14962 + }, + { + "epoch": 0.7009415842975594, + "grad_norm": 0.6482749723669153, + "learning_rate": 3.77003813814094e-06, + "loss": 0.3084, + "step": 14963 + }, + { + "epoch": 0.7009884292874877, + "grad_norm": 0.5693891763387208, + "learning_rate": 3.7698747796963244e-06, + "loss": 0.2744, + "step": 14964 + }, + { + "epoch": 0.7010352742774161, + "grad_norm": 0.664200078440164, + "learning_rate": 3.7697114139438982e-06, + "loss": 0.3234, + "step": 14965 + }, + { + "epoch": 0.7010821192673443, + "grad_norm": 0.625379535712288, + "learning_rate": 3.7695480408846034e-06, + "loss": 0.2822, + "step": 14966 + }, + { + "epoch": 0.7011289642572727, + "grad_norm": 0.5589016435011008, + "learning_rate": 3.769384660519378e-06, + "loss": 0.2673, + "step": 14967 + }, + { + "epoch": 0.701175809247201, + "grad_norm": 0.599464067710604, + "learning_rate": 3.7692212728491623e-06, + "loss": 0.2852, + "step": 14968 + }, + { + "epoch": 0.7012226542371294, + "grad_norm": 0.5951478296915542, + "learning_rate": 3.769057877874898e-06, + "loss": 0.2898, + "step": 14969 + }, + { + "epoch": 0.7012694992270576, + "grad_norm": 0.5498388055683116, + "learning_rate": 3.768894475597524e-06, + "loss": 0.2733, + "step": 14970 + }, + { + "epoch": 0.701316344216986, + "grad_norm": 0.554496770700727, + "learning_rate": 3.768731066017982e-06, + "loss": 0.2796, + "step": 14971 + }, + { + "epoch": 0.7013631892069143, + "grad_norm": 0.5897720228905361, + "learning_rate": 3.7685676491372114e-06, + "loss": 0.3019, + "step": 14972 + }, + { + "epoch": 0.7014100341968427, + "grad_norm": 0.6044788446491407, + "learning_rate": 3.7684042249561527e-06, + "loss": 0.2874, + "step": 14973 + }, + { + "epoch": 0.701456879186771, + "grad_norm": 0.5866751772223024, + "learning_rate": 3.768240793475746e-06, + "loss": 0.2797, + "step": 14974 + }, + { + "epoch": 0.7015037241766993, + "grad_norm": 0.6138651771800405, + "learning_rate": 3.768077354696933e-06, + "loss": 0.2921, + "step": 14975 + }, + { + "epoch": 0.7015505691666276, + "grad_norm": 0.5927762085141598, + "learning_rate": 3.767913908620653e-06, + "loss": 0.2985, + "step": 14976 + }, + { + "epoch": 0.701597414156556, + "grad_norm": 0.5840644495645828, + "learning_rate": 3.767750455247847e-06, + "loss": 0.2761, + "step": 14977 + }, + { + "epoch": 0.7016442591464843, + "grad_norm": 0.6015086401641211, + "learning_rate": 3.7675869945794563e-06, + "loss": 0.2836, + "step": 14978 + }, + { + "epoch": 0.7016911041364126, + "grad_norm": 0.5794807022676287, + "learning_rate": 3.7674235266164203e-06, + "loss": 0.2679, + "step": 14979 + }, + { + "epoch": 0.7017379491263409, + "grad_norm": 0.597724295343655, + "learning_rate": 3.767260051359682e-06, + "loss": 0.2793, + "step": 14980 + }, + { + "epoch": 0.7017847941162693, + "grad_norm": 0.6302038186058748, + "learning_rate": 3.7670965688101795e-06, + "loss": 0.3017, + "step": 14981 + }, + { + "epoch": 0.7018316391061976, + "grad_norm": 0.5718288061838609, + "learning_rate": 3.7669330789688547e-06, + "loss": 0.2817, + "step": 14982 + }, + { + "epoch": 0.701878484096126, + "grad_norm": 0.6070500677741356, + "learning_rate": 3.766769581836649e-06, + "loss": 0.2725, + "step": 14983 + }, + { + "epoch": 0.7019253290860542, + "grad_norm": 0.5846297906208153, + "learning_rate": 3.7666060774145023e-06, + "loss": 0.2765, + "step": 14984 + }, + { + "epoch": 0.7019721740759826, + "grad_norm": 0.6144702243330138, + "learning_rate": 3.7664425657033556e-06, + "loss": 0.2793, + "step": 14985 + }, + { + "epoch": 0.7020190190659109, + "grad_norm": 0.5845443686255034, + "learning_rate": 3.7662790467041506e-06, + "loss": 0.2869, + "step": 14986 + }, + { + "epoch": 0.7020658640558393, + "grad_norm": 0.542825664556767, + "learning_rate": 3.766115520417829e-06, + "loss": 0.2784, + "step": 14987 + }, + { + "epoch": 0.7021127090457675, + "grad_norm": 0.6074444630553755, + "learning_rate": 3.7659519868453293e-06, + "loss": 0.2916, + "step": 14988 + }, + { + "epoch": 0.7021595540356959, + "grad_norm": 0.5781122220063327, + "learning_rate": 3.765788445987594e-06, + "loss": 0.2809, + "step": 14989 + }, + { + "epoch": 0.7022063990256242, + "grad_norm": 0.5613531910202141, + "learning_rate": 3.7656248978455655e-06, + "loss": 0.2859, + "step": 14990 + }, + { + "epoch": 0.7022532440155526, + "grad_norm": 0.6172363375033877, + "learning_rate": 3.7654613424201838e-06, + "loss": 0.3119, + "step": 14991 + }, + { + "epoch": 0.7023000890054809, + "grad_norm": 0.5663580225811132, + "learning_rate": 3.765297779712389e-06, + "loss": 0.2835, + "step": 14992 + }, + { + "epoch": 0.7023469339954092, + "grad_norm": 0.6114593010709695, + "learning_rate": 3.765134209723125e-06, + "loss": 0.3072, + "step": 14993 + }, + { + "epoch": 0.7023937789853375, + "grad_norm": 0.5961030192723527, + "learning_rate": 3.7649706324533305e-06, + "loss": 0.2667, + "step": 14994 + }, + { + "epoch": 0.7024406239752659, + "grad_norm": 0.5639286569237186, + "learning_rate": 3.764807047903948e-06, + "loss": 0.273, + "step": 14995 + }, + { + "epoch": 0.7024874689651942, + "grad_norm": 0.5717555827356996, + "learning_rate": 3.7646434560759187e-06, + "loss": 0.2961, + "step": 14996 + }, + { + "epoch": 0.7025343139551224, + "grad_norm": 0.5781767914921212, + "learning_rate": 3.7644798569701847e-06, + "loss": 0.2864, + "step": 14997 + }, + { + "epoch": 0.7025811589450508, + "grad_norm": 0.592849360305199, + "learning_rate": 3.7643162505876863e-06, + "loss": 0.272, + "step": 14998 + }, + { + "epoch": 0.7026280039349792, + "grad_norm": 0.5651778859543273, + "learning_rate": 3.7641526369293667e-06, + "loss": 0.2728, + "step": 14999 + }, + { + "epoch": 0.7026748489249075, + "grad_norm": 0.5717060732282593, + "learning_rate": 3.7639890159961657e-06, + "loss": 0.2642, + "step": 15000 + }, + { + "epoch": 0.7027216939148359, + "grad_norm": 0.6781270225593915, + "learning_rate": 3.7638253877890253e-06, + "loss": 0.3078, + "step": 15001 + }, + { + "epoch": 0.7027685389047641, + "grad_norm": 0.6142818286208918, + "learning_rate": 3.763661752308888e-06, + "loss": 0.2795, + "step": 15002 + }, + { + "epoch": 0.7028153838946924, + "grad_norm": 0.5611548965795929, + "learning_rate": 3.763498109556695e-06, + "loss": 0.275, + "step": 15003 + }, + { + "epoch": 0.7028622288846208, + "grad_norm": 0.6278158823479946, + "learning_rate": 3.763334459533387e-06, + "loss": 0.3118, + "step": 15004 + }, + { + "epoch": 0.7029090738745492, + "grad_norm": 0.5890206411727745, + "learning_rate": 3.763170802239908e-06, + "loss": 0.2661, + "step": 15005 + }, + { + "epoch": 0.7029559188644774, + "grad_norm": 0.5413514420287932, + "learning_rate": 3.7630071376771984e-06, + "loss": 0.251, + "step": 15006 + }, + { + "epoch": 0.7030027638544057, + "grad_norm": 0.6091414906380983, + "learning_rate": 3.762843465846199e-06, + "loss": 0.3035, + "step": 15007 + }, + { + "epoch": 0.7030496088443341, + "grad_norm": 0.5806110305718103, + "learning_rate": 3.7626797867478536e-06, + "loss": 0.2897, + "step": 15008 + }, + { + "epoch": 0.7030964538342624, + "grad_norm": 0.6036027825258623, + "learning_rate": 3.7625161003831036e-06, + "loss": 0.299, + "step": 15009 + }, + { + "epoch": 0.7031432988241908, + "grad_norm": 0.5916642835290143, + "learning_rate": 3.7623524067528904e-06, + "loss": 0.2963, + "step": 15010 + }, + { + "epoch": 0.703190143814119, + "grad_norm": 0.5836840088962437, + "learning_rate": 3.7621887058581564e-06, + "loss": 0.268, + "step": 15011 + }, + { + "epoch": 0.7032369888040474, + "grad_norm": 0.5615708818517605, + "learning_rate": 3.7620249976998437e-06, + "loss": 0.2791, + "step": 15012 + }, + { + "epoch": 0.7032838337939757, + "grad_norm": 0.6296155140061145, + "learning_rate": 3.761861282278894e-06, + "loss": 0.3044, + "step": 15013 + }, + { + "epoch": 0.7033306787839041, + "grad_norm": 0.6233879608157942, + "learning_rate": 3.7616975595962507e-06, + "loss": 0.2631, + "step": 15014 + }, + { + "epoch": 0.7033775237738323, + "grad_norm": 0.6419515680787227, + "learning_rate": 3.7615338296528546e-06, + "loss": 0.2992, + "step": 15015 + }, + { + "epoch": 0.7034243687637607, + "grad_norm": 0.6402545003706871, + "learning_rate": 3.7613700924496475e-06, + "loss": 0.3196, + "step": 15016 + }, + { + "epoch": 0.703471213753689, + "grad_norm": 0.6147110753509235, + "learning_rate": 3.761206347987574e-06, + "loss": 0.2846, + "step": 15017 + }, + { + "epoch": 0.7035180587436174, + "grad_norm": 0.5776089417300776, + "learning_rate": 3.761042596267574e-06, + "loss": 0.2975, + "step": 15018 + }, + { + "epoch": 0.7035649037335457, + "grad_norm": 0.5524436817432516, + "learning_rate": 3.7608788372905912e-06, + "loss": 0.2693, + "step": 15019 + }, + { + "epoch": 0.703611748723474, + "grad_norm": 0.5989346692089016, + "learning_rate": 3.760715071057568e-06, + "loss": 0.2778, + "step": 15020 + }, + { + "epoch": 0.7036585937134023, + "grad_norm": 0.6231488305207288, + "learning_rate": 3.7605512975694457e-06, + "loss": 0.3154, + "step": 15021 + }, + { + "epoch": 0.7037054387033307, + "grad_norm": 0.6201768870489494, + "learning_rate": 3.7603875168271675e-06, + "loss": 0.2837, + "step": 15022 + }, + { + "epoch": 0.703752283693259, + "grad_norm": 0.7207334980628111, + "learning_rate": 3.7602237288316768e-06, + "loss": 0.2876, + "step": 15023 + }, + { + "epoch": 0.7037991286831873, + "grad_norm": 0.6033345080966376, + "learning_rate": 3.760059933583914e-06, + "loss": 0.2807, + "step": 15024 + }, + { + "epoch": 0.7038459736731156, + "grad_norm": 0.5606193481026547, + "learning_rate": 3.759896131084824e-06, + "loss": 0.2618, + "step": 15025 + }, + { + "epoch": 0.703892818663044, + "grad_norm": 0.5343563092255046, + "learning_rate": 3.759732321335348e-06, + "loss": 0.2692, + "step": 15026 + }, + { + "epoch": 0.7039396636529723, + "grad_norm": 0.6243264437626439, + "learning_rate": 3.7595685043364293e-06, + "loss": 0.281, + "step": 15027 + }, + { + "epoch": 0.7039865086429007, + "grad_norm": 0.6114243232162938, + "learning_rate": 3.75940468008901e-06, + "loss": 0.2881, + "step": 15028 + }, + { + "epoch": 0.7040333536328289, + "grad_norm": 0.6535878065398575, + "learning_rate": 3.7592408485940334e-06, + "loss": 0.3169, + "step": 15029 + }, + { + "epoch": 0.7040801986227573, + "grad_norm": 0.577791030199981, + "learning_rate": 3.759077009852443e-06, + "loss": 0.2861, + "step": 15030 + }, + { + "epoch": 0.7041270436126856, + "grad_norm": 0.5722046424712949, + "learning_rate": 3.75891316386518e-06, + "loss": 0.2755, + "step": 15031 + }, + { + "epoch": 0.704173888602614, + "grad_norm": 0.5727048872602003, + "learning_rate": 3.7587493106331885e-06, + "loss": 0.2863, + "step": 15032 + }, + { + "epoch": 0.7042207335925422, + "grad_norm": 0.6181003001733418, + "learning_rate": 3.7585854501574116e-06, + "loss": 0.2889, + "step": 15033 + }, + { + "epoch": 0.7042675785824706, + "grad_norm": 0.5503494789259397, + "learning_rate": 3.75842158243879e-06, + "loss": 0.2731, + "step": 15034 + }, + { + "epoch": 0.7043144235723989, + "grad_norm": 0.610330184138104, + "learning_rate": 3.75825770747827e-06, + "loss": 0.2825, + "step": 15035 + }, + { + "epoch": 0.7043612685623273, + "grad_norm": 0.5824215206716142, + "learning_rate": 3.758093825276794e-06, + "loss": 0.271, + "step": 15036 + }, + { + "epoch": 0.7044081135522556, + "grad_norm": 0.5726507479780989, + "learning_rate": 3.7579299358353027e-06, + "loss": 0.2775, + "step": 15037 + }, + { + "epoch": 0.7044549585421839, + "grad_norm": 0.5504071835738883, + "learning_rate": 3.757766039154741e-06, + "loss": 0.2673, + "step": 15038 + }, + { + "epoch": 0.7045018035321122, + "grad_norm": 0.57308137531439, + "learning_rate": 3.757602135236052e-06, + "loss": 0.2763, + "step": 15039 + }, + { + "epoch": 0.7045486485220406, + "grad_norm": 0.5836269270374358, + "learning_rate": 3.757438224080179e-06, + "loss": 0.2915, + "step": 15040 + }, + { + "epoch": 0.7045954935119689, + "grad_norm": 0.5682710223204519, + "learning_rate": 3.7572743056880646e-06, + "loss": 0.2625, + "step": 15041 + }, + { + "epoch": 0.7046423385018972, + "grad_norm": 0.6162423162819004, + "learning_rate": 3.7571103800606534e-06, + "loss": 0.2835, + "step": 15042 + }, + { + "epoch": 0.7046891834918255, + "grad_norm": 0.5862709085167112, + "learning_rate": 3.7569464471988876e-06, + "loss": 0.286, + "step": 15043 + }, + { + "epoch": 0.7047360284817539, + "grad_norm": 0.5712666501806102, + "learning_rate": 3.7567825071037102e-06, + "loss": 0.2717, + "step": 15044 + }, + { + "epoch": 0.7047828734716822, + "grad_norm": 0.6064806337374696, + "learning_rate": 3.7566185597760662e-06, + "loss": 0.2944, + "step": 15045 + }, + { + "epoch": 0.7048297184616106, + "grad_norm": 0.5398691809131115, + "learning_rate": 3.7564546052168975e-06, + "loss": 0.2805, + "step": 15046 + }, + { + "epoch": 0.7048765634515388, + "grad_norm": 0.5847454376141382, + "learning_rate": 3.7562906434271494e-06, + "loss": 0.2636, + "step": 15047 + }, + { + "epoch": 0.7049234084414672, + "grad_norm": 0.6263535048593274, + "learning_rate": 3.756126674407764e-06, + "loss": 0.282, + "step": 15048 + }, + { + "epoch": 0.7049702534313955, + "grad_norm": 0.5723628700412469, + "learning_rate": 3.755962698159684e-06, + "loss": 0.3022, + "step": 15049 + }, + { + "epoch": 0.7050170984213239, + "grad_norm": 0.6860420477417115, + "learning_rate": 3.7557987146838558e-06, + "loss": 0.3075, + "step": 15050 + }, + { + "epoch": 0.7050639434112521, + "grad_norm": 0.6151292381636055, + "learning_rate": 3.755634723981222e-06, + "loss": 0.2956, + "step": 15051 + }, + { + "epoch": 0.7051107884011805, + "grad_norm": 0.5706109785569136, + "learning_rate": 3.7554707260527246e-06, + "loss": 0.2708, + "step": 15052 + }, + { + "epoch": 0.7051576333911088, + "grad_norm": 0.6294938450250481, + "learning_rate": 3.75530672089931e-06, + "loss": 0.2829, + "step": 15053 + }, + { + "epoch": 0.7052044783810372, + "grad_norm": 0.5561603192505084, + "learning_rate": 3.75514270852192e-06, + "loss": 0.2766, + "step": 15054 + }, + { + "epoch": 0.7052513233709655, + "grad_norm": 0.6071957732527604, + "learning_rate": 3.754978688921499e-06, + "loss": 0.2942, + "step": 15055 + }, + { + "epoch": 0.7052981683608938, + "grad_norm": 0.592491670574105, + "learning_rate": 3.754814662098991e-06, + "loss": 0.2894, + "step": 15056 + }, + { + "epoch": 0.7053450133508221, + "grad_norm": 0.5810457099788886, + "learning_rate": 3.7546506280553408e-06, + "loss": 0.2837, + "step": 15057 + }, + { + "epoch": 0.7053918583407505, + "grad_norm": 0.5804682364790298, + "learning_rate": 3.754486586791491e-06, + "loss": 0.2732, + "step": 15058 + }, + { + "epoch": 0.7054387033306788, + "grad_norm": 0.6252527330906709, + "learning_rate": 3.754322538308386e-06, + "loss": 0.289, + "step": 15059 + }, + { + "epoch": 0.7054855483206071, + "grad_norm": 0.552080309583587, + "learning_rate": 3.7541584826069706e-06, + "loss": 0.2886, + "step": 15060 + }, + { + "epoch": 0.7055323933105354, + "grad_norm": 0.5738551498301274, + "learning_rate": 3.753994419688188e-06, + "loss": 0.2804, + "step": 15061 + }, + { + "epoch": 0.7055792383004638, + "grad_norm": 0.6153407033520844, + "learning_rate": 3.7538303495529827e-06, + "loss": 0.2865, + "step": 15062 + }, + { + "epoch": 0.7056260832903921, + "grad_norm": 0.6441721522456346, + "learning_rate": 3.753666272202299e-06, + "loss": 0.3021, + "step": 15063 + }, + { + "epoch": 0.7056729282803205, + "grad_norm": 0.6581160466988345, + "learning_rate": 3.7535021876370816e-06, + "loss": 0.29, + "step": 15064 + }, + { + "epoch": 0.7057197732702487, + "grad_norm": 0.5708999273358917, + "learning_rate": 3.7533380958582734e-06, + "loss": 0.2899, + "step": 15065 + }, + { + "epoch": 0.7057666182601771, + "grad_norm": 0.557309538684151, + "learning_rate": 3.7531739968668197e-06, + "loss": 0.2739, + "step": 15066 + }, + { + "epoch": 0.7058134632501054, + "grad_norm": 0.5453491971243931, + "learning_rate": 3.7530098906636643e-06, + "loss": 0.2722, + "step": 15067 + }, + { + "epoch": 0.7058603082400338, + "grad_norm": 0.5765231048689913, + "learning_rate": 3.7528457772497517e-06, + "loss": 0.2946, + "step": 15068 + }, + { + "epoch": 0.705907153229962, + "grad_norm": 0.603096274735025, + "learning_rate": 3.7526816566260277e-06, + "loss": 0.2851, + "step": 15069 + }, + { + "epoch": 0.7059539982198904, + "grad_norm": 0.6103730620867995, + "learning_rate": 3.7525175287934345e-06, + "loss": 0.2722, + "step": 15070 + }, + { + "epoch": 0.7060008432098187, + "grad_norm": 0.5815284909109875, + "learning_rate": 3.7523533937529184e-06, + "loss": 0.2663, + "step": 15071 + }, + { + "epoch": 0.7060476881997471, + "grad_norm": 0.627725433936712, + "learning_rate": 3.752189251505423e-06, + "loss": 0.2981, + "step": 15072 + }, + { + "epoch": 0.7060945331896754, + "grad_norm": 0.5578911229651686, + "learning_rate": 3.7520251020518927e-06, + "loss": 0.2765, + "step": 15073 + }, + { + "epoch": 0.7061413781796037, + "grad_norm": 0.5773400413165329, + "learning_rate": 3.751860945393273e-06, + "loss": 0.2804, + "step": 15074 + }, + { + "epoch": 0.706188223169532, + "grad_norm": 0.5657276942131892, + "learning_rate": 3.7516967815305095e-06, + "loss": 0.267, + "step": 15075 + }, + { + "epoch": 0.7062350681594604, + "grad_norm": 0.6116859120626069, + "learning_rate": 3.7515326104645437e-06, + "loss": 0.3004, + "step": 15076 + }, + { + "epoch": 0.7062819131493887, + "grad_norm": 0.5546104912019554, + "learning_rate": 3.751368432196323e-06, + "loss": 0.2821, + "step": 15077 + }, + { + "epoch": 0.706328758139317, + "grad_norm": 0.5437976754729537, + "learning_rate": 3.7512042467267917e-06, + "loss": 0.2725, + "step": 15078 + }, + { + "epoch": 0.7063756031292453, + "grad_norm": 0.5745053806955733, + "learning_rate": 3.7510400540568948e-06, + "loss": 0.2959, + "step": 15079 + }, + { + "epoch": 0.7064224481191737, + "grad_norm": 0.5821286228183251, + "learning_rate": 3.7508758541875757e-06, + "loss": 0.2998, + "step": 15080 + }, + { + "epoch": 0.706469293109102, + "grad_norm": 0.5748238188284359, + "learning_rate": 3.7507116471197814e-06, + "loss": 0.2989, + "step": 15081 + }, + { + "epoch": 0.7065161380990304, + "grad_norm": 0.6137686079253649, + "learning_rate": 3.7505474328544555e-06, + "loss": 0.2974, + "step": 15082 + }, + { + "epoch": 0.7065629830889586, + "grad_norm": 0.587791788672615, + "learning_rate": 3.7503832113925433e-06, + "loss": 0.2777, + "step": 15083 + }, + { + "epoch": 0.706609828078887, + "grad_norm": 0.6146803098789988, + "learning_rate": 3.7502189827349905e-06, + "loss": 0.2754, + "step": 15084 + }, + { + "epoch": 0.7066566730688153, + "grad_norm": 0.607224274199998, + "learning_rate": 3.750054746882742e-06, + "loss": 0.2957, + "step": 15085 + }, + { + "epoch": 0.7067035180587437, + "grad_norm": 0.6094357178818187, + "learning_rate": 3.7498905038367418e-06, + "loss": 0.2715, + "step": 15086 + }, + { + "epoch": 0.7067503630486719, + "grad_norm": 0.5820230582498768, + "learning_rate": 3.7497262535979363e-06, + "loss": 0.2894, + "step": 15087 + }, + { + "epoch": 0.7067972080386002, + "grad_norm": 0.5883804395615498, + "learning_rate": 3.749561996167269e-06, + "loss": 0.284, + "step": 15088 + }, + { + "epoch": 0.7068440530285286, + "grad_norm": 0.6039356474064245, + "learning_rate": 3.7493977315456882e-06, + "loss": 0.2913, + "step": 15089 + }, + { + "epoch": 0.706890898018457, + "grad_norm": 0.634048951933721, + "learning_rate": 3.7492334597341374e-06, + "loss": 0.2865, + "step": 15090 + }, + { + "epoch": 0.7069377430083853, + "grad_norm": 0.5460070528801055, + "learning_rate": 3.7490691807335613e-06, + "loss": 0.2822, + "step": 15091 + }, + { + "epoch": 0.7069845879983135, + "grad_norm": 0.6171010373282639, + "learning_rate": 3.748904894544906e-06, + "loss": 0.2692, + "step": 15092 + }, + { + "epoch": 0.7070314329882419, + "grad_norm": 0.5692574230150265, + "learning_rate": 3.7487406011691173e-06, + "loss": 0.2904, + "step": 15093 + }, + { + "epoch": 0.7070782779781702, + "grad_norm": 0.637994706215439, + "learning_rate": 3.74857630060714e-06, + "loss": 0.2851, + "step": 15094 + }, + { + "epoch": 0.7071251229680986, + "grad_norm": 0.5709271305262456, + "learning_rate": 3.74841199285992e-06, + "loss": 0.2879, + "step": 15095 + }, + { + "epoch": 0.7071719679580268, + "grad_norm": 0.5675636150180942, + "learning_rate": 3.748247677928403e-06, + "loss": 0.2725, + "step": 15096 + }, + { + "epoch": 0.7072188129479552, + "grad_norm": 0.5904548771204469, + "learning_rate": 3.7480833558135345e-06, + "loss": 0.2744, + "step": 15097 + }, + { + "epoch": 0.7072656579378835, + "grad_norm": 0.6036998032231109, + "learning_rate": 3.747919026516259e-06, + "loss": 0.2822, + "step": 15098 + }, + { + "epoch": 0.7073125029278119, + "grad_norm": 0.5186416299643194, + "learning_rate": 3.747754690037524e-06, + "loss": 0.2529, + "step": 15099 + }, + { + "epoch": 0.7073593479177402, + "grad_norm": 0.5584912136758289, + "learning_rate": 3.747590346378274e-06, + "loss": 0.2912, + "step": 15100 + }, + { + "epoch": 0.7074061929076685, + "grad_norm": 0.5941192944678259, + "learning_rate": 3.7474259955394552e-06, + "loss": 0.2932, + "step": 15101 + }, + { + "epoch": 0.7074530378975968, + "grad_norm": 0.5933288446400978, + "learning_rate": 3.747261637522014e-06, + "loss": 0.2843, + "step": 15102 + }, + { + "epoch": 0.7074998828875252, + "grad_norm": 0.6529846860275834, + "learning_rate": 3.747097272326895e-06, + "loss": 0.2653, + "step": 15103 + }, + { + "epoch": 0.7075467278774535, + "grad_norm": 0.5759638386021327, + "learning_rate": 3.7469328999550446e-06, + "loss": 0.2687, + "step": 15104 + }, + { + "epoch": 0.7075935728673818, + "grad_norm": 0.5925704539879854, + "learning_rate": 3.7467685204074085e-06, + "loss": 0.277, + "step": 15105 + }, + { + "epoch": 0.7076404178573101, + "grad_norm": 0.6205287383620292, + "learning_rate": 3.7466041336849336e-06, + "loss": 0.2821, + "step": 15106 + }, + { + "epoch": 0.7076872628472385, + "grad_norm": 0.61637231343308, + "learning_rate": 3.746439739788565e-06, + "loss": 0.2685, + "step": 15107 + }, + { + "epoch": 0.7077341078371668, + "grad_norm": 0.6049575435165319, + "learning_rate": 3.7462753387192484e-06, + "loss": 0.2846, + "step": 15108 + }, + { + "epoch": 0.7077809528270952, + "grad_norm": 0.6501098758799247, + "learning_rate": 3.7461109304779308e-06, + "loss": 0.3087, + "step": 15109 + }, + { + "epoch": 0.7078277978170234, + "grad_norm": 0.5755525964380455, + "learning_rate": 3.745946515065558e-06, + "loss": 0.2851, + "step": 15110 + }, + { + "epoch": 0.7078746428069518, + "grad_norm": 0.6274988174487697, + "learning_rate": 3.745782092483076e-06, + "loss": 0.2878, + "step": 15111 + }, + { + "epoch": 0.7079214877968801, + "grad_norm": 0.5683076455377234, + "learning_rate": 3.745617662731432e-06, + "loss": 0.293, + "step": 15112 + }, + { + "epoch": 0.7079683327868085, + "grad_norm": 0.5712638805774459, + "learning_rate": 3.745453225811571e-06, + "loss": 0.2677, + "step": 15113 + }, + { + "epoch": 0.7080151777767367, + "grad_norm": 0.6141884191819674, + "learning_rate": 3.745288781724439e-06, + "loss": 0.2867, + "step": 15114 + }, + { + "epoch": 0.7080620227666651, + "grad_norm": 0.5558468613888965, + "learning_rate": 3.745124330470984e-06, + "loss": 0.2833, + "step": 15115 + }, + { + "epoch": 0.7081088677565934, + "grad_norm": 0.5493868320799835, + "learning_rate": 3.744959872052151e-06, + "loss": 0.2559, + "step": 15116 + }, + { + "epoch": 0.7081557127465218, + "grad_norm": 0.6089865229493443, + "learning_rate": 3.7447954064688876e-06, + "loss": 0.2968, + "step": 15117 + }, + { + "epoch": 0.7082025577364501, + "grad_norm": 0.6391287944446595, + "learning_rate": 3.7446309337221388e-06, + "loss": 0.2851, + "step": 15118 + }, + { + "epoch": 0.7082494027263784, + "grad_norm": 0.6124156998296812, + "learning_rate": 3.744466453812851e-06, + "loss": 0.3014, + "step": 15119 + }, + { + "epoch": 0.7082962477163067, + "grad_norm": 0.5910738179181844, + "learning_rate": 3.744301966741973e-06, + "loss": 0.3006, + "step": 15120 + }, + { + "epoch": 0.7083430927062351, + "grad_norm": 0.5628231189563925, + "learning_rate": 3.7441374725104494e-06, + "loss": 0.2725, + "step": 15121 + }, + { + "epoch": 0.7083899376961634, + "grad_norm": 0.5935907796218686, + "learning_rate": 3.743972971119227e-06, + "loss": 0.2669, + "step": 15122 + }, + { + "epoch": 0.7084367826860917, + "grad_norm": 0.6540078435369657, + "learning_rate": 3.7438084625692528e-06, + "loss": 0.3129, + "step": 15123 + }, + { + "epoch": 0.70848362767602, + "grad_norm": 0.5550705930071157, + "learning_rate": 3.7436439468614744e-06, + "loss": 0.2772, + "step": 15124 + }, + { + "epoch": 0.7085304726659484, + "grad_norm": 0.5723696226044215, + "learning_rate": 3.7434794239968365e-06, + "loss": 0.2701, + "step": 15125 + }, + { + "epoch": 0.7085773176558767, + "grad_norm": 0.6344400685470198, + "learning_rate": 3.7433148939762876e-06, + "loss": 0.2899, + "step": 15126 + }, + { + "epoch": 0.7086241626458051, + "grad_norm": 0.5619962491827803, + "learning_rate": 3.7431503568007743e-06, + "loss": 0.2805, + "step": 15127 + }, + { + "epoch": 0.7086710076357333, + "grad_norm": 0.6278639569201554, + "learning_rate": 3.7429858124712427e-06, + "loss": 0.2827, + "step": 15128 + }, + { + "epoch": 0.7087178526256617, + "grad_norm": 0.6032919922002119, + "learning_rate": 3.74282126098864e-06, + "loss": 0.2732, + "step": 15129 + }, + { + "epoch": 0.70876469761559, + "grad_norm": 0.5839839489668622, + "learning_rate": 3.7426567023539133e-06, + "loss": 0.2922, + "step": 15130 + }, + { + "epoch": 0.7088115426055184, + "grad_norm": 0.6066814494040004, + "learning_rate": 3.74249213656801e-06, + "loss": 0.283, + "step": 15131 + }, + { + "epoch": 0.7088583875954466, + "grad_norm": 0.6137535537645602, + "learning_rate": 3.7423275636318767e-06, + "loss": 0.2939, + "step": 15132 + }, + { + "epoch": 0.708905232585375, + "grad_norm": 0.6037142077273102, + "learning_rate": 3.7421629835464608e-06, + "loss": 0.2957, + "step": 15133 + }, + { + "epoch": 0.7089520775753033, + "grad_norm": 0.6529488052136605, + "learning_rate": 3.741998396312709e-06, + "loss": 0.2948, + "step": 15134 + }, + { + "epoch": 0.7089989225652317, + "grad_norm": 0.6149209766694321, + "learning_rate": 3.741833801931568e-06, + "loss": 0.2565, + "step": 15135 + }, + { + "epoch": 0.70904576755516, + "grad_norm": 0.5820729195563317, + "learning_rate": 3.741669200403986e-06, + "loss": 0.2915, + "step": 15136 + }, + { + "epoch": 0.7090926125450883, + "grad_norm": 0.5737473020962497, + "learning_rate": 3.7415045917309097e-06, + "loss": 0.2482, + "step": 15137 + }, + { + "epoch": 0.7091394575350166, + "grad_norm": 0.5770795476223686, + "learning_rate": 3.741339975913287e-06, + "loss": 0.2823, + "step": 15138 + }, + { + "epoch": 0.709186302524945, + "grad_norm": 0.6314657084684016, + "learning_rate": 3.7411753529520644e-06, + "loss": 0.2842, + "step": 15139 + }, + { + "epoch": 0.7092331475148733, + "grad_norm": 0.6006099144426019, + "learning_rate": 3.741010722848189e-06, + "loss": 0.2862, + "step": 15140 + }, + { + "epoch": 0.7092799925048016, + "grad_norm": 0.5754647978110944, + "learning_rate": 3.7408460856026098e-06, + "loss": 0.2784, + "step": 15141 + }, + { + "epoch": 0.7093268374947299, + "grad_norm": 0.584240688037232, + "learning_rate": 3.740681441216273e-06, + "loss": 0.2695, + "step": 15142 + }, + { + "epoch": 0.7093736824846583, + "grad_norm": 0.666455042560467, + "learning_rate": 3.740516789690126e-06, + "loss": 0.3149, + "step": 15143 + }, + { + "epoch": 0.7094205274745866, + "grad_norm": 0.6077655166099388, + "learning_rate": 3.740352131025116e-06, + "loss": 0.2798, + "step": 15144 + }, + { + "epoch": 0.709467372464515, + "grad_norm": 0.5843733020461063, + "learning_rate": 3.740187465222193e-06, + "loss": 0.2817, + "step": 15145 + }, + { + "epoch": 0.7095142174544432, + "grad_norm": 0.5665214570558893, + "learning_rate": 3.740022792282302e-06, + "loss": 0.2694, + "step": 15146 + }, + { + "epoch": 0.7095610624443716, + "grad_norm": 0.5725767191158706, + "learning_rate": 3.739858112206391e-06, + "loss": 0.2751, + "step": 15147 + }, + { + "epoch": 0.7096079074342999, + "grad_norm": 0.607622168684724, + "learning_rate": 3.739693424995409e-06, + "loss": 0.2821, + "step": 15148 + }, + { + "epoch": 0.7096547524242283, + "grad_norm": 0.5788690012225748, + "learning_rate": 3.7395287306503025e-06, + "loss": 0.2592, + "step": 15149 + }, + { + "epoch": 0.7097015974141565, + "grad_norm": 0.5816837743716562, + "learning_rate": 3.739364029172019e-06, + "loss": 0.2696, + "step": 15150 + }, + { + "epoch": 0.7097484424040849, + "grad_norm": 0.5934247078379001, + "learning_rate": 3.7391993205615085e-06, + "loss": 0.277, + "step": 15151 + }, + { + "epoch": 0.7097952873940132, + "grad_norm": 0.6135335202541198, + "learning_rate": 3.739034604819717e-06, + "loss": 0.2888, + "step": 15152 + }, + { + "epoch": 0.7098421323839416, + "grad_norm": 0.5430926786580351, + "learning_rate": 3.7388698819475917e-06, + "loss": 0.2715, + "step": 15153 + }, + { + "epoch": 0.7098889773738699, + "grad_norm": 0.6756473315020922, + "learning_rate": 3.7387051519460825e-06, + "loss": 0.2946, + "step": 15154 + }, + { + "epoch": 0.7099358223637982, + "grad_norm": 0.5682184126019713, + "learning_rate": 3.7385404148161363e-06, + "loss": 0.2807, + "step": 15155 + }, + { + "epoch": 0.7099826673537265, + "grad_norm": 0.613103460671005, + "learning_rate": 3.738375670558701e-06, + "loss": 0.2771, + "step": 15156 + }, + { + "epoch": 0.7100295123436549, + "grad_norm": 0.5880544347817739, + "learning_rate": 3.738210919174725e-06, + "loss": 0.2813, + "step": 15157 + }, + { + "epoch": 0.7100763573335832, + "grad_norm": 0.5591641275579856, + "learning_rate": 3.738046160665157e-06, + "loss": 0.2829, + "step": 15158 + }, + { + "epoch": 0.7101232023235114, + "grad_norm": 0.5727275074006619, + "learning_rate": 3.737881395030944e-06, + "loss": 0.2889, + "step": 15159 + }, + { + "epoch": 0.7101700473134398, + "grad_norm": 0.5698641338266337, + "learning_rate": 3.7377166222730353e-06, + "loss": 0.2906, + "step": 15160 + }, + { + "epoch": 0.7102168923033682, + "grad_norm": 0.5682181326936931, + "learning_rate": 3.7375518423923774e-06, + "loss": 0.2868, + "step": 15161 + }, + { + "epoch": 0.7102637372932965, + "grad_norm": 0.5480735121584275, + "learning_rate": 3.7373870553899203e-06, + "loss": 0.2596, + "step": 15162 + }, + { + "epoch": 0.7103105822832249, + "grad_norm": 0.5603994351378274, + "learning_rate": 3.7372222612666127e-06, + "loss": 0.2754, + "step": 15163 + }, + { + "epoch": 0.7103574272731531, + "grad_norm": 0.6108650391206428, + "learning_rate": 3.7370574600234006e-06, + "loss": 0.2947, + "step": 15164 + }, + { + "epoch": 0.7104042722630814, + "grad_norm": 0.58161140059171, + "learning_rate": 3.7368926516612336e-06, + "loss": 0.2956, + "step": 15165 + }, + { + "epoch": 0.7104511172530098, + "grad_norm": 0.6152735692380632, + "learning_rate": 3.7367278361810612e-06, + "loss": 0.2936, + "step": 15166 + }, + { + "epoch": 0.7104979622429382, + "grad_norm": 0.617201403650658, + "learning_rate": 3.7365630135838305e-06, + "loss": 0.3063, + "step": 15167 + }, + { + "epoch": 0.7105448072328664, + "grad_norm": 0.6359715105168806, + "learning_rate": 3.7363981838704905e-06, + "loss": 0.3022, + "step": 15168 + }, + { + "epoch": 0.7105916522227947, + "grad_norm": 0.5482921572830625, + "learning_rate": 3.73623334704199e-06, + "loss": 0.264, + "step": 15169 + }, + { + "epoch": 0.7106384972127231, + "grad_norm": 0.5876658946876402, + "learning_rate": 3.7360685030992772e-06, + "loss": 0.2864, + "step": 15170 + }, + { + "epoch": 0.7106853422026514, + "grad_norm": 0.6050570414540613, + "learning_rate": 3.7359036520433e-06, + "loss": 0.2971, + "step": 15171 + }, + { + "epoch": 0.7107321871925798, + "grad_norm": 0.5806333721465322, + "learning_rate": 3.735738793875009e-06, + "loss": 0.2839, + "step": 15172 + }, + { + "epoch": 0.710779032182508, + "grad_norm": 0.6006990739506243, + "learning_rate": 3.7355739285953517e-06, + "loss": 0.2719, + "step": 15173 + }, + { + "epoch": 0.7108258771724364, + "grad_norm": 0.6307730357293437, + "learning_rate": 3.7354090562052764e-06, + "loss": 0.2931, + "step": 15174 + }, + { + "epoch": 0.7108727221623647, + "grad_norm": 0.5771820530004624, + "learning_rate": 3.735244176705732e-06, + "loss": 0.2675, + "step": 15175 + }, + { + "epoch": 0.7109195671522931, + "grad_norm": 0.5969830971187282, + "learning_rate": 3.7350792900976698e-06, + "loss": 0.2848, + "step": 15176 + }, + { + "epoch": 0.7109664121422213, + "grad_norm": 0.6172217027276088, + "learning_rate": 3.7349143963820357e-06, + "loss": 0.2864, + "step": 15177 + }, + { + "epoch": 0.7110132571321497, + "grad_norm": 0.5632618865659793, + "learning_rate": 3.7347494955597783e-06, + "loss": 0.2818, + "step": 15178 + }, + { + "epoch": 0.711060102122078, + "grad_norm": 0.6283872330521661, + "learning_rate": 3.73458458763185e-06, + "loss": 0.2852, + "step": 15179 + }, + { + "epoch": 0.7111069471120064, + "grad_norm": 0.616558057760467, + "learning_rate": 3.7344196725991966e-06, + "loss": 0.2953, + "step": 15180 + }, + { + "epoch": 0.7111537921019347, + "grad_norm": 0.609448693771008, + "learning_rate": 3.734254750462768e-06, + "loss": 0.2635, + "step": 15181 + }, + { + "epoch": 0.711200637091863, + "grad_norm": 0.6060274366989943, + "learning_rate": 3.7340898212235144e-06, + "loss": 0.2991, + "step": 15182 + }, + { + "epoch": 0.7112474820817913, + "grad_norm": 0.6871707961115558, + "learning_rate": 3.733924884882384e-06, + "loss": 0.3073, + "step": 15183 + }, + { + "epoch": 0.7112943270717197, + "grad_norm": 0.5221712107626335, + "learning_rate": 3.7337599414403254e-06, + "loss": 0.2531, + "step": 15184 + }, + { + "epoch": 0.711341172061648, + "grad_norm": 0.5585027441313695, + "learning_rate": 3.733594990898288e-06, + "loss": 0.274, + "step": 15185 + }, + { + "epoch": 0.7113880170515763, + "grad_norm": 0.6597333503547606, + "learning_rate": 3.733430033257223e-06, + "loss": 0.2977, + "step": 15186 + }, + { + "epoch": 0.7114348620415046, + "grad_norm": 0.6051711814038366, + "learning_rate": 3.733265068518077e-06, + "loss": 0.2786, + "step": 15187 + }, + { + "epoch": 0.711481707031433, + "grad_norm": 0.6094914502576453, + "learning_rate": 3.7331000966818008e-06, + "loss": 0.2916, + "step": 15188 + }, + { + "epoch": 0.7115285520213613, + "grad_norm": 0.5510023189206644, + "learning_rate": 3.7329351177493435e-06, + "loss": 0.2722, + "step": 15189 + }, + { + "epoch": 0.7115753970112897, + "grad_norm": 0.6231306662354045, + "learning_rate": 3.7327701317216545e-06, + "loss": 0.2743, + "step": 15190 + }, + { + "epoch": 0.7116222420012179, + "grad_norm": 0.5520637906049871, + "learning_rate": 3.732605138599683e-06, + "loss": 0.2647, + "step": 15191 + }, + { + "epoch": 0.7116690869911463, + "grad_norm": 0.5944994105249141, + "learning_rate": 3.732440138384379e-06, + "loss": 0.2769, + "step": 15192 + }, + { + "epoch": 0.7117159319810746, + "grad_norm": 0.5779226800525643, + "learning_rate": 3.732275131076692e-06, + "loss": 0.2773, + "step": 15193 + }, + { + "epoch": 0.711762776971003, + "grad_norm": 0.6312161453630774, + "learning_rate": 3.732110116677571e-06, + "loss": 0.2935, + "step": 15194 + }, + { + "epoch": 0.7118096219609312, + "grad_norm": 0.6509709270079128, + "learning_rate": 3.7319450951879655e-06, + "loss": 0.278, + "step": 15195 + }, + { + "epoch": 0.7118564669508596, + "grad_norm": 0.5874118329006236, + "learning_rate": 3.731780066608826e-06, + "loss": 0.2732, + "step": 15196 + }, + { + "epoch": 0.7119033119407879, + "grad_norm": 0.5846591024591257, + "learning_rate": 3.7316150309411024e-06, + "loss": 0.2816, + "step": 15197 + }, + { + "epoch": 0.7119501569307163, + "grad_norm": 0.6368850841537165, + "learning_rate": 3.7314499881857433e-06, + "loss": 0.2672, + "step": 15198 + }, + { + "epoch": 0.7119970019206446, + "grad_norm": 0.5470062701808867, + "learning_rate": 3.731284938343699e-06, + "loss": 0.2679, + "step": 15199 + }, + { + "epoch": 0.7120438469105729, + "grad_norm": 0.6126248473473651, + "learning_rate": 3.7311198814159196e-06, + "loss": 0.296, + "step": 15200 + }, + { + "epoch": 0.7120906919005012, + "grad_norm": 0.6064416108951729, + "learning_rate": 3.730954817403355e-06, + "loss": 0.2928, + "step": 15201 + }, + { + "epoch": 0.7121375368904296, + "grad_norm": 0.631213052649654, + "learning_rate": 3.7307897463069535e-06, + "loss": 0.3079, + "step": 15202 + }, + { + "epoch": 0.7121843818803579, + "grad_norm": 0.6152543749471434, + "learning_rate": 3.7306246681276674e-06, + "loss": 0.3063, + "step": 15203 + }, + { + "epoch": 0.7122312268702862, + "grad_norm": 0.5678398527669689, + "learning_rate": 3.730459582866446e-06, + "loss": 0.2702, + "step": 15204 + }, + { + "epoch": 0.7122780718602145, + "grad_norm": 0.6358510883941069, + "learning_rate": 3.7302944905242382e-06, + "loss": 0.2862, + "step": 15205 + }, + { + "epoch": 0.7123249168501429, + "grad_norm": 0.5764394001818817, + "learning_rate": 3.7301293911019955e-06, + "loss": 0.275, + "step": 15206 + }, + { + "epoch": 0.7123717618400712, + "grad_norm": 0.6186387512919186, + "learning_rate": 3.729964284600666e-06, + "loss": 0.2692, + "step": 15207 + }, + { + "epoch": 0.7124186068299996, + "grad_norm": 0.6731510521096653, + "learning_rate": 3.729799171021203e-06, + "loss": 0.3094, + "step": 15208 + }, + { + "epoch": 0.7124654518199278, + "grad_norm": 0.6232532784267711, + "learning_rate": 3.729634050364554e-06, + "loss": 0.2836, + "step": 15209 + }, + { + "epoch": 0.7125122968098562, + "grad_norm": 0.5896293275915439, + "learning_rate": 3.7294689226316695e-06, + "loss": 0.2781, + "step": 15210 + }, + { + "epoch": 0.7125591417997845, + "grad_norm": 0.5778846952115967, + "learning_rate": 3.729303787823501e-06, + "loss": 0.2655, + "step": 15211 + }, + { + "epoch": 0.7126059867897129, + "grad_norm": 0.604831753555272, + "learning_rate": 3.7291386459409984e-06, + "loss": 0.2804, + "step": 15212 + }, + { + "epoch": 0.7126528317796411, + "grad_norm": 0.5665612069703577, + "learning_rate": 3.728973496985111e-06, + "loss": 0.269, + "step": 15213 + }, + { + "epoch": 0.7126996767695695, + "grad_norm": 0.5841522737973858, + "learning_rate": 3.7288083409567906e-06, + "loss": 0.2941, + "step": 15214 + }, + { + "epoch": 0.7127465217594978, + "grad_norm": 0.5803403655139578, + "learning_rate": 3.7286431778569877e-06, + "loss": 0.286, + "step": 15215 + }, + { + "epoch": 0.7127933667494262, + "grad_norm": 0.5859448491161503, + "learning_rate": 3.728478007686651e-06, + "loss": 0.2855, + "step": 15216 + }, + { + "epoch": 0.7128402117393545, + "grad_norm": 0.6254101715004076, + "learning_rate": 3.728312830446732e-06, + "loss": 0.294, + "step": 15217 + }, + { + "epoch": 0.7128870567292828, + "grad_norm": 0.581827681765028, + "learning_rate": 3.7281476461381826e-06, + "loss": 0.2854, + "step": 15218 + }, + { + "epoch": 0.7129339017192111, + "grad_norm": 0.5724642249869323, + "learning_rate": 3.7279824547619513e-06, + "loss": 0.272, + "step": 15219 + }, + { + "epoch": 0.7129807467091395, + "grad_norm": 0.5831917666092684, + "learning_rate": 3.7278172563189897e-06, + "loss": 0.2893, + "step": 15220 + }, + { + "epoch": 0.7130275916990678, + "grad_norm": 0.5907776273947669, + "learning_rate": 3.7276520508102487e-06, + "loss": 0.2762, + "step": 15221 + }, + { + "epoch": 0.7130744366889961, + "grad_norm": 0.5461551439693195, + "learning_rate": 3.7274868382366786e-06, + "loss": 0.2527, + "step": 15222 + }, + { + "epoch": 0.7131212816789244, + "grad_norm": 0.5639004857870373, + "learning_rate": 3.7273216185992302e-06, + "loss": 0.2797, + "step": 15223 + }, + { + "epoch": 0.7131681266688528, + "grad_norm": 0.5818399982478688, + "learning_rate": 3.7271563918988544e-06, + "loss": 0.2681, + "step": 15224 + }, + { + "epoch": 0.7132149716587811, + "grad_norm": 0.5666979245543446, + "learning_rate": 3.726991158136502e-06, + "loss": 0.278, + "step": 15225 + }, + { + "epoch": 0.7132618166487095, + "grad_norm": 0.5856956735350617, + "learning_rate": 3.726825917313124e-06, + "loss": 0.2552, + "step": 15226 + }, + { + "epoch": 0.7133086616386377, + "grad_norm": 0.5960360103336997, + "learning_rate": 3.7266606694296708e-06, + "loss": 0.2932, + "step": 15227 + }, + { + "epoch": 0.7133555066285661, + "grad_norm": 0.5764953211086147, + "learning_rate": 3.7264954144870934e-06, + "loss": 0.2719, + "step": 15228 + }, + { + "epoch": 0.7134023516184944, + "grad_norm": 0.5854924910566631, + "learning_rate": 3.726330152486344e-06, + "loss": 0.2694, + "step": 15229 + }, + { + "epoch": 0.7134491966084228, + "grad_norm": 0.6635988919733583, + "learning_rate": 3.726164883428373e-06, + "loss": 0.2929, + "step": 15230 + }, + { + "epoch": 0.713496041598351, + "grad_norm": 0.609086094434895, + "learning_rate": 3.72599960731413e-06, + "loss": 0.2696, + "step": 15231 + }, + { + "epoch": 0.7135428865882794, + "grad_norm": 0.5715601491025663, + "learning_rate": 3.725834324144569e-06, + "loss": 0.2857, + "step": 15232 + }, + { + "epoch": 0.7135897315782077, + "grad_norm": 0.5746228577522312, + "learning_rate": 3.7256690339206392e-06, + "loss": 0.2887, + "step": 15233 + }, + { + "epoch": 0.7136365765681361, + "grad_norm": 0.5660432007183827, + "learning_rate": 3.725503736643291e-06, + "loss": 0.2591, + "step": 15234 + }, + { + "epoch": 0.7136834215580644, + "grad_norm": 0.6073651313867992, + "learning_rate": 3.7253384323134774e-06, + "loss": 0.2894, + "step": 15235 + }, + { + "epoch": 0.7137302665479927, + "grad_norm": 0.6305312329649946, + "learning_rate": 3.7251731209321494e-06, + "loss": 0.2884, + "step": 15236 + }, + { + "epoch": 0.713777111537921, + "grad_norm": 0.6387418217797787, + "learning_rate": 3.7250078025002577e-06, + "loss": 0.2868, + "step": 15237 + }, + { + "epoch": 0.7138239565278494, + "grad_norm": 0.5954217651149232, + "learning_rate": 3.7248424770187543e-06, + "loss": 0.2624, + "step": 15238 + }, + { + "epoch": 0.7138708015177777, + "grad_norm": 0.6195157977094645, + "learning_rate": 3.7246771444885904e-06, + "loss": 0.2802, + "step": 15239 + }, + { + "epoch": 0.713917646507706, + "grad_norm": 0.6151728219351328, + "learning_rate": 3.7245118049107177e-06, + "loss": 0.2696, + "step": 15240 + }, + { + "epoch": 0.7139644914976343, + "grad_norm": 0.5843535700649521, + "learning_rate": 3.724346458286086e-06, + "loss": 0.2728, + "step": 15241 + }, + { + "epoch": 0.7140113364875627, + "grad_norm": 0.6240020406760584, + "learning_rate": 3.724181104615649e-06, + "loss": 0.2837, + "step": 15242 + }, + { + "epoch": 0.714058181477491, + "grad_norm": 0.5698479824249654, + "learning_rate": 3.7240157439003578e-06, + "loss": 0.2968, + "step": 15243 + }, + { + "epoch": 0.7141050264674194, + "grad_norm": 0.6173914935652336, + "learning_rate": 3.7238503761411627e-06, + "loss": 0.3093, + "step": 15244 + }, + { + "epoch": 0.7141518714573476, + "grad_norm": 0.6019380098909238, + "learning_rate": 3.723685001339017e-06, + "loss": 0.2787, + "step": 15245 + }, + { + "epoch": 0.714198716447276, + "grad_norm": 0.574028558698804, + "learning_rate": 3.723519619494872e-06, + "loss": 0.2816, + "step": 15246 + }, + { + "epoch": 0.7142455614372043, + "grad_norm": 0.5454424978043737, + "learning_rate": 3.7233542306096782e-06, + "loss": 0.2838, + "step": 15247 + }, + { + "epoch": 0.7142924064271327, + "grad_norm": 0.6175228680665278, + "learning_rate": 3.723188834684388e-06, + "loss": 0.29, + "step": 15248 + }, + { + "epoch": 0.7143392514170609, + "grad_norm": 0.5832153007595325, + "learning_rate": 3.7230234317199544e-06, + "loss": 0.2716, + "step": 15249 + }, + { + "epoch": 0.7143860964069892, + "grad_norm": 0.6083633709105399, + "learning_rate": 3.722858021717328e-06, + "loss": 0.2829, + "step": 15250 + }, + { + "epoch": 0.7144329413969176, + "grad_norm": 0.6025407297335237, + "learning_rate": 3.7226926046774614e-06, + "loss": 0.277, + "step": 15251 + }, + { + "epoch": 0.714479786386846, + "grad_norm": 0.6117755780547309, + "learning_rate": 3.722527180601306e-06, + "loss": 0.2807, + "step": 15252 + }, + { + "epoch": 0.7145266313767743, + "grad_norm": 0.6146737792443379, + "learning_rate": 3.722361749489814e-06, + "loss": 0.3007, + "step": 15253 + }, + { + "epoch": 0.7145734763667025, + "grad_norm": 0.5968750677722194, + "learning_rate": 3.722196311343937e-06, + "loss": 0.2835, + "step": 15254 + }, + { + "epoch": 0.7146203213566309, + "grad_norm": 0.5696515987520115, + "learning_rate": 3.7220308661646277e-06, + "loss": 0.2704, + "step": 15255 + }, + { + "epoch": 0.7146671663465592, + "grad_norm": 0.607702086960688, + "learning_rate": 3.7218654139528376e-06, + "loss": 0.295, + "step": 15256 + }, + { + "epoch": 0.7147140113364876, + "grad_norm": 0.5702781006943538, + "learning_rate": 3.7216999547095196e-06, + "loss": 0.2874, + "step": 15257 + }, + { + "epoch": 0.7147608563264158, + "grad_norm": 0.6393698833694775, + "learning_rate": 3.7215344884356253e-06, + "loss": 0.288, + "step": 15258 + }, + { + "epoch": 0.7148077013163442, + "grad_norm": 0.5516996701343002, + "learning_rate": 3.7213690151321062e-06, + "loss": 0.281, + "step": 15259 + }, + { + "epoch": 0.7148545463062725, + "grad_norm": 0.5749748015691639, + "learning_rate": 3.7212035347999163e-06, + "loss": 0.2604, + "step": 15260 + }, + { + "epoch": 0.7149013912962009, + "grad_norm": 0.58578649781479, + "learning_rate": 3.7210380474400074e-06, + "loss": 0.2728, + "step": 15261 + }, + { + "epoch": 0.7149482362861292, + "grad_norm": 0.6031025502206162, + "learning_rate": 3.7208725530533306e-06, + "loss": 0.2767, + "step": 15262 + }, + { + "epoch": 0.7149950812760575, + "grad_norm": 0.5629627912794574, + "learning_rate": 3.7207070516408394e-06, + "loss": 0.2532, + "step": 15263 + }, + { + "epoch": 0.7150419262659858, + "grad_norm": 0.5853885214472739, + "learning_rate": 3.7205415432034864e-06, + "loss": 0.2851, + "step": 15264 + }, + { + "epoch": 0.7150887712559142, + "grad_norm": 0.6121158613273633, + "learning_rate": 3.7203760277422223e-06, + "loss": 0.2669, + "step": 15265 + }, + { + "epoch": 0.7151356162458425, + "grad_norm": 0.6142777213041595, + "learning_rate": 3.720210505258002e-06, + "loss": 0.3057, + "step": 15266 + }, + { + "epoch": 0.7151824612357708, + "grad_norm": 0.5512415770956316, + "learning_rate": 3.720044975751777e-06, + "loss": 0.2575, + "step": 15267 + }, + { + "epoch": 0.7152293062256991, + "grad_norm": 0.58681939394691, + "learning_rate": 3.719879439224499e-06, + "loss": 0.2748, + "step": 15268 + }, + { + "epoch": 0.7152761512156275, + "grad_norm": 0.5825182914428778, + "learning_rate": 3.7197138956771218e-06, + "loss": 0.2822, + "step": 15269 + }, + { + "epoch": 0.7153229962055558, + "grad_norm": 0.5457026736449566, + "learning_rate": 3.719548345110598e-06, + "loss": 0.2543, + "step": 15270 + }, + { + "epoch": 0.7153698411954842, + "grad_norm": 0.5515705968816159, + "learning_rate": 3.71938278752588e-06, + "loss": 0.2662, + "step": 15271 + }, + { + "epoch": 0.7154166861854124, + "grad_norm": 0.5904239973632601, + "learning_rate": 3.7192172229239197e-06, + "loss": 0.293, + "step": 15272 + }, + { + "epoch": 0.7154635311753408, + "grad_norm": 0.6139577010811136, + "learning_rate": 3.719051651305672e-06, + "loss": 0.2851, + "step": 15273 + }, + { + "epoch": 0.7155103761652691, + "grad_norm": 0.6579117344626323, + "learning_rate": 3.7188860726720876e-06, + "loss": 0.2939, + "step": 15274 + }, + { + "epoch": 0.7155572211551975, + "grad_norm": 0.6225010391766299, + "learning_rate": 3.71872048702412e-06, + "loss": 0.2937, + "step": 15275 + }, + { + "epoch": 0.7156040661451257, + "grad_norm": 0.6212663191342522, + "learning_rate": 3.7185548943627227e-06, + "loss": 0.3084, + "step": 15276 + }, + { + "epoch": 0.7156509111350541, + "grad_norm": 0.6015482261092745, + "learning_rate": 3.7183892946888476e-06, + "loss": 0.2911, + "step": 15277 + }, + { + "epoch": 0.7156977561249824, + "grad_norm": 0.5784795677747542, + "learning_rate": 3.7182236880034493e-06, + "loss": 0.2716, + "step": 15278 + }, + { + "epoch": 0.7157446011149108, + "grad_norm": 0.608212599277012, + "learning_rate": 3.71805807430748e-06, + "loss": 0.2767, + "step": 15279 + }, + { + "epoch": 0.7157914461048391, + "grad_norm": 0.5583190204235868, + "learning_rate": 3.717892453601892e-06, + "loss": 0.2674, + "step": 15280 + }, + { + "epoch": 0.7158382910947674, + "grad_norm": 0.5905348250650816, + "learning_rate": 3.717726825887639e-06, + "loss": 0.2782, + "step": 15281 + }, + { + "epoch": 0.7158851360846957, + "grad_norm": 0.609632145536254, + "learning_rate": 3.717561191165675e-06, + "loss": 0.2886, + "step": 15282 + }, + { + "epoch": 0.7159319810746241, + "grad_norm": 0.5897444537260778, + "learning_rate": 3.7173955494369513e-06, + "loss": 0.2953, + "step": 15283 + }, + { + "epoch": 0.7159788260645524, + "grad_norm": 0.5811747830208349, + "learning_rate": 3.7172299007024226e-06, + "loss": 0.2877, + "step": 15284 + }, + { + "epoch": 0.7160256710544807, + "grad_norm": 0.5725722028979163, + "learning_rate": 3.7170642449630424e-06, + "loss": 0.2829, + "step": 15285 + }, + { + "epoch": 0.716072516044409, + "grad_norm": 0.6135015055160729, + "learning_rate": 3.7168985822197624e-06, + "loss": 0.2823, + "step": 15286 + }, + { + "epoch": 0.7161193610343374, + "grad_norm": 0.6009251356286561, + "learning_rate": 3.716732912473537e-06, + "loss": 0.2889, + "step": 15287 + }, + { + "epoch": 0.7161662060242657, + "grad_norm": 0.5412201776796663, + "learning_rate": 3.71656723572532e-06, + "loss": 0.2622, + "step": 15288 + }, + { + "epoch": 0.7162130510141941, + "grad_norm": 0.6123592564889188, + "learning_rate": 3.7164015519760644e-06, + "loss": 0.2908, + "step": 15289 + }, + { + "epoch": 0.7162598960041223, + "grad_norm": 0.5742602143102011, + "learning_rate": 3.716235861226723e-06, + "loss": 0.2718, + "step": 15290 + }, + { + "epoch": 0.7163067409940507, + "grad_norm": 0.5622623400019428, + "learning_rate": 3.7160701634782502e-06, + "loss": 0.2814, + "step": 15291 + }, + { + "epoch": 0.716353585983979, + "grad_norm": 0.5728453207027475, + "learning_rate": 3.7159044587315994e-06, + "loss": 0.2779, + "step": 15292 + }, + { + "epoch": 0.7164004309739074, + "grad_norm": 0.5653065908853038, + "learning_rate": 3.715738746987724e-06, + "loss": 0.2563, + "step": 15293 + }, + { + "epoch": 0.7164472759638356, + "grad_norm": 0.5646581667721647, + "learning_rate": 3.7155730282475776e-06, + "loss": 0.2645, + "step": 15294 + }, + { + "epoch": 0.716494120953764, + "grad_norm": 0.6162586878893949, + "learning_rate": 3.715407302512114e-06, + "loss": 0.3134, + "step": 15295 + }, + { + "epoch": 0.7165409659436923, + "grad_norm": 0.5422404413947045, + "learning_rate": 3.7152415697822865e-06, + "loss": 0.266, + "step": 15296 + }, + { + "epoch": 0.7165878109336207, + "grad_norm": 0.5499075493935343, + "learning_rate": 3.7150758300590496e-06, + "loss": 0.2769, + "step": 15297 + }, + { + "epoch": 0.716634655923549, + "grad_norm": 0.5662866836916932, + "learning_rate": 3.7149100833433567e-06, + "loss": 0.2736, + "step": 15298 + }, + { + "epoch": 0.7166815009134773, + "grad_norm": 0.5670011118868932, + "learning_rate": 3.714744329636161e-06, + "loss": 0.2988, + "step": 15299 + }, + { + "epoch": 0.7167283459034056, + "grad_norm": 0.6569530299776957, + "learning_rate": 3.7145785689384182e-06, + "loss": 0.2978, + "step": 15300 + }, + { + "epoch": 0.716775190893334, + "grad_norm": 0.614203683264948, + "learning_rate": 3.7144128012510798e-06, + "loss": 0.3088, + "step": 15301 + }, + { + "epoch": 0.7168220358832623, + "grad_norm": 0.5623982908103978, + "learning_rate": 3.714247026575102e-06, + "loss": 0.2745, + "step": 15302 + }, + { + "epoch": 0.7168688808731906, + "grad_norm": 0.6376632211661222, + "learning_rate": 3.714081244911437e-06, + "loss": 0.3007, + "step": 15303 + }, + { + "epoch": 0.7169157258631189, + "grad_norm": 0.5792934878654109, + "learning_rate": 3.71391545626104e-06, + "loss": 0.3078, + "step": 15304 + }, + { + "epoch": 0.7169625708530473, + "grad_norm": 0.6168104185608442, + "learning_rate": 3.713749660624864e-06, + "loss": 0.2792, + "step": 15305 + }, + { + "epoch": 0.7170094158429756, + "grad_norm": 0.5553099125026036, + "learning_rate": 3.7135838580038646e-06, + "loss": 0.2828, + "step": 15306 + }, + { + "epoch": 0.717056260832904, + "grad_norm": 0.6031693602752832, + "learning_rate": 3.7134180483989946e-06, + "loss": 0.2799, + "step": 15307 + }, + { + "epoch": 0.7171031058228322, + "grad_norm": 0.5743930453345037, + "learning_rate": 3.713252231811209e-06, + "loss": 0.2972, + "step": 15308 + }, + { + "epoch": 0.7171499508127606, + "grad_norm": 0.5815883962434744, + "learning_rate": 3.713086408241462e-06, + "loss": 0.2908, + "step": 15309 + }, + { + "epoch": 0.7171967958026889, + "grad_norm": 0.6173756335747098, + "learning_rate": 3.712920577690707e-06, + "loss": 0.2728, + "step": 15310 + }, + { + "epoch": 0.7172436407926173, + "grad_norm": 0.5888963853481631, + "learning_rate": 3.712754740159899e-06, + "loss": 0.2876, + "step": 15311 + }, + { + "epoch": 0.7172904857825455, + "grad_norm": 0.6272404959737837, + "learning_rate": 3.7125888956499926e-06, + "loss": 0.2861, + "step": 15312 + }, + { + "epoch": 0.7173373307724739, + "grad_norm": 0.5900019902077318, + "learning_rate": 3.712423044161942e-06, + "loss": 0.2923, + "step": 15313 + }, + { + "epoch": 0.7173841757624022, + "grad_norm": 0.6413834404194162, + "learning_rate": 3.712257185696701e-06, + "loss": 0.2996, + "step": 15314 + }, + { + "epoch": 0.7174310207523306, + "grad_norm": 0.6223421035559045, + "learning_rate": 3.7120913202552244e-06, + "loss": 0.3023, + "step": 15315 + }, + { + "epoch": 0.7174778657422589, + "grad_norm": 0.6144375013760099, + "learning_rate": 3.7119254478384682e-06, + "loss": 0.3119, + "step": 15316 + }, + { + "epoch": 0.7175247107321872, + "grad_norm": 0.6294226022055089, + "learning_rate": 3.7117595684473847e-06, + "loss": 0.2625, + "step": 15317 + }, + { + "epoch": 0.7175715557221155, + "grad_norm": 0.6148073018636624, + "learning_rate": 3.711593682082929e-06, + "loss": 0.2962, + "step": 15318 + }, + { + "epoch": 0.7176184007120439, + "grad_norm": 0.6110002820448809, + "learning_rate": 3.711427788746057e-06, + "loss": 0.3005, + "step": 15319 + }, + { + "epoch": 0.7176652457019722, + "grad_norm": 0.6355935811832696, + "learning_rate": 3.7112618884377227e-06, + "loss": 0.3068, + "step": 15320 + }, + { + "epoch": 0.7177120906919005, + "grad_norm": 0.5695202741422489, + "learning_rate": 3.7110959811588797e-06, + "loss": 0.2733, + "step": 15321 + }, + { + "epoch": 0.7177589356818288, + "grad_norm": 0.5727423643157533, + "learning_rate": 3.710930066910484e-06, + "loss": 0.2637, + "step": 15322 + }, + { + "epoch": 0.7178057806717572, + "grad_norm": 0.5914729173803018, + "learning_rate": 3.7107641456934912e-06, + "loss": 0.3061, + "step": 15323 + }, + { + "epoch": 0.7178526256616855, + "grad_norm": 0.5761760902361966, + "learning_rate": 3.710598217508854e-06, + "loss": 0.2779, + "step": 15324 + }, + { + "epoch": 0.7178994706516139, + "grad_norm": 0.6713226301660185, + "learning_rate": 3.7104322823575285e-06, + "loss": 0.3053, + "step": 15325 + }, + { + "epoch": 0.7179463156415421, + "grad_norm": 0.5879732066514499, + "learning_rate": 3.7102663402404694e-06, + "loss": 0.2817, + "step": 15326 + }, + { + "epoch": 0.7179931606314705, + "grad_norm": 0.6015522496721672, + "learning_rate": 3.710100391158632e-06, + "loss": 0.2961, + "step": 15327 + }, + { + "epoch": 0.7180400056213988, + "grad_norm": 0.653086283810886, + "learning_rate": 3.709934435112971e-06, + "loss": 0.2875, + "step": 15328 + }, + { + "epoch": 0.7180868506113272, + "grad_norm": 0.576406023295332, + "learning_rate": 3.709768472104441e-06, + "loss": 0.2852, + "step": 15329 + }, + { + "epoch": 0.7181336956012554, + "grad_norm": 0.5940557376176898, + "learning_rate": 3.7096025021339976e-06, + "loss": 0.2854, + "step": 15330 + }, + { + "epoch": 0.7181805405911837, + "grad_norm": 0.582170990044218, + "learning_rate": 3.7094365252025966e-06, + "loss": 0.2775, + "step": 15331 + }, + { + "epoch": 0.7182273855811121, + "grad_norm": 0.6231028540259179, + "learning_rate": 3.7092705413111914e-06, + "loss": 0.2949, + "step": 15332 + }, + { + "epoch": 0.7182742305710405, + "grad_norm": 0.5655400010902708, + "learning_rate": 3.709104550460739e-06, + "loss": 0.2649, + "step": 15333 + }, + { + "epoch": 0.7183210755609688, + "grad_norm": 0.5928825320101082, + "learning_rate": 3.708938552652193e-06, + "loss": 0.2635, + "step": 15334 + }, + { + "epoch": 0.718367920550897, + "grad_norm": 0.644830598182271, + "learning_rate": 3.7087725478865098e-06, + "loss": 0.2903, + "step": 15335 + }, + { + "epoch": 0.7184147655408254, + "grad_norm": 0.5553212965856191, + "learning_rate": 3.708606536164644e-06, + "loss": 0.2673, + "step": 15336 + }, + { + "epoch": 0.7184616105307537, + "grad_norm": 0.5572795970970343, + "learning_rate": 3.7084405174875524e-06, + "loss": 0.2722, + "step": 15337 + }, + { + "epoch": 0.7185084555206821, + "grad_norm": 0.5658096268912802, + "learning_rate": 3.7082744918561886e-06, + "loss": 0.2751, + "step": 15338 + }, + { + "epoch": 0.7185553005106103, + "grad_norm": 0.5797494461350958, + "learning_rate": 3.7081084592715088e-06, + "loss": 0.281, + "step": 15339 + }, + { + "epoch": 0.7186021455005387, + "grad_norm": 0.5842939050931116, + "learning_rate": 3.707942419734469e-06, + "loss": 0.2623, + "step": 15340 + }, + { + "epoch": 0.718648990490467, + "grad_norm": 0.5708131517226044, + "learning_rate": 3.7077763732460237e-06, + "loss": 0.2866, + "step": 15341 + }, + { + "epoch": 0.7186958354803954, + "grad_norm": 0.5823648494171155, + "learning_rate": 3.707610319807129e-06, + "loss": 0.281, + "step": 15342 + }, + { + "epoch": 0.7187426804703237, + "grad_norm": 0.5754300365174491, + "learning_rate": 3.7074442594187405e-06, + "loss": 0.2606, + "step": 15343 + }, + { + "epoch": 0.718789525460252, + "grad_norm": 0.6301422054660845, + "learning_rate": 3.7072781920818134e-06, + "loss": 0.2879, + "step": 15344 + }, + { + "epoch": 0.7188363704501803, + "grad_norm": 0.6211077171832051, + "learning_rate": 3.707112117797304e-06, + "loss": 0.2874, + "step": 15345 + }, + { + "epoch": 0.7188832154401087, + "grad_norm": 0.5392007259663062, + "learning_rate": 3.706946036566168e-06, + "loss": 0.2617, + "step": 15346 + }, + { + "epoch": 0.718930060430037, + "grad_norm": 0.5408256202623489, + "learning_rate": 3.7067799483893606e-06, + "loss": 0.2696, + "step": 15347 + }, + { + "epoch": 0.7189769054199653, + "grad_norm": 0.577157815724061, + "learning_rate": 3.706613853267838e-06, + "loss": 0.2786, + "step": 15348 + }, + { + "epoch": 0.7190237504098936, + "grad_norm": 0.5774000536729247, + "learning_rate": 3.7064477512025564e-06, + "loss": 0.2645, + "step": 15349 + }, + { + "epoch": 0.719070595399822, + "grad_norm": 0.5802730388813501, + "learning_rate": 3.7062816421944704e-06, + "loss": 0.2617, + "step": 15350 + }, + { + "epoch": 0.7191174403897503, + "grad_norm": 0.572337367792002, + "learning_rate": 3.706115526244537e-06, + "loss": 0.2715, + "step": 15351 + }, + { + "epoch": 0.7191642853796787, + "grad_norm": 0.6112891403010193, + "learning_rate": 3.7059494033537126e-06, + "loss": 0.2982, + "step": 15352 + }, + { + "epoch": 0.7192111303696069, + "grad_norm": 0.5574590018054181, + "learning_rate": 3.7057832735229517e-06, + "loss": 0.2727, + "step": 15353 + }, + { + "epoch": 0.7192579753595353, + "grad_norm": 0.574126842073582, + "learning_rate": 3.7056171367532114e-06, + "loss": 0.2594, + "step": 15354 + }, + { + "epoch": 0.7193048203494636, + "grad_norm": 0.6288328534738999, + "learning_rate": 3.7054509930454475e-06, + "loss": 0.2783, + "step": 15355 + }, + { + "epoch": 0.719351665339392, + "grad_norm": 0.5662888398892039, + "learning_rate": 3.705284842400616e-06, + "loss": 0.28, + "step": 15356 + }, + { + "epoch": 0.7193985103293202, + "grad_norm": 0.5420348771325376, + "learning_rate": 3.705118684819673e-06, + "loss": 0.2759, + "step": 15357 + }, + { + "epoch": 0.7194453553192486, + "grad_norm": 0.5754800229720456, + "learning_rate": 3.704952520303575e-06, + "loss": 0.2878, + "step": 15358 + }, + { + "epoch": 0.7194922003091769, + "grad_norm": 0.600239650661394, + "learning_rate": 3.7047863488532787e-06, + "loss": 0.2836, + "step": 15359 + }, + { + "epoch": 0.7195390452991053, + "grad_norm": 0.5400859290605894, + "learning_rate": 3.704620170469739e-06, + "loss": 0.2726, + "step": 15360 + }, + { + "epoch": 0.7195858902890336, + "grad_norm": 0.6435977966289937, + "learning_rate": 3.7044539851539128e-06, + "loss": 0.2834, + "step": 15361 + }, + { + "epoch": 0.7196327352789619, + "grad_norm": 0.6085126110124677, + "learning_rate": 3.7042877929067577e-06, + "loss": 0.2917, + "step": 15362 + }, + { + "epoch": 0.7196795802688902, + "grad_norm": 0.6298012622012793, + "learning_rate": 3.704121593729228e-06, + "loss": 0.3029, + "step": 15363 + }, + { + "epoch": 0.7197264252588186, + "grad_norm": 0.6346754535787841, + "learning_rate": 3.7039553876222817e-06, + "loss": 0.2925, + "step": 15364 + }, + { + "epoch": 0.7197732702487469, + "grad_norm": 0.5845079771095145, + "learning_rate": 3.7037891745868747e-06, + "loss": 0.2843, + "step": 15365 + }, + { + "epoch": 0.7198201152386752, + "grad_norm": 0.6039599702104704, + "learning_rate": 3.7036229546239635e-06, + "loss": 0.2856, + "step": 15366 + }, + { + "epoch": 0.7198669602286035, + "grad_norm": 0.6312889780731997, + "learning_rate": 3.7034567277345047e-06, + "loss": 0.2768, + "step": 15367 + }, + { + "epoch": 0.7199138052185319, + "grad_norm": 0.5862722072598476, + "learning_rate": 3.7032904939194546e-06, + "loss": 0.3019, + "step": 15368 + }, + { + "epoch": 0.7199606502084602, + "grad_norm": 0.6851918373757611, + "learning_rate": 3.703124253179771e-06, + "loss": 0.2995, + "step": 15369 + }, + { + "epoch": 0.7200074951983886, + "grad_norm": 0.5294040640952401, + "learning_rate": 3.702958005516409e-06, + "loss": 0.2575, + "step": 15370 + }, + { + "epoch": 0.7200543401883168, + "grad_norm": 0.5967545493503759, + "learning_rate": 3.7027917509303262e-06, + "loss": 0.2881, + "step": 15371 + }, + { + "epoch": 0.7201011851782452, + "grad_norm": 0.6084580901947626, + "learning_rate": 3.7026254894224797e-06, + "loss": 0.2753, + "step": 15372 + }, + { + "epoch": 0.7201480301681735, + "grad_norm": 0.6109590514633675, + "learning_rate": 3.702459220993825e-06, + "loss": 0.2832, + "step": 15373 + }, + { + "epoch": 0.7201948751581019, + "grad_norm": 0.5916500899550837, + "learning_rate": 3.70229294564532e-06, + "loss": 0.2897, + "step": 15374 + }, + { + "epoch": 0.7202417201480301, + "grad_norm": 0.5614588028643323, + "learning_rate": 3.7021266633779206e-06, + "loss": 0.275, + "step": 15375 + }, + { + "epoch": 0.7202885651379585, + "grad_norm": 0.6047377460370114, + "learning_rate": 3.701960374192586e-06, + "loss": 0.2847, + "step": 15376 + }, + { + "epoch": 0.7203354101278868, + "grad_norm": 0.6110022165115977, + "learning_rate": 3.7017940780902706e-06, + "loss": 0.3014, + "step": 15377 + }, + { + "epoch": 0.7203822551178152, + "grad_norm": 0.596670132362532, + "learning_rate": 3.701627775071932e-06, + "loss": 0.3025, + "step": 15378 + }, + { + "epoch": 0.7204291001077435, + "grad_norm": 0.5447288703894789, + "learning_rate": 3.701461465138528e-06, + "loss": 0.2713, + "step": 15379 + }, + { + "epoch": 0.7204759450976718, + "grad_norm": 0.5568687489323891, + "learning_rate": 3.7012951482910153e-06, + "loss": 0.2659, + "step": 15380 + }, + { + "epoch": 0.7205227900876001, + "grad_norm": 0.5756679037580689, + "learning_rate": 3.7011288245303506e-06, + "loss": 0.2753, + "step": 15381 + }, + { + "epoch": 0.7205696350775285, + "grad_norm": 0.6031028645271878, + "learning_rate": 3.7009624938574917e-06, + "loss": 0.2942, + "step": 15382 + }, + { + "epoch": 0.7206164800674568, + "grad_norm": 0.5426211098952686, + "learning_rate": 3.700796156273396e-06, + "loss": 0.2705, + "step": 15383 + }, + { + "epoch": 0.7206633250573851, + "grad_norm": 0.5456597834172726, + "learning_rate": 3.700629811779019e-06, + "loss": 0.2856, + "step": 15384 + }, + { + "epoch": 0.7207101700473134, + "grad_norm": 0.5885366642549392, + "learning_rate": 3.70046346037532e-06, + "loss": 0.2792, + "step": 15385 + }, + { + "epoch": 0.7207570150372418, + "grad_norm": 0.606793233312903, + "learning_rate": 3.7002971020632554e-06, + "loss": 0.2974, + "step": 15386 + }, + { + "epoch": 0.7208038600271701, + "grad_norm": 0.5922550248343654, + "learning_rate": 3.700130736843783e-06, + "loss": 0.2936, + "step": 15387 + }, + { + "epoch": 0.7208507050170985, + "grad_norm": 0.578848167383787, + "learning_rate": 3.699964364717859e-06, + "loss": 0.2925, + "step": 15388 + }, + { + "epoch": 0.7208975500070267, + "grad_norm": 0.6054429808607417, + "learning_rate": 3.6997979856864426e-06, + "loss": 0.2915, + "step": 15389 + }, + { + "epoch": 0.7209443949969551, + "grad_norm": 0.649274410100821, + "learning_rate": 3.6996315997504895e-06, + "loss": 0.3038, + "step": 15390 + }, + { + "epoch": 0.7209912399868834, + "grad_norm": 0.5962317630225336, + "learning_rate": 3.699465206910958e-06, + "loss": 0.315, + "step": 15391 + }, + { + "epoch": 0.7210380849768118, + "grad_norm": 0.5860259087492725, + "learning_rate": 3.699298807168807e-06, + "loss": 0.2845, + "step": 15392 + }, + { + "epoch": 0.72108492996674, + "grad_norm": 0.5515809608167163, + "learning_rate": 3.699132400524992e-06, + "loss": 0.2629, + "step": 15393 + }, + { + "epoch": 0.7211317749566684, + "grad_norm": 0.6566114281877777, + "learning_rate": 3.6989659869804713e-06, + "loss": 0.2933, + "step": 15394 + }, + { + "epoch": 0.7211786199465967, + "grad_norm": 0.5463765925152462, + "learning_rate": 3.698799566536203e-06, + "loss": 0.2623, + "step": 15395 + }, + { + "epoch": 0.7212254649365251, + "grad_norm": 0.6327981262626687, + "learning_rate": 3.698633139193144e-06, + "loss": 0.2907, + "step": 15396 + }, + { + "epoch": 0.7212723099264534, + "grad_norm": 0.55524538825303, + "learning_rate": 3.698466704952253e-06, + "loss": 0.2719, + "step": 15397 + }, + { + "epoch": 0.7213191549163817, + "grad_norm": 0.5910717350666775, + "learning_rate": 3.698300263814487e-06, + "loss": 0.2779, + "step": 15398 + }, + { + "epoch": 0.72136599990631, + "grad_norm": 0.5882166852698276, + "learning_rate": 3.6981338157808045e-06, + "loss": 0.2926, + "step": 15399 + }, + { + "epoch": 0.7214128448962384, + "grad_norm": 0.5915854860502257, + "learning_rate": 3.6979673608521628e-06, + "loss": 0.2972, + "step": 15400 + }, + { + "epoch": 0.7214596898861667, + "grad_norm": 0.5795914207903878, + "learning_rate": 3.6978008990295208e-06, + "loss": 0.2872, + "step": 15401 + }, + { + "epoch": 0.721506534876095, + "grad_norm": 0.5851176989022797, + "learning_rate": 3.6976344303138343e-06, + "loss": 0.2677, + "step": 15402 + }, + { + "epoch": 0.7215533798660233, + "grad_norm": 0.5627479808442204, + "learning_rate": 3.697467954706064e-06, + "loss": 0.2729, + "step": 15403 + }, + { + "epoch": 0.7216002248559517, + "grad_norm": 0.5464973257318644, + "learning_rate": 3.697301472207166e-06, + "loss": 0.2641, + "step": 15404 + }, + { + "epoch": 0.72164706984588, + "grad_norm": 0.6338766551572667, + "learning_rate": 3.6971349828180985e-06, + "loss": 0.2874, + "step": 15405 + }, + { + "epoch": 0.7216939148358084, + "grad_norm": 0.5902586162508304, + "learning_rate": 3.69696848653982e-06, + "loss": 0.2632, + "step": 15406 + }, + { + "epoch": 0.7217407598257366, + "grad_norm": 0.6165120706935681, + "learning_rate": 3.69680198337329e-06, + "loss": 0.2628, + "step": 15407 + }, + { + "epoch": 0.721787604815665, + "grad_norm": 0.5754195604444279, + "learning_rate": 3.696635473319464e-06, + "loss": 0.2774, + "step": 15408 + }, + { + "epoch": 0.7218344498055933, + "grad_norm": 0.59249946198679, + "learning_rate": 3.6964689563793023e-06, + "loss": 0.2788, + "step": 15409 + }, + { + "epoch": 0.7218812947955217, + "grad_norm": 0.6259434648749864, + "learning_rate": 3.696302432553762e-06, + "loss": 0.3045, + "step": 15410 + }, + { + "epoch": 0.7219281397854499, + "grad_norm": 0.560936693246062, + "learning_rate": 3.6961359018438025e-06, + "loss": 0.2573, + "step": 15411 + }, + { + "epoch": 0.7219749847753782, + "grad_norm": 0.5964363582241047, + "learning_rate": 3.6959693642503813e-06, + "loss": 0.2889, + "step": 15412 + }, + { + "epoch": 0.7220218297653066, + "grad_norm": 0.5564133588718775, + "learning_rate": 3.695802819774457e-06, + "loss": 0.2499, + "step": 15413 + }, + { + "epoch": 0.722068674755235, + "grad_norm": 0.6312857993877196, + "learning_rate": 3.6956362684169885e-06, + "loss": 0.2955, + "step": 15414 + }, + { + "epoch": 0.7221155197451633, + "grad_norm": 0.5737147684365841, + "learning_rate": 3.695469710178933e-06, + "loss": 0.2709, + "step": 15415 + }, + { + "epoch": 0.7221623647350915, + "grad_norm": 0.642174948870714, + "learning_rate": 3.6953031450612505e-06, + "loss": 0.3026, + "step": 15416 + }, + { + "epoch": 0.7222092097250199, + "grad_norm": 0.5617374568190302, + "learning_rate": 3.695136573064898e-06, + "loss": 0.2646, + "step": 15417 + }, + { + "epoch": 0.7222560547149482, + "grad_norm": 0.6312463370572156, + "learning_rate": 3.6949699941908352e-06, + "loss": 0.2928, + "step": 15418 + }, + { + "epoch": 0.7223028997048766, + "grad_norm": 0.5800998985432761, + "learning_rate": 3.6948034084400208e-06, + "loss": 0.2685, + "step": 15419 + }, + { + "epoch": 0.7223497446948048, + "grad_norm": 0.5936287141631749, + "learning_rate": 3.6946368158134125e-06, + "loss": 0.2686, + "step": 15420 + }, + { + "epoch": 0.7223965896847332, + "grad_norm": 0.6048147886730878, + "learning_rate": 3.69447021631197e-06, + "loss": 0.2775, + "step": 15421 + }, + { + "epoch": 0.7224434346746615, + "grad_norm": 0.5397285854513507, + "learning_rate": 3.6943036099366518e-06, + "loss": 0.2637, + "step": 15422 + }, + { + "epoch": 0.7224902796645899, + "grad_norm": 0.5921326934012658, + "learning_rate": 3.694136996688416e-06, + "loss": 0.2735, + "step": 15423 + }, + { + "epoch": 0.7225371246545182, + "grad_norm": 0.6431203663236676, + "learning_rate": 3.693970376568222e-06, + "loss": 0.3058, + "step": 15424 + }, + { + "epoch": 0.7225839696444465, + "grad_norm": 0.6131278170520594, + "learning_rate": 3.6938037495770294e-06, + "loss": 0.2963, + "step": 15425 + }, + { + "epoch": 0.7226308146343748, + "grad_norm": 0.6072528180671879, + "learning_rate": 3.693637115715795e-06, + "loss": 0.2818, + "step": 15426 + }, + { + "epoch": 0.7226776596243032, + "grad_norm": 0.5983828216258411, + "learning_rate": 3.6934704749854798e-06, + "loss": 0.2712, + "step": 15427 + }, + { + "epoch": 0.7227245046142315, + "grad_norm": 0.5865592165900734, + "learning_rate": 3.693303827387042e-06, + "loss": 0.2812, + "step": 15428 + }, + { + "epoch": 0.7227713496041598, + "grad_norm": 0.5462995401329207, + "learning_rate": 3.6931371729214406e-06, + "loss": 0.2713, + "step": 15429 + }, + { + "epoch": 0.7228181945940881, + "grad_norm": 0.6330002239107608, + "learning_rate": 3.6929705115896344e-06, + "loss": 0.3101, + "step": 15430 + }, + { + "epoch": 0.7228650395840165, + "grad_norm": 0.5506568141559847, + "learning_rate": 3.692803843392583e-06, + "loss": 0.2627, + "step": 15431 + }, + { + "epoch": 0.7229118845739448, + "grad_norm": 0.6050653245378704, + "learning_rate": 3.6926371683312447e-06, + "loss": 0.3013, + "step": 15432 + }, + { + "epoch": 0.7229587295638732, + "grad_norm": 0.6104145011542117, + "learning_rate": 3.6924704864065797e-06, + "loss": 0.3198, + "step": 15433 + }, + { + "epoch": 0.7230055745538014, + "grad_norm": 0.5844610638740128, + "learning_rate": 3.6923037976195465e-06, + "loss": 0.2765, + "step": 15434 + }, + { + "epoch": 0.7230524195437298, + "grad_norm": 0.5776888533792494, + "learning_rate": 3.692137101971105e-06, + "loss": 0.2856, + "step": 15435 + }, + { + "epoch": 0.7230992645336581, + "grad_norm": 0.5609114699158654, + "learning_rate": 3.6919703994622134e-06, + "loss": 0.2701, + "step": 15436 + }, + { + "epoch": 0.7231461095235865, + "grad_norm": 0.5924648359305759, + "learning_rate": 3.691803690093833e-06, + "loss": 0.2771, + "step": 15437 + }, + { + "epoch": 0.7231929545135147, + "grad_norm": 0.5369021703155948, + "learning_rate": 3.691636973866921e-06, + "loss": 0.2632, + "step": 15438 + }, + { + "epoch": 0.7232397995034431, + "grad_norm": 0.6019035097924836, + "learning_rate": 3.691470250782438e-06, + "loss": 0.2978, + "step": 15439 + }, + { + "epoch": 0.7232866444933714, + "grad_norm": 0.5643319369491766, + "learning_rate": 3.6913035208413428e-06, + "loss": 0.286, + "step": 15440 + }, + { + "epoch": 0.7233334894832998, + "grad_norm": 0.6301002651098317, + "learning_rate": 3.6911367840445956e-06, + "loss": 0.288, + "step": 15441 + }, + { + "epoch": 0.7233803344732281, + "grad_norm": 0.5505720531240174, + "learning_rate": 3.690970040393156e-06, + "loss": 0.262, + "step": 15442 + }, + { + "epoch": 0.7234271794631564, + "grad_norm": 0.5512601280340458, + "learning_rate": 3.690803289887982e-06, + "loss": 0.268, + "step": 15443 + }, + { + "epoch": 0.7234740244530847, + "grad_norm": 0.58025362257908, + "learning_rate": 3.6906365325300354e-06, + "loss": 0.2847, + "step": 15444 + }, + { + "epoch": 0.7235208694430131, + "grad_norm": 0.6122054432380255, + "learning_rate": 3.690469768320274e-06, + "loss": 0.2667, + "step": 15445 + }, + { + "epoch": 0.7235677144329414, + "grad_norm": 0.6686853487998603, + "learning_rate": 3.6903029972596595e-06, + "loss": 0.3107, + "step": 15446 + }, + { + "epoch": 0.7236145594228697, + "grad_norm": 0.6116951687755973, + "learning_rate": 3.6901362193491498e-06, + "loss": 0.2936, + "step": 15447 + }, + { + "epoch": 0.723661404412798, + "grad_norm": 0.6119632958156224, + "learning_rate": 3.689969434589705e-06, + "loss": 0.2753, + "step": 15448 + }, + { + "epoch": 0.7237082494027264, + "grad_norm": 0.6152912881125794, + "learning_rate": 3.6898026429822853e-06, + "loss": 0.2999, + "step": 15449 + }, + { + "epoch": 0.7237550943926547, + "grad_norm": 0.5804086769221011, + "learning_rate": 3.689635844527851e-06, + "loss": 0.2819, + "step": 15450 + }, + { + "epoch": 0.7238019393825831, + "grad_norm": 0.6062598743265623, + "learning_rate": 3.68946903922736e-06, + "loss": 0.2808, + "step": 15451 + }, + { + "epoch": 0.7238487843725113, + "grad_norm": 0.5736521454271172, + "learning_rate": 3.689302227081775e-06, + "loss": 0.2737, + "step": 15452 + }, + { + "epoch": 0.7238956293624397, + "grad_norm": 0.6334048827503713, + "learning_rate": 3.6891354080920544e-06, + "loss": 0.2883, + "step": 15453 + }, + { + "epoch": 0.723942474352368, + "grad_norm": 0.5809559479887063, + "learning_rate": 3.6889685822591577e-06, + "loss": 0.2792, + "step": 15454 + }, + { + "epoch": 0.7239893193422964, + "grad_norm": 0.5975026445279441, + "learning_rate": 3.6888017495840458e-06, + "loss": 0.2732, + "step": 15455 + }, + { + "epoch": 0.7240361643322246, + "grad_norm": 0.5751098477574011, + "learning_rate": 3.68863491006768e-06, + "loss": 0.2904, + "step": 15456 + }, + { + "epoch": 0.724083009322153, + "grad_norm": 0.5814849976211438, + "learning_rate": 3.688468063711017e-06, + "loss": 0.272, + "step": 15457 + }, + { + "epoch": 0.7241298543120813, + "grad_norm": 0.5866211419515848, + "learning_rate": 3.68830121051502e-06, + "loss": 0.2773, + "step": 15458 + }, + { + "epoch": 0.7241766993020097, + "grad_norm": 0.5882957403935912, + "learning_rate": 3.688134350480648e-06, + "loss": 0.3032, + "step": 15459 + }, + { + "epoch": 0.724223544291938, + "grad_norm": 0.5384753929885576, + "learning_rate": 3.687967483608862e-06, + "loss": 0.2566, + "step": 15460 + }, + { + "epoch": 0.7242703892818663, + "grad_norm": 0.5767449958869412, + "learning_rate": 3.6878006099006214e-06, + "loss": 0.2613, + "step": 15461 + }, + { + "epoch": 0.7243172342717946, + "grad_norm": 0.5976058620620082, + "learning_rate": 3.687633729356887e-06, + "loss": 0.2789, + "step": 15462 + }, + { + "epoch": 0.724364079261723, + "grad_norm": 0.607677934158203, + "learning_rate": 3.6874668419786184e-06, + "loss": 0.3116, + "step": 15463 + }, + { + "epoch": 0.7244109242516513, + "grad_norm": 0.574373351490963, + "learning_rate": 3.687299947766777e-06, + "loss": 0.2783, + "step": 15464 + }, + { + "epoch": 0.7244577692415796, + "grad_norm": 0.5614354288749256, + "learning_rate": 3.687133046722323e-06, + "loss": 0.2518, + "step": 15465 + }, + { + "epoch": 0.7245046142315079, + "grad_norm": 0.5718352733400744, + "learning_rate": 3.686966138846216e-06, + "loss": 0.2727, + "step": 15466 + }, + { + "epoch": 0.7245514592214363, + "grad_norm": 0.6577304958464057, + "learning_rate": 3.686799224139418e-06, + "loss": 0.2918, + "step": 15467 + }, + { + "epoch": 0.7245983042113646, + "grad_norm": 0.6031223403167734, + "learning_rate": 3.6866323026028885e-06, + "loss": 0.2869, + "step": 15468 + }, + { + "epoch": 0.724645149201293, + "grad_norm": 0.5999625223897713, + "learning_rate": 3.686465374237588e-06, + "loss": 0.2747, + "step": 15469 + }, + { + "epoch": 0.7246919941912212, + "grad_norm": 0.6077057685922337, + "learning_rate": 3.686298439044478e-06, + "loss": 0.3005, + "step": 15470 + }, + { + "epoch": 0.7247388391811496, + "grad_norm": 0.595031010517552, + "learning_rate": 3.6861314970245186e-06, + "loss": 0.2606, + "step": 15471 + }, + { + "epoch": 0.7247856841710779, + "grad_norm": 0.5922121088527325, + "learning_rate": 3.68596454817867e-06, + "loss": 0.281, + "step": 15472 + }, + { + "epoch": 0.7248325291610063, + "grad_norm": 0.6203565712228079, + "learning_rate": 3.685797592507894e-06, + "loss": 0.3012, + "step": 15473 + }, + { + "epoch": 0.7248793741509345, + "grad_norm": 0.8832271406834357, + "learning_rate": 3.6856306300131513e-06, + "loss": 0.2801, + "step": 15474 + }, + { + "epoch": 0.7249262191408629, + "grad_norm": 0.6066429650316334, + "learning_rate": 3.685463660695401e-06, + "loss": 0.2843, + "step": 15475 + }, + { + "epoch": 0.7249730641307912, + "grad_norm": 0.6404691122003132, + "learning_rate": 3.685296684555606e-06, + "loss": 0.3098, + "step": 15476 + }, + { + "epoch": 0.7250199091207196, + "grad_norm": 0.5995747498314477, + "learning_rate": 3.6851297015947274e-06, + "loss": 0.2958, + "step": 15477 + }, + { + "epoch": 0.7250667541106479, + "grad_norm": 0.5949580485942253, + "learning_rate": 3.6849627118137236e-06, + "loss": 0.2944, + "step": 15478 + }, + { + "epoch": 0.7251135991005762, + "grad_norm": 0.5664229057683737, + "learning_rate": 3.6847957152135583e-06, + "loss": 0.2757, + "step": 15479 + }, + { + "epoch": 0.7251604440905045, + "grad_norm": 0.5975647776797899, + "learning_rate": 3.6846287117951906e-06, + "loss": 0.2935, + "step": 15480 + }, + { + "epoch": 0.7252072890804329, + "grad_norm": 0.5880597690916881, + "learning_rate": 3.6844617015595833e-06, + "loss": 0.2708, + "step": 15481 + }, + { + "epoch": 0.7252541340703612, + "grad_norm": 0.5831356459842192, + "learning_rate": 3.6842946845076965e-06, + "loss": 0.2904, + "step": 15482 + }, + { + "epoch": 0.7253009790602895, + "grad_norm": 0.611603279925659, + "learning_rate": 3.684127660640491e-06, + "loss": 0.2855, + "step": 15483 + }, + { + "epoch": 0.7253478240502178, + "grad_norm": 0.5268587399808539, + "learning_rate": 3.6839606299589294e-06, + "loss": 0.2601, + "step": 15484 + }, + { + "epoch": 0.7253946690401462, + "grad_norm": 0.6088974178267176, + "learning_rate": 3.6837935924639706e-06, + "loss": 0.2821, + "step": 15485 + }, + { + "epoch": 0.7254415140300745, + "grad_norm": 0.5960782851889784, + "learning_rate": 3.683626548156578e-06, + "loss": 0.253, + "step": 15486 + }, + { + "epoch": 0.7254883590200029, + "grad_norm": 0.6027222204762037, + "learning_rate": 3.6834594970377115e-06, + "loss": 0.2916, + "step": 15487 + }, + { + "epoch": 0.7255352040099311, + "grad_norm": 0.6094830937628812, + "learning_rate": 3.6832924391083336e-06, + "loss": 0.291, + "step": 15488 + }, + { + "epoch": 0.7255820489998595, + "grad_norm": 0.593350362770431, + "learning_rate": 3.683125374369405e-06, + "loss": 0.2937, + "step": 15489 + }, + { + "epoch": 0.7256288939897878, + "grad_norm": 0.6116883760988542, + "learning_rate": 3.6829583028218874e-06, + "loss": 0.2981, + "step": 15490 + }, + { + "epoch": 0.7256757389797162, + "grad_norm": 0.5715883736530245, + "learning_rate": 3.682791224466742e-06, + "loss": 0.242, + "step": 15491 + }, + { + "epoch": 0.7257225839696444, + "grad_norm": 0.5804065597078648, + "learning_rate": 3.6826241393049307e-06, + "loss": 0.2761, + "step": 15492 + }, + { + "epoch": 0.7257694289595727, + "grad_norm": 0.5786500076579809, + "learning_rate": 3.682457047337414e-06, + "loss": 0.262, + "step": 15493 + }, + { + "epoch": 0.7258162739495011, + "grad_norm": 0.6272954264154369, + "learning_rate": 3.682289948565154e-06, + "loss": 0.2824, + "step": 15494 + }, + { + "epoch": 0.7258631189394295, + "grad_norm": 0.5629921477046769, + "learning_rate": 3.6821228429891138e-06, + "loss": 0.2631, + "step": 15495 + }, + { + "epoch": 0.7259099639293578, + "grad_norm": 0.6075607073074559, + "learning_rate": 3.6819557306102527e-06, + "loss": 0.2781, + "step": 15496 + }, + { + "epoch": 0.725956808919286, + "grad_norm": 0.5871532074482578, + "learning_rate": 3.6817886114295333e-06, + "loss": 0.2859, + "step": 15497 + }, + { + "epoch": 0.7260036539092144, + "grad_norm": 0.5615007376424966, + "learning_rate": 3.681621485447918e-06, + "loss": 0.2734, + "step": 15498 + }, + { + "epoch": 0.7260504988991427, + "grad_norm": 0.6127718245878704, + "learning_rate": 3.6814543526663683e-06, + "loss": 0.3018, + "step": 15499 + }, + { + "epoch": 0.7260973438890711, + "grad_norm": 0.6487004548285662, + "learning_rate": 3.681287213085845e-06, + "loss": 0.2974, + "step": 15500 + }, + { + "epoch": 0.7261441888789993, + "grad_norm": 0.5794001547382539, + "learning_rate": 3.681120066707311e-06, + "loss": 0.2756, + "step": 15501 + }, + { + "epoch": 0.7261910338689277, + "grad_norm": 0.58902384680302, + "learning_rate": 3.6809529135317285e-06, + "loss": 0.251, + "step": 15502 + }, + { + "epoch": 0.726237878858856, + "grad_norm": 0.581292834634589, + "learning_rate": 3.6807857535600576e-06, + "loss": 0.281, + "step": 15503 + }, + { + "epoch": 0.7262847238487844, + "grad_norm": 0.6060627199519666, + "learning_rate": 3.6806185867932627e-06, + "loss": 0.2969, + "step": 15504 + }, + { + "epoch": 0.7263315688387127, + "grad_norm": 0.602071105137095, + "learning_rate": 3.6804514132323043e-06, + "loss": 0.3035, + "step": 15505 + }, + { + "epoch": 0.726378413828641, + "grad_norm": 0.5894114599246739, + "learning_rate": 3.680284232878144e-06, + "loss": 0.3059, + "step": 15506 + }, + { + "epoch": 0.7264252588185693, + "grad_norm": 0.6422078858039701, + "learning_rate": 3.6801170457317446e-06, + "loss": 0.2955, + "step": 15507 + }, + { + "epoch": 0.7264721038084977, + "grad_norm": 0.5676642732376688, + "learning_rate": 3.679949851794069e-06, + "loss": 0.2813, + "step": 15508 + }, + { + "epoch": 0.726518948798426, + "grad_norm": 0.5714081536200214, + "learning_rate": 3.6797826510660785e-06, + "loss": 0.265, + "step": 15509 + }, + { + "epoch": 0.7265657937883543, + "grad_norm": 0.592733854994677, + "learning_rate": 3.6796154435487347e-06, + "loss": 0.2999, + "step": 15510 + }, + { + "epoch": 0.7266126387782826, + "grad_norm": 0.5588195794195084, + "learning_rate": 3.6794482292430013e-06, + "loss": 0.2826, + "step": 15511 + }, + { + "epoch": 0.726659483768211, + "grad_norm": 0.5494140366089202, + "learning_rate": 3.679281008149839e-06, + "loss": 0.2613, + "step": 15512 + }, + { + "epoch": 0.7267063287581393, + "grad_norm": 0.5874080433429001, + "learning_rate": 3.6791137802702116e-06, + "loss": 0.2752, + "step": 15513 + }, + { + "epoch": 0.7267531737480677, + "grad_norm": 0.5222621647463291, + "learning_rate": 3.67894654560508e-06, + "loss": 0.2531, + "step": 15514 + }, + { + "epoch": 0.7268000187379959, + "grad_norm": 0.5796596051850346, + "learning_rate": 3.678779304155408e-06, + "loss": 0.2987, + "step": 15515 + }, + { + "epoch": 0.7268468637279243, + "grad_norm": 0.5659597076152028, + "learning_rate": 3.678612055922157e-06, + "loss": 0.2754, + "step": 15516 + }, + { + "epoch": 0.7268937087178526, + "grad_norm": 0.5598133532398141, + "learning_rate": 3.67844480090629e-06, + "loss": 0.2722, + "step": 15517 + }, + { + "epoch": 0.726940553707781, + "grad_norm": 0.5957699157858799, + "learning_rate": 3.6782775391087687e-06, + "loss": 0.2512, + "step": 15518 + }, + { + "epoch": 0.7269873986977092, + "grad_norm": 0.5568818297482953, + "learning_rate": 3.6781102705305575e-06, + "loss": 0.271, + "step": 15519 + }, + { + "epoch": 0.7270342436876376, + "grad_norm": 0.5511828724118049, + "learning_rate": 3.6779429951726175e-06, + "loss": 0.28, + "step": 15520 + }, + { + "epoch": 0.7270810886775659, + "grad_norm": 0.540980459851557, + "learning_rate": 3.677775713035911e-06, + "loss": 0.289, + "step": 15521 + }, + { + "epoch": 0.7271279336674943, + "grad_norm": 0.5671252679095573, + "learning_rate": 3.677608424121402e-06, + "loss": 0.2987, + "step": 15522 + }, + { + "epoch": 0.7271747786574226, + "grad_norm": 0.5725673518957445, + "learning_rate": 3.6774411284300524e-06, + "loss": 0.2599, + "step": 15523 + }, + { + "epoch": 0.7272216236473509, + "grad_norm": 0.5147999668221954, + "learning_rate": 3.677273825962824e-06, + "loss": 0.2684, + "step": 15524 + }, + { + "epoch": 0.7272684686372792, + "grad_norm": 0.6032444461494302, + "learning_rate": 3.6771065167206817e-06, + "loss": 0.2861, + "step": 15525 + }, + { + "epoch": 0.7273153136272076, + "grad_norm": 0.5899478251739613, + "learning_rate": 3.676939200704588e-06, + "loss": 0.274, + "step": 15526 + }, + { + "epoch": 0.7273621586171359, + "grad_norm": 0.5841488107871555, + "learning_rate": 3.6767718779155037e-06, + "loss": 0.2767, + "step": 15527 + }, + { + "epoch": 0.7274090036070642, + "grad_norm": 0.6099056901602884, + "learning_rate": 3.6766045483543927e-06, + "loss": 0.2735, + "step": 15528 + }, + { + "epoch": 0.7274558485969925, + "grad_norm": 0.6181190354545081, + "learning_rate": 3.6764372120222193e-06, + "loss": 0.2662, + "step": 15529 + }, + { + "epoch": 0.7275026935869209, + "grad_norm": 0.5982245001981042, + "learning_rate": 3.676269868919945e-06, + "loss": 0.283, + "step": 15530 + }, + { + "epoch": 0.7275495385768492, + "grad_norm": 0.5591772781364512, + "learning_rate": 3.6761025190485335e-06, + "loss": 0.2932, + "step": 15531 + }, + { + "epoch": 0.7275963835667776, + "grad_norm": 0.5838490366596728, + "learning_rate": 3.6759351624089468e-06, + "loss": 0.2765, + "step": 15532 + }, + { + "epoch": 0.7276432285567058, + "grad_norm": 0.5897640877250954, + "learning_rate": 3.67576779900215e-06, + "loss": 0.3038, + "step": 15533 + }, + { + "epoch": 0.7276900735466342, + "grad_norm": 0.5578925842382623, + "learning_rate": 3.6756004288291037e-06, + "loss": 0.2745, + "step": 15534 + }, + { + "epoch": 0.7277369185365625, + "grad_norm": 0.6045365000876075, + "learning_rate": 3.6754330518907734e-06, + "loss": 0.2722, + "step": 15535 + }, + { + "epoch": 0.7277837635264909, + "grad_norm": 0.5626134839197056, + "learning_rate": 3.675265668188121e-06, + "loss": 0.2703, + "step": 15536 + }, + { + "epoch": 0.7278306085164191, + "grad_norm": 0.576834778825943, + "learning_rate": 3.67509827772211e-06, + "loss": 0.2802, + "step": 15537 + }, + { + "epoch": 0.7278774535063475, + "grad_norm": 0.6099282161787613, + "learning_rate": 3.6749308804937045e-06, + "loss": 0.2696, + "step": 15538 + }, + { + "epoch": 0.7279242984962758, + "grad_norm": 0.5202615103422928, + "learning_rate": 3.674763476503866e-06, + "loss": 0.2396, + "step": 15539 + }, + { + "epoch": 0.7279711434862042, + "grad_norm": 0.5686189365754977, + "learning_rate": 3.6745960657535594e-06, + "loss": 0.2814, + "step": 15540 + }, + { + "epoch": 0.7280179884761325, + "grad_norm": 0.5918727020477801, + "learning_rate": 3.674428648243748e-06, + "loss": 0.2841, + "step": 15541 + }, + { + "epoch": 0.7280648334660608, + "grad_norm": 0.5872335080279856, + "learning_rate": 3.6742612239753946e-06, + "loss": 0.2946, + "step": 15542 + }, + { + "epoch": 0.7281116784559891, + "grad_norm": 0.6439346562110765, + "learning_rate": 3.674093792949463e-06, + "loss": 0.2832, + "step": 15543 + }, + { + "epoch": 0.7281585234459175, + "grad_norm": 0.5357413818631085, + "learning_rate": 3.673926355166917e-06, + "loss": 0.2703, + "step": 15544 + }, + { + "epoch": 0.7282053684358458, + "grad_norm": 0.5702886227407562, + "learning_rate": 3.673758910628719e-06, + "loss": 0.286, + "step": 15545 + }, + { + "epoch": 0.7282522134257741, + "grad_norm": 0.6284501045115863, + "learning_rate": 3.6735914593358336e-06, + "loss": 0.296, + "step": 15546 + }, + { + "epoch": 0.7282990584157024, + "grad_norm": 0.5720923933109681, + "learning_rate": 3.6734240012892257e-06, + "loss": 0.2731, + "step": 15547 + }, + { + "epoch": 0.7283459034056308, + "grad_norm": 0.57485425846072, + "learning_rate": 3.6732565364898565e-06, + "loss": 0.2895, + "step": 15548 + }, + { + "epoch": 0.7283927483955591, + "grad_norm": 0.6057784847320772, + "learning_rate": 3.673089064938691e-06, + "loss": 0.2773, + "step": 15549 + }, + { + "epoch": 0.7284395933854875, + "grad_norm": 0.6120616036847143, + "learning_rate": 3.6729215866366924e-06, + "loss": 0.2614, + "step": 15550 + }, + { + "epoch": 0.7284864383754157, + "grad_norm": 0.5956118144959318, + "learning_rate": 3.6727541015848255e-06, + "loss": 0.2716, + "step": 15551 + }, + { + "epoch": 0.7285332833653441, + "grad_norm": 0.5174430412445725, + "learning_rate": 3.672586609784053e-06, + "loss": 0.2552, + "step": 15552 + }, + { + "epoch": 0.7285801283552724, + "grad_norm": 0.5737635262525983, + "learning_rate": 3.67241911123534e-06, + "loss": 0.277, + "step": 15553 + }, + { + "epoch": 0.7286269733452008, + "grad_norm": 0.6229380990479704, + "learning_rate": 3.6722516059396497e-06, + "loss": 0.3021, + "step": 15554 + }, + { + "epoch": 0.728673818335129, + "grad_norm": 0.581307253956241, + "learning_rate": 3.6720840938979445e-06, + "loss": 0.2821, + "step": 15555 + }, + { + "epoch": 0.7287206633250574, + "grad_norm": 0.5654593891864805, + "learning_rate": 3.6719165751111917e-06, + "loss": 0.2961, + "step": 15556 + }, + { + "epoch": 0.7287675083149857, + "grad_norm": 0.5999403636093934, + "learning_rate": 3.671749049580352e-06, + "loss": 0.2731, + "step": 15557 + }, + { + "epoch": 0.7288143533049141, + "grad_norm": 0.6473346860176401, + "learning_rate": 3.6715815173063925e-06, + "loss": 0.3186, + "step": 15558 + }, + { + "epoch": 0.7288611982948424, + "grad_norm": 0.5778582791878055, + "learning_rate": 3.6714139782902754e-06, + "loss": 0.2773, + "step": 15559 + }, + { + "epoch": 0.7289080432847707, + "grad_norm": 0.5805135439456574, + "learning_rate": 3.671246432532965e-06, + "loss": 0.2703, + "step": 15560 + }, + { + "epoch": 0.728954888274699, + "grad_norm": 0.5675369041870558, + "learning_rate": 3.671078880035426e-06, + "loss": 0.2624, + "step": 15561 + }, + { + "epoch": 0.7290017332646274, + "grad_norm": 0.5710426431128868, + "learning_rate": 3.6709113207986223e-06, + "loss": 0.2589, + "step": 15562 + }, + { + "epoch": 0.7290485782545557, + "grad_norm": 0.5659200195317393, + "learning_rate": 3.670743754823518e-06, + "loss": 0.2873, + "step": 15563 + }, + { + "epoch": 0.729095423244484, + "grad_norm": 0.6166956207234701, + "learning_rate": 3.6705761821110776e-06, + "loss": 0.2918, + "step": 15564 + }, + { + "epoch": 0.7291422682344123, + "grad_norm": 0.5629138861658539, + "learning_rate": 3.6704086026622664e-06, + "loss": 0.2771, + "step": 15565 + }, + { + "epoch": 0.7291891132243407, + "grad_norm": 0.6213482147388031, + "learning_rate": 3.6702410164780465e-06, + "loss": 0.2954, + "step": 15566 + }, + { + "epoch": 0.729235958214269, + "grad_norm": 0.6282611752125535, + "learning_rate": 3.6700734235593844e-06, + "loss": 0.2818, + "step": 15567 + }, + { + "epoch": 0.7292828032041974, + "grad_norm": 0.5804103344017182, + "learning_rate": 3.6699058239072437e-06, + "loss": 0.2818, + "step": 15568 + }, + { + "epoch": 0.7293296481941256, + "grad_norm": 0.5761602460027283, + "learning_rate": 3.66973821752259e-06, + "loss": 0.2954, + "step": 15569 + }, + { + "epoch": 0.729376493184054, + "grad_norm": 0.6310335463560603, + "learning_rate": 3.669570604406385e-06, + "loss": 0.2744, + "step": 15570 + }, + { + "epoch": 0.7294233381739823, + "grad_norm": 0.5738843980402693, + "learning_rate": 3.6694029845595968e-06, + "loss": 0.2746, + "step": 15571 + }, + { + "epoch": 0.7294701831639107, + "grad_norm": 0.5698594889225729, + "learning_rate": 3.6692353579831873e-06, + "loss": 0.2857, + "step": 15572 + }, + { + "epoch": 0.7295170281538389, + "grad_norm": 0.5772642831265431, + "learning_rate": 3.6690677246781227e-06, + "loss": 0.2709, + "step": 15573 + }, + { + "epoch": 0.7295638731437672, + "grad_norm": 0.5451168119088307, + "learning_rate": 3.668900084645367e-06, + "loss": 0.2751, + "step": 15574 + }, + { + "epoch": 0.7296107181336956, + "grad_norm": 0.5342151382178356, + "learning_rate": 3.668732437885886e-06, + "loss": 0.2561, + "step": 15575 + }, + { + "epoch": 0.729657563123624, + "grad_norm": 0.624717784617333, + "learning_rate": 3.6685647844006424e-06, + "loss": 0.2936, + "step": 15576 + }, + { + "epoch": 0.7297044081135523, + "grad_norm": 0.6087763095611494, + "learning_rate": 3.668397124190602e-06, + "loss": 0.286, + "step": 15577 + }, + { + "epoch": 0.7297512531034805, + "grad_norm": 0.617908214343186, + "learning_rate": 3.66822945725673e-06, + "loss": 0.2877, + "step": 15578 + }, + { + "epoch": 0.7297980980934089, + "grad_norm": 0.5658829339419921, + "learning_rate": 3.668061783599992e-06, + "loss": 0.2676, + "step": 15579 + }, + { + "epoch": 0.7298449430833372, + "grad_norm": 0.6088247741666504, + "learning_rate": 3.6678941032213513e-06, + "loss": 0.295, + "step": 15580 + }, + { + "epoch": 0.7298917880732656, + "grad_norm": 0.6210198608118689, + "learning_rate": 3.6677264161217734e-06, + "loss": 0.296, + "step": 15581 + }, + { + "epoch": 0.7299386330631938, + "grad_norm": 0.5525954295765263, + "learning_rate": 3.667558722302224e-06, + "loss": 0.2745, + "step": 15582 + }, + { + "epoch": 0.7299854780531222, + "grad_norm": 0.6005188279202314, + "learning_rate": 3.6673910217636673e-06, + "loss": 0.2727, + "step": 15583 + }, + { + "epoch": 0.7300323230430505, + "grad_norm": 0.5444553536974548, + "learning_rate": 3.667223314507069e-06, + "loss": 0.2635, + "step": 15584 + }, + { + "epoch": 0.7300791680329789, + "grad_norm": 0.6369300958519811, + "learning_rate": 3.6670556005333935e-06, + "loss": 0.2677, + "step": 15585 + }, + { + "epoch": 0.7301260130229072, + "grad_norm": 0.6071681313710622, + "learning_rate": 3.6668878798436065e-06, + "loss": 0.3003, + "step": 15586 + }, + { + "epoch": 0.7301728580128355, + "grad_norm": 0.5628681401202682, + "learning_rate": 3.666720152438673e-06, + "loss": 0.2652, + "step": 15587 + }, + { + "epoch": 0.7302197030027638, + "grad_norm": 0.5962567502798927, + "learning_rate": 3.666552418319558e-06, + "loss": 0.2729, + "step": 15588 + }, + { + "epoch": 0.7302665479926922, + "grad_norm": 0.5326223305827046, + "learning_rate": 3.666384677487228e-06, + "loss": 0.2732, + "step": 15589 + }, + { + "epoch": 0.7303133929826205, + "grad_norm": 0.5925217073940273, + "learning_rate": 3.6662169299426465e-06, + "loss": 0.2737, + "step": 15590 + }, + { + "epoch": 0.7303602379725488, + "grad_norm": 0.618362929689589, + "learning_rate": 3.66604917568678e-06, + "loss": 0.2878, + "step": 15591 + }, + { + "epoch": 0.7304070829624771, + "grad_norm": 0.566577836608685, + "learning_rate": 3.6658814147205934e-06, + "loss": 0.255, + "step": 15592 + }, + { + "epoch": 0.7304539279524055, + "grad_norm": 0.5406295801776353, + "learning_rate": 3.665713647045053e-06, + "loss": 0.2553, + "step": 15593 + }, + { + "epoch": 0.7305007729423338, + "grad_norm": 0.60050302899239, + "learning_rate": 3.665545872661122e-06, + "loss": 0.2983, + "step": 15594 + }, + { + "epoch": 0.7305476179322622, + "grad_norm": 0.5664837804343474, + "learning_rate": 3.665378091569768e-06, + "loss": 0.2835, + "step": 15595 + }, + { + "epoch": 0.7305944629221904, + "grad_norm": 0.5957985785505693, + "learning_rate": 3.6652103037719573e-06, + "loss": 0.2827, + "step": 15596 + }, + { + "epoch": 0.7306413079121188, + "grad_norm": 0.5688110911721682, + "learning_rate": 3.6650425092686526e-06, + "loss": 0.2799, + "step": 15597 + }, + { + "epoch": 0.7306881529020471, + "grad_norm": 0.5741547762167069, + "learning_rate": 3.6648747080608216e-06, + "loss": 0.2686, + "step": 15598 + }, + { + "epoch": 0.7307349978919755, + "grad_norm": 0.5901113588852239, + "learning_rate": 3.6647069001494296e-06, + "loss": 0.2899, + "step": 15599 + }, + { + "epoch": 0.7307818428819037, + "grad_norm": 0.5785797056046011, + "learning_rate": 3.664539085535442e-06, + "loss": 0.2904, + "step": 15600 + }, + { + "epoch": 0.7308286878718321, + "grad_norm": 0.6055372010089838, + "learning_rate": 3.664371264219825e-06, + "loss": 0.2968, + "step": 15601 + }, + { + "epoch": 0.7308755328617604, + "grad_norm": 0.570582303841337, + "learning_rate": 3.6642034362035435e-06, + "loss": 0.2876, + "step": 15602 + }, + { + "epoch": 0.7309223778516888, + "grad_norm": 0.5706412784364797, + "learning_rate": 3.6640356014875643e-06, + "loss": 0.2902, + "step": 15603 + }, + { + "epoch": 0.7309692228416171, + "grad_norm": 0.5962258384886584, + "learning_rate": 3.663867760072852e-06, + "loss": 0.2763, + "step": 15604 + }, + { + "epoch": 0.7310160678315454, + "grad_norm": 0.5666221446317051, + "learning_rate": 3.6636999119603744e-06, + "loss": 0.272, + "step": 15605 + }, + { + "epoch": 0.7310629128214737, + "grad_norm": 0.5284274863446039, + "learning_rate": 3.6635320571510953e-06, + "loss": 0.2509, + "step": 15606 + }, + { + "epoch": 0.7311097578114021, + "grad_norm": 0.6173866814842714, + "learning_rate": 3.663364195645982e-06, + "loss": 0.3099, + "step": 15607 + }, + { + "epoch": 0.7311566028013304, + "grad_norm": 0.651934878807431, + "learning_rate": 3.6631963274460004e-06, + "loss": 0.2947, + "step": 15608 + }, + { + "epoch": 0.7312034477912587, + "grad_norm": 0.5403994210898152, + "learning_rate": 3.6630284525521158e-06, + "loss": 0.2672, + "step": 15609 + }, + { + "epoch": 0.731250292781187, + "grad_norm": 0.5467292898345684, + "learning_rate": 3.6628605709652953e-06, + "loss": 0.2753, + "step": 15610 + }, + { + "epoch": 0.7312971377711154, + "grad_norm": 0.5631682705273137, + "learning_rate": 3.6626926826865046e-06, + "loss": 0.2816, + "step": 15611 + }, + { + "epoch": 0.7313439827610437, + "grad_norm": 0.5718832994817091, + "learning_rate": 3.662524787716709e-06, + "loss": 0.2786, + "step": 15612 + }, + { + "epoch": 0.7313908277509721, + "grad_norm": 0.6017803095888766, + "learning_rate": 3.6623568860568757e-06, + "loss": 0.2553, + "step": 15613 + }, + { + "epoch": 0.7314376727409003, + "grad_norm": 0.611587081146794, + "learning_rate": 3.6621889777079717e-06, + "loss": 0.2975, + "step": 15614 + }, + { + "epoch": 0.7314845177308287, + "grad_norm": 0.5952758736998592, + "learning_rate": 3.662021062670961e-06, + "loss": 0.2838, + "step": 15615 + }, + { + "epoch": 0.731531362720757, + "grad_norm": 0.6129572499045582, + "learning_rate": 3.661853140946811e-06, + "loss": 0.3196, + "step": 15616 + }, + { + "epoch": 0.7315782077106854, + "grad_norm": 0.5476931949665881, + "learning_rate": 3.6616852125364892e-06, + "loss": 0.2528, + "step": 15617 + }, + { + "epoch": 0.7316250527006136, + "grad_norm": 0.5654032198827296, + "learning_rate": 3.66151727744096e-06, + "loss": 0.289, + "step": 15618 + }, + { + "epoch": 0.731671897690542, + "grad_norm": 0.5305366181043428, + "learning_rate": 3.6613493356611913e-06, + "loss": 0.2655, + "step": 15619 + }, + { + "epoch": 0.7317187426804703, + "grad_norm": 0.5591207328004464, + "learning_rate": 3.6611813871981495e-06, + "loss": 0.2715, + "step": 15620 + }, + { + "epoch": 0.7317655876703987, + "grad_norm": 0.5806614246135812, + "learning_rate": 3.6610134320528e-06, + "loss": 0.2847, + "step": 15621 + }, + { + "epoch": 0.731812432660327, + "grad_norm": 0.6148009894241196, + "learning_rate": 3.6608454702261097e-06, + "loss": 0.2735, + "step": 15622 + }, + { + "epoch": 0.7318592776502553, + "grad_norm": 0.6206094672224386, + "learning_rate": 3.6606775017190467e-06, + "loss": 0.3036, + "step": 15623 + }, + { + "epoch": 0.7319061226401836, + "grad_norm": 0.5666954846704794, + "learning_rate": 3.6605095265325758e-06, + "loss": 0.2819, + "step": 15624 + }, + { + "epoch": 0.731952967630112, + "grad_norm": 0.5803364283099641, + "learning_rate": 3.660341544667664e-06, + "loss": 0.2757, + "step": 15625 + }, + { + "epoch": 0.7319998126200403, + "grad_norm": 0.5548760103593336, + "learning_rate": 3.660173556125278e-06, + "loss": 0.2798, + "step": 15626 + }, + { + "epoch": 0.7320466576099686, + "grad_norm": 0.5855455270153235, + "learning_rate": 3.6600055609063858e-06, + "loss": 0.2631, + "step": 15627 + }, + { + "epoch": 0.7320935025998969, + "grad_norm": 0.5892242713884744, + "learning_rate": 3.659837559011952e-06, + "loss": 0.2655, + "step": 15628 + }, + { + "epoch": 0.7321403475898253, + "grad_norm": 0.5697530757551891, + "learning_rate": 3.659669550442946e-06, + "loss": 0.2657, + "step": 15629 + }, + { + "epoch": 0.7321871925797536, + "grad_norm": 0.6150388803314362, + "learning_rate": 3.6595015352003314e-06, + "loss": 0.2794, + "step": 15630 + }, + { + "epoch": 0.732234037569682, + "grad_norm": 0.5990074647483127, + "learning_rate": 3.6593335132850784e-06, + "loss": 0.2803, + "step": 15631 + }, + { + "epoch": 0.7322808825596102, + "grad_norm": 0.6279473788712869, + "learning_rate": 3.659165484698152e-06, + "loss": 0.2814, + "step": 15632 + }, + { + "epoch": 0.7323277275495386, + "grad_norm": 0.5919965256573564, + "learning_rate": 3.658997449440519e-06, + "loss": 0.2854, + "step": 15633 + }, + { + "epoch": 0.7323745725394669, + "grad_norm": 0.6003160381290421, + "learning_rate": 3.658829407513147e-06, + "loss": 0.299, + "step": 15634 + }, + { + "epoch": 0.7324214175293953, + "grad_norm": 0.5834608689920352, + "learning_rate": 3.6586613589170047e-06, + "loss": 0.2971, + "step": 15635 + }, + { + "epoch": 0.7324682625193235, + "grad_norm": 0.6346010249065486, + "learning_rate": 3.6584933036530558e-06, + "loss": 0.2673, + "step": 15636 + }, + { + "epoch": 0.7325151075092519, + "grad_norm": 0.6640195489360615, + "learning_rate": 3.6583252417222694e-06, + "loss": 0.2759, + "step": 15637 + }, + { + "epoch": 0.7325619524991802, + "grad_norm": 0.6233003201333387, + "learning_rate": 3.658157173125612e-06, + "loss": 0.292, + "step": 15638 + }, + { + "epoch": 0.7326087974891086, + "grad_norm": 0.5988250814228646, + "learning_rate": 3.657989097864052e-06, + "loss": 0.2994, + "step": 15639 + }, + { + "epoch": 0.7326556424790369, + "grad_norm": 0.5757564892470767, + "learning_rate": 3.6578210159385553e-06, + "loss": 0.2729, + "step": 15640 + }, + { + "epoch": 0.7327024874689652, + "grad_norm": 0.5928218291852416, + "learning_rate": 3.65765292735009e-06, + "loss": 0.27, + "step": 15641 + }, + { + "epoch": 0.7327493324588935, + "grad_norm": 0.6188305146808903, + "learning_rate": 3.657484832099623e-06, + "loss": 0.2603, + "step": 15642 + }, + { + "epoch": 0.7327961774488219, + "grad_norm": 0.6291835527246996, + "learning_rate": 3.6573167301881214e-06, + "loss": 0.29, + "step": 15643 + }, + { + "epoch": 0.7328430224387502, + "grad_norm": 0.5827908426966866, + "learning_rate": 3.657148621616553e-06, + "loss": 0.2893, + "step": 15644 + }, + { + "epoch": 0.7328898674286785, + "grad_norm": 0.5873443401426143, + "learning_rate": 3.6569805063858854e-06, + "loss": 0.2786, + "step": 15645 + }, + { + "epoch": 0.7329367124186068, + "grad_norm": 0.5606420394687924, + "learning_rate": 3.6568123844970854e-06, + "loss": 0.2684, + "step": 15646 + }, + { + "epoch": 0.7329835574085352, + "grad_norm": 0.6246456631187524, + "learning_rate": 3.656644255951121e-06, + "loss": 0.3073, + "step": 15647 + }, + { + "epoch": 0.7330304023984635, + "grad_norm": 0.6300884080099997, + "learning_rate": 3.6564761207489595e-06, + "loss": 0.2822, + "step": 15648 + }, + { + "epoch": 0.7330772473883919, + "grad_norm": 0.5754469221387148, + "learning_rate": 3.6563079788915695e-06, + "loss": 0.2874, + "step": 15649 + }, + { + "epoch": 0.7331240923783201, + "grad_norm": 0.67212484009479, + "learning_rate": 3.6561398303799166e-06, + "loss": 0.2853, + "step": 15650 + }, + { + "epoch": 0.7331709373682485, + "grad_norm": 0.6277354587922814, + "learning_rate": 3.65597167521497e-06, + "loss": 0.3091, + "step": 15651 + }, + { + "epoch": 0.7332177823581768, + "grad_norm": 0.6310527005082966, + "learning_rate": 3.655803513397697e-06, + "loss": 0.2903, + "step": 15652 + }, + { + "epoch": 0.7332646273481052, + "grad_norm": 0.584832182811863, + "learning_rate": 3.6556353449290648e-06, + "loss": 0.2881, + "step": 15653 + }, + { + "epoch": 0.7333114723380334, + "grad_norm": 0.5973995302758422, + "learning_rate": 3.6554671698100424e-06, + "loss": 0.2709, + "step": 15654 + }, + { + "epoch": 0.7333583173279618, + "grad_norm": 0.6394266919164335, + "learning_rate": 3.655298988041596e-06, + "loss": 0.3007, + "step": 15655 + }, + { + "epoch": 0.7334051623178901, + "grad_norm": 0.5443137115530112, + "learning_rate": 3.6551307996246952e-06, + "loss": 0.2618, + "step": 15656 + }, + { + "epoch": 0.7334520073078185, + "grad_norm": 0.5997380790741801, + "learning_rate": 3.6549626045603072e-06, + "loss": 0.2817, + "step": 15657 + }, + { + "epoch": 0.7334988522977468, + "grad_norm": 0.6717500281277564, + "learning_rate": 3.6547944028493987e-06, + "loss": 0.2846, + "step": 15658 + }, + { + "epoch": 0.733545697287675, + "grad_norm": 0.6306437654734717, + "learning_rate": 3.654626194492939e-06, + "loss": 0.2917, + "step": 15659 + }, + { + "epoch": 0.7335925422776034, + "grad_norm": 0.5396312478692821, + "learning_rate": 3.6544579794918962e-06, + "loss": 0.2564, + "step": 15660 + }, + { + "epoch": 0.7336393872675318, + "grad_norm": 0.5710655647534864, + "learning_rate": 3.654289757847237e-06, + "loss": 0.2856, + "step": 15661 + }, + { + "epoch": 0.7336862322574601, + "grad_norm": 0.5850176585282526, + "learning_rate": 3.6541215295599306e-06, + "loss": 0.2806, + "step": 15662 + }, + { + "epoch": 0.7337330772473883, + "grad_norm": 0.620925007667811, + "learning_rate": 3.653953294630946e-06, + "loss": 0.2884, + "step": 15663 + }, + { + "epoch": 0.7337799222373167, + "grad_norm": 0.5537587360946435, + "learning_rate": 3.6537850530612485e-06, + "loss": 0.2659, + "step": 15664 + }, + { + "epoch": 0.733826767227245, + "grad_norm": 0.5814680118053103, + "learning_rate": 3.653616804851809e-06, + "loss": 0.2789, + "step": 15665 + }, + { + "epoch": 0.7338736122171734, + "grad_norm": 0.5701461765841119, + "learning_rate": 3.653448550003595e-06, + "loss": 0.2764, + "step": 15666 + }, + { + "epoch": 0.7339204572071018, + "grad_norm": 0.5887824485185927, + "learning_rate": 3.653280288517574e-06, + "loss": 0.2887, + "step": 15667 + }, + { + "epoch": 0.73396730219703, + "grad_norm": 0.554388980576395, + "learning_rate": 3.6531120203947146e-06, + "loss": 0.2718, + "step": 15668 + }, + { + "epoch": 0.7340141471869583, + "grad_norm": 0.7001217171914621, + "learning_rate": 3.6529437456359858e-06, + "loss": 0.2745, + "step": 15669 + }, + { + "epoch": 0.7340609921768867, + "grad_norm": 0.6024255455173861, + "learning_rate": 3.652775464242355e-06, + "loss": 0.2708, + "step": 15670 + }, + { + "epoch": 0.734107837166815, + "grad_norm": 0.6024029441034858, + "learning_rate": 3.652607176214792e-06, + "loss": 0.2654, + "step": 15671 + }, + { + "epoch": 0.7341546821567433, + "grad_norm": 0.6096724391390725, + "learning_rate": 3.652438881554264e-06, + "loss": 0.3006, + "step": 15672 + }, + { + "epoch": 0.7342015271466716, + "grad_norm": 0.5932189247699415, + "learning_rate": 3.6522705802617397e-06, + "loss": 0.2816, + "step": 15673 + }, + { + "epoch": 0.7342483721366, + "grad_norm": 0.627337621807581, + "learning_rate": 3.6521022723381876e-06, + "loss": 0.288, + "step": 15674 + }, + { + "epoch": 0.7342952171265283, + "grad_norm": 0.5968658573796344, + "learning_rate": 3.651933957784577e-06, + "loss": 0.2851, + "step": 15675 + }, + { + "epoch": 0.7343420621164567, + "grad_norm": 0.5834191419384456, + "learning_rate": 3.6517656366018756e-06, + "loss": 0.2666, + "step": 15676 + }, + { + "epoch": 0.7343889071063849, + "grad_norm": 0.6010642666396951, + "learning_rate": 3.651597308791053e-06, + "loss": 0.282, + "step": 15677 + }, + { + "epoch": 0.7344357520963133, + "grad_norm": 0.5837467019013617, + "learning_rate": 3.6514289743530774e-06, + "loss": 0.2794, + "step": 15678 + }, + { + "epoch": 0.7344825970862416, + "grad_norm": 0.5631916958454699, + "learning_rate": 3.6512606332889165e-06, + "loss": 0.2613, + "step": 15679 + }, + { + "epoch": 0.73452944207617, + "grad_norm": 0.6434373349034309, + "learning_rate": 3.6510922855995404e-06, + "loss": 0.2998, + "step": 15680 + }, + { + "epoch": 0.7345762870660982, + "grad_norm": 0.5786148099120046, + "learning_rate": 3.650923931285918e-06, + "loss": 0.2707, + "step": 15681 + }, + { + "epoch": 0.7346231320560266, + "grad_norm": 0.6413840079043394, + "learning_rate": 3.650755570349017e-06, + "loss": 0.2963, + "step": 15682 + }, + { + "epoch": 0.7346699770459549, + "grad_norm": 0.6292248101228549, + "learning_rate": 3.6505872027898076e-06, + "loss": 0.2876, + "step": 15683 + }, + { + "epoch": 0.7347168220358833, + "grad_norm": 0.5830909446690449, + "learning_rate": 3.650418828609258e-06, + "loss": 0.2677, + "step": 15684 + }, + { + "epoch": 0.7347636670258116, + "grad_norm": 0.5768363738469837, + "learning_rate": 3.6502504478083366e-06, + "loss": 0.2812, + "step": 15685 + }, + { + "epoch": 0.7348105120157399, + "grad_norm": 0.5836638854061921, + "learning_rate": 3.650082060388013e-06, + "loss": 0.2871, + "step": 15686 + }, + { + "epoch": 0.7348573570056682, + "grad_norm": 0.5641597009905128, + "learning_rate": 3.6499136663492573e-06, + "loss": 0.2493, + "step": 15687 + }, + { + "epoch": 0.7349042019955966, + "grad_norm": 0.6322362010697886, + "learning_rate": 3.649745265693036e-06, + "loss": 0.2912, + "step": 15688 + }, + { + "epoch": 0.7349510469855249, + "grad_norm": 0.58679090137016, + "learning_rate": 3.64957685842032e-06, + "loss": 0.283, + "step": 15689 + }, + { + "epoch": 0.7349978919754532, + "grad_norm": 0.5951174503793277, + "learning_rate": 3.649408444532079e-06, + "loss": 0.2731, + "step": 15690 + }, + { + "epoch": 0.7350447369653815, + "grad_norm": 0.5947869611589033, + "learning_rate": 3.6492400240292807e-06, + "loss": 0.2679, + "step": 15691 + }, + { + "epoch": 0.7350915819553099, + "grad_norm": 0.5829893420855051, + "learning_rate": 3.6490715969128942e-06, + "loss": 0.2718, + "step": 15692 + }, + { + "epoch": 0.7351384269452382, + "grad_norm": 0.6467127892836966, + "learning_rate": 3.6489031631838905e-06, + "loss": 0.2881, + "step": 15693 + }, + { + "epoch": 0.7351852719351666, + "grad_norm": 0.5659646423460378, + "learning_rate": 3.648734722843238e-06, + "loss": 0.2776, + "step": 15694 + }, + { + "epoch": 0.7352321169250948, + "grad_norm": 0.6243390939471443, + "learning_rate": 3.6485662758919048e-06, + "loss": 0.304, + "step": 15695 + }, + { + "epoch": 0.7352789619150232, + "grad_norm": 0.6143641243437279, + "learning_rate": 3.6483978223308625e-06, + "loss": 0.2974, + "step": 15696 + }, + { + "epoch": 0.7353258069049515, + "grad_norm": 0.6342802333879178, + "learning_rate": 3.6482293621610786e-06, + "loss": 0.2824, + "step": 15697 + }, + { + "epoch": 0.7353726518948799, + "grad_norm": 0.585445893890738, + "learning_rate": 3.648060895383524e-06, + "loss": 0.2694, + "step": 15698 + }, + { + "epoch": 0.7354194968848081, + "grad_norm": 0.609621831767907, + "learning_rate": 3.6478924219991675e-06, + "loss": 0.2886, + "step": 15699 + }, + { + "epoch": 0.7354663418747365, + "grad_norm": 0.5254058561561737, + "learning_rate": 3.6477239420089778e-06, + "loss": 0.2711, + "step": 15700 + }, + { + "epoch": 0.7355131868646648, + "grad_norm": 0.5697577093055725, + "learning_rate": 3.647555455413926e-06, + "loss": 0.2764, + "step": 15701 + }, + { + "epoch": 0.7355600318545932, + "grad_norm": 0.5996064886096699, + "learning_rate": 3.6473869622149803e-06, + "loss": 0.2831, + "step": 15702 + }, + { + "epoch": 0.7356068768445215, + "grad_norm": 0.5445737279568627, + "learning_rate": 3.6472184624131114e-06, + "loss": 0.273, + "step": 15703 + }, + { + "epoch": 0.7356537218344498, + "grad_norm": 0.5737751417840993, + "learning_rate": 3.647049956009288e-06, + "loss": 0.2628, + "step": 15704 + }, + { + "epoch": 0.7357005668243781, + "grad_norm": 0.557379619836044, + "learning_rate": 3.646881443004482e-06, + "loss": 0.2778, + "step": 15705 + }, + { + "epoch": 0.7357474118143065, + "grad_norm": 0.5901470897701356, + "learning_rate": 3.64671292339966e-06, + "loss": 0.2498, + "step": 15706 + }, + { + "epoch": 0.7357942568042348, + "grad_norm": 0.5970222810748973, + "learning_rate": 3.6465443971957935e-06, + "loss": 0.2978, + "step": 15707 + }, + { + "epoch": 0.7358411017941631, + "grad_norm": 0.5992475234553359, + "learning_rate": 3.646375864393853e-06, + "loss": 0.2704, + "step": 15708 + }, + { + "epoch": 0.7358879467840914, + "grad_norm": 0.5715558426348583, + "learning_rate": 3.646207324994807e-06, + "loss": 0.2842, + "step": 15709 + }, + { + "epoch": 0.7359347917740198, + "grad_norm": 0.5927884031856723, + "learning_rate": 3.646038778999626e-06, + "loss": 0.2878, + "step": 15710 + }, + { + "epoch": 0.7359816367639481, + "grad_norm": 0.5948240359907355, + "learning_rate": 3.6458702264092803e-06, + "loss": 0.278, + "step": 15711 + }, + { + "epoch": 0.7360284817538765, + "grad_norm": 0.5959156431891915, + "learning_rate": 3.6457016672247393e-06, + "loss": 0.2829, + "step": 15712 + }, + { + "epoch": 0.7360753267438047, + "grad_norm": 0.5883312405158423, + "learning_rate": 3.6455331014469723e-06, + "loss": 0.287, + "step": 15713 + }, + { + "epoch": 0.7361221717337331, + "grad_norm": 0.5633291917693628, + "learning_rate": 3.6453645290769507e-06, + "loss": 0.2556, + "step": 15714 + }, + { + "epoch": 0.7361690167236614, + "grad_norm": 0.5682316343244845, + "learning_rate": 3.6451959501156447e-06, + "loss": 0.2912, + "step": 15715 + }, + { + "epoch": 0.7362158617135898, + "grad_norm": 0.5118026886387796, + "learning_rate": 3.645027364564023e-06, + "loss": 0.2523, + "step": 15716 + }, + { + "epoch": 0.736262706703518, + "grad_norm": 0.5835553723992569, + "learning_rate": 3.6448587724230565e-06, + "loss": 0.2875, + "step": 15717 + }, + { + "epoch": 0.7363095516934464, + "grad_norm": 0.6265740643535095, + "learning_rate": 3.644690173693717e-06, + "loss": 0.2778, + "step": 15718 + }, + { + "epoch": 0.7363563966833747, + "grad_norm": 0.6237398273942716, + "learning_rate": 3.644521568376972e-06, + "loss": 0.2733, + "step": 15719 + }, + { + "epoch": 0.7364032416733031, + "grad_norm": 0.5844306554877461, + "learning_rate": 3.644352956473793e-06, + "loss": 0.285, + "step": 15720 + }, + { + "epoch": 0.7364500866632314, + "grad_norm": 0.6991523334287182, + "learning_rate": 3.644184337985151e-06, + "loss": 0.2861, + "step": 15721 + }, + { + "epoch": 0.7364969316531597, + "grad_norm": 0.5793672711201071, + "learning_rate": 3.6440157129120156e-06, + "loss": 0.2858, + "step": 15722 + }, + { + "epoch": 0.736543776643088, + "grad_norm": 0.5944072896264897, + "learning_rate": 3.643847081255357e-06, + "loss": 0.2845, + "step": 15723 + }, + { + "epoch": 0.7365906216330164, + "grad_norm": 0.6214005414394366, + "learning_rate": 3.643678443016146e-06, + "loss": 0.301, + "step": 15724 + }, + { + "epoch": 0.7366374666229447, + "grad_norm": 0.6043301313564022, + "learning_rate": 3.6435097981953526e-06, + "loss": 0.2746, + "step": 15725 + }, + { + "epoch": 0.736684311612873, + "grad_norm": 0.6548306726470408, + "learning_rate": 3.6433411467939484e-06, + "loss": 0.2777, + "step": 15726 + }, + { + "epoch": 0.7367311566028013, + "grad_norm": 0.577681404078513, + "learning_rate": 3.6431724888129033e-06, + "loss": 0.2616, + "step": 15727 + }, + { + "epoch": 0.7367780015927297, + "grad_norm": 0.5863019753088822, + "learning_rate": 3.6430038242531875e-06, + "loss": 0.2974, + "step": 15728 + }, + { + "epoch": 0.736824846582658, + "grad_norm": 0.5987028654016195, + "learning_rate": 3.6428351531157725e-06, + "loss": 0.2896, + "step": 15729 + }, + { + "epoch": 0.7368716915725864, + "grad_norm": 0.6321329816451131, + "learning_rate": 3.6426664754016278e-06, + "loss": 0.3049, + "step": 15730 + }, + { + "epoch": 0.7369185365625146, + "grad_norm": 0.6223218314634764, + "learning_rate": 3.6424977911117245e-06, + "loss": 0.2975, + "step": 15731 + }, + { + "epoch": 0.736965381552443, + "grad_norm": 0.5629408288977257, + "learning_rate": 3.642329100247034e-06, + "loss": 0.2721, + "step": 15732 + }, + { + "epoch": 0.7370122265423713, + "grad_norm": 0.5696204602041612, + "learning_rate": 3.6421604028085274e-06, + "loss": 0.2598, + "step": 15733 + }, + { + "epoch": 0.7370590715322997, + "grad_norm": 0.5949317657895942, + "learning_rate": 3.641991698797174e-06, + "loss": 0.2768, + "step": 15734 + }, + { + "epoch": 0.7371059165222279, + "grad_norm": 0.5823320903827732, + "learning_rate": 3.6418229882139444e-06, + "loss": 0.2763, + "step": 15735 + }, + { + "epoch": 0.7371527615121563, + "grad_norm": 0.5479602350047782, + "learning_rate": 3.6416542710598124e-06, + "loss": 0.2695, + "step": 15736 + }, + { + "epoch": 0.7371996065020846, + "grad_norm": 0.5652253307839792, + "learning_rate": 3.6414855473357456e-06, + "loss": 0.2593, + "step": 15737 + }, + { + "epoch": 0.737246451492013, + "grad_norm": 0.6061835597702402, + "learning_rate": 3.6413168170427164e-06, + "loss": 0.2827, + "step": 15738 + }, + { + "epoch": 0.7372932964819413, + "grad_norm": 0.6218467073886879, + "learning_rate": 3.6411480801816967e-06, + "loss": 0.2939, + "step": 15739 + }, + { + "epoch": 0.7373401414718695, + "grad_norm": 0.5673095231731401, + "learning_rate": 3.640979336753656e-06, + "loss": 0.2725, + "step": 15740 + }, + { + "epoch": 0.7373869864617979, + "grad_norm": 0.5601902690923948, + "learning_rate": 3.6408105867595662e-06, + "loss": 0.2831, + "step": 15741 + }, + { + "epoch": 0.7374338314517263, + "grad_norm": 0.620025354137641, + "learning_rate": 3.640641830200399e-06, + "loss": 0.2759, + "step": 15742 + }, + { + "epoch": 0.7374806764416546, + "grad_norm": 0.6012235219613693, + "learning_rate": 3.640473067077124e-06, + "loss": 0.2842, + "step": 15743 + }, + { + "epoch": 0.7375275214315828, + "grad_norm": 0.5574063589838889, + "learning_rate": 3.640304297390712e-06, + "loss": 0.2724, + "step": 15744 + }, + { + "epoch": 0.7375743664215112, + "grad_norm": 0.5794157890880438, + "learning_rate": 3.640135521142137e-06, + "loss": 0.281, + "step": 15745 + }, + { + "epoch": 0.7376212114114395, + "grad_norm": 0.5916647573019009, + "learning_rate": 3.639966738332368e-06, + "loss": 0.2692, + "step": 15746 + }, + { + "epoch": 0.7376680564013679, + "grad_norm": 0.6563917234399922, + "learning_rate": 3.639797948962377e-06, + "loss": 0.301, + "step": 15747 + }, + { + "epoch": 0.7377149013912963, + "grad_norm": 0.6085411534817267, + "learning_rate": 3.639629153033135e-06, + "loss": 0.2914, + "step": 15748 + }, + { + "epoch": 0.7377617463812245, + "grad_norm": 0.601932173075793, + "learning_rate": 3.6394603505456137e-06, + "loss": 0.2656, + "step": 15749 + }, + { + "epoch": 0.7378085913711528, + "grad_norm": 0.6134315507340419, + "learning_rate": 3.6392915415007855e-06, + "loss": 0.2825, + "step": 15750 + }, + { + "epoch": 0.7378554363610812, + "grad_norm": 0.6471188828992206, + "learning_rate": 3.63912272589962e-06, + "loss": 0.2767, + "step": 15751 + }, + { + "epoch": 0.7379022813510095, + "grad_norm": 0.6268498390186484, + "learning_rate": 3.6389539037430893e-06, + "loss": 0.2981, + "step": 15752 + }, + { + "epoch": 0.7379491263409378, + "grad_norm": 0.5625351233779755, + "learning_rate": 3.6387850750321656e-06, + "loss": 0.2711, + "step": 15753 + }, + { + "epoch": 0.7379959713308661, + "grad_norm": 0.5511800291430211, + "learning_rate": 3.6386162397678205e-06, + "loss": 0.2545, + "step": 15754 + }, + { + "epoch": 0.7380428163207945, + "grad_norm": 0.6047022314981456, + "learning_rate": 3.6384473979510237e-06, + "loss": 0.3009, + "step": 15755 + }, + { + "epoch": 0.7380896613107228, + "grad_norm": 0.5402892233025132, + "learning_rate": 3.638278549582749e-06, + "loss": 0.267, + "step": 15756 + }, + { + "epoch": 0.7381365063006512, + "grad_norm": 0.5598566313858334, + "learning_rate": 3.638109694663968e-06, + "loss": 0.277, + "step": 15757 + }, + { + "epoch": 0.7381833512905794, + "grad_norm": 0.6270415010424419, + "learning_rate": 3.6379408331956508e-06, + "loss": 0.3026, + "step": 15758 + }, + { + "epoch": 0.7382301962805078, + "grad_norm": 0.6437812039616181, + "learning_rate": 3.63777196517877e-06, + "loss": 0.2806, + "step": 15759 + }, + { + "epoch": 0.7382770412704361, + "grad_norm": 0.6167025688453702, + "learning_rate": 3.6376030906142985e-06, + "loss": 0.2961, + "step": 15760 + }, + { + "epoch": 0.7383238862603645, + "grad_norm": 0.6278245709248548, + "learning_rate": 3.6374342095032066e-06, + "loss": 0.2831, + "step": 15761 + }, + { + "epoch": 0.7383707312502927, + "grad_norm": 0.6098929007437977, + "learning_rate": 3.6372653218464665e-06, + "loss": 0.2958, + "step": 15762 + }, + { + "epoch": 0.7384175762402211, + "grad_norm": 0.6355868173574308, + "learning_rate": 3.6370964276450505e-06, + "loss": 0.2837, + "step": 15763 + }, + { + "epoch": 0.7384644212301494, + "grad_norm": 0.604531052356421, + "learning_rate": 3.6369275268999305e-06, + "loss": 0.2825, + "step": 15764 + }, + { + "epoch": 0.7385112662200778, + "grad_norm": 0.6109902782037056, + "learning_rate": 3.636758619612078e-06, + "loss": 0.3091, + "step": 15765 + }, + { + "epoch": 0.7385581112100061, + "grad_norm": 0.5736884886050525, + "learning_rate": 3.636589705782466e-06, + "loss": 0.2725, + "step": 15766 + }, + { + "epoch": 0.7386049561999344, + "grad_norm": 0.6128596170269104, + "learning_rate": 3.636420785412065e-06, + "loss": 0.273, + "step": 15767 + }, + { + "epoch": 0.7386518011898627, + "grad_norm": 0.5956492755239748, + "learning_rate": 3.6362518585018487e-06, + "loss": 0.2507, + "step": 15768 + }, + { + "epoch": 0.7386986461797911, + "grad_norm": 0.571148482428346, + "learning_rate": 3.6360829250527884e-06, + "loss": 0.2809, + "step": 15769 + }, + { + "epoch": 0.7387454911697194, + "grad_norm": 0.5618915774835817, + "learning_rate": 3.6359139850658564e-06, + "loss": 0.2716, + "step": 15770 + }, + { + "epoch": 0.7387923361596477, + "grad_norm": 0.6221276568067763, + "learning_rate": 3.635745038542025e-06, + "loss": 0.3036, + "step": 15771 + }, + { + "epoch": 0.738839181149576, + "grad_norm": 0.6144094084870304, + "learning_rate": 3.635576085482266e-06, + "loss": 0.2794, + "step": 15772 + }, + { + "epoch": 0.7388860261395044, + "grad_norm": 0.6513257319733028, + "learning_rate": 3.6354071258875523e-06, + "loss": 0.2856, + "step": 15773 + }, + { + "epoch": 0.7389328711294327, + "grad_norm": 0.5978905074553595, + "learning_rate": 3.635238159758856e-06, + "loss": 0.2733, + "step": 15774 + }, + { + "epoch": 0.7389797161193611, + "grad_norm": 0.5950057245665776, + "learning_rate": 3.63506918709715e-06, + "loss": 0.2767, + "step": 15775 + }, + { + "epoch": 0.7390265611092893, + "grad_norm": 0.6436052727721743, + "learning_rate": 3.6349002079034044e-06, + "loss": 0.2888, + "step": 15776 + }, + { + "epoch": 0.7390734060992177, + "grad_norm": 0.5808739322326487, + "learning_rate": 3.6347312221785944e-06, + "loss": 0.2777, + "step": 15777 + }, + { + "epoch": 0.739120251089146, + "grad_norm": 0.6186900261964706, + "learning_rate": 3.6345622299236915e-06, + "loss": 0.3123, + "step": 15778 + }, + { + "epoch": 0.7391670960790744, + "grad_norm": 0.5968894128574157, + "learning_rate": 3.6343932311396685e-06, + "loss": 0.293, + "step": 15779 + }, + { + "epoch": 0.7392139410690026, + "grad_norm": 0.5634439728684769, + "learning_rate": 3.634224225827497e-06, + "loss": 0.2777, + "step": 15780 + }, + { + "epoch": 0.739260786058931, + "grad_norm": 0.5876232147679358, + "learning_rate": 3.63405521398815e-06, + "loss": 0.272, + "step": 15781 + }, + { + "epoch": 0.7393076310488593, + "grad_norm": 0.5878040098098954, + "learning_rate": 3.6338861956226015e-06, + "loss": 0.2813, + "step": 15782 + }, + { + "epoch": 0.7393544760387877, + "grad_norm": 0.6480434500305131, + "learning_rate": 3.6337171707318215e-06, + "loss": 0.3079, + "step": 15783 + }, + { + "epoch": 0.739401321028716, + "grad_norm": 0.6232878912232692, + "learning_rate": 3.6335481393167842e-06, + "loss": 0.2972, + "step": 15784 + }, + { + "epoch": 0.7394481660186443, + "grad_norm": 0.5738931714882646, + "learning_rate": 3.6333791013784638e-06, + "loss": 0.262, + "step": 15785 + }, + { + "epoch": 0.7394950110085726, + "grad_norm": 0.5371619833117471, + "learning_rate": 3.6332100569178295e-06, + "loss": 0.2553, + "step": 15786 + }, + { + "epoch": 0.739541855998501, + "grad_norm": 0.58876291804518, + "learning_rate": 3.6330410059358572e-06, + "loss": 0.2734, + "step": 15787 + }, + { + "epoch": 0.7395887009884293, + "grad_norm": 0.5913805988661439, + "learning_rate": 3.6328719484335185e-06, + "loss": 0.2754, + "step": 15788 + }, + { + "epoch": 0.7396355459783576, + "grad_norm": 0.6465645918764906, + "learning_rate": 3.6327028844117874e-06, + "loss": 0.2785, + "step": 15789 + }, + { + "epoch": 0.7396823909682859, + "grad_norm": 0.5251992386799204, + "learning_rate": 3.6325338138716343e-06, + "loss": 0.2619, + "step": 15790 + }, + { + "epoch": 0.7397292359582143, + "grad_norm": 0.625199003318972, + "learning_rate": 3.632364736814035e-06, + "loss": 0.2857, + "step": 15791 + }, + { + "epoch": 0.7397760809481426, + "grad_norm": 0.5962087017258655, + "learning_rate": 3.6321956532399606e-06, + "loss": 0.2563, + "step": 15792 + }, + { + "epoch": 0.739822925938071, + "grad_norm": 0.5844609223208441, + "learning_rate": 3.632026563150385e-06, + "loss": 0.273, + "step": 15793 + }, + { + "epoch": 0.7398697709279992, + "grad_norm": 0.577262863135702, + "learning_rate": 3.6318574665462808e-06, + "loss": 0.2635, + "step": 15794 + }, + { + "epoch": 0.7399166159179276, + "grad_norm": 0.5716345919491091, + "learning_rate": 3.6316883634286217e-06, + "loss": 0.2663, + "step": 15795 + }, + { + "epoch": 0.7399634609078559, + "grad_norm": 0.6384890290607217, + "learning_rate": 3.63151925379838e-06, + "loss": 0.2994, + "step": 15796 + }, + { + "epoch": 0.7400103058977843, + "grad_norm": 0.6213801393353435, + "learning_rate": 3.63135013765653e-06, + "loss": 0.2759, + "step": 15797 + }, + { + "epoch": 0.7400571508877125, + "grad_norm": 0.5366079381185382, + "learning_rate": 3.6311810150040436e-06, + "loss": 0.2664, + "step": 15798 + }, + { + "epoch": 0.7401039958776409, + "grad_norm": 0.6094479264762469, + "learning_rate": 3.631011885841896e-06, + "loss": 0.262, + "step": 15799 + }, + { + "epoch": 0.7401508408675692, + "grad_norm": 0.5432278616756869, + "learning_rate": 3.6308427501710586e-06, + "loss": 0.252, + "step": 15800 + }, + { + "epoch": 0.7401976858574976, + "grad_norm": 0.6224769498619498, + "learning_rate": 3.630673607992505e-06, + "loss": 0.2893, + "step": 15801 + }, + { + "epoch": 0.7402445308474259, + "grad_norm": 0.5820992426510866, + "learning_rate": 3.630504459307209e-06, + "loss": 0.2711, + "step": 15802 + }, + { + "epoch": 0.7402913758373542, + "grad_norm": 0.5570935397505711, + "learning_rate": 3.6303353041161447e-06, + "loss": 0.2587, + "step": 15803 + }, + { + "epoch": 0.7403382208272825, + "grad_norm": 0.5855572213588777, + "learning_rate": 3.630166142420284e-06, + "loss": 0.2764, + "step": 15804 + }, + { + "epoch": 0.7403850658172109, + "grad_norm": 0.6155509714496967, + "learning_rate": 3.6299969742206015e-06, + "loss": 0.3117, + "step": 15805 + }, + { + "epoch": 0.7404319108071392, + "grad_norm": 0.5284021879421971, + "learning_rate": 3.629827799518071e-06, + "loss": 0.2605, + "step": 15806 + }, + { + "epoch": 0.7404787557970675, + "grad_norm": 0.631889543819735, + "learning_rate": 3.629658618313665e-06, + "loss": 0.2847, + "step": 15807 + }, + { + "epoch": 0.7405256007869958, + "grad_norm": 0.6214684864936404, + "learning_rate": 3.6294894306083574e-06, + "loss": 0.2627, + "step": 15808 + }, + { + "epoch": 0.7405724457769242, + "grad_norm": 0.5873885228567395, + "learning_rate": 3.6293202364031223e-06, + "loss": 0.2862, + "step": 15809 + }, + { + "epoch": 0.7406192907668525, + "grad_norm": 0.5793510614586679, + "learning_rate": 3.629151035698933e-06, + "loss": 0.2798, + "step": 15810 + }, + { + "epoch": 0.7406661357567809, + "grad_norm": 0.6084369103325568, + "learning_rate": 3.6289818284967624e-06, + "loss": 0.2866, + "step": 15811 + }, + { + "epoch": 0.7407129807467091, + "grad_norm": 0.6294851054859474, + "learning_rate": 3.6288126147975867e-06, + "loss": 0.303, + "step": 15812 + }, + { + "epoch": 0.7407598257366375, + "grad_norm": 0.6261754576969272, + "learning_rate": 3.6286433946023776e-06, + "loss": 0.3029, + "step": 15813 + }, + { + "epoch": 0.7408066707265658, + "grad_norm": 0.5668271528518269, + "learning_rate": 3.628474167912109e-06, + "loss": 0.2758, + "step": 15814 + }, + { + "epoch": 0.7408535157164942, + "grad_norm": 0.6106906621071341, + "learning_rate": 3.6283049347277554e-06, + "loss": 0.2768, + "step": 15815 + }, + { + "epoch": 0.7409003607064224, + "grad_norm": 0.5503434168102413, + "learning_rate": 3.6281356950502905e-06, + "loss": 0.2809, + "step": 15816 + }, + { + "epoch": 0.7409472056963508, + "grad_norm": 0.5625171255373487, + "learning_rate": 3.627966448880688e-06, + "loss": 0.2619, + "step": 15817 + }, + { + "epoch": 0.7409940506862791, + "grad_norm": 0.6056527192475737, + "learning_rate": 3.627797196219923e-06, + "loss": 0.2812, + "step": 15818 + }, + { + "epoch": 0.7410408956762075, + "grad_norm": 0.6298019964286917, + "learning_rate": 3.6276279370689678e-06, + "loss": 0.2938, + "step": 15819 + }, + { + "epoch": 0.7410877406661358, + "grad_norm": 0.6584138034193612, + "learning_rate": 3.6274586714287974e-06, + "loss": 0.3, + "step": 15820 + }, + { + "epoch": 0.741134585656064, + "grad_norm": 0.5480534908025863, + "learning_rate": 3.627289399300386e-06, + "loss": 0.2713, + "step": 15821 + }, + { + "epoch": 0.7411814306459924, + "grad_norm": 0.5549634439342856, + "learning_rate": 3.627120120684707e-06, + "loss": 0.2442, + "step": 15822 + }, + { + "epoch": 0.7412282756359208, + "grad_norm": 0.5407915528511732, + "learning_rate": 3.6269508355827348e-06, + "loss": 0.2767, + "step": 15823 + }, + { + "epoch": 0.7412751206258491, + "grad_norm": 0.5878549634238784, + "learning_rate": 3.6267815439954448e-06, + "loss": 0.2756, + "step": 15824 + }, + { + "epoch": 0.7413219656157773, + "grad_norm": 0.58159055502168, + "learning_rate": 3.626612245923809e-06, + "loss": 0.2782, + "step": 15825 + }, + { + "epoch": 0.7413688106057057, + "grad_norm": 0.598073101797499, + "learning_rate": 3.6264429413688027e-06, + "loss": 0.2809, + "step": 15826 + }, + { + "epoch": 0.741415655595634, + "grad_norm": 0.5200113679947506, + "learning_rate": 3.6262736303314022e-06, + "loss": 0.2353, + "step": 15827 + }, + { + "epoch": 0.7414625005855624, + "grad_norm": 0.5679071825079018, + "learning_rate": 3.626104312812579e-06, + "loss": 0.2832, + "step": 15828 + }, + { + "epoch": 0.7415093455754908, + "grad_norm": 0.5887436530928739, + "learning_rate": 3.6259349888133076e-06, + "loss": 0.2731, + "step": 15829 + }, + { + "epoch": 0.741556190565419, + "grad_norm": 0.5753651632824032, + "learning_rate": 3.6257656583345647e-06, + "loss": 0.2786, + "step": 15830 + }, + { + "epoch": 0.7416030355553473, + "grad_norm": 0.6059443915038919, + "learning_rate": 3.6255963213773233e-06, + "loss": 0.2998, + "step": 15831 + }, + { + "epoch": 0.7416498805452757, + "grad_norm": 0.6071476229132541, + "learning_rate": 3.625426977942557e-06, + "loss": 0.2915, + "step": 15832 + }, + { + "epoch": 0.741696725535204, + "grad_norm": 0.6339084284538111, + "learning_rate": 3.6252576280312423e-06, + "loss": 0.2909, + "step": 15833 + }, + { + "epoch": 0.7417435705251323, + "grad_norm": 0.6283851966136064, + "learning_rate": 3.6250882716443525e-06, + "loss": 0.2742, + "step": 15834 + }, + { + "epoch": 0.7417904155150606, + "grad_norm": 0.5977742813168105, + "learning_rate": 3.624918908782862e-06, + "loss": 0.2914, + "step": 15835 + }, + { + "epoch": 0.741837260504989, + "grad_norm": 0.6332439589523228, + "learning_rate": 3.6247495394477467e-06, + "loss": 0.3113, + "step": 15836 + }, + { + "epoch": 0.7418841054949173, + "grad_norm": 0.6316084131552011, + "learning_rate": 3.6245801636399797e-06, + "loss": 0.3073, + "step": 15837 + }, + { + "epoch": 0.7419309504848457, + "grad_norm": 0.6057570875752745, + "learning_rate": 3.6244107813605366e-06, + "loss": 0.2682, + "step": 15838 + }, + { + "epoch": 0.7419777954747739, + "grad_norm": 0.6091887649712275, + "learning_rate": 3.6242413926103925e-06, + "loss": 0.2823, + "step": 15839 + }, + { + "epoch": 0.7420246404647023, + "grad_norm": 0.5660410898613385, + "learning_rate": 3.6240719973905213e-06, + "loss": 0.2749, + "step": 15840 + }, + { + "epoch": 0.7420714854546306, + "grad_norm": 0.6596195420925277, + "learning_rate": 3.6239025957018993e-06, + "loss": 0.2756, + "step": 15841 + }, + { + "epoch": 0.742118330444559, + "grad_norm": 0.6355532857979507, + "learning_rate": 3.6237331875454997e-06, + "loss": 0.2876, + "step": 15842 + }, + { + "epoch": 0.7421651754344872, + "grad_norm": 0.6380381919545743, + "learning_rate": 3.623563772922297e-06, + "loss": 0.3003, + "step": 15843 + }, + { + "epoch": 0.7422120204244156, + "grad_norm": 0.5715464292883504, + "learning_rate": 3.623394351833268e-06, + "loss": 0.2889, + "step": 15844 + }, + { + "epoch": 0.7422588654143439, + "grad_norm": 0.6145951542231396, + "learning_rate": 3.6232249242793884e-06, + "loss": 0.2913, + "step": 15845 + }, + { + "epoch": 0.7423057104042723, + "grad_norm": 0.5958171968876875, + "learning_rate": 3.623055490261629e-06, + "loss": 0.2821, + "step": 15846 + }, + { + "epoch": 0.7423525553942006, + "grad_norm": 0.5601407344074597, + "learning_rate": 3.6228860497809692e-06, + "loss": 0.2857, + "step": 15847 + }, + { + "epoch": 0.7423994003841289, + "grad_norm": 0.6114834478898835, + "learning_rate": 3.622716602838382e-06, + "loss": 0.2914, + "step": 15848 + }, + { + "epoch": 0.7424462453740572, + "grad_norm": 0.5746938554964874, + "learning_rate": 3.622547149434843e-06, + "loss": 0.282, + "step": 15849 + }, + { + "epoch": 0.7424930903639856, + "grad_norm": 0.5674534950206084, + "learning_rate": 3.622377689571327e-06, + "loss": 0.2946, + "step": 15850 + }, + { + "epoch": 0.7425399353539139, + "grad_norm": 0.595351578192573, + "learning_rate": 3.6222082232488097e-06, + "loss": 0.282, + "step": 15851 + }, + { + "epoch": 0.7425867803438422, + "grad_norm": 0.5839663244869814, + "learning_rate": 3.622038750468267e-06, + "loss": 0.2821, + "step": 15852 + }, + { + "epoch": 0.7426336253337705, + "grad_norm": 0.6314725315913047, + "learning_rate": 3.621869271230672e-06, + "loss": 0.2761, + "step": 15853 + }, + { + "epoch": 0.7426804703236989, + "grad_norm": 0.6258127284511026, + "learning_rate": 3.621699785537001e-06, + "loss": 0.2928, + "step": 15854 + }, + { + "epoch": 0.7427273153136272, + "grad_norm": 0.5801205545166235, + "learning_rate": 3.6215302933882313e-06, + "loss": 0.2747, + "step": 15855 + }, + { + "epoch": 0.7427741603035556, + "grad_norm": 0.6336093771836834, + "learning_rate": 3.621360794785336e-06, + "loss": 0.2975, + "step": 15856 + }, + { + "epoch": 0.7428210052934838, + "grad_norm": 0.6023214734794967, + "learning_rate": 3.62119128972929e-06, + "loss": 0.2762, + "step": 15857 + }, + { + "epoch": 0.7428678502834122, + "grad_norm": 0.6089141963798752, + "learning_rate": 3.6210217782210715e-06, + "loss": 0.2708, + "step": 15858 + }, + { + "epoch": 0.7429146952733405, + "grad_norm": 0.538771260915168, + "learning_rate": 3.620852260261654e-06, + "loss": 0.273, + "step": 15859 + }, + { + "epoch": 0.7429615402632689, + "grad_norm": 0.5886165397715745, + "learning_rate": 3.620682735852013e-06, + "loss": 0.2739, + "step": 15860 + }, + { + "epoch": 0.7430083852531971, + "grad_norm": 0.5646697941239089, + "learning_rate": 3.6205132049931245e-06, + "loss": 0.2577, + "step": 15861 + }, + { + "epoch": 0.7430552302431255, + "grad_norm": 0.5406331956099296, + "learning_rate": 3.620343667685965e-06, + "loss": 0.2841, + "step": 15862 + }, + { + "epoch": 0.7431020752330538, + "grad_norm": 0.5680091722175169, + "learning_rate": 3.6201741239315087e-06, + "loss": 0.2836, + "step": 15863 + }, + { + "epoch": 0.7431489202229822, + "grad_norm": 0.6172410338896398, + "learning_rate": 3.620004573730732e-06, + "loss": 0.2851, + "step": 15864 + }, + { + "epoch": 0.7431957652129105, + "grad_norm": 0.6079025948543656, + "learning_rate": 3.6198350170846096e-06, + "loss": 0.2773, + "step": 15865 + }, + { + "epoch": 0.7432426102028388, + "grad_norm": 0.6610980691444627, + "learning_rate": 3.6196654539941196e-06, + "loss": 0.3034, + "step": 15866 + }, + { + "epoch": 0.7432894551927671, + "grad_norm": 0.5874493020307547, + "learning_rate": 3.6194958844602355e-06, + "loss": 0.3033, + "step": 15867 + }, + { + "epoch": 0.7433363001826955, + "grad_norm": 0.5914484412604892, + "learning_rate": 3.6193263084839336e-06, + "loss": 0.2719, + "step": 15868 + }, + { + "epoch": 0.7433831451726238, + "grad_norm": 0.5739090576669528, + "learning_rate": 3.619156726066191e-06, + "loss": 0.2936, + "step": 15869 + }, + { + "epoch": 0.7434299901625521, + "grad_norm": 0.6257003045370777, + "learning_rate": 3.618987137207983e-06, + "loss": 0.2857, + "step": 15870 + }, + { + "epoch": 0.7434768351524804, + "grad_norm": 0.6003677055954305, + "learning_rate": 3.618817541910284e-06, + "loss": 0.2791, + "step": 15871 + }, + { + "epoch": 0.7435236801424088, + "grad_norm": 0.5701864349389338, + "learning_rate": 3.6186479401740723e-06, + "loss": 0.2739, + "step": 15872 + }, + { + "epoch": 0.7435705251323371, + "grad_norm": 0.5982250742482581, + "learning_rate": 3.6184783320003226e-06, + "loss": 0.2751, + "step": 15873 + }, + { + "epoch": 0.7436173701222655, + "grad_norm": 0.5346451026164432, + "learning_rate": 3.6183087173900107e-06, + "loss": 0.2388, + "step": 15874 + }, + { + "epoch": 0.7436642151121937, + "grad_norm": 0.5500190445509636, + "learning_rate": 3.618139096344113e-06, + "loss": 0.2843, + "step": 15875 + }, + { + "epoch": 0.7437110601021221, + "grad_norm": 0.6196137163895874, + "learning_rate": 3.617969468863607e-06, + "loss": 0.2858, + "step": 15876 + }, + { + "epoch": 0.7437579050920504, + "grad_norm": 0.5874316745775378, + "learning_rate": 3.617799834949467e-06, + "loss": 0.2863, + "step": 15877 + }, + { + "epoch": 0.7438047500819788, + "grad_norm": 0.5824267437043686, + "learning_rate": 3.6176301946026694e-06, + "loss": 0.279, + "step": 15878 + }, + { + "epoch": 0.743851595071907, + "grad_norm": 0.5910855872679674, + "learning_rate": 3.617460547824192e-06, + "loss": 0.2905, + "step": 15879 + }, + { + "epoch": 0.7438984400618354, + "grad_norm": 0.5552684328155337, + "learning_rate": 3.6172908946150097e-06, + "loss": 0.2727, + "step": 15880 + }, + { + "epoch": 0.7439452850517637, + "grad_norm": 0.5553684399794615, + "learning_rate": 3.6171212349760985e-06, + "loss": 0.2781, + "step": 15881 + }, + { + "epoch": 0.7439921300416921, + "grad_norm": 0.5584329909559862, + "learning_rate": 3.616951568908436e-06, + "loss": 0.2773, + "step": 15882 + }, + { + "epoch": 0.7440389750316204, + "grad_norm": 0.5961518049345331, + "learning_rate": 3.6167818964129982e-06, + "loss": 0.2793, + "step": 15883 + }, + { + "epoch": 0.7440858200215487, + "grad_norm": 0.5581730828872447, + "learning_rate": 3.6166122174907603e-06, + "loss": 0.2859, + "step": 15884 + }, + { + "epoch": 0.744132665011477, + "grad_norm": 0.6075897190436759, + "learning_rate": 3.6164425321427004e-06, + "loss": 0.2659, + "step": 15885 + }, + { + "epoch": 0.7441795100014054, + "grad_norm": 0.6155734894067879, + "learning_rate": 3.6162728403697944e-06, + "loss": 0.2662, + "step": 15886 + }, + { + "epoch": 0.7442263549913337, + "grad_norm": 0.5833330274714481, + "learning_rate": 3.6161031421730185e-06, + "loss": 0.2977, + "step": 15887 + }, + { + "epoch": 0.744273199981262, + "grad_norm": 0.6209616719447848, + "learning_rate": 3.61593343755335e-06, + "loss": 0.2979, + "step": 15888 + }, + { + "epoch": 0.7443200449711903, + "grad_norm": 0.6199887046963318, + "learning_rate": 3.615763726511764e-06, + "loss": 0.2925, + "step": 15889 + }, + { + "epoch": 0.7443668899611187, + "grad_norm": 0.5968555377689204, + "learning_rate": 3.6155940090492392e-06, + "loss": 0.2821, + "step": 15890 + }, + { + "epoch": 0.744413734951047, + "grad_norm": 0.5690679509782405, + "learning_rate": 3.6154242851667516e-06, + "loss": 0.2774, + "step": 15891 + }, + { + "epoch": 0.7444605799409754, + "grad_norm": 0.6502230711979659, + "learning_rate": 3.6152545548652764e-06, + "loss": 0.2696, + "step": 15892 + }, + { + "epoch": 0.7445074249309036, + "grad_norm": 0.6072680876713806, + "learning_rate": 3.615084818145792e-06, + "loss": 0.3013, + "step": 15893 + }, + { + "epoch": 0.744554269920832, + "grad_norm": 0.5968027495329868, + "learning_rate": 3.6149150750092755e-06, + "loss": 0.2901, + "step": 15894 + }, + { + "epoch": 0.7446011149107603, + "grad_norm": 0.6342441852091074, + "learning_rate": 3.614745325456702e-06, + "loss": 0.3035, + "step": 15895 + }, + { + "epoch": 0.7446479599006887, + "grad_norm": 0.5480107233237537, + "learning_rate": 3.6145755694890487e-06, + "loss": 0.2755, + "step": 15896 + }, + { + "epoch": 0.7446948048906169, + "grad_norm": 0.5900024783394807, + "learning_rate": 3.614405807107295e-06, + "loss": 0.2901, + "step": 15897 + }, + { + "epoch": 0.7447416498805453, + "grad_norm": 0.6072815483256637, + "learning_rate": 3.6142360383124147e-06, + "loss": 0.2769, + "step": 15898 + }, + { + "epoch": 0.7447884948704736, + "grad_norm": 0.5596500981259126, + "learning_rate": 3.6140662631053865e-06, + "loss": 0.2717, + "step": 15899 + }, + { + "epoch": 0.744835339860402, + "grad_norm": 0.5871717935468724, + "learning_rate": 3.6138964814871868e-06, + "loss": 0.2784, + "step": 15900 + }, + { + "epoch": 0.7448821848503303, + "grad_norm": 0.5438100684087964, + "learning_rate": 3.6137266934587932e-06, + "loss": 0.2763, + "step": 15901 + }, + { + "epoch": 0.7449290298402585, + "grad_norm": 0.6095268273854461, + "learning_rate": 3.613556899021182e-06, + "loss": 0.2759, + "step": 15902 + }, + { + "epoch": 0.7449758748301869, + "grad_norm": 0.6119058238005062, + "learning_rate": 3.6133870981753303e-06, + "loss": 0.2987, + "step": 15903 + }, + { + "epoch": 0.7450227198201153, + "grad_norm": 0.5795271748065506, + "learning_rate": 3.6132172909222167e-06, + "loss": 0.2658, + "step": 15904 + }, + { + "epoch": 0.7450695648100436, + "grad_norm": 0.5937785468970047, + "learning_rate": 3.6130474772628167e-06, + "loss": 0.2706, + "step": 15905 + }, + { + "epoch": 0.7451164097999718, + "grad_norm": 0.5938049701848073, + "learning_rate": 3.612877657198108e-06, + "loss": 0.307, + "step": 15906 + }, + { + "epoch": 0.7451632547899002, + "grad_norm": 0.6450165550087475, + "learning_rate": 3.6127078307290688e-06, + "loss": 0.3136, + "step": 15907 + }, + { + "epoch": 0.7452100997798285, + "grad_norm": 0.5638428859835123, + "learning_rate": 3.6125379978566754e-06, + "loss": 0.2761, + "step": 15908 + }, + { + "epoch": 0.7452569447697569, + "grad_norm": 0.573349509686536, + "learning_rate": 3.612368158581906e-06, + "loss": 0.2705, + "step": 15909 + }, + { + "epoch": 0.7453037897596853, + "grad_norm": 0.5877841798088771, + "learning_rate": 3.6121983129057363e-06, + "loss": 0.2848, + "step": 15910 + }, + { + "epoch": 0.7453506347496135, + "grad_norm": 0.5439572871646042, + "learning_rate": 3.6120284608291455e-06, + "loss": 0.2542, + "step": 15911 + }, + { + "epoch": 0.7453974797395418, + "grad_norm": 0.6515773553452974, + "learning_rate": 3.6118586023531103e-06, + "loss": 0.29, + "step": 15912 + }, + { + "epoch": 0.7454443247294702, + "grad_norm": 0.5830702515894054, + "learning_rate": 3.611688737478608e-06, + "loss": 0.2649, + "step": 15913 + }, + { + "epoch": 0.7454911697193985, + "grad_norm": 0.6030916388736585, + "learning_rate": 3.611518866206617e-06, + "loss": 0.2754, + "step": 15914 + }, + { + "epoch": 0.7455380147093268, + "grad_norm": 0.5862903396757249, + "learning_rate": 3.611348988538115e-06, + "loss": 0.2632, + "step": 15915 + }, + { + "epoch": 0.7455848596992551, + "grad_norm": 0.5605151805101637, + "learning_rate": 3.6111791044740774e-06, + "loss": 0.2791, + "step": 15916 + }, + { + "epoch": 0.7456317046891835, + "grad_norm": 0.6050987034937763, + "learning_rate": 3.6110092140154833e-06, + "loss": 0.3068, + "step": 15917 + }, + { + "epoch": 0.7456785496791118, + "grad_norm": 0.5778250953331991, + "learning_rate": 3.6108393171633118e-06, + "loss": 0.2861, + "step": 15918 + }, + { + "epoch": 0.7457253946690402, + "grad_norm": 0.5853861540455433, + "learning_rate": 3.610669413918538e-06, + "loss": 0.2769, + "step": 15919 + }, + { + "epoch": 0.7457722396589684, + "grad_norm": 0.5799093347076809, + "learning_rate": 3.610499504282141e-06, + "loss": 0.2793, + "step": 15920 + }, + { + "epoch": 0.7458190846488968, + "grad_norm": 0.6159721471959898, + "learning_rate": 3.6103295882550994e-06, + "loss": 0.2833, + "step": 15921 + }, + { + "epoch": 0.7458659296388251, + "grad_norm": 0.5281640457399555, + "learning_rate": 3.6101596658383893e-06, + "loss": 0.2623, + "step": 15922 + }, + { + "epoch": 0.7459127746287535, + "grad_norm": 0.6002758952396732, + "learning_rate": 3.6099897370329895e-06, + "loss": 0.3097, + "step": 15923 + }, + { + "epoch": 0.7459596196186817, + "grad_norm": 0.6039526631161953, + "learning_rate": 3.609819801839877e-06, + "loss": 0.2738, + "step": 15924 + }, + { + "epoch": 0.7460064646086101, + "grad_norm": 0.662434424769413, + "learning_rate": 3.609649860260032e-06, + "loss": 0.3242, + "step": 15925 + }, + { + "epoch": 0.7460533095985384, + "grad_norm": 0.56937269793044, + "learning_rate": 3.6094799122944302e-06, + "loss": 0.2616, + "step": 15926 + }, + { + "epoch": 0.7461001545884668, + "grad_norm": 0.5903420705880142, + "learning_rate": 3.60930995794405e-06, + "loss": 0.2985, + "step": 15927 + }, + { + "epoch": 0.7461469995783951, + "grad_norm": 0.618099038666555, + "learning_rate": 3.6091399972098706e-06, + "loss": 0.2815, + "step": 15928 + }, + { + "epoch": 0.7461938445683234, + "grad_norm": 0.5682663301156587, + "learning_rate": 3.6089700300928687e-06, + "loss": 0.2666, + "step": 15929 + }, + { + "epoch": 0.7462406895582517, + "grad_norm": 0.6018059961073468, + "learning_rate": 3.608800056594023e-06, + "loss": 0.2898, + "step": 15930 + }, + { + "epoch": 0.7462875345481801, + "grad_norm": 0.5781724952723463, + "learning_rate": 3.6086300767143117e-06, + "loss": 0.2527, + "step": 15931 + }, + { + "epoch": 0.7463343795381084, + "grad_norm": 0.6088363056076153, + "learning_rate": 3.6084600904547137e-06, + "loss": 0.2755, + "step": 15932 + }, + { + "epoch": 0.7463812245280367, + "grad_norm": 0.5596588187929981, + "learning_rate": 3.6082900978162054e-06, + "loss": 0.2788, + "step": 15933 + }, + { + "epoch": 0.746428069517965, + "grad_norm": 0.5732557204023084, + "learning_rate": 3.6081200987997665e-06, + "loss": 0.2678, + "step": 15934 + }, + { + "epoch": 0.7464749145078934, + "grad_norm": 0.5778488810116748, + "learning_rate": 3.6079500934063748e-06, + "loss": 0.2769, + "step": 15935 + }, + { + "epoch": 0.7465217594978217, + "grad_norm": 0.6017217220553293, + "learning_rate": 3.6077800816370095e-06, + "loss": 0.2781, + "step": 15936 + }, + { + "epoch": 0.7465686044877501, + "grad_norm": 0.541791932473845, + "learning_rate": 3.607610063492648e-06, + "loss": 0.259, + "step": 15937 + }, + { + "epoch": 0.7466154494776783, + "grad_norm": 0.5622511446742082, + "learning_rate": 3.607440038974268e-06, + "loss": 0.2653, + "step": 15938 + }, + { + "epoch": 0.7466622944676067, + "grad_norm": 0.577078655438458, + "learning_rate": 3.6072700080828506e-06, + "loss": 0.2658, + "step": 15939 + }, + { + "epoch": 0.746709139457535, + "grad_norm": 0.6078223402177968, + "learning_rate": 3.6070999708193717e-06, + "loss": 0.2917, + "step": 15940 + }, + { + "epoch": 0.7467559844474634, + "grad_norm": 0.5766177705019798, + "learning_rate": 3.6069299271848103e-06, + "loss": 0.276, + "step": 15941 + }, + { + "epoch": 0.7468028294373916, + "grad_norm": 0.601825699843523, + "learning_rate": 3.606759877180146e-06, + "loss": 0.2916, + "step": 15942 + }, + { + "epoch": 0.74684967442732, + "grad_norm": 0.5789927888044755, + "learning_rate": 3.6065898208063566e-06, + "loss": 0.2679, + "step": 15943 + }, + { + "epoch": 0.7468965194172483, + "grad_norm": 0.5608776497802855, + "learning_rate": 3.6064197580644213e-06, + "loss": 0.2814, + "step": 15944 + }, + { + "epoch": 0.7469433644071767, + "grad_norm": 0.6828164266535663, + "learning_rate": 3.6062496889553173e-06, + "loss": 0.3041, + "step": 15945 + }, + { + "epoch": 0.746990209397105, + "grad_norm": 0.6546044037252194, + "learning_rate": 3.6060796134800256e-06, + "loss": 0.2783, + "step": 15946 + }, + { + "epoch": 0.7470370543870333, + "grad_norm": 0.5748232452254197, + "learning_rate": 3.605909531639523e-06, + "loss": 0.2694, + "step": 15947 + }, + { + "epoch": 0.7470838993769616, + "grad_norm": 0.6198750371492736, + "learning_rate": 3.6057394434347885e-06, + "loss": 0.3029, + "step": 15948 + }, + { + "epoch": 0.74713074436689, + "grad_norm": 0.5851882062450381, + "learning_rate": 3.6055693488668027e-06, + "loss": 0.2484, + "step": 15949 + }, + { + "epoch": 0.7471775893568183, + "grad_norm": 0.5365833052719673, + "learning_rate": 3.605399247936543e-06, + "loss": 0.2576, + "step": 15950 + }, + { + "epoch": 0.7472244343467466, + "grad_norm": 0.6102316105130351, + "learning_rate": 3.6052291406449873e-06, + "loss": 0.2849, + "step": 15951 + }, + { + "epoch": 0.7472712793366749, + "grad_norm": 0.6223294245676524, + "learning_rate": 3.6050590269931167e-06, + "loss": 0.295, + "step": 15952 + }, + { + "epoch": 0.7473181243266033, + "grad_norm": 0.6122295330582322, + "learning_rate": 3.604888906981909e-06, + "loss": 0.2914, + "step": 15953 + }, + { + "epoch": 0.7473649693165316, + "grad_norm": 0.6017397108960677, + "learning_rate": 3.604718780612343e-06, + "loss": 0.2788, + "step": 15954 + }, + { + "epoch": 0.74741181430646, + "grad_norm": 0.5720730408427458, + "learning_rate": 3.604548647885399e-06, + "loss": 0.2654, + "step": 15955 + }, + { + "epoch": 0.7474586592963882, + "grad_norm": 0.5658169131461032, + "learning_rate": 3.604378508802054e-06, + "loss": 0.2737, + "step": 15956 + }, + { + "epoch": 0.7475055042863166, + "grad_norm": 0.6246991496137813, + "learning_rate": 3.6042083633632883e-06, + "loss": 0.302, + "step": 15957 + }, + { + "epoch": 0.7475523492762449, + "grad_norm": 0.6258658191172445, + "learning_rate": 3.6040382115700823e-06, + "loss": 0.2903, + "step": 15958 + }, + { + "epoch": 0.7475991942661733, + "grad_norm": 0.5943201257378148, + "learning_rate": 3.6038680534234127e-06, + "loss": 0.2772, + "step": 15959 + }, + { + "epoch": 0.7476460392561015, + "grad_norm": 0.5988606664300674, + "learning_rate": 3.60369788892426e-06, + "loss": 0.2916, + "step": 15960 + }, + { + "epoch": 0.7476928842460299, + "grad_norm": 0.6098359505817147, + "learning_rate": 3.603527718073604e-06, + "loss": 0.2999, + "step": 15961 + }, + { + "epoch": 0.7477397292359582, + "grad_norm": 0.6578668699357587, + "learning_rate": 3.6033575408724226e-06, + "loss": 0.301, + "step": 15962 + }, + { + "epoch": 0.7477865742258866, + "grad_norm": 0.5540077262996457, + "learning_rate": 3.603187357321696e-06, + "loss": 0.2735, + "step": 15963 + }, + { + "epoch": 0.7478334192158149, + "grad_norm": 0.5844677139483296, + "learning_rate": 3.603017167422404e-06, + "loss": 0.2704, + "step": 15964 + }, + { + "epoch": 0.7478802642057432, + "grad_norm": 0.6812256480603506, + "learning_rate": 3.602846971175525e-06, + "loss": 0.3087, + "step": 15965 + }, + { + "epoch": 0.7479271091956715, + "grad_norm": 0.5638764398773914, + "learning_rate": 3.6026767685820385e-06, + "loss": 0.2666, + "step": 15966 + }, + { + "epoch": 0.7479739541855999, + "grad_norm": 0.6000286011742684, + "learning_rate": 3.6025065596429255e-06, + "loss": 0.2784, + "step": 15967 + }, + { + "epoch": 0.7480207991755282, + "grad_norm": 0.5709981407812031, + "learning_rate": 3.602336344359163e-06, + "loss": 0.2623, + "step": 15968 + }, + { + "epoch": 0.7480676441654565, + "grad_norm": 0.5559479946319147, + "learning_rate": 3.6021661227317324e-06, + "loss": 0.2761, + "step": 15969 + }, + { + "epoch": 0.7481144891553848, + "grad_norm": 0.6035412008720866, + "learning_rate": 3.6019958947616128e-06, + "loss": 0.2865, + "step": 15970 + }, + { + "epoch": 0.7481613341453132, + "grad_norm": 0.5887494689690085, + "learning_rate": 3.601825660449784e-06, + "loss": 0.2742, + "step": 15971 + }, + { + "epoch": 0.7482081791352415, + "grad_norm": 0.6184037736219051, + "learning_rate": 3.6016554197972252e-06, + "loss": 0.2944, + "step": 15972 + }, + { + "epoch": 0.7482550241251699, + "grad_norm": 0.5618907824403658, + "learning_rate": 3.6014851728049165e-06, + "loss": 0.2682, + "step": 15973 + }, + { + "epoch": 0.7483018691150981, + "grad_norm": 0.5459261110979235, + "learning_rate": 3.601314919473838e-06, + "loss": 0.2672, + "step": 15974 + }, + { + "epoch": 0.7483487141050265, + "grad_norm": 0.6059275709546549, + "learning_rate": 3.601144659804968e-06, + "loss": 0.2773, + "step": 15975 + }, + { + "epoch": 0.7483955590949548, + "grad_norm": 0.6430488296374149, + "learning_rate": 3.600974393799288e-06, + "loss": 0.2866, + "step": 15976 + }, + { + "epoch": 0.7484424040848832, + "grad_norm": 0.633571419860621, + "learning_rate": 3.600804121457776e-06, + "loss": 0.2888, + "step": 15977 + }, + { + "epoch": 0.7484892490748114, + "grad_norm": 0.6061731979502191, + "learning_rate": 3.600633842781414e-06, + "loss": 0.2881, + "step": 15978 + }, + { + "epoch": 0.7485360940647398, + "grad_norm": 0.5971986172508584, + "learning_rate": 3.600463557771181e-06, + "loss": 0.2837, + "step": 15979 + }, + { + "epoch": 0.7485829390546681, + "grad_norm": 0.6090944440907998, + "learning_rate": 3.600293266428056e-06, + "loss": 0.281, + "step": 15980 + }, + { + "epoch": 0.7486297840445965, + "grad_norm": 0.6298484386687428, + "learning_rate": 3.6001229687530203e-06, + "loss": 0.292, + "step": 15981 + }, + { + "epoch": 0.7486766290345248, + "grad_norm": 0.5742882663216226, + "learning_rate": 3.5999526647470535e-06, + "loss": 0.2684, + "step": 15982 + }, + { + "epoch": 0.748723474024453, + "grad_norm": 0.573564182914225, + "learning_rate": 3.5997823544111354e-06, + "loss": 0.2746, + "step": 15983 + }, + { + "epoch": 0.7487703190143814, + "grad_norm": 0.5796723621221257, + "learning_rate": 3.5996120377462464e-06, + "loss": 0.2892, + "step": 15984 + }, + { + "epoch": 0.7488171640043098, + "grad_norm": 0.5632312739055698, + "learning_rate": 3.5994417147533656e-06, + "loss": 0.2714, + "step": 15985 + }, + { + "epoch": 0.7488640089942381, + "grad_norm": 0.6164514972073165, + "learning_rate": 3.5992713854334753e-06, + "loss": 0.2921, + "step": 15986 + }, + { + "epoch": 0.7489108539841663, + "grad_norm": 0.5905437545833864, + "learning_rate": 3.5991010497875533e-06, + "loss": 0.2863, + "step": 15987 + }, + { + "epoch": 0.7489576989740947, + "grad_norm": 0.6044649166000648, + "learning_rate": 3.5989307078165826e-06, + "loss": 0.2791, + "step": 15988 + }, + { + "epoch": 0.749004543964023, + "grad_norm": 0.5854705823516622, + "learning_rate": 3.598760359521541e-06, + "loss": 0.2789, + "step": 15989 + }, + { + "epoch": 0.7490513889539514, + "grad_norm": 0.6066116679307572, + "learning_rate": 3.5985900049034097e-06, + "loss": 0.2797, + "step": 15990 + }, + { + "epoch": 0.7490982339438798, + "grad_norm": 0.5260278883701384, + "learning_rate": 3.5984196439631687e-06, + "loss": 0.264, + "step": 15991 + }, + { + "epoch": 0.749145078933808, + "grad_norm": 0.5510320793084281, + "learning_rate": 3.598249276701799e-06, + "loss": 0.2802, + "step": 15992 + }, + { + "epoch": 0.7491919239237363, + "grad_norm": 0.5942333705922556, + "learning_rate": 3.5980789031202805e-06, + "loss": 0.2869, + "step": 15993 + }, + { + "epoch": 0.7492387689136647, + "grad_norm": 0.5993336170698286, + "learning_rate": 3.597908523219594e-06, + "loss": 0.2915, + "step": 15994 + }, + { + "epoch": 0.749285613903593, + "grad_norm": 0.5739947734231611, + "learning_rate": 3.597738137000721e-06, + "loss": 0.2702, + "step": 15995 + }, + { + "epoch": 0.7493324588935213, + "grad_norm": 0.6115424342545079, + "learning_rate": 3.5975677444646395e-06, + "loss": 0.2945, + "step": 15996 + }, + { + "epoch": 0.7493793038834496, + "grad_norm": 0.5529665001687117, + "learning_rate": 3.5973973456123316e-06, + "loss": 0.2889, + "step": 15997 + }, + { + "epoch": 0.749426148873378, + "grad_norm": 0.5112678332478936, + "learning_rate": 3.5972269404447786e-06, + "loss": 0.2654, + "step": 15998 + }, + { + "epoch": 0.7494729938633063, + "grad_norm": 0.6073409728208043, + "learning_rate": 3.59705652896296e-06, + "loss": 0.3027, + "step": 15999 + }, + { + "epoch": 0.7495198388532347, + "grad_norm": 0.6010134356832945, + "learning_rate": 3.5968861111678565e-06, + "loss": 0.2991, + "step": 16000 + }, + { + "epoch": 0.7495666838431629, + "grad_norm": 0.5929589489137215, + "learning_rate": 3.59671568706045e-06, + "loss": 0.2706, + "step": 16001 + }, + { + "epoch": 0.7496135288330913, + "grad_norm": 0.5866122353503963, + "learning_rate": 3.59654525664172e-06, + "loss": 0.2643, + "step": 16002 + }, + { + "epoch": 0.7496603738230196, + "grad_norm": 0.5785978432382626, + "learning_rate": 3.596374819912647e-06, + "loss": 0.2766, + "step": 16003 + }, + { + "epoch": 0.749707218812948, + "grad_norm": 0.6119953013764452, + "learning_rate": 3.5962043768742127e-06, + "loss": 0.3009, + "step": 16004 + }, + { + "epoch": 0.7497540638028762, + "grad_norm": 0.6998634545572455, + "learning_rate": 3.5960339275273978e-06, + "loss": 0.3254, + "step": 16005 + }, + { + "epoch": 0.7498009087928046, + "grad_norm": 0.5728440424937433, + "learning_rate": 3.5958634718731833e-06, + "loss": 0.264, + "step": 16006 + }, + { + "epoch": 0.7498477537827329, + "grad_norm": 0.6070522436378822, + "learning_rate": 3.5956930099125498e-06, + "loss": 0.2973, + "step": 16007 + }, + { + "epoch": 0.7498945987726613, + "grad_norm": 0.5740295559545391, + "learning_rate": 3.5955225416464785e-06, + "loss": 0.2887, + "step": 16008 + }, + { + "epoch": 0.7499414437625896, + "grad_norm": 0.5707865305816447, + "learning_rate": 3.5953520670759503e-06, + "loss": 0.2725, + "step": 16009 + }, + { + "epoch": 0.7499882887525179, + "grad_norm": 0.5937212014269083, + "learning_rate": 3.595181586201947e-06, + "loss": 0.2911, + "step": 16010 + }, + { + "epoch": 0.7500351337424462, + "grad_norm": 0.5684939752801427, + "learning_rate": 3.595011099025447e-06, + "loss": 0.276, + "step": 16011 + }, + { + "epoch": 0.7500819787323746, + "grad_norm": 0.5626835419783242, + "learning_rate": 3.594840605547435e-06, + "loss": 0.2805, + "step": 16012 + }, + { + "epoch": 0.7501288237223029, + "grad_norm": 0.5389350298172505, + "learning_rate": 3.5946701057688903e-06, + "loss": 0.2763, + "step": 16013 + }, + { + "epoch": 0.7501756687122312, + "grad_norm": 0.6823841674635265, + "learning_rate": 3.594499599690794e-06, + "loss": 0.3034, + "step": 16014 + }, + { + "epoch": 0.7502225137021595, + "grad_norm": 0.604606854571789, + "learning_rate": 3.5943290873141275e-06, + "loss": 0.275, + "step": 16015 + }, + { + "epoch": 0.7502693586920879, + "grad_norm": 0.5492226945852864, + "learning_rate": 3.5941585686398727e-06, + "loss": 0.2906, + "step": 16016 + }, + { + "epoch": 0.7503162036820162, + "grad_norm": 0.5927687917345751, + "learning_rate": 3.59398804366901e-06, + "loss": 0.2749, + "step": 16017 + }, + { + "epoch": 0.7503630486719446, + "grad_norm": 0.6058760834158952, + "learning_rate": 3.5938175124025204e-06, + "loss": 0.2719, + "step": 16018 + }, + { + "epoch": 0.7504098936618728, + "grad_norm": 0.6247395551046944, + "learning_rate": 3.5936469748413868e-06, + "loss": 0.2961, + "step": 16019 + }, + { + "epoch": 0.7504567386518012, + "grad_norm": 0.5781697529016837, + "learning_rate": 3.5934764309865895e-06, + "loss": 0.2756, + "step": 16020 + }, + { + "epoch": 0.7505035836417295, + "grad_norm": 0.5876842723476974, + "learning_rate": 3.5933058808391095e-06, + "loss": 0.297, + "step": 16021 + }, + { + "epoch": 0.7505504286316579, + "grad_norm": 0.7008688690252137, + "learning_rate": 3.59313532439993e-06, + "loss": 0.2871, + "step": 16022 + }, + { + "epoch": 0.7505972736215861, + "grad_norm": 0.6541209757276157, + "learning_rate": 3.5929647616700316e-06, + "loss": 0.2818, + "step": 16023 + }, + { + "epoch": 0.7506441186115145, + "grad_norm": 0.6421888465197554, + "learning_rate": 3.5927941926503945e-06, + "loss": 0.3072, + "step": 16024 + }, + { + "epoch": 0.7506909636014428, + "grad_norm": 0.5663008885085352, + "learning_rate": 3.5926236173420024e-06, + "loss": 0.2793, + "step": 16025 + }, + { + "epoch": 0.7507378085913712, + "grad_norm": 0.579885335311753, + "learning_rate": 3.5924530357458353e-06, + "loss": 0.2973, + "step": 16026 + }, + { + "epoch": 0.7507846535812995, + "grad_norm": 0.5436366093950835, + "learning_rate": 3.5922824478628766e-06, + "loss": 0.2712, + "step": 16027 + }, + { + "epoch": 0.7508314985712278, + "grad_norm": 0.6099758643096468, + "learning_rate": 3.5921118536941062e-06, + "loss": 0.2847, + "step": 16028 + }, + { + "epoch": 0.7508783435611561, + "grad_norm": 0.5461461551941732, + "learning_rate": 3.591941253240507e-06, + "loss": 0.2615, + "step": 16029 + }, + { + "epoch": 0.7509251885510845, + "grad_norm": 0.5953520878936904, + "learning_rate": 3.5917706465030597e-06, + "loss": 0.2897, + "step": 16030 + }, + { + "epoch": 0.7509720335410128, + "grad_norm": 0.5990304045348548, + "learning_rate": 3.591600033482747e-06, + "loss": 0.2685, + "step": 16031 + }, + { + "epoch": 0.7510188785309411, + "grad_norm": 0.7101304875808792, + "learning_rate": 3.5914294141805504e-06, + "loss": 0.2852, + "step": 16032 + }, + { + "epoch": 0.7510657235208694, + "grad_norm": 0.625040251561844, + "learning_rate": 3.5912587885974524e-06, + "loss": 0.2911, + "step": 16033 + }, + { + "epoch": 0.7511125685107978, + "grad_norm": 0.6212689962032798, + "learning_rate": 3.591088156734434e-06, + "loss": 0.2783, + "step": 16034 + }, + { + "epoch": 0.7511594135007261, + "grad_norm": 0.6002155393683412, + "learning_rate": 3.590917518592477e-06, + "loss": 0.2896, + "step": 16035 + }, + { + "epoch": 0.7512062584906545, + "grad_norm": 0.5721860631812639, + "learning_rate": 3.590746874172564e-06, + "loss": 0.282, + "step": 16036 + }, + { + "epoch": 0.7512531034805827, + "grad_norm": 0.5623982575177878, + "learning_rate": 3.5905762234756787e-06, + "loss": 0.2613, + "step": 16037 + }, + { + "epoch": 0.7512999484705111, + "grad_norm": 0.5931393850789038, + "learning_rate": 3.5904055665027992e-06, + "loss": 0.2711, + "step": 16038 + }, + { + "epoch": 0.7513467934604394, + "grad_norm": 0.6462002930323509, + "learning_rate": 3.59023490325491e-06, + "loss": 0.2893, + "step": 16039 + }, + { + "epoch": 0.7513936384503678, + "grad_norm": 0.584204602930029, + "learning_rate": 3.590064233732994e-06, + "loss": 0.2618, + "step": 16040 + }, + { + "epoch": 0.751440483440296, + "grad_norm": 0.6244209657230236, + "learning_rate": 3.5898935579380313e-06, + "loss": 0.2902, + "step": 16041 + }, + { + "epoch": 0.7514873284302244, + "grad_norm": 0.577063576557631, + "learning_rate": 3.5897228758710054e-06, + "loss": 0.2774, + "step": 16042 + }, + { + "epoch": 0.7515341734201527, + "grad_norm": 0.5885304054408923, + "learning_rate": 3.5895521875328987e-06, + "loss": 0.2989, + "step": 16043 + }, + { + "epoch": 0.7515810184100811, + "grad_norm": 0.6189445568137123, + "learning_rate": 3.589381492924693e-06, + "loss": 0.2649, + "step": 16044 + }, + { + "epoch": 0.7516278634000094, + "grad_norm": 0.5733129374795886, + "learning_rate": 3.5892107920473695e-06, + "loss": 0.2811, + "step": 16045 + }, + { + "epoch": 0.7516747083899377, + "grad_norm": 0.6375708241739864, + "learning_rate": 3.5890400849019126e-06, + "loss": 0.2799, + "step": 16046 + }, + { + "epoch": 0.751721553379866, + "grad_norm": 0.5539081818788938, + "learning_rate": 3.588869371489303e-06, + "loss": 0.2702, + "step": 16047 + }, + { + "epoch": 0.7517683983697944, + "grad_norm": 0.5621113523224675, + "learning_rate": 3.588698651810525e-06, + "loss": 0.2759, + "step": 16048 + }, + { + "epoch": 0.7518152433597227, + "grad_norm": 0.5802561172086582, + "learning_rate": 3.5885279258665588e-06, + "loss": 0.2766, + "step": 16049 + }, + { + "epoch": 0.751862088349651, + "grad_norm": 0.586702708823113, + "learning_rate": 3.5883571936583884e-06, + "loss": 0.2998, + "step": 16050 + }, + { + "epoch": 0.7519089333395793, + "grad_norm": 0.6032297925890134, + "learning_rate": 3.588186455186996e-06, + "loss": 0.2698, + "step": 16051 + }, + { + "epoch": 0.7519557783295077, + "grad_norm": 0.6405178312786978, + "learning_rate": 3.5880157104533636e-06, + "loss": 0.3138, + "step": 16052 + }, + { + "epoch": 0.752002623319436, + "grad_norm": 0.5861258032939775, + "learning_rate": 3.587844959458475e-06, + "loss": 0.272, + "step": 16053 + }, + { + "epoch": 0.7520494683093644, + "grad_norm": 0.5691055173483366, + "learning_rate": 3.587674202203312e-06, + "loss": 0.2838, + "step": 16054 + }, + { + "epoch": 0.7520963132992926, + "grad_norm": 0.6625237704913407, + "learning_rate": 3.5875034386888563e-06, + "loss": 0.2699, + "step": 16055 + }, + { + "epoch": 0.752143158289221, + "grad_norm": 0.6271991742098939, + "learning_rate": 3.5873326689160927e-06, + "loss": 0.2766, + "step": 16056 + }, + { + "epoch": 0.7521900032791493, + "grad_norm": 0.5852711779435331, + "learning_rate": 3.5871618928860024e-06, + "loss": 0.266, + "step": 16057 + }, + { + "epoch": 0.7522368482690777, + "grad_norm": 0.5794635772067771, + "learning_rate": 3.586991110599569e-06, + "loss": 0.2914, + "step": 16058 + }, + { + "epoch": 0.7522836932590059, + "grad_norm": 0.5680097002233615, + "learning_rate": 3.586820322057775e-06, + "loss": 0.2795, + "step": 16059 + }, + { + "epoch": 0.7523305382489343, + "grad_norm": 0.5817553413853207, + "learning_rate": 3.5866495272616026e-06, + "loss": 0.2693, + "step": 16060 + }, + { + "epoch": 0.7523773832388626, + "grad_norm": 0.5920675952484206, + "learning_rate": 3.586478726212036e-06, + "loss": 0.2809, + "step": 16061 + }, + { + "epoch": 0.752424228228791, + "grad_norm": 0.620707756067755, + "learning_rate": 3.586307918910057e-06, + "loss": 0.259, + "step": 16062 + }, + { + "epoch": 0.7524710732187193, + "grad_norm": 0.6214393858527882, + "learning_rate": 3.5861371053566492e-06, + "loss": 0.2854, + "step": 16063 + }, + { + "epoch": 0.7525179182086476, + "grad_norm": 0.5532073174287729, + "learning_rate": 3.5859662855527945e-06, + "loss": 0.2776, + "step": 16064 + }, + { + "epoch": 0.7525647631985759, + "grad_norm": 0.5688399756927729, + "learning_rate": 3.585795459499478e-06, + "loss": 0.2591, + "step": 16065 + }, + { + "epoch": 0.7526116081885043, + "grad_norm": 0.6224023606723, + "learning_rate": 3.5856246271976815e-06, + "loss": 0.2953, + "step": 16066 + }, + { + "epoch": 0.7526584531784326, + "grad_norm": 0.6135239974265123, + "learning_rate": 3.585453788648387e-06, + "loss": 0.2678, + "step": 16067 + }, + { + "epoch": 0.7527052981683608, + "grad_norm": 0.599323451677609, + "learning_rate": 3.585282943852579e-06, + "loss": 0.2745, + "step": 16068 + }, + { + "epoch": 0.7527521431582892, + "grad_norm": 0.5720976203687842, + "learning_rate": 3.5851120928112416e-06, + "loss": 0.2617, + "step": 16069 + }, + { + "epoch": 0.7527989881482176, + "grad_norm": 0.6096692963891542, + "learning_rate": 3.5849412355253556e-06, + "loss": 0.277, + "step": 16070 + }, + { + "epoch": 0.7528458331381459, + "grad_norm": 0.5446403280352587, + "learning_rate": 3.584770371995906e-06, + "loss": 0.2476, + "step": 16071 + }, + { + "epoch": 0.7528926781280743, + "grad_norm": 0.5610732179988296, + "learning_rate": 3.584599502223876e-06, + "loss": 0.2551, + "step": 16072 + }, + { + "epoch": 0.7529395231180025, + "grad_norm": 0.6623553334952351, + "learning_rate": 3.5844286262102478e-06, + "loss": 0.2855, + "step": 16073 + }, + { + "epoch": 0.7529863681079308, + "grad_norm": 0.600339367954832, + "learning_rate": 3.5842577439560057e-06, + "loss": 0.2875, + "step": 16074 + }, + { + "epoch": 0.7530332130978592, + "grad_norm": 0.571048589054025, + "learning_rate": 3.5840868554621323e-06, + "loss": 0.2746, + "step": 16075 + }, + { + "epoch": 0.7530800580877876, + "grad_norm": 0.5695259335518615, + "learning_rate": 3.583915960729612e-06, + "loss": 0.2678, + "step": 16076 + }, + { + "epoch": 0.7531269030777158, + "grad_norm": 0.6213480385126804, + "learning_rate": 3.583745059759428e-06, + "loss": 0.2908, + "step": 16077 + }, + { + "epoch": 0.7531737480676441, + "grad_norm": 0.5861629929160639, + "learning_rate": 3.583574152552563e-06, + "loss": 0.2638, + "step": 16078 + }, + { + "epoch": 0.7532205930575725, + "grad_norm": 0.586517745523365, + "learning_rate": 3.5834032391100015e-06, + "loss": 0.2914, + "step": 16079 + }, + { + "epoch": 0.7532674380475008, + "grad_norm": 0.631593884348073, + "learning_rate": 3.5832323194327266e-06, + "loss": 0.2859, + "step": 16080 + }, + { + "epoch": 0.7533142830374292, + "grad_norm": 0.5898293055258457, + "learning_rate": 3.583061393521722e-06, + "loss": 0.265, + "step": 16081 + }, + { + "epoch": 0.7533611280273574, + "grad_norm": 0.5446402619079339, + "learning_rate": 3.5828904613779716e-06, + "loss": 0.2712, + "step": 16082 + }, + { + "epoch": 0.7534079730172858, + "grad_norm": 0.5711073256031185, + "learning_rate": 3.582719523002458e-06, + "loss": 0.2756, + "step": 16083 + }, + { + "epoch": 0.7534548180072141, + "grad_norm": 0.578234544513984, + "learning_rate": 3.5825485783961656e-06, + "loss": 0.2737, + "step": 16084 + }, + { + "epoch": 0.7535016629971425, + "grad_norm": 0.6116083384822064, + "learning_rate": 3.5823776275600786e-06, + "loss": 0.2673, + "step": 16085 + }, + { + "epoch": 0.7535485079870707, + "grad_norm": 0.5364266096092927, + "learning_rate": 3.5822066704951806e-06, + "loss": 0.2487, + "step": 16086 + }, + { + "epoch": 0.7535953529769991, + "grad_norm": 0.6212809822266534, + "learning_rate": 3.5820357072024555e-06, + "loss": 0.3196, + "step": 16087 + }, + { + "epoch": 0.7536421979669274, + "grad_norm": 0.5757327533404641, + "learning_rate": 3.581864737682886e-06, + "loss": 0.2848, + "step": 16088 + }, + { + "epoch": 0.7536890429568558, + "grad_norm": 0.5866499167968123, + "learning_rate": 3.5816937619374576e-06, + "loss": 0.2877, + "step": 16089 + }, + { + "epoch": 0.7537358879467841, + "grad_norm": 0.6099114777114024, + "learning_rate": 3.5815227799671533e-06, + "loss": 0.2828, + "step": 16090 + }, + { + "epoch": 0.7537827329367124, + "grad_norm": 0.5951361060678703, + "learning_rate": 3.581351791772957e-06, + "loss": 0.2747, + "step": 16091 + }, + { + "epoch": 0.7538295779266407, + "grad_norm": 0.6375392741328699, + "learning_rate": 3.5811807973558528e-06, + "loss": 0.2848, + "step": 16092 + }, + { + "epoch": 0.7538764229165691, + "grad_norm": 0.5892341621603713, + "learning_rate": 3.5810097967168254e-06, + "loss": 0.2815, + "step": 16093 + }, + { + "epoch": 0.7539232679064974, + "grad_norm": 0.5909359269223153, + "learning_rate": 3.5808387898568573e-06, + "loss": 0.2875, + "step": 16094 + }, + { + "epoch": 0.7539701128964257, + "grad_norm": 0.5858570247857245, + "learning_rate": 3.580667776776935e-06, + "loss": 0.2766, + "step": 16095 + }, + { + "epoch": 0.754016957886354, + "grad_norm": 0.5742153801403417, + "learning_rate": 3.5804967574780403e-06, + "loss": 0.2761, + "step": 16096 + }, + { + "epoch": 0.7540638028762824, + "grad_norm": 0.6000674637656787, + "learning_rate": 3.5803257319611585e-06, + "loss": 0.2704, + "step": 16097 + }, + { + "epoch": 0.7541106478662107, + "grad_norm": 0.5531705933876073, + "learning_rate": 3.5801547002272742e-06, + "loss": 0.2696, + "step": 16098 + }, + { + "epoch": 0.7541574928561391, + "grad_norm": 0.648210927227527, + "learning_rate": 3.5799836622773697e-06, + "loss": 0.279, + "step": 16099 + }, + { + "epoch": 0.7542043378460673, + "grad_norm": 0.659334944014554, + "learning_rate": 3.579812618112432e-06, + "loss": 0.2916, + "step": 16100 + }, + { + "epoch": 0.7542511828359957, + "grad_norm": 0.6300822164089617, + "learning_rate": 3.579641567733444e-06, + "loss": 0.2883, + "step": 16101 + }, + { + "epoch": 0.754298027825924, + "grad_norm": 0.6096271396792134, + "learning_rate": 3.5794705111413898e-06, + "loss": 0.285, + "step": 16102 + }, + { + "epoch": 0.7543448728158524, + "grad_norm": 0.549068414903634, + "learning_rate": 3.579299448337254e-06, + "loss": 0.2613, + "step": 16103 + }, + { + "epoch": 0.7543917178057806, + "grad_norm": 0.5905902463855941, + "learning_rate": 3.5791283793220215e-06, + "loss": 0.2941, + "step": 16104 + }, + { + "epoch": 0.754438562795709, + "grad_norm": 0.5791587205346425, + "learning_rate": 3.578957304096676e-06, + "loss": 0.2865, + "step": 16105 + }, + { + "epoch": 0.7544854077856373, + "grad_norm": 0.6215213403856941, + "learning_rate": 3.578786222662202e-06, + "loss": 0.2763, + "step": 16106 + }, + { + "epoch": 0.7545322527755657, + "grad_norm": 0.6048041761111169, + "learning_rate": 3.5786151350195862e-06, + "loss": 0.3015, + "step": 16107 + }, + { + "epoch": 0.754579097765494, + "grad_norm": 0.5863449237186966, + "learning_rate": 3.5784440411698097e-06, + "loss": 0.2765, + "step": 16108 + }, + { + "epoch": 0.7546259427554223, + "grad_norm": 0.5353397486413867, + "learning_rate": 3.578272941113859e-06, + "loss": 0.2739, + "step": 16109 + }, + { + "epoch": 0.7546727877453506, + "grad_norm": 0.5898186714199573, + "learning_rate": 3.5781018348527193e-06, + "loss": 0.2799, + "step": 16110 + }, + { + "epoch": 0.754719632735279, + "grad_norm": 0.6788960011917791, + "learning_rate": 3.577930722387374e-06, + "loss": 0.3193, + "step": 16111 + }, + { + "epoch": 0.7547664777252073, + "grad_norm": 0.6018576205815354, + "learning_rate": 3.5777596037188082e-06, + "loss": 0.2927, + "step": 16112 + }, + { + "epoch": 0.7548133227151356, + "grad_norm": 0.6077149868471232, + "learning_rate": 3.577588478848007e-06, + "loss": 0.2829, + "step": 16113 + }, + { + "epoch": 0.7548601677050639, + "grad_norm": 0.5736507105179476, + "learning_rate": 3.5774173477759556e-06, + "loss": 0.2955, + "step": 16114 + }, + { + "epoch": 0.7549070126949923, + "grad_norm": 0.5743401431339382, + "learning_rate": 3.5772462105036364e-06, + "loss": 0.2712, + "step": 16115 + }, + { + "epoch": 0.7549538576849206, + "grad_norm": 0.6283119450348913, + "learning_rate": 3.5770750670320376e-06, + "loss": 0.3021, + "step": 16116 + }, + { + "epoch": 0.755000702674849, + "grad_norm": 0.5824866420791499, + "learning_rate": 3.576903917362142e-06, + "loss": 0.2916, + "step": 16117 + }, + { + "epoch": 0.7550475476647772, + "grad_norm": 0.5656998078134705, + "learning_rate": 3.576732761494935e-06, + "loss": 0.2703, + "step": 16118 + }, + { + "epoch": 0.7550943926547056, + "grad_norm": 0.5517498841777978, + "learning_rate": 3.576561599431402e-06, + "loss": 0.2805, + "step": 16119 + }, + { + "epoch": 0.7551412376446339, + "grad_norm": 0.564838180555526, + "learning_rate": 3.5763904311725265e-06, + "loss": 0.2788, + "step": 16120 + }, + { + "epoch": 0.7551880826345623, + "grad_norm": 0.5963747371129205, + "learning_rate": 3.5762192567192954e-06, + "loss": 0.2779, + "step": 16121 + }, + { + "epoch": 0.7552349276244905, + "grad_norm": 0.5766341509733682, + "learning_rate": 3.576048076072693e-06, + "loss": 0.2855, + "step": 16122 + }, + { + "epoch": 0.7552817726144189, + "grad_norm": 0.6032674076383179, + "learning_rate": 3.5758768892337043e-06, + "loss": 0.2798, + "step": 16123 + }, + { + "epoch": 0.7553286176043472, + "grad_norm": 0.5911166765651422, + "learning_rate": 3.5757056962033144e-06, + "loss": 0.2844, + "step": 16124 + }, + { + "epoch": 0.7553754625942756, + "grad_norm": 0.560291758268338, + "learning_rate": 3.5755344969825083e-06, + "loss": 0.2862, + "step": 16125 + }, + { + "epoch": 0.7554223075842039, + "grad_norm": 0.5808309613094684, + "learning_rate": 3.5753632915722724e-06, + "loss": 0.2894, + "step": 16126 + }, + { + "epoch": 0.7554691525741322, + "grad_norm": 0.6404454870175129, + "learning_rate": 3.57519207997359e-06, + "loss": 0.313, + "step": 16127 + }, + { + "epoch": 0.7555159975640605, + "grad_norm": 0.6063358830899029, + "learning_rate": 3.575020862187448e-06, + "loss": 0.2682, + "step": 16128 + }, + { + "epoch": 0.7555628425539889, + "grad_norm": 0.6180499697878769, + "learning_rate": 3.574849638214831e-06, + "loss": 0.2686, + "step": 16129 + }, + { + "epoch": 0.7556096875439172, + "grad_norm": 0.6122586079280127, + "learning_rate": 3.5746784080567244e-06, + "loss": 0.2934, + "step": 16130 + }, + { + "epoch": 0.7556565325338455, + "grad_norm": 0.6525414641791607, + "learning_rate": 3.5745071717141137e-06, + "loss": 0.2772, + "step": 16131 + }, + { + "epoch": 0.7557033775237738, + "grad_norm": 0.6182206525130564, + "learning_rate": 3.5743359291879846e-06, + "loss": 0.2898, + "step": 16132 + }, + { + "epoch": 0.7557502225137022, + "grad_norm": 0.5913504770844052, + "learning_rate": 3.5741646804793218e-06, + "loss": 0.2646, + "step": 16133 + }, + { + "epoch": 0.7557970675036305, + "grad_norm": 0.5575691577819043, + "learning_rate": 3.573993425589111e-06, + "loss": 0.2691, + "step": 16134 + }, + { + "epoch": 0.7558439124935589, + "grad_norm": 0.5498665321213688, + "learning_rate": 3.5738221645183396e-06, + "loss": 0.2713, + "step": 16135 + }, + { + "epoch": 0.7558907574834871, + "grad_norm": 0.555975482738851, + "learning_rate": 3.57365089726799e-06, + "loss": 0.2778, + "step": 16136 + }, + { + "epoch": 0.7559376024734155, + "grad_norm": 0.5659129487700812, + "learning_rate": 3.5734796238390497e-06, + "loss": 0.2829, + "step": 16137 + }, + { + "epoch": 0.7559844474633438, + "grad_norm": 0.577232085596838, + "learning_rate": 3.573308344232504e-06, + "loss": 0.2741, + "step": 16138 + }, + { + "epoch": 0.7560312924532722, + "grad_norm": 0.6218803705570125, + "learning_rate": 3.5731370584493384e-06, + "loss": 0.2862, + "step": 16139 + }, + { + "epoch": 0.7560781374432004, + "grad_norm": 0.5722947688924328, + "learning_rate": 3.572965766490539e-06, + "loss": 0.2653, + "step": 16140 + }, + { + "epoch": 0.7561249824331288, + "grad_norm": 0.5818304459785764, + "learning_rate": 3.5727944683570915e-06, + "loss": 0.2832, + "step": 16141 + }, + { + "epoch": 0.7561718274230571, + "grad_norm": 0.6190921075671393, + "learning_rate": 3.572623164049981e-06, + "loss": 0.2816, + "step": 16142 + }, + { + "epoch": 0.7562186724129855, + "grad_norm": 0.5682110648190759, + "learning_rate": 3.5724518535701936e-06, + "loss": 0.2582, + "step": 16143 + }, + { + "epoch": 0.7562655174029138, + "grad_norm": 0.5814448911977756, + "learning_rate": 3.572280536918716e-06, + "loss": 0.2602, + "step": 16144 + }, + { + "epoch": 0.756312362392842, + "grad_norm": 0.5772291573219168, + "learning_rate": 3.572109214096533e-06, + "loss": 0.2805, + "step": 16145 + }, + { + "epoch": 0.7563592073827704, + "grad_norm": 0.6221247890308774, + "learning_rate": 3.571937885104631e-06, + "loss": 0.2751, + "step": 16146 + }, + { + "epoch": 0.7564060523726988, + "grad_norm": 0.5581922169300597, + "learning_rate": 3.5717665499439957e-06, + "loss": 0.2779, + "step": 16147 + }, + { + "epoch": 0.7564528973626271, + "grad_norm": 0.5996199987936965, + "learning_rate": 3.5715952086156136e-06, + "loss": 0.2945, + "step": 16148 + }, + { + "epoch": 0.7564997423525553, + "grad_norm": 0.6213117079207607, + "learning_rate": 3.5714238611204704e-06, + "loss": 0.2922, + "step": 16149 + }, + { + "epoch": 0.7565465873424837, + "grad_norm": 0.5916858178298129, + "learning_rate": 3.571252507459552e-06, + "loss": 0.2787, + "step": 16150 + }, + { + "epoch": 0.756593432332412, + "grad_norm": 0.5844901091129492, + "learning_rate": 3.571081147633845e-06, + "loss": 0.2835, + "step": 16151 + }, + { + "epoch": 0.7566402773223404, + "grad_norm": 0.5647618104752339, + "learning_rate": 3.570909781644335e-06, + "loss": 0.2606, + "step": 16152 + }, + { + "epoch": 0.7566871223122688, + "grad_norm": 0.5592994294491684, + "learning_rate": 3.5707384094920083e-06, + "loss": 0.2541, + "step": 16153 + }, + { + "epoch": 0.756733967302197, + "grad_norm": 0.5666678593212424, + "learning_rate": 3.570567031177851e-06, + "loss": 0.265, + "step": 16154 + }, + { + "epoch": 0.7567808122921253, + "grad_norm": 0.606670586646136, + "learning_rate": 3.5703956467028495e-06, + "loss": 0.287, + "step": 16155 + }, + { + "epoch": 0.7568276572820537, + "grad_norm": 0.6303159840966349, + "learning_rate": 3.5702242560679914e-06, + "loss": 0.2887, + "step": 16156 + }, + { + "epoch": 0.756874502271982, + "grad_norm": 0.5415821327756524, + "learning_rate": 3.57005285927426e-06, + "loss": 0.2616, + "step": 16157 + }, + { + "epoch": 0.7569213472619103, + "grad_norm": 0.5919767887475827, + "learning_rate": 3.5698814563226437e-06, + "loss": 0.2979, + "step": 16158 + }, + { + "epoch": 0.7569681922518386, + "grad_norm": 0.6060856199804494, + "learning_rate": 3.5697100472141287e-06, + "loss": 0.2851, + "step": 16159 + }, + { + "epoch": 0.757015037241767, + "grad_norm": 0.5954163602282094, + "learning_rate": 3.5695386319497017e-06, + "loss": 0.2681, + "step": 16160 + }, + { + "epoch": 0.7570618822316953, + "grad_norm": 0.5709262638848996, + "learning_rate": 3.569367210530348e-06, + "loss": 0.2809, + "step": 16161 + }, + { + "epoch": 0.7571087272216237, + "grad_norm": 0.604828619337463, + "learning_rate": 3.569195782957055e-06, + "loss": 0.2713, + "step": 16162 + }, + { + "epoch": 0.7571555722115519, + "grad_norm": 0.6051368643312373, + "learning_rate": 3.5690243492308095e-06, + "loss": 0.2971, + "step": 16163 + }, + { + "epoch": 0.7572024172014803, + "grad_norm": 0.5643673500860458, + "learning_rate": 3.568852909352597e-06, + "loss": 0.27, + "step": 16164 + }, + { + "epoch": 0.7572492621914086, + "grad_norm": 0.5976441793035419, + "learning_rate": 3.5686814633234044e-06, + "loss": 0.3018, + "step": 16165 + }, + { + "epoch": 0.757296107181337, + "grad_norm": 0.596813232933636, + "learning_rate": 3.568510011144219e-06, + "loss": 0.2994, + "step": 16166 + }, + { + "epoch": 0.7573429521712652, + "grad_norm": 0.6034525182166526, + "learning_rate": 3.5683385528160276e-06, + "loss": 0.2921, + "step": 16167 + }, + { + "epoch": 0.7573897971611936, + "grad_norm": 0.5646746229608317, + "learning_rate": 3.5681670883398157e-06, + "loss": 0.2752, + "step": 16168 + }, + { + "epoch": 0.7574366421511219, + "grad_norm": 0.6153353984451497, + "learning_rate": 3.5679956177165705e-06, + "loss": 0.2829, + "step": 16169 + }, + { + "epoch": 0.7574834871410503, + "grad_norm": 0.6146826269247286, + "learning_rate": 3.567824140947279e-06, + "loss": 0.2891, + "step": 16170 + }, + { + "epoch": 0.7575303321309786, + "grad_norm": 0.5841227870457834, + "learning_rate": 3.5676526580329286e-06, + "loss": 0.2905, + "step": 16171 + }, + { + "epoch": 0.7575771771209069, + "grad_norm": 0.6273167154422492, + "learning_rate": 3.5674811689745047e-06, + "loss": 0.2906, + "step": 16172 + }, + { + "epoch": 0.7576240221108352, + "grad_norm": 0.5857325435908655, + "learning_rate": 3.5673096737729955e-06, + "loss": 0.2851, + "step": 16173 + }, + { + "epoch": 0.7576708671007636, + "grad_norm": 0.6106741954759449, + "learning_rate": 3.5671381724293874e-06, + "loss": 0.2931, + "step": 16174 + }, + { + "epoch": 0.7577177120906919, + "grad_norm": 0.5964272902164209, + "learning_rate": 3.5669666649446667e-06, + "loss": 0.2865, + "step": 16175 + }, + { + "epoch": 0.7577645570806202, + "grad_norm": 0.5640816082466202, + "learning_rate": 3.566795151319821e-06, + "loss": 0.275, + "step": 16176 + }, + { + "epoch": 0.7578114020705485, + "grad_norm": 0.6339648296532054, + "learning_rate": 3.566623631555839e-06, + "loss": 0.2821, + "step": 16177 + }, + { + "epoch": 0.7578582470604769, + "grad_norm": 0.6085708468186609, + "learning_rate": 3.5664521056537043e-06, + "loss": 0.2862, + "step": 16178 + }, + { + "epoch": 0.7579050920504052, + "grad_norm": 0.5727227486367701, + "learning_rate": 3.5662805736144057e-06, + "loss": 0.2902, + "step": 16179 + }, + { + "epoch": 0.7579519370403336, + "grad_norm": 0.5790190237937736, + "learning_rate": 3.5661090354389315e-06, + "loss": 0.2632, + "step": 16180 + }, + { + "epoch": 0.7579987820302618, + "grad_norm": 0.5946117087083941, + "learning_rate": 3.5659374911282672e-06, + "loss": 0.2732, + "step": 16181 + }, + { + "epoch": 0.7580456270201902, + "grad_norm": 0.5601863167548584, + "learning_rate": 3.5657659406834005e-06, + "loss": 0.27, + "step": 16182 + }, + { + "epoch": 0.7580924720101185, + "grad_norm": 0.5697770364965151, + "learning_rate": 3.5655943841053194e-06, + "loss": 0.2552, + "step": 16183 + }, + { + "epoch": 0.7581393170000469, + "grad_norm": 0.598057937291885, + "learning_rate": 3.56542282139501e-06, + "loss": 0.2811, + "step": 16184 + }, + { + "epoch": 0.7581861619899751, + "grad_norm": 0.5615128120229599, + "learning_rate": 3.5652512525534596e-06, + "loss": 0.269, + "step": 16185 + }, + { + "epoch": 0.7582330069799035, + "grad_norm": 0.6599338716338895, + "learning_rate": 3.5650796775816565e-06, + "loss": 0.2976, + "step": 16186 + }, + { + "epoch": 0.7582798519698318, + "grad_norm": 0.5917537590585764, + "learning_rate": 3.564908096480587e-06, + "loss": 0.2782, + "step": 16187 + }, + { + "epoch": 0.7583266969597602, + "grad_norm": 0.5885125628709659, + "learning_rate": 3.56473650925124e-06, + "loss": 0.2721, + "step": 16188 + }, + { + "epoch": 0.7583735419496885, + "grad_norm": 0.6050226886241287, + "learning_rate": 3.5645649158946007e-06, + "loss": 0.2799, + "step": 16189 + }, + { + "epoch": 0.7584203869396168, + "grad_norm": 0.6044090371314074, + "learning_rate": 3.564393316411659e-06, + "loss": 0.2655, + "step": 16190 + }, + { + "epoch": 0.7584672319295451, + "grad_norm": 0.6289366223529834, + "learning_rate": 3.564221710803401e-06, + "loss": 0.2849, + "step": 16191 + }, + { + "epoch": 0.7585140769194735, + "grad_norm": 0.5697902692830694, + "learning_rate": 3.564050099070814e-06, + "loss": 0.2556, + "step": 16192 + }, + { + "epoch": 0.7585609219094018, + "grad_norm": 0.6040302793825012, + "learning_rate": 3.563878481214887e-06, + "loss": 0.2895, + "step": 16193 + }, + { + "epoch": 0.7586077668993301, + "grad_norm": 0.5846101197023051, + "learning_rate": 3.5637068572366064e-06, + "loss": 0.301, + "step": 16194 + }, + { + "epoch": 0.7586546118892584, + "grad_norm": 0.5902301754671518, + "learning_rate": 3.5635352271369596e-06, + "loss": 0.2731, + "step": 16195 + }, + { + "epoch": 0.7587014568791868, + "grad_norm": 0.5979664583149993, + "learning_rate": 3.5633635909169355e-06, + "loss": 0.2846, + "step": 16196 + }, + { + "epoch": 0.7587483018691151, + "grad_norm": 0.5948536794863565, + "learning_rate": 3.5631919485775207e-06, + "loss": 0.29, + "step": 16197 + }, + { + "epoch": 0.7587951468590435, + "grad_norm": 0.6145694899428122, + "learning_rate": 3.563020300119704e-06, + "loss": 0.2829, + "step": 16198 + }, + { + "epoch": 0.7588419918489717, + "grad_norm": 0.5631404399858597, + "learning_rate": 3.5628486455444725e-06, + "loss": 0.276, + "step": 16199 + }, + { + "epoch": 0.7588888368389001, + "grad_norm": 0.6239989972243263, + "learning_rate": 3.562676984852814e-06, + "loss": 0.2889, + "step": 16200 + }, + { + "epoch": 0.7589356818288284, + "grad_norm": 0.5891113899865128, + "learning_rate": 3.562505318045717e-06, + "loss": 0.2962, + "step": 16201 + }, + { + "epoch": 0.7589825268187568, + "grad_norm": 0.6254199963335689, + "learning_rate": 3.5623336451241684e-06, + "loss": 0.2719, + "step": 16202 + }, + { + "epoch": 0.759029371808685, + "grad_norm": 0.596503208726089, + "learning_rate": 3.5621619660891573e-06, + "loss": 0.2896, + "step": 16203 + }, + { + "epoch": 0.7590762167986134, + "grad_norm": 0.6484317200898351, + "learning_rate": 3.56199028094167e-06, + "loss": 0.2839, + "step": 16204 + }, + { + "epoch": 0.7591230617885417, + "grad_norm": 0.6153281469013983, + "learning_rate": 3.5618185896826966e-06, + "loss": 0.2559, + "step": 16205 + }, + { + "epoch": 0.7591699067784701, + "grad_norm": 0.657286518810751, + "learning_rate": 3.5616468923132237e-06, + "loss": 0.2857, + "step": 16206 + }, + { + "epoch": 0.7592167517683984, + "grad_norm": 0.5651461928612688, + "learning_rate": 3.5614751888342397e-06, + "loss": 0.262, + "step": 16207 + }, + { + "epoch": 0.7592635967583267, + "grad_norm": 0.5491373876212977, + "learning_rate": 3.5613034792467328e-06, + "loss": 0.2731, + "step": 16208 + }, + { + "epoch": 0.759310441748255, + "grad_norm": 0.6268106633115117, + "learning_rate": 3.561131763551692e-06, + "loss": 0.2793, + "step": 16209 + }, + { + "epoch": 0.7593572867381834, + "grad_norm": 0.6083279268183015, + "learning_rate": 3.560960041750103e-06, + "loss": 0.273, + "step": 16210 + }, + { + "epoch": 0.7594041317281117, + "grad_norm": 0.6337585794122821, + "learning_rate": 3.5607883138429567e-06, + "loss": 0.2946, + "step": 16211 + }, + { + "epoch": 0.75945097671804, + "grad_norm": 0.5754349110791074, + "learning_rate": 3.56061657983124e-06, + "loss": 0.2633, + "step": 16212 + }, + { + "epoch": 0.7594978217079683, + "grad_norm": 0.6015246146505346, + "learning_rate": 3.5604448397159414e-06, + "loss": 0.275, + "step": 16213 + }, + { + "epoch": 0.7595446666978967, + "grad_norm": 0.6294917188058851, + "learning_rate": 3.5602730934980496e-06, + "loss": 0.2888, + "step": 16214 + }, + { + "epoch": 0.759591511687825, + "grad_norm": 0.5431668439373978, + "learning_rate": 3.5601013411785517e-06, + "loss": 0.2707, + "step": 16215 + }, + { + "epoch": 0.7596383566777534, + "grad_norm": 0.6145891433396458, + "learning_rate": 3.559929582758438e-06, + "loss": 0.2981, + "step": 16216 + }, + { + "epoch": 0.7596852016676816, + "grad_norm": 0.5941325186471358, + "learning_rate": 3.559757818238696e-06, + "loss": 0.281, + "step": 16217 + }, + { + "epoch": 0.75973204665761, + "grad_norm": 0.5911416297229579, + "learning_rate": 3.5595860476203132e-06, + "loss": 0.3005, + "step": 16218 + }, + { + "epoch": 0.7597788916475383, + "grad_norm": 0.615417786047728, + "learning_rate": 3.55941427090428e-06, + "loss": 0.2788, + "step": 16219 + }, + { + "epoch": 0.7598257366374667, + "grad_norm": 0.6203472797745686, + "learning_rate": 3.5592424880915834e-06, + "loss": 0.2782, + "step": 16220 + }, + { + "epoch": 0.7598725816273949, + "grad_norm": 0.5886002259242469, + "learning_rate": 3.5590706991832124e-06, + "loss": 0.2999, + "step": 16221 + }, + { + "epoch": 0.7599194266173233, + "grad_norm": 0.5934665828006828, + "learning_rate": 3.5588989041801565e-06, + "loss": 0.2843, + "step": 16222 + }, + { + "epoch": 0.7599662716072516, + "grad_norm": 0.6183801861995202, + "learning_rate": 3.558727103083403e-06, + "loss": 0.2895, + "step": 16223 + }, + { + "epoch": 0.76001311659718, + "grad_norm": 0.578392162256575, + "learning_rate": 3.5585552958939406e-06, + "loss": 0.2808, + "step": 16224 + }, + { + "epoch": 0.7600599615871083, + "grad_norm": 0.6291884785889544, + "learning_rate": 3.558383482612759e-06, + "loss": 0.299, + "step": 16225 + }, + { + "epoch": 0.7601068065770366, + "grad_norm": 0.6639721183603817, + "learning_rate": 3.558211663240847e-06, + "loss": 0.2899, + "step": 16226 + }, + { + "epoch": 0.7601536515669649, + "grad_norm": 0.5745660074940874, + "learning_rate": 3.5580398377791924e-06, + "loss": 0.2724, + "step": 16227 + }, + { + "epoch": 0.7602004965568933, + "grad_norm": 0.5343277836231608, + "learning_rate": 3.557868006228784e-06, + "loss": 0.2606, + "step": 16228 + }, + { + "epoch": 0.7602473415468216, + "grad_norm": 0.5759110077650613, + "learning_rate": 3.557696168590612e-06, + "loss": 0.2899, + "step": 16229 + }, + { + "epoch": 0.7602941865367498, + "grad_norm": 0.5771028073329735, + "learning_rate": 3.557524324865664e-06, + "loss": 0.2732, + "step": 16230 + }, + { + "epoch": 0.7603410315266782, + "grad_norm": 0.5938775620950735, + "learning_rate": 3.557352475054929e-06, + "loss": 0.2817, + "step": 16231 + }, + { + "epoch": 0.7603878765166066, + "grad_norm": 0.5612036387707001, + "learning_rate": 3.5571806191593965e-06, + "loss": 0.2676, + "step": 16232 + }, + { + "epoch": 0.7604347215065349, + "grad_norm": 0.6423734952937484, + "learning_rate": 3.5570087571800554e-06, + "loss": 0.2696, + "step": 16233 + }, + { + "epoch": 0.7604815664964633, + "grad_norm": 0.6110562776422198, + "learning_rate": 3.556836889117894e-06, + "loss": 0.2821, + "step": 16234 + }, + { + "epoch": 0.7605284114863915, + "grad_norm": 0.5952725620495739, + "learning_rate": 3.5566650149739025e-06, + "loss": 0.2764, + "step": 16235 + }, + { + "epoch": 0.7605752564763198, + "grad_norm": 0.636840192475055, + "learning_rate": 3.5564931347490694e-06, + "loss": 0.2675, + "step": 16236 + }, + { + "epoch": 0.7606221014662482, + "grad_norm": 0.5752737872858359, + "learning_rate": 3.5563212484443837e-06, + "loss": 0.2661, + "step": 16237 + }, + { + "epoch": 0.7606689464561766, + "grad_norm": 0.6202163291853909, + "learning_rate": 3.556149356060835e-06, + "loss": 0.2796, + "step": 16238 + }, + { + "epoch": 0.7607157914461048, + "grad_norm": 0.5997072725614626, + "learning_rate": 3.5559774575994115e-06, + "loss": 0.2946, + "step": 16239 + }, + { + "epoch": 0.7607626364360331, + "grad_norm": 0.6329788300987487, + "learning_rate": 3.5558055530611037e-06, + "loss": 0.2824, + "step": 16240 + }, + { + "epoch": 0.7608094814259615, + "grad_norm": 0.5996869094893859, + "learning_rate": 3.5556336424469007e-06, + "loss": 0.3019, + "step": 16241 + }, + { + "epoch": 0.7608563264158898, + "grad_norm": 0.5611870454544642, + "learning_rate": 3.5554617257577905e-06, + "loss": 0.2716, + "step": 16242 + }, + { + "epoch": 0.7609031714058182, + "grad_norm": 0.5320115619975463, + "learning_rate": 3.5552898029947637e-06, + "loss": 0.265, + "step": 16243 + }, + { + "epoch": 0.7609500163957464, + "grad_norm": 0.6537778187746973, + "learning_rate": 3.55511787415881e-06, + "loss": 0.3002, + "step": 16244 + }, + { + "epoch": 0.7609968613856748, + "grad_norm": 0.5309243372519022, + "learning_rate": 3.5549459392509165e-06, + "loss": 0.2635, + "step": 16245 + }, + { + "epoch": 0.7610437063756031, + "grad_norm": 0.5998378114003343, + "learning_rate": 3.554773998272075e-06, + "loss": 0.2777, + "step": 16246 + }, + { + "epoch": 0.7610905513655315, + "grad_norm": 0.6106779896432268, + "learning_rate": 3.5546020512232755e-06, + "loss": 0.2979, + "step": 16247 + }, + { + "epoch": 0.7611373963554597, + "grad_norm": 0.5743365230201293, + "learning_rate": 3.554430098105505e-06, + "loss": 0.27, + "step": 16248 + }, + { + "epoch": 0.7611842413453881, + "grad_norm": 0.5803097673419392, + "learning_rate": 3.5542581389197545e-06, + "loss": 0.287, + "step": 16249 + }, + { + "epoch": 0.7612310863353164, + "grad_norm": 0.5869358922362459, + "learning_rate": 3.5540861736670136e-06, + "loss": 0.2917, + "step": 16250 + }, + { + "epoch": 0.7612779313252448, + "grad_norm": 0.5993087699357083, + "learning_rate": 3.553914202348272e-06, + "loss": 0.2911, + "step": 16251 + }, + { + "epoch": 0.7613247763151731, + "grad_norm": 0.5465300931770316, + "learning_rate": 3.5537422249645183e-06, + "loss": 0.2725, + "step": 16252 + }, + { + "epoch": 0.7613716213051014, + "grad_norm": 0.6485637623046329, + "learning_rate": 3.553570241516743e-06, + "loss": 0.2897, + "step": 16253 + }, + { + "epoch": 0.7614184662950297, + "grad_norm": 0.6063584187081511, + "learning_rate": 3.553398252005936e-06, + "loss": 0.2922, + "step": 16254 + }, + { + "epoch": 0.7614653112849581, + "grad_norm": 0.6543356984637086, + "learning_rate": 3.553226256433087e-06, + "loss": 0.2844, + "step": 16255 + }, + { + "epoch": 0.7615121562748864, + "grad_norm": 0.632953823263304, + "learning_rate": 3.553054254799186e-06, + "loss": 0.2638, + "step": 16256 + }, + { + "epoch": 0.7615590012648147, + "grad_norm": 0.586627406747517, + "learning_rate": 3.5528822471052214e-06, + "loss": 0.2901, + "step": 16257 + }, + { + "epoch": 0.761605846254743, + "grad_norm": 0.6045438785801738, + "learning_rate": 3.5527102333521845e-06, + "loss": 0.2886, + "step": 16258 + }, + { + "epoch": 0.7616526912446714, + "grad_norm": 0.5932600489454549, + "learning_rate": 3.5525382135410646e-06, + "loss": 0.2721, + "step": 16259 + }, + { + "epoch": 0.7616995362345997, + "grad_norm": 0.5627799297688959, + "learning_rate": 3.552366187672853e-06, + "loss": 0.2845, + "step": 16260 + }, + { + "epoch": 0.7617463812245281, + "grad_norm": 0.5947410374498892, + "learning_rate": 3.552194155748537e-06, + "loss": 0.2853, + "step": 16261 + }, + { + "epoch": 0.7617932262144563, + "grad_norm": 0.6098506842020134, + "learning_rate": 3.552022117769109e-06, + "loss": 0.2887, + "step": 16262 + }, + { + "epoch": 0.7618400712043847, + "grad_norm": 0.5391588675228401, + "learning_rate": 3.5518500737355578e-06, + "loss": 0.2825, + "step": 16263 + }, + { + "epoch": 0.761886916194313, + "grad_norm": 0.5225737778511634, + "learning_rate": 3.551678023648874e-06, + "loss": 0.2613, + "step": 16264 + }, + { + "epoch": 0.7619337611842414, + "grad_norm": 0.5857147210787331, + "learning_rate": 3.551505967510047e-06, + "loss": 0.2931, + "step": 16265 + }, + { + "epoch": 0.7619806061741696, + "grad_norm": 0.589112620530884, + "learning_rate": 3.5513339053200678e-06, + "loss": 0.2788, + "step": 16266 + }, + { + "epoch": 0.762027451164098, + "grad_norm": 0.5875266313402481, + "learning_rate": 3.551161837079926e-06, + "loss": 0.2997, + "step": 16267 + }, + { + "epoch": 0.7620742961540263, + "grad_norm": 0.5638012902386333, + "learning_rate": 3.5509897627906124e-06, + "loss": 0.2875, + "step": 16268 + }, + { + "epoch": 0.7621211411439547, + "grad_norm": 0.618047152927961, + "learning_rate": 3.5508176824531172e-06, + "loss": 0.2877, + "step": 16269 + }, + { + "epoch": 0.762167986133883, + "grad_norm": 0.6393303956777462, + "learning_rate": 3.5506455960684294e-06, + "loss": 0.2967, + "step": 16270 + }, + { + "epoch": 0.7622148311238113, + "grad_norm": 0.559557731686686, + "learning_rate": 3.550473503637541e-06, + "loss": 0.2844, + "step": 16271 + }, + { + "epoch": 0.7622616761137396, + "grad_norm": 0.6063402156614107, + "learning_rate": 3.5503014051614416e-06, + "loss": 0.2809, + "step": 16272 + }, + { + "epoch": 0.762308521103668, + "grad_norm": 0.6172355762346988, + "learning_rate": 3.550129300641121e-06, + "loss": 0.3059, + "step": 16273 + }, + { + "epoch": 0.7623553660935963, + "grad_norm": 0.5879359234437209, + "learning_rate": 3.5499571900775702e-06, + "loss": 0.2805, + "step": 16274 + }, + { + "epoch": 0.7624022110835246, + "grad_norm": 0.6133770777943257, + "learning_rate": 3.549785073471781e-06, + "loss": 0.2819, + "step": 16275 + }, + { + "epoch": 0.7624490560734529, + "grad_norm": 0.5794627012974606, + "learning_rate": 3.549612950824741e-06, + "loss": 0.277, + "step": 16276 + }, + { + "epoch": 0.7624959010633813, + "grad_norm": 0.624330573739565, + "learning_rate": 3.549440822137443e-06, + "loss": 0.288, + "step": 16277 + }, + { + "epoch": 0.7625427460533096, + "grad_norm": 0.6124258293062235, + "learning_rate": 3.5492686874108767e-06, + "loss": 0.275, + "step": 16278 + }, + { + "epoch": 0.762589591043238, + "grad_norm": 0.620363289605904, + "learning_rate": 3.5490965466460333e-06, + "loss": 0.2962, + "step": 16279 + }, + { + "epoch": 0.7626364360331662, + "grad_norm": 0.6043955735681573, + "learning_rate": 3.5489243998439016e-06, + "loss": 0.2828, + "step": 16280 + }, + { + "epoch": 0.7626832810230946, + "grad_norm": 0.5627831903430527, + "learning_rate": 3.5487522470054747e-06, + "loss": 0.2757, + "step": 16281 + }, + { + "epoch": 0.7627301260130229, + "grad_norm": 0.6600148728820255, + "learning_rate": 3.5485800881317424e-06, + "loss": 0.2965, + "step": 16282 + }, + { + "epoch": 0.7627769710029513, + "grad_norm": 0.5523793046392632, + "learning_rate": 3.5484079232236944e-06, + "loss": 0.2696, + "step": 16283 + }, + { + "epoch": 0.7628238159928795, + "grad_norm": 0.6098165651612741, + "learning_rate": 3.548235752282323e-06, + "loss": 0.3017, + "step": 16284 + }, + { + "epoch": 0.7628706609828079, + "grad_norm": 0.6135754986171461, + "learning_rate": 3.548063575308618e-06, + "loss": 0.2686, + "step": 16285 + }, + { + "epoch": 0.7629175059727362, + "grad_norm": 0.5861558214978075, + "learning_rate": 3.5478913923035707e-06, + "loss": 0.2845, + "step": 16286 + }, + { + "epoch": 0.7629643509626646, + "grad_norm": 0.5871019752160693, + "learning_rate": 3.5477192032681717e-06, + "loss": 0.2774, + "step": 16287 + }, + { + "epoch": 0.7630111959525929, + "grad_norm": 0.5562983066374798, + "learning_rate": 3.5475470082034118e-06, + "loss": 0.2688, + "step": 16288 + }, + { + "epoch": 0.7630580409425212, + "grad_norm": 0.5987747478210154, + "learning_rate": 3.5473748071102827e-06, + "loss": 0.3093, + "step": 16289 + }, + { + "epoch": 0.7631048859324495, + "grad_norm": 0.6105719991452535, + "learning_rate": 3.547202599989775e-06, + "loss": 0.2826, + "step": 16290 + }, + { + "epoch": 0.7631517309223779, + "grad_norm": 0.553829741822328, + "learning_rate": 3.5470303868428787e-06, + "loss": 0.2617, + "step": 16291 + }, + { + "epoch": 0.7631985759123062, + "grad_norm": 0.5640569781602804, + "learning_rate": 3.546858167670586e-06, + "loss": 0.2784, + "step": 16292 + }, + { + "epoch": 0.7632454209022345, + "grad_norm": 0.5589871425423341, + "learning_rate": 3.546685942473888e-06, + "loss": 0.2632, + "step": 16293 + }, + { + "epoch": 0.7632922658921628, + "grad_norm": 0.5605658853807014, + "learning_rate": 3.546513711253775e-06, + "loss": 0.2857, + "step": 16294 + }, + { + "epoch": 0.7633391108820912, + "grad_norm": 0.6081199176170475, + "learning_rate": 3.5463414740112386e-06, + "loss": 0.3006, + "step": 16295 + }, + { + "epoch": 0.7633859558720195, + "grad_norm": 0.6007036065634054, + "learning_rate": 3.5461692307472707e-06, + "loss": 0.2779, + "step": 16296 + }, + { + "epoch": 0.7634328008619479, + "grad_norm": 0.5911233666555269, + "learning_rate": 3.5459969814628615e-06, + "loss": 0.2947, + "step": 16297 + }, + { + "epoch": 0.7634796458518761, + "grad_norm": 0.5672058328158971, + "learning_rate": 3.5458247261590018e-06, + "loss": 0.294, + "step": 16298 + }, + { + "epoch": 0.7635264908418045, + "grad_norm": 0.5680547749656457, + "learning_rate": 3.545652464836684e-06, + "loss": 0.2634, + "step": 16299 + }, + { + "epoch": 0.7635733358317328, + "grad_norm": 0.6242354219909723, + "learning_rate": 3.5454801974969e-06, + "loss": 0.2779, + "step": 16300 + }, + { + "epoch": 0.7636201808216612, + "grad_norm": 0.5738304710327033, + "learning_rate": 3.545307924140639e-06, + "loss": 0.2914, + "step": 16301 + }, + { + "epoch": 0.7636670258115894, + "grad_norm": 0.5867346631888817, + "learning_rate": 3.5451356447688944e-06, + "loss": 0.2855, + "step": 16302 + }, + { + "epoch": 0.7637138708015178, + "grad_norm": 0.5493381926820623, + "learning_rate": 3.544963359382657e-06, + "loss": 0.2883, + "step": 16303 + }, + { + "epoch": 0.7637607157914461, + "grad_norm": 0.5904436436137779, + "learning_rate": 3.544791067982918e-06, + "loss": 0.3078, + "step": 16304 + }, + { + "epoch": 0.7638075607813745, + "grad_norm": 0.5783400524867223, + "learning_rate": 3.5446187705706686e-06, + "loss": 0.2645, + "step": 16305 + }, + { + "epoch": 0.7638544057713028, + "grad_norm": 0.6237154422145269, + "learning_rate": 3.5444464671469005e-06, + "loss": 0.2839, + "step": 16306 + }, + { + "epoch": 0.763901250761231, + "grad_norm": 0.5697872836698121, + "learning_rate": 3.544274157712606e-06, + "loss": 0.2529, + "step": 16307 + }, + { + "epoch": 0.7639480957511594, + "grad_norm": 0.6128433991523149, + "learning_rate": 3.544101842268777e-06, + "loss": 0.2927, + "step": 16308 + }, + { + "epoch": 0.7639949407410878, + "grad_norm": 0.586381761761036, + "learning_rate": 3.5439295208164025e-06, + "loss": 0.2985, + "step": 16309 + }, + { + "epoch": 0.7640417857310161, + "grad_norm": 0.5944675415974129, + "learning_rate": 3.5437571933564775e-06, + "loss": 0.2797, + "step": 16310 + }, + { + "epoch": 0.7640886307209444, + "grad_norm": 0.6242584960049856, + "learning_rate": 3.543584859889991e-06, + "loss": 0.2921, + "step": 16311 + }, + { + "epoch": 0.7641354757108727, + "grad_norm": 0.5747064028387694, + "learning_rate": 3.543412520417937e-06, + "loss": 0.2923, + "step": 16312 + }, + { + "epoch": 0.764182320700801, + "grad_norm": 0.5736299232499924, + "learning_rate": 3.543240174941306e-06, + "loss": 0.2905, + "step": 16313 + }, + { + "epoch": 0.7642291656907294, + "grad_norm": 0.6119964758950482, + "learning_rate": 3.5430678234610894e-06, + "loss": 0.2908, + "step": 16314 + }, + { + "epoch": 0.7642760106806578, + "grad_norm": 0.5632548112215724, + "learning_rate": 3.54289546597828e-06, + "loss": 0.2676, + "step": 16315 + }, + { + "epoch": 0.764322855670586, + "grad_norm": 0.5936918720300581, + "learning_rate": 3.542723102493869e-06, + "loss": 0.2713, + "step": 16316 + }, + { + "epoch": 0.7643697006605144, + "grad_norm": 0.6362005612537538, + "learning_rate": 3.5425507330088495e-06, + "loss": 0.2839, + "step": 16317 + }, + { + "epoch": 0.7644165456504427, + "grad_norm": 0.5497519749009726, + "learning_rate": 3.5423783575242117e-06, + "loss": 0.257, + "step": 16318 + }, + { + "epoch": 0.764463390640371, + "grad_norm": 0.5597021604753276, + "learning_rate": 3.5422059760409483e-06, + "loss": 0.2615, + "step": 16319 + }, + { + "epoch": 0.7645102356302993, + "grad_norm": 0.6250838176200816, + "learning_rate": 3.5420335885600523e-06, + "loss": 0.3003, + "step": 16320 + }, + { + "epoch": 0.7645570806202276, + "grad_norm": 0.6081472057021118, + "learning_rate": 3.5418611950825144e-06, + "loss": 0.2696, + "step": 16321 + }, + { + "epoch": 0.764603925610156, + "grad_norm": 0.6319001752128238, + "learning_rate": 3.541688795609327e-06, + "loss": 0.2894, + "step": 16322 + }, + { + "epoch": 0.7646507706000844, + "grad_norm": 0.5803177904217586, + "learning_rate": 3.5415163901414827e-06, + "loss": 0.2765, + "step": 16323 + }, + { + "epoch": 0.7646976155900127, + "grad_norm": 0.5928582092838466, + "learning_rate": 3.5413439786799734e-06, + "loss": 0.2851, + "step": 16324 + }, + { + "epoch": 0.7647444605799409, + "grad_norm": 0.6479879850907369, + "learning_rate": 3.541171561225791e-06, + "loss": 0.2995, + "step": 16325 + }, + { + "epoch": 0.7647913055698693, + "grad_norm": 0.5883678127806843, + "learning_rate": 3.540999137779928e-06, + "loss": 0.2815, + "step": 16326 + }, + { + "epoch": 0.7648381505597976, + "grad_norm": 0.6159884228264778, + "learning_rate": 3.5408267083433765e-06, + "loss": 0.2762, + "step": 16327 + }, + { + "epoch": 0.764884995549726, + "grad_norm": 0.6491737338792722, + "learning_rate": 3.5406542729171296e-06, + "loss": 0.297, + "step": 16328 + }, + { + "epoch": 0.7649318405396542, + "grad_norm": 0.5898436242494235, + "learning_rate": 3.5404818315021784e-06, + "loss": 0.2605, + "step": 16329 + }, + { + "epoch": 0.7649786855295826, + "grad_norm": 0.6105254843647102, + "learning_rate": 3.5403093840995157e-06, + "loss": 0.2836, + "step": 16330 + }, + { + "epoch": 0.7650255305195109, + "grad_norm": 0.6038023496768942, + "learning_rate": 3.540136930710134e-06, + "loss": 0.2813, + "step": 16331 + }, + { + "epoch": 0.7650723755094393, + "grad_norm": 0.5772305261575474, + "learning_rate": 3.5399644713350256e-06, + "loss": 0.2723, + "step": 16332 + }, + { + "epoch": 0.7651192204993676, + "grad_norm": 0.5316112214087082, + "learning_rate": 3.539792005975183e-06, + "loss": 0.2549, + "step": 16333 + }, + { + "epoch": 0.7651660654892959, + "grad_norm": 0.5691438547544675, + "learning_rate": 3.5396195346315997e-06, + "loss": 0.2609, + "step": 16334 + }, + { + "epoch": 0.7652129104792242, + "grad_norm": 0.5529411799533916, + "learning_rate": 3.5394470573052663e-06, + "loss": 0.2845, + "step": 16335 + }, + { + "epoch": 0.7652597554691526, + "grad_norm": 0.6301601272006334, + "learning_rate": 3.539274573997177e-06, + "loss": 0.2986, + "step": 16336 + }, + { + "epoch": 0.7653066004590809, + "grad_norm": 0.6477015595019203, + "learning_rate": 3.5391020847083225e-06, + "loss": 0.3312, + "step": 16337 + }, + { + "epoch": 0.7653534454490092, + "grad_norm": 0.5718043674144366, + "learning_rate": 3.538929589439698e-06, + "loss": 0.2822, + "step": 16338 + }, + { + "epoch": 0.7654002904389375, + "grad_norm": 0.6339496423394838, + "learning_rate": 3.5387570881922944e-06, + "loss": 0.2877, + "step": 16339 + }, + { + "epoch": 0.7654471354288659, + "grad_norm": 0.6075686933100902, + "learning_rate": 3.5385845809671044e-06, + "loss": 0.2862, + "step": 16340 + }, + { + "epoch": 0.7654939804187942, + "grad_norm": 0.5599961947870339, + "learning_rate": 3.5384120677651216e-06, + "loss": 0.2745, + "step": 16341 + }, + { + "epoch": 0.7655408254087226, + "grad_norm": 0.5774933987172984, + "learning_rate": 3.5382395485873387e-06, + "loss": 0.2601, + "step": 16342 + }, + { + "epoch": 0.7655876703986508, + "grad_norm": 0.609894573698071, + "learning_rate": 3.5380670234347477e-06, + "loss": 0.2839, + "step": 16343 + }, + { + "epoch": 0.7656345153885792, + "grad_norm": 0.5795581470514112, + "learning_rate": 3.5378944923083416e-06, + "loss": 0.2855, + "step": 16344 + }, + { + "epoch": 0.7656813603785075, + "grad_norm": 0.5989191444721161, + "learning_rate": 3.537721955209115e-06, + "loss": 0.2708, + "step": 16345 + }, + { + "epoch": 0.7657282053684359, + "grad_norm": 0.6780775426736941, + "learning_rate": 3.537549412138057e-06, + "loss": 0.3017, + "step": 16346 + }, + { + "epoch": 0.7657750503583641, + "grad_norm": 0.6575115774130982, + "learning_rate": 3.5373768630961643e-06, + "loss": 0.3098, + "step": 16347 + }, + { + "epoch": 0.7658218953482925, + "grad_norm": 0.5773044546047053, + "learning_rate": 3.537204308084428e-06, + "loss": 0.2799, + "step": 16348 + }, + { + "epoch": 0.7658687403382208, + "grad_norm": 0.6474646455733051, + "learning_rate": 3.5370317471038423e-06, + "loss": 0.2924, + "step": 16349 + }, + { + "epoch": 0.7659155853281492, + "grad_norm": 0.6200185940116881, + "learning_rate": 3.5368591801553993e-06, + "loss": 0.3141, + "step": 16350 + }, + { + "epoch": 0.7659624303180775, + "grad_norm": 0.5911906736978104, + "learning_rate": 3.5366866072400925e-06, + "loss": 0.2952, + "step": 16351 + }, + { + "epoch": 0.7660092753080058, + "grad_norm": 0.5572869295393581, + "learning_rate": 3.5365140283589145e-06, + "loss": 0.2794, + "step": 16352 + }, + { + "epoch": 0.7660561202979341, + "grad_norm": 0.6522937316663504, + "learning_rate": 3.5363414435128586e-06, + "loss": 0.2925, + "step": 16353 + }, + { + "epoch": 0.7661029652878625, + "grad_norm": 0.5944910647635601, + "learning_rate": 3.5361688527029186e-06, + "loss": 0.2737, + "step": 16354 + }, + { + "epoch": 0.7661498102777908, + "grad_norm": 0.6102680796373158, + "learning_rate": 3.5359962559300864e-06, + "loss": 0.2662, + "step": 16355 + }, + { + "epoch": 0.7661966552677191, + "grad_norm": 0.5944160885035202, + "learning_rate": 3.535823653195357e-06, + "loss": 0.2879, + "step": 16356 + }, + { + "epoch": 0.7662435002576474, + "grad_norm": 0.5643295099208154, + "learning_rate": 3.5356510444997233e-06, + "loss": 0.259, + "step": 16357 + }, + { + "epoch": 0.7662903452475758, + "grad_norm": 0.6110510157289689, + "learning_rate": 3.535478429844177e-06, + "loss": 0.2615, + "step": 16358 + }, + { + "epoch": 0.7663371902375041, + "grad_norm": 0.6416175123411012, + "learning_rate": 3.5353058092297133e-06, + "loss": 0.2934, + "step": 16359 + }, + { + "epoch": 0.7663840352274325, + "grad_norm": 0.6101069732035765, + "learning_rate": 3.535133182657325e-06, + "loss": 0.2811, + "step": 16360 + }, + { + "epoch": 0.7664308802173607, + "grad_norm": 0.5568691885254581, + "learning_rate": 3.5349605501280048e-06, + "loss": 0.2749, + "step": 16361 + }, + { + "epoch": 0.7664777252072891, + "grad_norm": 0.5682989771460029, + "learning_rate": 3.534787911642747e-06, + "loss": 0.3022, + "step": 16362 + }, + { + "epoch": 0.7665245701972174, + "grad_norm": 0.5732029065175174, + "learning_rate": 3.534615267202545e-06, + "loss": 0.3056, + "step": 16363 + }, + { + "epoch": 0.7665714151871458, + "grad_norm": 0.6033581963311377, + "learning_rate": 3.5344426168083917e-06, + "loss": 0.2974, + "step": 16364 + }, + { + "epoch": 0.766618260177074, + "grad_norm": 0.5862260169545225, + "learning_rate": 3.534269960461281e-06, + "loss": 0.2494, + "step": 16365 + }, + { + "epoch": 0.7666651051670024, + "grad_norm": 0.6037666494233458, + "learning_rate": 3.534097298162208e-06, + "loss": 0.2922, + "step": 16366 + }, + { + "epoch": 0.7667119501569307, + "grad_norm": 0.629821516486537, + "learning_rate": 3.533924629912164e-06, + "loss": 0.2929, + "step": 16367 + }, + { + "epoch": 0.7667587951468591, + "grad_norm": 0.5828510161549448, + "learning_rate": 3.5337519557121436e-06, + "loss": 0.2944, + "step": 16368 + }, + { + "epoch": 0.7668056401367874, + "grad_norm": 0.5820910799280328, + "learning_rate": 3.5335792755631404e-06, + "loss": 0.2658, + "step": 16369 + }, + { + "epoch": 0.7668524851267157, + "grad_norm": 0.6180705908074284, + "learning_rate": 3.5334065894661485e-06, + "loss": 0.3078, + "step": 16370 + }, + { + "epoch": 0.766899330116644, + "grad_norm": 0.569636503340534, + "learning_rate": 3.5332338974221616e-06, + "loss": 0.2792, + "step": 16371 + }, + { + "epoch": 0.7669461751065724, + "grad_norm": 0.5931337389448665, + "learning_rate": 3.533061199432173e-06, + "loss": 0.2564, + "step": 16372 + }, + { + "epoch": 0.7669930200965007, + "grad_norm": 0.5838133306266896, + "learning_rate": 3.532888495497177e-06, + "loss": 0.283, + "step": 16373 + }, + { + "epoch": 0.767039865086429, + "grad_norm": 0.6211118192661524, + "learning_rate": 3.5327157856181672e-06, + "loss": 0.2787, + "step": 16374 + }, + { + "epoch": 0.7670867100763573, + "grad_norm": 0.5688418603786515, + "learning_rate": 3.5325430697961376e-06, + "loss": 0.2721, + "step": 16375 + }, + { + "epoch": 0.7671335550662857, + "grad_norm": 0.5783424216865715, + "learning_rate": 3.532370348032082e-06, + "loss": 0.2639, + "step": 16376 + }, + { + "epoch": 0.767180400056214, + "grad_norm": 0.5755477129451887, + "learning_rate": 3.5321976203269955e-06, + "loss": 0.2738, + "step": 16377 + }, + { + "epoch": 0.7672272450461424, + "grad_norm": 0.6416322418892969, + "learning_rate": 3.53202488668187e-06, + "loss": 0.2855, + "step": 16378 + }, + { + "epoch": 0.7672740900360706, + "grad_norm": 0.5622927028439825, + "learning_rate": 3.5318521470977008e-06, + "loss": 0.2826, + "step": 16379 + }, + { + "epoch": 0.767320935025999, + "grad_norm": 0.5528475470809706, + "learning_rate": 3.5316794015754824e-06, + "loss": 0.279, + "step": 16380 + }, + { + "epoch": 0.7673677800159273, + "grad_norm": 0.5827939637918326, + "learning_rate": 3.531506650116208e-06, + "loss": 0.282, + "step": 16381 + }, + { + "epoch": 0.7674146250058557, + "grad_norm": 0.5692377067066929, + "learning_rate": 3.5313338927208717e-06, + "loss": 0.2758, + "step": 16382 + }, + { + "epoch": 0.7674614699957839, + "grad_norm": 0.6616575312037621, + "learning_rate": 3.531161129390469e-06, + "loss": 0.2992, + "step": 16383 + }, + { + "epoch": 0.7675083149857123, + "grad_norm": 0.5977106838628159, + "learning_rate": 3.530988360125993e-06, + "loss": 0.2821, + "step": 16384 + }, + { + "epoch": 0.7675551599756406, + "grad_norm": 0.5745826612976213, + "learning_rate": 3.5308155849284374e-06, + "loss": 0.2827, + "step": 16385 + }, + { + "epoch": 0.767602004965569, + "grad_norm": 0.569042895106781, + "learning_rate": 3.530642803798797e-06, + "loss": 0.2791, + "step": 16386 + }, + { + "epoch": 0.7676488499554973, + "grad_norm": 0.6151704803418504, + "learning_rate": 3.530470016738068e-06, + "loss": 0.2957, + "step": 16387 + }, + { + "epoch": 0.7676956949454256, + "grad_norm": 0.5799438142355703, + "learning_rate": 3.5302972237472415e-06, + "loss": 0.2676, + "step": 16388 + }, + { + "epoch": 0.7677425399353539, + "grad_norm": 0.5704500372076872, + "learning_rate": 3.5301244248273136e-06, + "loss": 0.2794, + "step": 16389 + }, + { + "epoch": 0.7677893849252823, + "grad_norm": 0.6418606957610007, + "learning_rate": 3.529951619979279e-06, + "loss": 0.2829, + "step": 16390 + }, + { + "epoch": 0.7678362299152106, + "grad_norm": 0.6012021009807131, + "learning_rate": 3.5297788092041317e-06, + "loss": 0.2679, + "step": 16391 + }, + { + "epoch": 0.7678830749051389, + "grad_norm": 0.658677408768006, + "learning_rate": 3.529605992502866e-06, + "loss": 0.2999, + "step": 16392 + }, + { + "epoch": 0.7679299198950672, + "grad_norm": 0.6128802419358104, + "learning_rate": 3.529433169876476e-06, + "loss": 0.2745, + "step": 16393 + }, + { + "epoch": 0.7679767648849956, + "grad_norm": 0.5681413651446828, + "learning_rate": 3.529260341325958e-06, + "loss": 0.2579, + "step": 16394 + }, + { + "epoch": 0.7680236098749239, + "grad_norm": 0.5691442627003197, + "learning_rate": 3.5290875068523045e-06, + "loss": 0.2844, + "step": 16395 + }, + { + "epoch": 0.7680704548648523, + "grad_norm": 0.6225855062464124, + "learning_rate": 3.528914666456511e-06, + "loss": 0.2666, + "step": 16396 + }, + { + "epoch": 0.7681172998547805, + "grad_norm": 0.6141992224849603, + "learning_rate": 3.528741820139573e-06, + "loss": 0.2809, + "step": 16397 + }, + { + "epoch": 0.7681641448447089, + "grad_norm": 0.5758669568792395, + "learning_rate": 3.528568967902484e-06, + "loss": 0.268, + "step": 16398 + }, + { + "epoch": 0.7682109898346372, + "grad_norm": 0.6351616264815898, + "learning_rate": 3.5283961097462396e-06, + "loss": 0.3052, + "step": 16399 + }, + { + "epoch": 0.7682578348245656, + "grad_norm": 0.5566112863116589, + "learning_rate": 3.5282232456718334e-06, + "loss": 0.2621, + "step": 16400 + }, + { + "epoch": 0.7683046798144938, + "grad_norm": 0.5992332959139329, + "learning_rate": 3.528050375680261e-06, + "loss": 0.2955, + "step": 16401 + }, + { + "epoch": 0.7683515248044221, + "grad_norm": 0.6795386324599028, + "learning_rate": 3.527877499772517e-06, + "loss": 0.2929, + "step": 16402 + }, + { + "epoch": 0.7683983697943505, + "grad_norm": 0.5987616286972344, + "learning_rate": 3.5277046179495966e-06, + "loss": 0.2733, + "step": 16403 + }, + { + "epoch": 0.7684452147842789, + "grad_norm": 0.525032726302045, + "learning_rate": 3.5275317302124945e-06, + "loss": 0.2606, + "step": 16404 + }, + { + "epoch": 0.7684920597742072, + "grad_norm": 0.5933176989949124, + "learning_rate": 3.5273588365622046e-06, + "loss": 0.2868, + "step": 16405 + }, + { + "epoch": 0.7685389047641354, + "grad_norm": 0.599860602991573, + "learning_rate": 3.5271859369997243e-06, + "loss": 0.2839, + "step": 16406 + }, + { + "epoch": 0.7685857497540638, + "grad_norm": 0.5966674568268427, + "learning_rate": 3.527013031526046e-06, + "loss": 0.2805, + "step": 16407 + }, + { + "epoch": 0.7686325947439921, + "grad_norm": 0.6190993698495129, + "learning_rate": 3.5268401201421665e-06, + "loss": 0.2994, + "step": 16408 + }, + { + "epoch": 0.7686794397339205, + "grad_norm": 0.6215438047838941, + "learning_rate": 3.52666720284908e-06, + "loss": 0.2695, + "step": 16409 + }, + { + "epoch": 0.7687262847238487, + "grad_norm": 0.5386517514416991, + "learning_rate": 3.526494279647781e-06, + "loss": 0.2869, + "step": 16410 + }, + { + "epoch": 0.7687731297137771, + "grad_norm": 0.540540159161526, + "learning_rate": 3.5263213505392662e-06, + "loss": 0.2696, + "step": 16411 + }, + { + "epoch": 0.7688199747037054, + "grad_norm": 0.5643534846975701, + "learning_rate": 3.52614841552453e-06, + "loss": 0.2852, + "step": 16412 + }, + { + "epoch": 0.7688668196936338, + "grad_norm": 0.5948405716624174, + "learning_rate": 3.525975474604567e-06, + "loss": 0.2867, + "step": 16413 + }, + { + "epoch": 0.7689136646835621, + "grad_norm": 0.615235784275659, + "learning_rate": 3.5258025277803737e-06, + "loss": 0.2753, + "step": 16414 + }, + { + "epoch": 0.7689605096734904, + "grad_norm": 0.5726044455482401, + "learning_rate": 3.525629575052945e-06, + "loss": 0.2789, + "step": 16415 + }, + { + "epoch": 0.7690073546634187, + "grad_norm": 0.6170511465691679, + "learning_rate": 3.525456616423275e-06, + "loss": 0.2789, + "step": 16416 + }, + { + "epoch": 0.7690541996533471, + "grad_norm": 0.6174422981526884, + "learning_rate": 3.52528365189236e-06, + "loss": 0.2803, + "step": 16417 + }, + { + "epoch": 0.7691010446432754, + "grad_norm": 0.593660020960463, + "learning_rate": 3.525110681461196e-06, + "loss": 0.2774, + "step": 16418 + }, + { + "epoch": 0.7691478896332037, + "grad_norm": 0.6232294109697837, + "learning_rate": 3.524937705130777e-06, + "loss": 0.3132, + "step": 16419 + }, + { + "epoch": 0.769194734623132, + "grad_norm": 0.6206400931232496, + "learning_rate": 3.524764722902099e-06, + "loss": 0.279, + "step": 16420 + }, + { + "epoch": 0.7692415796130604, + "grad_norm": 0.5769573352394491, + "learning_rate": 3.5245917347761583e-06, + "loss": 0.267, + "step": 16421 + }, + { + "epoch": 0.7692884246029887, + "grad_norm": 0.6259405809393971, + "learning_rate": 3.5244187407539494e-06, + "loss": 0.2685, + "step": 16422 + }, + { + "epoch": 0.7693352695929171, + "grad_norm": 0.594997837416354, + "learning_rate": 3.524245740836468e-06, + "loss": 0.2709, + "step": 16423 + }, + { + "epoch": 0.7693821145828453, + "grad_norm": 0.6389897375466269, + "learning_rate": 3.5240727350247094e-06, + "loss": 0.3012, + "step": 16424 + }, + { + "epoch": 0.7694289595727737, + "grad_norm": 0.6151748076582054, + "learning_rate": 3.5238997233196696e-06, + "loss": 0.2744, + "step": 16425 + }, + { + "epoch": 0.769475804562702, + "grad_norm": 0.5912558861454853, + "learning_rate": 3.523726705722345e-06, + "loss": 0.2628, + "step": 16426 + }, + { + "epoch": 0.7695226495526304, + "grad_norm": 0.6297498115022246, + "learning_rate": 3.52355368223373e-06, + "loss": 0.2737, + "step": 16427 + }, + { + "epoch": 0.7695694945425586, + "grad_norm": 0.6244592688249363, + "learning_rate": 3.5233806528548207e-06, + "loss": 0.294, + "step": 16428 + }, + { + "epoch": 0.769616339532487, + "grad_norm": 0.5752794627324838, + "learning_rate": 3.5232076175866135e-06, + "loss": 0.2634, + "step": 16429 + }, + { + "epoch": 0.7696631845224153, + "grad_norm": 0.6069805162347135, + "learning_rate": 3.523034576430104e-06, + "loss": 0.282, + "step": 16430 + }, + { + "epoch": 0.7697100295123437, + "grad_norm": 0.6742412397495656, + "learning_rate": 3.5228615293862867e-06, + "loss": 0.2777, + "step": 16431 + }, + { + "epoch": 0.769756874502272, + "grad_norm": 0.5752964270540746, + "learning_rate": 3.5226884764561585e-06, + "loss": 0.2869, + "step": 16432 + }, + { + "epoch": 0.7698037194922003, + "grad_norm": 0.592035928875969, + "learning_rate": 3.522515417640716e-06, + "loss": 0.2881, + "step": 16433 + }, + { + "epoch": 0.7698505644821286, + "grad_norm": 0.5680166057638248, + "learning_rate": 3.5223423529409533e-06, + "loss": 0.29, + "step": 16434 + }, + { + "epoch": 0.769897409472057, + "grad_norm": 0.5752523094774872, + "learning_rate": 3.5221692823578673e-06, + "loss": 0.2709, + "step": 16435 + }, + { + "epoch": 0.7699442544619853, + "grad_norm": 0.6230582499249202, + "learning_rate": 3.521996205892455e-06, + "loss": 0.2887, + "step": 16436 + }, + { + "epoch": 0.7699910994519136, + "grad_norm": 0.586504104511945, + "learning_rate": 3.52182312354571e-06, + "loss": 0.2798, + "step": 16437 + }, + { + "epoch": 0.7700379444418419, + "grad_norm": 0.5971872008866026, + "learning_rate": 3.5216500353186307e-06, + "loss": 0.2986, + "step": 16438 + }, + { + "epoch": 0.7700847894317703, + "grad_norm": 0.5199272231933836, + "learning_rate": 3.521476941212212e-06, + "loss": 0.277, + "step": 16439 + }, + { + "epoch": 0.7701316344216986, + "grad_norm": 0.61042179982888, + "learning_rate": 3.5213038412274503e-06, + "loss": 0.2965, + "step": 16440 + }, + { + "epoch": 0.770178479411627, + "grad_norm": 0.6524081646740642, + "learning_rate": 3.5211307353653417e-06, + "loss": 0.3096, + "step": 16441 + }, + { + "epoch": 0.7702253244015552, + "grad_norm": 0.5419205213239959, + "learning_rate": 3.5209576236268827e-06, + "loss": 0.2607, + "step": 16442 + }, + { + "epoch": 0.7702721693914836, + "grad_norm": 0.571450982865234, + "learning_rate": 3.5207845060130686e-06, + "loss": 0.2698, + "step": 16443 + }, + { + "epoch": 0.7703190143814119, + "grad_norm": 0.6186229917170529, + "learning_rate": 3.520611382524896e-06, + "loss": 0.2826, + "step": 16444 + }, + { + "epoch": 0.7703658593713403, + "grad_norm": 0.595947387855867, + "learning_rate": 3.5204382531633625e-06, + "loss": 0.2708, + "step": 16445 + }, + { + "epoch": 0.7704127043612685, + "grad_norm": 0.5632860730359772, + "learning_rate": 3.5202651179294624e-06, + "loss": 0.2688, + "step": 16446 + }, + { + "epoch": 0.7704595493511969, + "grad_norm": 0.5820635411863626, + "learning_rate": 3.5200919768241932e-06, + "loss": 0.2773, + "step": 16447 + }, + { + "epoch": 0.7705063943411252, + "grad_norm": 0.5787032756453376, + "learning_rate": 3.5199188298485516e-06, + "loss": 0.2745, + "step": 16448 + }, + { + "epoch": 0.7705532393310536, + "grad_norm": 0.6371859443599076, + "learning_rate": 3.5197456770035325e-06, + "loss": 0.275, + "step": 16449 + }, + { + "epoch": 0.7706000843209819, + "grad_norm": 0.5556028071957713, + "learning_rate": 3.519572518290134e-06, + "loss": 0.2744, + "step": 16450 + }, + { + "epoch": 0.7706469293109102, + "grad_norm": 0.5372628048120438, + "learning_rate": 3.5193993537093518e-06, + "loss": 0.2709, + "step": 16451 + }, + { + "epoch": 0.7706937743008385, + "grad_norm": 0.5681890811516791, + "learning_rate": 3.519226183262182e-06, + "loss": 0.2709, + "step": 16452 + }, + { + "epoch": 0.7707406192907669, + "grad_norm": 0.5796402139168091, + "learning_rate": 3.5190530069496225e-06, + "loss": 0.2768, + "step": 16453 + }, + { + "epoch": 0.7707874642806952, + "grad_norm": 0.6003108529568775, + "learning_rate": 3.518879824772669e-06, + "loss": 0.2888, + "step": 16454 + }, + { + "epoch": 0.7708343092706235, + "grad_norm": 0.6370785903440195, + "learning_rate": 3.5187066367323174e-06, + "loss": 0.2739, + "step": 16455 + }, + { + "epoch": 0.7708811542605518, + "grad_norm": 0.596165847777639, + "learning_rate": 3.5185334428295657e-06, + "loss": 0.2868, + "step": 16456 + }, + { + "epoch": 0.7709279992504802, + "grad_norm": 0.5555255216589526, + "learning_rate": 3.518360243065411e-06, + "loss": 0.2694, + "step": 16457 + }, + { + "epoch": 0.7709748442404085, + "grad_norm": 0.6077875903103155, + "learning_rate": 3.518187037440848e-06, + "loss": 0.2823, + "step": 16458 + }, + { + "epoch": 0.7710216892303369, + "grad_norm": 0.5660900811822533, + "learning_rate": 3.518013825956874e-06, + "loss": 0.2756, + "step": 16459 + }, + { + "epoch": 0.7710685342202651, + "grad_norm": 0.6464626481536929, + "learning_rate": 3.5178406086144868e-06, + "loss": 0.2803, + "step": 16460 + }, + { + "epoch": 0.7711153792101935, + "grad_norm": 0.6050678504068145, + "learning_rate": 3.517667385414683e-06, + "loss": 0.2874, + "step": 16461 + }, + { + "epoch": 0.7711622242001218, + "grad_norm": 0.5276538300412061, + "learning_rate": 3.5174941563584586e-06, + "loss": 0.2517, + "step": 16462 + }, + { + "epoch": 0.7712090691900502, + "grad_norm": 0.5854694329072371, + "learning_rate": 3.5173209214468113e-06, + "loss": 0.2722, + "step": 16463 + }, + { + "epoch": 0.7712559141799784, + "grad_norm": 0.6165355291584801, + "learning_rate": 3.5171476806807387e-06, + "loss": 0.3018, + "step": 16464 + }, + { + "epoch": 0.7713027591699068, + "grad_norm": 0.596719645987293, + "learning_rate": 3.516974434061236e-06, + "loss": 0.2836, + "step": 16465 + }, + { + "epoch": 0.7713496041598351, + "grad_norm": 0.5454860582187647, + "learning_rate": 3.516801181589301e-06, + "loss": 0.2582, + "step": 16466 + }, + { + "epoch": 0.7713964491497635, + "grad_norm": 0.6460168100228392, + "learning_rate": 3.5166279232659308e-06, + "loss": 0.2985, + "step": 16467 + }, + { + "epoch": 0.7714432941396918, + "grad_norm": 0.6013310159594192, + "learning_rate": 3.516454659092123e-06, + "loss": 0.2874, + "step": 16468 + }, + { + "epoch": 0.7714901391296201, + "grad_norm": 0.6169079228390374, + "learning_rate": 3.516281389068873e-06, + "loss": 0.2837, + "step": 16469 + }, + { + "epoch": 0.7715369841195484, + "grad_norm": 0.5930521027936645, + "learning_rate": 3.5161081131971797e-06, + "loss": 0.2611, + "step": 16470 + }, + { + "epoch": 0.7715838291094768, + "grad_norm": 0.6031046284809486, + "learning_rate": 3.51593483147804e-06, + "loss": 0.2776, + "step": 16471 + }, + { + "epoch": 0.7716306740994051, + "grad_norm": 0.6068701926232656, + "learning_rate": 3.51576154391245e-06, + "loss": 0.2996, + "step": 16472 + }, + { + "epoch": 0.7716775190893334, + "grad_norm": 0.5620808481504113, + "learning_rate": 3.515588250501408e-06, + "loss": 0.2701, + "step": 16473 + }, + { + "epoch": 0.7717243640792617, + "grad_norm": 0.5311173874619576, + "learning_rate": 3.5154149512459114e-06, + "loss": 0.2574, + "step": 16474 + }, + { + "epoch": 0.7717712090691901, + "grad_norm": 0.5853638699283786, + "learning_rate": 3.515241646146956e-06, + "loss": 0.2758, + "step": 16475 + }, + { + "epoch": 0.7718180540591184, + "grad_norm": 0.5781086025487135, + "learning_rate": 3.51506833520554e-06, + "loss": 0.2762, + "step": 16476 + }, + { + "epoch": 0.7718648990490468, + "grad_norm": 0.5810955244524179, + "learning_rate": 3.5148950184226618e-06, + "loss": 0.2703, + "step": 16477 + }, + { + "epoch": 0.771911744038975, + "grad_norm": 0.5823454028191644, + "learning_rate": 3.514721695799317e-06, + "loss": 0.2874, + "step": 16478 + }, + { + "epoch": 0.7719585890289034, + "grad_norm": 0.5966964359250042, + "learning_rate": 3.5145483673365044e-06, + "loss": 0.2851, + "step": 16479 + }, + { + "epoch": 0.7720054340188317, + "grad_norm": 0.5869735131921321, + "learning_rate": 3.5143750330352206e-06, + "loss": 0.2832, + "step": 16480 + }, + { + "epoch": 0.7720522790087601, + "grad_norm": 0.5890110983286754, + "learning_rate": 3.514201692896464e-06, + "loss": 0.2673, + "step": 16481 + }, + { + "epoch": 0.7720991239986883, + "grad_norm": 0.5902304535325319, + "learning_rate": 3.514028346921231e-06, + "loss": 0.2604, + "step": 16482 + }, + { + "epoch": 0.7721459689886166, + "grad_norm": 0.5329825577510137, + "learning_rate": 3.5138549951105194e-06, + "loss": 0.2604, + "step": 16483 + }, + { + "epoch": 0.772192813978545, + "grad_norm": 0.6092107343676199, + "learning_rate": 3.513681637465328e-06, + "loss": 0.2795, + "step": 16484 + }, + { + "epoch": 0.7722396589684734, + "grad_norm": 0.5985534316100916, + "learning_rate": 3.5135082739866534e-06, + "loss": 0.2854, + "step": 16485 + }, + { + "epoch": 0.7722865039584017, + "grad_norm": 0.6121451588340425, + "learning_rate": 3.5133349046754926e-06, + "loss": 0.2668, + "step": 16486 + }, + { + "epoch": 0.77233334894833, + "grad_norm": 0.5823865580891349, + "learning_rate": 3.513161529532845e-06, + "loss": 0.2835, + "step": 16487 + }, + { + "epoch": 0.7723801939382583, + "grad_norm": 0.6023836045354377, + "learning_rate": 3.5129881485597074e-06, + "loss": 0.2738, + "step": 16488 + }, + { + "epoch": 0.7724270389281866, + "grad_norm": 0.5906708013538055, + "learning_rate": 3.5128147617570773e-06, + "loss": 0.29, + "step": 16489 + }, + { + "epoch": 0.772473883918115, + "grad_norm": 0.5412145699036905, + "learning_rate": 3.5126413691259526e-06, + "loss": 0.2765, + "step": 16490 + }, + { + "epoch": 0.7725207289080432, + "grad_norm": 0.6996882861406447, + "learning_rate": 3.5124679706673314e-06, + "loss": 0.2692, + "step": 16491 + }, + { + "epoch": 0.7725675738979716, + "grad_norm": 0.598931488852824, + "learning_rate": 3.5122945663822124e-06, + "loss": 0.2871, + "step": 16492 + }, + { + "epoch": 0.7726144188879, + "grad_norm": 0.576010127295319, + "learning_rate": 3.5121211562715917e-06, + "loss": 0.2633, + "step": 16493 + }, + { + "epoch": 0.7726612638778283, + "grad_norm": 0.5553825613257886, + "learning_rate": 3.5119477403364683e-06, + "loss": 0.2614, + "step": 16494 + }, + { + "epoch": 0.7727081088677566, + "grad_norm": 0.5827692319997153, + "learning_rate": 3.5117743185778397e-06, + "loss": 0.2701, + "step": 16495 + }, + { + "epoch": 0.7727549538576849, + "grad_norm": 0.6340220551100486, + "learning_rate": 3.511600890996705e-06, + "loss": 0.2907, + "step": 16496 + }, + { + "epoch": 0.7728017988476132, + "grad_norm": 0.5530224158833541, + "learning_rate": 3.511427457594061e-06, + "loss": 0.2699, + "step": 16497 + }, + { + "epoch": 0.7728486438375416, + "grad_norm": 0.5612742177811106, + "learning_rate": 3.511254018370906e-06, + "loss": 0.2673, + "step": 16498 + }, + { + "epoch": 0.77289548882747, + "grad_norm": 0.6380376640398373, + "learning_rate": 3.5110805733282387e-06, + "loss": 0.2931, + "step": 16499 + }, + { + "epoch": 0.7729423338173982, + "grad_norm": 0.6314806583998805, + "learning_rate": 3.5109071224670564e-06, + "loss": 0.3059, + "step": 16500 + }, + { + "epoch": 0.7729891788073265, + "grad_norm": 0.6547499132959987, + "learning_rate": 3.510733665788358e-06, + "loss": 0.2937, + "step": 16501 + }, + { + "epoch": 0.7730360237972549, + "grad_norm": 0.6226671899703006, + "learning_rate": 3.5105602032931412e-06, + "loss": 0.3128, + "step": 16502 + }, + { + "epoch": 0.7730828687871832, + "grad_norm": 0.5778389885082471, + "learning_rate": 3.510386734982405e-06, + "loss": 0.3017, + "step": 16503 + }, + { + "epoch": 0.7731297137771115, + "grad_norm": 0.610294497331676, + "learning_rate": 3.5102132608571458e-06, + "loss": 0.2923, + "step": 16504 + }, + { + "epoch": 0.7731765587670398, + "grad_norm": 0.6201082700413421, + "learning_rate": 3.510039780918364e-06, + "loss": 0.277, + "step": 16505 + }, + { + "epoch": 0.7732234037569682, + "grad_norm": 0.5665332787505509, + "learning_rate": 3.509866295167058e-06, + "loss": 0.2806, + "step": 16506 + }, + { + "epoch": 0.7732702487468965, + "grad_norm": 0.6370171172086464, + "learning_rate": 3.5096928036042236e-06, + "loss": 0.2793, + "step": 16507 + }, + { + "epoch": 0.7733170937368249, + "grad_norm": 0.5497112525704949, + "learning_rate": 3.5095193062308618e-06, + "loss": 0.2537, + "step": 16508 + }, + { + "epoch": 0.7733639387267531, + "grad_norm": 0.6271423600571804, + "learning_rate": 3.50934580304797e-06, + "loss": 0.3075, + "step": 16509 + }, + { + "epoch": 0.7734107837166815, + "grad_norm": 0.5681704667246184, + "learning_rate": 3.509172294056547e-06, + "loss": 0.2607, + "step": 16510 + }, + { + "epoch": 0.7734576287066098, + "grad_norm": 0.595803616956932, + "learning_rate": 3.508998779257591e-06, + "loss": 0.2968, + "step": 16511 + }, + { + "epoch": 0.7735044736965382, + "grad_norm": 0.6092768960825123, + "learning_rate": 3.5088252586521003e-06, + "loss": 0.2826, + "step": 16512 + }, + { + "epoch": 0.7735513186864664, + "grad_norm": 0.5383455491729836, + "learning_rate": 3.5086517322410747e-06, + "loss": 0.2663, + "step": 16513 + }, + { + "epoch": 0.7735981636763948, + "grad_norm": 0.6183612354426853, + "learning_rate": 3.5084782000255106e-06, + "loss": 0.3063, + "step": 16514 + }, + { + "epoch": 0.7736450086663231, + "grad_norm": 0.6860745333149705, + "learning_rate": 3.5083046620064088e-06, + "loss": 0.2846, + "step": 16515 + }, + { + "epoch": 0.7736918536562515, + "grad_norm": 0.576809368945971, + "learning_rate": 3.5081311181847666e-06, + "loss": 0.2708, + "step": 16516 + }, + { + "epoch": 0.7737386986461798, + "grad_norm": 0.5682342202065814, + "learning_rate": 3.5079575685615836e-06, + "loss": 0.2791, + "step": 16517 + }, + { + "epoch": 0.7737855436361081, + "grad_norm": 0.5495864017796284, + "learning_rate": 3.507784013137858e-06, + "loss": 0.2629, + "step": 16518 + }, + { + "epoch": 0.7738323886260364, + "grad_norm": 0.6091874506589605, + "learning_rate": 3.507610451914589e-06, + "loss": 0.2901, + "step": 16519 + }, + { + "epoch": 0.7738792336159648, + "grad_norm": 0.5486794036569, + "learning_rate": 3.507436884892774e-06, + "loss": 0.2684, + "step": 16520 + }, + { + "epoch": 0.7739260786058931, + "grad_norm": 0.6031608900383832, + "learning_rate": 3.5072633120734146e-06, + "loss": 0.2744, + "step": 16521 + }, + { + "epoch": 0.7739729235958214, + "grad_norm": 0.6055653673999041, + "learning_rate": 3.5070897334575064e-06, + "loss": 0.2768, + "step": 16522 + }, + { + "epoch": 0.7740197685857497, + "grad_norm": 0.5455068192723858, + "learning_rate": 3.5069161490460513e-06, + "loss": 0.2812, + "step": 16523 + }, + { + "epoch": 0.7740666135756781, + "grad_norm": 0.6462148255631597, + "learning_rate": 3.506742558840046e-06, + "loss": 0.2827, + "step": 16524 + }, + { + "epoch": 0.7741134585656064, + "grad_norm": 0.6315707931824341, + "learning_rate": 3.5065689628404903e-06, + "loss": 0.2807, + "step": 16525 + }, + { + "epoch": 0.7741603035555348, + "grad_norm": 0.630390733534809, + "learning_rate": 3.506395361048383e-06, + "loss": 0.2595, + "step": 16526 + }, + { + "epoch": 0.774207148545463, + "grad_norm": 0.5338913574118794, + "learning_rate": 3.5062217534647246e-06, + "loss": 0.261, + "step": 16527 + }, + { + "epoch": 0.7742539935353914, + "grad_norm": 0.5914174418431123, + "learning_rate": 3.5060481400905115e-06, + "loss": 0.2893, + "step": 16528 + }, + { + "epoch": 0.7743008385253197, + "grad_norm": 0.584170328082794, + "learning_rate": 3.5058745209267448e-06, + "loss": 0.2735, + "step": 16529 + }, + { + "epoch": 0.7743476835152481, + "grad_norm": 0.616634188395344, + "learning_rate": 3.505700895974423e-06, + "loss": 0.2831, + "step": 16530 + }, + { + "epoch": 0.7743945285051763, + "grad_norm": 0.5806344005602668, + "learning_rate": 3.505527265234546e-06, + "loss": 0.2718, + "step": 16531 + }, + { + "epoch": 0.7744413734951047, + "grad_norm": 0.6117725493085172, + "learning_rate": 3.5053536287081113e-06, + "loss": 0.3092, + "step": 16532 + }, + { + "epoch": 0.774488218485033, + "grad_norm": 0.5477951664217608, + "learning_rate": 3.5051799863961187e-06, + "loss": 0.2696, + "step": 16533 + }, + { + "epoch": 0.7745350634749614, + "grad_norm": 0.5370030767914572, + "learning_rate": 3.5050063382995695e-06, + "loss": 0.2637, + "step": 16534 + }, + { + "epoch": 0.7745819084648897, + "grad_norm": 0.5761732378942627, + "learning_rate": 3.5048326844194603e-06, + "loss": 0.281, + "step": 16535 + }, + { + "epoch": 0.774628753454818, + "grad_norm": 0.605937289960637, + "learning_rate": 3.504659024756792e-06, + "loss": 0.2918, + "step": 16536 + }, + { + "epoch": 0.7746755984447463, + "grad_norm": 0.5869385269097884, + "learning_rate": 3.5044853593125632e-06, + "loss": 0.2895, + "step": 16537 + }, + { + "epoch": 0.7747224434346747, + "grad_norm": 0.5856123105637057, + "learning_rate": 3.5043116880877743e-06, + "loss": 0.2859, + "step": 16538 + }, + { + "epoch": 0.774769288424603, + "grad_norm": 0.5856262063298812, + "learning_rate": 3.5041380110834234e-06, + "loss": 0.2805, + "step": 16539 + }, + { + "epoch": 0.7748161334145313, + "grad_norm": 0.5820342848307632, + "learning_rate": 3.503964328300511e-06, + "loss": 0.2826, + "step": 16540 + }, + { + "epoch": 0.7748629784044596, + "grad_norm": 0.6454689520790907, + "learning_rate": 3.503790639740036e-06, + "loss": 0.2845, + "step": 16541 + }, + { + "epoch": 0.774909823394388, + "grad_norm": 0.5118910048703823, + "learning_rate": 3.5036169454029985e-06, + "loss": 0.2483, + "step": 16542 + }, + { + "epoch": 0.7749566683843163, + "grad_norm": 0.5632063792219165, + "learning_rate": 3.503443245290397e-06, + "loss": 0.2805, + "step": 16543 + }, + { + "epoch": 0.7750035133742447, + "grad_norm": 0.6144397479058125, + "learning_rate": 3.5032695394032327e-06, + "loss": 0.2889, + "step": 16544 + }, + { + "epoch": 0.7750503583641729, + "grad_norm": 0.6070583860495714, + "learning_rate": 3.5030958277425038e-06, + "loss": 0.2703, + "step": 16545 + }, + { + "epoch": 0.7750972033541013, + "grad_norm": 0.5622984137700671, + "learning_rate": 3.502922110309211e-06, + "loss": 0.2667, + "step": 16546 + }, + { + "epoch": 0.7751440483440296, + "grad_norm": 0.6320633727675214, + "learning_rate": 3.5027483871043526e-06, + "loss": 0.2977, + "step": 16547 + }, + { + "epoch": 0.775190893333958, + "grad_norm": 0.641789200306454, + "learning_rate": 3.5025746581289298e-06, + "loss": 0.306, + "step": 16548 + }, + { + "epoch": 0.7752377383238862, + "grad_norm": 0.5882145880923696, + "learning_rate": 3.502400923383943e-06, + "loss": 0.2719, + "step": 16549 + }, + { + "epoch": 0.7752845833138146, + "grad_norm": 0.6251774589570127, + "learning_rate": 3.5022271828703893e-06, + "loss": 0.2793, + "step": 16550 + }, + { + "epoch": 0.7753314283037429, + "grad_norm": 0.5735702046041569, + "learning_rate": 3.5020534365892706e-06, + "loss": 0.2672, + "step": 16551 + }, + { + "epoch": 0.7753782732936713, + "grad_norm": 0.5757144085657229, + "learning_rate": 3.501879684541586e-06, + "loss": 0.2905, + "step": 16552 + }, + { + "epoch": 0.7754251182835996, + "grad_norm": 0.6039261137639467, + "learning_rate": 3.5017059267283352e-06, + "loss": 0.2804, + "step": 16553 + }, + { + "epoch": 0.7754719632735279, + "grad_norm": 0.6533018682485463, + "learning_rate": 3.5015321631505196e-06, + "loss": 0.3006, + "step": 16554 + }, + { + "epoch": 0.7755188082634562, + "grad_norm": 0.5908341831266262, + "learning_rate": 3.5013583938091378e-06, + "loss": 0.2876, + "step": 16555 + }, + { + "epoch": 0.7755656532533846, + "grad_norm": 0.5699589754757304, + "learning_rate": 3.50118461870519e-06, + "loss": 0.291, + "step": 16556 + }, + { + "epoch": 0.7756124982433129, + "grad_norm": 0.6101489447046725, + "learning_rate": 3.5010108378396755e-06, + "loss": 0.2903, + "step": 16557 + }, + { + "epoch": 0.7756593432332411, + "grad_norm": 0.6386701676307922, + "learning_rate": 3.5008370512135966e-06, + "loss": 0.2748, + "step": 16558 + }, + { + "epoch": 0.7757061882231695, + "grad_norm": 0.647887061360918, + "learning_rate": 3.5006632588279515e-06, + "loss": 0.3056, + "step": 16559 + }, + { + "epoch": 0.7757530332130979, + "grad_norm": 0.642347773967654, + "learning_rate": 3.5004894606837407e-06, + "loss": 0.2982, + "step": 16560 + }, + { + "epoch": 0.7757998782030262, + "grad_norm": 0.5840254366789117, + "learning_rate": 3.5003156567819645e-06, + "loss": 0.2784, + "step": 16561 + }, + { + "epoch": 0.7758467231929546, + "grad_norm": 0.5845710994050226, + "learning_rate": 3.5001418471236236e-06, + "loss": 0.2648, + "step": 16562 + }, + { + "epoch": 0.7758935681828828, + "grad_norm": 0.6019344411445761, + "learning_rate": 3.499968031709717e-06, + "loss": 0.2837, + "step": 16563 + }, + { + "epoch": 0.7759404131728111, + "grad_norm": 0.5961202606475619, + "learning_rate": 3.4997942105412463e-06, + "loss": 0.2919, + "step": 16564 + }, + { + "epoch": 0.7759872581627395, + "grad_norm": 0.5878986182450224, + "learning_rate": 3.499620383619211e-06, + "loss": 0.2877, + "step": 16565 + }, + { + "epoch": 0.7760341031526679, + "grad_norm": 0.5665555195139172, + "learning_rate": 3.499446550944612e-06, + "loss": 0.2569, + "step": 16566 + }, + { + "epoch": 0.7760809481425961, + "grad_norm": 0.5992317647953181, + "learning_rate": 3.4992727125184488e-06, + "loss": 0.2785, + "step": 16567 + }, + { + "epoch": 0.7761277931325244, + "grad_norm": 0.5630768853046781, + "learning_rate": 3.4990988683417225e-06, + "loss": 0.2708, + "step": 16568 + }, + { + "epoch": 0.7761746381224528, + "grad_norm": 0.6102387893741985, + "learning_rate": 3.4989250184154332e-06, + "loss": 0.3159, + "step": 16569 + }, + { + "epoch": 0.7762214831123811, + "grad_norm": 0.5714615000031549, + "learning_rate": 3.4987511627405816e-06, + "loss": 0.2835, + "step": 16570 + }, + { + "epoch": 0.7762683281023095, + "grad_norm": 0.5969855269170447, + "learning_rate": 3.498577301318168e-06, + "loss": 0.2856, + "step": 16571 + }, + { + "epoch": 0.7763151730922377, + "grad_norm": 0.5649053022545322, + "learning_rate": 3.4984034341491936e-06, + "loss": 0.2518, + "step": 16572 + }, + { + "epoch": 0.7763620180821661, + "grad_norm": 0.6058452796776932, + "learning_rate": 3.498229561234658e-06, + "loss": 0.2864, + "step": 16573 + }, + { + "epoch": 0.7764088630720944, + "grad_norm": 0.5550433374730768, + "learning_rate": 3.4980556825755614e-06, + "loss": 0.2698, + "step": 16574 + }, + { + "epoch": 0.7764557080620228, + "grad_norm": 0.5664706357350204, + "learning_rate": 3.497881798172906e-06, + "loss": 0.285, + "step": 16575 + }, + { + "epoch": 0.776502553051951, + "grad_norm": 0.5626332573776135, + "learning_rate": 3.497707908027692e-06, + "loss": 0.2874, + "step": 16576 + }, + { + "epoch": 0.7765493980418794, + "grad_norm": 0.5976855421360264, + "learning_rate": 3.4975340121409184e-06, + "loss": 0.2867, + "step": 16577 + }, + { + "epoch": 0.7765962430318077, + "grad_norm": 0.5597900657785451, + "learning_rate": 3.497360110513588e-06, + "loss": 0.2742, + "step": 16578 + }, + { + "epoch": 0.7766430880217361, + "grad_norm": 0.6203220987046523, + "learning_rate": 3.4971862031467012e-06, + "loss": 0.307, + "step": 16579 + }, + { + "epoch": 0.7766899330116644, + "grad_norm": 0.5888426206473232, + "learning_rate": 3.4970122900412586e-06, + "loss": 0.2887, + "step": 16580 + }, + { + "epoch": 0.7767367780015927, + "grad_norm": 0.6309702372196277, + "learning_rate": 3.49683837119826e-06, + "loss": 0.2744, + "step": 16581 + }, + { + "epoch": 0.776783622991521, + "grad_norm": 0.5875587554350893, + "learning_rate": 3.496664446618708e-06, + "loss": 0.265, + "step": 16582 + }, + { + "epoch": 0.7768304679814494, + "grad_norm": 0.5618601606455808, + "learning_rate": 3.4964905163036022e-06, + "loss": 0.2599, + "step": 16583 + }, + { + "epoch": 0.7768773129713777, + "grad_norm": 0.6027769907469195, + "learning_rate": 3.4963165802539433e-06, + "loss": 0.2618, + "step": 16584 + }, + { + "epoch": 0.776924157961306, + "grad_norm": 0.6095988118822128, + "learning_rate": 3.496142638470734e-06, + "loss": 0.2779, + "step": 16585 + }, + { + "epoch": 0.7769710029512343, + "grad_norm": 0.5974946780089528, + "learning_rate": 3.4959686909549735e-06, + "loss": 0.31, + "step": 16586 + }, + { + "epoch": 0.7770178479411627, + "grad_norm": 0.582457728004303, + "learning_rate": 3.495794737707664e-06, + "loss": 0.2747, + "step": 16587 + }, + { + "epoch": 0.777064692931091, + "grad_norm": 0.54048189483655, + "learning_rate": 3.495620778729806e-06, + "loss": 0.2641, + "step": 16588 + }, + { + "epoch": 0.7771115379210194, + "grad_norm": 0.5699866057363818, + "learning_rate": 3.4954468140223996e-06, + "loss": 0.2612, + "step": 16589 + }, + { + "epoch": 0.7771583829109476, + "grad_norm": 0.5808089451446475, + "learning_rate": 3.495272843586448e-06, + "loss": 0.2738, + "step": 16590 + }, + { + "epoch": 0.777205227900876, + "grad_norm": 0.6360548577967123, + "learning_rate": 3.4950988674229515e-06, + "loss": 0.2835, + "step": 16591 + }, + { + "epoch": 0.7772520728908043, + "grad_norm": 0.5809934306223346, + "learning_rate": 3.4949248855329105e-06, + "loss": 0.2694, + "step": 16592 + }, + { + "epoch": 0.7772989178807327, + "grad_norm": 0.57927468455845, + "learning_rate": 3.4947508979173274e-06, + "loss": 0.3016, + "step": 16593 + }, + { + "epoch": 0.7773457628706609, + "grad_norm": 0.6187634263809573, + "learning_rate": 3.4945769045772026e-06, + "loss": 0.2785, + "step": 16594 + }, + { + "epoch": 0.7773926078605893, + "grad_norm": 0.6113101631228196, + "learning_rate": 3.4944029055135377e-06, + "loss": 0.2865, + "step": 16595 + }, + { + "epoch": 0.7774394528505176, + "grad_norm": 0.5427638027805995, + "learning_rate": 3.4942289007273334e-06, + "loss": 0.2659, + "step": 16596 + }, + { + "epoch": 0.777486297840446, + "grad_norm": 0.5357434981231721, + "learning_rate": 3.494054890219593e-06, + "loss": 0.27, + "step": 16597 + }, + { + "epoch": 0.7775331428303743, + "grad_norm": 0.5304247963751721, + "learning_rate": 3.493880873991316e-06, + "loss": 0.2665, + "step": 16598 + }, + { + "epoch": 0.7775799878203026, + "grad_norm": 0.597814896072471, + "learning_rate": 3.4937068520435036e-06, + "loss": 0.2967, + "step": 16599 + }, + { + "epoch": 0.7776268328102309, + "grad_norm": 0.5931069289722285, + "learning_rate": 3.4935328243771594e-06, + "loss": 0.2766, + "step": 16600 + }, + { + "epoch": 0.7776736778001593, + "grad_norm": 0.5898588647967847, + "learning_rate": 3.4933587909932826e-06, + "loss": 0.2848, + "step": 16601 + }, + { + "epoch": 0.7777205227900876, + "grad_norm": 0.556725524501589, + "learning_rate": 3.4931847518928753e-06, + "loss": 0.2765, + "step": 16602 + }, + { + "epoch": 0.7777673677800159, + "grad_norm": 0.5839256482580787, + "learning_rate": 3.4930107070769396e-06, + "loss": 0.2921, + "step": 16603 + }, + { + "epoch": 0.7778142127699442, + "grad_norm": 0.6107569922926096, + "learning_rate": 3.492836656546478e-06, + "loss": 0.2851, + "step": 16604 + }, + { + "epoch": 0.7778610577598726, + "grad_norm": 0.5735380404779781, + "learning_rate": 3.4926626003024898e-06, + "loss": 0.2832, + "step": 16605 + }, + { + "epoch": 0.7779079027498009, + "grad_norm": 0.5947775751106327, + "learning_rate": 3.492488538345978e-06, + "loss": 0.2817, + "step": 16606 + }, + { + "epoch": 0.7779547477397293, + "grad_norm": 0.5800746911437085, + "learning_rate": 3.492314470677944e-06, + "loss": 0.2837, + "step": 16607 + }, + { + "epoch": 0.7780015927296575, + "grad_norm": 0.5677245741278657, + "learning_rate": 3.4921403972993905e-06, + "loss": 0.2768, + "step": 16608 + }, + { + "epoch": 0.7780484377195859, + "grad_norm": 0.5885483767158499, + "learning_rate": 3.491966318211317e-06, + "loss": 0.2976, + "step": 16609 + }, + { + "epoch": 0.7780952827095142, + "grad_norm": 0.5547638093564632, + "learning_rate": 3.491792233414728e-06, + "loss": 0.2809, + "step": 16610 + }, + { + "epoch": 0.7781421276994426, + "grad_norm": 0.5727051438034968, + "learning_rate": 3.4916181429106232e-06, + "loss": 0.2801, + "step": 16611 + }, + { + "epoch": 0.7781889726893708, + "grad_norm": 0.6219581327543838, + "learning_rate": 3.4914440467000054e-06, + "loss": 0.2905, + "step": 16612 + }, + { + "epoch": 0.7782358176792992, + "grad_norm": 0.5437306381854433, + "learning_rate": 3.4912699447838766e-06, + "loss": 0.3053, + "step": 16613 + }, + { + "epoch": 0.7782826626692275, + "grad_norm": 0.6194789816604632, + "learning_rate": 3.4910958371632384e-06, + "loss": 0.2814, + "step": 16614 + }, + { + "epoch": 0.7783295076591559, + "grad_norm": 0.654763246519841, + "learning_rate": 3.4909217238390925e-06, + "loss": 0.3011, + "step": 16615 + }, + { + "epoch": 0.7783763526490842, + "grad_norm": 0.5416751060857646, + "learning_rate": 3.490747604812441e-06, + "loss": 0.253, + "step": 16616 + }, + { + "epoch": 0.7784231976390125, + "grad_norm": 0.567771801604999, + "learning_rate": 3.490573480084286e-06, + "loss": 0.27, + "step": 16617 + }, + { + "epoch": 0.7784700426289408, + "grad_norm": 0.5715374185622426, + "learning_rate": 3.49039934965563e-06, + "loss": 0.2871, + "step": 16618 + }, + { + "epoch": 0.7785168876188692, + "grad_norm": 0.5589313859948629, + "learning_rate": 3.4902252135274745e-06, + "loss": 0.2669, + "step": 16619 + }, + { + "epoch": 0.7785637326087975, + "grad_norm": 0.6013759870625808, + "learning_rate": 3.490051071700822e-06, + "loss": 0.2641, + "step": 16620 + }, + { + "epoch": 0.7786105775987258, + "grad_norm": 0.6410313673188072, + "learning_rate": 3.489876924176674e-06, + "loss": 0.315, + "step": 16621 + }, + { + "epoch": 0.7786574225886541, + "grad_norm": 0.5687475566603611, + "learning_rate": 3.4897027709560333e-06, + "loss": 0.2581, + "step": 16622 + }, + { + "epoch": 0.7787042675785825, + "grad_norm": 0.6046812125234233, + "learning_rate": 3.489528612039902e-06, + "loss": 0.2776, + "step": 16623 + }, + { + "epoch": 0.7787511125685108, + "grad_norm": 0.5879706565898778, + "learning_rate": 3.489354447429282e-06, + "loss": 0.2995, + "step": 16624 + }, + { + "epoch": 0.7787979575584392, + "grad_norm": 0.568239654031859, + "learning_rate": 3.489180277125176e-06, + "loss": 0.278, + "step": 16625 + }, + { + "epoch": 0.7788448025483674, + "grad_norm": 0.6128775863318255, + "learning_rate": 3.489006101128586e-06, + "loss": 0.2655, + "step": 16626 + }, + { + "epoch": 0.7788916475382958, + "grad_norm": 0.5160124179243598, + "learning_rate": 3.488831919440514e-06, + "loss": 0.2657, + "step": 16627 + }, + { + "epoch": 0.7789384925282241, + "grad_norm": 0.6307537517860972, + "learning_rate": 3.4886577320619636e-06, + "loss": 0.3042, + "step": 16628 + }, + { + "epoch": 0.7789853375181525, + "grad_norm": 0.5746369092667861, + "learning_rate": 3.4884835389939363e-06, + "loss": 0.257, + "step": 16629 + }, + { + "epoch": 0.7790321825080807, + "grad_norm": 0.5769985701926416, + "learning_rate": 3.4883093402374345e-06, + "loss": 0.2819, + "step": 16630 + }, + { + "epoch": 0.7790790274980091, + "grad_norm": 0.600568444278612, + "learning_rate": 3.4881351357934613e-06, + "loss": 0.2893, + "step": 16631 + }, + { + "epoch": 0.7791258724879374, + "grad_norm": 0.5851025505749371, + "learning_rate": 3.4879609256630183e-06, + "loss": 0.2891, + "step": 16632 + }, + { + "epoch": 0.7791727174778658, + "grad_norm": 0.60767250775044, + "learning_rate": 3.4877867098471086e-06, + "loss": 0.2764, + "step": 16633 + }, + { + "epoch": 0.7792195624677941, + "grad_norm": 0.6423124422757627, + "learning_rate": 3.4876124883467345e-06, + "loss": 0.2814, + "step": 16634 + }, + { + "epoch": 0.7792664074577224, + "grad_norm": 0.6010157508275259, + "learning_rate": 3.487438261162899e-06, + "loss": 0.2661, + "step": 16635 + }, + { + "epoch": 0.7793132524476507, + "grad_norm": 0.6097783445603059, + "learning_rate": 3.4872640282966043e-06, + "loss": 0.2802, + "step": 16636 + }, + { + "epoch": 0.7793600974375791, + "grad_norm": 0.5536087436462198, + "learning_rate": 3.487089789748853e-06, + "loss": 0.2722, + "step": 16637 + }, + { + "epoch": 0.7794069424275074, + "grad_norm": 0.614279112297843, + "learning_rate": 3.4869155455206483e-06, + "loss": 0.2928, + "step": 16638 + }, + { + "epoch": 0.7794537874174357, + "grad_norm": 0.5959458965637671, + "learning_rate": 3.486741295612993e-06, + "loss": 0.2904, + "step": 16639 + }, + { + "epoch": 0.779500632407364, + "grad_norm": 0.6109919054930251, + "learning_rate": 3.4865670400268896e-06, + "loss": 0.2686, + "step": 16640 + }, + { + "epoch": 0.7795474773972924, + "grad_norm": 0.6007965837393413, + "learning_rate": 3.48639277876334e-06, + "loss": 0.2983, + "step": 16641 + }, + { + "epoch": 0.7795943223872207, + "grad_norm": 0.5803419855537095, + "learning_rate": 3.4862185118233487e-06, + "loss": 0.2956, + "step": 16642 + }, + { + "epoch": 0.7796411673771491, + "grad_norm": 0.5957825561524024, + "learning_rate": 3.486044239207918e-06, + "loss": 0.2838, + "step": 16643 + }, + { + "epoch": 0.7796880123670773, + "grad_norm": 0.566079742869656, + "learning_rate": 3.4858699609180497e-06, + "loss": 0.297, + "step": 16644 + }, + { + "epoch": 0.7797348573570057, + "grad_norm": 0.5923166186337331, + "learning_rate": 3.4856956769547475e-06, + "loss": 0.2926, + "step": 16645 + }, + { + "epoch": 0.779781702346934, + "grad_norm": 0.6046175771988193, + "learning_rate": 3.485521387319015e-06, + "loss": 0.2885, + "step": 16646 + }, + { + "epoch": 0.7798285473368624, + "grad_norm": 0.5543297728359422, + "learning_rate": 3.4853470920118547e-06, + "loss": 0.2754, + "step": 16647 + }, + { + "epoch": 0.7798753923267906, + "grad_norm": 0.5926919399049128, + "learning_rate": 3.485172791034269e-06, + "loss": 0.2678, + "step": 16648 + }, + { + "epoch": 0.779922237316719, + "grad_norm": 0.5512409124466136, + "learning_rate": 3.484998484387262e-06, + "loss": 0.2817, + "step": 16649 + }, + { + "epoch": 0.7799690823066473, + "grad_norm": 0.5912070067705748, + "learning_rate": 3.484824172071836e-06, + "loss": 0.2874, + "step": 16650 + }, + { + "epoch": 0.7800159272965757, + "grad_norm": 0.6130108491190648, + "learning_rate": 3.4846498540889946e-06, + "loss": 0.2778, + "step": 16651 + }, + { + "epoch": 0.780062772286504, + "grad_norm": 0.5999830081590743, + "learning_rate": 3.4844755304397403e-06, + "loss": 0.2853, + "step": 16652 + }, + { + "epoch": 0.7801096172764322, + "grad_norm": 0.5631020873757775, + "learning_rate": 3.484301201125078e-06, + "loss": 0.2791, + "step": 16653 + }, + { + "epoch": 0.7801564622663606, + "grad_norm": 0.6120137687970444, + "learning_rate": 3.4841268661460082e-06, + "loss": 0.2652, + "step": 16654 + }, + { + "epoch": 0.780203307256289, + "grad_norm": 0.5654051817902483, + "learning_rate": 3.4839525255035366e-06, + "loss": 0.2627, + "step": 16655 + }, + { + "epoch": 0.7802501522462173, + "grad_norm": 0.5829233729026302, + "learning_rate": 3.4837781791986645e-06, + "loss": 0.273, + "step": 16656 + }, + { + "epoch": 0.7802969972361455, + "grad_norm": 0.5446357629870586, + "learning_rate": 3.483603827232397e-06, + "loss": 0.2504, + "step": 16657 + }, + { + "epoch": 0.7803438422260739, + "grad_norm": 0.5690389571069655, + "learning_rate": 3.483429469605737e-06, + "loss": 0.2773, + "step": 16658 + }, + { + "epoch": 0.7803906872160022, + "grad_norm": 0.6521180411101707, + "learning_rate": 3.483255106319687e-06, + "loss": 0.3067, + "step": 16659 + }, + { + "epoch": 0.7804375322059306, + "grad_norm": 0.6113196774265445, + "learning_rate": 3.4830807373752513e-06, + "loss": 0.2896, + "step": 16660 + }, + { + "epoch": 0.780484377195859, + "grad_norm": 0.6107394130378992, + "learning_rate": 3.4829063627734327e-06, + "loss": 0.2953, + "step": 16661 + }, + { + "epoch": 0.7805312221857872, + "grad_norm": 0.576598402831347, + "learning_rate": 3.482731982515235e-06, + "loss": 0.2432, + "step": 16662 + }, + { + "epoch": 0.7805780671757155, + "grad_norm": 0.5621038408139226, + "learning_rate": 3.482557596601662e-06, + "loss": 0.2767, + "step": 16663 + }, + { + "epoch": 0.7806249121656439, + "grad_norm": 0.641257846522262, + "learning_rate": 3.4823832050337177e-06, + "loss": 0.2979, + "step": 16664 + }, + { + "epoch": 0.7806717571555722, + "grad_norm": 0.5889968069381972, + "learning_rate": 3.482208807812404e-06, + "loss": 0.2684, + "step": 16665 + }, + { + "epoch": 0.7807186021455005, + "grad_norm": 0.5204388742816394, + "learning_rate": 3.4820344049387257e-06, + "loss": 0.2546, + "step": 16666 + }, + { + "epoch": 0.7807654471354288, + "grad_norm": 0.5370343544391277, + "learning_rate": 3.481859996413686e-06, + "loss": 0.2594, + "step": 16667 + }, + { + "epoch": 0.7808122921253572, + "grad_norm": 0.5990642004508308, + "learning_rate": 3.4816855822382895e-06, + "loss": 0.2703, + "step": 16668 + }, + { + "epoch": 0.7808591371152855, + "grad_norm": 0.5272419045818637, + "learning_rate": 3.4815111624135385e-06, + "loss": 0.2558, + "step": 16669 + }, + { + "epoch": 0.7809059821052139, + "grad_norm": 0.5558404277459591, + "learning_rate": 3.4813367369404377e-06, + "loss": 0.2608, + "step": 16670 + }, + { + "epoch": 0.7809528270951421, + "grad_norm": 0.599279050583374, + "learning_rate": 3.4811623058199908e-06, + "loss": 0.2812, + "step": 16671 + }, + { + "epoch": 0.7809996720850705, + "grad_norm": 0.6054365967507511, + "learning_rate": 3.4809878690532006e-06, + "loss": 0.2856, + "step": 16672 + }, + { + "epoch": 0.7810465170749988, + "grad_norm": 0.6056902397094496, + "learning_rate": 3.4808134266410726e-06, + "loss": 0.2688, + "step": 16673 + }, + { + "epoch": 0.7810933620649272, + "grad_norm": 0.5876093408353246, + "learning_rate": 3.48063897858461e-06, + "loss": 0.2781, + "step": 16674 + }, + { + "epoch": 0.7811402070548554, + "grad_norm": 0.5937097774742501, + "learning_rate": 3.480464524884816e-06, + "loss": 0.2736, + "step": 16675 + }, + { + "epoch": 0.7811870520447838, + "grad_norm": 0.6267563407119189, + "learning_rate": 3.480290065542695e-06, + "loss": 0.2893, + "step": 16676 + }, + { + "epoch": 0.7812338970347121, + "grad_norm": 0.5744656687093154, + "learning_rate": 3.480115600559252e-06, + "loss": 0.2745, + "step": 16677 + }, + { + "epoch": 0.7812807420246405, + "grad_norm": 0.5981058318484271, + "learning_rate": 3.47994112993549e-06, + "loss": 0.2849, + "step": 16678 + }, + { + "epoch": 0.7813275870145688, + "grad_norm": 0.5753906678275171, + "learning_rate": 3.4797666536724118e-06, + "loss": 0.2886, + "step": 16679 + }, + { + "epoch": 0.7813744320044971, + "grad_norm": 0.6351004021605751, + "learning_rate": 3.4795921717710234e-06, + "loss": 0.2703, + "step": 16680 + }, + { + "epoch": 0.7814212769944254, + "grad_norm": 0.5753796843253352, + "learning_rate": 3.479417684232329e-06, + "loss": 0.2795, + "step": 16681 + }, + { + "epoch": 0.7814681219843538, + "grad_norm": 0.5933707909080194, + "learning_rate": 3.479243191057331e-06, + "loss": 0.2958, + "step": 16682 + }, + { + "epoch": 0.7815149669742821, + "grad_norm": 0.5635065895410973, + "learning_rate": 3.4790686922470353e-06, + "loss": 0.277, + "step": 16683 + }, + { + "epoch": 0.7815618119642104, + "grad_norm": 0.6097046944280704, + "learning_rate": 3.478894187802445e-06, + "loss": 0.2966, + "step": 16684 + }, + { + "epoch": 0.7816086569541387, + "grad_norm": 0.6171803803090741, + "learning_rate": 3.4787196777245646e-06, + "loss": 0.2779, + "step": 16685 + }, + { + "epoch": 0.7816555019440671, + "grad_norm": 0.6710151407579308, + "learning_rate": 3.4785451620143982e-06, + "loss": 0.2962, + "step": 16686 + }, + { + "epoch": 0.7817023469339954, + "grad_norm": 0.5641199186176757, + "learning_rate": 3.4783706406729506e-06, + "loss": 0.2736, + "step": 16687 + }, + { + "epoch": 0.7817491919239238, + "grad_norm": 0.5682367432547027, + "learning_rate": 3.478196113701226e-06, + "loss": 0.2605, + "step": 16688 + }, + { + "epoch": 0.781796036913852, + "grad_norm": 0.5392193470315307, + "learning_rate": 3.478021581100229e-06, + "loss": 0.2725, + "step": 16689 + }, + { + "epoch": 0.7818428819037804, + "grad_norm": 0.6061441204739509, + "learning_rate": 3.4778470428709626e-06, + "loss": 0.2754, + "step": 16690 + }, + { + "epoch": 0.7818897268937087, + "grad_norm": 0.554689619548711, + "learning_rate": 3.4776724990144335e-06, + "loss": 0.275, + "step": 16691 + }, + { + "epoch": 0.7819365718836371, + "grad_norm": 0.5909452217215869, + "learning_rate": 3.4774979495316443e-06, + "loss": 0.28, + "step": 16692 + }, + { + "epoch": 0.7819834168735653, + "grad_norm": 0.5598309846360451, + "learning_rate": 3.4773233944235996e-06, + "loss": 0.2483, + "step": 16693 + }, + { + "epoch": 0.7820302618634937, + "grad_norm": 0.5944546049405219, + "learning_rate": 3.4771488336913044e-06, + "loss": 0.2792, + "step": 16694 + }, + { + "epoch": 0.782077106853422, + "grad_norm": 0.5960605629667991, + "learning_rate": 3.476974267335764e-06, + "loss": 0.2718, + "step": 16695 + }, + { + "epoch": 0.7821239518433504, + "grad_norm": 0.6664981788785374, + "learning_rate": 3.4767996953579817e-06, + "loss": 0.2999, + "step": 16696 + }, + { + "epoch": 0.7821707968332787, + "grad_norm": 0.5753426659303802, + "learning_rate": 3.4766251177589625e-06, + "loss": 0.2973, + "step": 16697 + }, + { + "epoch": 0.782217641823207, + "grad_norm": 0.5769642051446198, + "learning_rate": 3.476450534539712e-06, + "loss": 0.2786, + "step": 16698 + }, + { + "epoch": 0.7822644868131353, + "grad_norm": 0.586599565646139, + "learning_rate": 3.476275945701234e-06, + "loss": 0.2936, + "step": 16699 + }, + { + "epoch": 0.7823113318030637, + "grad_norm": 0.5640171918500366, + "learning_rate": 3.476101351244533e-06, + "loss": 0.2939, + "step": 16700 + }, + { + "epoch": 0.782358176792992, + "grad_norm": 0.5892833233294574, + "learning_rate": 3.4759267511706142e-06, + "loss": 0.2896, + "step": 16701 + }, + { + "epoch": 0.7824050217829203, + "grad_norm": 0.6164331002288802, + "learning_rate": 3.475752145480482e-06, + "loss": 0.294, + "step": 16702 + }, + { + "epoch": 0.7824518667728486, + "grad_norm": 0.6392654048662926, + "learning_rate": 3.4755775341751413e-06, + "loss": 0.2979, + "step": 16703 + }, + { + "epoch": 0.782498711762777, + "grad_norm": 0.5935237768662349, + "learning_rate": 3.4754029172555974e-06, + "loss": 0.2837, + "step": 16704 + }, + { + "epoch": 0.7825455567527053, + "grad_norm": 0.5784693154123911, + "learning_rate": 3.4752282947228542e-06, + "loss": 0.2625, + "step": 16705 + }, + { + "epoch": 0.7825924017426337, + "grad_norm": 0.5902233623163574, + "learning_rate": 3.475053666577918e-06, + "loss": 0.2905, + "step": 16706 + }, + { + "epoch": 0.7826392467325619, + "grad_norm": 0.5774317338157302, + "learning_rate": 3.4748790328217936e-06, + "loss": 0.2936, + "step": 16707 + }, + { + "epoch": 0.7826860917224903, + "grad_norm": 0.650376622553714, + "learning_rate": 3.474704393455484e-06, + "loss": 0.312, + "step": 16708 + }, + { + "epoch": 0.7827329367124186, + "grad_norm": 0.5698727183488268, + "learning_rate": 3.4745297484799965e-06, + "loss": 0.2769, + "step": 16709 + }, + { + "epoch": 0.782779781702347, + "grad_norm": 0.5661701419368544, + "learning_rate": 3.474355097896336e-06, + "loss": 0.2707, + "step": 16710 + }, + { + "epoch": 0.7828266266922752, + "grad_norm": 0.6078640526288058, + "learning_rate": 3.4741804417055046e-06, + "loss": 0.2793, + "step": 16711 + }, + { + "epoch": 0.7828734716822036, + "grad_norm": 0.5978972647245856, + "learning_rate": 3.4740057799085115e-06, + "loss": 0.2978, + "step": 16712 + }, + { + "epoch": 0.7829203166721319, + "grad_norm": 0.5972018624899551, + "learning_rate": 3.4738311125063596e-06, + "loss": 0.298, + "step": 16713 + }, + { + "epoch": 0.7829671616620603, + "grad_norm": 0.5291368232507112, + "learning_rate": 3.473656439500054e-06, + "loss": 0.2585, + "step": 16714 + }, + { + "epoch": 0.7830140066519886, + "grad_norm": 0.5967974637455733, + "learning_rate": 3.4734817608905994e-06, + "loss": 0.2784, + "step": 16715 + }, + { + "epoch": 0.7830608516419169, + "grad_norm": 0.600067491038701, + "learning_rate": 3.4733070766790037e-06, + "loss": 0.3089, + "step": 16716 + }, + { + "epoch": 0.7831076966318452, + "grad_norm": 0.628184758281777, + "learning_rate": 3.4731323868662697e-06, + "loss": 0.297, + "step": 16717 + }, + { + "epoch": 0.7831545416217736, + "grad_norm": 0.5951178506941253, + "learning_rate": 3.472957691453403e-06, + "loss": 0.2666, + "step": 16718 + }, + { + "epoch": 0.7832013866117019, + "grad_norm": 0.5888626530742748, + "learning_rate": 3.47278299044141e-06, + "loss": 0.2903, + "step": 16719 + }, + { + "epoch": 0.7832482316016302, + "grad_norm": 0.6129703590990754, + "learning_rate": 3.472608283831295e-06, + "loss": 0.2911, + "step": 16720 + }, + { + "epoch": 0.7832950765915585, + "grad_norm": 0.5858665503475405, + "learning_rate": 3.4724335716240637e-06, + "loss": 0.289, + "step": 16721 + }, + { + "epoch": 0.7833419215814869, + "grad_norm": 0.5586836457211337, + "learning_rate": 3.472258853820722e-06, + "loss": 0.2702, + "step": 16722 + }, + { + "epoch": 0.7833887665714152, + "grad_norm": 0.6102183687167162, + "learning_rate": 3.4720841304222747e-06, + "loss": 0.307, + "step": 16723 + }, + { + "epoch": 0.7834356115613436, + "grad_norm": 0.5791315945640626, + "learning_rate": 3.471909401429727e-06, + "loss": 0.2883, + "step": 16724 + }, + { + "epoch": 0.7834824565512718, + "grad_norm": 0.6182436524833209, + "learning_rate": 3.471734666844086e-06, + "loss": 0.2773, + "step": 16725 + }, + { + "epoch": 0.7835293015412002, + "grad_norm": 0.6574261353261107, + "learning_rate": 3.4715599266663558e-06, + "loss": 0.2952, + "step": 16726 + }, + { + "epoch": 0.7835761465311285, + "grad_norm": 0.6194090234979176, + "learning_rate": 3.4713851808975423e-06, + "loss": 0.2842, + "step": 16727 + }, + { + "epoch": 0.7836229915210569, + "grad_norm": 0.5986194308909013, + "learning_rate": 3.471210429538652e-06, + "loss": 0.3186, + "step": 16728 + }, + { + "epoch": 0.7836698365109851, + "grad_norm": 0.570215870549412, + "learning_rate": 3.4710356725906887e-06, + "loss": 0.2654, + "step": 16729 + }, + { + "epoch": 0.7837166815009134, + "grad_norm": 0.613698725715801, + "learning_rate": 3.47086091005466e-06, + "loss": 0.2884, + "step": 16730 + }, + { + "epoch": 0.7837635264908418, + "grad_norm": 0.6177430850337535, + "learning_rate": 3.4706861419315703e-06, + "loss": 0.2733, + "step": 16731 + }, + { + "epoch": 0.7838103714807702, + "grad_norm": 0.5626493420758067, + "learning_rate": 3.4705113682224256e-06, + "loss": 0.2837, + "step": 16732 + }, + { + "epoch": 0.7838572164706985, + "grad_norm": 0.5733237907660454, + "learning_rate": 3.470336588928232e-06, + "loss": 0.2805, + "step": 16733 + }, + { + "epoch": 0.7839040614606267, + "grad_norm": 0.6233495126797726, + "learning_rate": 3.470161804049996e-06, + "loss": 0.2847, + "step": 16734 + }, + { + "epoch": 0.7839509064505551, + "grad_norm": 0.5940119583501486, + "learning_rate": 3.4699870135887214e-06, + "loss": 0.2936, + "step": 16735 + }, + { + "epoch": 0.7839977514404834, + "grad_norm": 0.5267538386097521, + "learning_rate": 3.469812217545416e-06, + "loss": 0.2628, + "step": 16736 + }, + { + "epoch": 0.7840445964304118, + "grad_norm": 0.6348512325640245, + "learning_rate": 3.469637415921085e-06, + "loss": 0.298, + "step": 16737 + }, + { + "epoch": 0.78409144142034, + "grad_norm": 0.5723076705823732, + "learning_rate": 3.469462608716735e-06, + "loss": 0.2683, + "step": 16738 + }, + { + "epoch": 0.7841382864102684, + "grad_norm": 0.5444935525601048, + "learning_rate": 3.4692877959333704e-06, + "loss": 0.2796, + "step": 16739 + }, + { + "epoch": 0.7841851314001967, + "grad_norm": 0.5900638966399795, + "learning_rate": 3.4691129775719983e-06, + "loss": 0.2981, + "step": 16740 + }, + { + "epoch": 0.7842319763901251, + "grad_norm": 0.5890057409938495, + "learning_rate": 3.4689381536336253e-06, + "loss": 0.2817, + "step": 16741 + }, + { + "epoch": 0.7842788213800534, + "grad_norm": 0.5729102190831549, + "learning_rate": 3.468763324119256e-06, + "loss": 0.2682, + "step": 16742 + }, + { + "epoch": 0.7843256663699817, + "grad_norm": 0.6166289179386714, + "learning_rate": 3.468588489029897e-06, + "loss": 0.2869, + "step": 16743 + }, + { + "epoch": 0.78437251135991, + "grad_norm": 0.6040053504212799, + "learning_rate": 3.468413648366556e-06, + "loss": 0.2748, + "step": 16744 + }, + { + "epoch": 0.7844193563498384, + "grad_norm": 0.576395414438001, + "learning_rate": 3.4682388021302364e-06, + "loss": 0.2708, + "step": 16745 + }, + { + "epoch": 0.7844662013397667, + "grad_norm": 0.6540054986835157, + "learning_rate": 3.4680639503219464e-06, + "loss": 0.2915, + "step": 16746 + }, + { + "epoch": 0.784513046329695, + "grad_norm": 0.5766219090739615, + "learning_rate": 3.4678890929426923e-06, + "loss": 0.2787, + "step": 16747 + }, + { + "epoch": 0.7845598913196233, + "grad_norm": 0.5963954409024754, + "learning_rate": 3.467714229993479e-06, + "loss": 0.2588, + "step": 16748 + }, + { + "epoch": 0.7846067363095517, + "grad_norm": 0.5747415553082762, + "learning_rate": 3.467539361475314e-06, + "loss": 0.2716, + "step": 16749 + }, + { + "epoch": 0.78465358129948, + "grad_norm": 0.6092051561424447, + "learning_rate": 3.4673644873892032e-06, + "loss": 0.2706, + "step": 16750 + }, + { + "epoch": 0.7847004262894084, + "grad_norm": 0.560239945269961, + "learning_rate": 3.4671896077361522e-06, + "loss": 0.2951, + "step": 16751 + }, + { + "epoch": 0.7847472712793366, + "grad_norm": 0.5965333661821235, + "learning_rate": 3.4670147225171685e-06, + "loss": 0.2716, + "step": 16752 + }, + { + "epoch": 0.784794116269265, + "grad_norm": 0.6414618834072017, + "learning_rate": 3.4668398317332584e-06, + "loss": 0.2961, + "step": 16753 + }, + { + "epoch": 0.7848409612591933, + "grad_norm": 0.5742070419632562, + "learning_rate": 3.466664935385428e-06, + "loss": 0.2698, + "step": 16754 + }, + { + "epoch": 0.7848878062491217, + "grad_norm": 0.5808538232256388, + "learning_rate": 3.466490033474683e-06, + "loss": 0.2746, + "step": 16755 + }, + { + "epoch": 0.7849346512390499, + "grad_norm": 0.536069523066858, + "learning_rate": 3.4663151260020322e-06, + "loss": 0.2625, + "step": 16756 + }, + { + "epoch": 0.7849814962289783, + "grad_norm": 0.5860757905824613, + "learning_rate": 3.4661402129684796e-06, + "loss": 0.2654, + "step": 16757 + }, + { + "epoch": 0.7850283412189066, + "grad_norm": 0.6465138752725642, + "learning_rate": 3.465965294375033e-06, + "loss": 0.297, + "step": 16758 + }, + { + "epoch": 0.785075186208835, + "grad_norm": 0.6195497653141882, + "learning_rate": 3.4657903702227e-06, + "loss": 0.2883, + "step": 16759 + }, + { + "epoch": 0.7851220311987633, + "grad_norm": 0.5701419839690145, + "learning_rate": 3.4656154405124854e-06, + "loss": 0.2697, + "step": 16760 + }, + { + "epoch": 0.7851688761886916, + "grad_norm": 0.623833247775768, + "learning_rate": 3.4654405052453966e-06, + "loss": 0.2676, + "step": 16761 + }, + { + "epoch": 0.7852157211786199, + "grad_norm": 0.6482862870186255, + "learning_rate": 3.4652655644224404e-06, + "loss": 0.2937, + "step": 16762 + }, + { + "epoch": 0.7852625661685483, + "grad_norm": 0.5913144443408656, + "learning_rate": 3.465090618044623e-06, + "loss": 0.2789, + "step": 16763 + }, + { + "epoch": 0.7853094111584766, + "grad_norm": 0.5403570515874419, + "learning_rate": 3.464915666112952e-06, + "loss": 0.2762, + "step": 16764 + }, + { + "epoch": 0.7853562561484049, + "grad_norm": 0.6249983407284805, + "learning_rate": 3.4647407086284344e-06, + "loss": 0.291, + "step": 16765 + }, + { + "epoch": 0.7854031011383332, + "grad_norm": 0.6681841786428167, + "learning_rate": 3.464565745592076e-06, + "loss": 0.2707, + "step": 16766 + }, + { + "epoch": 0.7854499461282616, + "grad_norm": 0.675526727367917, + "learning_rate": 3.464390777004884e-06, + "loss": 0.2942, + "step": 16767 + }, + { + "epoch": 0.7854967911181899, + "grad_norm": 0.6102049911681777, + "learning_rate": 3.4642158028678663e-06, + "loss": 0.287, + "step": 16768 + }, + { + "epoch": 0.7855436361081183, + "grad_norm": 0.6355551378792184, + "learning_rate": 3.4640408231820284e-06, + "loss": 0.2824, + "step": 16769 + }, + { + "epoch": 0.7855904810980465, + "grad_norm": 0.6429743978771146, + "learning_rate": 3.4638658379483775e-06, + "loss": 0.2812, + "step": 16770 + }, + { + "epoch": 0.7856373260879749, + "grad_norm": 0.5798856004201426, + "learning_rate": 3.4636908471679217e-06, + "loss": 0.3159, + "step": 16771 + }, + { + "epoch": 0.7856841710779032, + "grad_norm": 0.610082223082052, + "learning_rate": 3.463515850841667e-06, + "loss": 0.2632, + "step": 16772 + }, + { + "epoch": 0.7857310160678316, + "grad_norm": 0.5921259255420763, + "learning_rate": 3.4633408489706204e-06, + "loss": 0.2794, + "step": 16773 + }, + { + "epoch": 0.7857778610577598, + "grad_norm": 0.6499311325702996, + "learning_rate": 3.46316584155579e-06, + "loss": 0.2972, + "step": 16774 + }, + { + "epoch": 0.7858247060476882, + "grad_norm": 0.564118304951404, + "learning_rate": 3.4629908285981818e-06, + "loss": 0.2604, + "step": 16775 + }, + { + "epoch": 0.7858715510376165, + "grad_norm": 0.6062386479811516, + "learning_rate": 3.462815810098803e-06, + "loss": 0.2671, + "step": 16776 + }, + { + "epoch": 0.7859183960275449, + "grad_norm": 0.6321957324631877, + "learning_rate": 3.462640786058662e-06, + "loss": 0.3003, + "step": 16777 + }, + { + "epoch": 0.7859652410174732, + "grad_norm": 0.6195334452147404, + "learning_rate": 3.4624657564787652e-06, + "loss": 0.3056, + "step": 16778 + }, + { + "epoch": 0.7860120860074015, + "grad_norm": 0.6252795992101094, + "learning_rate": 3.46229072136012e-06, + "loss": 0.3013, + "step": 16779 + }, + { + "epoch": 0.7860589309973298, + "grad_norm": 0.594868598903056, + "learning_rate": 3.4621156807037327e-06, + "loss": 0.2644, + "step": 16780 + }, + { + "epoch": 0.7861057759872582, + "grad_norm": 0.5317283220975227, + "learning_rate": 3.4619406345106123e-06, + "loss": 0.2549, + "step": 16781 + }, + { + "epoch": 0.7861526209771865, + "grad_norm": 0.5880627046695774, + "learning_rate": 3.4617655827817647e-06, + "loss": 0.2756, + "step": 16782 + }, + { + "epoch": 0.7861994659671148, + "grad_norm": 0.5638291307330723, + "learning_rate": 3.4615905255181985e-06, + "loss": 0.2704, + "step": 16783 + }, + { + "epoch": 0.7862463109570431, + "grad_norm": 0.6053540468595855, + "learning_rate": 3.4614154627209195e-06, + "loss": 0.2896, + "step": 16784 + }, + { + "epoch": 0.7862931559469715, + "grad_norm": 0.5561604251986613, + "learning_rate": 3.461240394390937e-06, + "loss": 0.2831, + "step": 16785 + }, + { + "epoch": 0.7863400009368998, + "grad_norm": 0.6301206937507392, + "learning_rate": 3.461065320529258e-06, + "loss": 0.2896, + "step": 16786 + }, + { + "epoch": 0.7863868459268282, + "grad_norm": 0.5736203284572511, + "learning_rate": 3.460890241136889e-06, + "loss": 0.2723, + "step": 16787 + }, + { + "epoch": 0.7864336909167564, + "grad_norm": 0.6052222049106802, + "learning_rate": 3.4607151562148377e-06, + "loss": 0.2849, + "step": 16788 + }, + { + "epoch": 0.7864805359066848, + "grad_norm": 0.5618706270543703, + "learning_rate": 3.460540065764113e-06, + "loss": 0.2836, + "step": 16789 + }, + { + "epoch": 0.7865273808966131, + "grad_norm": 0.5699619759380447, + "learning_rate": 3.4603649697857215e-06, + "loss": 0.2711, + "step": 16790 + }, + { + "epoch": 0.7865742258865415, + "grad_norm": 0.5431176215706233, + "learning_rate": 3.4601898682806707e-06, + "loss": 0.2809, + "step": 16791 + }, + { + "epoch": 0.7866210708764697, + "grad_norm": 0.5765767857904038, + "learning_rate": 3.460014761249969e-06, + "loss": 0.2665, + "step": 16792 + }, + { + "epoch": 0.7866679158663981, + "grad_norm": 0.563145313948342, + "learning_rate": 3.4598396486946235e-06, + "loss": 0.2756, + "step": 16793 + }, + { + "epoch": 0.7867147608563264, + "grad_norm": 0.5961051959453749, + "learning_rate": 3.4596645306156417e-06, + "loss": 0.2765, + "step": 16794 + }, + { + "epoch": 0.7867616058462548, + "grad_norm": 0.6457129436302962, + "learning_rate": 3.459489407014032e-06, + "loss": 0.3025, + "step": 16795 + }, + { + "epoch": 0.7868084508361831, + "grad_norm": 0.544439878976224, + "learning_rate": 3.4593142778908018e-06, + "loss": 0.2775, + "step": 16796 + }, + { + "epoch": 0.7868552958261114, + "grad_norm": 0.5687217635615712, + "learning_rate": 3.459139143246959e-06, + "loss": 0.2598, + "step": 16797 + }, + { + "epoch": 0.7869021408160397, + "grad_norm": 0.5821535333226707, + "learning_rate": 3.458964003083512e-06, + "loss": 0.2715, + "step": 16798 + }, + { + "epoch": 0.7869489858059681, + "grad_norm": 0.572573299359016, + "learning_rate": 3.4587888574014673e-06, + "loss": 0.2753, + "step": 16799 + }, + { + "epoch": 0.7869958307958964, + "grad_norm": 0.6559191340572678, + "learning_rate": 3.458613706201834e-06, + "loss": 0.3052, + "step": 16800 + }, + { + "epoch": 0.7870426757858247, + "grad_norm": 0.5918390174499145, + "learning_rate": 3.4584385494856203e-06, + "loss": 0.2817, + "step": 16801 + }, + { + "epoch": 0.787089520775753, + "grad_norm": 0.6233405160960641, + "learning_rate": 3.4582633872538336e-06, + "loss": 0.2967, + "step": 16802 + }, + { + "epoch": 0.7871363657656814, + "grad_norm": 0.5790008246261137, + "learning_rate": 3.4580882195074817e-06, + "loss": 0.269, + "step": 16803 + }, + { + "epoch": 0.7871832107556097, + "grad_norm": 0.6198157964238579, + "learning_rate": 3.4579130462475725e-06, + "loss": 0.2866, + "step": 16804 + }, + { + "epoch": 0.7872300557455381, + "grad_norm": 0.5395753577337495, + "learning_rate": 3.457737867475115e-06, + "loss": 0.2762, + "step": 16805 + }, + { + "epoch": 0.7872769007354663, + "grad_norm": 0.592115548281534, + "learning_rate": 3.4575626831911165e-06, + "loss": 0.2815, + "step": 16806 + }, + { + "epoch": 0.7873237457253947, + "grad_norm": 0.6432484019329626, + "learning_rate": 3.4573874933965855e-06, + "loss": 0.2741, + "step": 16807 + }, + { + "epoch": 0.787370590715323, + "grad_norm": 0.5754691362006946, + "learning_rate": 3.4572122980925304e-06, + "loss": 0.2905, + "step": 16808 + }, + { + "epoch": 0.7874174357052514, + "grad_norm": 0.5702214583126946, + "learning_rate": 3.4570370972799583e-06, + "loss": 0.2719, + "step": 16809 + }, + { + "epoch": 0.7874642806951796, + "grad_norm": 0.6653884117277584, + "learning_rate": 3.4568618909598793e-06, + "loss": 0.2996, + "step": 16810 + }, + { + "epoch": 0.787511125685108, + "grad_norm": 0.6090209175468323, + "learning_rate": 3.4566866791333005e-06, + "loss": 0.2841, + "step": 16811 + }, + { + "epoch": 0.7875579706750363, + "grad_norm": 0.5823984889087371, + "learning_rate": 3.4565114618012295e-06, + "loss": 0.2705, + "step": 16812 + }, + { + "epoch": 0.7876048156649647, + "grad_norm": 0.6060776107125581, + "learning_rate": 3.456336238964676e-06, + "loss": 0.3013, + "step": 16813 + }, + { + "epoch": 0.787651660654893, + "grad_norm": 0.5955329113383203, + "learning_rate": 3.456161010624648e-06, + "loss": 0.2862, + "step": 16814 + }, + { + "epoch": 0.7876985056448212, + "grad_norm": 0.5662016033127271, + "learning_rate": 3.4559857767821533e-06, + "loss": 0.2986, + "step": 16815 + }, + { + "epoch": 0.7877453506347496, + "grad_norm": 0.600119345671453, + "learning_rate": 3.4558105374382007e-06, + "loss": 0.2982, + "step": 16816 + }, + { + "epoch": 0.787792195624678, + "grad_norm": 0.5458456774411825, + "learning_rate": 3.4556352925937986e-06, + "loss": 0.2611, + "step": 16817 + }, + { + "epoch": 0.7878390406146063, + "grad_norm": 0.5904749898669444, + "learning_rate": 3.4554600422499563e-06, + "loss": 0.2708, + "step": 16818 + }, + { + "epoch": 0.7878858856045345, + "grad_norm": 0.6127980958748546, + "learning_rate": 3.455284786407681e-06, + "loss": 0.2925, + "step": 16819 + }, + { + "epoch": 0.7879327305944629, + "grad_norm": 0.5852498755492193, + "learning_rate": 3.4551095250679823e-06, + "loss": 0.2775, + "step": 16820 + }, + { + "epoch": 0.7879795755843912, + "grad_norm": 0.5973365093528673, + "learning_rate": 3.4549342582318678e-06, + "loss": 0.2925, + "step": 16821 + }, + { + "epoch": 0.7880264205743196, + "grad_norm": 0.6219663452588435, + "learning_rate": 3.4547589859003466e-06, + "loss": 0.2821, + "step": 16822 + }, + { + "epoch": 0.788073265564248, + "grad_norm": 0.5902289658508207, + "learning_rate": 3.454583708074428e-06, + "loss": 0.2669, + "step": 16823 + }, + { + "epoch": 0.7881201105541762, + "grad_norm": 0.5493899611843638, + "learning_rate": 3.45440842475512e-06, + "loss": 0.25, + "step": 16824 + }, + { + "epoch": 0.7881669555441045, + "grad_norm": 0.5913783800671258, + "learning_rate": 3.454233135943431e-06, + "loss": 0.2697, + "step": 16825 + }, + { + "epoch": 0.7882138005340329, + "grad_norm": 0.5640505066719033, + "learning_rate": 3.4540578416403704e-06, + "loss": 0.2753, + "step": 16826 + }, + { + "epoch": 0.7882606455239612, + "grad_norm": 0.5788571510514096, + "learning_rate": 3.4538825418469463e-06, + "loss": 0.2853, + "step": 16827 + }, + { + "epoch": 0.7883074905138895, + "grad_norm": 0.5915435264227423, + "learning_rate": 3.4537072365641685e-06, + "loss": 0.2918, + "step": 16828 + }, + { + "epoch": 0.7883543355038178, + "grad_norm": 0.5378858518210903, + "learning_rate": 3.453531925793045e-06, + "loss": 0.255, + "step": 16829 + }, + { + "epoch": 0.7884011804937462, + "grad_norm": 0.6093473668860329, + "learning_rate": 3.453356609534585e-06, + "loss": 0.2947, + "step": 16830 + }, + { + "epoch": 0.7884480254836745, + "grad_norm": 0.566786884644955, + "learning_rate": 3.4531812877897975e-06, + "loss": 0.2894, + "step": 16831 + }, + { + "epoch": 0.7884948704736029, + "grad_norm": 0.6134857394147328, + "learning_rate": 3.4530059605596912e-06, + "loss": 0.2784, + "step": 16832 + }, + { + "epoch": 0.7885417154635311, + "grad_norm": 0.5304340520315907, + "learning_rate": 3.4528306278452745e-06, + "loss": 0.2954, + "step": 16833 + }, + { + "epoch": 0.7885885604534595, + "grad_norm": 0.5496359591752396, + "learning_rate": 3.4526552896475574e-06, + "loss": 0.2734, + "step": 16834 + }, + { + "epoch": 0.7886354054433878, + "grad_norm": 0.6229793598700015, + "learning_rate": 3.452479945967549e-06, + "loss": 0.289, + "step": 16835 + }, + { + "epoch": 0.7886822504333162, + "grad_norm": 0.5265116577845397, + "learning_rate": 3.4523045968062573e-06, + "loss": 0.2747, + "step": 16836 + }, + { + "epoch": 0.7887290954232444, + "grad_norm": 0.5942827795270983, + "learning_rate": 3.452129242164692e-06, + "loss": 0.2806, + "step": 16837 + }, + { + "epoch": 0.7887759404131728, + "grad_norm": 0.5443529827165288, + "learning_rate": 3.4519538820438627e-06, + "loss": 0.269, + "step": 16838 + }, + { + "epoch": 0.7888227854031011, + "grad_norm": 0.6147476646322583, + "learning_rate": 3.451778516444778e-06, + "loss": 0.2726, + "step": 16839 + }, + { + "epoch": 0.7888696303930295, + "grad_norm": 0.5745506816360438, + "learning_rate": 3.4516031453684466e-06, + "loss": 0.273, + "step": 16840 + }, + { + "epoch": 0.7889164753829578, + "grad_norm": 0.6517149807537658, + "learning_rate": 3.4514277688158787e-06, + "loss": 0.2736, + "step": 16841 + }, + { + "epoch": 0.7889633203728861, + "grad_norm": 0.565182148202619, + "learning_rate": 3.451252386788083e-06, + "loss": 0.2559, + "step": 16842 + }, + { + "epoch": 0.7890101653628144, + "grad_norm": 0.5411288372310212, + "learning_rate": 3.4510769992860693e-06, + "loss": 0.2498, + "step": 16843 + }, + { + "epoch": 0.7890570103527428, + "grad_norm": 0.5574455570503091, + "learning_rate": 3.4509016063108462e-06, + "loss": 0.2752, + "step": 16844 + }, + { + "epoch": 0.7891038553426711, + "grad_norm": 0.579775383030774, + "learning_rate": 3.4507262078634228e-06, + "loss": 0.2772, + "step": 16845 + }, + { + "epoch": 0.7891507003325994, + "grad_norm": 0.5629358825391029, + "learning_rate": 3.4505508039448098e-06, + "loss": 0.2604, + "step": 16846 + }, + { + "epoch": 0.7891975453225277, + "grad_norm": 0.6104852119869443, + "learning_rate": 3.450375394556016e-06, + "loss": 0.2903, + "step": 16847 + }, + { + "epoch": 0.7892443903124561, + "grad_norm": 0.6099206149204419, + "learning_rate": 3.45019997969805e-06, + "loss": 0.2797, + "step": 16848 + }, + { + "epoch": 0.7892912353023844, + "grad_norm": 0.6392689699987876, + "learning_rate": 3.4500245593719223e-06, + "loss": 0.2922, + "step": 16849 + }, + { + "epoch": 0.7893380802923128, + "grad_norm": 0.5899707653406999, + "learning_rate": 3.4498491335786423e-06, + "loss": 0.2729, + "step": 16850 + }, + { + "epoch": 0.789384925282241, + "grad_norm": 0.6211763791354641, + "learning_rate": 3.4496737023192182e-06, + "loss": 0.2916, + "step": 16851 + }, + { + "epoch": 0.7894317702721694, + "grad_norm": 0.6045552940870533, + "learning_rate": 3.4494982655946617e-06, + "loss": 0.304, + "step": 16852 + }, + { + "epoch": 0.7894786152620977, + "grad_norm": 0.6032113704466646, + "learning_rate": 3.4493228234059817e-06, + "loss": 0.2619, + "step": 16853 + }, + { + "epoch": 0.7895254602520261, + "grad_norm": 0.6088029029351965, + "learning_rate": 3.449147375754186e-06, + "loss": 0.2984, + "step": 16854 + }, + { + "epoch": 0.7895723052419543, + "grad_norm": 0.5928293058389569, + "learning_rate": 3.448971922640286e-06, + "loss": 0.2857, + "step": 16855 + }, + { + "epoch": 0.7896191502318827, + "grad_norm": 0.6346335261159565, + "learning_rate": 3.4487964640652925e-06, + "loss": 0.288, + "step": 16856 + }, + { + "epoch": 0.789665995221811, + "grad_norm": 0.6176419524666381, + "learning_rate": 3.4486210000302127e-06, + "loss": 0.2774, + "step": 16857 + }, + { + "epoch": 0.7897128402117394, + "grad_norm": 0.5677599096990696, + "learning_rate": 3.4484455305360576e-06, + "loss": 0.2684, + "step": 16858 + }, + { + "epoch": 0.7897596852016677, + "grad_norm": 0.5318899166472318, + "learning_rate": 3.4482700555838374e-06, + "loss": 0.2508, + "step": 16859 + }, + { + "epoch": 0.789806530191596, + "grad_norm": 0.6095385297467426, + "learning_rate": 3.4480945751745608e-06, + "loss": 0.2943, + "step": 16860 + }, + { + "epoch": 0.7898533751815243, + "grad_norm": 0.5814973320421573, + "learning_rate": 3.447919089309238e-06, + "loss": 0.3031, + "step": 16861 + }, + { + "epoch": 0.7899002201714527, + "grad_norm": 0.5508529789389895, + "learning_rate": 3.4477435979888797e-06, + "loss": 0.2883, + "step": 16862 + }, + { + "epoch": 0.789947065161381, + "grad_norm": 0.5688105947851227, + "learning_rate": 3.447568101214495e-06, + "loss": 0.3133, + "step": 16863 + }, + { + "epoch": 0.7899939101513093, + "grad_norm": 0.5368201670148465, + "learning_rate": 3.447392598987094e-06, + "loss": 0.2683, + "step": 16864 + }, + { + "epoch": 0.7900407551412376, + "grad_norm": 0.6264977994121829, + "learning_rate": 3.4472170913076865e-06, + "loss": 0.3047, + "step": 16865 + }, + { + "epoch": 0.790087600131166, + "grad_norm": 0.605021897025703, + "learning_rate": 3.4470415781772828e-06, + "loss": 0.2726, + "step": 16866 + }, + { + "epoch": 0.7901344451210943, + "grad_norm": 0.5799880459591096, + "learning_rate": 3.446866059596893e-06, + "loss": 0.2691, + "step": 16867 + }, + { + "epoch": 0.7901812901110227, + "grad_norm": 0.6067871634574112, + "learning_rate": 3.446690535567527e-06, + "loss": 0.284, + "step": 16868 + }, + { + "epoch": 0.7902281351009509, + "grad_norm": 0.6062189445783285, + "learning_rate": 3.446515006090194e-06, + "loss": 0.2979, + "step": 16869 + }, + { + "epoch": 0.7902749800908793, + "grad_norm": 0.5900258751405889, + "learning_rate": 3.4463394711659063e-06, + "loss": 0.2866, + "step": 16870 + }, + { + "epoch": 0.7903218250808076, + "grad_norm": 0.5353812919627325, + "learning_rate": 3.446163930795672e-06, + "loss": 0.263, + "step": 16871 + }, + { + "epoch": 0.790368670070736, + "grad_norm": 0.6113819036070524, + "learning_rate": 3.4459883849805032e-06, + "loss": 0.2613, + "step": 16872 + }, + { + "epoch": 0.7904155150606642, + "grad_norm": 0.5758055701203356, + "learning_rate": 3.445812833721408e-06, + "loss": 0.263, + "step": 16873 + }, + { + "epoch": 0.7904623600505926, + "grad_norm": 0.6032135311496948, + "learning_rate": 3.445637277019398e-06, + "loss": 0.2697, + "step": 16874 + }, + { + "epoch": 0.7905092050405209, + "grad_norm": 0.6068504466298965, + "learning_rate": 3.445461714875483e-06, + "loss": 0.278, + "step": 16875 + }, + { + "epoch": 0.7905560500304493, + "grad_norm": 0.618119338245358, + "learning_rate": 3.4452861472906734e-06, + "loss": 0.2852, + "step": 16876 + }, + { + "epoch": 0.7906028950203776, + "grad_norm": 0.5438708043039496, + "learning_rate": 3.4451105742659797e-06, + "loss": 0.2765, + "step": 16877 + }, + { + "epoch": 0.7906497400103059, + "grad_norm": 0.6395154466370105, + "learning_rate": 3.4449349958024123e-06, + "loss": 0.3029, + "step": 16878 + }, + { + "epoch": 0.7906965850002342, + "grad_norm": 0.5559079364262639, + "learning_rate": 3.444759411900981e-06, + "loss": 0.2626, + "step": 16879 + }, + { + "epoch": 0.7907434299901626, + "grad_norm": 0.5893762208754789, + "learning_rate": 3.4445838225626975e-06, + "loss": 0.2647, + "step": 16880 + }, + { + "epoch": 0.7907902749800909, + "grad_norm": 0.5942937182912448, + "learning_rate": 3.4444082277885715e-06, + "loss": 0.2861, + "step": 16881 + }, + { + "epoch": 0.7908371199700192, + "grad_norm": 0.6225407657375622, + "learning_rate": 3.4442326275796135e-06, + "loss": 0.2749, + "step": 16882 + }, + { + "epoch": 0.7908839649599475, + "grad_norm": 0.6492149694210742, + "learning_rate": 3.444057021936833e-06, + "loss": 0.2834, + "step": 16883 + }, + { + "epoch": 0.7909308099498759, + "grad_norm": 0.57590365399157, + "learning_rate": 3.4438814108612426e-06, + "loss": 0.2785, + "step": 16884 + }, + { + "epoch": 0.7909776549398042, + "grad_norm": 0.6410575821650282, + "learning_rate": 3.443705794353852e-06, + "loss": 0.2914, + "step": 16885 + }, + { + "epoch": 0.7910244999297326, + "grad_norm": 0.6593711228005162, + "learning_rate": 3.443530172415671e-06, + "loss": 0.3063, + "step": 16886 + }, + { + "epoch": 0.7910713449196608, + "grad_norm": 0.6456721062922034, + "learning_rate": 3.4433545450477118e-06, + "loss": 0.2871, + "step": 16887 + }, + { + "epoch": 0.7911181899095892, + "grad_norm": 0.6202752173061128, + "learning_rate": 3.443178912250984e-06, + "loss": 0.2967, + "step": 16888 + }, + { + "epoch": 0.7911650348995175, + "grad_norm": 0.5967550721761018, + "learning_rate": 3.4430032740264983e-06, + "loss": 0.2658, + "step": 16889 + }, + { + "epoch": 0.7912118798894459, + "grad_norm": 0.5794741556619581, + "learning_rate": 3.442827630375266e-06, + "loss": 0.2774, + "step": 16890 + }, + { + "epoch": 0.7912587248793741, + "grad_norm": 0.5987436592655077, + "learning_rate": 3.442651981298298e-06, + "loss": 0.2839, + "step": 16891 + }, + { + "epoch": 0.7913055698693024, + "grad_norm": 0.6203721829732918, + "learning_rate": 3.4424763267966044e-06, + "loss": 0.2734, + "step": 16892 + }, + { + "epoch": 0.7913524148592308, + "grad_norm": 0.6359653533169377, + "learning_rate": 3.442300666871197e-06, + "loss": 0.283, + "step": 16893 + }, + { + "epoch": 0.7913992598491592, + "grad_norm": 0.5777504280459972, + "learning_rate": 3.4421250015230856e-06, + "loss": 0.2705, + "step": 16894 + }, + { + "epoch": 0.7914461048390875, + "grad_norm": 0.6344208564489328, + "learning_rate": 3.4419493307532813e-06, + "loss": 0.2841, + "step": 16895 + }, + { + "epoch": 0.7914929498290157, + "grad_norm": 0.5500246183566706, + "learning_rate": 3.4417736545627955e-06, + "loss": 0.2731, + "step": 16896 + }, + { + "epoch": 0.7915397948189441, + "grad_norm": 0.5877132619093529, + "learning_rate": 3.441597972952639e-06, + "loss": 0.2792, + "step": 16897 + }, + { + "epoch": 0.7915866398088724, + "grad_norm": 0.596682353742933, + "learning_rate": 3.4414222859238233e-06, + "loss": 0.2968, + "step": 16898 + }, + { + "epoch": 0.7916334847988008, + "grad_norm": 0.624888077374717, + "learning_rate": 3.4412465934773587e-06, + "loss": 0.3028, + "step": 16899 + }, + { + "epoch": 0.791680329788729, + "grad_norm": 0.6056284473905884, + "learning_rate": 3.4410708956142564e-06, + "loss": 0.2892, + "step": 16900 + }, + { + "epoch": 0.7917271747786574, + "grad_norm": 0.5570685133041936, + "learning_rate": 3.440895192335528e-06, + "loss": 0.2776, + "step": 16901 + }, + { + "epoch": 0.7917740197685857, + "grad_norm": 0.5963595718876551, + "learning_rate": 3.4407194836421844e-06, + "loss": 0.2796, + "step": 16902 + }, + { + "epoch": 0.7918208647585141, + "grad_norm": 0.5759811333290372, + "learning_rate": 3.4405437695352357e-06, + "loss": 0.2773, + "step": 16903 + }, + { + "epoch": 0.7918677097484424, + "grad_norm": 0.6087647342617762, + "learning_rate": 3.440368050015694e-06, + "loss": 0.2722, + "step": 16904 + }, + { + "epoch": 0.7919145547383707, + "grad_norm": 0.5871010862231039, + "learning_rate": 3.4401923250845713e-06, + "loss": 0.2846, + "step": 16905 + }, + { + "epoch": 0.791961399728299, + "grad_norm": 0.6300083883551836, + "learning_rate": 3.4400165947428775e-06, + "loss": 0.2743, + "step": 16906 + }, + { + "epoch": 0.7920082447182274, + "grad_norm": 0.6449598453492187, + "learning_rate": 3.4398408589916247e-06, + "loss": 0.2825, + "step": 16907 + }, + { + "epoch": 0.7920550897081557, + "grad_norm": 0.6343397111401722, + "learning_rate": 3.439665117831824e-06, + "loss": 0.289, + "step": 16908 + }, + { + "epoch": 0.792101934698084, + "grad_norm": 0.6094405599264603, + "learning_rate": 3.4394893712644872e-06, + "loss": 0.2808, + "step": 16909 + }, + { + "epoch": 0.7921487796880123, + "grad_norm": 0.58652918070377, + "learning_rate": 3.4393136192906244e-06, + "loss": 0.2914, + "step": 16910 + }, + { + "epoch": 0.7921956246779407, + "grad_norm": 0.5978614063895038, + "learning_rate": 3.4391378619112485e-06, + "loss": 0.2584, + "step": 16911 + }, + { + "epoch": 0.792242469667869, + "grad_norm": 0.5760273272826129, + "learning_rate": 3.43896209912737e-06, + "loss": 0.295, + "step": 16912 + }, + { + "epoch": 0.7922893146577974, + "grad_norm": 0.6022124816280736, + "learning_rate": 3.4387863309400005e-06, + "loss": 0.2803, + "step": 16913 + }, + { + "epoch": 0.7923361596477256, + "grad_norm": 0.6191780417530355, + "learning_rate": 3.4386105573501516e-06, + "loss": 0.2924, + "step": 16914 + }, + { + "epoch": 0.792383004637654, + "grad_norm": 0.6362735714185278, + "learning_rate": 3.4384347783588346e-06, + "loss": 0.291, + "step": 16915 + }, + { + "epoch": 0.7924298496275823, + "grad_norm": 0.5762175510533819, + "learning_rate": 3.4382589939670617e-06, + "loss": 0.2919, + "step": 16916 + }, + { + "epoch": 0.7924766946175107, + "grad_norm": 0.5404323522341972, + "learning_rate": 3.4380832041758443e-06, + "loss": 0.2553, + "step": 16917 + }, + { + "epoch": 0.7925235396074389, + "grad_norm": 0.5821960107040002, + "learning_rate": 3.4379074089861936e-06, + "loss": 0.296, + "step": 16918 + }, + { + "epoch": 0.7925703845973673, + "grad_norm": 0.5553732850971198, + "learning_rate": 3.4377316083991215e-06, + "loss": 0.2718, + "step": 16919 + }, + { + "epoch": 0.7926172295872956, + "grad_norm": 0.5592764786176777, + "learning_rate": 3.43755580241564e-06, + "loss": 0.2629, + "step": 16920 + }, + { + "epoch": 0.792664074577224, + "grad_norm": 0.5505120494970699, + "learning_rate": 3.4373799910367594e-06, + "loss": 0.2665, + "step": 16921 + }, + { + "epoch": 0.7927109195671523, + "grad_norm": 0.6162918097914714, + "learning_rate": 3.4372041742634937e-06, + "loss": 0.3036, + "step": 16922 + }, + { + "epoch": 0.7927577645570806, + "grad_norm": 0.5294094702998076, + "learning_rate": 3.4370283520968534e-06, + "loss": 0.2533, + "step": 16923 + }, + { + "epoch": 0.7928046095470089, + "grad_norm": 0.5983655555557293, + "learning_rate": 3.4368525245378496e-06, + "loss": 0.2723, + "step": 16924 + }, + { + "epoch": 0.7928514545369373, + "grad_norm": 0.5911100275629216, + "learning_rate": 3.4366766915874956e-06, + "loss": 0.2756, + "step": 16925 + }, + { + "epoch": 0.7928982995268656, + "grad_norm": 0.5975724748760592, + "learning_rate": 3.4365008532468034e-06, + "loss": 0.292, + "step": 16926 + }, + { + "epoch": 0.7929451445167939, + "grad_norm": 0.594241686211431, + "learning_rate": 3.4363250095167833e-06, + "loss": 0.284, + "step": 16927 + }, + { + "epoch": 0.7929919895067222, + "grad_norm": 0.6360248455790504, + "learning_rate": 3.4361491603984477e-06, + "loss": 0.3019, + "step": 16928 + }, + { + "epoch": 0.7930388344966506, + "grad_norm": 0.5567943587361458, + "learning_rate": 3.43597330589281e-06, + "loss": 0.2701, + "step": 16929 + }, + { + "epoch": 0.7930856794865789, + "grad_norm": 0.5667388246376142, + "learning_rate": 3.43579744600088e-06, + "loss": 0.267, + "step": 16930 + }, + { + "epoch": 0.7931325244765073, + "grad_norm": 0.6440012388163591, + "learning_rate": 3.4356215807236716e-06, + "loss": 0.2912, + "step": 16931 + }, + { + "epoch": 0.7931793694664355, + "grad_norm": 0.5727560767961952, + "learning_rate": 3.435445710062196e-06, + "loss": 0.2712, + "step": 16932 + }, + { + "epoch": 0.7932262144563639, + "grad_norm": 0.5655352667248886, + "learning_rate": 3.4352698340174663e-06, + "loss": 0.2495, + "step": 16933 + }, + { + "epoch": 0.7932730594462922, + "grad_norm": 0.5465663287800026, + "learning_rate": 3.4350939525904925e-06, + "loss": 0.266, + "step": 16934 + }, + { + "epoch": 0.7933199044362206, + "grad_norm": 0.619193023157657, + "learning_rate": 3.434918065782289e-06, + "loss": 0.2836, + "step": 16935 + }, + { + "epoch": 0.7933667494261488, + "grad_norm": 0.5888775498299438, + "learning_rate": 3.434742173593866e-06, + "loss": 0.2853, + "step": 16936 + }, + { + "epoch": 0.7934135944160772, + "grad_norm": 0.5762843882696169, + "learning_rate": 3.434566276026238e-06, + "loss": 0.2751, + "step": 16937 + }, + { + "epoch": 0.7934604394060055, + "grad_norm": 0.5541375647902739, + "learning_rate": 3.434390373080415e-06, + "loss": 0.2803, + "step": 16938 + }, + { + "epoch": 0.7935072843959339, + "grad_norm": 0.5667338804175094, + "learning_rate": 3.4342144647574105e-06, + "loss": 0.2636, + "step": 16939 + }, + { + "epoch": 0.7935541293858622, + "grad_norm": 0.5598174175704446, + "learning_rate": 3.4340385510582367e-06, + "loss": 0.2641, + "step": 16940 + }, + { + "epoch": 0.7936009743757905, + "grad_norm": 0.579985223813251, + "learning_rate": 3.4338626319839058e-06, + "loss": 0.2647, + "step": 16941 + }, + { + "epoch": 0.7936478193657188, + "grad_norm": 0.5837214702278812, + "learning_rate": 3.4336867075354303e-06, + "loss": 0.2816, + "step": 16942 + }, + { + "epoch": 0.7936946643556472, + "grad_norm": 0.621547184408836, + "learning_rate": 3.433510777713822e-06, + "loss": 0.2821, + "step": 16943 + }, + { + "epoch": 0.7937415093455755, + "grad_norm": 0.6317955155788614, + "learning_rate": 3.433334842520094e-06, + "loss": 0.2819, + "step": 16944 + }, + { + "epoch": 0.7937883543355038, + "grad_norm": 0.6189947345137191, + "learning_rate": 3.433158901955259e-06, + "loss": 0.2806, + "step": 16945 + }, + { + "epoch": 0.7938351993254321, + "grad_norm": 0.7059803843349683, + "learning_rate": 3.4329829560203284e-06, + "loss": 0.3118, + "step": 16946 + }, + { + "epoch": 0.7938820443153605, + "grad_norm": 0.5889940681589374, + "learning_rate": 3.432807004716316e-06, + "loss": 0.2562, + "step": 16947 + }, + { + "epoch": 0.7939288893052888, + "grad_norm": 0.5663451403795413, + "learning_rate": 3.4326310480442333e-06, + "loss": 0.2739, + "step": 16948 + }, + { + "epoch": 0.7939757342952172, + "grad_norm": 0.6142056299166513, + "learning_rate": 3.4324550860050933e-06, + "loss": 0.2819, + "step": 16949 + }, + { + "epoch": 0.7940225792851454, + "grad_norm": 0.572063153357161, + "learning_rate": 3.432279118599909e-06, + "loss": 0.2884, + "step": 16950 + }, + { + "epoch": 0.7940694242750738, + "grad_norm": 0.6130379663094029, + "learning_rate": 3.432103145829693e-06, + "loss": 0.2715, + "step": 16951 + }, + { + "epoch": 0.7941162692650021, + "grad_norm": 0.618933001150569, + "learning_rate": 3.4319271676954565e-06, + "loss": 0.2724, + "step": 16952 + }, + { + "epoch": 0.7941631142549305, + "grad_norm": 0.62943873932764, + "learning_rate": 3.4317511841982136e-06, + "loss": 0.2805, + "step": 16953 + }, + { + "epoch": 0.7942099592448587, + "grad_norm": 0.6297251378972543, + "learning_rate": 3.431575195338978e-06, + "loss": 0.2813, + "step": 16954 + }, + { + "epoch": 0.7942568042347871, + "grad_norm": 0.5926677790578246, + "learning_rate": 3.43139920111876e-06, + "loss": 0.2649, + "step": 16955 + }, + { + "epoch": 0.7943036492247154, + "grad_norm": 0.5648731404871099, + "learning_rate": 3.4312232015385745e-06, + "loss": 0.2902, + "step": 16956 + }, + { + "epoch": 0.7943504942146438, + "grad_norm": 0.6110639163081149, + "learning_rate": 3.431047196599433e-06, + "loss": 0.2816, + "step": 16957 + }, + { + "epoch": 0.7943973392045721, + "grad_norm": 0.586325464187473, + "learning_rate": 3.4308711863023496e-06, + "loss": 0.2843, + "step": 16958 + }, + { + "epoch": 0.7944441841945004, + "grad_norm": 0.5988796453522881, + "learning_rate": 3.4306951706483356e-06, + "loss": 0.2674, + "step": 16959 + }, + { + "epoch": 0.7944910291844287, + "grad_norm": 0.6118214821522845, + "learning_rate": 3.4305191496384057e-06, + "loss": 0.2698, + "step": 16960 + }, + { + "epoch": 0.7945378741743571, + "grad_norm": 0.5784844154687299, + "learning_rate": 3.4303431232735716e-06, + "loss": 0.2947, + "step": 16961 + }, + { + "epoch": 0.7945847191642854, + "grad_norm": 0.5869551052016507, + "learning_rate": 3.4301670915548463e-06, + "loss": 0.2984, + "step": 16962 + }, + { + "epoch": 0.7946315641542137, + "grad_norm": 0.5718047752250529, + "learning_rate": 3.429991054483244e-06, + "loss": 0.272, + "step": 16963 + }, + { + "epoch": 0.794678409144142, + "grad_norm": 0.5898493250643319, + "learning_rate": 3.4298150120597764e-06, + "loss": 0.2934, + "step": 16964 + }, + { + "epoch": 0.7947252541340704, + "grad_norm": 0.5736247213278658, + "learning_rate": 3.4296389642854565e-06, + "loss": 0.2908, + "step": 16965 + }, + { + "epoch": 0.7947720991239987, + "grad_norm": 0.5967009117991005, + "learning_rate": 3.4294629111612986e-06, + "loss": 0.2714, + "step": 16966 + }, + { + "epoch": 0.7948189441139271, + "grad_norm": 0.5995437339459001, + "learning_rate": 3.4292868526883156e-06, + "loss": 0.2698, + "step": 16967 + }, + { + "epoch": 0.7948657891038553, + "grad_norm": 0.5962097926054211, + "learning_rate": 3.4291107888675202e-06, + "loss": 0.295, + "step": 16968 + }, + { + "epoch": 0.7949126340937837, + "grad_norm": 0.5983172461620436, + "learning_rate": 3.4289347196999255e-06, + "loss": 0.265, + "step": 16969 + }, + { + "epoch": 0.794959479083712, + "grad_norm": 0.5977858275343828, + "learning_rate": 3.4287586451865445e-06, + "loss": 0.2866, + "step": 16970 + }, + { + "epoch": 0.7950063240736404, + "grad_norm": 0.6206443766094004, + "learning_rate": 3.428582565328392e-06, + "loss": 0.2986, + "step": 16971 + }, + { + "epoch": 0.7950531690635686, + "grad_norm": 0.5846623214119517, + "learning_rate": 3.42840648012648e-06, + "loss": 0.2954, + "step": 16972 + }, + { + "epoch": 0.795100014053497, + "grad_norm": 0.5610457469241718, + "learning_rate": 3.4282303895818215e-06, + "loss": 0.2767, + "step": 16973 + }, + { + "epoch": 0.7951468590434253, + "grad_norm": 0.5998756082383354, + "learning_rate": 3.4280542936954297e-06, + "loss": 0.2858, + "step": 16974 + }, + { + "epoch": 0.7951937040333537, + "grad_norm": 0.6102710813232297, + "learning_rate": 3.4278781924683206e-06, + "loss": 0.2915, + "step": 16975 + }, + { + "epoch": 0.795240549023282, + "grad_norm": 0.5646905870618458, + "learning_rate": 3.427702085901504e-06, + "loss": 0.2738, + "step": 16976 + }, + { + "epoch": 0.7952873940132102, + "grad_norm": 0.6190996315242214, + "learning_rate": 3.427525973995996e-06, + "loss": 0.2837, + "step": 16977 + }, + { + "epoch": 0.7953342390031386, + "grad_norm": 0.6266202729046112, + "learning_rate": 3.4273498567528092e-06, + "loss": 0.2907, + "step": 16978 + }, + { + "epoch": 0.795381083993067, + "grad_norm": 0.6668108966916689, + "learning_rate": 3.427173734172957e-06, + "loss": 0.2883, + "step": 16979 + }, + { + "epoch": 0.7954279289829953, + "grad_norm": 0.5556052128059928, + "learning_rate": 3.4269976062574522e-06, + "loss": 0.2625, + "step": 16980 + }, + { + "epoch": 0.7954747739729235, + "grad_norm": 0.5798377923403928, + "learning_rate": 3.42682147300731e-06, + "loss": 0.2861, + "step": 16981 + }, + { + "epoch": 0.7955216189628519, + "grad_norm": 0.5715306823833878, + "learning_rate": 3.4266453344235434e-06, + "loss": 0.2638, + "step": 16982 + }, + { + "epoch": 0.7955684639527802, + "grad_norm": 0.5490266951563184, + "learning_rate": 3.426469190507165e-06, + "loss": 0.2649, + "step": 16983 + }, + { + "epoch": 0.7956153089427086, + "grad_norm": 0.5679684237283198, + "learning_rate": 3.4262930412591897e-06, + "loss": 0.2888, + "step": 16984 + }, + { + "epoch": 0.795662153932637, + "grad_norm": 0.5863566850035903, + "learning_rate": 3.4261168866806305e-06, + "loss": 0.2867, + "step": 16985 + }, + { + "epoch": 0.7957089989225652, + "grad_norm": 0.6099102171332281, + "learning_rate": 3.425940726772502e-06, + "loss": 0.278, + "step": 16986 + }, + { + "epoch": 0.7957558439124935, + "grad_norm": 0.5642340643770922, + "learning_rate": 3.425764561535817e-06, + "loss": 0.2663, + "step": 16987 + }, + { + "epoch": 0.7958026889024219, + "grad_norm": 0.6057747241271455, + "learning_rate": 3.425588390971589e-06, + "loss": 0.2674, + "step": 16988 + }, + { + "epoch": 0.7958495338923502, + "grad_norm": 0.5892997030489872, + "learning_rate": 3.4254122150808334e-06, + "loss": 0.2842, + "step": 16989 + }, + { + "epoch": 0.7958963788822785, + "grad_norm": 0.7705458466019808, + "learning_rate": 3.425236033864563e-06, + "loss": 0.271, + "step": 16990 + }, + { + "epoch": 0.7959432238722068, + "grad_norm": 0.6254385801711411, + "learning_rate": 3.4250598473237912e-06, + "loss": 0.2934, + "step": 16991 + }, + { + "epoch": 0.7959900688621352, + "grad_norm": 0.5681313419930918, + "learning_rate": 3.424883655459533e-06, + "loss": 0.2809, + "step": 16992 + }, + { + "epoch": 0.7960369138520635, + "grad_norm": 0.544464242216109, + "learning_rate": 3.424707458272801e-06, + "loss": 0.2597, + "step": 16993 + }, + { + "epoch": 0.7960837588419919, + "grad_norm": 0.5652159393174688, + "learning_rate": 3.4245312557646103e-06, + "loss": 0.2835, + "step": 16994 + }, + { + "epoch": 0.7961306038319201, + "grad_norm": 0.5915599720036986, + "learning_rate": 3.424355047935975e-06, + "loss": 0.2799, + "step": 16995 + }, + { + "epoch": 0.7961774488218485, + "grad_norm": 0.566481488429298, + "learning_rate": 3.424178834787909e-06, + "loss": 0.2608, + "step": 16996 + }, + { + "epoch": 0.7962242938117768, + "grad_norm": 0.5879203725796339, + "learning_rate": 3.4240026163214256e-06, + "loss": 0.2637, + "step": 16997 + }, + { + "epoch": 0.7962711388017052, + "grad_norm": 0.6241396468902912, + "learning_rate": 3.423826392537539e-06, + "loss": 0.283, + "step": 16998 + }, + { + "epoch": 0.7963179837916334, + "grad_norm": 0.638328169085319, + "learning_rate": 3.4236501634372643e-06, + "loss": 0.2771, + "step": 16999 + }, + { + "epoch": 0.7963648287815618, + "grad_norm": 0.5750748102768662, + "learning_rate": 3.4234739290216155e-06, + "loss": 0.2664, + "step": 17000 + }, + { + "epoch": 0.7964116737714901, + "grad_norm": 0.5937504148897625, + "learning_rate": 3.4232976892916054e-06, + "loss": 0.3282, + "step": 17001 + }, + { + "epoch": 0.7964585187614185, + "grad_norm": 0.5793960039377044, + "learning_rate": 3.42312144424825e-06, + "loss": 0.3139, + "step": 17002 + }, + { + "epoch": 0.7965053637513468, + "grad_norm": 0.6038782452555833, + "learning_rate": 3.4229451938925625e-06, + "loss": 0.3397, + "step": 17003 + }, + { + "epoch": 0.7965522087412751, + "grad_norm": 0.5707199068117438, + "learning_rate": 3.4227689382255562e-06, + "loss": 0.31, + "step": 17004 + }, + { + "epoch": 0.7965990537312034, + "grad_norm": 0.6221870499301544, + "learning_rate": 3.422592677248248e-06, + "loss": 0.3198, + "step": 17005 + }, + { + "epoch": 0.7966458987211318, + "grad_norm": 0.5740460759204797, + "learning_rate": 3.4224164109616497e-06, + "loss": 0.318, + "step": 17006 + }, + { + "epoch": 0.7966927437110601, + "grad_norm": 0.6094884016072711, + "learning_rate": 3.422240139366778e-06, + "loss": 0.3133, + "step": 17007 + }, + { + "epoch": 0.7967395887009884, + "grad_norm": 0.6096975379808329, + "learning_rate": 3.4220638624646452e-06, + "loss": 0.3446, + "step": 17008 + }, + { + "epoch": 0.7967864336909167, + "grad_norm": 0.595099271376989, + "learning_rate": 3.421887580256267e-06, + "loss": 0.3477, + "step": 17009 + }, + { + "epoch": 0.7968332786808451, + "grad_norm": 0.6231915302959974, + "learning_rate": 3.421711292742658e-06, + "loss": 0.3371, + "step": 17010 + }, + { + "epoch": 0.7968801236707734, + "grad_norm": 0.6451652457213307, + "learning_rate": 3.421534999924831e-06, + "loss": 0.3228, + "step": 17011 + }, + { + "epoch": 0.7969269686607018, + "grad_norm": 0.5836554107878859, + "learning_rate": 3.4213587018038025e-06, + "loss": 0.3145, + "step": 17012 + }, + { + "epoch": 0.79697381365063, + "grad_norm": 0.6477809180843884, + "learning_rate": 3.4211823983805866e-06, + "loss": 0.3443, + "step": 17013 + }, + { + "epoch": 0.7970206586405584, + "grad_norm": 0.5972733178211722, + "learning_rate": 3.4210060896561963e-06, + "loss": 0.3235, + "step": 17014 + }, + { + "epoch": 0.7970675036304867, + "grad_norm": 0.5704870710010849, + "learning_rate": 3.420829775631648e-06, + "loss": 0.3202, + "step": 17015 + }, + { + "epoch": 0.7971143486204151, + "grad_norm": 0.6213660154503949, + "learning_rate": 3.4206534563079565e-06, + "loss": 0.3434, + "step": 17016 + }, + { + "epoch": 0.7971611936103433, + "grad_norm": 0.6116210937606672, + "learning_rate": 3.420477131686135e-06, + "loss": 0.3148, + "step": 17017 + }, + { + "epoch": 0.7972080386002717, + "grad_norm": 0.5839222552566401, + "learning_rate": 3.420300801767199e-06, + "loss": 0.3129, + "step": 17018 + }, + { + "epoch": 0.7972548835902, + "grad_norm": 0.5701082400994041, + "learning_rate": 3.420124466552163e-06, + "loss": 0.2957, + "step": 17019 + }, + { + "epoch": 0.7973017285801284, + "grad_norm": 0.6076317251350336, + "learning_rate": 3.4199481260420424e-06, + "loss": 0.2998, + "step": 17020 + }, + { + "epoch": 0.7973485735700567, + "grad_norm": 0.6215005195113917, + "learning_rate": 3.4197717802378515e-06, + "loss": 0.3354, + "step": 17021 + }, + { + "epoch": 0.797395418559985, + "grad_norm": 0.5971527797115509, + "learning_rate": 3.4195954291406054e-06, + "loss": 0.3285, + "step": 17022 + }, + { + "epoch": 0.7974422635499133, + "grad_norm": 0.579036652239215, + "learning_rate": 3.4194190727513178e-06, + "loss": 0.3301, + "step": 17023 + }, + { + "epoch": 0.7974891085398417, + "grad_norm": 0.6631396019253196, + "learning_rate": 3.4192427110710058e-06, + "loss": 0.3245, + "step": 17024 + }, + { + "epoch": 0.79753595352977, + "grad_norm": 0.5790779298218766, + "learning_rate": 3.419066344100682e-06, + "loss": 0.3233, + "step": 17025 + }, + { + "epoch": 0.7975827985196983, + "grad_norm": 0.587306913739114, + "learning_rate": 3.4188899718413626e-06, + "loss": 0.3334, + "step": 17026 + }, + { + "epoch": 0.7976296435096266, + "grad_norm": 0.5549745137940861, + "learning_rate": 3.418713594294063e-06, + "loss": 0.2982, + "step": 17027 + }, + { + "epoch": 0.797676488499555, + "grad_norm": 0.5535380190894807, + "learning_rate": 3.418537211459797e-06, + "loss": 0.3138, + "step": 17028 + }, + { + "epoch": 0.7977233334894833, + "grad_norm": 0.5767355149935813, + "learning_rate": 3.41836082333958e-06, + "loss": 0.3164, + "step": 17029 + }, + { + "epoch": 0.7977701784794117, + "grad_norm": 0.6078804260191407, + "learning_rate": 3.418184429934428e-06, + "loss": 0.3244, + "step": 17030 + }, + { + "epoch": 0.7978170234693399, + "grad_norm": 0.6313147926337186, + "learning_rate": 3.4180080312453555e-06, + "loss": 0.3558, + "step": 17031 + }, + { + "epoch": 0.7978638684592683, + "grad_norm": 0.5635476475306045, + "learning_rate": 3.4178316272733763e-06, + "loss": 0.3186, + "step": 17032 + }, + { + "epoch": 0.7979107134491966, + "grad_norm": 0.5963212642312098, + "learning_rate": 3.417655218019508e-06, + "loss": 0.307, + "step": 17033 + }, + { + "epoch": 0.797957558439125, + "grad_norm": 0.5412885790963132, + "learning_rate": 3.417478803484764e-06, + "loss": 0.2952, + "step": 17034 + }, + { + "epoch": 0.7980044034290532, + "grad_norm": 0.563336059267598, + "learning_rate": 3.4173023836701603e-06, + "loss": 0.3242, + "step": 17035 + }, + { + "epoch": 0.7980512484189816, + "grad_norm": 0.5575423935385628, + "learning_rate": 3.417125958576712e-06, + "loss": 0.2992, + "step": 17036 + }, + { + "epoch": 0.7980980934089099, + "grad_norm": 0.5842218027319768, + "learning_rate": 3.4169495282054334e-06, + "loss": 0.3396, + "step": 17037 + }, + { + "epoch": 0.7981449383988383, + "grad_norm": 0.5573204113079182, + "learning_rate": 3.4167730925573416e-06, + "loss": 0.3195, + "step": 17038 + }, + { + "epoch": 0.7981917833887666, + "grad_norm": 0.5621129971833012, + "learning_rate": 3.4165966516334513e-06, + "loss": 0.3106, + "step": 17039 + }, + { + "epoch": 0.7982386283786949, + "grad_norm": 0.6414798333917209, + "learning_rate": 3.4164202054347773e-06, + "loss": 0.3232, + "step": 17040 + }, + { + "epoch": 0.7982854733686232, + "grad_norm": 0.5852448384798187, + "learning_rate": 3.416243753962335e-06, + "loss": 0.3191, + "step": 17041 + }, + { + "epoch": 0.7983323183585516, + "grad_norm": 0.5999191682242055, + "learning_rate": 3.416067297217141e-06, + "loss": 0.3147, + "step": 17042 + }, + { + "epoch": 0.7983791633484799, + "grad_norm": 0.6116597494211732, + "learning_rate": 3.415890835200209e-06, + "loss": 0.3213, + "step": 17043 + }, + { + "epoch": 0.7984260083384082, + "grad_norm": 0.5906990730760284, + "learning_rate": 3.4157143679125555e-06, + "loss": 0.3091, + "step": 17044 + }, + { + "epoch": 0.7984728533283365, + "grad_norm": 0.5775944258740603, + "learning_rate": 3.415537895355197e-06, + "loss": 0.3065, + "step": 17045 + }, + { + "epoch": 0.7985196983182649, + "grad_norm": 0.6717585484160602, + "learning_rate": 3.415361417529147e-06, + "loss": 0.3464, + "step": 17046 + }, + { + "epoch": 0.7985665433081932, + "grad_norm": 0.6151308006337446, + "learning_rate": 3.4151849344354225e-06, + "loss": 0.3328, + "step": 17047 + }, + { + "epoch": 0.7986133882981216, + "grad_norm": 0.5668276684380555, + "learning_rate": 3.415008446075039e-06, + "loss": 0.3125, + "step": 17048 + }, + { + "epoch": 0.7986602332880498, + "grad_norm": 0.6248293099170764, + "learning_rate": 3.4148319524490115e-06, + "loss": 0.3375, + "step": 17049 + }, + { + "epoch": 0.7987070782779782, + "grad_norm": 0.5628565597478912, + "learning_rate": 3.414655453558356e-06, + "loss": 0.3103, + "step": 17050 + }, + { + "epoch": 0.7987539232679065, + "grad_norm": 0.6077668133624109, + "learning_rate": 3.4144789494040888e-06, + "loss": 0.3231, + "step": 17051 + }, + { + "epoch": 0.7988007682578349, + "grad_norm": 0.5460328012434079, + "learning_rate": 3.414302439987225e-06, + "loss": 0.2953, + "step": 17052 + }, + { + "epoch": 0.7988476132477631, + "grad_norm": 0.5750620957401873, + "learning_rate": 3.4141259253087795e-06, + "loss": 0.3225, + "step": 17053 + }, + { + "epoch": 0.7988944582376915, + "grad_norm": 0.6424345915160088, + "learning_rate": 3.4139494053697697e-06, + "loss": 0.3235, + "step": 17054 + }, + { + "epoch": 0.7989413032276198, + "grad_norm": 0.5795960783369621, + "learning_rate": 3.4137728801712105e-06, + "loss": 0.3265, + "step": 17055 + }, + { + "epoch": 0.7989881482175482, + "grad_norm": 0.5458249220425789, + "learning_rate": 3.413596349714119e-06, + "loss": 0.3122, + "step": 17056 + }, + { + "epoch": 0.7990349932074765, + "grad_norm": 0.5477882291853653, + "learning_rate": 3.4134198139995095e-06, + "loss": 0.3145, + "step": 17057 + }, + { + "epoch": 0.7990818381974047, + "grad_norm": 0.5536381005013848, + "learning_rate": 3.413243273028398e-06, + "loss": 0.3113, + "step": 17058 + }, + { + "epoch": 0.7991286831873331, + "grad_norm": 0.5829807454046095, + "learning_rate": 3.413066726801802e-06, + "loss": 0.3066, + "step": 17059 + }, + { + "epoch": 0.7991755281772615, + "grad_norm": 0.580807955191768, + "learning_rate": 3.4128901753207362e-06, + "loss": 0.3203, + "step": 17060 + }, + { + "epoch": 0.7992223731671898, + "grad_norm": 0.6244354863277788, + "learning_rate": 3.4127136185862163e-06, + "loss": 0.3119, + "step": 17061 + }, + { + "epoch": 0.799269218157118, + "grad_norm": 0.5948104661494776, + "learning_rate": 3.4125370565992594e-06, + "loss": 0.3261, + "step": 17062 + }, + { + "epoch": 0.7993160631470464, + "grad_norm": 0.5538930559875186, + "learning_rate": 3.4123604893608808e-06, + "loss": 0.291, + "step": 17063 + }, + { + "epoch": 0.7993629081369747, + "grad_norm": 0.6048464719950233, + "learning_rate": 3.4121839168720973e-06, + "loss": 0.321, + "step": 17064 + }, + { + "epoch": 0.7994097531269031, + "grad_norm": 0.5635370097289366, + "learning_rate": 3.4120073391339237e-06, + "loss": 0.3198, + "step": 17065 + }, + { + "epoch": 0.7994565981168315, + "grad_norm": 0.5683269191413354, + "learning_rate": 3.4118307561473785e-06, + "loss": 0.3191, + "step": 17066 + }, + { + "epoch": 0.7995034431067597, + "grad_norm": 0.5929994633085689, + "learning_rate": 3.4116541679134756e-06, + "loss": 0.3283, + "step": 17067 + }, + { + "epoch": 0.799550288096688, + "grad_norm": 0.571113968976484, + "learning_rate": 3.411477574433232e-06, + "loss": 0.3071, + "step": 17068 + }, + { + "epoch": 0.7995971330866164, + "grad_norm": 0.6332685082770442, + "learning_rate": 3.411300975707665e-06, + "loss": 0.3192, + "step": 17069 + }, + { + "epoch": 0.7996439780765447, + "grad_norm": 0.6118691666356225, + "learning_rate": 3.411124371737789e-06, + "loss": 0.3451, + "step": 17070 + }, + { + "epoch": 0.799690823066473, + "grad_norm": 0.6118761783712099, + "learning_rate": 3.4109477625246214e-06, + "loss": 0.3177, + "step": 17071 + }, + { + "epoch": 0.7997376680564013, + "grad_norm": 0.6120546695306497, + "learning_rate": 3.4107711480691784e-06, + "loss": 0.3568, + "step": 17072 + }, + { + "epoch": 0.7997845130463297, + "grad_norm": 0.6176434431903544, + "learning_rate": 3.410594528372477e-06, + "loss": 0.3224, + "step": 17073 + }, + { + "epoch": 0.799831358036258, + "grad_norm": 0.5659048173917777, + "learning_rate": 3.4104179034355323e-06, + "loss": 0.3242, + "step": 17074 + }, + { + "epoch": 0.7998782030261864, + "grad_norm": 0.5404757770240457, + "learning_rate": 3.4102412732593616e-06, + "loss": 0.3052, + "step": 17075 + }, + { + "epoch": 0.7999250480161146, + "grad_norm": 0.6305029676283133, + "learning_rate": 3.4100646378449807e-06, + "loss": 0.3091, + "step": 17076 + }, + { + "epoch": 0.799971893006043, + "grad_norm": 0.5826009896206865, + "learning_rate": 3.409887997193407e-06, + "loss": 0.3112, + "step": 17077 + }, + { + "epoch": 0.8000187379959713, + "grad_norm": 0.6237394730994239, + "learning_rate": 3.4097113513056564e-06, + "loss": 0.3286, + "step": 17078 + }, + { + "epoch": 0.8000655829858997, + "grad_norm": 0.629925041465942, + "learning_rate": 3.4095347001827455e-06, + "loss": 0.3024, + "step": 17079 + }, + { + "epoch": 0.8001124279758279, + "grad_norm": 0.6148995255921418, + "learning_rate": 3.4093580438256914e-06, + "loss": 0.3277, + "step": 17080 + }, + { + "epoch": 0.8001592729657563, + "grad_norm": 0.5918698563550401, + "learning_rate": 3.4091813822355102e-06, + "loss": 0.296, + "step": 17081 + }, + { + "epoch": 0.8002061179556846, + "grad_norm": 0.5795322826266175, + "learning_rate": 3.4090047154132184e-06, + "loss": 0.3143, + "step": 17082 + }, + { + "epoch": 0.800252962945613, + "grad_norm": 0.6209772004854075, + "learning_rate": 3.408828043359833e-06, + "loss": 0.3171, + "step": 17083 + }, + { + "epoch": 0.8002998079355413, + "grad_norm": 0.61298707823353, + "learning_rate": 3.408651366076371e-06, + "loss": 0.3413, + "step": 17084 + }, + { + "epoch": 0.8003466529254696, + "grad_norm": 0.5384767128066763, + "learning_rate": 3.408474683563848e-06, + "loss": 0.3052, + "step": 17085 + }, + { + "epoch": 0.8003934979153979, + "grad_norm": 0.5886222124873897, + "learning_rate": 3.4082979958232822e-06, + "loss": 0.3049, + "step": 17086 + }, + { + "epoch": 0.8004403429053263, + "grad_norm": 0.5782701273873166, + "learning_rate": 3.4081213028556896e-06, + "loss": 0.3219, + "step": 17087 + }, + { + "epoch": 0.8004871878952546, + "grad_norm": 0.5941203586499313, + "learning_rate": 3.407944604662088e-06, + "loss": 0.3172, + "step": 17088 + }, + { + "epoch": 0.8005340328851829, + "grad_norm": 0.5789513931032569, + "learning_rate": 3.4077679012434916e-06, + "loss": 0.3323, + "step": 17089 + }, + { + "epoch": 0.8005808778751112, + "grad_norm": 0.5443991364974211, + "learning_rate": 3.4075911926009203e-06, + "loss": 0.3034, + "step": 17090 + }, + { + "epoch": 0.8006277228650396, + "grad_norm": 0.5992030808972486, + "learning_rate": 3.4074144787353898e-06, + "loss": 0.3174, + "step": 17091 + }, + { + "epoch": 0.8006745678549679, + "grad_norm": 0.6209562710121321, + "learning_rate": 3.4072377596479167e-06, + "loss": 0.3279, + "step": 17092 + }, + { + "epoch": 0.8007214128448963, + "grad_norm": 0.569918227201259, + "learning_rate": 3.4070610353395177e-06, + "loss": 0.3197, + "step": 17093 + }, + { + "epoch": 0.8007682578348245, + "grad_norm": 0.635414374347227, + "learning_rate": 3.406884305811212e-06, + "loss": 0.3222, + "step": 17094 + }, + { + "epoch": 0.8008151028247529, + "grad_norm": 0.6523380032792716, + "learning_rate": 3.406707571064014e-06, + "loss": 0.3302, + "step": 17095 + }, + { + "epoch": 0.8008619478146812, + "grad_norm": 0.6549185971620096, + "learning_rate": 3.4065308310989415e-06, + "loss": 0.365, + "step": 17096 + }, + { + "epoch": 0.8009087928046096, + "grad_norm": 0.6985110858926904, + "learning_rate": 3.4063540859170126e-06, + "loss": 0.3272, + "step": 17097 + }, + { + "epoch": 0.8009556377945378, + "grad_norm": 0.6002954164982518, + "learning_rate": 3.4061773355192436e-06, + "loss": 0.3052, + "step": 17098 + }, + { + "epoch": 0.8010024827844662, + "grad_norm": 0.6170841079332453, + "learning_rate": 3.4060005799066515e-06, + "loss": 0.318, + "step": 17099 + }, + { + "epoch": 0.8010493277743945, + "grad_norm": 0.61778917828122, + "learning_rate": 3.405823819080254e-06, + "loss": 0.3506, + "step": 17100 + }, + { + "epoch": 0.8010961727643229, + "grad_norm": 0.5849372806137031, + "learning_rate": 3.4056470530410683e-06, + "loss": 0.313, + "step": 17101 + }, + { + "epoch": 0.8011430177542512, + "grad_norm": 0.589674014433771, + "learning_rate": 3.405470281790111e-06, + "loss": 0.3178, + "step": 17102 + }, + { + "epoch": 0.8011898627441795, + "grad_norm": 0.6103067141270772, + "learning_rate": 3.4052935053284005e-06, + "loss": 0.3471, + "step": 17103 + }, + { + "epoch": 0.8012367077341078, + "grad_norm": 0.557522695508912, + "learning_rate": 3.405116723656953e-06, + "loss": 0.3171, + "step": 17104 + }, + { + "epoch": 0.8012835527240362, + "grad_norm": 0.5716699538942644, + "learning_rate": 3.4049399367767854e-06, + "loss": 0.3087, + "step": 17105 + }, + { + "epoch": 0.8013303977139645, + "grad_norm": 0.5808008729104381, + "learning_rate": 3.4047631446889174e-06, + "loss": 0.3215, + "step": 17106 + }, + { + "epoch": 0.8013772427038928, + "grad_norm": 0.6174922916686181, + "learning_rate": 3.4045863473943643e-06, + "loss": 0.3391, + "step": 17107 + }, + { + "epoch": 0.8014240876938211, + "grad_norm": 0.5548985840911332, + "learning_rate": 3.404409544894144e-06, + "loss": 0.3138, + "step": 17108 + }, + { + "epoch": 0.8014709326837495, + "grad_norm": 0.6108491871348751, + "learning_rate": 3.404232737189274e-06, + "loss": 0.3309, + "step": 17109 + }, + { + "epoch": 0.8015177776736778, + "grad_norm": 0.5824537307822448, + "learning_rate": 3.404055924280772e-06, + "loss": 0.3168, + "step": 17110 + }, + { + "epoch": 0.8015646226636062, + "grad_norm": 0.6050865232695564, + "learning_rate": 3.403879106169655e-06, + "loss": 0.3218, + "step": 17111 + }, + { + "epoch": 0.8016114676535344, + "grad_norm": 0.5978546677673441, + "learning_rate": 3.403702282856942e-06, + "loss": 0.3225, + "step": 17112 + }, + { + "epoch": 0.8016583126434628, + "grad_norm": 0.6096884129136615, + "learning_rate": 3.403525454343648e-06, + "loss": 0.3332, + "step": 17113 + }, + { + "epoch": 0.8017051576333911, + "grad_norm": 0.5875349177076665, + "learning_rate": 3.403348620630793e-06, + "loss": 0.3111, + "step": 17114 + }, + { + "epoch": 0.8017520026233195, + "grad_norm": 0.6111134426111992, + "learning_rate": 3.4031717817193943e-06, + "loss": 0.3269, + "step": 17115 + }, + { + "epoch": 0.8017988476132477, + "grad_norm": 0.6077597834681182, + "learning_rate": 3.402994937610468e-06, + "loss": 0.3202, + "step": 17116 + }, + { + "epoch": 0.8018456926031761, + "grad_norm": 0.5462335073001832, + "learning_rate": 3.402818088305033e-06, + "loss": 0.2957, + "step": 17117 + }, + { + "epoch": 0.8018925375931044, + "grad_norm": 0.5747669361190263, + "learning_rate": 3.4026412338041072e-06, + "loss": 0.2971, + "step": 17118 + }, + { + "epoch": 0.8019393825830328, + "grad_norm": 0.5908005791246171, + "learning_rate": 3.4024643741087075e-06, + "loss": 0.333, + "step": 17119 + }, + { + "epoch": 0.8019862275729611, + "grad_norm": 0.5543877416118305, + "learning_rate": 3.402287509219852e-06, + "loss": 0.3184, + "step": 17120 + }, + { + "epoch": 0.8020330725628894, + "grad_norm": 0.6377443588436692, + "learning_rate": 3.4021106391385595e-06, + "loss": 0.3515, + "step": 17121 + }, + { + "epoch": 0.8020799175528177, + "grad_norm": 0.6168342135176369, + "learning_rate": 3.4019337638658466e-06, + "loss": 0.3344, + "step": 17122 + }, + { + "epoch": 0.8021267625427461, + "grad_norm": 0.6607964595541277, + "learning_rate": 3.4017568834027313e-06, + "loss": 0.3024, + "step": 17123 + }, + { + "epoch": 0.8021736075326744, + "grad_norm": 0.5897281685780013, + "learning_rate": 3.4015799977502323e-06, + "loss": 0.3344, + "step": 17124 + }, + { + "epoch": 0.8022204525226027, + "grad_norm": 0.577316624942551, + "learning_rate": 3.4014031069093667e-06, + "loss": 0.3138, + "step": 17125 + }, + { + "epoch": 0.802267297512531, + "grad_norm": 0.5751960271712041, + "learning_rate": 3.401226210881153e-06, + "loss": 0.3226, + "step": 17126 + }, + { + "epoch": 0.8023141425024594, + "grad_norm": 0.6319006074810071, + "learning_rate": 3.4010493096666087e-06, + "loss": 0.3319, + "step": 17127 + }, + { + "epoch": 0.8023609874923877, + "grad_norm": 0.6335823514795149, + "learning_rate": 3.4008724032667517e-06, + "loss": 0.3362, + "step": 17128 + }, + { + "epoch": 0.8024078324823161, + "grad_norm": 0.5995683892758409, + "learning_rate": 3.400695491682601e-06, + "loss": 0.3123, + "step": 17129 + }, + { + "epoch": 0.8024546774722443, + "grad_norm": 0.57472844140816, + "learning_rate": 3.4005185749151748e-06, + "loss": 0.3166, + "step": 17130 + }, + { + "epoch": 0.8025015224621727, + "grad_norm": 0.5688655863728593, + "learning_rate": 3.4003416529654894e-06, + "loss": 0.2815, + "step": 17131 + }, + { + "epoch": 0.802548367452101, + "grad_norm": 0.6148516385812312, + "learning_rate": 3.4001647258345645e-06, + "loss": 0.3267, + "step": 17132 + }, + { + "epoch": 0.8025952124420294, + "grad_norm": 0.6200637252661556, + "learning_rate": 3.3999877935234182e-06, + "loss": 0.3392, + "step": 17133 + }, + { + "epoch": 0.8026420574319576, + "grad_norm": 0.6511054323773668, + "learning_rate": 3.3998108560330674e-06, + "loss": 0.3586, + "step": 17134 + }, + { + "epoch": 0.802688902421886, + "grad_norm": 0.5461173639001087, + "learning_rate": 3.3996339133645318e-06, + "loss": 0.2843, + "step": 17135 + }, + { + "epoch": 0.8027357474118143, + "grad_norm": 0.5790879373853315, + "learning_rate": 3.3994569655188296e-06, + "loss": 0.2916, + "step": 17136 + }, + { + "epoch": 0.8027825924017427, + "grad_norm": 0.5864178283546437, + "learning_rate": 3.399280012496978e-06, + "loss": 0.3105, + "step": 17137 + }, + { + "epoch": 0.802829437391671, + "grad_norm": 0.6031300687925113, + "learning_rate": 3.399103054299996e-06, + "loss": 0.3116, + "step": 17138 + }, + { + "epoch": 0.8028762823815992, + "grad_norm": 0.5614652319735334, + "learning_rate": 3.3989260909289022e-06, + "loss": 0.3154, + "step": 17139 + }, + { + "epoch": 0.8029231273715276, + "grad_norm": 0.6270079687962368, + "learning_rate": 3.3987491223847146e-06, + "loss": 0.3391, + "step": 17140 + }, + { + "epoch": 0.802969972361456, + "grad_norm": 0.5672580571297232, + "learning_rate": 3.3985721486684514e-06, + "loss": 0.3097, + "step": 17141 + }, + { + "epoch": 0.8030168173513843, + "grad_norm": 0.5677833560488097, + "learning_rate": 3.3983951697811318e-06, + "loss": 0.3009, + "step": 17142 + }, + { + "epoch": 0.8030636623413125, + "grad_norm": 0.6036300560262292, + "learning_rate": 3.398218185723774e-06, + "loss": 0.3124, + "step": 17143 + }, + { + "epoch": 0.8031105073312409, + "grad_norm": 0.6006120868521932, + "learning_rate": 3.3980411964973954e-06, + "loss": 0.3227, + "step": 17144 + }, + { + "epoch": 0.8031573523211692, + "grad_norm": 0.6284642521225086, + "learning_rate": 3.3978642021030158e-06, + "loss": 0.3011, + "step": 17145 + }, + { + "epoch": 0.8032041973110976, + "grad_norm": 0.5676034577848731, + "learning_rate": 3.3976872025416535e-06, + "loss": 0.3083, + "step": 17146 + }, + { + "epoch": 0.803251042301026, + "grad_norm": 0.6140368776284375, + "learning_rate": 3.397510197814327e-06, + "loss": 0.3355, + "step": 17147 + }, + { + "epoch": 0.8032978872909542, + "grad_norm": 0.569741919135034, + "learning_rate": 3.397333187922055e-06, + "loss": 0.3205, + "step": 17148 + }, + { + "epoch": 0.8033447322808825, + "grad_norm": 0.5675743699821187, + "learning_rate": 3.397156172865856e-06, + "loss": 0.2983, + "step": 17149 + }, + { + "epoch": 0.8033915772708109, + "grad_norm": 0.5955787183233984, + "learning_rate": 3.3969791526467486e-06, + "loss": 0.3338, + "step": 17150 + }, + { + "epoch": 0.8034384222607392, + "grad_norm": 0.613745625053364, + "learning_rate": 3.3968021272657515e-06, + "loss": 0.3316, + "step": 17151 + }, + { + "epoch": 0.8034852672506675, + "grad_norm": 0.5689532664488625, + "learning_rate": 3.396625096723884e-06, + "loss": 0.3159, + "step": 17152 + }, + { + "epoch": 0.8035321122405958, + "grad_norm": 0.567935936894977, + "learning_rate": 3.396448061022164e-06, + "loss": 0.3085, + "step": 17153 + }, + { + "epoch": 0.8035789572305242, + "grad_norm": 0.560176041428213, + "learning_rate": 3.3962710201616104e-06, + "loss": 0.3131, + "step": 17154 + }, + { + "epoch": 0.8036258022204525, + "grad_norm": 0.6377782629296677, + "learning_rate": 3.396093974143243e-06, + "loss": 0.3084, + "step": 17155 + }, + { + "epoch": 0.8036726472103809, + "grad_norm": 0.612873624462806, + "learning_rate": 3.39591692296808e-06, + "loss": 0.315, + "step": 17156 + }, + { + "epoch": 0.8037194922003091, + "grad_norm": 0.5579720434965428, + "learning_rate": 3.39573986663714e-06, + "loss": 0.3164, + "step": 17157 + }, + { + "epoch": 0.8037663371902375, + "grad_norm": 0.5425926101356786, + "learning_rate": 3.395562805151443e-06, + "loss": 0.3149, + "step": 17158 + }, + { + "epoch": 0.8038131821801658, + "grad_norm": 0.5898142868639199, + "learning_rate": 3.395385738512006e-06, + "loss": 0.3201, + "step": 17159 + }, + { + "epoch": 0.8038600271700942, + "grad_norm": 0.6700072163985606, + "learning_rate": 3.3952086667198497e-06, + "loss": 0.341, + "step": 17160 + }, + { + "epoch": 0.8039068721600224, + "grad_norm": 0.579025580733767, + "learning_rate": 3.395031589775992e-06, + "loss": 0.2982, + "step": 17161 + }, + { + "epoch": 0.8039537171499508, + "grad_norm": 0.6242812290054713, + "learning_rate": 3.3948545076814534e-06, + "loss": 0.3228, + "step": 17162 + }, + { + "epoch": 0.8040005621398791, + "grad_norm": 0.5557561916572806, + "learning_rate": 3.394677420437251e-06, + "loss": 0.3126, + "step": 17163 + }, + { + "epoch": 0.8040474071298075, + "grad_norm": 0.6667527821987494, + "learning_rate": 3.394500328044406e-06, + "loss": 0.3292, + "step": 17164 + }, + { + "epoch": 0.8040942521197358, + "grad_norm": 0.6174790909785887, + "learning_rate": 3.3943232305039355e-06, + "loss": 0.341, + "step": 17165 + }, + { + "epoch": 0.8041410971096641, + "grad_norm": 0.6358206614681229, + "learning_rate": 3.39414612781686e-06, + "loss": 0.3281, + "step": 17166 + }, + { + "epoch": 0.8041879420995924, + "grad_norm": 0.6507013725693132, + "learning_rate": 3.393969019984198e-06, + "loss": 0.3332, + "step": 17167 + }, + { + "epoch": 0.8042347870895208, + "grad_norm": 0.6011633731633674, + "learning_rate": 3.393791907006969e-06, + "loss": 0.3337, + "step": 17168 + }, + { + "epoch": 0.8042816320794491, + "grad_norm": 0.625599085341678, + "learning_rate": 3.3936147888861924e-06, + "loss": 0.2981, + "step": 17169 + }, + { + "epoch": 0.8043284770693774, + "grad_norm": 0.5613701730722155, + "learning_rate": 3.3934376656228874e-06, + "loss": 0.3034, + "step": 17170 + }, + { + "epoch": 0.8043753220593057, + "grad_norm": 0.5913959966022461, + "learning_rate": 3.3932605372180734e-06, + "loss": 0.3217, + "step": 17171 + }, + { + "epoch": 0.8044221670492341, + "grad_norm": 0.5988961648338988, + "learning_rate": 3.393083403672769e-06, + "loss": 0.3227, + "step": 17172 + }, + { + "epoch": 0.8044690120391624, + "grad_norm": 0.5916514113594171, + "learning_rate": 3.392906264987994e-06, + "loss": 0.3216, + "step": 17173 + }, + { + "epoch": 0.8045158570290908, + "grad_norm": 0.6136405856027396, + "learning_rate": 3.3927291211647685e-06, + "loss": 0.3311, + "step": 17174 + }, + { + "epoch": 0.804562702019019, + "grad_norm": 0.6143909263395849, + "learning_rate": 3.3925519722041106e-06, + "loss": 0.3182, + "step": 17175 + }, + { + "epoch": 0.8046095470089474, + "grad_norm": 0.5957229007571998, + "learning_rate": 3.3923748181070415e-06, + "loss": 0.3116, + "step": 17176 + }, + { + "epoch": 0.8046563919988757, + "grad_norm": 0.6110097712450521, + "learning_rate": 3.392197658874578e-06, + "loss": 0.3401, + "step": 17177 + }, + { + "epoch": 0.8047032369888041, + "grad_norm": 0.6174561925877835, + "learning_rate": 3.3920204945077428e-06, + "loss": 0.326, + "step": 17178 + }, + { + "epoch": 0.8047500819787323, + "grad_norm": 0.651427097020684, + "learning_rate": 3.3918433250075532e-06, + "loss": 0.3356, + "step": 17179 + }, + { + "epoch": 0.8047969269686607, + "grad_norm": 0.6480729429862393, + "learning_rate": 3.3916661503750292e-06, + "loss": 0.3041, + "step": 17180 + }, + { + "epoch": 0.804843771958589, + "grad_norm": 0.597590765887233, + "learning_rate": 3.3914889706111907e-06, + "loss": 0.3342, + "step": 17181 + }, + { + "epoch": 0.8048906169485174, + "grad_norm": 0.5836382233470232, + "learning_rate": 3.3913117857170573e-06, + "loss": 0.3334, + "step": 17182 + }, + { + "epoch": 0.8049374619384457, + "grad_norm": 0.6185721184669699, + "learning_rate": 3.3911345956936487e-06, + "loss": 0.3279, + "step": 17183 + }, + { + "epoch": 0.804984306928374, + "grad_norm": 0.6089851150341156, + "learning_rate": 3.3909574005419836e-06, + "loss": 0.3167, + "step": 17184 + }, + { + "epoch": 0.8050311519183023, + "grad_norm": 0.6432041904116139, + "learning_rate": 3.3907802002630846e-06, + "loss": 0.3363, + "step": 17185 + }, + { + "epoch": 0.8050779969082307, + "grad_norm": 3.1245052973359915, + "learning_rate": 3.3906029948579676e-06, + "loss": 0.316, + "step": 17186 + }, + { + "epoch": 0.805124841898159, + "grad_norm": 0.5970224878362047, + "learning_rate": 3.3904257843276545e-06, + "loss": 0.3108, + "step": 17187 + }, + { + "epoch": 0.8051716868880873, + "grad_norm": 0.6096929432263598, + "learning_rate": 3.3902485686731656e-06, + "loss": 0.3175, + "step": 17188 + }, + { + "epoch": 0.8052185318780156, + "grad_norm": 0.6582486596603243, + "learning_rate": 3.3900713478955195e-06, + "loss": 0.3378, + "step": 17189 + }, + { + "epoch": 0.805265376867944, + "grad_norm": 0.6520662525169593, + "learning_rate": 3.3898941219957365e-06, + "loss": 0.3236, + "step": 17190 + }, + { + "epoch": 0.8053122218578723, + "grad_norm": 0.5931521362527455, + "learning_rate": 3.3897168909748367e-06, + "loss": 0.3161, + "step": 17191 + }, + { + "epoch": 0.8053590668478007, + "grad_norm": 0.5963125452206554, + "learning_rate": 3.38953965483384e-06, + "loss": 0.3281, + "step": 17192 + }, + { + "epoch": 0.8054059118377289, + "grad_norm": 0.595245673547831, + "learning_rate": 3.3893624135737653e-06, + "loss": 0.331, + "step": 17193 + }, + { + "epoch": 0.8054527568276573, + "grad_norm": 0.5789262611249043, + "learning_rate": 3.389185167195634e-06, + "loss": 0.3194, + "step": 17194 + }, + { + "epoch": 0.8054996018175856, + "grad_norm": 0.5679685366639019, + "learning_rate": 3.389007915700465e-06, + "loss": 0.3143, + "step": 17195 + }, + { + "epoch": 0.805546446807514, + "grad_norm": 0.5904639397172078, + "learning_rate": 3.3888306590892794e-06, + "loss": 0.3042, + "step": 17196 + }, + { + "epoch": 0.8055932917974422, + "grad_norm": 0.603799360470938, + "learning_rate": 3.3886533973630977e-06, + "loss": 0.315, + "step": 17197 + }, + { + "epoch": 0.8056401367873706, + "grad_norm": 0.5870592879355719, + "learning_rate": 3.3884761305229372e-06, + "loss": 0.3224, + "step": 17198 + }, + { + "epoch": 0.8056869817772989, + "grad_norm": 0.5841675321676248, + "learning_rate": 3.3882988585698208e-06, + "loss": 0.3174, + "step": 17199 + }, + { + "epoch": 0.8057338267672273, + "grad_norm": 0.6504686691167048, + "learning_rate": 3.388121581504768e-06, + "loss": 0.3319, + "step": 17200 + }, + { + "epoch": 0.8057806717571556, + "grad_norm": 0.595425846233161, + "learning_rate": 3.387944299328798e-06, + "loss": 0.3204, + "step": 17201 + }, + { + "epoch": 0.8058275167470839, + "grad_norm": 0.5962847319608012, + "learning_rate": 3.3877670120429325e-06, + "loss": 0.3254, + "step": 17202 + }, + { + "epoch": 0.8058743617370122, + "grad_norm": 0.6117703026040238, + "learning_rate": 3.387589719648191e-06, + "loss": 0.3484, + "step": 17203 + }, + { + "epoch": 0.8059212067269406, + "grad_norm": 0.6616893135899433, + "learning_rate": 3.387412422145593e-06, + "loss": 0.3459, + "step": 17204 + }, + { + "epoch": 0.8059680517168689, + "grad_norm": 0.5343452466500852, + "learning_rate": 3.3872351195361595e-06, + "loss": 0.3045, + "step": 17205 + }, + { + "epoch": 0.8060148967067972, + "grad_norm": 0.5814618802165982, + "learning_rate": 3.387057811820912e-06, + "loss": 0.3055, + "step": 17206 + }, + { + "epoch": 0.8060617416967255, + "grad_norm": 0.5870552595504981, + "learning_rate": 3.3868804990008684e-06, + "loss": 0.3254, + "step": 17207 + }, + { + "epoch": 0.8061085866866539, + "grad_norm": 0.6210017924938322, + "learning_rate": 3.3867031810770513e-06, + "loss": 0.3266, + "step": 17208 + }, + { + "epoch": 0.8061554316765822, + "grad_norm": 0.5935620127836826, + "learning_rate": 3.38652585805048e-06, + "loss": 0.293, + "step": 17209 + }, + { + "epoch": 0.8062022766665106, + "grad_norm": 0.5214189571124971, + "learning_rate": 3.3863485299221756e-06, + "loss": 0.3054, + "step": 17210 + }, + { + "epoch": 0.8062491216564388, + "grad_norm": 0.5923020864515794, + "learning_rate": 3.3861711966931575e-06, + "loss": 0.3084, + "step": 17211 + }, + { + "epoch": 0.8062959666463672, + "grad_norm": 0.6301561560915871, + "learning_rate": 3.3859938583644467e-06, + "loss": 0.3207, + "step": 17212 + }, + { + "epoch": 0.8063428116362955, + "grad_norm": 0.6067900857303461, + "learning_rate": 3.385816514937065e-06, + "loss": 0.335, + "step": 17213 + }, + { + "epoch": 0.8063896566262239, + "grad_norm": 0.5739824132130337, + "learning_rate": 3.3856391664120314e-06, + "loss": 0.3078, + "step": 17214 + }, + { + "epoch": 0.8064365016161521, + "grad_norm": 0.5921527525542846, + "learning_rate": 3.3854618127903664e-06, + "loss": 0.3175, + "step": 17215 + }, + { + "epoch": 0.8064833466060805, + "grad_norm": 0.6295630230247379, + "learning_rate": 3.3852844540730923e-06, + "loss": 0.3446, + "step": 17216 + }, + { + "epoch": 0.8065301915960088, + "grad_norm": 0.6095336520719038, + "learning_rate": 3.385107090261228e-06, + "loss": 0.3028, + "step": 17217 + }, + { + "epoch": 0.8065770365859372, + "grad_norm": 0.5897460983993164, + "learning_rate": 3.3849297213557946e-06, + "loss": 0.3476, + "step": 17218 + }, + { + "epoch": 0.8066238815758655, + "grad_norm": 0.5811937410320193, + "learning_rate": 3.3847523473578136e-06, + "loss": 0.3074, + "step": 17219 + }, + { + "epoch": 0.8066707265657937, + "grad_norm": 0.5666465365356703, + "learning_rate": 3.3845749682683053e-06, + "loss": 0.3023, + "step": 17220 + }, + { + "epoch": 0.8067175715557221, + "grad_norm": 0.5750653695942557, + "learning_rate": 3.3843975840882903e-06, + "loss": 0.3226, + "step": 17221 + }, + { + "epoch": 0.8067644165456505, + "grad_norm": 0.5727338226647096, + "learning_rate": 3.384220194818789e-06, + "loss": 0.3129, + "step": 17222 + }, + { + "epoch": 0.8068112615355788, + "grad_norm": 0.5994742672224694, + "learning_rate": 3.384042800460824e-06, + "loss": 0.346, + "step": 17223 + }, + { + "epoch": 0.806858106525507, + "grad_norm": 0.5389393352399154, + "learning_rate": 3.3838654010154135e-06, + "loss": 0.3228, + "step": 17224 + }, + { + "epoch": 0.8069049515154354, + "grad_norm": 0.5352749218883986, + "learning_rate": 3.38368799648358e-06, + "loss": 0.3082, + "step": 17225 + }, + { + "epoch": 0.8069517965053637, + "grad_norm": 0.5939658989410069, + "learning_rate": 3.3835105868663444e-06, + "loss": 0.3176, + "step": 17226 + }, + { + "epoch": 0.8069986414952921, + "grad_norm": 0.5706028927740099, + "learning_rate": 3.383333172164728e-06, + "loss": 0.3071, + "step": 17227 + }, + { + "epoch": 0.8070454864852205, + "grad_norm": 0.59090729482902, + "learning_rate": 3.3831557523797508e-06, + "loss": 0.3192, + "step": 17228 + }, + { + "epoch": 0.8070923314751487, + "grad_norm": 0.6003923638440625, + "learning_rate": 3.3829783275124332e-06, + "loss": 0.3259, + "step": 17229 + }, + { + "epoch": 0.807139176465077, + "grad_norm": 0.6077007961017412, + "learning_rate": 3.382800897563799e-06, + "loss": 0.3106, + "step": 17230 + }, + { + "epoch": 0.8071860214550054, + "grad_norm": 0.5683403207665146, + "learning_rate": 3.3826234625348664e-06, + "loss": 0.3197, + "step": 17231 + }, + { + "epoch": 0.8072328664449337, + "grad_norm": 0.590698492327469, + "learning_rate": 3.3824460224266576e-06, + "loss": 0.3123, + "step": 17232 + }, + { + "epoch": 0.807279711434862, + "grad_norm": 0.6179720739922548, + "learning_rate": 3.3822685772401936e-06, + "loss": 0.3293, + "step": 17233 + }, + { + "epoch": 0.8073265564247903, + "grad_norm": 0.5806011009607782, + "learning_rate": 3.3820911269764973e-06, + "loss": 0.3306, + "step": 17234 + }, + { + "epoch": 0.8073734014147187, + "grad_norm": 0.5698948203831112, + "learning_rate": 3.3819136716365862e-06, + "loss": 0.2939, + "step": 17235 + }, + { + "epoch": 0.807420246404647, + "grad_norm": 0.6335482170608927, + "learning_rate": 3.3817362112214846e-06, + "loss": 0.3181, + "step": 17236 + }, + { + "epoch": 0.8074670913945754, + "grad_norm": 0.5787952938322124, + "learning_rate": 3.3815587457322122e-06, + "loss": 0.3121, + "step": 17237 + }, + { + "epoch": 0.8075139363845036, + "grad_norm": 0.5793991892385398, + "learning_rate": 3.3813812751697914e-06, + "loss": 0.3005, + "step": 17238 + }, + { + "epoch": 0.807560781374432, + "grad_norm": 0.5681229312003576, + "learning_rate": 3.3812037995352425e-06, + "loss": 0.3151, + "step": 17239 + }, + { + "epoch": 0.8076076263643603, + "grad_norm": 0.6754082337320759, + "learning_rate": 3.3810263188295877e-06, + "loss": 0.3429, + "step": 17240 + }, + { + "epoch": 0.8076544713542887, + "grad_norm": 0.6248877756762603, + "learning_rate": 3.380848833053848e-06, + "loss": 0.3442, + "step": 17241 + }, + { + "epoch": 0.8077013163442169, + "grad_norm": 0.6251300822497108, + "learning_rate": 3.3806713422090436e-06, + "loss": 0.3185, + "step": 17242 + }, + { + "epoch": 0.8077481613341453, + "grad_norm": 0.5898428947854926, + "learning_rate": 3.3804938462961977e-06, + "loss": 0.3096, + "step": 17243 + }, + { + "epoch": 0.8077950063240736, + "grad_norm": 0.5962890222943185, + "learning_rate": 3.380316345316331e-06, + "loss": 0.3119, + "step": 17244 + }, + { + "epoch": 0.807841851314002, + "grad_norm": 0.6646807973633322, + "learning_rate": 3.380138839270465e-06, + "loss": 0.3537, + "step": 17245 + }, + { + "epoch": 0.8078886963039303, + "grad_norm": 0.6153444272723124, + "learning_rate": 3.379961328159621e-06, + "loss": 0.3421, + "step": 17246 + }, + { + "epoch": 0.8079355412938586, + "grad_norm": 0.6145474735538662, + "learning_rate": 3.3797838119848203e-06, + "loss": 0.3337, + "step": 17247 + }, + { + "epoch": 0.8079823862837869, + "grad_norm": 0.5653051607212559, + "learning_rate": 3.3796062907470856e-06, + "loss": 0.3245, + "step": 17248 + }, + { + "epoch": 0.8080292312737153, + "grad_norm": 0.5960015619613874, + "learning_rate": 3.379428764447438e-06, + "loss": 0.3192, + "step": 17249 + }, + { + "epoch": 0.8080760762636436, + "grad_norm": 0.5889691955072082, + "learning_rate": 3.379251233086898e-06, + "loss": 0.3047, + "step": 17250 + }, + { + "epoch": 0.8081229212535719, + "grad_norm": 0.6539989509749043, + "learning_rate": 3.379073696666489e-06, + "loss": 0.3176, + "step": 17251 + }, + { + "epoch": 0.8081697662435002, + "grad_norm": 0.5883922595681689, + "learning_rate": 3.3788961551872312e-06, + "loss": 0.3007, + "step": 17252 + }, + { + "epoch": 0.8082166112334286, + "grad_norm": 0.6025910221309095, + "learning_rate": 3.378718608650147e-06, + "loss": 0.316, + "step": 17253 + }, + { + "epoch": 0.8082634562233569, + "grad_norm": 0.6019689920579675, + "learning_rate": 3.3785410570562583e-06, + "loss": 0.3165, + "step": 17254 + }, + { + "epoch": 0.8083103012132853, + "grad_norm": 0.6260940466443945, + "learning_rate": 3.3783635004065875e-06, + "loss": 0.3404, + "step": 17255 + }, + { + "epoch": 0.8083571462032135, + "grad_norm": 0.6203717168750504, + "learning_rate": 3.3781859387021536e-06, + "loss": 0.3162, + "step": 17256 + }, + { + "epoch": 0.8084039911931419, + "grad_norm": 0.5810093380323956, + "learning_rate": 3.378008371943981e-06, + "loss": 0.2968, + "step": 17257 + }, + { + "epoch": 0.8084508361830702, + "grad_norm": 0.540222037790054, + "learning_rate": 3.3778308001330917e-06, + "loss": 0.3107, + "step": 17258 + }, + { + "epoch": 0.8084976811729986, + "grad_norm": 0.566731381355941, + "learning_rate": 3.377653223270506e-06, + "loss": 0.3116, + "step": 17259 + }, + { + "epoch": 0.8085445261629268, + "grad_norm": 0.639262306492617, + "learning_rate": 3.3774756413572466e-06, + "loss": 0.3305, + "step": 17260 + }, + { + "epoch": 0.8085913711528552, + "grad_norm": 0.6449378365318849, + "learning_rate": 3.3772980543943364e-06, + "loss": 0.3158, + "step": 17261 + }, + { + "epoch": 0.8086382161427835, + "grad_norm": 0.6347401444617093, + "learning_rate": 3.377120462382796e-06, + "loss": 0.3237, + "step": 17262 + }, + { + "epoch": 0.8086850611327119, + "grad_norm": 0.5922866448473502, + "learning_rate": 3.376942865323647e-06, + "loss": 0.3382, + "step": 17263 + }, + { + "epoch": 0.8087319061226402, + "grad_norm": 0.5547451919051032, + "learning_rate": 3.3767652632179127e-06, + "loss": 0.2864, + "step": 17264 + }, + { + "epoch": 0.8087787511125685, + "grad_norm": 0.5620128918768401, + "learning_rate": 3.3765876560666146e-06, + "loss": 0.3034, + "step": 17265 + }, + { + "epoch": 0.8088255961024968, + "grad_norm": 0.537545434309204, + "learning_rate": 3.3764100438707754e-06, + "loss": 0.2979, + "step": 17266 + }, + { + "epoch": 0.8088724410924252, + "grad_norm": 0.5456057977635068, + "learning_rate": 3.376232426631416e-06, + "loss": 0.29, + "step": 17267 + }, + { + "epoch": 0.8089192860823535, + "grad_norm": 0.6255074925796631, + "learning_rate": 3.3760548043495596e-06, + "loss": 0.3493, + "step": 17268 + }, + { + "epoch": 0.8089661310722818, + "grad_norm": 0.5597735794418678, + "learning_rate": 3.375877177026228e-06, + "loss": 0.3235, + "step": 17269 + }, + { + "epoch": 0.8090129760622101, + "grad_norm": 0.5471648959610291, + "learning_rate": 3.375699544662443e-06, + "loss": 0.3173, + "step": 17270 + }, + { + "epoch": 0.8090598210521385, + "grad_norm": 0.5662590771593011, + "learning_rate": 3.3755219072592273e-06, + "loss": 0.3182, + "step": 17271 + }, + { + "epoch": 0.8091066660420668, + "grad_norm": 0.543958777210564, + "learning_rate": 3.375344264817604e-06, + "loss": 0.304, + "step": 17272 + }, + { + "epoch": 0.8091535110319952, + "grad_norm": 0.5967978060058118, + "learning_rate": 3.375166617338593e-06, + "loss": 0.2976, + "step": 17273 + }, + { + "epoch": 0.8092003560219234, + "grad_norm": 0.7732887907065963, + "learning_rate": 3.3749889648232187e-06, + "loss": 0.3166, + "step": 17274 + }, + { + "epoch": 0.8092472010118518, + "grad_norm": 0.671492729828172, + "learning_rate": 3.374811307272503e-06, + "loss": 0.3178, + "step": 17275 + }, + { + "epoch": 0.8092940460017801, + "grad_norm": 0.5910367779094358, + "learning_rate": 3.3746336446874684e-06, + "loss": 0.3052, + "step": 17276 + }, + { + "epoch": 0.8093408909917085, + "grad_norm": 0.5898614098504739, + "learning_rate": 3.3744559770691364e-06, + "loss": 0.3235, + "step": 17277 + }, + { + "epoch": 0.8093877359816367, + "grad_norm": 0.6029576264473466, + "learning_rate": 3.37427830441853e-06, + "loss": 0.3167, + "step": 17278 + }, + { + "epoch": 0.8094345809715651, + "grad_norm": 0.6051545066530998, + "learning_rate": 3.374100626736672e-06, + "loss": 0.3454, + "step": 17279 + }, + { + "epoch": 0.8094814259614934, + "grad_norm": 0.6268843261527943, + "learning_rate": 3.3739229440245845e-06, + "loss": 0.3085, + "step": 17280 + }, + { + "epoch": 0.8095282709514218, + "grad_norm": 0.5739689660121615, + "learning_rate": 3.37374525628329e-06, + "loss": 0.3133, + "step": 17281 + }, + { + "epoch": 0.8095751159413501, + "grad_norm": 0.5844609462420988, + "learning_rate": 3.373567563513811e-06, + "loss": 0.3173, + "step": 17282 + }, + { + "epoch": 0.8096219609312784, + "grad_norm": 0.6019064959892816, + "learning_rate": 3.3733898657171715e-06, + "loss": 0.3151, + "step": 17283 + }, + { + "epoch": 0.8096688059212067, + "grad_norm": 0.5742784789554604, + "learning_rate": 3.3732121628943914e-06, + "loss": 0.3293, + "step": 17284 + }, + { + "epoch": 0.8097156509111351, + "grad_norm": 0.5920305692647843, + "learning_rate": 3.3730344550464948e-06, + "loss": 0.3431, + "step": 17285 + }, + { + "epoch": 0.8097624959010634, + "grad_norm": 0.599351742561868, + "learning_rate": 3.3728567421745052e-06, + "loss": 0.3207, + "step": 17286 + }, + { + "epoch": 0.8098093408909917, + "grad_norm": 0.595576429315094, + "learning_rate": 3.3726790242794443e-06, + "loss": 0.3218, + "step": 17287 + }, + { + "epoch": 0.80985618588092, + "grad_norm": 0.585857175399497, + "learning_rate": 3.3725013013623343e-06, + "loss": 0.3121, + "step": 17288 + }, + { + "epoch": 0.8099030308708484, + "grad_norm": 0.5591993356634475, + "learning_rate": 3.3723235734241993e-06, + "loss": 0.3086, + "step": 17289 + }, + { + "epoch": 0.8099498758607767, + "grad_norm": 0.6167296531162157, + "learning_rate": 3.3721458404660614e-06, + "loss": 0.3298, + "step": 17290 + }, + { + "epoch": 0.8099967208507051, + "grad_norm": 0.5791258925708104, + "learning_rate": 3.3719681024889428e-06, + "loss": 0.3021, + "step": 17291 + }, + { + "epoch": 0.8100435658406333, + "grad_norm": 0.5722037191562466, + "learning_rate": 3.371790359493867e-06, + "loss": 0.3078, + "step": 17292 + }, + { + "epoch": 0.8100904108305617, + "grad_norm": 0.5914715753979982, + "learning_rate": 3.3716126114818577e-06, + "loss": 0.3137, + "step": 17293 + }, + { + "epoch": 0.81013725582049, + "grad_norm": 0.6316612965793545, + "learning_rate": 3.371434858453936e-06, + "loss": 0.3101, + "step": 17294 + }, + { + "epoch": 0.8101841008104184, + "grad_norm": 0.6249151052817585, + "learning_rate": 3.371257100411126e-06, + "loss": 0.3185, + "step": 17295 + }, + { + "epoch": 0.8102309458003466, + "grad_norm": 0.6141135432658896, + "learning_rate": 3.37107933735445e-06, + "loss": 0.3313, + "step": 17296 + }, + { + "epoch": 0.810277790790275, + "grad_norm": 0.5930665400761956, + "learning_rate": 3.3709015692849316e-06, + "loss": 0.3329, + "step": 17297 + }, + { + "epoch": 0.8103246357802033, + "grad_norm": 0.6094646215196644, + "learning_rate": 3.370723796203594e-06, + "loss": 0.3083, + "step": 17298 + }, + { + "epoch": 0.8103714807701317, + "grad_norm": 0.6149785318832524, + "learning_rate": 3.370546018111459e-06, + "loss": 0.324, + "step": 17299 + }, + { + "epoch": 0.81041832576006, + "grad_norm": 0.5812938285824879, + "learning_rate": 3.370368235009551e-06, + "loss": 0.3321, + "step": 17300 + }, + { + "epoch": 0.8104651707499883, + "grad_norm": 0.6080653304268073, + "learning_rate": 3.3701904468988928e-06, + "loss": 0.3207, + "step": 17301 + }, + { + "epoch": 0.8105120157399166, + "grad_norm": 0.5904128560579753, + "learning_rate": 3.3700126537805065e-06, + "loss": 0.3257, + "step": 17302 + }, + { + "epoch": 0.810558860729845, + "grad_norm": 0.6464214656859432, + "learning_rate": 3.369834855655416e-06, + "loss": 0.3358, + "step": 17303 + }, + { + "epoch": 0.8106057057197733, + "grad_norm": 0.5831741095620913, + "learning_rate": 3.3696570525246456e-06, + "loss": 0.317, + "step": 17304 + }, + { + "epoch": 0.8106525507097015, + "grad_norm": 0.5759046106120425, + "learning_rate": 3.3694792443892165e-06, + "loss": 0.3171, + "step": 17305 + }, + { + "epoch": 0.8106993956996299, + "grad_norm": 0.6310405123733139, + "learning_rate": 3.3693014312501533e-06, + "loss": 0.3227, + "step": 17306 + }, + { + "epoch": 0.8107462406895583, + "grad_norm": 0.6380102734936787, + "learning_rate": 3.3691236131084787e-06, + "loss": 0.3445, + "step": 17307 + }, + { + "epoch": 0.8107930856794866, + "grad_norm": 0.5604933633769331, + "learning_rate": 3.3689457899652165e-06, + "loss": 0.3098, + "step": 17308 + }, + { + "epoch": 0.810839930669415, + "grad_norm": 0.6124819194961583, + "learning_rate": 3.3687679618213894e-06, + "loss": 0.3436, + "step": 17309 + }, + { + "epoch": 0.8108867756593432, + "grad_norm": 0.608768425155536, + "learning_rate": 3.368590128678021e-06, + "loss": 0.3142, + "step": 17310 + }, + { + "epoch": 0.8109336206492715, + "grad_norm": 0.5734742661887002, + "learning_rate": 3.368412290536135e-06, + "loss": 0.3052, + "step": 17311 + }, + { + "epoch": 0.8109804656391999, + "grad_norm": 0.6043693307875865, + "learning_rate": 3.3682344473967544e-06, + "loss": 0.3401, + "step": 17312 + }, + { + "epoch": 0.8110273106291283, + "grad_norm": 0.6405143199562469, + "learning_rate": 3.368056599260903e-06, + "loss": 0.3258, + "step": 17313 + }, + { + "epoch": 0.8110741556190565, + "grad_norm": 0.6193057877015572, + "learning_rate": 3.367878746129604e-06, + "loss": 0.3173, + "step": 17314 + }, + { + "epoch": 0.8111210006089848, + "grad_norm": 0.5819997389554687, + "learning_rate": 3.3677008880038796e-06, + "loss": 0.3176, + "step": 17315 + }, + { + "epoch": 0.8111678455989132, + "grad_norm": 0.6019308553220987, + "learning_rate": 3.3675230248847564e-06, + "loss": 0.3056, + "step": 17316 + }, + { + "epoch": 0.8112146905888415, + "grad_norm": 0.6777871845198874, + "learning_rate": 3.367345156773255e-06, + "loss": 0.3246, + "step": 17317 + }, + { + "epoch": 0.8112615355787699, + "grad_norm": 0.6094761023169945, + "learning_rate": 3.3671672836704013e-06, + "loss": 0.319, + "step": 17318 + }, + { + "epoch": 0.8113083805686981, + "grad_norm": 0.5681674399824471, + "learning_rate": 3.3669894055772175e-06, + "loss": 0.32, + "step": 17319 + }, + { + "epoch": 0.8113552255586265, + "grad_norm": 0.6166020853782805, + "learning_rate": 3.3668115224947267e-06, + "loss": 0.3351, + "step": 17320 + }, + { + "epoch": 0.8114020705485548, + "grad_norm": 0.6401609114634655, + "learning_rate": 3.3666336344239547e-06, + "loss": 0.3447, + "step": 17321 + }, + { + "epoch": 0.8114489155384832, + "grad_norm": 0.586229805367424, + "learning_rate": 3.366455741365924e-06, + "loss": 0.3112, + "step": 17322 + }, + { + "epoch": 0.8114957605284114, + "grad_norm": 0.5935985594129058, + "learning_rate": 3.366277843321657e-06, + "loss": 0.3465, + "step": 17323 + }, + { + "epoch": 0.8115426055183398, + "grad_norm": 0.5790961090312758, + "learning_rate": 3.366099940292179e-06, + "loss": 0.3096, + "step": 17324 + }, + { + "epoch": 0.8115894505082681, + "grad_norm": 0.5735363462784066, + "learning_rate": 3.3659220322785147e-06, + "loss": 0.3091, + "step": 17325 + }, + { + "epoch": 0.8116362954981965, + "grad_norm": 0.57378175673451, + "learning_rate": 3.3657441192816858e-06, + "loss": 0.3039, + "step": 17326 + }, + { + "epoch": 0.8116831404881248, + "grad_norm": 0.6073908564790188, + "learning_rate": 3.365566201302717e-06, + "loss": 0.3214, + "step": 17327 + }, + { + "epoch": 0.8117299854780531, + "grad_norm": 0.5907958397066021, + "learning_rate": 3.365388278342633e-06, + "loss": 0.3221, + "step": 17328 + }, + { + "epoch": 0.8117768304679814, + "grad_norm": 0.5744582741398543, + "learning_rate": 3.3652103504024567e-06, + "loss": 0.3059, + "step": 17329 + }, + { + "epoch": 0.8118236754579098, + "grad_norm": 0.5704605567217023, + "learning_rate": 3.3650324174832117e-06, + "loss": 0.3149, + "step": 17330 + }, + { + "epoch": 0.8118705204478381, + "grad_norm": 0.5809811562484064, + "learning_rate": 3.3648544795859235e-06, + "loss": 0.3165, + "step": 17331 + }, + { + "epoch": 0.8119173654377664, + "grad_norm": 0.5518998812029946, + "learning_rate": 3.3646765367116146e-06, + "loss": 0.3116, + "step": 17332 + }, + { + "epoch": 0.8119642104276947, + "grad_norm": 0.5999895720519807, + "learning_rate": 3.36449858886131e-06, + "loss": 0.3225, + "step": 17333 + }, + { + "epoch": 0.8120110554176231, + "grad_norm": 0.5783295107967091, + "learning_rate": 3.3643206360360324e-06, + "loss": 0.3129, + "step": 17334 + }, + { + "epoch": 0.8120579004075514, + "grad_norm": 0.5851011114361704, + "learning_rate": 3.364142678236807e-06, + "loss": 0.3259, + "step": 17335 + }, + { + "epoch": 0.8121047453974798, + "grad_norm": 0.635891259097742, + "learning_rate": 3.363964715464658e-06, + "loss": 0.3052, + "step": 17336 + }, + { + "epoch": 0.812151590387408, + "grad_norm": 0.5842289305719323, + "learning_rate": 3.3637867477206097e-06, + "loss": 0.3019, + "step": 17337 + }, + { + "epoch": 0.8121984353773364, + "grad_norm": 0.5888273639079411, + "learning_rate": 3.363608775005685e-06, + "loss": 0.3138, + "step": 17338 + }, + { + "epoch": 0.8122452803672647, + "grad_norm": 0.6134539784250301, + "learning_rate": 3.363430797320909e-06, + "loss": 0.307, + "step": 17339 + }, + { + "epoch": 0.8122921253571931, + "grad_norm": 0.5882659855846514, + "learning_rate": 3.3632528146673067e-06, + "loss": 0.2922, + "step": 17340 + }, + { + "epoch": 0.8123389703471213, + "grad_norm": 0.5891884821383054, + "learning_rate": 3.3630748270459e-06, + "loss": 0.336, + "step": 17341 + }, + { + "epoch": 0.8123858153370497, + "grad_norm": 0.5737945002236573, + "learning_rate": 3.3628968344577156e-06, + "loss": 0.3131, + "step": 17342 + }, + { + "epoch": 0.812432660326978, + "grad_norm": 0.5427793242486013, + "learning_rate": 3.3627188369037767e-06, + "loss": 0.2956, + "step": 17343 + }, + { + "epoch": 0.8124795053169064, + "grad_norm": 0.5586484320935226, + "learning_rate": 3.362540834385107e-06, + "loss": 0.3177, + "step": 17344 + }, + { + "epoch": 0.8125263503068347, + "grad_norm": 0.6322376610256671, + "learning_rate": 3.362362826902732e-06, + "loss": 0.3218, + "step": 17345 + }, + { + "epoch": 0.812573195296763, + "grad_norm": 0.6507317758588446, + "learning_rate": 3.3621848144576764e-06, + "loss": 0.3311, + "step": 17346 + }, + { + "epoch": 0.8126200402866913, + "grad_norm": 0.6192640913399907, + "learning_rate": 3.3620067970509627e-06, + "loss": 0.333, + "step": 17347 + }, + { + "epoch": 0.8126668852766197, + "grad_norm": 0.6333800904761785, + "learning_rate": 3.3618287746836163e-06, + "loss": 0.3379, + "step": 17348 + }, + { + "epoch": 0.812713730266548, + "grad_norm": 0.6069524687348303, + "learning_rate": 3.361650747356663e-06, + "loss": 0.3082, + "step": 17349 + }, + { + "epoch": 0.8127605752564763, + "grad_norm": 0.6135265415362412, + "learning_rate": 3.3614727150711264e-06, + "loss": 0.3074, + "step": 17350 + }, + { + "epoch": 0.8128074202464046, + "grad_norm": 0.5505357894775746, + "learning_rate": 3.3612946778280297e-06, + "loss": 0.3098, + "step": 17351 + }, + { + "epoch": 0.812854265236333, + "grad_norm": 0.6018103606564187, + "learning_rate": 3.361116635628399e-06, + "loss": 0.3166, + "step": 17352 + }, + { + "epoch": 0.8129011102262613, + "grad_norm": 0.5669070709254204, + "learning_rate": 3.3609385884732594e-06, + "loss": 0.3144, + "step": 17353 + }, + { + "epoch": 0.8129479552161897, + "grad_norm": 0.5836872335162228, + "learning_rate": 3.3607605363636334e-06, + "loss": 0.3145, + "step": 17354 + }, + { + "epoch": 0.8129948002061179, + "grad_norm": 0.6126715091773157, + "learning_rate": 3.3605824793005478e-06, + "loss": 0.3208, + "step": 17355 + }, + { + "epoch": 0.8130416451960463, + "grad_norm": 0.5971671348325791, + "learning_rate": 3.360404417285026e-06, + "loss": 0.3159, + "step": 17356 + }, + { + "epoch": 0.8130884901859746, + "grad_norm": 0.6089838437590955, + "learning_rate": 3.360226350318093e-06, + "loss": 0.3435, + "step": 17357 + }, + { + "epoch": 0.813135335175903, + "grad_norm": 0.5568767379625582, + "learning_rate": 3.3600482784007732e-06, + "loss": 0.3035, + "step": 17358 + }, + { + "epoch": 0.8131821801658312, + "grad_norm": 0.5679164547890339, + "learning_rate": 3.3598702015340924e-06, + "loss": 0.3023, + "step": 17359 + }, + { + "epoch": 0.8132290251557596, + "grad_norm": 0.568594249502136, + "learning_rate": 3.3596921197190747e-06, + "loss": 0.3231, + "step": 17360 + }, + { + "epoch": 0.8132758701456879, + "grad_norm": 0.6471808582307402, + "learning_rate": 3.359514032956744e-06, + "loss": 0.3322, + "step": 17361 + }, + { + "epoch": 0.8133227151356163, + "grad_norm": 0.6273185207975756, + "learning_rate": 3.359335941248127e-06, + "loss": 0.3057, + "step": 17362 + }, + { + "epoch": 0.8133695601255446, + "grad_norm": 0.5408374264166083, + "learning_rate": 3.3591578445942473e-06, + "loss": 0.3115, + "step": 17363 + }, + { + "epoch": 0.8134164051154729, + "grad_norm": 0.583368605822333, + "learning_rate": 3.35897974299613e-06, + "loss": 0.3177, + "step": 17364 + }, + { + "epoch": 0.8134632501054012, + "grad_norm": 0.5919867628439838, + "learning_rate": 3.3588016364548003e-06, + "loss": 0.3209, + "step": 17365 + }, + { + "epoch": 0.8135100950953296, + "grad_norm": 0.5953463389621394, + "learning_rate": 3.358623524971283e-06, + "loss": 0.3169, + "step": 17366 + }, + { + "epoch": 0.8135569400852579, + "grad_norm": 0.5808800831789777, + "learning_rate": 3.3584454085466034e-06, + "loss": 0.327, + "step": 17367 + }, + { + "epoch": 0.8136037850751862, + "grad_norm": 0.5758952932753246, + "learning_rate": 3.358267287181786e-06, + "loss": 0.3117, + "step": 17368 + }, + { + "epoch": 0.8136506300651145, + "grad_norm": 0.6327930951416041, + "learning_rate": 3.3580891608778558e-06, + "loss": 0.3425, + "step": 17369 + }, + { + "epoch": 0.8136974750550429, + "grad_norm": 0.5527646478535945, + "learning_rate": 3.3579110296358386e-06, + "loss": 0.3265, + "step": 17370 + }, + { + "epoch": 0.8137443200449712, + "grad_norm": 0.5858190469688596, + "learning_rate": 3.3577328934567594e-06, + "loss": 0.2974, + "step": 17371 + }, + { + "epoch": 0.8137911650348996, + "grad_norm": 0.5934032322838556, + "learning_rate": 3.357554752341642e-06, + "loss": 0.3186, + "step": 17372 + }, + { + "epoch": 0.8138380100248278, + "grad_norm": 0.5989930159404246, + "learning_rate": 3.3573766062915126e-06, + "loss": 0.3372, + "step": 17373 + }, + { + "epoch": 0.8138848550147562, + "grad_norm": 0.5772650399201273, + "learning_rate": 3.357198455307398e-06, + "loss": 0.3216, + "step": 17374 + }, + { + "epoch": 0.8139317000046845, + "grad_norm": 0.5443095021440284, + "learning_rate": 3.3570202993903202e-06, + "loss": 0.3135, + "step": 17375 + }, + { + "epoch": 0.8139785449946129, + "grad_norm": 0.6161415839666502, + "learning_rate": 3.3568421385413053e-06, + "loss": 0.3163, + "step": 17376 + }, + { + "epoch": 0.8140253899845411, + "grad_norm": 0.5671329371662597, + "learning_rate": 3.3566639727613803e-06, + "loss": 0.3028, + "step": 17377 + }, + { + "epoch": 0.8140722349744695, + "grad_norm": 0.5767697308470068, + "learning_rate": 3.3564858020515703e-06, + "loss": 0.3131, + "step": 17378 + }, + { + "epoch": 0.8141190799643978, + "grad_norm": 0.5904064518226285, + "learning_rate": 3.356307626412898e-06, + "loss": 0.3183, + "step": 17379 + }, + { + "epoch": 0.8141659249543262, + "grad_norm": 0.6021949489795295, + "learning_rate": 3.3561294458463917e-06, + "loss": 0.3134, + "step": 17380 + }, + { + "epoch": 0.8142127699442545, + "grad_norm": 0.6172727794938433, + "learning_rate": 3.3559512603530755e-06, + "loss": 0.3296, + "step": 17381 + }, + { + "epoch": 0.8142596149341828, + "grad_norm": 0.6081429357165508, + "learning_rate": 3.3557730699339743e-06, + "loss": 0.3335, + "step": 17382 + }, + { + "epoch": 0.8143064599241111, + "grad_norm": 0.6260899172542252, + "learning_rate": 3.355594874590115e-06, + "loss": 0.3239, + "step": 17383 + }, + { + "epoch": 0.8143533049140395, + "grad_norm": 0.5945692426498533, + "learning_rate": 3.355416674322522e-06, + "loss": 0.3268, + "step": 17384 + }, + { + "epoch": 0.8144001499039678, + "grad_norm": 0.5678460477201165, + "learning_rate": 3.3552384691322203e-06, + "loss": 0.2965, + "step": 17385 + }, + { + "epoch": 0.814446994893896, + "grad_norm": 0.5748504129716365, + "learning_rate": 3.3550602590202375e-06, + "loss": 0.3008, + "step": 17386 + }, + { + "epoch": 0.8144938398838244, + "grad_norm": 0.5690658730621982, + "learning_rate": 3.3548820439875964e-06, + "loss": 0.3084, + "step": 17387 + }, + { + "epoch": 0.8145406848737528, + "grad_norm": 0.6214955953422556, + "learning_rate": 3.354703824035325e-06, + "loss": 0.3387, + "step": 17388 + }, + { + "epoch": 0.8145875298636811, + "grad_norm": 0.6268579278169669, + "learning_rate": 3.3545255991644477e-06, + "loss": 0.3152, + "step": 17389 + }, + { + "epoch": 0.8146343748536095, + "grad_norm": 0.62675095706936, + "learning_rate": 3.35434736937599e-06, + "loss": 0.3288, + "step": 17390 + }, + { + "epoch": 0.8146812198435377, + "grad_norm": 0.616074802064109, + "learning_rate": 3.354169134670978e-06, + "loss": 0.3264, + "step": 17391 + }, + { + "epoch": 0.814728064833466, + "grad_norm": 0.5917293846562439, + "learning_rate": 3.353990895050438e-06, + "loss": 0.3046, + "step": 17392 + }, + { + "epoch": 0.8147749098233944, + "grad_norm": 0.5508005392776459, + "learning_rate": 3.3538126505153945e-06, + "loss": 0.3097, + "step": 17393 + }, + { + "epoch": 0.8148217548133228, + "grad_norm": 0.5897841450380656, + "learning_rate": 3.353634401066873e-06, + "loss": 0.3326, + "step": 17394 + }, + { + "epoch": 0.814868599803251, + "grad_norm": 0.5982687172802085, + "learning_rate": 3.3534561467059017e-06, + "loss": 0.3288, + "step": 17395 + }, + { + "epoch": 0.8149154447931793, + "grad_norm": 0.5746808228321002, + "learning_rate": 3.3532778874335035e-06, + "loss": 0.3074, + "step": 17396 + }, + { + "epoch": 0.8149622897831077, + "grad_norm": 0.6123777140992778, + "learning_rate": 3.3530996232507062e-06, + "loss": 0.3311, + "step": 17397 + }, + { + "epoch": 0.815009134773036, + "grad_norm": 0.5992057394184537, + "learning_rate": 3.3529213541585348e-06, + "loss": 0.307, + "step": 17398 + }, + { + "epoch": 0.8150559797629644, + "grad_norm": 0.5882483810787841, + "learning_rate": 3.352743080158016e-06, + "loss": 0.2953, + "step": 17399 + }, + { + "epoch": 0.8151028247528926, + "grad_norm": 0.5793100485498981, + "learning_rate": 3.3525648012501737e-06, + "loss": 0.3215, + "step": 17400 + }, + { + "epoch": 0.815149669742821, + "grad_norm": 0.5850440516263532, + "learning_rate": 3.352386517436036e-06, + "loss": 0.309, + "step": 17401 + }, + { + "epoch": 0.8151965147327493, + "grad_norm": 0.6044800922664849, + "learning_rate": 3.3522082287166285e-06, + "loss": 0.3207, + "step": 17402 + }, + { + "epoch": 0.8152433597226777, + "grad_norm": 0.5965809490945424, + "learning_rate": 3.3520299350929764e-06, + "loss": 0.3246, + "step": 17403 + }, + { + "epoch": 0.8152902047126059, + "grad_norm": 0.5969990107858973, + "learning_rate": 3.3518516365661064e-06, + "loss": 0.3139, + "step": 17404 + }, + { + "epoch": 0.8153370497025343, + "grad_norm": 0.6775948274756187, + "learning_rate": 3.351673333137044e-06, + "loss": 0.3265, + "step": 17405 + }, + { + "epoch": 0.8153838946924626, + "grad_norm": 0.5835377688495043, + "learning_rate": 3.3514950248068156e-06, + "loss": 0.319, + "step": 17406 + }, + { + "epoch": 0.815430739682391, + "grad_norm": 0.5842047329810716, + "learning_rate": 3.3513167115764476e-06, + "loss": 0.3361, + "step": 17407 + }, + { + "epoch": 0.8154775846723193, + "grad_norm": 0.519635639398575, + "learning_rate": 3.351138393446966e-06, + "loss": 0.2944, + "step": 17408 + }, + { + "epoch": 0.8155244296622476, + "grad_norm": 0.5445007563611988, + "learning_rate": 3.350960070419397e-06, + "loss": 0.3309, + "step": 17409 + }, + { + "epoch": 0.8155712746521759, + "grad_norm": 0.6511906707223373, + "learning_rate": 3.3507817424947666e-06, + "loss": 0.34, + "step": 17410 + }, + { + "epoch": 0.8156181196421043, + "grad_norm": 0.6343396994459308, + "learning_rate": 3.3506034096741003e-06, + "loss": 0.319, + "step": 17411 + }, + { + "epoch": 0.8156649646320326, + "grad_norm": 0.5963605153675988, + "learning_rate": 3.3504250719584264e-06, + "loss": 0.3457, + "step": 17412 + }, + { + "epoch": 0.8157118096219609, + "grad_norm": 0.6028956684392959, + "learning_rate": 3.3502467293487693e-06, + "loss": 0.3179, + "step": 17413 + }, + { + "epoch": 0.8157586546118892, + "grad_norm": 0.5433879209035322, + "learning_rate": 3.350068381846156e-06, + "loss": 0.3076, + "step": 17414 + }, + { + "epoch": 0.8158054996018176, + "grad_norm": 0.6224145044441942, + "learning_rate": 3.349890029451612e-06, + "loss": 0.3385, + "step": 17415 + }, + { + "epoch": 0.8158523445917459, + "grad_norm": 0.6306631412891115, + "learning_rate": 3.3497116721661666e-06, + "loss": 0.3326, + "step": 17416 + }, + { + "epoch": 0.8158991895816743, + "grad_norm": 0.5973046494748895, + "learning_rate": 3.349533309990842e-06, + "loss": 0.3344, + "step": 17417 + }, + { + "epoch": 0.8159460345716025, + "grad_norm": 0.630042213891708, + "learning_rate": 3.3493549429266675e-06, + "loss": 0.3181, + "step": 17418 + }, + { + "epoch": 0.8159928795615309, + "grad_norm": 0.5870987733017412, + "learning_rate": 3.3491765709746694e-06, + "loss": 0.3377, + "step": 17419 + }, + { + "epoch": 0.8160397245514592, + "grad_norm": 0.595393803553946, + "learning_rate": 3.348998194135873e-06, + "loss": 0.3134, + "step": 17420 + }, + { + "epoch": 0.8160865695413876, + "grad_norm": 0.6065759821325497, + "learning_rate": 3.3488198124113047e-06, + "loss": 0.3167, + "step": 17421 + }, + { + "epoch": 0.8161334145313158, + "grad_norm": 0.5977110738857381, + "learning_rate": 3.3486414258019922e-06, + "loss": 0.3338, + "step": 17422 + }, + { + "epoch": 0.8161802595212442, + "grad_norm": 0.6205365792158646, + "learning_rate": 3.348463034308963e-06, + "loss": 0.3303, + "step": 17423 + }, + { + "epoch": 0.8162271045111725, + "grad_norm": 0.6297526681716666, + "learning_rate": 3.348284637933241e-06, + "loss": 0.3412, + "step": 17424 + }, + { + "epoch": 0.8162739495011009, + "grad_norm": 0.6047646131792236, + "learning_rate": 3.348106236675853e-06, + "loss": 0.3134, + "step": 17425 + }, + { + "epoch": 0.8163207944910292, + "grad_norm": 0.6178485506788269, + "learning_rate": 3.3479278305378288e-06, + "loss": 0.3215, + "step": 17426 + }, + { + "epoch": 0.8163676394809575, + "grad_norm": 0.5585795765227762, + "learning_rate": 3.347749419520192e-06, + "loss": 0.31, + "step": 17427 + }, + { + "epoch": 0.8164144844708858, + "grad_norm": 0.5687426997138934, + "learning_rate": 3.3475710036239705e-06, + "loss": 0.3094, + "step": 17428 + }, + { + "epoch": 0.8164613294608142, + "grad_norm": 0.6339116977446289, + "learning_rate": 3.3473925828501914e-06, + "loss": 0.3307, + "step": 17429 + }, + { + "epoch": 0.8165081744507425, + "grad_norm": 0.6150387635461741, + "learning_rate": 3.3472141571998806e-06, + "loss": 0.321, + "step": 17430 + }, + { + "epoch": 0.8165550194406708, + "grad_norm": 0.5991502739619393, + "learning_rate": 3.347035726674065e-06, + "loss": 0.3269, + "step": 17431 + }, + { + "epoch": 0.8166018644305991, + "grad_norm": 0.5837370744213563, + "learning_rate": 3.346857291273772e-06, + "loss": 0.3092, + "step": 17432 + }, + { + "epoch": 0.8166487094205275, + "grad_norm": 0.5521263128550704, + "learning_rate": 3.346678851000028e-06, + "loss": 0.3055, + "step": 17433 + }, + { + "epoch": 0.8166955544104558, + "grad_norm": 0.5443201730111465, + "learning_rate": 3.3465004058538598e-06, + "loss": 0.3052, + "step": 17434 + }, + { + "epoch": 0.8167423994003842, + "grad_norm": 0.6179245029883973, + "learning_rate": 3.3463219558362953e-06, + "loss": 0.3278, + "step": 17435 + }, + { + "epoch": 0.8167892443903124, + "grad_norm": 0.6198653753296033, + "learning_rate": 3.34614350094836e-06, + "loss": 0.3338, + "step": 17436 + }, + { + "epoch": 0.8168360893802408, + "grad_norm": 0.5661641727026251, + "learning_rate": 3.3459650411910817e-06, + "loss": 0.3205, + "step": 17437 + }, + { + "epoch": 0.8168829343701691, + "grad_norm": 0.5674358655639358, + "learning_rate": 3.3457865765654875e-06, + "loss": 0.3097, + "step": 17438 + }, + { + "epoch": 0.8169297793600975, + "grad_norm": 0.5593639919529116, + "learning_rate": 3.345608107072603e-06, + "loss": 0.3054, + "step": 17439 + }, + { + "epoch": 0.8169766243500257, + "grad_norm": 0.5477468765829265, + "learning_rate": 3.3454296327134577e-06, + "loss": 0.3074, + "step": 17440 + }, + { + "epoch": 0.8170234693399541, + "grad_norm": 0.5972008795640151, + "learning_rate": 3.3452511534890774e-06, + "loss": 0.3067, + "step": 17441 + }, + { + "epoch": 0.8170703143298824, + "grad_norm": 0.5794890037392325, + "learning_rate": 3.345072669400488e-06, + "loss": 0.3117, + "step": 17442 + }, + { + "epoch": 0.8171171593198108, + "grad_norm": 0.5538105688079206, + "learning_rate": 3.344894180448718e-06, + "loss": 0.3215, + "step": 17443 + }, + { + "epoch": 0.8171640043097391, + "grad_norm": 0.5941033226077905, + "learning_rate": 3.3447156866347956e-06, + "loss": 0.3209, + "step": 17444 + }, + { + "epoch": 0.8172108492996674, + "grad_norm": 0.5617607332141579, + "learning_rate": 3.3445371879597453e-06, + "loss": 0.3126, + "step": 17445 + }, + { + "epoch": 0.8172576942895957, + "grad_norm": 0.6085019482270363, + "learning_rate": 3.344358684424596e-06, + "loss": 0.3228, + "step": 17446 + }, + { + "epoch": 0.8173045392795241, + "grad_norm": 0.5914176200622869, + "learning_rate": 3.3441801760303756e-06, + "loss": 0.311, + "step": 17447 + }, + { + "epoch": 0.8173513842694524, + "grad_norm": 0.52344484955273, + "learning_rate": 3.3440016627781103e-06, + "loss": 0.3134, + "step": 17448 + }, + { + "epoch": 0.8173982292593807, + "grad_norm": 0.6502312475757518, + "learning_rate": 3.3438231446688263e-06, + "loss": 0.333, + "step": 17449 + }, + { + "epoch": 0.817445074249309, + "grad_norm": 0.569509543666145, + "learning_rate": 3.3436446217035532e-06, + "loss": 0.3238, + "step": 17450 + }, + { + "epoch": 0.8174919192392374, + "grad_norm": 0.6390593196881976, + "learning_rate": 3.3434660938833173e-06, + "loss": 0.3331, + "step": 17451 + }, + { + "epoch": 0.8175387642291657, + "grad_norm": 0.5592576784491977, + "learning_rate": 3.343287561209146e-06, + "loss": 0.3002, + "step": 17452 + }, + { + "epoch": 0.8175856092190941, + "grad_norm": 0.5744805184606985, + "learning_rate": 3.343109023682067e-06, + "loss": 0.3003, + "step": 17453 + }, + { + "epoch": 0.8176324542090223, + "grad_norm": 0.5468512606327264, + "learning_rate": 3.342930481303107e-06, + "loss": 0.3087, + "step": 17454 + }, + { + "epoch": 0.8176792991989507, + "grad_norm": 0.6060857952446609, + "learning_rate": 3.342751934073294e-06, + "loss": 0.3159, + "step": 17455 + }, + { + "epoch": 0.817726144188879, + "grad_norm": 0.6351129430342631, + "learning_rate": 3.3425733819936555e-06, + "loss": 0.3273, + "step": 17456 + }, + { + "epoch": 0.8177729891788074, + "grad_norm": 0.6262520555932196, + "learning_rate": 3.3423948250652187e-06, + "loss": 0.3151, + "step": 17457 + }, + { + "epoch": 0.8178198341687356, + "grad_norm": 0.6546183959148191, + "learning_rate": 3.342216263289012e-06, + "loss": 0.3304, + "step": 17458 + }, + { + "epoch": 0.817866679158664, + "grad_norm": 0.5766430491094235, + "learning_rate": 3.342037696666062e-06, + "loss": 0.3257, + "step": 17459 + }, + { + "epoch": 0.8179135241485923, + "grad_norm": 0.5927526532051952, + "learning_rate": 3.3418591251973968e-06, + "loss": 0.3249, + "step": 17460 + }, + { + "epoch": 0.8179603691385207, + "grad_norm": 0.5986344372187715, + "learning_rate": 3.3416805488840443e-06, + "loss": 0.3043, + "step": 17461 + }, + { + "epoch": 0.818007214128449, + "grad_norm": 0.5739332544647373, + "learning_rate": 3.341501967727031e-06, + "loss": 0.3037, + "step": 17462 + }, + { + "epoch": 0.8180540591183773, + "grad_norm": 0.6143581367729737, + "learning_rate": 3.341323381727386e-06, + "loss": 0.3051, + "step": 17463 + }, + { + "epoch": 0.8181009041083056, + "grad_norm": 0.5745792948966365, + "learning_rate": 3.3411447908861355e-06, + "loss": 0.3179, + "step": 17464 + }, + { + "epoch": 0.818147749098234, + "grad_norm": 0.6661786698180115, + "learning_rate": 3.34096619520431e-06, + "loss": 0.3384, + "step": 17465 + }, + { + "epoch": 0.8181945940881623, + "grad_norm": 0.5603952932650024, + "learning_rate": 3.340787594682934e-06, + "loss": 0.3245, + "step": 17466 + }, + { + "epoch": 0.8182414390780905, + "grad_norm": 0.5702799009205053, + "learning_rate": 3.3406089893230365e-06, + "loss": 0.3209, + "step": 17467 + }, + { + "epoch": 0.8182882840680189, + "grad_norm": 0.5938485201134404, + "learning_rate": 3.340430379125646e-06, + "loss": 0.3375, + "step": 17468 + }, + { + "epoch": 0.8183351290579473, + "grad_norm": 0.6005868254483577, + "learning_rate": 3.34025176409179e-06, + "loss": 0.339, + "step": 17469 + }, + { + "epoch": 0.8183819740478756, + "grad_norm": 0.5617908306660884, + "learning_rate": 3.340073144222496e-06, + "loss": 0.3021, + "step": 17470 + }, + { + "epoch": 0.818428819037804, + "grad_norm": 0.6241716433080329, + "learning_rate": 3.3398945195187926e-06, + "loss": 0.3238, + "step": 17471 + }, + { + "epoch": 0.8184756640277322, + "grad_norm": 0.5730148985459991, + "learning_rate": 3.3397158899817073e-06, + "loss": 0.2977, + "step": 17472 + }, + { + "epoch": 0.8185225090176605, + "grad_norm": 0.5476695915957419, + "learning_rate": 3.3395372556122673e-06, + "loss": 0.3108, + "step": 17473 + }, + { + "epoch": 0.8185693540075889, + "grad_norm": 0.5599939972690324, + "learning_rate": 3.339358616411502e-06, + "loss": 0.3011, + "step": 17474 + }, + { + "epoch": 0.8186161989975173, + "grad_norm": 0.5616143390367612, + "learning_rate": 3.3391799723804397e-06, + "loss": 0.3168, + "step": 17475 + }, + { + "epoch": 0.8186630439874455, + "grad_norm": 0.5994586903677004, + "learning_rate": 3.3390013235201057e-06, + "loss": 0.3301, + "step": 17476 + }, + { + "epoch": 0.8187098889773738, + "grad_norm": 0.6057860714465526, + "learning_rate": 3.338822669831532e-06, + "loss": 0.3332, + "step": 17477 + }, + { + "epoch": 0.8187567339673022, + "grad_norm": 0.635933198115345, + "learning_rate": 3.3386440113157427e-06, + "loss": 0.3184, + "step": 17478 + }, + { + "epoch": 0.8188035789572305, + "grad_norm": 0.6316860528657939, + "learning_rate": 3.3384653479737695e-06, + "loss": 0.3366, + "step": 17479 + }, + { + "epoch": 0.8188504239471589, + "grad_norm": 0.6047973758626797, + "learning_rate": 3.3382866798066383e-06, + "loss": 0.334, + "step": 17480 + }, + { + "epoch": 0.8188972689370871, + "grad_norm": 0.5658900136181784, + "learning_rate": 3.3381080068153775e-06, + "loss": 0.3017, + "step": 17481 + }, + { + "epoch": 0.8189441139270155, + "grad_norm": 0.5555289842187463, + "learning_rate": 3.3379293290010167e-06, + "loss": 0.3159, + "step": 17482 + }, + { + "epoch": 0.8189909589169438, + "grad_norm": 0.5680699670010189, + "learning_rate": 3.3377506463645824e-06, + "loss": 0.3118, + "step": 17483 + }, + { + "epoch": 0.8190378039068722, + "grad_norm": 0.601285130377314, + "learning_rate": 3.3375719589071043e-06, + "loss": 0.3412, + "step": 17484 + }, + { + "epoch": 0.8190846488968004, + "grad_norm": 0.5751061042904545, + "learning_rate": 3.3373932666296093e-06, + "loss": 0.3189, + "step": 17485 + }, + { + "epoch": 0.8191314938867288, + "grad_norm": 0.6313894201122658, + "learning_rate": 3.337214569533127e-06, + "loss": 0.3166, + "step": 17486 + }, + { + "epoch": 0.8191783388766571, + "grad_norm": 0.6220023840687209, + "learning_rate": 3.3370358676186857e-06, + "loss": 0.325, + "step": 17487 + }, + { + "epoch": 0.8192251838665855, + "grad_norm": 0.5821169940541523, + "learning_rate": 3.336857160887313e-06, + "loss": 0.2967, + "step": 17488 + }, + { + "epoch": 0.8192720288565138, + "grad_norm": 0.6261495110199853, + "learning_rate": 3.336678449340038e-06, + "loss": 0.3303, + "step": 17489 + }, + { + "epoch": 0.8193188738464421, + "grad_norm": 0.6411210532968277, + "learning_rate": 3.336499732977888e-06, + "loss": 0.3303, + "step": 17490 + }, + { + "epoch": 0.8193657188363704, + "grad_norm": 0.5727818462190195, + "learning_rate": 3.336321011801893e-06, + "loss": 0.3324, + "step": 17491 + }, + { + "epoch": 0.8194125638262988, + "grad_norm": 0.6634509919603728, + "learning_rate": 3.3361422858130797e-06, + "loss": 0.3321, + "step": 17492 + }, + { + "epoch": 0.8194594088162271, + "grad_norm": 0.57445713019447, + "learning_rate": 3.3359635550124797e-06, + "loss": 0.3042, + "step": 17493 + }, + { + "epoch": 0.8195062538061554, + "grad_norm": 0.574575147047614, + "learning_rate": 3.335784819401118e-06, + "loss": 0.3108, + "step": 17494 + }, + { + "epoch": 0.8195530987960837, + "grad_norm": 0.6188862460888332, + "learning_rate": 3.335606078980025e-06, + "loss": 0.3062, + "step": 17495 + }, + { + "epoch": 0.8195999437860121, + "grad_norm": 0.6147045316328162, + "learning_rate": 3.335427333750229e-06, + "loss": 0.3345, + "step": 17496 + }, + { + "epoch": 0.8196467887759404, + "grad_norm": 0.5855410306329243, + "learning_rate": 3.335248583712759e-06, + "loss": 0.3199, + "step": 17497 + }, + { + "epoch": 0.8196936337658688, + "grad_norm": 0.6099956969854046, + "learning_rate": 3.3350698288686436e-06, + "loss": 0.3329, + "step": 17498 + }, + { + "epoch": 0.819740478755797, + "grad_norm": 0.5629342958553832, + "learning_rate": 3.3348910692189107e-06, + "loss": 0.3161, + "step": 17499 + }, + { + "epoch": 0.8197873237457254, + "grad_norm": 0.6279356863091141, + "learning_rate": 3.3347123047645897e-06, + "loss": 0.3389, + "step": 17500 + }, + { + "epoch": 0.8198341687356537, + "grad_norm": 0.5607537199883504, + "learning_rate": 3.334533535506709e-06, + "loss": 0.3097, + "step": 17501 + }, + { + "epoch": 0.8198810137255821, + "grad_norm": 0.6862597998202523, + "learning_rate": 3.3343547614462978e-06, + "loss": 0.3605, + "step": 17502 + }, + { + "epoch": 0.8199278587155103, + "grad_norm": 0.5744906507742334, + "learning_rate": 3.3341759825843847e-06, + "loss": 0.2988, + "step": 17503 + }, + { + "epoch": 0.8199747037054387, + "grad_norm": 0.5230052521916352, + "learning_rate": 3.333997198921998e-06, + "loss": 0.2953, + "step": 17504 + }, + { + "epoch": 0.820021548695367, + "grad_norm": 0.577012203028168, + "learning_rate": 3.3338184104601674e-06, + "loss": 0.2888, + "step": 17505 + }, + { + "epoch": 0.8200683936852954, + "grad_norm": 0.6444573456786779, + "learning_rate": 3.3336396171999207e-06, + "loss": 0.3341, + "step": 17506 + }, + { + "epoch": 0.8201152386752237, + "grad_norm": 0.5845811301886888, + "learning_rate": 3.333460819142289e-06, + "loss": 0.3446, + "step": 17507 + }, + { + "epoch": 0.820162083665152, + "grad_norm": 0.5920601927443985, + "learning_rate": 3.333282016288299e-06, + "loss": 0.3144, + "step": 17508 + }, + { + "epoch": 0.8202089286550803, + "grad_norm": 0.5724508717510498, + "learning_rate": 3.33310320863898e-06, + "loss": 0.2993, + "step": 17509 + }, + { + "epoch": 0.8202557736450087, + "grad_norm": 0.6436363409482708, + "learning_rate": 3.332924396195362e-06, + "loss": 0.3575, + "step": 17510 + }, + { + "epoch": 0.820302618634937, + "grad_norm": 0.5882307800696956, + "learning_rate": 3.3327455789584735e-06, + "loss": 0.3158, + "step": 17511 + }, + { + "epoch": 0.8203494636248653, + "grad_norm": 0.566801247383836, + "learning_rate": 3.332566756929343e-06, + "loss": 0.3037, + "step": 17512 + }, + { + "epoch": 0.8203963086147936, + "grad_norm": 0.5557687740798162, + "learning_rate": 3.332387930109e-06, + "loss": 0.3167, + "step": 17513 + }, + { + "epoch": 0.820443153604722, + "grad_norm": 0.574192865712626, + "learning_rate": 3.3322090984984745e-06, + "loss": 0.3094, + "step": 17514 + }, + { + "epoch": 0.8204899985946503, + "grad_norm": 0.6035616976879388, + "learning_rate": 3.3320302620987944e-06, + "loss": 0.3169, + "step": 17515 + }, + { + "epoch": 0.8205368435845787, + "grad_norm": 0.5907289173180118, + "learning_rate": 3.3318514209109888e-06, + "loss": 0.3069, + "step": 17516 + }, + { + "epoch": 0.8205836885745069, + "grad_norm": 0.6134563784929727, + "learning_rate": 3.331672574936088e-06, + "loss": 0.3363, + "step": 17517 + }, + { + "epoch": 0.8206305335644353, + "grad_norm": 0.6159680130573844, + "learning_rate": 3.3314937241751206e-06, + "loss": 0.3264, + "step": 17518 + }, + { + "epoch": 0.8206773785543636, + "grad_norm": 0.6221882122765423, + "learning_rate": 3.3313148686291154e-06, + "loss": 0.3441, + "step": 17519 + }, + { + "epoch": 0.820724223544292, + "grad_norm": 0.6068496927177045, + "learning_rate": 3.3311360082991017e-06, + "loss": 0.3138, + "step": 17520 + }, + { + "epoch": 0.8207710685342202, + "grad_norm": 0.6111698953900156, + "learning_rate": 3.3309571431861097e-06, + "loss": 0.3172, + "step": 17521 + }, + { + "epoch": 0.8208179135241486, + "grad_norm": 0.5414252905365557, + "learning_rate": 3.3307782732911682e-06, + "loss": 0.3019, + "step": 17522 + }, + { + "epoch": 0.8208647585140769, + "grad_norm": 0.5768908945520714, + "learning_rate": 3.330599398615307e-06, + "loss": 0.3109, + "step": 17523 + }, + { + "epoch": 0.8209116035040053, + "grad_norm": 0.6129319286050937, + "learning_rate": 3.3304205191595547e-06, + "loss": 0.3162, + "step": 17524 + }, + { + "epoch": 0.8209584484939336, + "grad_norm": 0.5909366326142271, + "learning_rate": 3.33024163492494e-06, + "loss": 0.3317, + "step": 17525 + }, + { + "epoch": 0.8210052934838619, + "grad_norm": 0.5542012469521788, + "learning_rate": 3.3300627459124946e-06, + "loss": 0.3167, + "step": 17526 + }, + { + "epoch": 0.8210521384737902, + "grad_norm": 0.615361605107913, + "learning_rate": 3.3298838521232462e-06, + "loss": 0.3296, + "step": 17527 + }, + { + "epoch": 0.8210989834637186, + "grad_norm": 0.5615762033632794, + "learning_rate": 3.329704953558225e-06, + "loss": 0.2867, + "step": 17528 + }, + { + "epoch": 0.8211458284536469, + "grad_norm": 0.5938561480220644, + "learning_rate": 3.3295260502184607e-06, + "loss": 0.307, + "step": 17529 + }, + { + "epoch": 0.8211926734435752, + "grad_norm": 0.5477045229212418, + "learning_rate": 3.329347142104982e-06, + "loss": 0.2965, + "step": 17530 + }, + { + "epoch": 0.8212395184335035, + "grad_norm": 0.5903290075310753, + "learning_rate": 3.3291682292188188e-06, + "loss": 0.333, + "step": 17531 + }, + { + "epoch": 0.8212863634234319, + "grad_norm": 0.5937014217412463, + "learning_rate": 3.328989311561002e-06, + "loss": 0.3036, + "step": 17532 + }, + { + "epoch": 0.8213332084133602, + "grad_norm": 0.6030273790503479, + "learning_rate": 3.328810389132558e-06, + "loss": 0.3394, + "step": 17533 + }, + { + "epoch": 0.8213800534032886, + "grad_norm": 0.6150288873358888, + "learning_rate": 3.3286314619345195e-06, + "loss": 0.3343, + "step": 17534 + }, + { + "epoch": 0.8214268983932168, + "grad_norm": 0.5359067431856777, + "learning_rate": 3.328452529967916e-06, + "loss": 0.2951, + "step": 17535 + }, + { + "epoch": 0.8214737433831452, + "grad_norm": 0.6186308622480243, + "learning_rate": 3.328273593233776e-06, + "loss": 0.3312, + "step": 17536 + }, + { + "epoch": 0.8215205883730735, + "grad_norm": 0.5660967421137169, + "learning_rate": 3.3280946517331287e-06, + "loss": 0.3303, + "step": 17537 + }, + { + "epoch": 0.8215674333630019, + "grad_norm": 0.587143986063708, + "learning_rate": 3.3279157054670057e-06, + "loss": 0.2969, + "step": 17538 + }, + { + "epoch": 0.8216142783529301, + "grad_norm": 0.6662233450398917, + "learning_rate": 3.327736754436436e-06, + "loss": 0.3494, + "step": 17539 + }, + { + "epoch": 0.8216611233428585, + "grad_norm": 0.5679060487303286, + "learning_rate": 3.3275577986424484e-06, + "loss": 0.3162, + "step": 17540 + }, + { + "epoch": 0.8217079683327868, + "grad_norm": 0.5936910597534263, + "learning_rate": 3.327378838086075e-06, + "loss": 0.3167, + "step": 17541 + }, + { + "epoch": 0.8217548133227152, + "grad_norm": 0.5829501933471992, + "learning_rate": 3.3271998727683436e-06, + "loss": 0.3145, + "step": 17542 + }, + { + "epoch": 0.8218016583126435, + "grad_norm": 0.58725410092454, + "learning_rate": 3.3270209026902843e-06, + "loss": 0.3342, + "step": 17543 + }, + { + "epoch": 0.8218485033025718, + "grad_norm": 0.6248194248580285, + "learning_rate": 3.3268419278529285e-06, + "loss": 0.3246, + "step": 17544 + }, + { + "epoch": 0.8218953482925001, + "grad_norm": 0.5527389741599009, + "learning_rate": 3.3266629482573054e-06, + "loss": 0.2977, + "step": 17545 + }, + { + "epoch": 0.8219421932824285, + "grad_norm": 0.5896193579999561, + "learning_rate": 3.3264839639044438e-06, + "loss": 0.3033, + "step": 17546 + }, + { + "epoch": 0.8219890382723568, + "grad_norm": 0.6073042987643409, + "learning_rate": 3.3263049747953757e-06, + "loss": 0.3606, + "step": 17547 + }, + { + "epoch": 0.822035883262285, + "grad_norm": 0.5596164181196167, + "learning_rate": 3.326125980931129e-06, + "loss": 0.3092, + "step": 17548 + }, + { + "epoch": 0.8220827282522134, + "grad_norm": 0.5770008550977348, + "learning_rate": 3.325946982312736e-06, + "loss": 0.3169, + "step": 17549 + }, + { + "epoch": 0.8221295732421418, + "grad_norm": 0.518027979750397, + "learning_rate": 3.3257679789412256e-06, + "loss": 0.301, + "step": 17550 + }, + { + "epoch": 0.8221764182320701, + "grad_norm": 0.5517856624186109, + "learning_rate": 3.325588970817627e-06, + "loss": 0.3233, + "step": 17551 + }, + { + "epoch": 0.8222232632219985, + "grad_norm": 0.5649198023899203, + "learning_rate": 3.3254099579429725e-06, + "loss": 0.2988, + "step": 17552 + }, + { + "epoch": 0.8222701082119267, + "grad_norm": 0.6158638741285121, + "learning_rate": 3.3252309403182904e-06, + "loss": 0.3272, + "step": 17553 + }, + { + "epoch": 0.822316953201855, + "grad_norm": 0.5937405350468196, + "learning_rate": 3.325051917944612e-06, + "loss": 0.3136, + "step": 17554 + }, + { + "epoch": 0.8223637981917834, + "grad_norm": 0.57167847313779, + "learning_rate": 3.3248728908229673e-06, + "loss": 0.2984, + "step": 17555 + }, + { + "epoch": 0.8224106431817118, + "grad_norm": 0.5467376766539515, + "learning_rate": 3.3246938589543864e-06, + "loss": 0.3025, + "step": 17556 + }, + { + "epoch": 0.82245748817164, + "grad_norm": 0.5741444790210138, + "learning_rate": 3.3245148223399e-06, + "loss": 0.3121, + "step": 17557 + }, + { + "epoch": 0.8225043331615683, + "grad_norm": 0.5935284801645242, + "learning_rate": 3.324335780980537e-06, + "loss": 0.3335, + "step": 17558 + }, + { + "epoch": 0.8225511781514967, + "grad_norm": 0.5828139137958624, + "learning_rate": 3.3241567348773295e-06, + "loss": 0.294, + "step": 17559 + }, + { + "epoch": 0.822598023141425, + "grad_norm": 0.5952561784449629, + "learning_rate": 3.323977684031307e-06, + "loss": 0.3117, + "step": 17560 + }, + { + "epoch": 0.8226448681313534, + "grad_norm": 0.5895542273093647, + "learning_rate": 3.3237986284434996e-06, + "loss": 0.3359, + "step": 17561 + }, + { + "epoch": 0.8226917131212816, + "grad_norm": 0.6064447188931433, + "learning_rate": 3.3236195681149386e-06, + "loss": 0.3223, + "step": 17562 + }, + { + "epoch": 0.82273855811121, + "grad_norm": 0.5700288752934223, + "learning_rate": 3.323440503046655e-06, + "loss": 0.3177, + "step": 17563 + }, + { + "epoch": 0.8227854031011383, + "grad_norm": 0.5906751823424409, + "learning_rate": 3.3232614332396766e-06, + "loss": 0.3107, + "step": 17564 + }, + { + "epoch": 0.8228322480910667, + "grad_norm": 0.582175574815764, + "learning_rate": 3.3230823586950356e-06, + "loss": 0.3321, + "step": 17565 + }, + { + "epoch": 0.8228790930809949, + "grad_norm": 0.5912966721659165, + "learning_rate": 3.322903279413764e-06, + "loss": 0.3296, + "step": 17566 + }, + { + "epoch": 0.8229259380709233, + "grad_norm": 0.6244987744076256, + "learning_rate": 3.3227241953968897e-06, + "loss": 0.3401, + "step": 17567 + }, + { + "epoch": 0.8229727830608516, + "grad_norm": 0.5808934324948755, + "learning_rate": 3.3225451066454445e-06, + "loss": 0.3288, + "step": 17568 + }, + { + "epoch": 0.82301962805078, + "grad_norm": 0.5831341047865738, + "learning_rate": 3.322366013160459e-06, + "loss": 0.3197, + "step": 17569 + }, + { + "epoch": 0.8230664730407083, + "grad_norm": 0.5843814704625836, + "learning_rate": 3.322186914942964e-06, + "loss": 0.3162, + "step": 17570 + }, + { + "epoch": 0.8231133180306366, + "grad_norm": 0.555458536770558, + "learning_rate": 3.3220078119939904e-06, + "loss": 0.3107, + "step": 17571 + }, + { + "epoch": 0.8231601630205649, + "grad_norm": 0.5558242428946407, + "learning_rate": 3.3218287043145673e-06, + "loss": 0.3112, + "step": 17572 + }, + { + "epoch": 0.8232070080104933, + "grad_norm": 0.593581098118164, + "learning_rate": 3.3216495919057278e-06, + "loss": 0.3053, + "step": 17573 + }, + { + "epoch": 0.8232538530004216, + "grad_norm": 0.6732252880200732, + "learning_rate": 3.3214704747685004e-06, + "loss": 0.3448, + "step": 17574 + }, + { + "epoch": 0.8233006979903499, + "grad_norm": 0.5641702664434153, + "learning_rate": 3.3212913529039174e-06, + "loss": 0.3182, + "step": 17575 + }, + { + "epoch": 0.8233475429802782, + "grad_norm": 0.6280490240087411, + "learning_rate": 3.321112226313009e-06, + "loss": 0.3256, + "step": 17576 + }, + { + "epoch": 0.8233943879702066, + "grad_norm": 0.6017401545201484, + "learning_rate": 3.320933094996806e-06, + "loss": 0.3269, + "step": 17577 + }, + { + "epoch": 0.8234412329601349, + "grad_norm": 0.6196838936390873, + "learning_rate": 3.3207539589563397e-06, + "loss": 0.318, + "step": 17578 + }, + { + "epoch": 0.8234880779500633, + "grad_norm": 0.5847184113266414, + "learning_rate": 3.3205748181926402e-06, + "loss": 0.3099, + "step": 17579 + }, + { + "epoch": 0.8235349229399915, + "grad_norm": 0.5835501192243002, + "learning_rate": 3.3203956727067393e-06, + "loss": 0.3032, + "step": 17580 + }, + { + "epoch": 0.8235817679299199, + "grad_norm": 0.598155555422339, + "learning_rate": 3.3202165224996673e-06, + "loss": 0.321, + "step": 17581 + }, + { + "epoch": 0.8236286129198482, + "grad_norm": 0.5955149030910344, + "learning_rate": 3.320037367572455e-06, + "loss": 0.3348, + "step": 17582 + }, + { + "epoch": 0.8236754579097766, + "grad_norm": 0.7382172042300428, + "learning_rate": 3.3198582079261343e-06, + "loss": 0.3248, + "step": 17583 + }, + { + "epoch": 0.8237223028997048, + "grad_norm": 0.5614641395612447, + "learning_rate": 3.319679043561736e-06, + "loss": 0.3077, + "step": 17584 + }, + { + "epoch": 0.8237691478896332, + "grad_norm": 0.5574624043047306, + "learning_rate": 3.3194998744802896e-06, + "loss": 0.3196, + "step": 17585 + }, + { + "epoch": 0.8238159928795615, + "grad_norm": 0.5904606496788956, + "learning_rate": 3.3193207006828282e-06, + "loss": 0.3061, + "step": 17586 + }, + { + "epoch": 0.8238628378694899, + "grad_norm": 0.5820271649569503, + "learning_rate": 3.319141522170382e-06, + "loss": 0.3258, + "step": 17587 + }, + { + "epoch": 0.8239096828594182, + "grad_norm": 0.5572782175348606, + "learning_rate": 3.318962338943983e-06, + "loss": 0.3048, + "step": 17588 + }, + { + "epoch": 0.8239565278493465, + "grad_norm": 0.5913270961093905, + "learning_rate": 3.31878315100466e-06, + "loss": 0.301, + "step": 17589 + }, + { + "epoch": 0.8240033728392748, + "grad_norm": 0.5480354683804856, + "learning_rate": 3.318603958353447e-06, + "loss": 0.2821, + "step": 17590 + }, + { + "epoch": 0.8240502178292032, + "grad_norm": 0.6415647258756306, + "learning_rate": 3.318424760991373e-06, + "loss": 0.3332, + "step": 17591 + }, + { + "epoch": 0.8240970628191315, + "grad_norm": 0.5717242233160955, + "learning_rate": 3.3182455589194713e-06, + "loss": 0.3181, + "step": 17592 + }, + { + "epoch": 0.8241439078090598, + "grad_norm": 0.6431558041089565, + "learning_rate": 3.318066352138771e-06, + "loss": 0.3259, + "step": 17593 + }, + { + "epoch": 0.8241907527989881, + "grad_norm": 0.572398545367792, + "learning_rate": 3.3178871406503053e-06, + "loss": 0.3161, + "step": 17594 + }, + { + "epoch": 0.8242375977889165, + "grad_norm": 0.5613404728282161, + "learning_rate": 3.317707924455104e-06, + "loss": 0.3079, + "step": 17595 + }, + { + "epoch": 0.8242844427788448, + "grad_norm": 0.5838874175302936, + "learning_rate": 3.3175287035542e-06, + "loss": 0.3277, + "step": 17596 + }, + { + "epoch": 0.8243312877687732, + "grad_norm": 0.5832860361398874, + "learning_rate": 3.3173494779486225e-06, + "loss": 0.3032, + "step": 17597 + }, + { + "epoch": 0.8243781327587014, + "grad_norm": 0.6309761368140906, + "learning_rate": 3.317170247639405e-06, + "loss": 0.3235, + "step": 17598 + }, + { + "epoch": 0.8244249777486298, + "grad_norm": 0.5778007709942714, + "learning_rate": 3.3169910126275788e-06, + "loss": 0.2955, + "step": 17599 + }, + { + "epoch": 0.8244718227385581, + "grad_norm": 0.6301566481479214, + "learning_rate": 3.3168117729141735e-06, + "loss": 0.3121, + "step": 17600 + }, + { + "epoch": 0.8245186677284865, + "grad_norm": 0.6237611895461873, + "learning_rate": 3.316632528500222e-06, + "loss": 0.329, + "step": 17601 + }, + { + "epoch": 0.8245655127184147, + "grad_norm": 0.6143391394050948, + "learning_rate": 3.3164532793867565e-06, + "loss": 0.3521, + "step": 17602 + }, + { + "epoch": 0.8246123577083431, + "grad_norm": 0.6009161612116154, + "learning_rate": 3.316274025574806e-06, + "loss": 0.3029, + "step": 17603 + }, + { + "epoch": 0.8246592026982714, + "grad_norm": 0.585078910214368, + "learning_rate": 3.316094767065404e-06, + "loss": 0.3181, + "step": 17604 + }, + { + "epoch": 0.8247060476881998, + "grad_norm": 0.5684676275711127, + "learning_rate": 3.3159155038595838e-06, + "loss": 0.3088, + "step": 17605 + }, + { + "epoch": 0.8247528926781281, + "grad_norm": 0.5880017741352852, + "learning_rate": 3.315736235958372e-06, + "loss": 0.3371, + "step": 17606 + }, + { + "epoch": 0.8247997376680564, + "grad_norm": 0.6532151200118297, + "learning_rate": 3.315556963362805e-06, + "loss": 0.2906, + "step": 17607 + }, + { + "epoch": 0.8248465826579847, + "grad_norm": 0.5773584397099819, + "learning_rate": 3.3153776860739123e-06, + "loss": 0.2943, + "step": 17608 + }, + { + "epoch": 0.8248934276479131, + "grad_norm": 0.6092042501151536, + "learning_rate": 3.315198404092726e-06, + "loss": 0.2992, + "step": 17609 + }, + { + "epoch": 0.8249402726378414, + "grad_norm": 0.6177420272561889, + "learning_rate": 3.3150191174202772e-06, + "loss": 0.3309, + "step": 17610 + }, + { + "epoch": 0.8249871176277697, + "grad_norm": 0.5454628966918403, + "learning_rate": 3.3148398260575985e-06, + "loss": 0.2974, + "step": 17611 + }, + { + "epoch": 0.825033962617698, + "grad_norm": 0.5882675329679272, + "learning_rate": 3.314660530005722e-06, + "loss": 0.3279, + "step": 17612 + }, + { + "epoch": 0.8250808076076264, + "grad_norm": 0.5801218017689985, + "learning_rate": 3.314481229265678e-06, + "loss": 0.3115, + "step": 17613 + }, + { + "epoch": 0.8251276525975547, + "grad_norm": 0.5855229840074865, + "learning_rate": 3.3143019238385e-06, + "loss": 0.3321, + "step": 17614 + }, + { + "epoch": 0.8251744975874831, + "grad_norm": 0.56750141449964, + "learning_rate": 3.314122613725219e-06, + "loss": 0.3057, + "step": 17615 + }, + { + "epoch": 0.8252213425774113, + "grad_norm": 0.6393893607230117, + "learning_rate": 3.313943298926866e-06, + "loss": 0.3442, + "step": 17616 + }, + { + "epoch": 0.8252681875673397, + "grad_norm": 0.5736614605066475, + "learning_rate": 3.3137639794444753e-06, + "loss": 0.3133, + "step": 17617 + }, + { + "epoch": 0.825315032557268, + "grad_norm": 0.6731647104300413, + "learning_rate": 3.3135846552790764e-06, + "loss": 0.3174, + "step": 17618 + }, + { + "epoch": 0.8253618775471964, + "grad_norm": 0.634768952116213, + "learning_rate": 3.313405326431703e-06, + "loss": 0.3258, + "step": 17619 + }, + { + "epoch": 0.8254087225371246, + "grad_norm": 0.611402119831354, + "learning_rate": 3.313225992903386e-06, + "loss": 0.3318, + "step": 17620 + }, + { + "epoch": 0.825455567527053, + "grad_norm": 0.6433381279013441, + "learning_rate": 3.3130466546951577e-06, + "loss": 0.332, + "step": 17621 + }, + { + "epoch": 0.8255024125169813, + "grad_norm": 0.5591245841261884, + "learning_rate": 3.3128673118080506e-06, + "loss": 0.3152, + "step": 17622 + }, + { + "epoch": 0.8255492575069097, + "grad_norm": 0.5643811939504615, + "learning_rate": 3.312687964243096e-06, + "loss": 0.3138, + "step": 17623 + }, + { + "epoch": 0.825596102496838, + "grad_norm": 0.5747010148631803, + "learning_rate": 3.312508612001327e-06, + "loss": 0.3323, + "step": 17624 + }, + { + "epoch": 0.8256429474867663, + "grad_norm": 0.6255031098212905, + "learning_rate": 3.3123292550837745e-06, + "loss": 0.321, + "step": 17625 + }, + { + "epoch": 0.8256897924766946, + "grad_norm": 0.5932604941370331, + "learning_rate": 3.3121498934914714e-06, + "loss": 0.3095, + "step": 17626 + }, + { + "epoch": 0.825736637466623, + "grad_norm": 0.6309299159802004, + "learning_rate": 3.3119705272254502e-06, + "loss": 0.333, + "step": 17627 + }, + { + "epoch": 0.8257834824565513, + "grad_norm": 0.5722191746453729, + "learning_rate": 3.311791156286742e-06, + "loss": 0.3362, + "step": 17628 + }, + { + "epoch": 0.8258303274464796, + "grad_norm": 0.5932791217978206, + "learning_rate": 3.311611780676381e-06, + "loss": 0.3303, + "step": 17629 + }, + { + "epoch": 0.8258771724364079, + "grad_norm": 0.5974846051905645, + "learning_rate": 3.3114324003953975e-06, + "loss": 0.3323, + "step": 17630 + }, + { + "epoch": 0.8259240174263363, + "grad_norm": 0.5919016002540426, + "learning_rate": 3.3112530154448243e-06, + "loss": 0.3306, + "step": 17631 + }, + { + "epoch": 0.8259708624162646, + "grad_norm": 0.6603808912803388, + "learning_rate": 3.3110736258256935e-06, + "loss": 0.3337, + "step": 17632 + }, + { + "epoch": 0.826017707406193, + "grad_norm": 0.5979181431078777, + "learning_rate": 3.310894231539039e-06, + "loss": 0.3431, + "step": 17633 + }, + { + "epoch": 0.8260645523961212, + "grad_norm": 0.6406817066581425, + "learning_rate": 3.3107148325858913e-06, + "loss": 0.3226, + "step": 17634 + }, + { + "epoch": 0.8261113973860496, + "grad_norm": 0.6305263312205458, + "learning_rate": 3.310535428967283e-06, + "loss": 0.3102, + "step": 17635 + }, + { + "epoch": 0.8261582423759779, + "grad_norm": 0.5879144715965414, + "learning_rate": 3.310356020684248e-06, + "loss": 0.3119, + "step": 17636 + }, + { + "epoch": 0.8262050873659063, + "grad_norm": 0.5948961103980729, + "learning_rate": 3.3101766077378174e-06, + "loss": 0.308, + "step": 17637 + }, + { + "epoch": 0.8262519323558345, + "grad_norm": 0.5470801097749868, + "learning_rate": 3.309997190129024e-06, + "loss": 0.2942, + "step": 17638 + }, + { + "epoch": 0.8262987773457628, + "grad_norm": 0.6090098349337731, + "learning_rate": 3.3098177678589005e-06, + "loss": 0.3058, + "step": 17639 + }, + { + "epoch": 0.8263456223356912, + "grad_norm": 0.6260414125214537, + "learning_rate": 3.309638340928479e-06, + "loss": 0.3022, + "step": 17640 + }, + { + "epoch": 0.8263924673256196, + "grad_norm": 0.5711498086210549, + "learning_rate": 3.3094589093387926e-06, + "loss": 0.3117, + "step": 17641 + }, + { + "epoch": 0.8264393123155479, + "grad_norm": 0.5753502194834885, + "learning_rate": 3.309279473090874e-06, + "loss": 0.3149, + "step": 17642 + }, + { + "epoch": 0.8264861573054761, + "grad_norm": 0.5991595175845431, + "learning_rate": 3.3091000321857547e-06, + "loss": 0.3259, + "step": 17643 + }, + { + "epoch": 0.8265330022954045, + "grad_norm": 0.5829421538516976, + "learning_rate": 3.308920586624468e-06, + "loss": 0.3156, + "step": 17644 + }, + { + "epoch": 0.8265798472853328, + "grad_norm": 0.6021020730682428, + "learning_rate": 3.3087411364080474e-06, + "loss": 0.3082, + "step": 17645 + }, + { + "epoch": 0.8266266922752612, + "grad_norm": 0.623055155634862, + "learning_rate": 3.3085616815375243e-06, + "loss": 0.3397, + "step": 17646 + }, + { + "epoch": 0.8266735372651894, + "grad_norm": 0.5553189380624857, + "learning_rate": 3.3083822220139316e-06, + "loss": 0.3073, + "step": 17647 + }, + { + "epoch": 0.8267203822551178, + "grad_norm": 0.6242363911975419, + "learning_rate": 3.3082027578383035e-06, + "loss": 0.338, + "step": 17648 + }, + { + "epoch": 0.8267672272450461, + "grad_norm": 0.5206284628516132, + "learning_rate": 3.3080232890116704e-06, + "loss": 0.2835, + "step": 17649 + }, + { + "epoch": 0.8268140722349745, + "grad_norm": 0.6387871402513338, + "learning_rate": 3.3078438155350675e-06, + "loss": 0.3367, + "step": 17650 + }, + { + "epoch": 0.8268609172249028, + "grad_norm": 0.6011738888651377, + "learning_rate": 3.3076643374095263e-06, + "loss": 0.3353, + "step": 17651 + }, + { + "epoch": 0.8269077622148311, + "grad_norm": 0.5599160431730219, + "learning_rate": 3.307484854636079e-06, + "loss": 0.3005, + "step": 17652 + }, + { + "epoch": 0.8269546072047594, + "grad_norm": 0.6199516696126733, + "learning_rate": 3.3073053672157594e-06, + "loss": 0.3214, + "step": 17653 + }, + { + "epoch": 0.8270014521946878, + "grad_norm": 0.608743529136388, + "learning_rate": 3.3071258751496017e-06, + "loss": 0.3028, + "step": 17654 + }, + { + "epoch": 0.8270482971846161, + "grad_norm": 0.6266365613757587, + "learning_rate": 3.306946378438636e-06, + "loss": 0.3169, + "step": 17655 + }, + { + "epoch": 0.8270951421745444, + "grad_norm": 0.5813417393365905, + "learning_rate": 3.3067668770838968e-06, + "loss": 0.2896, + "step": 17656 + }, + { + "epoch": 0.8271419871644727, + "grad_norm": 0.5692153083297197, + "learning_rate": 3.3065873710864175e-06, + "loss": 0.312, + "step": 17657 + }, + { + "epoch": 0.8271888321544011, + "grad_norm": 0.6329916556639948, + "learning_rate": 3.3064078604472312e-06, + "loss": 0.3349, + "step": 17658 + }, + { + "epoch": 0.8272356771443294, + "grad_norm": 0.539341965247224, + "learning_rate": 3.3062283451673695e-06, + "loss": 0.311, + "step": 17659 + }, + { + "epoch": 0.8272825221342578, + "grad_norm": 0.5518856108447768, + "learning_rate": 3.306048825247866e-06, + "loss": 0.3104, + "step": 17660 + }, + { + "epoch": 0.827329367124186, + "grad_norm": 0.5951526157744293, + "learning_rate": 3.305869300689755e-06, + "loss": 0.3216, + "step": 17661 + }, + { + "epoch": 0.8273762121141144, + "grad_norm": 0.5190961410285427, + "learning_rate": 3.3056897714940678e-06, + "loss": 0.2858, + "step": 17662 + }, + { + "epoch": 0.8274230571040427, + "grad_norm": 0.5554890857293348, + "learning_rate": 3.305510237661839e-06, + "loss": 0.313, + "step": 17663 + }, + { + "epoch": 0.8274699020939711, + "grad_norm": 0.6319163841870774, + "learning_rate": 3.3053306991941014e-06, + "loss": 0.3353, + "step": 17664 + }, + { + "epoch": 0.8275167470838993, + "grad_norm": 0.5981715364480994, + "learning_rate": 3.3051511560918882e-06, + "loss": 0.3197, + "step": 17665 + }, + { + "epoch": 0.8275635920738277, + "grad_norm": 0.6230905348605568, + "learning_rate": 3.304971608356232e-06, + "loss": 0.3368, + "step": 17666 + }, + { + "epoch": 0.827610437063756, + "grad_norm": 0.5924546182033626, + "learning_rate": 3.304792055988166e-06, + "loss": 0.3395, + "step": 17667 + }, + { + "epoch": 0.8276572820536844, + "grad_norm": 0.6298048555176644, + "learning_rate": 3.3046124989887254e-06, + "loss": 0.32, + "step": 17668 + }, + { + "epoch": 0.8277041270436127, + "grad_norm": 0.5868498847462519, + "learning_rate": 3.3044329373589412e-06, + "loss": 0.3288, + "step": 17669 + }, + { + "epoch": 0.827750972033541, + "grad_norm": 0.639418012614922, + "learning_rate": 3.3042533710998476e-06, + "loss": 0.3138, + "step": 17670 + }, + { + "epoch": 0.8277978170234693, + "grad_norm": 0.581719259263548, + "learning_rate": 3.3040738002124783e-06, + "loss": 0.3132, + "step": 17671 + }, + { + "epoch": 0.8278446620133977, + "grad_norm": 0.6240270417250395, + "learning_rate": 3.3038942246978663e-06, + "loss": 0.3172, + "step": 17672 + }, + { + "epoch": 0.827891507003326, + "grad_norm": 0.5894964342007932, + "learning_rate": 3.3037146445570444e-06, + "loss": 0.3191, + "step": 17673 + }, + { + "epoch": 0.8279383519932543, + "grad_norm": 0.5684259197970146, + "learning_rate": 3.303535059791047e-06, + "loss": 0.3135, + "step": 17674 + }, + { + "epoch": 0.8279851969831826, + "grad_norm": 0.6574571885198631, + "learning_rate": 3.3033554704009084e-06, + "loss": 0.3535, + "step": 17675 + }, + { + "epoch": 0.828032041973111, + "grad_norm": 0.5894247224703749, + "learning_rate": 3.3031758763876596e-06, + "loss": 0.3028, + "step": 17676 + }, + { + "epoch": 0.8280788869630393, + "grad_norm": 0.5600769215524842, + "learning_rate": 3.3029962777523363e-06, + "loss": 0.3058, + "step": 17677 + }, + { + "epoch": 0.8281257319529677, + "grad_norm": 0.5654310305845329, + "learning_rate": 3.302816674495971e-06, + "loss": 0.3068, + "step": 17678 + }, + { + "epoch": 0.8281725769428959, + "grad_norm": 0.5473120115282298, + "learning_rate": 3.3026370666195978e-06, + "loss": 0.313, + "step": 17679 + }, + { + "epoch": 0.8282194219328243, + "grad_norm": 0.6124336139658307, + "learning_rate": 3.3024574541242493e-06, + "loss": 0.3283, + "step": 17680 + }, + { + "epoch": 0.8282662669227526, + "grad_norm": 0.594016112760748, + "learning_rate": 3.302277837010961e-06, + "loss": 0.3126, + "step": 17681 + }, + { + "epoch": 0.828313111912681, + "grad_norm": 0.6074495239220287, + "learning_rate": 3.3020982152807644e-06, + "loss": 0.3057, + "step": 17682 + }, + { + "epoch": 0.8283599569026092, + "grad_norm": 0.5965510892889906, + "learning_rate": 3.3019185889346943e-06, + "loss": 0.3235, + "step": 17683 + }, + { + "epoch": 0.8284068018925376, + "grad_norm": 0.6102018354181763, + "learning_rate": 3.3017389579737845e-06, + "loss": 0.3247, + "step": 17684 + }, + { + "epoch": 0.8284536468824659, + "grad_norm": 0.5716838620846916, + "learning_rate": 3.3015593223990693e-06, + "loss": 0.3041, + "step": 17685 + }, + { + "epoch": 0.8285004918723943, + "grad_norm": 0.6234839420252368, + "learning_rate": 3.30137968221158e-06, + "loss": 0.3232, + "step": 17686 + }, + { + "epoch": 0.8285473368623226, + "grad_norm": 0.6511995975397757, + "learning_rate": 3.3012000374123533e-06, + "loss": 0.3255, + "step": 17687 + }, + { + "epoch": 0.8285941818522509, + "grad_norm": 0.621902501113232, + "learning_rate": 3.301020388002421e-06, + "loss": 0.3199, + "step": 17688 + }, + { + "epoch": 0.8286410268421792, + "grad_norm": 0.5389635706292221, + "learning_rate": 3.3008407339828188e-06, + "loss": 0.3141, + "step": 17689 + }, + { + "epoch": 0.8286878718321076, + "grad_norm": 0.6047332590864111, + "learning_rate": 3.3006610753545777e-06, + "loss": 0.3263, + "step": 17690 + }, + { + "epoch": 0.8287347168220359, + "grad_norm": 0.6884302911159076, + "learning_rate": 3.3004814121187344e-06, + "loss": 0.3358, + "step": 17691 + }, + { + "epoch": 0.8287815618119642, + "grad_norm": 0.6279663026110418, + "learning_rate": 3.3003017442763224e-06, + "loss": 0.3113, + "step": 17692 + }, + { + "epoch": 0.8288284068018925, + "grad_norm": 0.6004460303311577, + "learning_rate": 3.3001220718283737e-06, + "loss": 0.3324, + "step": 17693 + }, + { + "epoch": 0.8288752517918209, + "grad_norm": 0.6080515901782919, + "learning_rate": 3.2999423947759247e-06, + "loss": 0.3264, + "step": 17694 + }, + { + "epoch": 0.8289220967817492, + "grad_norm": 0.5165456198852475, + "learning_rate": 3.299762713120007e-06, + "loss": 0.2939, + "step": 17695 + }, + { + "epoch": 0.8289689417716776, + "grad_norm": 0.5902669607330218, + "learning_rate": 3.2995830268616567e-06, + "loss": 0.3086, + "step": 17696 + }, + { + "epoch": 0.8290157867616058, + "grad_norm": 0.5995976223602507, + "learning_rate": 3.299403336001908e-06, + "loss": 0.3239, + "step": 17697 + }, + { + "epoch": 0.8290626317515342, + "grad_norm": 0.6205670568023854, + "learning_rate": 3.2992236405417927e-06, + "loss": 0.3387, + "step": 17698 + }, + { + "epoch": 0.8291094767414625, + "grad_norm": 0.5903792009960664, + "learning_rate": 3.2990439404823465e-06, + "loss": 0.313, + "step": 17699 + }, + { + "epoch": 0.8291563217313909, + "grad_norm": 0.5983596574900963, + "learning_rate": 3.2988642358246038e-06, + "loss": 0.327, + "step": 17700 + }, + { + "epoch": 0.8292031667213191, + "grad_norm": 0.5538438523633438, + "learning_rate": 3.2986845265695973e-06, + "loss": 0.3106, + "step": 17701 + }, + { + "epoch": 0.8292500117112475, + "grad_norm": 0.6140383712854378, + "learning_rate": 3.2985048127183622e-06, + "loss": 0.3191, + "step": 17702 + }, + { + "epoch": 0.8292968567011758, + "grad_norm": 0.6034945611978368, + "learning_rate": 3.2983250942719336e-06, + "loss": 0.3382, + "step": 17703 + }, + { + "epoch": 0.8293437016911042, + "grad_norm": 0.5658689142862464, + "learning_rate": 3.298145371231344e-06, + "loss": 0.3166, + "step": 17704 + }, + { + "epoch": 0.8293905466810325, + "grad_norm": 0.5694421152866203, + "learning_rate": 3.2979656435976283e-06, + "loss": 0.2999, + "step": 17705 + }, + { + "epoch": 0.8294373916709608, + "grad_norm": 0.6184534773276309, + "learning_rate": 3.2977859113718206e-06, + "loss": 0.3357, + "step": 17706 + }, + { + "epoch": 0.8294842366608891, + "grad_norm": 0.5857638106990427, + "learning_rate": 3.2976061745549567e-06, + "loss": 0.2955, + "step": 17707 + }, + { + "epoch": 0.8295310816508175, + "grad_norm": 0.6045897425815083, + "learning_rate": 3.297426433148068e-06, + "loss": 0.327, + "step": 17708 + }, + { + "epoch": 0.8295779266407458, + "grad_norm": 0.6090427837290415, + "learning_rate": 3.2972466871521924e-06, + "loss": 0.3108, + "step": 17709 + }, + { + "epoch": 0.829624771630674, + "grad_norm": 0.6478781457485622, + "learning_rate": 3.2970669365683617e-06, + "loss": 0.3224, + "step": 17710 + }, + { + "epoch": 0.8296716166206024, + "grad_norm": 0.5639105733178352, + "learning_rate": 3.2968871813976105e-06, + "loss": 0.317, + "step": 17711 + }, + { + "epoch": 0.8297184616105308, + "grad_norm": 0.5645583332162768, + "learning_rate": 3.2967074216409747e-06, + "loss": 0.3192, + "step": 17712 + }, + { + "epoch": 0.8297653066004591, + "grad_norm": 0.6034049425230337, + "learning_rate": 3.296527657299488e-06, + "loss": 0.3253, + "step": 17713 + }, + { + "epoch": 0.8298121515903875, + "grad_norm": 0.6262828788094132, + "learning_rate": 3.2963478883741838e-06, + "loss": 0.3228, + "step": 17714 + }, + { + "epoch": 0.8298589965803157, + "grad_norm": 0.6027515323965867, + "learning_rate": 3.296168114866099e-06, + "loss": 0.332, + "step": 17715 + }, + { + "epoch": 0.829905841570244, + "grad_norm": 0.5717372327423317, + "learning_rate": 3.295988336776266e-06, + "loss": 0.3101, + "step": 17716 + }, + { + "epoch": 0.8299526865601724, + "grad_norm": 0.5931290670430787, + "learning_rate": 3.2958085541057205e-06, + "loss": 0.322, + "step": 17717 + }, + { + "epoch": 0.8299995315501008, + "grad_norm": 0.5955654857722398, + "learning_rate": 3.2956287668554963e-06, + "loss": 0.3152, + "step": 17718 + }, + { + "epoch": 0.830046376540029, + "grad_norm": 0.5722707308953927, + "learning_rate": 3.2954489750266287e-06, + "loss": 0.3272, + "step": 17719 + }, + { + "epoch": 0.8300932215299573, + "grad_norm": 0.5734885618562031, + "learning_rate": 3.295269178620153e-06, + "loss": 0.31, + "step": 17720 + }, + { + "epoch": 0.8301400665198857, + "grad_norm": 0.6175946236039211, + "learning_rate": 3.2950893776371025e-06, + "loss": 0.297, + "step": 17721 + }, + { + "epoch": 0.830186911509814, + "grad_norm": 0.681854631343864, + "learning_rate": 3.294909572078512e-06, + "loss": 0.3324, + "step": 17722 + }, + { + "epoch": 0.8302337564997424, + "grad_norm": 0.6440566331756629, + "learning_rate": 3.2947297619454165e-06, + "loss": 0.3258, + "step": 17723 + }, + { + "epoch": 0.8302806014896706, + "grad_norm": 0.5394036434025563, + "learning_rate": 3.2945499472388527e-06, + "loss": 0.2997, + "step": 17724 + }, + { + "epoch": 0.830327446479599, + "grad_norm": 0.576054528298345, + "learning_rate": 3.294370127959852e-06, + "loss": 0.3216, + "step": 17725 + }, + { + "epoch": 0.8303742914695273, + "grad_norm": 0.6552283927725088, + "learning_rate": 3.294190304109451e-06, + "loss": 0.324, + "step": 17726 + }, + { + "epoch": 0.8304211364594557, + "grad_norm": 0.6078781124618161, + "learning_rate": 3.294010475688685e-06, + "loss": 0.3361, + "step": 17727 + }, + { + "epoch": 0.8304679814493839, + "grad_norm": 0.6027908365523132, + "learning_rate": 3.2938306426985884e-06, + "loss": 0.3007, + "step": 17728 + }, + { + "epoch": 0.8305148264393123, + "grad_norm": 0.619863286492411, + "learning_rate": 3.2936508051401955e-06, + "loss": 0.3337, + "step": 17729 + }, + { + "epoch": 0.8305616714292406, + "grad_norm": 0.595365374684415, + "learning_rate": 3.2934709630145416e-06, + "loss": 0.3329, + "step": 17730 + }, + { + "epoch": 0.830608516419169, + "grad_norm": 0.5721186357579049, + "learning_rate": 3.2932911163226622e-06, + "loss": 0.3138, + "step": 17731 + }, + { + "epoch": 0.8306553614090973, + "grad_norm": 0.6383058209455609, + "learning_rate": 3.2931112650655917e-06, + "loss": 0.3252, + "step": 17732 + }, + { + "epoch": 0.8307022063990256, + "grad_norm": 0.6563538685632992, + "learning_rate": 3.2929314092443654e-06, + "loss": 0.3332, + "step": 17733 + }, + { + "epoch": 0.8307490513889539, + "grad_norm": 0.6039224762761162, + "learning_rate": 3.292751548860018e-06, + "loss": 0.3372, + "step": 17734 + }, + { + "epoch": 0.8307958963788823, + "grad_norm": 0.5472936527505421, + "learning_rate": 3.2925716839135842e-06, + "loss": 0.3178, + "step": 17735 + }, + { + "epoch": 0.8308427413688106, + "grad_norm": 0.5769059279025363, + "learning_rate": 3.2923918144060995e-06, + "loss": 0.3262, + "step": 17736 + }, + { + "epoch": 0.8308895863587389, + "grad_norm": 0.5664356302438034, + "learning_rate": 3.2922119403385994e-06, + "loss": 0.3166, + "step": 17737 + }, + { + "epoch": 0.8309364313486672, + "grad_norm": 0.6318586980591251, + "learning_rate": 3.2920320617121194e-06, + "loss": 0.3185, + "step": 17738 + }, + { + "epoch": 0.8309832763385956, + "grad_norm": 0.6219226858784768, + "learning_rate": 3.2918521785276936e-06, + "loss": 0.316, + "step": 17739 + }, + { + "epoch": 0.8310301213285239, + "grad_norm": 0.6169498826668104, + "learning_rate": 3.2916722907863564e-06, + "loss": 0.3376, + "step": 17740 + }, + { + "epoch": 0.8310769663184523, + "grad_norm": 0.5858623498555059, + "learning_rate": 3.2914923984891458e-06, + "loss": 0.3059, + "step": 17741 + }, + { + "epoch": 0.8311238113083805, + "grad_norm": 0.6502188323050078, + "learning_rate": 3.2913125016370946e-06, + "loss": 0.3419, + "step": 17742 + }, + { + "epoch": 0.8311706562983089, + "grad_norm": 0.5669114119993567, + "learning_rate": 3.291132600231238e-06, + "loss": 0.3274, + "step": 17743 + }, + { + "epoch": 0.8312175012882372, + "grad_norm": 0.5848446498616559, + "learning_rate": 3.290952694272613e-06, + "loss": 0.3198, + "step": 17744 + }, + { + "epoch": 0.8312643462781656, + "grad_norm": 0.5552909712719561, + "learning_rate": 3.2907727837622542e-06, + "loss": 0.2876, + "step": 17745 + }, + { + "epoch": 0.8313111912680938, + "grad_norm": 0.5911382243275805, + "learning_rate": 3.2905928687011966e-06, + "loss": 0.3026, + "step": 17746 + }, + { + "epoch": 0.8313580362580222, + "grad_norm": 0.5944974398183226, + "learning_rate": 3.2904129490904756e-06, + "loss": 0.3095, + "step": 17747 + }, + { + "epoch": 0.8314048812479505, + "grad_norm": 0.6000583891931928, + "learning_rate": 3.290233024931127e-06, + "loss": 0.3165, + "step": 17748 + }, + { + "epoch": 0.8314517262378789, + "grad_norm": 0.5773078660610838, + "learning_rate": 3.290053096224186e-06, + "loss": 0.3239, + "step": 17749 + }, + { + "epoch": 0.8314985712278072, + "grad_norm": 0.6186183010096417, + "learning_rate": 3.2898731629706878e-06, + "loss": 0.3117, + "step": 17750 + }, + { + "epoch": 0.8315454162177355, + "grad_norm": 0.5886706887948175, + "learning_rate": 3.289693225171668e-06, + "loss": 0.3535, + "step": 17751 + }, + { + "epoch": 0.8315922612076638, + "grad_norm": 0.6175058067243624, + "learning_rate": 3.2895132828281624e-06, + "loss": 0.3214, + "step": 17752 + }, + { + "epoch": 0.8316391061975922, + "grad_norm": 0.563213968331867, + "learning_rate": 3.289333335941206e-06, + "loss": 0.3107, + "step": 17753 + }, + { + "epoch": 0.8316859511875205, + "grad_norm": 0.590141014600238, + "learning_rate": 3.289153384511835e-06, + "loss": 0.3095, + "step": 17754 + }, + { + "epoch": 0.8317327961774488, + "grad_norm": 0.6003752331093963, + "learning_rate": 3.2889734285410846e-06, + "loss": 0.3115, + "step": 17755 + }, + { + "epoch": 0.8317796411673771, + "grad_norm": 0.6121118407164532, + "learning_rate": 3.28879346802999e-06, + "loss": 0.3107, + "step": 17756 + }, + { + "epoch": 0.8318264861573055, + "grad_norm": 0.5773411164212475, + "learning_rate": 3.2886135029795875e-06, + "loss": 0.3092, + "step": 17757 + }, + { + "epoch": 0.8318733311472338, + "grad_norm": 0.5761077875920549, + "learning_rate": 3.288433533390912e-06, + "loss": 0.3457, + "step": 17758 + }, + { + "epoch": 0.8319201761371622, + "grad_norm": 0.5933575918754134, + "learning_rate": 3.2882535592650007e-06, + "loss": 0.323, + "step": 17759 + }, + { + "epoch": 0.8319670211270904, + "grad_norm": 0.5618999666305949, + "learning_rate": 3.2880735806028873e-06, + "loss": 0.3091, + "step": 17760 + }, + { + "epoch": 0.8320138661170188, + "grad_norm": 0.5962665711471543, + "learning_rate": 3.287893597405609e-06, + "loss": 0.3457, + "step": 17761 + }, + { + "epoch": 0.8320607111069471, + "grad_norm": 0.6347483383335661, + "learning_rate": 3.2877136096742014e-06, + "loss": 0.3356, + "step": 17762 + }, + { + "epoch": 0.8321075560968755, + "grad_norm": 0.574996292805131, + "learning_rate": 3.2875336174096994e-06, + "loss": 0.3024, + "step": 17763 + }, + { + "epoch": 0.8321544010868037, + "grad_norm": 0.594913850650328, + "learning_rate": 3.2873536206131396e-06, + "loss": 0.3196, + "step": 17764 + }, + { + "epoch": 0.8322012460767321, + "grad_norm": 0.5698384197161142, + "learning_rate": 3.2871736192855574e-06, + "loss": 0.3129, + "step": 17765 + }, + { + "epoch": 0.8322480910666604, + "grad_norm": 0.5631999352645712, + "learning_rate": 3.286993613427989e-06, + "loss": 0.3219, + "step": 17766 + }, + { + "epoch": 0.8322949360565888, + "grad_norm": 0.5728381225260668, + "learning_rate": 3.286813603041471e-06, + "loss": 0.3147, + "step": 17767 + }, + { + "epoch": 0.8323417810465171, + "grad_norm": 0.5615231152182998, + "learning_rate": 3.2866335881270375e-06, + "loss": 0.2897, + "step": 17768 + }, + { + "epoch": 0.8323886260364454, + "grad_norm": 0.5628953295940946, + "learning_rate": 3.2864535686857256e-06, + "loss": 0.294, + "step": 17769 + }, + { + "epoch": 0.8324354710263737, + "grad_norm": 0.5706112719557221, + "learning_rate": 3.286273544718571e-06, + "loss": 0.3148, + "step": 17770 + }, + { + "epoch": 0.8324823160163021, + "grad_norm": 0.5475446275278891, + "learning_rate": 3.2860935162266096e-06, + "loss": 0.3061, + "step": 17771 + }, + { + "epoch": 0.8325291610062304, + "grad_norm": 0.5358482180449894, + "learning_rate": 3.2859134832108774e-06, + "loss": 0.3127, + "step": 17772 + }, + { + "epoch": 0.8325760059961587, + "grad_norm": 0.558013760489009, + "learning_rate": 3.2857334456724123e-06, + "loss": 0.3025, + "step": 17773 + }, + { + "epoch": 0.832622850986087, + "grad_norm": 0.6329047977537313, + "learning_rate": 3.2855534036122473e-06, + "loss": 0.3133, + "step": 17774 + }, + { + "epoch": 0.8326696959760154, + "grad_norm": 0.5884619144103738, + "learning_rate": 3.2853733570314196e-06, + "loss": 0.3271, + "step": 17775 + }, + { + "epoch": 0.8327165409659437, + "grad_norm": 0.5830863989710741, + "learning_rate": 3.285193305930966e-06, + "loss": 0.3307, + "step": 17776 + }, + { + "epoch": 0.8327633859558721, + "grad_norm": 0.5804446206974224, + "learning_rate": 3.2850132503119227e-06, + "loss": 0.302, + "step": 17777 + }, + { + "epoch": 0.8328102309458003, + "grad_norm": 0.606311772117175, + "learning_rate": 3.284833190175325e-06, + "loss": 0.3149, + "step": 17778 + }, + { + "epoch": 0.8328570759357287, + "grad_norm": 0.5717936245958454, + "learning_rate": 3.2846531255222096e-06, + "loss": 0.3233, + "step": 17779 + }, + { + "epoch": 0.832903920925657, + "grad_norm": 0.5537368579435732, + "learning_rate": 3.284473056353613e-06, + "loss": 0.2983, + "step": 17780 + }, + { + "epoch": 0.8329507659155854, + "grad_norm": 0.607340403345124, + "learning_rate": 3.2842929826705704e-06, + "loss": 0.3371, + "step": 17781 + }, + { + "epoch": 0.8329976109055136, + "grad_norm": 0.6182667556805758, + "learning_rate": 3.2841129044741197e-06, + "loss": 0.3491, + "step": 17782 + }, + { + "epoch": 0.833044455895442, + "grad_norm": 0.6474810286626493, + "learning_rate": 3.283932821765296e-06, + "loss": 0.3385, + "step": 17783 + }, + { + "epoch": 0.8330913008853703, + "grad_norm": 0.5354386273774164, + "learning_rate": 3.283752734545136e-06, + "loss": 0.3008, + "step": 17784 + }, + { + "epoch": 0.8331381458752987, + "grad_norm": 0.5461743539953471, + "learning_rate": 3.2835726428146757e-06, + "loss": 0.3065, + "step": 17785 + }, + { + "epoch": 0.833184990865227, + "grad_norm": 0.7112079172776754, + "learning_rate": 3.2833925465749516e-06, + "loss": 0.315, + "step": 17786 + }, + { + "epoch": 0.8332318358551553, + "grad_norm": 0.6448560114438939, + "learning_rate": 3.283212445827001e-06, + "loss": 0.3421, + "step": 17787 + }, + { + "epoch": 0.8332786808450836, + "grad_norm": 0.5856563474704728, + "learning_rate": 3.2830323405718596e-06, + "loss": 0.3064, + "step": 17788 + }, + { + "epoch": 0.833325525835012, + "grad_norm": 0.588029825358394, + "learning_rate": 3.282852230810563e-06, + "loss": 0.3146, + "step": 17789 + }, + { + "epoch": 0.8333723708249403, + "grad_norm": 0.6370513447932377, + "learning_rate": 3.282672116544149e-06, + "loss": 0.3357, + "step": 17790 + }, + { + "epoch": 0.8334192158148686, + "grad_norm": 0.6444449087899093, + "learning_rate": 3.2824919977736545e-06, + "loss": 0.3284, + "step": 17791 + }, + { + "epoch": 0.8334660608047969, + "grad_norm": 0.6119005942105509, + "learning_rate": 3.282311874500114e-06, + "loss": 0.3029, + "step": 17792 + }, + { + "epoch": 0.8335129057947253, + "grad_norm": 0.5946086979960045, + "learning_rate": 3.282131746724566e-06, + "loss": 0.3083, + "step": 17793 + }, + { + "epoch": 0.8335597507846536, + "grad_norm": 0.5771779214461903, + "learning_rate": 3.2819516144480467e-06, + "loss": 0.3111, + "step": 17794 + }, + { + "epoch": 0.833606595774582, + "grad_norm": 0.6237821684382979, + "learning_rate": 3.2817714776715915e-06, + "loss": 0.3303, + "step": 17795 + }, + { + "epoch": 0.8336534407645102, + "grad_norm": 0.5827245154052035, + "learning_rate": 3.281591336396238e-06, + "loss": 0.3302, + "step": 17796 + }, + { + "epoch": 0.8337002857544386, + "grad_norm": 0.6123891283555716, + "learning_rate": 3.281411190623024e-06, + "loss": 0.3224, + "step": 17797 + }, + { + "epoch": 0.8337471307443669, + "grad_norm": 0.5695770584676885, + "learning_rate": 3.281231040352984e-06, + "loss": 0.2979, + "step": 17798 + }, + { + "epoch": 0.8337939757342953, + "grad_norm": 0.5655657625598419, + "learning_rate": 3.281050885587156e-06, + "loss": 0.3042, + "step": 17799 + }, + { + "epoch": 0.8338408207242235, + "grad_norm": 0.6031001454322202, + "learning_rate": 3.2808707263265758e-06, + "loss": 0.3206, + "step": 17800 + }, + { + "epoch": 0.8338876657141518, + "grad_norm": 0.5840766627806825, + "learning_rate": 3.2806905625722818e-06, + "loss": 0.3191, + "step": 17801 + }, + { + "epoch": 0.8339345107040802, + "grad_norm": 0.5778363775476049, + "learning_rate": 3.2805103943253093e-06, + "loss": 0.3157, + "step": 17802 + }, + { + "epoch": 0.8339813556940086, + "grad_norm": 0.5276476846874693, + "learning_rate": 3.280330221586696e-06, + "loss": 0.2955, + "step": 17803 + }, + { + "epoch": 0.8340282006839369, + "grad_norm": 0.5773056800900974, + "learning_rate": 3.2801500443574784e-06, + "loss": 0.2981, + "step": 17804 + }, + { + "epoch": 0.8340750456738651, + "grad_norm": 0.5832669937688117, + "learning_rate": 3.2799698626386926e-06, + "loss": 0.3233, + "step": 17805 + }, + { + "epoch": 0.8341218906637935, + "grad_norm": 0.5750867229403691, + "learning_rate": 3.279789676431377e-06, + "loss": 0.3171, + "step": 17806 + }, + { + "epoch": 0.8341687356537218, + "grad_norm": 0.630028373669014, + "learning_rate": 3.2796094857365675e-06, + "loss": 0.3426, + "step": 17807 + }, + { + "epoch": 0.8342155806436502, + "grad_norm": 0.5977330480836396, + "learning_rate": 3.2794292905553017e-06, + "loss": 0.3009, + "step": 17808 + }, + { + "epoch": 0.8342624256335784, + "grad_norm": 0.6306969118891292, + "learning_rate": 3.2792490908886158e-06, + "loss": 0.3304, + "step": 17809 + }, + { + "epoch": 0.8343092706235068, + "grad_norm": 0.6563674296170918, + "learning_rate": 3.2790688867375477e-06, + "loss": 0.3465, + "step": 17810 + }, + { + "epoch": 0.8343561156134351, + "grad_norm": 0.5944402958610506, + "learning_rate": 3.2788886781031337e-06, + "loss": 0.337, + "step": 17811 + }, + { + "epoch": 0.8344029606033635, + "grad_norm": 0.5667454805643557, + "learning_rate": 3.2787084649864116e-06, + "loss": 0.3063, + "step": 17812 + }, + { + "epoch": 0.8344498055932918, + "grad_norm": 0.6424089252677126, + "learning_rate": 3.2785282473884172e-06, + "loss": 0.3294, + "step": 17813 + }, + { + "epoch": 0.8344966505832201, + "grad_norm": 0.5729764027774534, + "learning_rate": 3.2783480253101886e-06, + "loss": 0.3115, + "step": 17814 + }, + { + "epoch": 0.8345434955731484, + "grad_norm": 0.6206893366135716, + "learning_rate": 3.2781677987527634e-06, + "loss": 0.3273, + "step": 17815 + }, + { + "epoch": 0.8345903405630768, + "grad_norm": 0.5539794112264438, + "learning_rate": 3.277987567717177e-06, + "loss": 0.2953, + "step": 17816 + }, + { + "epoch": 0.8346371855530051, + "grad_norm": 0.5424702865033996, + "learning_rate": 3.277807332204468e-06, + "loss": 0.3043, + "step": 17817 + }, + { + "epoch": 0.8346840305429334, + "grad_norm": 0.5864861458463687, + "learning_rate": 3.277627092215674e-06, + "loss": 0.3185, + "step": 17818 + }, + { + "epoch": 0.8347308755328617, + "grad_norm": 0.5378529480274921, + "learning_rate": 3.277446847751831e-06, + "loss": 0.2877, + "step": 17819 + }, + { + "epoch": 0.8347777205227901, + "grad_norm": 0.6015384221248421, + "learning_rate": 3.2772665988139768e-06, + "loss": 0.3145, + "step": 17820 + }, + { + "epoch": 0.8348245655127184, + "grad_norm": 0.585047912794426, + "learning_rate": 3.2770863454031486e-06, + "loss": 0.3148, + "step": 17821 + }, + { + "epoch": 0.8348714105026468, + "grad_norm": 0.5741087636703429, + "learning_rate": 3.2769060875203845e-06, + "loss": 0.3027, + "step": 17822 + }, + { + "epoch": 0.834918255492575, + "grad_norm": 0.5745856242363564, + "learning_rate": 3.2767258251667205e-06, + "loss": 0.3, + "step": 17823 + }, + { + "epoch": 0.8349651004825034, + "grad_norm": 0.5700763235184672, + "learning_rate": 3.276545558343195e-06, + "loss": 0.2951, + "step": 17824 + }, + { + "epoch": 0.8350119454724317, + "grad_norm": 0.628265273905923, + "learning_rate": 3.276365287050845e-06, + "loss": 0.3479, + "step": 17825 + }, + { + "epoch": 0.8350587904623601, + "grad_norm": 0.6079830045216551, + "learning_rate": 3.2761850112907072e-06, + "loss": 0.3204, + "step": 17826 + }, + { + "epoch": 0.8351056354522883, + "grad_norm": 0.5906656707642493, + "learning_rate": 3.27600473106382e-06, + "loss": 0.3205, + "step": 17827 + }, + { + "epoch": 0.8351524804422167, + "grad_norm": 0.5404668533407612, + "learning_rate": 3.2758244463712207e-06, + "loss": 0.3157, + "step": 17828 + }, + { + "epoch": 0.835199325432145, + "grad_norm": 0.5767346314343473, + "learning_rate": 3.2756441572139474e-06, + "loss": 0.3329, + "step": 17829 + }, + { + "epoch": 0.8352461704220734, + "grad_norm": 0.5934608674188248, + "learning_rate": 3.275463863593036e-06, + "loss": 0.3166, + "step": 17830 + }, + { + "epoch": 0.8352930154120017, + "grad_norm": 0.572788833363502, + "learning_rate": 3.275283565509525e-06, + "loss": 0.3271, + "step": 17831 + }, + { + "epoch": 0.83533986040193, + "grad_norm": 0.5789598148293167, + "learning_rate": 3.275103262964453e-06, + "loss": 0.3127, + "step": 17832 + }, + { + "epoch": 0.8353867053918583, + "grad_norm": 0.6096536978419775, + "learning_rate": 3.2749229559588554e-06, + "loss": 0.3061, + "step": 17833 + }, + { + "epoch": 0.8354335503817867, + "grad_norm": 0.5685454507895772, + "learning_rate": 3.2747426444937714e-06, + "loss": 0.3157, + "step": 17834 + }, + { + "epoch": 0.835480395371715, + "grad_norm": 0.5850980416342738, + "learning_rate": 3.2745623285702375e-06, + "loss": 0.303, + "step": 17835 + }, + { + "epoch": 0.8355272403616433, + "grad_norm": 0.5992234644407274, + "learning_rate": 3.274382008189293e-06, + "loss": 0.3028, + "step": 17836 + }, + { + "epoch": 0.8355740853515716, + "grad_norm": 0.6700879205260716, + "learning_rate": 3.2742016833519746e-06, + "loss": 0.3219, + "step": 17837 + }, + { + "epoch": 0.8356209303415, + "grad_norm": 0.5890541940629529, + "learning_rate": 3.2740213540593195e-06, + "loss": 0.2971, + "step": 17838 + }, + { + "epoch": 0.8356677753314283, + "grad_norm": 0.5692125891421507, + "learning_rate": 3.273841020312367e-06, + "loss": 0.3129, + "step": 17839 + }, + { + "epoch": 0.8357146203213567, + "grad_norm": 0.6311730733377274, + "learning_rate": 3.2736606821121535e-06, + "loss": 0.3235, + "step": 17840 + }, + { + "epoch": 0.8357614653112849, + "grad_norm": 0.5905451971367637, + "learning_rate": 3.273480339459717e-06, + "loss": 0.3124, + "step": 17841 + }, + { + "epoch": 0.8358083103012133, + "grad_norm": 0.5858025106848578, + "learning_rate": 3.273299992356095e-06, + "loss": 0.3115, + "step": 17842 + }, + { + "epoch": 0.8358551552911416, + "grad_norm": 0.5953576235459124, + "learning_rate": 3.2731196408023275e-06, + "loss": 0.3125, + "step": 17843 + }, + { + "epoch": 0.83590200028107, + "grad_norm": 0.6051397748997019, + "learning_rate": 3.2729392847994494e-06, + "loss": 0.3166, + "step": 17844 + }, + { + "epoch": 0.8359488452709982, + "grad_norm": 0.6771122930185068, + "learning_rate": 3.2727589243485e-06, + "loss": 0.3362, + "step": 17845 + }, + { + "epoch": 0.8359956902609266, + "grad_norm": 0.6915414944379801, + "learning_rate": 3.272578559450518e-06, + "loss": 0.319, + "step": 17846 + }, + { + "epoch": 0.8360425352508549, + "grad_norm": 0.6203580801712405, + "learning_rate": 3.2723981901065404e-06, + "loss": 0.3047, + "step": 17847 + }, + { + "epoch": 0.8360893802407833, + "grad_norm": 0.7096436940476677, + "learning_rate": 3.2722178163176048e-06, + "loss": 0.335, + "step": 17848 + }, + { + "epoch": 0.8361362252307116, + "grad_norm": 0.5812872855948975, + "learning_rate": 3.27203743808475e-06, + "loss": 0.308, + "step": 17849 + }, + { + "epoch": 0.8361830702206399, + "grad_norm": 0.5857614047437789, + "learning_rate": 3.2718570554090146e-06, + "loss": 0.3037, + "step": 17850 + }, + { + "epoch": 0.8362299152105682, + "grad_norm": 0.5754635629965774, + "learning_rate": 3.271676668291435e-06, + "loss": 0.3104, + "step": 17851 + }, + { + "epoch": 0.8362767602004966, + "grad_norm": 0.5882492310977088, + "learning_rate": 3.2714962767330507e-06, + "loss": 0.2899, + "step": 17852 + }, + { + "epoch": 0.8363236051904249, + "grad_norm": 0.5801285057210002, + "learning_rate": 3.271315880734899e-06, + "loss": 0.3079, + "step": 17853 + }, + { + "epoch": 0.8363704501803532, + "grad_norm": 0.5781612777535866, + "learning_rate": 3.2711354802980174e-06, + "loss": 0.3041, + "step": 17854 + }, + { + "epoch": 0.8364172951702815, + "grad_norm": 0.6368460683178253, + "learning_rate": 3.270955075423445e-06, + "loss": 0.3252, + "step": 17855 + }, + { + "epoch": 0.8364641401602099, + "grad_norm": 0.6419344344119288, + "learning_rate": 3.2707746661122207e-06, + "loss": 0.3505, + "step": 17856 + }, + { + "epoch": 0.8365109851501382, + "grad_norm": 0.5632801716881468, + "learning_rate": 3.270594252365382e-06, + "loss": 0.3218, + "step": 17857 + }, + { + "epoch": 0.8365578301400666, + "grad_norm": 0.5734913441403301, + "learning_rate": 3.270413834183967e-06, + "loss": 0.305, + "step": 17858 + }, + { + "epoch": 0.8366046751299948, + "grad_norm": 0.5917703940149427, + "learning_rate": 3.2702334115690137e-06, + "loss": 0.3089, + "step": 17859 + }, + { + "epoch": 0.8366515201199232, + "grad_norm": 0.554687509043847, + "learning_rate": 3.2700529845215613e-06, + "loss": 0.3161, + "step": 17860 + }, + { + "epoch": 0.8366983651098515, + "grad_norm": 0.5810373394157189, + "learning_rate": 3.269872553042647e-06, + "loss": 0.2987, + "step": 17861 + }, + { + "epoch": 0.8367452100997799, + "grad_norm": 0.5939314441033166, + "learning_rate": 3.2696921171333098e-06, + "loss": 0.317, + "step": 17862 + }, + { + "epoch": 0.8367920550897081, + "grad_norm": 0.6573165134230521, + "learning_rate": 3.2695116767945874e-06, + "loss": 0.3198, + "step": 17863 + }, + { + "epoch": 0.8368389000796365, + "grad_norm": 0.5696282367707174, + "learning_rate": 3.2693312320275197e-06, + "loss": 0.3081, + "step": 17864 + }, + { + "epoch": 0.8368857450695648, + "grad_norm": 0.564544324568751, + "learning_rate": 3.2691507828331432e-06, + "loss": 0.3232, + "step": 17865 + }, + { + "epoch": 0.8369325900594932, + "grad_norm": 0.5949565465006363, + "learning_rate": 3.2689703292124975e-06, + "loss": 0.3215, + "step": 17866 + }, + { + "epoch": 0.8369794350494215, + "grad_norm": 0.5864143217521264, + "learning_rate": 3.268789871166621e-06, + "loss": 0.3252, + "step": 17867 + }, + { + "epoch": 0.8370262800393498, + "grad_norm": 0.6452241101524553, + "learning_rate": 3.2686094086965518e-06, + "loss": 0.3408, + "step": 17868 + }, + { + "epoch": 0.8370731250292781, + "grad_norm": 0.6072296433752834, + "learning_rate": 3.2684289418033288e-06, + "loss": 0.3339, + "step": 17869 + }, + { + "epoch": 0.8371199700192065, + "grad_norm": 0.5817601035523315, + "learning_rate": 3.2682484704879907e-06, + "loss": 0.307, + "step": 17870 + }, + { + "epoch": 0.8371668150091348, + "grad_norm": 0.6151916191544229, + "learning_rate": 3.268067994751575e-06, + "loss": 0.3143, + "step": 17871 + }, + { + "epoch": 0.837213659999063, + "grad_norm": 0.5855321716202982, + "learning_rate": 3.267887514595122e-06, + "loss": 0.2977, + "step": 17872 + }, + { + "epoch": 0.8372605049889914, + "grad_norm": 0.6220811322332714, + "learning_rate": 3.267707030019669e-06, + "loss": 0.318, + "step": 17873 + }, + { + "epoch": 0.8373073499789198, + "grad_norm": 0.5455931856099694, + "learning_rate": 3.2675265410262547e-06, + "loss": 0.2877, + "step": 17874 + }, + { + "epoch": 0.8373541949688481, + "grad_norm": 0.5794444696274608, + "learning_rate": 3.267346047615918e-06, + "loss": 0.3225, + "step": 17875 + }, + { + "epoch": 0.8374010399587765, + "grad_norm": 0.6500974051308396, + "learning_rate": 3.2671655497896982e-06, + "loss": 0.3226, + "step": 17876 + }, + { + "epoch": 0.8374478849487047, + "grad_norm": 0.6331917017462764, + "learning_rate": 3.2669850475486324e-06, + "loss": 0.3349, + "step": 17877 + }, + { + "epoch": 0.837494729938633, + "grad_norm": 0.5993232971895649, + "learning_rate": 3.2668045408937614e-06, + "loss": 0.3128, + "step": 17878 + }, + { + "epoch": 0.8375415749285614, + "grad_norm": 0.5704026340303099, + "learning_rate": 3.266624029826123e-06, + "loss": 0.3137, + "step": 17879 + }, + { + "epoch": 0.8375884199184898, + "grad_norm": 0.5726947123495398, + "learning_rate": 3.2664435143467552e-06, + "loss": 0.3205, + "step": 17880 + }, + { + "epoch": 0.837635264908418, + "grad_norm": 0.5649662104504999, + "learning_rate": 3.2662629944566988e-06, + "loss": 0.3213, + "step": 17881 + }, + { + "epoch": 0.8376821098983463, + "grad_norm": 0.576866829177397, + "learning_rate": 3.266082470156991e-06, + "loss": 0.3156, + "step": 17882 + }, + { + "epoch": 0.8377289548882747, + "grad_norm": 0.6450315988515675, + "learning_rate": 3.2659019414486705e-06, + "loss": 0.3285, + "step": 17883 + }, + { + "epoch": 0.837775799878203, + "grad_norm": 0.5920528938402577, + "learning_rate": 3.265721408332777e-06, + "loss": 0.315, + "step": 17884 + }, + { + "epoch": 0.8378226448681314, + "grad_norm": 0.6167473776293582, + "learning_rate": 3.26554087081035e-06, + "loss": 0.3207, + "step": 17885 + }, + { + "epoch": 0.8378694898580596, + "grad_norm": 0.5845680223168109, + "learning_rate": 3.2653603288824277e-06, + "loss": 0.2885, + "step": 17886 + }, + { + "epoch": 0.837916334847988, + "grad_norm": 0.5888405923950848, + "learning_rate": 3.2651797825500473e-06, + "loss": 0.3301, + "step": 17887 + }, + { + "epoch": 0.8379631798379163, + "grad_norm": 0.5544003491584891, + "learning_rate": 3.2649992318142514e-06, + "loss": 0.3033, + "step": 17888 + }, + { + "epoch": 0.8380100248278447, + "grad_norm": 0.5863945627285524, + "learning_rate": 3.264818676676077e-06, + "loss": 0.3214, + "step": 17889 + }, + { + "epoch": 0.8380568698177729, + "grad_norm": 0.5911575208769058, + "learning_rate": 3.2646381171365626e-06, + "loss": 0.3138, + "step": 17890 + }, + { + "epoch": 0.8381037148077013, + "grad_norm": 0.5901056773520867, + "learning_rate": 3.2644575531967487e-06, + "loss": 0.3082, + "step": 17891 + }, + { + "epoch": 0.8381505597976296, + "grad_norm": 0.5612386750568102, + "learning_rate": 3.264276984857674e-06, + "loss": 0.2946, + "step": 17892 + }, + { + "epoch": 0.838197404787558, + "grad_norm": 0.578764941978358, + "learning_rate": 3.264096412120377e-06, + "loss": 0.3156, + "step": 17893 + }, + { + "epoch": 0.8382442497774863, + "grad_norm": 0.5544671331677246, + "learning_rate": 3.263915834985896e-06, + "loss": 0.3022, + "step": 17894 + }, + { + "epoch": 0.8382910947674146, + "grad_norm": 0.5657492891642456, + "learning_rate": 3.263735253455273e-06, + "loss": 0.304, + "step": 17895 + }, + { + "epoch": 0.8383379397573429, + "grad_norm": 0.5876454565862218, + "learning_rate": 3.263554667529545e-06, + "loss": 0.306, + "step": 17896 + }, + { + "epoch": 0.8383847847472713, + "grad_norm": 0.6514550508598274, + "learning_rate": 3.263374077209751e-06, + "loss": 0.3431, + "step": 17897 + }, + { + "epoch": 0.8384316297371996, + "grad_norm": 0.5664573389272596, + "learning_rate": 3.2631934824969324e-06, + "loss": 0.2987, + "step": 17898 + }, + { + "epoch": 0.8384784747271279, + "grad_norm": 0.6178620170947303, + "learning_rate": 3.2630128833921265e-06, + "loss": 0.3263, + "step": 17899 + }, + { + "epoch": 0.8385253197170562, + "grad_norm": 0.6490516380365563, + "learning_rate": 3.262832279896373e-06, + "loss": 0.3428, + "step": 17900 + }, + { + "epoch": 0.8385721647069846, + "grad_norm": 0.6343669303452153, + "learning_rate": 3.262651672010712e-06, + "loss": 0.3298, + "step": 17901 + }, + { + "epoch": 0.8386190096969129, + "grad_norm": 0.5603672466616503, + "learning_rate": 3.262471059736182e-06, + "loss": 0.3175, + "step": 17902 + }, + { + "epoch": 0.8386658546868413, + "grad_norm": 0.6262153389660605, + "learning_rate": 3.262290443073823e-06, + "loss": 0.3242, + "step": 17903 + }, + { + "epoch": 0.8387126996767695, + "grad_norm": 0.570803024152548, + "learning_rate": 3.262109822024674e-06, + "loss": 0.3084, + "step": 17904 + }, + { + "epoch": 0.8387595446666979, + "grad_norm": 0.6479662078807084, + "learning_rate": 3.261929196589774e-06, + "loss": 0.3165, + "step": 17905 + }, + { + "epoch": 0.8388063896566262, + "grad_norm": 0.5993200192730248, + "learning_rate": 3.2617485667701633e-06, + "loss": 0.3184, + "step": 17906 + }, + { + "epoch": 0.8388532346465546, + "grad_norm": 0.6017378594994927, + "learning_rate": 3.2615679325668814e-06, + "loss": 0.309, + "step": 17907 + }, + { + "epoch": 0.8389000796364828, + "grad_norm": 0.5458366370002365, + "learning_rate": 3.2613872939809664e-06, + "loss": 0.3096, + "step": 17908 + }, + { + "epoch": 0.8389469246264112, + "grad_norm": 0.5987106484201543, + "learning_rate": 3.26120665101346e-06, + "loss": 0.3254, + "step": 17909 + }, + { + "epoch": 0.8389937696163395, + "grad_norm": 0.6062495648012683, + "learning_rate": 3.2610260036654005e-06, + "loss": 0.3305, + "step": 17910 + }, + { + "epoch": 0.8390406146062679, + "grad_norm": 0.5853572607513339, + "learning_rate": 3.260845351937827e-06, + "loss": 0.3125, + "step": 17911 + }, + { + "epoch": 0.8390874595961962, + "grad_norm": 0.5835234484952172, + "learning_rate": 3.26066469583178e-06, + "loss": 0.3216, + "step": 17912 + }, + { + "epoch": 0.8391343045861245, + "grad_norm": 0.6432594286471296, + "learning_rate": 3.2604840353482997e-06, + "loss": 0.3463, + "step": 17913 + }, + { + "epoch": 0.8391811495760528, + "grad_norm": 0.5732891418809507, + "learning_rate": 3.260303370488424e-06, + "loss": 0.31, + "step": 17914 + }, + { + "epoch": 0.8392279945659812, + "grad_norm": 0.6102759975162445, + "learning_rate": 3.2601227012531934e-06, + "loss": 0.3099, + "step": 17915 + }, + { + "epoch": 0.8392748395559095, + "grad_norm": 0.5490448839126675, + "learning_rate": 3.2599420276436485e-06, + "loss": 0.3186, + "step": 17916 + }, + { + "epoch": 0.8393216845458378, + "grad_norm": 0.5939286279533845, + "learning_rate": 3.2597613496608276e-06, + "loss": 0.3046, + "step": 17917 + }, + { + "epoch": 0.8393685295357661, + "grad_norm": 0.585267000744627, + "learning_rate": 3.259580667305771e-06, + "loss": 0.3247, + "step": 17918 + }, + { + "epoch": 0.8394153745256945, + "grad_norm": 0.5887766678995184, + "learning_rate": 3.259399980579519e-06, + "loss": 0.3572, + "step": 17919 + }, + { + "epoch": 0.8394622195156228, + "grad_norm": 0.5911760012787269, + "learning_rate": 3.259219289483111e-06, + "loss": 0.3277, + "step": 17920 + }, + { + "epoch": 0.8395090645055512, + "grad_norm": 0.5611234737100684, + "learning_rate": 3.259038594017586e-06, + "loss": 0.3217, + "step": 17921 + }, + { + "epoch": 0.8395559094954794, + "grad_norm": 0.6121536578607083, + "learning_rate": 3.2588578941839855e-06, + "loss": 0.3316, + "step": 17922 + }, + { + "epoch": 0.8396027544854078, + "grad_norm": 0.6259690285041292, + "learning_rate": 3.2586771899833485e-06, + "loss": 0.335, + "step": 17923 + }, + { + "epoch": 0.8396495994753361, + "grad_norm": 0.5502540415484223, + "learning_rate": 3.258496481416715e-06, + "loss": 0.3046, + "step": 17924 + }, + { + "epoch": 0.8396964444652645, + "grad_norm": 0.5498206850695965, + "learning_rate": 3.258315768485125e-06, + "loss": 0.3239, + "step": 17925 + }, + { + "epoch": 0.8397432894551927, + "grad_norm": 0.5581141501601424, + "learning_rate": 3.258135051189617e-06, + "loss": 0.3187, + "step": 17926 + }, + { + "epoch": 0.8397901344451211, + "grad_norm": 0.6024412335545467, + "learning_rate": 3.257954329531234e-06, + "loss": 0.3395, + "step": 17927 + }, + { + "epoch": 0.8398369794350494, + "grad_norm": 0.5532389987588706, + "learning_rate": 3.257773603511014e-06, + "loss": 0.3059, + "step": 17928 + }, + { + "epoch": 0.8398838244249778, + "grad_norm": 0.5650441589878975, + "learning_rate": 3.2575928731299965e-06, + "loss": 0.3043, + "step": 17929 + }, + { + "epoch": 0.8399306694149061, + "grad_norm": 0.5683826771356268, + "learning_rate": 3.257412138389223e-06, + "loss": 0.3099, + "step": 17930 + }, + { + "epoch": 0.8399775144048344, + "grad_norm": 0.5568194961198708, + "learning_rate": 3.2572313992897335e-06, + "loss": 0.3077, + "step": 17931 + }, + { + "epoch": 0.8400243593947627, + "grad_norm": 0.5827169471297866, + "learning_rate": 3.2570506558325664e-06, + "loss": 0.301, + "step": 17932 + }, + { + "epoch": 0.8400712043846911, + "grad_norm": 0.6030854317262843, + "learning_rate": 3.2568699080187637e-06, + "loss": 0.3114, + "step": 17933 + }, + { + "epoch": 0.8401180493746194, + "grad_norm": 0.5635626679900977, + "learning_rate": 3.256689155849366e-06, + "loss": 0.3002, + "step": 17934 + }, + { + "epoch": 0.8401648943645477, + "grad_norm": 0.5503724263342976, + "learning_rate": 3.256508399325411e-06, + "loss": 0.3088, + "step": 17935 + }, + { + "epoch": 0.840211739354476, + "grad_norm": 0.5697204961437649, + "learning_rate": 3.2563276384479404e-06, + "loss": 0.3199, + "step": 17936 + }, + { + "epoch": 0.8402585843444044, + "grad_norm": 0.6848897298605731, + "learning_rate": 3.2561468732179947e-06, + "loss": 0.3383, + "step": 17937 + }, + { + "epoch": 0.8403054293343327, + "grad_norm": 0.5930545529235073, + "learning_rate": 3.255966103636614e-06, + "loss": 0.3275, + "step": 17938 + }, + { + "epoch": 0.8403522743242611, + "grad_norm": 0.5851874880245603, + "learning_rate": 3.255785329704838e-06, + "loss": 0.3072, + "step": 17939 + }, + { + "epoch": 0.8403991193141893, + "grad_norm": 0.6529299341304428, + "learning_rate": 3.2556045514237074e-06, + "loss": 0.3381, + "step": 17940 + }, + { + "epoch": 0.8404459643041177, + "grad_norm": 0.5807529029832091, + "learning_rate": 3.255423768794263e-06, + "loss": 0.2897, + "step": 17941 + }, + { + "epoch": 0.840492809294046, + "grad_norm": 0.6194611070790654, + "learning_rate": 3.2552429818175436e-06, + "loss": 0.3266, + "step": 17942 + }, + { + "epoch": 0.8405396542839744, + "grad_norm": 0.5521086321941551, + "learning_rate": 3.2550621904945917e-06, + "loss": 0.3076, + "step": 17943 + }, + { + "epoch": 0.8405864992739026, + "grad_norm": 0.5700170213734503, + "learning_rate": 3.2548813948264467e-06, + "loss": 0.3003, + "step": 17944 + }, + { + "epoch": 0.840633344263831, + "grad_norm": 0.5959126165741242, + "learning_rate": 3.254700594814148e-06, + "loss": 0.3218, + "step": 17945 + }, + { + "epoch": 0.8406801892537593, + "grad_norm": 0.5737091203429496, + "learning_rate": 3.2545197904587377e-06, + "loss": 0.3054, + "step": 17946 + }, + { + "epoch": 0.8407270342436877, + "grad_norm": 0.5901298958294517, + "learning_rate": 3.254338981761256e-06, + "loss": 0.3169, + "step": 17947 + }, + { + "epoch": 0.840773879233616, + "grad_norm": 0.5678312566044477, + "learning_rate": 3.2541581687227423e-06, + "loss": 0.3012, + "step": 17948 + }, + { + "epoch": 0.8408207242235443, + "grad_norm": 0.6332456362511069, + "learning_rate": 3.2539773513442386e-06, + "loss": 0.326, + "step": 17949 + }, + { + "epoch": 0.8408675692134726, + "grad_norm": 0.5905834999846326, + "learning_rate": 3.253796529626784e-06, + "loss": 0.3219, + "step": 17950 + }, + { + "epoch": 0.840914414203401, + "grad_norm": 0.6559254351048645, + "learning_rate": 3.253615703571421e-06, + "loss": 0.3288, + "step": 17951 + }, + { + "epoch": 0.8409612591933293, + "grad_norm": 0.6003401954208818, + "learning_rate": 3.2534348731791884e-06, + "loss": 0.3335, + "step": 17952 + }, + { + "epoch": 0.8410081041832576, + "grad_norm": 0.6189344102449441, + "learning_rate": 3.253254038451127e-06, + "loss": 0.3219, + "step": 17953 + }, + { + "epoch": 0.8410549491731859, + "grad_norm": 0.5455627525308197, + "learning_rate": 3.253073199388278e-06, + "loss": 0.2819, + "step": 17954 + }, + { + "epoch": 0.8411017941631143, + "grad_norm": 0.6010241525610124, + "learning_rate": 3.252892355991683e-06, + "loss": 0.3289, + "step": 17955 + }, + { + "epoch": 0.8411486391530426, + "grad_norm": 0.6140317293584775, + "learning_rate": 3.2527115082623808e-06, + "loss": 0.3241, + "step": 17956 + }, + { + "epoch": 0.841195484142971, + "grad_norm": 0.5818065526017517, + "learning_rate": 3.252530656201413e-06, + "loss": 0.3112, + "step": 17957 + }, + { + "epoch": 0.8412423291328992, + "grad_norm": 0.653811219693046, + "learning_rate": 3.2523497998098208e-06, + "loss": 0.3227, + "step": 17958 + }, + { + "epoch": 0.8412891741228276, + "grad_norm": 0.5372748247924892, + "learning_rate": 3.2521689390886446e-06, + "loss": 0.2924, + "step": 17959 + }, + { + "epoch": 0.8413360191127559, + "grad_norm": 0.598190363088198, + "learning_rate": 3.2519880740389247e-06, + "loss": 0.3026, + "step": 17960 + }, + { + "epoch": 0.8413828641026843, + "grad_norm": 0.614876539971832, + "learning_rate": 3.2518072046617032e-06, + "loss": 0.3334, + "step": 17961 + }, + { + "epoch": 0.8414297090926125, + "grad_norm": 0.5820181652032826, + "learning_rate": 3.2516263309580205e-06, + "loss": 0.3119, + "step": 17962 + }, + { + "epoch": 0.8414765540825409, + "grad_norm": 0.6143554591229746, + "learning_rate": 3.2514454529289157e-06, + "loss": 0.3182, + "step": 17963 + }, + { + "epoch": 0.8415233990724692, + "grad_norm": 0.5724357671025853, + "learning_rate": 3.251264570575432e-06, + "loss": 0.3075, + "step": 17964 + }, + { + "epoch": 0.8415702440623976, + "grad_norm": 0.607004445117515, + "learning_rate": 3.2510836838986104e-06, + "loss": 0.3203, + "step": 17965 + }, + { + "epoch": 0.8416170890523259, + "grad_norm": 0.5914042299201293, + "learning_rate": 3.2509027928994897e-06, + "loss": 0.3109, + "step": 17966 + }, + { + "epoch": 0.8416639340422541, + "grad_norm": 0.5469472902513314, + "learning_rate": 3.250721897579112e-06, + "loss": 0.3263, + "step": 17967 + }, + { + "epoch": 0.8417107790321825, + "grad_norm": 0.5952109055846843, + "learning_rate": 3.2505409979385193e-06, + "loss": 0.3173, + "step": 17968 + }, + { + "epoch": 0.8417576240221109, + "grad_norm": 0.614850877066332, + "learning_rate": 3.2503600939787515e-06, + "loss": 0.3221, + "step": 17969 + }, + { + "epoch": 0.8418044690120392, + "grad_norm": 0.5979440303608118, + "learning_rate": 3.2501791857008503e-06, + "loss": 0.3303, + "step": 17970 + }, + { + "epoch": 0.8418513140019674, + "grad_norm": 0.5881333762307658, + "learning_rate": 3.249998273105856e-06, + "loss": 0.3175, + "step": 17971 + }, + { + "epoch": 0.8418981589918958, + "grad_norm": 0.6020856303252703, + "learning_rate": 3.2498173561948104e-06, + "loss": 0.308, + "step": 17972 + }, + { + "epoch": 0.8419450039818241, + "grad_norm": 0.624633837447461, + "learning_rate": 3.249636434968754e-06, + "loss": 0.3029, + "step": 17973 + }, + { + "epoch": 0.8419918489717525, + "grad_norm": 0.6071594697185134, + "learning_rate": 3.249455509428729e-06, + "loss": 0.3179, + "step": 17974 + }, + { + "epoch": 0.8420386939616809, + "grad_norm": 0.5880590986743452, + "learning_rate": 3.249274579575775e-06, + "loss": 0.3198, + "step": 17975 + }, + { + "epoch": 0.8420855389516091, + "grad_norm": 0.5786129118798365, + "learning_rate": 3.249093645410935e-06, + "loss": 0.3213, + "step": 17976 + }, + { + "epoch": 0.8421323839415374, + "grad_norm": 0.6853894939703504, + "learning_rate": 3.248912706935249e-06, + "loss": 0.335, + "step": 17977 + }, + { + "epoch": 0.8421792289314658, + "grad_norm": 0.5773913832610453, + "learning_rate": 3.2487317641497583e-06, + "loss": 0.3188, + "step": 17978 + }, + { + "epoch": 0.8422260739213941, + "grad_norm": 0.6291529735495363, + "learning_rate": 3.2485508170555047e-06, + "loss": 0.3521, + "step": 17979 + }, + { + "epoch": 0.8422729189113224, + "grad_norm": 0.5831664466426382, + "learning_rate": 3.24836986565353e-06, + "loss": 0.3242, + "step": 17980 + }, + { + "epoch": 0.8423197639012507, + "grad_norm": 0.6506884347903794, + "learning_rate": 3.2481889099448737e-06, + "loss": 0.3361, + "step": 17981 + }, + { + "epoch": 0.8423666088911791, + "grad_norm": 0.6026162776004348, + "learning_rate": 3.2480079499305784e-06, + "loss": 0.3091, + "step": 17982 + }, + { + "epoch": 0.8424134538811074, + "grad_norm": 0.6242622947263726, + "learning_rate": 3.247826985611686e-06, + "loss": 0.323, + "step": 17983 + }, + { + "epoch": 0.8424602988710358, + "grad_norm": 0.567154309589139, + "learning_rate": 3.247646016989237e-06, + "loss": 0.3148, + "step": 17984 + }, + { + "epoch": 0.842507143860964, + "grad_norm": 0.6088631117527312, + "learning_rate": 3.247465044064273e-06, + "loss": 0.3465, + "step": 17985 + }, + { + "epoch": 0.8425539888508924, + "grad_norm": 0.5611651339116978, + "learning_rate": 3.2472840668378357e-06, + "loss": 0.3194, + "step": 17986 + }, + { + "epoch": 0.8426008338408207, + "grad_norm": 0.6380403310116932, + "learning_rate": 3.2471030853109664e-06, + "loss": 0.3292, + "step": 17987 + }, + { + "epoch": 0.8426476788307491, + "grad_norm": 0.5721740151738782, + "learning_rate": 3.246922099484706e-06, + "loss": 0.304, + "step": 17988 + }, + { + "epoch": 0.8426945238206773, + "grad_norm": 0.6008150379098888, + "learning_rate": 3.2467411093600975e-06, + "loss": 0.3145, + "step": 17989 + }, + { + "epoch": 0.8427413688106057, + "grad_norm": 0.6144340710077881, + "learning_rate": 3.2465601149381817e-06, + "loss": 0.3354, + "step": 17990 + }, + { + "epoch": 0.842788213800534, + "grad_norm": 0.6452219138314396, + "learning_rate": 3.2463791162199994e-06, + "loss": 0.3283, + "step": 17991 + }, + { + "epoch": 0.8428350587904624, + "grad_norm": 0.5945812926990519, + "learning_rate": 3.246198113206593e-06, + "loss": 0.3272, + "step": 17992 + }, + { + "epoch": 0.8428819037803907, + "grad_norm": 0.57583791305225, + "learning_rate": 3.2460171058990044e-06, + "loss": 0.2974, + "step": 17993 + }, + { + "epoch": 0.842928748770319, + "grad_norm": 0.6243257131098173, + "learning_rate": 3.2458360942982744e-06, + "loss": 0.3612, + "step": 17994 + }, + { + "epoch": 0.8429755937602473, + "grad_norm": 0.5993090600889522, + "learning_rate": 3.2456550784054454e-06, + "loss": 0.3309, + "step": 17995 + }, + { + "epoch": 0.8430224387501757, + "grad_norm": 0.548979325583474, + "learning_rate": 3.245474058221558e-06, + "loss": 0.2879, + "step": 17996 + }, + { + "epoch": 0.843069283740104, + "grad_norm": 0.6529516299724735, + "learning_rate": 3.245293033747656e-06, + "loss": 0.3206, + "step": 17997 + }, + { + "epoch": 0.8431161287300323, + "grad_norm": 0.5923043745537424, + "learning_rate": 3.2451120049847796e-06, + "loss": 0.3219, + "step": 17998 + }, + { + "epoch": 0.8431629737199606, + "grad_norm": 0.6445798740530292, + "learning_rate": 3.2449309719339706e-06, + "loss": 0.3202, + "step": 17999 + }, + { + "epoch": 0.843209818709889, + "grad_norm": 0.5733704149471924, + "learning_rate": 3.2447499345962715e-06, + "loss": 0.3281, + "step": 18000 + }, + { + "epoch": 0.8432566636998173, + "grad_norm": 0.5451688195548295, + "learning_rate": 3.244568892972724e-06, + "loss": 0.2984, + "step": 18001 + }, + { + "epoch": 0.8433035086897457, + "grad_norm": 0.6662396468057522, + "learning_rate": 3.244387847064368e-06, + "loss": 0.3237, + "step": 18002 + }, + { + "epoch": 0.8433503536796739, + "grad_norm": 0.5855169543316082, + "learning_rate": 3.2442067968722477e-06, + "loss": 0.3201, + "step": 18003 + }, + { + "epoch": 0.8433971986696023, + "grad_norm": 0.5773499475096544, + "learning_rate": 3.244025742397406e-06, + "loss": 0.3196, + "step": 18004 + }, + { + "epoch": 0.8434440436595306, + "grad_norm": 0.5989499154932639, + "learning_rate": 3.2438446836408814e-06, + "loss": 0.311, + "step": 18005 + }, + { + "epoch": 0.843490888649459, + "grad_norm": 0.6011212902496824, + "learning_rate": 3.2436636206037174e-06, + "loss": 0.3315, + "step": 18006 + }, + { + "epoch": 0.8435377336393872, + "grad_norm": 0.6446932400176855, + "learning_rate": 3.2434825532869575e-06, + "loss": 0.3243, + "step": 18007 + }, + { + "epoch": 0.8435845786293156, + "grad_norm": 0.5928958060791844, + "learning_rate": 3.243301481691642e-06, + "loss": 0.3206, + "step": 18008 + }, + { + "epoch": 0.8436314236192439, + "grad_norm": 0.5853130245735529, + "learning_rate": 3.2431204058188125e-06, + "loss": 0.3096, + "step": 18009 + }, + { + "epoch": 0.8436782686091723, + "grad_norm": 0.6223146531180189, + "learning_rate": 3.2429393256695128e-06, + "loss": 0.338, + "step": 18010 + }, + { + "epoch": 0.8437251135991006, + "grad_norm": 0.6270086817464511, + "learning_rate": 3.2427582412447838e-06, + "loss": 0.3182, + "step": 18011 + }, + { + "epoch": 0.8437719585890289, + "grad_norm": 0.5871070899665982, + "learning_rate": 3.2425771525456673e-06, + "loss": 0.3088, + "step": 18012 + }, + { + "epoch": 0.8438188035789572, + "grad_norm": 0.6030755194897575, + "learning_rate": 3.242396059573206e-06, + "loss": 0.3203, + "step": 18013 + }, + { + "epoch": 0.8438656485688856, + "grad_norm": 0.6156386323667233, + "learning_rate": 3.242214962328443e-06, + "loss": 0.3101, + "step": 18014 + }, + { + "epoch": 0.8439124935588139, + "grad_norm": 0.6323648985021094, + "learning_rate": 3.242033860812418e-06, + "loss": 0.3277, + "step": 18015 + }, + { + "epoch": 0.8439593385487422, + "grad_norm": 0.6159268735173906, + "learning_rate": 3.241852755026176e-06, + "loss": 0.3441, + "step": 18016 + }, + { + "epoch": 0.8440061835386705, + "grad_norm": 0.6041939028934628, + "learning_rate": 3.2416716449707564e-06, + "loss": 0.3257, + "step": 18017 + }, + { + "epoch": 0.8440530285285989, + "grad_norm": 0.583573605754065, + "learning_rate": 3.241490530647204e-06, + "loss": 0.3229, + "step": 18018 + }, + { + "epoch": 0.8440998735185272, + "grad_norm": 0.5273366765160595, + "learning_rate": 3.24130941205656e-06, + "loss": 0.3096, + "step": 18019 + }, + { + "epoch": 0.8441467185084556, + "grad_norm": 0.6434169817952696, + "learning_rate": 3.2411282891998657e-06, + "loss": 0.355, + "step": 18020 + }, + { + "epoch": 0.8441935634983838, + "grad_norm": 0.5638752562303275, + "learning_rate": 3.240947162078165e-06, + "loss": 0.2938, + "step": 18021 + }, + { + "epoch": 0.8442404084883122, + "grad_norm": 0.5962999066787479, + "learning_rate": 3.2407660306925e-06, + "loss": 0.3395, + "step": 18022 + }, + { + "epoch": 0.8442872534782405, + "grad_norm": 0.5888273771614161, + "learning_rate": 3.2405848950439118e-06, + "loss": 0.3295, + "step": 18023 + }, + { + "epoch": 0.8443340984681689, + "grad_norm": 0.5693211477262872, + "learning_rate": 3.240403755133444e-06, + "loss": 0.3057, + "step": 18024 + }, + { + "epoch": 0.8443809434580971, + "grad_norm": 0.5280167588769825, + "learning_rate": 3.240222610962139e-06, + "loss": 0.2915, + "step": 18025 + }, + { + "epoch": 0.8444277884480255, + "grad_norm": 0.6131963375995245, + "learning_rate": 3.240041462531039e-06, + "loss": 0.3011, + "step": 18026 + }, + { + "epoch": 0.8444746334379538, + "grad_norm": 0.6103651630694701, + "learning_rate": 3.239860309841185e-06, + "loss": 0.3091, + "step": 18027 + }, + { + "epoch": 0.8445214784278822, + "grad_norm": 0.5785178873160645, + "learning_rate": 3.239679152893623e-06, + "loss": 0.3217, + "step": 18028 + }, + { + "epoch": 0.8445683234178105, + "grad_norm": 0.6143680050280323, + "learning_rate": 3.239497991689392e-06, + "loss": 0.3134, + "step": 18029 + }, + { + "epoch": 0.8446151684077388, + "grad_norm": 0.6777804321347815, + "learning_rate": 3.239316826229536e-06, + "loss": 0.3144, + "step": 18030 + }, + { + "epoch": 0.8446620133976671, + "grad_norm": 0.6086101233672485, + "learning_rate": 3.239135656515098e-06, + "loss": 0.3171, + "step": 18031 + }, + { + "epoch": 0.8447088583875955, + "grad_norm": 0.5483347438207086, + "learning_rate": 3.23895448254712e-06, + "loss": 0.3086, + "step": 18032 + }, + { + "epoch": 0.8447557033775238, + "grad_norm": 0.5657304553456951, + "learning_rate": 3.2387733043266447e-06, + "loss": 0.318, + "step": 18033 + }, + { + "epoch": 0.844802548367452, + "grad_norm": 0.5742558582556746, + "learning_rate": 3.2385921218547137e-06, + "loss": 0.3094, + "step": 18034 + }, + { + "epoch": 0.8448493933573804, + "grad_norm": 0.5742418524014529, + "learning_rate": 3.2384109351323724e-06, + "loss": 0.3078, + "step": 18035 + }, + { + "epoch": 0.8448962383473088, + "grad_norm": 0.5969857463481506, + "learning_rate": 3.23822974416066e-06, + "loss": 0.3256, + "step": 18036 + }, + { + "epoch": 0.8449430833372371, + "grad_norm": 0.5542831370878751, + "learning_rate": 3.238048548940622e-06, + "loss": 0.3271, + "step": 18037 + }, + { + "epoch": 0.8449899283271655, + "grad_norm": 0.6435334742459932, + "learning_rate": 3.2378673494733004e-06, + "loss": 0.3338, + "step": 18038 + }, + { + "epoch": 0.8450367733170937, + "grad_norm": 0.6139912144528432, + "learning_rate": 3.237686145759737e-06, + "loss": 0.3161, + "step": 18039 + }, + { + "epoch": 0.845083618307022, + "grad_norm": 0.6558850219009659, + "learning_rate": 3.237504937800975e-06, + "loss": 0.3181, + "step": 18040 + }, + { + "epoch": 0.8451304632969504, + "grad_norm": 0.6031879630003343, + "learning_rate": 3.237323725598058e-06, + "loss": 0.3342, + "step": 18041 + }, + { + "epoch": 0.8451773082868788, + "grad_norm": 0.5897806971404075, + "learning_rate": 3.2371425091520287e-06, + "loss": 0.3138, + "step": 18042 + }, + { + "epoch": 0.845224153276807, + "grad_norm": 0.5957773965812897, + "learning_rate": 3.2369612884639283e-06, + "loss": 0.3091, + "step": 18043 + }, + { + "epoch": 0.8452709982667354, + "grad_norm": 0.6023957212455191, + "learning_rate": 3.2367800635348025e-06, + "loss": 0.3301, + "step": 18044 + }, + { + "epoch": 0.8453178432566637, + "grad_norm": 0.5670294103043296, + "learning_rate": 3.2365988343656907e-06, + "loss": 0.318, + "step": 18045 + }, + { + "epoch": 0.845364688246592, + "grad_norm": 0.5805780681730277, + "learning_rate": 3.2364176009576393e-06, + "loss": 0.316, + "step": 18046 + }, + { + "epoch": 0.8454115332365204, + "grad_norm": 0.5775832113343629, + "learning_rate": 3.2362363633116894e-06, + "loss": 0.2989, + "step": 18047 + }, + { + "epoch": 0.8454583782264486, + "grad_norm": 0.5668971857925663, + "learning_rate": 3.2360551214288837e-06, + "loss": 0.302, + "step": 18048 + }, + { + "epoch": 0.845505223216377, + "grad_norm": 0.6100607627214031, + "learning_rate": 3.2358738753102665e-06, + "loss": 0.3089, + "step": 18049 + }, + { + "epoch": 0.8455520682063054, + "grad_norm": 0.5948257914066108, + "learning_rate": 3.2356926249568797e-06, + "loss": 0.3177, + "step": 18050 + }, + { + "epoch": 0.8455989131962337, + "grad_norm": 0.5991320070758335, + "learning_rate": 3.2355113703697664e-06, + "loss": 0.3239, + "step": 18051 + }, + { + "epoch": 0.8456457581861619, + "grad_norm": 0.5671073198528015, + "learning_rate": 3.2353301115499703e-06, + "loss": 0.3232, + "step": 18052 + }, + { + "epoch": 0.8456926031760903, + "grad_norm": 0.5461976538771351, + "learning_rate": 3.235148848498535e-06, + "loss": 0.3091, + "step": 18053 + }, + { + "epoch": 0.8457394481660186, + "grad_norm": 0.575454732260166, + "learning_rate": 3.2349675812165016e-06, + "loss": 0.3045, + "step": 18054 + }, + { + "epoch": 0.845786293155947, + "grad_norm": 0.5923712315282148, + "learning_rate": 3.234786309704915e-06, + "loss": 0.3341, + "step": 18055 + }, + { + "epoch": 0.8458331381458754, + "grad_norm": 0.5825734546148394, + "learning_rate": 3.2346050339648182e-06, + "loss": 0.3128, + "step": 18056 + }, + { + "epoch": 0.8458799831358036, + "grad_norm": 0.5765901245801452, + "learning_rate": 3.234423753997254e-06, + "loss": 0.3201, + "step": 18057 + }, + { + "epoch": 0.8459268281257319, + "grad_norm": 0.578555607571055, + "learning_rate": 3.2342424698032647e-06, + "loss": 0.3101, + "step": 18058 + }, + { + "epoch": 0.8459736731156603, + "grad_norm": 0.6145369645913444, + "learning_rate": 3.234061181383896e-06, + "loss": 0.3265, + "step": 18059 + }, + { + "epoch": 0.8460205181055886, + "grad_norm": 0.6342392919767837, + "learning_rate": 3.233879888740189e-06, + "loss": 0.3414, + "step": 18060 + }, + { + "epoch": 0.8460673630955169, + "grad_norm": 0.5368935700399764, + "learning_rate": 3.2336985918731878e-06, + "loss": 0.3282, + "step": 18061 + }, + { + "epoch": 0.8461142080854452, + "grad_norm": 0.5481250408421476, + "learning_rate": 3.2335172907839352e-06, + "loss": 0.2953, + "step": 18062 + }, + { + "epoch": 0.8461610530753736, + "grad_norm": 0.580337200033054, + "learning_rate": 3.2333359854734758e-06, + "loss": 0.3083, + "step": 18063 + }, + { + "epoch": 0.8462078980653019, + "grad_norm": 0.6046048720393613, + "learning_rate": 3.2331546759428513e-06, + "loss": 0.3283, + "step": 18064 + }, + { + "epoch": 0.8462547430552303, + "grad_norm": 0.6101439324926062, + "learning_rate": 3.2329733621931065e-06, + "loss": 0.3188, + "step": 18065 + }, + { + "epoch": 0.8463015880451585, + "grad_norm": 0.5709775271202812, + "learning_rate": 3.2327920442252834e-06, + "loss": 0.3001, + "step": 18066 + }, + { + "epoch": 0.8463484330350869, + "grad_norm": 0.6104870677786065, + "learning_rate": 3.2326107220404267e-06, + "loss": 0.3184, + "step": 18067 + }, + { + "epoch": 0.8463952780250152, + "grad_norm": 0.6195800217647854, + "learning_rate": 3.2324293956395804e-06, + "loss": 0.3158, + "step": 18068 + }, + { + "epoch": 0.8464421230149436, + "grad_norm": 0.5447015758943771, + "learning_rate": 3.2322480650237854e-06, + "loss": 0.2948, + "step": 18069 + }, + { + "epoch": 0.8464889680048718, + "grad_norm": 0.5450607127410261, + "learning_rate": 3.2320667301940876e-06, + "loss": 0.3102, + "step": 18070 + }, + { + "epoch": 0.8465358129948002, + "grad_norm": 0.5780846064394691, + "learning_rate": 3.2318853911515304e-06, + "loss": 0.3112, + "step": 18071 + }, + { + "epoch": 0.8465826579847285, + "grad_norm": 0.5832127666744771, + "learning_rate": 3.2317040478971556e-06, + "loss": 0.3281, + "step": 18072 + }, + { + "epoch": 0.8466295029746569, + "grad_norm": 0.6061935445414259, + "learning_rate": 3.231522700432008e-06, + "loss": 0.3251, + "step": 18073 + }, + { + "epoch": 0.8466763479645852, + "grad_norm": 0.6292631170334063, + "learning_rate": 3.231341348757132e-06, + "loss": 0.3235, + "step": 18074 + }, + { + "epoch": 0.8467231929545135, + "grad_norm": 0.5559708273911165, + "learning_rate": 3.23115999287357e-06, + "loss": 0.3147, + "step": 18075 + }, + { + "epoch": 0.8467700379444418, + "grad_norm": 0.5918403327511251, + "learning_rate": 3.2309786327823654e-06, + "loss": 0.3358, + "step": 18076 + }, + { + "epoch": 0.8468168829343702, + "grad_norm": 0.5672748876216371, + "learning_rate": 3.2307972684845633e-06, + "loss": 0.3327, + "step": 18077 + }, + { + "epoch": 0.8468637279242985, + "grad_norm": 0.569339154750306, + "learning_rate": 3.2306158999812066e-06, + "loss": 0.3233, + "step": 18078 + }, + { + "epoch": 0.8469105729142268, + "grad_norm": 0.5944204181909329, + "learning_rate": 3.2304345272733384e-06, + "loss": 0.32, + "step": 18079 + }, + { + "epoch": 0.8469574179041551, + "grad_norm": 0.6239153174871254, + "learning_rate": 3.230253150362003e-06, + "loss": 0.3092, + "step": 18080 + }, + { + "epoch": 0.8470042628940835, + "grad_norm": 0.5879353551996166, + "learning_rate": 3.2300717692482452e-06, + "loss": 0.3113, + "step": 18081 + }, + { + "epoch": 0.8470511078840118, + "grad_norm": 0.6414189950673483, + "learning_rate": 3.229890383933107e-06, + "loss": 0.3273, + "step": 18082 + }, + { + "epoch": 0.8470979528739402, + "grad_norm": 0.5840069189847349, + "learning_rate": 3.2297089944176334e-06, + "loss": 0.3113, + "step": 18083 + }, + { + "epoch": 0.8471447978638684, + "grad_norm": 0.6262092296636668, + "learning_rate": 3.2295276007028683e-06, + "loss": 0.3252, + "step": 18084 + }, + { + "epoch": 0.8471916428537968, + "grad_norm": 0.6478562958968118, + "learning_rate": 3.2293462027898547e-06, + "loss": 0.3143, + "step": 18085 + }, + { + "epoch": 0.8472384878437251, + "grad_norm": 0.6164838625805267, + "learning_rate": 3.2291648006796374e-06, + "loss": 0.3185, + "step": 18086 + }, + { + "epoch": 0.8472853328336535, + "grad_norm": 0.5902491263794745, + "learning_rate": 3.228983394373259e-06, + "loss": 0.3196, + "step": 18087 + }, + { + "epoch": 0.8473321778235817, + "grad_norm": 0.5726052959717349, + "learning_rate": 3.2288019838717655e-06, + "loss": 0.3184, + "step": 18088 + }, + { + "epoch": 0.8473790228135101, + "grad_norm": 0.5817795399867477, + "learning_rate": 3.2286205691761996e-06, + "loss": 0.3134, + "step": 18089 + }, + { + "epoch": 0.8474258678034384, + "grad_norm": 0.6055902784188792, + "learning_rate": 3.228439150287605e-06, + "loss": 0.316, + "step": 18090 + }, + { + "epoch": 0.8474727127933668, + "grad_norm": 0.5641615571884638, + "learning_rate": 3.2282577272070264e-06, + "loss": 0.3121, + "step": 18091 + }, + { + "epoch": 0.8475195577832951, + "grad_norm": 0.6101826423922249, + "learning_rate": 3.2280762999355074e-06, + "loss": 0.3274, + "step": 18092 + }, + { + "epoch": 0.8475664027732234, + "grad_norm": 0.630576001921109, + "learning_rate": 3.227894868474093e-06, + "loss": 0.3069, + "step": 18093 + }, + { + "epoch": 0.8476132477631517, + "grad_norm": 0.5635548396902624, + "learning_rate": 3.227713432823825e-06, + "loss": 0.3117, + "step": 18094 + }, + { + "epoch": 0.8476600927530801, + "grad_norm": 0.6623772317673117, + "learning_rate": 3.227531992985751e-06, + "loss": 0.3095, + "step": 18095 + }, + { + "epoch": 0.8477069377430084, + "grad_norm": 0.5654311614257979, + "learning_rate": 3.2273505489609127e-06, + "loss": 0.3253, + "step": 18096 + }, + { + "epoch": 0.8477537827329367, + "grad_norm": 0.5613060658034392, + "learning_rate": 3.227169100750354e-06, + "loss": 0.3239, + "step": 18097 + }, + { + "epoch": 0.847800627722865, + "grad_norm": 0.5693078635637547, + "learning_rate": 3.2269876483551205e-06, + "loss": 0.3089, + "step": 18098 + }, + { + "epoch": 0.8478474727127934, + "grad_norm": 0.5570392912865096, + "learning_rate": 3.226806191776256e-06, + "loss": 0.3088, + "step": 18099 + }, + { + "epoch": 0.8478943177027217, + "grad_norm": 0.6237999689964646, + "learning_rate": 3.226624731014804e-06, + "loss": 0.3082, + "step": 18100 + }, + { + "epoch": 0.8479411626926501, + "grad_norm": 0.5859673374984183, + "learning_rate": 3.22644326607181e-06, + "loss": 0.3266, + "step": 18101 + }, + { + "epoch": 0.8479880076825783, + "grad_norm": 0.5393024310899307, + "learning_rate": 3.2262617969483167e-06, + "loss": 0.3286, + "step": 18102 + }, + { + "epoch": 0.8480348526725067, + "grad_norm": 0.5833634534373522, + "learning_rate": 3.22608032364537e-06, + "loss": 0.321, + "step": 18103 + }, + { + "epoch": 0.848081697662435, + "grad_norm": 0.553953456705643, + "learning_rate": 3.225898846164013e-06, + "loss": 0.3227, + "step": 18104 + }, + { + "epoch": 0.8481285426523634, + "grad_norm": 0.5989937052148064, + "learning_rate": 3.2257173645052913e-06, + "loss": 0.3202, + "step": 18105 + }, + { + "epoch": 0.8481753876422916, + "grad_norm": 0.6254111844015074, + "learning_rate": 3.225535878670248e-06, + "loss": 0.323, + "step": 18106 + }, + { + "epoch": 0.84822223263222, + "grad_norm": 0.5968540335527617, + "learning_rate": 3.2253543886599282e-06, + "loss": 0.3167, + "step": 18107 + }, + { + "epoch": 0.8482690776221483, + "grad_norm": 0.619319618138773, + "learning_rate": 3.225172894475376e-06, + "loss": 0.3179, + "step": 18108 + }, + { + "epoch": 0.8483159226120767, + "grad_norm": 0.6049150833268241, + "learning_rate": 3.2249913961176365e-06, + "loss": 0.3224, + "step": 18109 + }, + { + "epoch": 0.848362767602005, + "grad_norm": 0.5966442091389778, + "learning_rate": 3.224809893587753e-06, + "loss": 0.313, + "step": 18110 + }, + { + "epoch": 0.8484096125919333, + "grad_norm": 0.6177682568006293, + "learning_rate": 3.2246283868867718e-06, + "loss": 0.3252, + "step": 18111 + }, + { + "epoch": 0.8484564575818616, + "grad_norm": 0.6427233972475839, + "learning_rate": 3.224446876015736e-06, + "loss": 0.326, + "step": 18112 + }, + { + "epoch": 0.84850330257179, + "grad_norm": 0.639234907040215, + "learning_rate": 3.2242653609756904e-06, + "loss": 0.299, + "step": 18113 + }, + { + "epoch": 0.8485501475617183, + "grad_norm": 0.6062092211277605, + "learning_rate": 3.224083841767679e-06, + "loss": 0.3012, + "step": 18114 + }, + { + "epoch": 0.8485969925516466, + "grad_norm": 0.5660993075228742, + "learning_rate": 3.2239023183927475e-06, + "loss": 0.3142, + "step": 18115 + }, + { + "epoch": 0.8486438375415749, + "grad_norm": 0.5625861425979175, + "learning_rate": 3.2237207908519406e-06, + "loss": 0.2984, + "step": 18116 + }, + { + "epoch": 0.8486906825315033, + "grad_norm": 0.5789270485801318, + "learning_rate": 3.223539259146302e-06, + "loss": 0.3053, + "step": 18117 + }, + { + "epoch": 0.8487375275214316, + "grad_norm": 0.6417203343218374, + "learning_rate": 3.223357723276877e-06, + "loss": 0.3328, + "step": 18118 + }, + { + "epoch": 0.84878437251136, + "grad_norm": 0.5421032622115851, + "learning_rate": 3.223176183244711e-06, + "loss": 0.3084, + "step": 18119 + }, + { + "epoch": 0.8488312175012882, + "grad_norm": 0.5665586553506359, + "learning_rate": 3.2229946390508466e-06, + "loss": 0.3039, + "step": 18120 + }, + { + "epoch": 0.8488780624912166, + "grad_norm": 0.5827346814029895, + "learning_rate": 3.2228130906963294e-06, + "loss": 0.3234, + "step": 18121 + }, + { + "epoch": 0.8489249074811449, + "grad_norm": 0.5315456420145854, + "learning_rate": 3.222631538182205e-06, + "loss": 0.2932, + "step": 18122 + }, + { + "epoch": 0.8489717524710733, + "grad_norm": 0.5518837837698032, + "learning_rate": 3.222449981509519e-06, + "loss": 0.2888, + "step": 18123 + }, + { + "epoch": 0.8490185974610015, + "grad_norm": 0.5771789248768127, + "learning_rate": 3.222268420679313e-06, + "loss": 0.3205, + "step": 18124 + }, + { + "epoch": 0.8490654424509299, + "grad_norm": 0.5553092517245807, + "learning_rate": 3.2220868556926344e-06, + "loss": 0.3086, + "step": 18125 + }, + { + "epoch": 0.8491122874408582, + "grad_norm": 0.6049111735696076, + "learning_rate": 3.2219052865505277e-06, + "loss": 0.3499, + "step": 18126 + }, + { + "epoch": 0.8491591324307866, + "grad_norm": 0.6096687080465242, + "learning_rate": 3.221723713254038e-06, + "loss": 0.3132, + "step": 18127 + }, + { + "epoch": 0.8492059774207149, + "grad_norm": 0.577279589565259, + "learning_rate": 3.2215421358042087e-06, + "loss": 0.3311, + "step": 18128 + }, + { + "epoch": 0.8492528224106431, + "grad_norm": 0.5900822612497845, + "learning_rate": 3.221360554202087e-06, + "loss": 0.325, + "step": 18129 + }, + { + "epoch": 0.8492996674005715, + "grad_norm": 0.5525394410302991, + "learning_rate": 3.221178968448716e-06, + "loss": 0.3202, + "step": 18130 + }, + { + "epoch": 0.8493465123904999, + "grad_norm": 0.6232048997052858, + "learning_rate": 3.220997378545141e-06, + "loss": 0.3095, + "step": 18131 + }, + { + "epoch": 0.8493933573804282, + "grad_norm": 0.6045229541013811, + "learning_rate": 3.2208157844924076e-06, + "loss": 0.331, + "step": 18132 + }, + { + "epoch": 0.8494402023703564, + "grad_norm": 0.5540715378271626, + "learning_rate": 3.220634186291561e-06, + "loss": 0.3117, + "step": 18133 + }, + { + "epoch": 0.8494870473602848, + "grad_norm": 0.5768693364750563, + "learning_rate": 3.2204525839436446e-06, + "loss": 0.3183, + "step": 18134 + }, + { + "epoch": 0.8495338923502131, + "grad_norm": 0.5916459055935498, + "learning_rate": 3.2202709774497054e-06, + "loss": 0.3369, + "step": 18135 + }, + { + "epoch": 0.8495807373401415, + "grad_norm": 0.5968571465466261, + "learning_rate": 3.2200893668107876e-06, + "loss": 0.347, + "step": 18136 + }, + { + "epoch": 0.8496275823300699, + "grad_norm": 0.5864290580289827, + "learning_rate": 3.2199077520279365e-06, + "loss": 0.3433, + "step": 18137 + }, + { + "epoch": 0.8496744273199981, + "grad_norm": 0.5802658608345211, + "learning_rate": 3.219726133102197e-06, + "loss": 0.3247, + "step": 18138 + }, + { + "epoch": 0.8497212723099264, + "grad_norm": 0.5832949413987429, + "learning_rate": 3.2195445100346146e-06, + "loss": 0.3233, + "step": 18139 + }, + { + "epoch": 0.8497681172998548, + "grad_norm": 0.5777507377456408, + "learning_rate": 3.2193628828262348e-06, + "loss": 0.2967, + "step": 18140 + }, + { + "epoch": 0.8498149622897831, + "grad_norm": 0.5794575833164222, + "learning_rate": 3.2191812514781025e-06, + "loss": 0.3279, + "step": 18141 + }, + { + "epoch": 0.8498618072797114, + "grad_norm": 0.5906311407792363, + "learning_rate": 3.2189996159912623e-06, + "loss": 0.3194, + "step": 18142 + }, + { + "epoch": 0.8499086522696397, + "grad_norm": 0.5788601550880133, + "learning_rate": 3.2188179763667597e-06, + "loss": 0.3286, + "step": 18143 + }, + { + "epoch": 0.8499554972595681, + "grad_norm": 0.5507805409750273, + "learning_rate": 3.2186363326056417e-06, + "loss": 0.3029, + "step": 18144 + }, + { + "epoch": 0.8500023422494964, + "grad_norm": 0.5723462694464376, + "learning_rate": 3.218454684708951e-06, + "loss": 0.3044, + "step": 18145 + }, + { + "epoch": 0.8500491872394248, + "grad_norm": 0.5753829029559, + "learning_rate": 3.2182730326777345e-06, + "loss": 0.3411, + "step": 18146 + }, + { + "epoch": 0.850096032229353, + "grad_norm": 0.6153001023922745, + "learning_rate": 3.218091376513037e-06, + "loss": 0.321, + "step": 18147 + }, + { + "epoch": 0.8501428772192814, + "grad_norm": 0.632898395928504, + "learning_rate": 3.217909716215905e-06, + "loss": 0.3273, + "step": 18148 + }, + { + "epoch": 0.8501897222092097, + "grad_norm": 1.0490580255469613, + "learning_rate": 3.2177280517873823e-06, + "loss": 0.3288, + "step": 18149 + }, + { + "epoch": 0.8502365671991381, + "grad_norm": 0.6197528608047552, + "learning_rate": 3.2175463832285146e-06, + "loss": 0.3391, + "step": 18150 + }, + { + "epoch": 0.8502834121890663, + "grad_norm": 0.5958282693314687, + "learning_rate": 3.2173647105403494e-06, + "loss": 0.3129, + "step": 18151 + }, + { + "epoch": 0.8503302571789947, + "grad_norm": 0.6299562897577514, + "learning_rate": 3.217183033723929e-06, + "loss": 0.3258, + "step": 18152 + }, + { + "epoch": 0.850377102168923, + "grad_norm": 0.5354186232714975, + "learning_rate": 3.2170013527803013e-06, + "loss": 0.2917, + "step": 18153 + }, + { + "epoch": 0.8504239471588514, + "grad_norm": 0.6621336667195664, + "learning_rate": 3.216819667710511e-06, + "loss": 0.3181, + "step": 18154 + }, + { + "epoch": 0.8504707921487797, + "grad_norm": 0.6076101007814995, + "learning_rate": 3.216637978515603e-06, + "loss": 0.3194, + "step": 18155 + }, + { + "epoch": 0.850517637138708, + "grad_norm": 0.5923770648043878, + "learning_rate": 3.216456285196624e-06, + "loss": 0.3268, + "step": 18156 + }, + { + "epoch": 0.8505644821286363, + "grad_norm": 0.5735733282779999, + "learning_rate": 3.2162745877546194e-06, + "loss": 0.2988, + "step": 18157 + }, + { + "epoch": 0.8506113271185647, + "grad_norm": 0.5606486023359973, + "learning_rate": 3.2160928861906343e-06, + "loss": 0.3161, + "step": 18158 + }, + { + "epoch": 0.850658172108493, + "grad_norm": 0.5387674371202339, + "learning_rate": 3.2159111805057146e-06, + "loss": 0.3009, + "step": 18159 + }, + { + "epoch": 0.8507050170984213, + "grad_norm": 0.5602725094853094, + "learning_rate": 3.215729470700906e-06, + "loss": 0.3129, + "step": 18160 + }, + { + "epoch": 0.8507518620883496, + "grad_norm": 0.6043534824709145, + "learning_rate": 3.2155477567772548e-06, + "loss": 0.3265, + "step": 18161 + }, + { + "epoch": 0.850798707078278, + "grad_norm": 0.6416553750237936, + "learning_rate": 3.2153660387358052e-06, + "loss": 0.3395, + "step": 18162 + }, + { + "epoch": 0.8508455520682063, + "grad_norm": 0.5799960070094848, + "learning_rate": 3.215184316577604e-06, + "loss": 0.3199, + "step": 18163 + }, + { + "epoch": 0.8508923970581347, + "grad_norm": 0.5225805630822147, + "learning_rate": 3.215002590303697e-06, + "loss": 0.2966, + "step": 18164 + }, + { + "epoch": 0.8509392420480629, + "grad_norm": 0.5776483225914802, + "learning_rate": 3.2148208599151302e-06, + "loss": 0.3082, + "step": 18165 + }, + { + "epoch": 0.8509860870379913, + "grad_norm": 0.6353169394423901, + "learning_rate": 3.2146391254129485e-06, + "loss": 0.3176, + "step": 18166 + }, + { + "epoch": 0.8510329320279196, + "grad_norm": 0.6290864870405096, + "learning_rate": 3.214457386798198e-06, + "loss": 0.3259, + "step": 18167 + }, + { + "epoch": 0.851079777017848, + "grad_norm": 0.5686125159170519, + "learning_rate": 3.214275644071926e-06, + "loss": 0.3127, + "step": 18168 + }, + { + "epoch": 0.8511266220077762, + "grad_norm": 0.5951962537945918, + "learning_rate": 3.2140938972351766e-06, + "loss": 0.3261, + "step": 18169 + }, + { + "epoch": 0.8511734669977046, + "grad_norm": 0.5323729599532135, + "learning_rate": 3.2139121462889954e-06, + "loss": 0.2931, + "step": 18170 + }, + { + "epoch": 0.8512203119876329, + "grad_norm": 0.5598322671516034, + "learning_rate": 3.2137303912344304e-06, + "loss": 0.3133, + "step": 18171 + }, + { + "epoch": 0.8512671569775613, + "grad_norm": 0.6105440796808775, + "learning_rate": 3.2135486320725264e-06, + "loss": 0.3053, + "step": 18172 + }, + { + "epoch": 0.8513140019674896, + "grad_norm": 0.6186102331677141, + "learning_rate": 3.2133668688043278e-06, + "loss": 0.32, + "step": 18173 + }, + { + "epoch": 0.8513608469574179, + "grad_norm": 0.6280411600247016, + "learning_rate": 3.2131851014308836e-06, + "loss": 0.3513, + "step": 18174 + }, + { + "epoch": 0.8514076919473462, + "grad_norm": 0.5694737325132879, + "learning_rate": 3.2130033299532383e-06, + "loss": 0.3288, + "step": 18175 + }, + { + "epoch": 0.8514545369372746, + "grad_norm": 0.5752219807699364, + "learning_rate": 3.2128215543724374e-06, + "loss": 0.3067, + "step": 18176 + }, + { + "epoch": 0.8515013819272029, + "grad_norm": 0.5620467763533775, + "learning_rate": 3.2126397746895276e-06, + "loss": 0.3085, + "step": 18177 + }, + { + "epoch": 0.8515482269171312, + "grad_norm": 0.546635428423993, + "learning_rate": 3.2124579909055557e-06, + "loss": 0.3046, + "step": 18178 + }, + { + "epoch": 0.8515950719070595, + "grad_norm": 0.5956344544322115, + "learning_rate": 3.212276203021567e-06, + "loss": 0.3092, + "step": 18179 + }, + { + "epoch": 0.8516419168969879, + "grad_norm": 0.6034528667062633, + "learning_rate": 3.2120944110386076e-06, + "loss": 0.3244, + "step": 18180 + }, + { + "epoch": 0.8516887618869162, + "grad_norm": 0.6247349581054729, + "learning_rate": 3.211912614957724e-06, + "loss": 0.3218, + "step": 18181 + }, + { + "epoch": 0.8517356068768446, + "grad_norm": 0.5606695212178487, + "learning_rate": 3.2117308147799626e-06, + "loss": 0.3144, + "step": 18182 + }, + { + "epoch": 0.8517824518667728, + "grad_norm": 0.6115329917630695, + "learning_rate": 3.2115490105063684e-06, + "loss": 0.2981, + "step": 18183 + }, + { + "epoch": 0.8518292968567012, + "grad_norm": 0.6098925384565907, + "learning_rate": 3.211367202137989e-06, + "loss": 0.3192, + "step": 18184 + }, + { + "epoch": 0.8518761418466295, + "grad_norm": 0.541399339474487, + "learning_rate": 3.2111853896758693e-06, + "loss": 0.3038, + "step": 18185 + }, + { + "epoch": 0.8519229868365579, + "grad_norm": 0.5900455985083582, + "learning_rate": 3.2110035731210575e-06, + "loss": 0.3211, + "step": 18186 + }, + { + "epoch": 0.8519698318264861, + "grad_norm": 0.5733287376084794, + "learning_rate": 3.210821752474599e-06, + "loss": 0.3193, + "step": 18187 + }, + { + "epoch": 0.8520166768164145, + "grad_norm": 0.5694846405905502, + "learning_rate": 3.210639927737539e-06, + "loss": 0.2993, + "step": 18188 + }, + { + "epoch": 0.8520635218063428, + "grad_norm": 0.6044242584956251, + "learning_rate": 3.2104580989109253e-06, + "loss": 0.3298, + "step": 18189 + }, + { + "epoch": 0.8521103667962712, + "grad_norm": 0.5756774237250614, + "learning_rate": 3.2102762659958043e-06, + "loss": 0.3068, + "step": 18190 + }, + { + "epoch": 0.8521572117861995, + "grad_norm": 0.5626636352823663, + "learning_rate": 3.2100944289932208e-06, + "loss": 0.3182, + "step": 18191 + }, + { + "epoch": 0.8522040567761278, + "grad_norm": 0.5853523689582172, + "learning_rate": 3.2099125879042225e-06, + "loss": 0.3111, + "step": 18192 + }, + { + "epoch": 0.8522509017660561, + "grad_norm": 0.6455041117830036, + "learning_rate": 3.209730742729857e-06, + "loss": 0.318, + "step": 18193 + }, + { + "epoch": 0.8522977467559845, + "grad_norm": 0.6207491957257179, + "learning_rate": 3.209548893471168e-06, + "loss": 0.3244, + "step": 18194 + }, + { + "epoch": 0.8523445917459128, + "grad_norm": 0.613576160697065, + "learning_rate": 3.209367040129204e-06, + "loss": 0.3263, + "step": 18195 + }, + { + "epoch": 0.8523914367358411, + "grad_norm": 0.564510895609994, + "learning_rate": 3.2091851827050113e-06, + "loss": 0.2698, + "step": 18196 + }, + { + "epoch": 0.8524382817257694, + "grad_norm": 0.606339513714876, + "learning_rate": 3.2090033211996357e-06, + "loss": 0.3276, + "step": 18197 + }, + { + "epoch": 0.8524851267156978, + "grad_norm": 0.569926305195752, + "learning_rate": 3.208821455614124e-06, + "loss": 0.3082, + "step": 18198 + }, + { + "epoch": 0.8525319717056261, + "grad_norm": 0.642970252761496, + "learning_rate": 3.2086395859495235e-06, + "loss": 0.3152, + "step": 18199 + }, + { + "epoch": 0.8525788166955545, + "grad_norm": 0.5364219564953805, + "learning_rate": 3.2084577122068804e-06, + "loss": 0.3112, + "step": 18200 + }, + { + "epoch": 0.8526256616854827, + "grad_norm": 0.5683743677257296, + "learning_rate": 3.2082758343872407e-06, + "loss": 0.3001, + "step": 18201 + }, + { + "epoch": 0.8526725066754111, + "grad_norm": 0.6191904896403737, + "learning_rate": 3.208093952491652e-06, + "loss": 0.3109, + "step": 18202 + }, + { + "epoch": 0.8527193516653394, + "grad_norm": 0.5605776068110098, + "learning_rate": 3.2079120665211605e-06, + "loss": 0.2974, + "step": 18203 + }, + { + "epoch": 0.8527661966552678, + "grad_norm": 0.6222462362209603, + "learning_rate": 3.2077301764768126e-06, + "loss": 0.3426, + "step": 18204 + }, + { + "epoch": 0.852813041645196, + "grad_norm": 0.6501074415825239, + "learning_rate": 3.207548282359656e-06, + "loss": 0.3227, + "step": 18205 + }, + { + "epoch": 0.8528598866351244, + "grad_norm": 0.6084824784409295, + "learning_rate": 3.207366384170736e-06, + "loss": 0.3118, + "step": 18206 + }, + { + "epoch": 0.8529067316250527, + "grad_norm": 0.6600608376279531, + "learning_rate": 3.2071844819111007e-06, + "loss": 0.3554, + "step": 18207 + }, + { + "epoch": 0.8529535766149811, + "grad_norm": 0.5659833956596237, + "learning_rate": 3.207002575581797e-06, + "loss": 0.3116, + "step": 18208 + }, + { + "epoch": 0.8530004216049094, + "grad_norm": 0.5823652485440347, + "learning_rate": 3.2068206651838708e-06, + "loss": 0.3286, + "step": 18209 + }, + { + "epoch": 0.8530472665948376, + "grad_norm": 0.6253428373794901, + "learning_rate": 3.2066387507183696e-06, + "loss": 0.3155, + "step": 18210 + }, + { + "epoch": 0.853094111584766, + "grad_norm": 0.6923728351753687, + "learning_rate": 3.2064568321863394e-06, + "loss": 0.3454, + "step": 18211 + }, + { + "epoch": 0.8531409565746944, + "grad_norm": 0.5995615864954599, + "learning_rate": 3.206274909588828e-06, + "loss": 0.3164, + "step": 18212 + }, + { + "epoch": 0.8531878015646227, + "grad_norm": 0.7129549042571129, + "learning_rate": 3.206092982926881e-06, + "loss": 0.3444, + "step": 18213 + }, + { + "epoch": 0.853234646554551, + "grad_norm": 0.6212872583497725, + "learning_rate": 3.205911052201548e-06, + "loss": 0.3412, + "step": 18214 + }, + { + "epoch": 0.8532814915444793, + "grad_norm": 0.6152201593851072, + "learning_rate": 3.205729117413874e-06, + "loss": 0.3183, + "step": 18215 + }, + { + "epoch": 0.8533283365344076, + "grad_norm": 0.6383850503955542, + "learning_rate": 3.2055471785649052e-06, + "loss": 0.3346, + "step": 18216 + }, + { + "epoch": 0.853375181524336, + "grad_norm": 0.5828578214370947, + "learning_rate": 3.2053652356556908e-06, + "loss": 0.3219, + "step": 18217 + }, + { + "epoch": 0.8534220265142644, + "grad_norm": 0.6042084401857319, + "learning_rate": 3.2051832886872764e-06, + "loss": 0.3395, + "step": 18218 + }, + { + "epoch": 0.8534688715041926, + "grad_norm": 0.6374897585262582, + "learning_rate": 3.2050013376607093e-06, + "loss": 0.3186, + "step": 18219 + }, + { + "epoch": 0.853515716494121, + "grad_norm": 0.6383632334077909, + "learning_rate": 3.204819382577037e-06, + "loss": 0.3491, + "step": 18220 + }, + { + "epoch": 0.8535625614840493, + "grad_norm": 0.5489885601496612, + "learning_rate": 3.2046374234373063e-06, + "loss": 0.3077, + "step": 18221 + }, + { + "epoch": 0.8536094064739776, + "grad_norm": 0.5567831557606199, + "learning_rate": 3.2044554602425638e-06, + "loss": 0.3035, + "step": 18222 + }, + { + "epoch": 0.8536562514639059, + "grad_norm": 0.6160335678663783, + "learning_rate": 3.204273492993858e-06, + "loss": 0.3396, + "step": 18223 + }, + { + "epoch": 0.8537030964538342, + "grad_norm": 0.5724737181152476, + "learning_rate": 3.2040915216922347e-06, + "loss": 0.3282, + "step": 18224 + }, + { + "epoch": 0.8537499414437626, + "grad_norm": 0.5883159555825522, + "learning_rate": 3.2039095463387417e-06, + "loss": 0.321, + "step": 18225 + }, + { + "epoch": 0.853796786433691, + "grad_norm": 0.616821979107895, + "learning_rate": 3.2037275669344258e-06, + "loss": 0.2986, + "step": 18226 + }, + { + "epoch": 0.8538436314236193, + "grad_norm": 0.5835821995067406, + "learning_rate": 3.2035455834803352e-06, + "loss": 0.2908, + "step": 18227 + }, + { + "epoch": 0.8538904764135475, + "grad_norm": 0.552248074895123, + "learning_rate": 3.2033635959775165e-06, + "loss": 0.3062, + "step": 18228 + }, + { + "epoch": 0.8539373214034759, + "grad_norm": 0.5657964898076966, + "learning_rate": 3.203181604427017e-06, + "loss": 0.3024, + "step": 18229 + }, + { + "epoch": 0.8539841663934042, + "grad_norm": 0.5935720696851503, + "learning_rate": 3.2029996088298843e-06, + "loss": 0.3118, + "step": 18230 + }, + { + "epoch": 0.8540310113833326, + "grad_norm": 0.5440145110755373, + "learning_rate": 3.2028176091871654e-06, + "loss": 0.3012, + "step": 18231 + }, + { + "epoch": 0.8540778563732608, + "grad_norm": 0.566642432799921, + "learning_rate": 3.202635605499908e-06, + "loss": 0.319, + "step": 18232 + }, + { + "epoch": 0.8541247013631892, + "grad_norm": 0.5900847035198901, + "learning_rate": 3.202453597769159e-06, + "loss": 0.3155, + "step": 18233 + }, + { + "epoch": 0.8541715463531175, + "grad_norm": 0.6209006074433886, + "learning_rate": 3.202271585995966e-06, + "loss": 0.3336, + "step": 18234 + }, + { + "epoch": 0.8542183913430459, + "grad_norm": 0.5905337845940121, + "learning_rate": 3.202089570181377e-06, + "loss": 0.3114, + "step": 18235 + }, + { + "epoch": 0.8542652363329742, + "grad_norm": 0.5703027686252052, + "learning_rate": 3.2019075503264383e-06, + "loss": 0.2911, + "step": 18236 + }, + { + "epoch": 0.8543120813229025, + "grad_norm": 0.6611416541390438, + "learning_rate": 3.2017255264321984e-06, + "loss": 0.3389, + "step": 18237 + }, + { + "epoch": 0.8543589263128308, + "grad_norm": 0.5769365854322119, + "learning_rate": 3.2015434984997048e-06, + "loss": 0.307, + "step": 18238 + }, + { + "epoch": 0.8544057713027592, + "grad_norm": 0.6114224772123217, + "learning_rate": 3.2013614665300048e-06, + "loss": 0.32, + "step": 18239 + }, + { + "epoch": 0.8544526162926875, + "grad_norm": 0.5781140568009954, + "learning_rate": 3.201179430524145e-06, + "loss": 0.3059, + "step": 18240 + }, + { + "epoch": 0.8544994612826158, + "grad_norm": 0.6563847286196545, + "learning_rate": 3.2009973904831743e-06, + "loss": 0.3456, + "step": 18241 + }, + { + "epoch": 0.8545463062725441, + "grad_norm": 0.6400235636662776, + "learning_rate": 3.2008153464081406e-06, + "loss": 0.3332, + "step": 18242 + }, + { + "epoch": 0.8545931512624725, + "grad_norm": 0.5700767424101302, + "learning_rate": 3.200633298300089e-06, + "loss": 0.3131, + "step": 18243 + }, + { + "epoch": 0.8546399962524008, + "grad_norm": 0.6349253621946369, + "learning_rate": 3.2004512461600694e-06, + "loss": 0.3289, + "step": 18244 + }, + { + "epoch": 0.8546868412423292, + "grad_norm": 0.6032911252284321, + "learning_rate": 3.2002691899891304e-06, + "loss": 0.3361, + "step": 18245 + }, + { + "epoch": 0.8547336862322574, + "grad_norm": 0.6276870332263302, + "learning_rate": 3.200087129788317e-06, + "loss": 0.3526, + "step": 18246 + }, + { + "epoch": 0.8547805312221858, + "grad_norm": 0.5963436282020935, + "learning_rate": 3.1999050655586776e-06, + "loss": 0.3022, + "step": 18247 + }, + { + "epoch": 0.8548273762121141, + "grad_norm": 0.5957017607263368, + "learning_rate": 3.199722997301261e-06, + "loss": 0.3246, + "step": 18248 + }, + { + "epoch": 0.8548742212020425, + "grad_norm": 0.5829656773430514, + "learning_rate": 3.199540925017115e-06, + "loss": 0.3191, + "step": 18249 + }, + { + "epoch": 0.8549210661919707, + "grad_norm": 0.6158542898881806, + "learning_rate": 3.199358848707286e-06, + "loss": 0.324, + "step": 18250 + }, + { + "epoch": 0.8549679111818991, + "grad_norm": 0.6145030136180388, + "learning_rate": 3.199176768372823e-06, + "loss": 0.3178, + "step": 18251 + }, + { + "epoch": 0.8550147561718274, + "grad_norm": 0.6235801570852403, + "learning_rate": 3.198994684014774e-06, + "loss": 0.331, + "step": 18252 + }, + { + "epoch": 0.8550616011617558, + "grad_norm": 0.5864241934241926, + "learning_rate": 3.1988125956341852e-06, + "loss": 0.3212, + "step": 18253 + }, + { + "epoch": 0.8551084461516841, + "grad_norm": 0.5511420974143143, + "learning_rate": 3.1986305032321065e-06, + "loss": 0.2948, + "step": 18254 + }, + { + "epoch": 0.8551552911416124, + "grad_norm": 0.5333936524275855, + "learning_rate": 3.1984484068095837e-06, + "loss": 0.3207, + "step": 18255 + }, + { + "epoch": 0.8552021361315407, + "grad_norm": 0.6084863496808288, + "learning_rate": 3.198266306367667e-06, + "loss": 0.308, + "step": 18256 + }, + { + "epoch": 0.8552489811214691, + "grad_norm": 0.5622576378328182, + "learning_rate": 3.1980842019074028e-06, + "loss": 0.2972, + "step": 18257 + }, + { + "epoch": 0.8552958261113974, + "grad_norm": 0.6138819476600957, + "learning_rate": 3.197902093429839e-06, + "loss": 0.3263, + "step": 18258 + }, + { + "epoch": 0.8553426711013257, + "grad_norm": 0.5692561948436314, + "learning_rate": 3.1977199809360247e-06, + "loss": 0.3154, + "step": 18259 + }, + { + "epoch": 0.855389516091254, + "grad_norm": 0.6127539463492725, + "learning_rate": 3.197537864427007e-06, + "loss": 0.3108, + "step": 18260 + }, + { + "epoch": 0.8554363610811824, + "grad_norm": 0.5803685917335152, + "learning_rate": 3.1973557439038343e-06, + "loss": 0.3131, + "step": 18261 + }, + { + "epoch": 0.8554832060711107, + "grad_norm": 0.5170297719410473, + "learning_rate": 3.197173619367554e-06, + "loss": 0.3019, + "step": 18262 + }, + { + "epoch": 0.8555300510610391, + "grad_norm": 0.5640936202825783, + "learning_rate": 3.1969914908192163e-06, + "loss": 0.3212, + "step": 18263 + }, + { + "epoch": 0.8555768960509673, + "grad_norm": 0.5656412297914365, + "learning_rate": 3.196809358259866e-06, + "loss": 0.3052, + "step": 18264 + }, + { + "epoch": 0.8556237410408957, + "grad_norm": 0.6200894642579501, + "learning_rate": 3.1966272216905538e-06, + "loss": 0.3326, + "step": 18265 + }, + { + "epoch": 0.855670586030824, + "grad_norm": 0.5836674789722818, + "learning_rate": 3.196445081112327e-06, + "loss": 0.3122, + "step": 18266 + }, + { + "epoch": 0.8557174310207524, + "grad_norm": 0.5876802803607946, + "learning_rate": 3.1962629365262336e-06, + "loss": 0.3248, + "step": 18267 + }, + { + "epoch": 0.8557642760106806, + "grad_norm": 0.6239373911903254, + "learning_rate": 3.1960807879333224e-06, + "loss": 0.3156, + "step": 18268 + }, + { + "epoch": 0.855811121000609, + "grad_norm": 0.609314634913316, + "learning_rate": 3.195898635334641e-06, + "loss": 0.3215, + "step": 18269 + }, + { + "epoch": 0.8558579659905373, + "grad_norm": 0.6154765614436289, + "learning_rate": 3.1957164787312376e-06, + "loss": 0.3339, + "step": 18270 + }, + { + "epoch": 0.8559048109804657, + "grad_norm": 0.579933039904434, + "learning_rate": 3.195534318124161e-06, + "loss": 0.3077, + "step": 18271 + }, + { + "epoch": 0.855951655970394, + "grad_norm": 0.58215100932328, + "learning_rate": 3.1953521535144593e-06, + "loss": 0.3099, + "step": 18272 + }, + { + "epoch": 0.8559985009603223, + "grad_norm": 0.5938552851506931, + "learning_rate": 3.195169984903181e-06, + "loss": 0.3327, + "step": 18273 + }, + { + "epoch": 0.8560453459502506, + "grad_norm": 0.6715175734895943, + "learning_rate": 3.1949878122913736e-06, + "loss": 0.3483, + "step": 18274 + }, + { + "epoch": 0.856092190940179, + "grad_norm": 0.5152089713662958, + "learning_rate": 3.1948056356800862e-06, + "loss": 0.2955, + "step": 18275 + }, + { + "epoch": 0.8561390359301073, + "grad_norm": 0.5822574911345747, + "learning_rate": 3.1946234550703664e-06, + "loss": 0.3131, + "step": 18276 + }, + { + "epoch": 0.8561858809200356, + "grad_norm": 0.5557863525842919, + "learning_rate": 3.194441270463264e-06, + "loss": 0.3103, + "step": 18277 + }, + { + "epoch": 0.8562327259099639, + "grad_norm": 0.5787428283468415, + "learning_rate": 3.1942590818598267e-06, + "loss": 0.29, + "step": 18278 + }, + { + "epoch": 0.8562795708998923, + "grad_norm": 0.5705735549998175, + "learning_rate": 3.1940768892611025e-06, + "loss": 0.3249, + "step": 18279 + }, + { + "epoch": 0.8563264158898206, + "grad_norm": 0.558406689232509, + "learning_rate": 3.1938946926681403e-06, + "loss": 0.311, + "step": 18280 + }, + { + "epoch": 0.856373260879749, + "grad_norm": 0.6087152368833164, + "learning_rate": 3.1937124920819886e-06, + "loss": 0.3275, + "step": 18281 + }, + { + "epoch": 0.8564201058696772, + "grad_norm": 0.6387612702932935, + "learning_rate": 3.1935302875036956e-06, + "loss": 0.3166, + "step": 18282 + }, + { + "epoch": 0.8564669508596056, + "grad_norm": 0.6524204221049987, + "learning_rate": 3.19334807893431e-06, + "loss": 0.3434, + "step": 18283 + }, + { + "epoch": 0.8565137958495339, + "grad_norm": 0.6086340790184057, + "learning_rate": 3.1931658663748818e-06, + "loss": 0.3291, + "step": 18284 + }, + { + "epoch": 0.8565606408394623, + "grad_norm": 0.6181717955299368, + "learning_rate": 3.1929836498264564e-06, + "loss": 0.3343, + "step": 18285 + }, + { + "epoch": 0.8566074858293905, + "grad_norm": 0.574163658027412, + "learning_rate": 3.1928014292900856e-06, + "loss": 0.3074, + "step": 18286 + }, + { + "epoch": 0.8566543308193189, + "grad_norm": 0.5902418872861432, + "learning_rate": 3.192619204766816e-06, + "loss": 0.3204, + "step": 18287 + }, + { + "epoch": 0.8567011758092472, + "grad_norm": 0.5663965887047829, + "learning_rate": 3.1924369762576975e-06, + "loss": 0.2993, + "step": 18288 + }, + { + "epoch": 0.8567480207991756, + "grad_norm": 0.5705870132195241, + "learning_rate": 3.1922547437637774e-06, + "loss": 0.3036, + "step": 18289 + }, + { + "epoch": 0.8567948657891039, + "grad_norm": 0.5728132737600924, + "learning_rate": 3.1920725072861064e-06, + "loss": 0.3008, + "step": 18290 + }, + { + "epoch": 0.8568417107790322, + "grad_norm": 0.5897099322287654, + "learning_rate": 3.1918902668257313e-06, + "loss": 0.3058, + "step": 18291 + }, + { + "epoch": 0.8568885557689605, + "grad_norm": 0.59625305414955, + "learning_rate": 3.1917080223837016e-06, + "loss": 0.3121, + "step": 18292 + }, + { + "epoch": 0.8569354007588889, + "grad_norm": 0.5956060633124154, + "learning_rate": 3.1915257739610665e-06, + "loss": 0.3209, + "step": 18293 + }, + { + "epoch": 0.8569822457488172, + "grad_norm": 0.5016601878504046, + "learning_rate": 3.1913435215588745e-06, + "loss": 0.3, + "step": 18294 + }, + { + "epoch": 0.8570290907387454, + "grad_norm": 0.6231640448211503, + "learning_rate": 3.191161265178174e-06, + "loss": 0.3065, + "step": 18295 + }, + { + "epoch": 0.8570759357286738, + "grad_norm": 0.6087496258200822, + "learning_rate": 3.190979004820014e-06, + "loss": 0.3199, + "step": 18296 + }, + { + "epoch": 0.8571227807186022, + "grad_norm": 0.5532077737926993, + "learning_rate": 3.1907967404854427e-06, + "loss": 0.3025, + "step": 18297 + }, + { + "epoch": 0.8571696257085305, + "grad_norm": 0.617153814361949, + "learning_rate": 3.190614472175511e-06, + "loss": 0.3286, + "step": 18298 + }, + { + "epoch": 0.8572164706984589, + "grad_norm": 0.6527347801410635, + "learning_rate": 3.1904321998912667e-06, + "loss": 0.3136, + "step": 18299 + }, + { + "epoch": 0.8572633156883871, + "grad_norm": 0.5818440131617881, + "learning_rate": 3.190249923633758e-06, + "loss": 0.3016, + "step": 18300 + }, + { + "epoch": 0.8573101606783154, + "grad_norm": 0.5877754046534953, + "learning_rate": 3.1900676434040345e-06, + "loss": 0.3278, + "step": 18301 + }, + { + "epoch": 0.8573570056682438, + "grad_norm": 0.6027601792427703, + "learning_rate": 3.1898853592031453e-06, + "loss": 0.3272, + "step": 18302 + }, + { + "epoch": 0.8574038506581722, + "grad_norm": 0.6444422907804755, + "learning_rate": 3.1897030710321393e-06, + "loss": 0.3305, + "step": 18303 + }, + { + "epoch": 0.8574506956481004, + "grad_norm": 0.5952484332665589, + "learning_rate": 3.189520778892065e-06, + "loss": 0.2993, + "step": 18304 + }, + { + "epoch": 0.8574975406380287, + "grad_norm": 0.6093160553588219, + "learning_rate": 3.1893384827839723e-06, + "loss": 0.3032, + "step": 18305 + }, + { + "epoch": 0.8575443856279571, + "grad_norm": 0.5818179153485573, + "learning_rate": 3.1891561827089106e-06, + "loss": 0.3276, + "step": 18306 + }, + { + "epoch": 0.8575912306178854, + "grad_norm": 0.6036033310238904, + "learning_rate": 3.1889738786679268e-06, + "loss": 0.3679, + "step": 18307 + }, + { + "epoch": 0.8576380756078138, + "grad_norm": 0.569538298934343, + "learning_rate": 3.188791570662073e-06, + "loss": 0.308, + "step": 18308 + }, + { + "epoch": 0.857684920597742, + "grad_norm": 0.6271208418359441, + "learning_rate": 3.1886092586923967e-06, + "loss": 0.3249, + "step": 18309 + }, + { + "epoch": 0.8577317655876704, + "grad_norm": 0.5868951004395283, + "learning_rate": 3.1884269427599456e-06, + "loss": 0.3053, + "step": 18310 + }, + { + "epoch": 0.8577786105775987, + "grad_norm": 0.5675427511991296, + "learning_rate": 3.188244622865772e-06, + "loss": 0.3277, + "step": 18311 + }, + { + "epoch": 0.8578254555675271, + "grad_norm": 0.6228524050937214, + "learning_rate": 3.1880622990109235e-06, + "loss": 0.3366, + "step": 18312 + }, + { + "epoch": 0.8578723005574553, + "grad_norm": 0.6356924500451803, + "learning_rate": 3.187879971196449e-06, + "loss": 0.3243, + "step": 18313 + }, + { + "epoch": 0.8579191455473837, + "grad_norm": 0.5714786413079601, + "learning_rate": 3.187697639423397e-06, + "loss": 0.3203, + "step": 18314 + }, + { + "epoch": 0.857965990537312, + "grad_norm": 0.5840840625981413, + "learning_rate": 3.18751530369282e-06, + "loss": 0.3152, + "step": 18315 + }, + { + "epoch": 0.8580128355272404, + "grad_norm": 0.648247009936575, + "learning_rate": 3.187332964005764e-06, + "loss": 0.3148, + "step": 18316 + }, + { + "epoch": 0.8580596805171687, + "grad_norm": 0.542624559555027, + "learning_rate": 3.1871506203632795e-06, + "loss": 0.3046, + "step": 18317 + }, + { + "epoch": 0.858106525507097, + "grad_norm": 0.5407431606511082, + "learning_rate": 3.186968272766417e-06, + "loss": 0.3059, + "step": 18318 + }, + { + "epoch": 0.8581533704970253, + "grad_norm": 0.593735126165342, + "learning_rate": 3.186785921216224e-06, + "loss": 0.342, + "step": 18319 + }, + { + "epoch": 0.8582002154869537, + "grad_norm": 0.6139361127646433, + "learning_rate": 3.1866035657137504e-06, + "loss": 0.3157, + "step": 18320 + }, + { + "epoch": 0.858247060476882, + "grad_norm": 0.638101272740244, + "learning_rate": 3.1864212062600465e-06, + "loss": 0.3385, + "step": 18321 + }, + { + "epoch": 0.8582939054668103, + "grad_norm": 0.6198698334155018, + "learning_rate": 3.1862388428561607e-06, + "loss": 0.2993, + "step": 18322 + }, + { + "epoch": 0.8583407504567386, + "grad_norm": 0.6158226745838379, + "learning_rate": 3.1860564755031427e-06, + "loss": 0.3175, + "step": 18323 + }, + { + "epoch": 0.858387595446667, + "grad_norm": 0.5889083741048827, + "learning_rate": 3.1858741042020423e-06, + "loss": 0.319, + "step": 18324 + }, + { + "epoch": 0.8584344404365953, + "grad_norm": 0.5802780485190733, + "learning_rate": 3.1856917289539085e-06, + "loss": 0.3133, + "step": 18325 + }, + { + "epoch": 0.8584812854265237, + "grad_norm": 0.5941083933904355, + "learning_rate": 3.1855093497597917e-06, + "loss": 0.3109, + "step": 18326 + }, + { + "epoch": 0.8585281304164519, + "grad_norm": 0.5568655484122883, + "learning_rate": 3.1853269666207405e-06, + "loss": 0.296, + "step": 18327 + }, + { + "epoch": 0.8585749754063803, + "grad_norm": 0.5485186795650613, + "learning_rate": 3.1851445795378043e-06, + "loss": 0.322, + "step": 18328 + }, + { + "epoch": 0.8586218203963086, + "grad_norm": 0.5698428255120268, + "learning_rate": 3.1849621885120344e-06, + "loss": 0.3124, + "step": 18329 + }, + { + "epoch": 0.858668665386237, + "grad_norm": 0.6056915801288719, + "learning_rate": 3.184779793544479e-06, + "loss": 0.3106, + "step": 18330 + }, + { + "epoch": 0.8587155103761652, + "grad_norm": 0.549435999862462, + "learning_rate": 3.1845973946361874e-06, + "loss": 0.3116, + "step": 18331 + }, + { + "epoch": 0.8587623553660936, + "grad_norm": 0.5969134379219946, + "learning_rate": 3.18441499178821e-06, + "loss": 0.298, + "step": 18332 + }, + { + "epoch": 0.8588092003560219, + "grad_norm": 0.5994876569961339, + "learning_rate": 3.184232585001598e-06, + "loss": 0.3251, + "step": 18333 + }, + { + "epoch": 0.8588560453459503, + "grad_norm": 0.5788552066654157, + "learning_rate": 3.184050174277397e-06, + "loss": 0.3046, + "step": 18334 + }, + { + "epoch": 0.8589028903358786, + "grad_norm": 0.6207872961363444, + "learning_rate": 3.18386775961666e-06, + "loss": 0.3106, + "step": 18335 + }, + { + "epoch": 0.8589497353258069, + "grad_norm": 0.5443020236121241, + "learning_rate": 3.183685341020436e-06, + "loss": 0.3061, + "step": 18336 + }, + { + "epoch": 0.8589965803157352, + "grad_norm": 0.5647787263647462, + "learning_rate": 3.1835029184897755e-06, + "loss": 0.321, + "step": 18337 + }, + { + "epoch": 0.8590434253056636, + "grad_norm": 0.5758079641866474, + "learning_rate": 3.1833204920257264e-06, + "loss": 0.3139, + "step": 18338 + }, + { + "epoch": 0.8590902702955919, + "grad_norm": 0.5689582547219888, + "learning_rate": 3.18313806162934e-06, + "loss": 0.3318, + "step": 18339 + }, + { + "epoch": 0.8591371152855202, + "grad_norm": 0.5989199147002319, + "learning_rate": 3.182955627301666e-06, + "loss": 0.319, + "step": 18340 + }, + { + "epoch": 0.8591839602754485, + "grad_norm": 0.6637612521439347, + "learning_rate": 3.1827731890437534e-06, + "loss": 0.3011, + "step": 18341 + }, + { + "epoch": 0.8592308052653769, + "grad_norm": 0.5895617865935788, + "learning_rate": 3.1825907468566535e-06, + "loss": 0.3253, + "step": 18342 + }, + { + "epoch": 0.8592776502553052, + "grad_norm": 0.5976380244220486, + "learning_rate": 3.1824083007414154e-06, + "loss": 0.3279, + "step": 18343 + }, + { + "epoch": 0.8593244952452336, + "grad_norm": 0.5800167192947552, + "learning_rate": 3.1822258506990882e-06, + "loss": 0.3106, + "step": 18344 + }, + { + "epoch": 0.8593713402351618, + "grad_norm": 0.5570738335315165, + "learning_rate": 3.1820433967307235e-06, + "loss": 0.3086, + "step": 18345 + }, + { + "epoch": 0.8594181852250902, + "grad_norm": 0.6153233019226328, + "learning_rate": 3.18186093883737e-06, + "loss": 0.3337, + "step": 18346 + }, + { + "epoch": 0.8594650302150185, + "grad_norm": 0.6785486428913492, + "learning_rate": 3.181678477020078e-06, + "loss": 0.3537, + "step": 18347 + }, + { + "epoch": 0.8595118752049469, + "grad_norm": 0.6305406997504333, + "learning_rate": 3.1814960112798986e-06, + "loss": 0.3155, + "step": 18348 + }, + { + "epoch": 0.8595587201948751, + "grad_norm": 0.5950150205557392, + "learning_rate": 3.18131354161788e-06, + "loss": 0.338, + "step": 18349 + }, + { + "epoch": 0.8596055651848035, + "grad_norm": 0.5967118938357807, + "learning_rate": 3.181131068035074e-06, + "loss": 0.3146, + "step": 18350 + }, + { + "epoch": 0.8596524101747318, + "grad_norm": 0.6211624866280143, + "learning_rate": 3.1809485905325294e-06, + "loss": 0.3258, + "step": 18351 + }, + { + "epoch": 0.8596992551646602, + "grad_norm": 0.5589978308667025, + "learning_rate": 3.180766109111296e-06, + "loss": 0.3112, + "step": 18352 + }, + { + "epoch": 0.8597461001545885, + "grad_norm": 0.5873946779347908, + "learning_rate": 3.180583623772426e-06, + "loss": 0.3113, + "step": 18353 + }, + { + "epoch": 0.8597929451445168, + "grad_norm": 0.6480334223766729, + "learning_rate": 3.180401134516968e-06, + "loss": 0.3149, + "step": 18354 + }, + { + "epoch": 0.8598397901344451, + "grad_norm": 0.6154580626528521, + "learning_rate": 3.1802186413459725e-06, + "loss": 0.3111, + "step": 18355 + }, + { + "epoch": 0.8598866351243735, + "grad_norm": 0.5825369068591669, + "learning_rate": 3.180036144260489e-06, + "loss": 0.3129, + "step": 18356 + }, + { + "epoch": 0.8599334801143018, + "grad_norm": 0.603693511870361, + "learning_rate": 3.1798536432615696e-06, + "loss": 0.3158, + "step": 18357 + }, + { + "epoch": 0.8599803251042301, + "grad_norm": 0.6476389881862485, + "learning_rate": 3.1796711383502633e-06, + "loss": 0.3221, + "step": 18358 + }, + { + "epoch": 0.8600271700941584, + "grad_norm": 0.5676472107050078, + "learning_rate": 3.1794886295276193e-06, + "loss": 0.3199, + "step": 18359 + }, + { + "epoch": 0.8600740150840868, + "grad_norm": 0.5778877176915537, + "learning_rate": 3.1793061167946896e-06, + "loss": 0.3251, + "step": 18360 + }, + { + "epoch": 0.8601208600740151, + "grad_norm": 0.5947726425933464, + "learning_rate": 3.1791236001525246e-06, + "loss": 0.3088, + "step": 18361 + }, + { + "epoch": 0.8601677050639435, + "grad_norm": 0.5915414095562661, + "learning_rate": 3.178941079602173e-06, + "loss": 0.3072, + "step": 18362 + }, + { + "epoch": 0.8602145500538717, + "grad_norm": 0.583151698746867, + "learning_rate": 3.1787585551446864e-06, + "loss": 0.3162, + "step": 18363 + }, + { + "epoch": 0.8602613950438001, + "grad_norm": 0.6205222706081204, + "learning_rate": 3.178576026781115e-06, + "loss": 0.3114, + "step": 18364 + }, + { + "epoch": 0.8603082400337284, + "grad_norm": 0.5609235660189644, + "learning_rate": 3.178393494512509e-06, + "loss": 0.3071, + "step": 18365 + }, + { + "epoch": 0.8603550850236568, + "grad_norm": 0.5738791990153987, + "learning_rate": 3.178210958339919e-06, + "loss": 0.3141, + "step": 18366 + }, + { + "epoch": 0.860401930013585, + "grad_norm": 0.5877053863020042, + "learning_rate": 3.1780284182643957e-06, + "loss": 0.317, + "step": 18367 + }, + { + "epoch": 0.8604487750035134, + "grad_norm": 0.5337796299717827, + "learning_rate": 3.177845874286989e-06, + "loss": 0.3048, + "step": 18368 + }, + { + "epoch": 0.8604956199934417, + "grad_norm": 0.5722105136984987, + "learning_rate": 3.1776633264087495e-06, + "loss": 0.3044, + "step": 18369 + }, + { + "epoch": 0.8605424649833701, + "grad_norm": 0.6276236561613926, + "learning_rate": 3.177480774630728e-06, + "loss": 0.3195, + "step": 18370 + }, + { + "epoch": 0.8605893099732984, + "grad_norm": 0.6140137506497713, + "learning_rate": 3.1772982189539752e-06, + "loss": 0.3182, + "step": 18371 + }, + { + "epoch": 0.8606361549632267, + "grad_norm": 0.6007395493903024, + "learning_rate": 3.177115659379541e-06, + "loss": 0.3145, + "step": 18372 + }, + { + "epoch": 0.860682999953155, + "grad_norm": 0.6505140939163708, + "learning_rate": 3.1769330959084766e-06, + "loss": 0.3082, + "step": 18373 + }, + { + "epoch": 0.8607298449430834, + "grad_norm": 0.6280074489522096, + "learning_rate": 3.176750528541832e-06, + "loss": 0.3292, + "step": 18374 + }, + { + "epoch": 0.8607766899330117, + "grad_norm": 0.5566380117784451, + "learning_rate": 3.1765679572806584e-06, + "loss": 0.2996, + "step": 18375 + }, + { + "epoch": 0.86082353492294, + "grad_norm": 0.5901131508492451, + "learning_rate": 3.176385382126007e-06, + "loss": 0.3193, + "step": 18376 + }, + { + "epoch": 0.8608703799128683, + "grad_norm": 0.5957597303931912, + "learning_rate": 3.1762028030789264e-06, + "loss": 0.3314, + "step": 18377 + }, + { + "epoch": 0.8609172249027967, + "grad_norm": 0.5945859207755393, + "learning_rate": 3.17602022014047e-06, + "loss": 0.314, + "step": 18378 + }, + { + "epoch": 0.860964069892725, + "grad_norm": 0.5706703529476954, + "learning_rate": 3.1758376333116863e-06, + "loss": 0.3035, + "step": 18379 + }, + { + "epoch": 0.8610109148826534, + "grad_norm": 0.586000819153911, + "learning_rate": 3.1756550425936266e-06, + "loss": 0.301, + "step": 18380 + }, + { + "epoch": 0.8610577598725816, + "grad_norm": 0.5454983540973981, + "learning_rate": 3.1754724479873427e-06, + "loss": 0.3106, + "step": 18381 + }, + { + "epoch": 0.86110460486251, + "grad_norm": 0.5726102100434179, + "learning_rate": 3.1752898494938844e-06, + "loss": 0.3176, + "step": 18382 + }, + { + "epoch": 0.8611514498524383, + "grad_norm": 0.5886164184279256, + "learning_rate": 3.1751072471143025e-06, + "loss": 0.3227, + "step": 18383 + }, + { + "epoch": 0.8611982948423667, + "grad_norm": 0.6230265946028666, + "learning_rate": 3.174924640849648e-06, + "loss": 0.3248, + "step": 18384 + }, + { + "epoch": 0.8612451398322949, + "grad_norm": 0.6730408017794698, + "learning_rate": 3.1747420307009728e-06, + "loss": 0.3424, + "step": 18385 + }, + { + "epoch": 0.8612919848222232, + "grad_norm": 0.5511184716828513, + "learning_rate": 3.174559416669326e-06, + "loss": 0.3134, + "step": 18386 + }, + { + "epoch": 0.8613388298121516, + "grad_norm": 0.573386491098741, + "learning_rate": 3.1743767987557587e-06, + "loss": 0.3112, + "step": 18387 + }, + { + "epoch": 0.86138567480208, + "grad_norm": 0.5947266776443302, + "learning_rate": 3.174194176961323e-06, + "loss": 0.3016, + "step": 18388 + }, + { + "epoch": 0.8614325197920083, + "grad_norm": 0.6806570594392473, + "learning_rate": 3.17401155128707e-06, + "loss": 0.3179, + "step": 18389 + }, + { + "epoch": 0.8614793647819365, + "grad_norm": 0.5978670877991745, + "learning_rate": 3.173828921734049e-06, + "loss": 0.3395, + "step": 18390 + }, + { + "epoch": 0.8615262097718649, + "grad_norm": 0.5899913228193706, + "learning_rate": 3.1736462883033125e-06, + "loss": 0.3201, + "step": 18391 + }, + { + "epoch": 0.8615730547617932, + "grad_norm": 0.5638080656172262, + "learning_rate": 3.1734636509959107e-06, + "loss": 0.3079, + "step": 18392 + }, + { + "epoch": 0.8616198997517216, + "grad_norm": 0.5884525906469865, + "learning_rate": 3.1732810098128948e-06, + "loss": 0.32, + "step": 18393 + }, + { + "epoch": 0.8616667447416498, + "grad_norm": 0.6078295594504769, + "learning_rate": 3.173098364755316e-06, + "loss": 0.2845, + "step": 18394 + }, + { + "epoch": 0.8617135897315782, + "grad_norm": 0.6312754838113595, + "learning_rate": 3.1729157158242246e-06, + "loss": 0.2987, + "step": 18395 + }, + { + "epoch": 0.8617604347215065, + "grad_norm": 0.5695552571751713, + "learning_rate": 3.172733063020673e-06, + "loss": 0.3044, + "step": 18396 + }, + { + "epoch": 0.8618072797114349, + "grad_norm": 0.5635735255251094, + "learning_rate": 3.1725504063457125e-06, + "loss": 0.2888, + "step": 18397 + }, + { + "epoch": 0.8618541247013632, + "grad_norm": 0.604085504071906, + "learning_rate": 3.172367745800392e-06, + "loss": 0.3273, + "step": 18398 + }, + { + "epoch": 0.8619009696912915, + "grad_norm": 0.6654366680490568, + "learning_rate": 3.1721850813857645e-06, + "loss": 0.3354, + "step": 18399 + }, + { + "epoch": 0.8619478146812198, + "grad_norm": 0.5501361476370744, + "learning_rate": 3.1720024131028815e-06, + "loss": 0.314, + "step": 18400 + }, + { + "epoch": 0.8619946596711482, + "grad_norm": 0.6246448175631673, + "learning_rate": 3.171819740952793e-06, + "loss": 0.3081, + "step": 18401 + }, + { + "epoch": 0.8620415046610765, + "grad_norm": 0.5942763337099889, + "learning_rate": 3.1716370649365504e-06, + "loss": 0.3221, + "step": 18402 + }, + { + "epoch": 0.8620883496510048, + "grad_norm": 0.5440133735482355, + "learning_rate": 3.1714543850552067e-06, + "loss": 0.3106, + "step": 18403 + }, + { + "epoch": 0.8621351946409331, + "grad_norm": 0.5793371822950804, + "learning_rate": 3.1712717013098105e-06, + "loss": 0.3153, + "step": 18404 + }, + { + "epoch": 0.8621820396308615, + "grad_norm": 0.6547819837918697, + "learning_rate": 3.171089013701414e-06, + "loss": 0.328, + "step": 18405 + }, + { + "epoch": 0.8622288846207898, + "grad_norm": 0.58506883831339, + "learning_rate": 3.17090632223107e-06, + "loss": 0.3051, + "step": 18406 + }, + { + "epoch": 0.8622757296107182, + "grad_norm": 0.5800505023509914, + "learning_rate": 3.170723626899829e-06, + "loss": 0.3104, + "step": 18407 + }, + { + "epoch": 0.8623225746006464, + "grad_norm": 0.5801913556180492, + "learning_rate": 3.1705409277087407e-06, + "loss": 0.2957, + "step": 18408 + }, + { + "epoch": 0.8623694195905748, + "grad_norm": 0.6132003562068653, + "learning_rate": 3.170358224658859e-06, + "loss": 0.3424, + "step": 18409 + }, + { + "epoch": 0.8624162645805031, + "grad_norm": 0.6600319002334645, + "learning_rate": 3.1701755177512337e-06, + "loss": 0.3406, + "step": 18410 + }, + { + "epoch": 0.8624631095704315, + "grad_norm": 0.5778477654409633, + "learning_rate": 3.1699928069869163e-06, + "loss": 0.3265, + "step": 18411 + }, + { + "epoch": 0.8625099545603597, + "grad_norm": 0.5533881234266226, + "learning_rate": 3.1698100923669596e-06, + "loss": 0.2984, + "step": 18412 + }, + { + "epoch": 0.8625567995502881, + "grad_norm": 0.604579397435892, + "learning_rate": 3.1696273738924138e-06, + "loss": 0.3047, + "step": 18413 + }, + { + "epoch": 0.8626036445402164, + "grad_norm": 0.5900139504972897, + "learning_rate": 3.1694446515643306e-06, + "loss": 0.3168, + "step": 18414 + }, + { + "epoch": 0.8626504895301448, + "grad_norm": 0.6048306652241258, + "learning_rate": 3.169261925383762e-06, + "loss": 0.3194, + "step": 18415 + }, + { + "epoch": 0.8626973345200731, + "grad_norm": 0.5767368748583239, + "learning_rate": 3.169079195351759e-06, + "loss": 0.3161, + "step": 18416 + }, + { + "epoch": 0.8627441795100014, + "grad_norm": 0.606679871230477, + "learning_rate": 3.1688964614693736e-06, + "loss": 0.3221, + "step": 18417 + }, + { + "epoch": 0.8627910244999297, + "grad_norm": 0.574366209658049, + "learning_rate": 3.1687137237376574e-06, + "loss": 0.3128, + "step": 18418 + }, + { + "epoch": 0.8628378694898581, + "grad_norm": 0.6358483918178955, + "learning_rate": 3.168530982157661e-06, + "loss": 0.3235, + "step": 18419 + }, + { + "epoch": 0.8628847144797864, + "grad_norm": 0.5907239715846861, + "learning_rate": 3.1683482367304375e-06, + "loss": 0.3185, + "step": 18420 + }, + { + "epoch": 0.8629315594697147, + "grad_norm": 0.6208761701800268, + "learning_rate": 3.1681654874570377e-06, + "loss": 0.3137, + "step": 18421 + }, + { + "epoch": 0.862978404459643, + "grad_norm": 0.5686036305143739, + "learning_rate": 3.167982734338513e-06, + "loss": 0.3198, + "step": 18422 + }, + { + "epoch": 0.8630252494495714, + "grad_norm": 0.6445199447085525, + "learning_rate": 3.167799977375916e-06, + "loss": 0.3324, + "step": 18423 + }, + { + "epoch": 0.8630720944394997, + "grad_norm": 0.6102766255980027, + "learning_rate": 3.167617216570299e-06, + "loss": 0.3417, + "step": 18424 + }, + { + "epoch": 0.8631189394294281, + "grad_norm": 0.6664811236452511, + "learning_rate": 3.167434451922711e-06, + "loss": 0.3407, + "step": 18425 + }, + { + "epoch": 0.8631657844193563, + "grad_norm": 0.6004174157257299, + "learning_rate": 3.167251683434206e-06, + "loss": 0.3255, + "step": 18426 + }, + { + "epoch": 0.8632126294092847, + "grad_norm": 0.6127402679954679, + "learning_rate": 3.1670689111058356e-06, + "loss": 0.298, + "step": 18427 + }, + { + "epoch": 0.863259474399213, + "grad_norm": 0.6186257323217018, + "learning_rate": 3.1668861349386514e-06, + "loss": 0.3322, + "step": 18428 + }, + { + "epoch": 0.8633063193891414, + "grad_norm": 0.5917391377321277, + "learning_rate": 3.1667033549337045e-06, + "loss": 0.3299, + "step": 18429 + }, + { + "epoch": 0.8633531643790696, + "grad_norm": 0.6153202456974975, + "learning_rate": 3.1665205710920478e-06, + "loss": 0.3055, + "step": 18430 + }, + { + "epoch": 0.863400009368998, + "grad_norm": 0.5834964685889129, + "learning_rate": 3.1663377834147328e-06, + "loss": 0.3536, + "step": 18431 + }, + { + "epoch": 0.8634468543589263, + "grad_norm": 0.5995208027951654, + "learning_rate": 3.166154991902811e-06, + "loss": 0.3271, + "step": 18432 + }, + { + "epoch": 0.8634936993488547, + "grad_norm": 0.5879433002879785, + "learning_rate": 3.165972196557335e-06, + "loss": 0.34, + "step": 18433 + }, + { + "epoch": 0.863540544338783, + "grad_norm": 0.6324145024923857, + "learning_rate": 3.165789397379356e-06, + "loss": 0.3515, + "step": 18434 + }, + { + "epoch": 0.8635873893287113, + "grad_norm": 0.5553825262897094, + "learning_rate": 3.1656065943699266e-06, + "loss": 0.3333, + "step": 18435 + }, + { + "epoch": 0.8636342343186396, + "grad_norm": 0.6466501182559339, + "learning_rate": 3.1654237875300984e-06, + "loss": 0.3241, + "step": 18436 + }, + { + "epoch": 0.863681079308568, + "grad_norm": 0.5909120090464959, + "learning_rate": 3.1652409768609236e-06, + "loss": 0.3088, + "step": 18437 + }, + { + "epoch": 0.8637279242984963, + "grad_norm": 0.6006822623178169, + "learning_rate": 3.1650581623634547e-06, + "loss": 0.3034, + "step": 18438 + }, + { + "epoch": 0.8637747692884246, + "grad_norm": 0.6320711206819695, + "learning_rate": 3.164875344038743e-06, + "loss": 0.331, + "step": 18439 + }, + { + "epoch": 0.8638216142783529, + "grad_norm": 0.5619620455796296, + "learning_rate": 3.16469252188784e-06, + "loss": 0.315, + "step": 18440 + }, + { + "epoch": 0.8638684592682813, + "grad_norm": 0.5794831304879293, + "learning_rate": 3.1645096959117993e-06, + "loss": 0.3369, + "step": 18441 + }, + { + "epoch": 0.8639153042582096, + "grad_norm": 0.5742387052001757, + "learning_rate": 3.164326866111672e-06, + "loss": 0.3066, + "step": 18442 + }, + { + "epoch": 0.8639621492481379, + "grad_norm": 0.6139904719293116, + "learning_rate": 3.164144032488511e-06, + "loss": 0.3047, + "step": 18443 + }, + { + "epoch": 0.8640089942380662, + "grad_norm": 0.6275246833133715, + "learning_rate": 3.1639611950433673e-06, + "loss": 0.3051, + "step": 18444 + }, + { + "epoch": 0.8640558392279946, + "grad_norm": 0.5903895885794715, + "learning_rate": 3.163778353777295e-06, + "loss": 0.3214, + "step": 18445 + }, + { + "epoch": 0.8641026842179229, + "grad_norm": 0.5891097835826632, + "learning_rate": 3.1635955086913444e-06, + "loss": 0.3223, + "step": 18446 + }, + { + "epoch": 0.8641495292078513, + "grad_norm": 0.5914933469399415, + "learning_rate": 3.163412659786568e-06, + "loss": 0.3047, + "step": 18447 + }, + { + "epoch": 0.8641963741977795, + "grad_norm": 0.5763966128023194, + "learning_rate": 3.163229807064019e-06, + "loss": 0.3197, + "step": 18448 + }, + { + "epoch": 0.8642432191877079, + "grad_norm": 0.6157952506576436, + "learning_rate": 3.1630469505247495e-06, + "loss": 0.2962, + "step": 18449 + }, + { + "epoch": 0.8642900641776362, + "grad_norm": 0.6095777316692912, + "learning_rate": 3.1628640901698104e-06, + "loss": 0.3299, + "step": 18450 + }, + { + "epoch": 0.8643369091675646, + "grad_norm": 0.5575407806773707, + "learning_rate": 3.162681226000256e-06, + "loss": 0.2979, + "step": 18451 + }, + { + "epoch": 0.8643837541574928, + "grad_norm": 0.5768796887498306, + "learning_rate": 3.1624983580171376e-06, + "loss": 0.3076, + "step": 18452 + }, + { + "epoch": 0.8644305991474212, + "grad_norm": 0.5569477231890604, + "learning_rate": 3.162315486221507e-06, + "loss": 0.3144, + "step": 18453 + }, + { + "epoch": 0.8644774441373495, + "grad_norm": 0.5954786152667495, + "learning_rate": 3.162132610614418e-06, + "loss": 0.3234, + "step": 18454 + }, + { + "epoch": 0.8645242891272779, + "grad_norm": 0.5229815391818573, + "learning_rate": 3.1619497311969223e-06, + "loss": 0.2913, + "step": 18455 + }, + { + "epoch": 0.8645711341172062, + "grad_norm": 0.5693120475238698, + "learning_rate": 3.1617668479700713e-06, + "loss": 0.3223, + "step": 18456 + }, + { + "epoch": 0.8646179791071344, + "grad_norm": 0.5739957894182348, + "learning_rate": 3.161583960934919e-06, + "loss": 0.3268, + "step": 18457 + }, + { + "epoch": 0.8646648240970628, + "grad_norm": 0.5935895707213058, + "learning_rate": 3.1614010700925174e-06, + "loss": 0.3354, + "step": 18458 + }, + { + "epoch": 0.8647116690869912, + "grad_norm": 0.6157761128474447, + "learning_rate": 3.1612181754439193e-06, + "loss": 0.3218, + "step": 18459 + }, + { + "epoch": 0.8647585140769195, + "grad_norm": 0.642810671997819, + "learning_rate": 3.161035276990176e-06, + "loss": 0.3257, + "step": 18460 + }, + { + "epoch": 0.8648053590668477, + "grad_norm": 0.5909004579116909, + "learning_rate": 3.1608523747323412e-06, + "loss": 0.3102, + "step": 18461 + }, + { + "epoch": 0.8648522040567761, + "grad_norm": 0.6305776898891889, + "learning_rate": 3.1606694686714674e-06, + "loss": 0.3148, + "step": 18462 + }, + { + "epoch": 0.8648990490467044, + "grad_norm": 0.5607379573625952, + "learning_rate": 3.160486558808606e-06, + "loss": 0.3143, + "step": 18463 + }, + { + "epoch": 0.8649458940366328, + "grad_norm": 0.5262353795881488, + "learning_rate": 3.160303645144811e-06, + "loss": 0.2988, + "step": 18464 + }, + { + "epoch": 0.8649927390265612, + "grad_norm": 0.5864692513506679, + "learning_rate": 3.1601207276811343e-06, + "loss": 0.3366, + "step": 18465 + }, + { + "epoch": 0.8650395840164894, + "grad_norm": 0.5939164240643808, + "learning_rate": 3.159937806418629e-06, + "loss": 0.3186, + "step": 18466 + }, + { + "epoch": 0.8650864290064177, + "grad_norm": 0.656841123446341, + "learning_rate": 3.1597548813583474e-06, + "loss": 0.3301, + "step": 18467 + }, + { + "epoch": 0.8651332739963461, + "grad_norm": 0.6063976247425322, + "learning_rate": 3.159571952501342e-06, + "loss": 0.3317, + "step": 18468 + }, + { + "epoch": 0.8651801189862744, + "grad_norm": 0.6220991218493286, + "learning_rate": 3.159389019848666e-06, + "loss": 0.3223, + "step": 18469 + }, + { + "epoch": 0.8652269639762027, + "grad_norm": 0.5882132470747141, + "learning_rate": 3.159206083401372e-06, + "loss": 0.2847, + "step": 18470 + }, + { + "epoch": 0.865273808966131, + "grad_norm": 0.5945480486377703, + "learning_rate": 3.1590231431605123e-06, + "loss": 0.3015, + "step": 18471 + }, + { + "epoch": 0.8653206539560594, + "grad_norm": 0.6312018670184016, + "learning_rate": 3.15884019912714e-06, + "loss": 0.3238, + "step": 18472 + }, + { + "epoch": 0.8653674989459877, + "grad_norm": 0.6111562706974509, + "learning_rate": 3.158657251302309e-06, + "loss": 0.3241, + "step": 18473 + }, + { + "epoch": 0.8654143439359161, + "grad_norm": 0.5859729561831362, + "learning_rate": 3.15847429968707e-06, + "loss": 0.3184, + "step": 18474 + }, + { + "epoch": 0.8654611889258443, + "grad_norm": 0.5942688642955068, + "learning_rate": 3.158291344282477e-06, + "loss": 0.3291, + "step": 18475 + }, + { + "epoch": 0.8655080339157727, + "grad_norm": 0.5993612469628868, + "learning_rate": 3.158108385089583e-06, + "loss": 0.318, + "step": 18476 + }, + { + "epoch": 0.865554878905701, + "grad_norm": 0.6025443045006242, + "learning_rate": 3.1579254221094413e-06, + "loss": 0.3231, + "step": 18477 + }, + { + "epoch": 0.8656017238956294, + "grad_norm": 0.6432040053728286, + "learning_rate": 3.1577424553431028e-06, + "loss": 0.3091, + "step": 18478 + }, + { + "epoch": 0.8656485688855576, + "grad_norm": 0.6023264485709093, + "learning_rate": 3.157559484791623e-06, + "loss": 0.3087, + "step": 18479 + }, + { + "epoch": 0.865695413875486, + "grad_norm": 0.6072249452888417, + "learning_rate": 3.1573765104560533e-06, + "loss": 0.3311, + "step": 18480 + }, + { + "epoch": 0.8657422588654143, + "grad_norm": 0.6024925033302426, + "learning_rate": 3.1571935323374468e-06, + "loss": 0.3276, + "step": 18481 + }, + { + "epoch": 0.8657891038553427, + "grad_norm": 0.5686532306053337, + "learning_rate": 3.157010550436857e-06, + "loss": 0.3116, + "step": 18482 + }, + { + "epoch": 0.865835948845271, + "grad_norm": 0.5870377137805504, + "learning_rate": 3.1568275647553366e-06, + "loss": 0.3092, + "step": 18483 + }, + { + "epoch": 0.8658827938351993, + "grad_norm": 0.6497385875879864, + "learning_rate": 3.156644575293938e-06, + "loss": 0.3311, + "step": 18484 + }, + { + "epoch": 0.8659296388251276, + "grad_norm": 0.5583383549211944, + "learning_rate": 3.156461582053716e-06, + "loss": 0.3192, + "step": 18485 + }, + { + "epoch": 0.865976483815056, + "grad_norm": 0.6031476706158312, + "learning_rate": 3.1562785850357214e-06, + "loss": 0.3129, + "step": 18486 + }, + { + "epoch": 0.8660233288049843, + "grad_norm": 0.5674701837701683, + "learning_rate": 3.156095584241009e-06, + "loss": 0.3072, + "step": 18487 + }, + { + "epoch": 0.8660701737949126, + "grad_norm": 0.6513942825053826, + "learning_rate": 3.1559125796706313e-06, + "loss": 0.3291, + "step": 18488 + }, + { + "epoch": 0.8661170187848409, + "grad_norm": 0.5905340374258607, + "learning_rate": 3.1557295713256417e-06, + "loss": 0.3131, + "step": 18489 + }, + { + "epoch": 0.8661638637747693, + "grad_norm": 0.6530020342072429, + "learning_rate": 3.1555465592070933e-06, + "loss": 0.3265, + "step": 18490 + }, + { + "epoch": 0.8662107087646976, + "grad_norm": 0.623133675507227, + "learning_rate": 3.1553635433160397e-06, + "loss": 0.3271, + "step": 18491 + }, + { + "epoch": 0.866257553754626, + "grad_norm": 0.5923523521696206, + "learning_rate": 3.1551805236535326e-06, + "loss": 0.3144, + "step": 18492 + }, + { + "epoch": 0.8663043987445542, + "grad_norm": 0.6010894184762509, + "learning_rate": 3.1549975002206268e-06, + "loss": 0.3286, + "step": 18493 + }, + { + "epoch": 0.8663512437344826, + "grad_norm": 0.638358540317387, + "learning_rate": 3.1548144730183757e-06, + "loss": 0.3445, + "step": 18494 + }, + { + "epoch": 0.8663980887244109, + "grad_norm": 0.5866071128905312, + "learning_rate": 3.154631442047831e-06, + "loss": 0.3046, + "step": 18495 + }, + { + "epoch": 0.8664449337143393, + "grad_norm": 0.6732749263103133, + "learning_rate": 3.154448407310046e-06, + "loss": 0.3502, + "step": 18496 + }, + { + "epoch": 0.8664917787042675, + "grad_norm": 0.6062721273378029, + "learning_rate": 3.1542653688060765e-06, + "loss": 0.2933, + "step": 18497 + }, + { + "epoch": 0.8665386236941959, + "grad_norm": 0.5846326886711922, + "learning_rate": 3.1540823265369736e-06, + "loss": 0.306, + "step": 18498 + }, + { + "epoch": 0.8665854686841242, + "grad_norm": 0.6101718078527392, + "learning_rate": 3.1538992805037914e-06, + "loss": 0.3007, + "step": 18499 + }, + { + "epoch": 0.8666323136740526, + "grad_norm": 0.5827694058917692, + "learning_rate": 3.153716230707583e-06, + "loss": 0.3235, + "step": 18500 + }, + { + "epoch": 0.8666791586639809, + "grad_norm": 0.6236392529812116, + "learning_rate": 3.1535331771494026e-06, + "loss": 0.3409, + "step": 18501 + }, + { + "epoch": 0.8667260036539092, + "grad_norm": 0.5955096350304923, + "learning_rate": 3.1533501198303025e-06, + "loss": 0.3135, + "step": 18502 + }, + { + "epoch": 0.8667728486438375, + "grad_norm": 0.5873696983960458, + "learning_rate": 3.1531670587513367e-06, + "loss": 0.3198, + "step": 18503 + }, + { + "epoch": 0.8668196936337659, + "grad_norm": 0.6214189656544289, + "learning_rate": 3.152983993913559e-06, + "loss": 0.314, + "step": 18504 + }, + { + "epoch": 0.8668665386236942, + "grad_norm": 0.5647316701380731, + "learning_rate": 3.152800925318022e-06, + "loss": 0.3089, + "step": 18505 + }, + { + "epoch": 0.8669133836136225, + "grad_norm": 0.5395659248689798, + "learning_rate": 3.1526178529657803e-06, + "loss": 0.2993, + "step": 18506 + }, + { + "epoch": 0.8669602286035508, + "grad_norm": 0.6006865723515931, + "learning_rate": 3.152434776857886e-06, + "loss": 0.3047, + "step": 18507 + }, + { + "epoch": 0.8670070735934792, + "grad_norm": 0.5399335934788991, + "learning_rate": 3.152251696995394e-06, + "loss": 0.307, + "step": 18508 + }, + { + "epoch": 0.8670539185834075, + "grad_norm": 0.6269398601858863, + "learning_rate": 3.1520686133793575e-06, + "loss": 0.3026, + "step": 18509 + }, + { + "epoch": 0.8671007635733359, + "grad_norm": 0.599523438171033, + "learning_rate": 3.1518855260108307e-06, + "loss": 0.3185, + "step": 18510 + }, + { + "epoch": 0.8671476085632641, + "grad_norm": 0.5650844623731537, + "learning_rate": 3.151702434890866e-06, + "loss": 0.3029, + "step": 18511 + }, + { + "epoch": 0.8671944535531925, + "grad_norm": 0.5590071263728755, + "learning_rate": 3.151519340020517e-06, + "loss": 0.3053, + "step": 18512 + }, + { + "epoch": 0.8672412985431208, + "grad_norm": 0.5650410738866549, + "learning_rate": 3.1513362414008387e-06, + "loss": 0.3342, + "step": 18513 + }, + { + "epoch": 0.8672881435330492, + "grad_norm": 0.5996683284329294, + "learning_rate": 3.1511531390328835e-06, + "loss": 0.319, + "step": 18514 + }, + { + "epoch": 0.8673349885229774, + "grad_norm": 0.6046250227477554, + "learning_rate": 3.150970032917706e-06, + "loss": 0.3307, + "step": 18515 + }, + { + "epoch": 0.8673818335129058, + "grad_norm": 0.6477096437370796, + "learning_rate": 3.1507869230563604e-06, + "loss": 0.3317, + "step": 18516 + }, + { + "epoch": 0.8674286785028341, + "grad_norm": 0.6084713197810953, + "learning_rate": 3.1506038094498983e-06, + "loss": 0.3333, + "step": 18517 + }, + { + "epoch": 0.8674755234927625, + "grad_norm": 0.5923377533736575, + "learning_rate": 3.1504206920993752e-06, + "loss": 0.2999, + "step": 18518 + }, + { + "epoch": 0.8675223684826908, + "grad_norm": 0.5770393349459324, + "learning_rate": 3.150237571005845e-06, + "loss": 0.3143, + "step": 18519 + }, + { + "epoch": 0.8675692134726191, + "grad_norm": 0.5660587899841731, + "learning_rate": 3.1500544461703598e-06, + "loss": 0.3253, + "step": 18520 + }, + { + "epoch": 0.8676160584625474, + "grad_norm": 0.6033971740914987, + "learning_rate": 3.1498713175939756e-06, + "loss": 0.3144, + "step": 18521 + }, + { + "epoch": 0.8676629034524758, + "grad_norm": 0.5872365985156629, + "learning_rate": 3.1496881852777454e-06, + "loss": 0.3318, + "step": 18522 + }, + { + "epoch": 0.8677097484424041, + "grad_norm": 0.5849966281894022, + "learning_rate": 3.1495050492227223e-06, + "loss": 0.3441, + "step": 18523 + }, + { + "epoch": 0.8677565934323324, + "grad_norm": 0.5693740109352973, + "learning_rate": 3.149321909429961e-06, + "loss": 0.3068, + "step": 18524 + }, + { + "epoch": 0.8678034384222607, + "grad_norm": 0.5926333753746974, + "learning_rate": 3.1491387659005167e-06, + "loss": 0.3154, + "step": 18525 + }, + { + "epoch": 0.8678502834121891, + "grad_norm": 0.5610988858430866, + "learning_rate": 3.1489556186354403e-06, + "loss": 0.3077, + "step": 18526 + }, + { + "epoch": 0.8678971284021174, + "grad_norm": 0.6300971340750336, + "learning_rate": 3.148772467635788e-06, + "loss": 0.3433, + "step": 18527 + }, + { + "epoch": 0.8679439733920458, + "grad_norm": 0.6011132077472758, + "learning_rate": 3.148589312902613e-06, + "loss": 0.3284, + "step": 18528 + }, + { + "epoch": 0.867990818381974, + "grad_norm": 0.6010062330619574, + "learning_rate": 3.1484061544369698e-06, + "loss": 0.3114, + "step": 18529 + }, + { + "epoch": 0.8680376633719024, + "grad_norm": 0.5263475518414077, + "learning_rate": 3.148222992239912e-06, + "loss": 0.2913, + "step": 18530 + }, + { + "epoch": 0.8680845083618307, + "grad_norm": 0.5631684595156058, + "learning_rate": 3.1480398263124938e-06, + "loss": 0.3184, + "step": 18531 + }, + { + "epoch": 0.8681313533517591, + "grad_norm": 0.568396131152649, + "learning_rate": 3.14785665665577e-06, + "loss": 0.2986, + "step": 18532 + }, + { + "epoch": 0.8681781983416873, + "grad_norm": 0.5800809925473143, + "learning_rate": 3.1476734832707926e-06, + "loss": 0.3142, + "step": 18533 + }, + { + "epoch": 0.8682250433316157, + "grad_norm": 0.613608994752134, + "learning_rate": 3.147490306158618e-06, + "loss": 0.3205, + "step": 18534 + }, + { + "epoch": 0.868271888321544, + "grad_norm": 0.6350896065987397, + "learning_rate": 3.147307125320299e-06, + "loss": 0.3223, + "step": 18535 + }, + { + "epoch": 0.8683187333114724, + "grad_norm": 0.5340867527855151, + "learning_rate": 3.1471239407568908e-06, + "loss": 0.3144, + "step": 18536 + }, + { + "epoch": 0.8683655783014007, + "grad_norm": 0.603744078986096, + "learning_rate": 3.1469407524694467e-06, + "loss": 0.3132, + "step": 18537 + }, + { + "epoch": 0.868412423291329, + "grad_norm": 0.665289634289852, + "learning_rate": 3.1467575604590206e-06, + "loss": 0.3255, + "step": 18538 + }, + { + "epoch": 0.8684592682812573, + "grad_norm": 0.5889959199953213, + "learning_rate": 3.146574364726668e-06, + "loss": 0.3007, + "step": 18539 + }, + { + "epoch": 0.8685061132711857, + "grad_norm": 0.6053488041125463, + "learning_rate": 3.1463911652734427e-06, + "loss": 0.2983, + "step": 18540 + }, + { + "epoch": 0.868552958261114, + "grad_norm": 0.5942183835409793, + "learning_rate": 3.146207962100398e-06, + "loss": 0.3304, + "step": 18541 + }, + { + "epoch": 0.8685998032510422, + "grad_norm": 0.6362438888393751, + "learning_rate": 3.146024755208589e-06, + "loss": 0.3271, + "step": 18542 + }, + { + "epoch": 0.8686466482409706, + "grad_norm": 0.5930863238088327, + "learning_rate": 3.1458415445990704e-06, + "loss": 0.3351, + "step": 18543 + }, + { + "epoch": 0.868693493230899, + "grad_norm": 0.564777831701196, + "learning_rate": 3.145658330272895e-06, + "loss": 0.2972, + "step": 18544 + }, + { + "epoch": 0.8687403382208273, + "grad_norm": 0.5777628389560238, + "learning_rate": 3.145475112231119e-06, + "loss": 0.3206, + "step": 18545 + }, + { + "epoch": 0.8687871832107557, + "grad_norm": 0.5590665393264963, + "learning_rate": 3.145291890474796e-06, + "loss": 0.2928, + "step": 18546 + }, + { + "epoch": 0.8688340282006839, + "grad_norm": 0.6065674465125579, + "learning_rate": 3.14510866500498e-06, + "loss": 0.3025, + "step": 18547 + }, + { + "epoch": 0.8688808731906122, + "grad_norm": 0.56777621236603, + "learning_rate": 3.1449254358227254e-06, + "loss": 0.3146, + "step": 18548 + }, + { + "epoch": 0.8689277181805406, + "grad_norm": 0.5771549742807998, + "learning_rate": 3.144742202929088e-06, + "loss": 0.3273, + "step": 18549 + }, + { + "epoch": 0.868974563170469, + "grad_norm": 0.548765735614487, + "learning_rate": 3.144558966325121e-06, + "loss": 0.2988, + "step": 18550 + }, + { + "epoch": 0.8690214081603972, + "grad_norm": 0.6003787629562809, + "learning_rate": 3.144375726011879e-06, + "loss": 0.3237, + "step": 18551 + }, + { + "epoch": 0.8690682531503255, + "grad_norm": 0.5844421442467049, + "learning_rate": 3.1441924819904166e-06, + "loss": 0.316, + "step": 18552 + }, + { + "epoch": 0.8691150981402539, + "grad_norm": 0.5932984458541946, + "learning_rate": 3.144009234261789e-06, + "loss": 0.3251, + "step": 18553 + }, + { + "epoch": 0.8691619431301822, + "grad_norm": 0.6232638050544628, + "learning_rate": 3.1438259828270488e-06, + "loss": 0.3113, + "step": 18554 + }, + { + "epoch": 0.8692087881201106, + "grad_norm": 0.5949244526367656, + "learning_rate": 3.143642727687253e-06, + "loss": 0.3125, + "step": 18555 + }, + { + "epoch": 0.8692556331100388, + "grad_norm": 0.6254214237615909, + "learning_rate": 3.143459468843454e-06, + "loss": 0.3184, + "step": 18556 + }, + { + "epoch": 0.8693024780999672, + "grad_norm": 0.5853370192397443, + "learning_rate": 3.1432762062967088e-06, + "loss": 0.3154, + "step": 18557 + }, + { + "epoch": 0.8693493230898955, + "grad_norm": 0.6224670076863464, + "learning_rate": 3.1430929400480702e-06, + "loss": 0.3228, + "step": 18558 + }, + { + "epoch": 0.8693961680798239, + "grad_norm": 0.586043185797866, + "learning_rate": 3.1429096700985927e-06, + "loss": 0.3131, + "step": 18559 + }, + { + "epoch": 0.8694430130697521, + "grad_norm": 0.6233603581414726, + "learning_rate": 3.142726396449333e-06, + "loss": 0.3217, + "step": 18560 + }, + { + "epoch": 0.8694898580596805, + "grad_norm": 0.5964874419882873, + "learning_rate": 3.1425431191013435e-06, + "loss": 0.3203, + "step": 18561 + }, + { + "epoch": 0.8695367030496088, + "grad_norm": 0.5800749315485552, + "learning_rate": 3.1423598380556797e-06, + "loss": 0.3002, + "step": 18562 + }, + { + "epoch": 0.8695835480395372, + "grad_norm": 0.5729333543247485, + "learning_rate": 3.1421765533133964e-06, + "loss": 0.3038, + "step": 18563 + }, + { + "epoch": 0.8696303930294655, + "grad_norm": 0.6433196342339563, + "learning_rate": 3.141993264875549e-06, + "loss": 0.318, + "step": 18564 + }, + { + "epoch": 0.8696772380193938, + "grad_norm": 0.6172346368669576, + "learning_rate": 3.1418099727431917e-06, + "loss": 0.3217, + "step": 18565 + }, + { + "epoch": 0.8697240830093221, + "grad_norm": 0.616102168252734, + "learning_rate": 3.1416266769173785e-06, + "loss": 0.3297, + "step": 18566 + }, + { + "epoch": 0.8697709279992505, + "grad_norm": 0.6169217495717629, + "learning_rate": 3.1414433773991658e-06, + "loss": 0.3284, + "step": 18567 + }, + { + "epoch": 0.8698177729891788, + "grad_norm": 0.5857240528552358, + "learning_rate": 3.141260074189608e-06, + "loss": 0.3259, + "step": 18568 + }, + { + "epoch": 0.8698646179791071, + "grad_norm": 0.5765997715831147, + "learning_rate": 3.141076767289759e-06, + "loss": 0.3186, + "step": 18569 + }, + { + "epoch": 0.8699114629690354, + "grad_norm": 0.5654289114872828, + "learning_rate": 3.1408934567006744e-06, + "loss": 0.3208, + "step": 18570 + }, + { + "epoch": 0.8699583079589638, + "grad_norm": 0.5956444363766186, + "learning_rate": 3.1407101424234098e-06, + "loss": 0.2986, + "step": 18571 + }, + { + "epoch": 0.8700051529488921, + "grad_norm": 0.5926531524760349, + "learning_rate": 3.1405268244590183e-06, + "loss": 0.3214, + "step": 18572 + }, + { + "epoch": 0.8700519979388205, + "grad_norm": 0.5747966859052642, + "learning_rate": 3.1403435028085568e-06, + "loss": 0.3266, + "step": 18573 + }, + { + "epoch": 0.8700988429287487, + "grad_norm": 0.6261856541975058, + "learning_rate": 3.140160177473079e-06, + "loss": 0.3258, + "step": 18574 + }, + { + "epoch": 0.8701456879186771, + "grad_norm": 0.6010837678323752, + "learning_rate": 3.13997684845364e-06, + "loss": 0.3374, + "step": 18575 + }, + { + "epoch": 0.8701925329086054, + "grad_norm": 0.6153732219516136, + "learning_rate": 3.1397935157512952e-06, + "loss": 0.3125, + "step": 18576 + }, + { + "epoch": 0.8702393778985338, + "grad_norm": 0.594031156932767, + "learning_rate": 3.1396101793670996e-06, + "loss": 0.3031, + "step": 18577 + }, + { + "epoch": 0.870286222888462, + "grad_norm": 0.6383371970393769, + "learning_rate": 3.1394268393021083e-06, + "loss": 0.316, + "step": 18578 + }, + { + "epoch": 0.8703330678783904, + "grad_norm": 0.620332103910736, + "learning_rate": 3.139243495557376e-06, + "loss": 0.3149, + "step": 18579 + }, + { + "epoch": 0.8703799128683187, + "grad_norm": 0.5846171111244355, + "learning_rate": 3.1390601481339583e-06, + "loss": 0.3147, + "step": 18580 + }, + { + "epoch": 0.8704267578582471, + "grad_norm": 0.6440155328898303, + "learning_rate": 3.1388767970329103e-06, + "loss": 0.3298, + "step": 18581 + }, + { + "epoch": 0.8704736028481754, + "grad_norm": 0.6138662721871809, + "learning_rate": 3.1386934422552862e-06, + "loss": 0.3339, + "step": 18582 + }, + { + "epoch": 0.8705204478381037, + "grad_norm": 0.5835418120528425, + "learning_rate": 3.1385100838021427e-06, + "loss": 0.3006, + "step": 18583 + }, + { + "epoch": 0.870567292828032, + "grad_norm": 0.5308983347837514, + "learning_rate": 3.1383267216745333e-06, + "loss": 0.3063, + "step": 18584 + }, + { + "epoch": 0.8706141378179604, + "grad_norm": 0.6352640708846293, + "learning_rate": 3.138143355873515e-06, + "loss": 0.3273, + "step": 18585 + }, + { + "epoch": 0.8706609828078887, + "grad_norm": 0.5658557102098316, + "learning_rate": 3.1379599864001415e-06, + "loss": 0.3055, + "step": 18586 + }, + { + "epoch": 0.870707827797817, + "grad_norm": 0.6525084056666062, + "learning_rate": 3.137776613255468e-06, + "loss": 0.3487, + "step": 18587 + }, + { + "epoch": 0.8707546727877453, + "grad_norm": 0.575773011578956, + "learning_rate": 3.1375932364405514e-06, + "loss": 0.3234, + "step": 18588 + }, + { + "epoch": 0.8708015177776737, + "grad_norm": 0.6122212331871377, + "learning_rate": 3.137409855956446e-06, + "loss": 0.3345, + "step": 18589 + }, + { + "epoch": 0.870848362767602, + "grad_norm": 0.6288526479655866, + "learning_rate": 3.1372264718042063e-06, + "loss": 0.3252, + "step": 18590 + }, + { + "epoch": 0.8708952077575304, + "grad_norm": 0.6115617265272787, + "learning_rate": 3.137043083984889e-06, + "loss": 0.3204, + "step": 18591 + }, + { + "epoch": 0.8709420527474586, + "grad_norm": 0.5681843427166151, + "learning_rate": 3.1368596924995486e-06, + "loss": 0.3107, + "step": 18592 + }, + { + "epoch": 0.870988897737387, + "grad_norm": 0.5756597702628268, + "learning_rate": 3.136676297349241e-06, + "loss": 0.3166, + "step": 18593 + }, + { + "epoch": 0.8710357427273153, + "grad_norm": 0.5413605406527542, + "learning_rate": 3.1364928985350206e-06, + "loss": 0.2921, + "step": 18594 + }, + { + "epoch": 0.8710825877172437, + "grad_norm": 0.6332487419240228, + "learning_rate": 3.1363094960579448e-06, + "loss": 0.3209, + "step": 18595 + }, + { + "epoch": 0.8711294327071719, + "grad_norm": 0.5525742317030929, + "learning_rate": 3.136126089919067e-06, + "loss": 0.3317, + "step": 18596 + }, + { + "epoch": 0.8711762776971003, + "grad_norm": 0.613877502644144, + "learning_rate": 3.1359426801194432e-06, + "loss": 0.3134, + "step": 18597 + }, + { + "epoch": 0.8712231226870286, + "grad_norm": 0.6448009308512159, + "learning_rate": 3.13575926666013e-06, + "loss": 0.3421, + "step": 18598 + }, + { + "epoch": 0.871269967676957, + "grad_norm": 0.6225314044245586, + "learning_rate": 3.135575849542181e-06, + "loss": 0.3454, + "step": 18599 + }, + { + "epoch": 0.8713168126668853, + "grad_norm": 0.5817609603766204, + "learning_rate": 3.135392428766653e-06, + "loss": 0.3305, + "step": 18600 + }, + { + "epoch": 0.8713636576568136, + "grad_norm": 0.5747782422239656, + "learning_rate": 3.135209004334602e-06, + "loss": 0.3261, + "step": 18601 + }, + { + "epoch": 0.8714105026467419, + "grad_norm": 0.5993782815074875, + "learning_rate": 3.1350255762470826e-06, + "loss": 0.3127, + "step": 18602 + }, + { + "epoch": 0.8714573476366703, + "grad_norm": 0.5879075136400422, + "learning_rate": 3.1348421445051497e-06, + "loss": 0.348, + "step": 18603 + }, + { + "epoch": 0.8715041926265986, + "grad_norm": 0.7840135333388223, + "learning_rate": 3.134658709109861e-06, + "loss": 0.3291, + "step": 18604 + }, + { + "epoch": 0.8715510376165269, + "grad_norm": 0.6111714074190403, + "learning_rate": 3.1344752700622697e-06, + "loss": 0.3254, + "step": 18605 + }, + { + "epoch": 0.8715978826064552, + "grad_norm": 0.5945372587424921, + "learning_rate": 3.1342918273634338e-06, + "loss": 0.2923, + "step": 18606 + }, + { + "epoch": 0.8716447275963836, + "grad_norm": 0.6073098474862552, + "learning_rate": 3.1341083810144076e-06, + "loss": 0.3181, + "step": 18607 + }, + { + "epoch": 0.8716915725863119, + "grad_norm": 0.5682106637758533, + "learning_rate": 3.1339249310162466e-06, + "loss": 0.3073, + "step": 18608 + }, + { + "epoch": 0.8717384175762403, + "grad_norm": 0.6110803100591181, + "learning_rate": 3.1337414773700075e-06, + "loss": 0.3066, + "step": 18609 + }, + { + "epoch": 0.8717852625661685, + "grad_norm": 0.537929266247186, + "learning_rate": 3.133558020076745e-06, + "loss": 0.3196, + "step": 18610 + }, + { + "epoch": 0.8718321075560969, + "grad_norm": 0.5806440011389615, + "learning_rate": 3.1333745591375155e-06, + "loss": 0.3187, + "step": 18611 + }, + { + "epoch": 0.8718789525460252, + "grad_norm": 0.6121074417860745, + "learning_rate": 3.133191094553375e-06, + "loss": 0.313, + "step": 18612 + }, + { + "epoch": 0.8719257975359536, + "grad_norm": 0.6259202454854884, + "learning_rate": 3.1330076263253782e-06, + "loss": 0.3184, + "step": 18613 + }, + { + "epoch": 0.8719726425258818, + "grad_norm": 0.6295922389668598, + "learning_rate": 3.1328241544545823e-06, + "loss": 0.3351, + "step": 18614 + }, + { + "epoch": 0.8720194875158102, + "grad_norm": 0.6538794567157967, + "learning_rate": 3.1326406789420415e-06, + "loss": 0.3376, + "step": 18615 + }, + { + "epoch": 0.8720663325057385, + "grad_norm": 0.6268893185770271, + "learning_rate": 3.1324571997888133e-06, + "loss": 0.3368, + "step": 18616 + }, + { + "epoch": 0.8721131774956669, + "grad_norm": 0.5995735194601483, + "learning_rate": 3.132273716995953e-06, + "loss": 0.3042, + "step": 18617 + }, + { + "epoch": 0.8721600224855952, + "grad_norm": 0.5771661964021609, + "learning_rate": 3.132090230564516e-06, + "loss": 0.3023, + "step": 18618 + }, + { + "epoch": 0.8722068674755235, + "grad_norm": 0.5624093580167702, + "learning_rate": 3.1319067404955587e-06, + "loss": 0.2974, + "step": 18619 + }, + { + "epoch": 0.8722537124654518, + "grad_norm": 0.5814355754977306, + "learning_rate": 3.1317232467901376e-06, + "loss": 0.3134, + "step": 18620 + }, + { + "epoch": 0.8723005574553802, + "grad_norm": 0.6325928530320487, + "learning_rate": 3.131539749449307e-06, + "loss": 0.3226, + "step": 18621 + }, + { + "epoch": 0.8723474024453085, + "grad_norm": 0.5709236068312534, + "learning_rate": 3.1313562484741245e-06, + "loss": 0.2868, + "step": 18622 + }, + { + "epoch": 0.8723942474352367, + "grad_norm": 0.6558296687996823, + "learning_rate": 3.1311727438656453e-06, + "loss": 0.3582, + "step": 18623 + }, + { + "epoch": 0.8724410924251651, + "grad_norm": 0.6279800846152634, + "learning_rate": 3.130989235624925e-06, + "loss": 0.3267, + "step": 18624 + }, + { + "epoch": 0.8724879374150935, + "grad_norm": 0.5815301862418272, + "learning_rate": 3.130805723753021e-06, + "loss": 0.3089, + "step": 18625 + }, + { + "epoch": 0.8725347824050218, + "grad_norm": 0.5388236050409049, + "learning_rate": 3.1306222082509884e-06, + "loss": 0.2996, + "step": 18626 + }, + { + "epoch": 0.8725816273949502, + "grad_norm": 0.643417469557864, + "learning_rate": 3.1304386891198833e-06, + "loss": 0.32, + "step": 18627 + }, + { + "epoch": 0.8726284723848784, + "grad_norm": 0.6282391766017725, + "learning_rate": 3.130255166360763e-06, + "loss": 0.3324, + "step": 18628 + }, + { + "epoch": 0.8726753173748067, + "grad_norm": 0.534438515824576, + "learning_rate": 3.1300716399746813e-06, + "loss": 0.289, + "step": 18629 + }, + { + "epoch": 0.8727221623647351, + "grad_norm": 0.5588243918234302, + "learning_rate": 3.129888109962696e-06, + "loss": 0.3101, + "step": 18630 + }, + { + "epoch": 0.8727690073546635, + "grad_norm": 0.585774177094519, + "learning_rate": 3.1297045763258637e-06, + "loss": 0.3098, + "step": 18631 + }, + { + "epoch": 0.8728158523445917, + "grad_norm": 0.5438872684788216, + "learning_rate": 3.1295210390652385e-06, + "loss": 0.3138, + "step": 18632 + }, + { + "epoch": 0.87286269733452, + "grad_norm": 0.5864397729596046, + "learning_rate": 3.1293374981818784e-06, + "loss": 0.3269, + "step": 18633 + }, + { + "epoch": 0.8729095423244484, + "grad_norm": 0.5489730602404415, + "learning_rate": 3.12915395367684e-06, + "loss": 0.3163, + "step": 18634 + }, + { + "epoch": 0.8729563873143767, + "grad_norm": 0.5713724296283598, + "learning_rate": 3.1289704055511785e-06, + "loss": 0.3097, + "step": 18635 + }, + { + "epoch": 0.8730032323043051, + "grad_norm": 0.5593593302937854, + "learning_rate": 3.12878685380595e-06, + "loss": 0.311, + "step": 18636 + }, + { + "epoch": 0.8730500772942333, + "grad_norm": 0.6441284461012948, + "learning_rate": 3.128603298442211e-06, + "loss": 0.3366, + "step": 18637 + }, + { + "epoch": 0.8730969222841617, + "grad_norm": 0.6390705662929489, + "learning_rate": 3.128419739461018e-06, + "loss": 0.3217, + "step": 18638 + }, + { + "epoch": 0.87314376727409, + "grad_norm": 0.6130994033090447, + "learning_rate": 3.128236176863428e-06, + "loss": 0.3219, + "step": 18639 + }, + { + "epoch": 0.8731906122640184, + "grad_norm": 0.6142460206812002, + "learning_rate": 3.128052610650496e-06, + "loss": 0.3231, + "step": 18640 + }, + { + "epoch": 0.8732374572539466, + "grad_norm": 0.581378611016878, + "learning_rate": 3.1278690408232805e-06, + "loss": 0.3153, + "step": 18641 + }, + { + "epoch": 0.873284302243875, + "grad_norm": 0.5643446226050326, + "learning_rate": 3.1276854673828344e-06, + "loss": 0.3261, + "step": 18642 + }, + { + "epoch": 0.8733311472338033, + "grad_norm": 0.524938195185724, + "learning_rate": 3.127501890330218e-06, + "loss": 0.2966, + "step": 18643 + }, + { + "epoch": 0.8733779922237317, + "grad_norm": 0.5831349176032928, + "learning_rate": 3.127318309666485e-06, + "loss": 0.3078, + "step": 18644 + }, + { + "epoch": 0.87342483721366, + "grad_norm": 0.5579527955843708, + "learning_rate": 3.1271347253926927e-06, + "loss": 0.313, + "step": 18645 + }, + { + "epoch": 0.8734716822035883, + "grad_norm": 0.5701122178715988, + "learning_rate": 3.1269511375098977e-06, + "loss": 0.301, + "step": 18646 + }, + { + "epoch": 0.8735185271935166, + "grad_norm": 0.5627072233585292, + "learning_rate": 3.1267675460191566e-06, + "loss": 0.3081, + "step": 18647 + }, + { + "epoch": 0.873565372183445, + "grad_norm": 0.6327322939639818, + "learning_rate": 3.1265839509215264e-06, + "loss": 0.3591, + "step": 18648 + }, + { + "epoch": 0.8736122171733733, + "grad_norm": 0.6195748215257342, + "learning_rate": 3.126400352218062e-06, + "loss": 0.346, + "step": 18649 + }, + { + "epoch": 0.8736590621633016, + "grad_norm": 0.5975686626692157, + "learning_rate": 3.1262167499098217e-06, + "loss": 0.3144, + "step": 18650 + }, + { + "epoch": 0.8737059071532299, + "grad_norm": 0.587155275441969, + "learning_rate": 3.126033143997862e-06, + "loss": 0.3251, + "step": 18651 + }, + { + "epoch": 0.8737527521431583, + "grad_norm": 0.5547362981965882, + "learning_rate": 3.1258495344832375e-06, + "loss": 0.3168, + "step": 18652 + }, + { + "epoch": 0.8737995971330866, + "grad_norm": 0.5774671851736427, + "learning_rate": 3.125665921367007e-06, + "loss": 0.3136, + "step": 18653 + }, + { + "epoch": 0.873846442123015, + "grad_norm": 0.651692746505016, + "learning_rate": 3.125482304650226e-06, + "loss": 0.3133, + "step": 18654 + }, + { + "epoch": 0.8738932871129432, + "grad_norm": 0.6009959384834831, + "learning_rate": 3.1252986843339523e-06, + "loss": 0.3127, + "step": 18655 + }, + { + "epoch": 0.8739401321028716, + "grad_norm": 0.5645727666575291, + "learning_rate": 3.1251150604192414e-06, + "loss": 0.3134, + "step": 18656 + }, + { + "epoch": 0.8739869770927999, + "grad_norm": 0.6161667880680697, + "learning_rate": 3.12493143290715e-06, + "loss": 0.3213, + "step": 18657 + }, + { + "epoch": 0.8740338220827283, + "grad_norm": 0.6028321626084905, + "learning_rate": 3.124747801798736e-06, + "loss": 0.323, + "step": 18658 + }, + { + "epoch": 0.8740806670726565, + "grad_norm": 0.5963455016965925, + "learning_rate": 3.1245641670950556e-06, + "loss": 0.303, + "step": 18659 + }, + { + "epoch": 0.8741275120625849, + "grad_norm": 0.6019584571559751, + "learning_rate": 3.124380528797164e-06, + "loss": 0.3205, + "step": 18660 + }, + { + "epoch": 0.8741743570525132, + "grad_norm": 0.6142349430722519, + "learning_rate": 3.1241968869061207e-06, + "loss": 0.3112, + "step": 18661 + }, + { + "epoch": 0.8742212020424416, + "grad_norm": 0.5417113994467209, + "learning_rate": 3.1240132414229813e-06, + "loss": 0.2802, + "step": 18662 + }, + { + "epoch": 0.8742680470323699, + "grad_norm": 0.6285256623381859, + "learning_rate": 3.1238295923488016e-06, + "loss": 0.3229, + "step": 18663 + }, + { + "epoch": 0.8743148920222982, + "grad_norm": 0.6029210989135573, + "learning_rate": 3.1236459396846393e-06, + "loss": 0.3208, + "step": 18664 + }, + { + "epoch": 0.8743617370122265, + "grad_norm": 0.6557212954748216, + "learning_rate": 3.1234622834315526e-06, + "loss": 0.3372, + "step": 18665 + }, + { + "epoch": 0.8744085820021549, + "grad_norm": 0.5721461451116174, + "learning_rate": 3.123278623590596e-06, + "loss": 0.3015, + "step": 18666 + }, + { + "epoch": 0.8744554269920832, + "grad_norm": 0.6087754843788642, + "learning_rate": 3.123094960162828e-06, + "loss": 0.3269, + "step": 18667 + }, + { + "epoch": 0.8745022719820115, + "grad_norm": 0.5806320257101912, + "learning_rate": 3.1229112931493056e-06, + "loss": 0.3131, + "step": 18668 + }, + { + "epoch": 0.8745491169719398, + "grad_norm": 0.6366208647687355, + "learning_rate": 3.1227276225510845e-06, + "loss": 0.3314, + "step": 18669 + }, + { + "epoch": 0.8745959619618682, + "grad_norm": 0.5413464008706891, + "learning_rate": 3.122543948369223e-06, + "loss": 0.3217, + "step": 18670 + }, + { + "epoch": 0.8746428069517965, + "grad_norm": 0.5584117141654205, + "learning_rate": 3.1223602706047773e-06, + "loss": 0.3157, + "step": 18671 + }, + { + "epoch": 0.8746896519417249, + "grad_norm": 0.5611119904454699, + "learning_rate": 3.1221765892588046e-06, + "loss": 0.2901, + "step": 18672 + }, + { + "epoch": 0.8747364969316531, + "grad_norm": 0.5740630625626846, + "learning_rate": 3.1219929043323618e-06, + "loss": 0.316, + "step": 18673 + }, + { + "epoch": 0.8747833419215815, + "grad_norm": 0.6340149594112247, + "learning_rate": 3.1218092158265068e-06, + "loss": 0.3348, + "step": 18674 + }, + { + "epoch": 0.8748301869115098, + "grad_norm": 0.5820997329091551, + "learning_rate": 3.1216255237422955e-06, + "loss": 0.3048, + "step": 18675 + }, + { + "epoch": 0.8748770319014382, + "grad_norm": 0.5921793294826904, + "learning_rate": 3.1214418280807858e-06, + "loss": 0.3145, + "step": 18676 + }, + { + "epoch": 0.8749238768913664, + "grad_norm": 0.5735146716094479, + "learning_rate": 3.1212581288430343e-06, + "loss": 0.3173, + "step": 18677 + }, + { + "epoch": 0.8749707218812948, + "grad_norm": 0.5923586263561611, + "learning_rate": 3.1210744260300983e-06, + "loss": 0.3235, + "step": 18678 + }, + { + "epoch": 0.8750175668712231, + "grad_norm": 0.5683937139921817, + "learning_rate": 3.120890719643036e-06, + "loss": 0.3202, + "step": 18679 + }, + { + "epoch": 0.8750644118611515, + "grad_norm": 0.6120256782463913, + "learning_rate": 3.1207070096829033e-06, + "loss": 0.3189, + "step": 18680 + }, + { + "epoch": 0.8751112568510798, + "grad_norm": 0.5710158994248136, + "learning_rate": 3.120523296150757e-06, + "loss": 0.3001, + "step": 18681 + }, + { + "epoch": 0.8751581018410081, + "grad_norm": 0.5906629247168139, + "learning_rate": 3.120339579047656e-06, + "loss": 0.3199, + "step": 18682 + }, + { + "epoch": 0.8752049468309364, + "grad_norm": 0.6080617331049374, + "learning_rate": 3.1201558583746566e-06, + "loss": 0.3183, + "step": 18683 + }, + { + "epoch": 0.8752517918208648, + "grad_norm": 0.5674882091852123, + "learning_rate": 3.1199721341328153e-06, + "loss": 0.3176, + "step": 18684 + }, + { + "epoch": 0.8752986368107931, + "grad_norm": 0.563861147498252, + "learning_rate": 3.1197884063231903e-06, + "loss": 0.316, + "step": 18685 + }, + { + "epoch": 0.8753454818007214, + "grad_norm": 0.6092000342456988, + "learning_rate": 3.1196046749468397e-06, + "loss": 0.3267, + "step": 18686 + }, + { + "epoch": 0.8753923267906497, + "grad_norm": 0.5985188662602341, + "learning_rate": 3.119420940004819e-06, + "loss": 0.3135, + "step": 18687 + }, + { + "epoch": 0.8754391717805781, + "grad_norm": 0.5315946732918396, + "learning_rate": 3.119237201498187e-06, + "loss": 0.3027, + "step": 18688 + }, + { + "epoch": 0.8754860167705064, + "grad_norm": 0.5990517455543634, + "learning_rate": 3.1190534594280004e-06, + "loss": 0.3344, + "step": 18689 + }, + { + "epoch": 0.8755328617604348, + "grad_norm": 0.5896372875876377, + "learning_rate": 3.1188697137953174e-06, + "loss": 0.3334, + "step": 18690 + }, + { + "epoch": 0.875579706750363, + "grad_norm": 0.538052979865287, + "learning_rate": 3.1186859646011937e-06, + "loss": 0.3132, + "step": 18691 + }, + { + "epoch": 0.8756265517402914, + "grad_norm": 0.6118952968609163, + "learning_rate": 3.1185022118466877e-06, + "loss": 0.3223, + "step": 18692 + }, + { + "epoch": 0.8756733967302197, + "grad_norm": 0.6113694053542121, + "learning_rate": 3.1183184555328583e-06, + "loss": 0.3334, + "step": 18693 + }, + { + "epoch": 0.8757202417201481, + "grad_norm": 0.6309443417435262, + "learning_rate": 3.11813469566076e-06, + "loss": 0.3311, + "step": 18694 + }, + { + "epoch": 0.8757670867100763, + "grad_norm": 0.6047038976551777, + "learning_rate": 3.1179509322314525e-06, + "loss": 0.3349, + "step": 18695 + }, + { + "epoch": 0.8758139317000047, + "grad_norm": 0.5348347133014066, + "learning_rate": 3.117767165245993e-06, + "loss": 0.2875, + "step": 18696 + }, + { + "epoch": 0.875860776689933, + "grad_norm": 0.5912565344354479, + "learning_rate": 3.1175833947054384e-06, + "loss": 0.3168, + "step": 18697 + }, + { + "epoch": 0.8759076216798614, + "grad_norm": 0.6614441976327562, + "learning_rate": 3.117399620610847e-06, + "loss": 0.3307, + "step": 18698 + }, + { + "epoch": 0.8759544666697897, + "grad_norm": 0.5409186617189079, + "learning_rate": 3.1172158429632756e-06, + "loss": 0.3058, + "step": 18699 + }, + { + "epoch": 0.876001311659718, + "grad_norm": 0.6273144517343228, + "learning_rate": 3.1170320617637824e-06, + "loss": 0.3155, + "step": 18700 + }, + { + "epoch": 0.8760481566496463, + "grad_norm": 0.5623987856780727, + "learning_rate": 3.1168482770134247e-06, + "loss": 0.3167, + "step": 18701 + }, + { + "epoch": 0.8760950016395747, + "grad_norm": 0.5491909192581057, + "learning_rate": 3.11666448871326e-06, + "loss": 0.317, + "step": 18702 + }, + { + "epoch": 0.876141846629503, + "grad_norm": 0.642121526721498, + "learning_rate": 3.116480696864346e-06, + "loss": 0.3451, + "step": 18703 + }, + { + "epoch": 0.8761886916194312, + "grad_norm": 0.587228327868739, + "learning_rate": 3.116296901467741e-06, + "loss": 0.3191, + "step": 18704 + }, + { + "epoch": 0.8762355366093596, + "grad_norm": 0.5568309200065577, + "learning_rate": 3.116113102524502e-06, + "loss": 0.2912, + "step": 18705 + }, + { + "epoch": 0.876282381599288, + "grad_norm": 0.6042395134993732, + "learning_rate": 3.1159293000356864e-06, + "loss": 0.308, + "step": 18706 + }, + { + "epoch": 0.8763292265892163, + "grad_norm": 0.601585021070634, + "learning_rate": 3.1157454940023536e-06, + "loss": 0.2946, + "step": 18707 + }, + { + "epoch": 0.8763760715791447, + "grad_norm": 0.6075793818296603, + "learning_rate": 3.1155616844255597e-06, + "loss": 0.3262, + "step": 18708 + }, + { + "epoch": 0.8764229165690729, + "grad_norm": 0.6011092209316032, + "learning_rate": 3.1153778713063627e-06, + "loss": 0.321, + "step": 18709 + }, + { + "epoch": 0.8764697615590012, + "grad_norm": 0.6086021681922245, + "learning_rate": 3.1151940546458216e-06, + "loss": 0.3194, + "step": 18710 + }, + { + "epoch": 0.8765166065489296, + "grad_norm": 0.5614870222687198, + "learning_rate": 3.115010234444993e-06, + "loss": 0.3229, + "step": 18711 + }, + { + "epoch": 0.876563451538858, + "grad_norm": 0.5457101368283778, + "learning_rate": 3.1148264107049344e-06, + "loss": 0.3164, + "step": 18712 + }, + { + "epoch": 0.8766102965287862, + "grad_norm": 0.5787961985700576, + "learning_rate": 3.114642583426704e-06, + "loss": 0.3217, + "step": 18713 + }, + { + "epoch": 0.8766571415187145, + "grad_norm": 0.5493069944991642, + "learning_rate": 3.1144587526113616e-06, + "loss": 0.2872, + "step": 18714 + }, + { + "epoch": 0.8767039865086429, + "grad_norm": 0.5119966229843826, + "learning_rate": 3.114274918259963e-06, + "loss": 0.287, + "step": 18715 + }, + { + "epoch": 0.8767508314985712, + "grad_norm": 0.5508099256620247, + "learning_rate": 3.114091080373566e-06, + "loss": 0.2971, + "step": 18716 + }, + { + "epoch": 0.8767976764884996, + "grad_norm": 0.5400349108744306, + "learning_rate": 3.1139072389532294e-06, + "loss": 0.3207, + "step": 18717 + }, + { + "epoch": 0.8768445214784278, + "grad_norm": 0.5902965669962125, + "learning_rate": 3.1137233940000113e-06, + "loss": 0.326, + "step": 18718 + }, + { + "epoch": 0.8768913664683562, + "grad_norm": 0.5669603791824295, + "learning_rate": 3.1135395455149686e-06, + "loss": 0.2968, + "step": 18719 + }, + { + "epoch": 0.8769382114582845, + "grad_norm": 0.6071072420587246, + "learning_rate": 3.113355693499161e-06, + "loss": 0.3214, + "step": 18720 + }, + { + "epoch": 0.8769850564482129, + "grad_norm": 0.6089751103922364, + "learning_rate": 3.113171837953645e-06, + "loss": 0.3211, + "step": 18721 + }, + { + "epoch": 0.8770319014381411, + "grad_norm": 0.6436706337234067, + "learning_rate": 3.1129879788794793e-06, + "loss": 0.3342, + "step": 18722 + }, + { + "epoch": 0.8770787464280695, + "grad_norm": 0.6050569207063177, + "learning_rate": 3.1128041162777224e-06, + "loss": 0.3238, + "step": 18723 + }, + { + "epoch": 0.8771255914179978, + "grad_norm": 0.5986101767444105, + "learning_rate": 3.1126202501494306e-06, + "loss": 0.3251, + "step": 18724 + }, + { + "epoch": 0.8771724364079262, + "grad_norm": 0.6017828675680321, + "learning_rate": 3.112436380495664e-06, + "loss": 0.321, + "step": 18725 + }, + { + "epoch": 0.8772192813978545, + "grad_norm": 0.5444391637697443, + "learning_rate": 3.1122525073174803e-06, + "loss": 0.3167, + "step": 18726 + }, + { + "epoch": 0.8772661263877828, + "grad_norm": 0.5952855355038961, + "learning_rate": 3.1120686306159363e-06, + "loss": 0.3128, + "step": 18727 + }, + { + "epoch": 0.8773129713777111, + "grad_norm": 0.5752051679285594, + "learning_rate": 3.1118847503920917e-06, + "loss": 0.3166, + "step": 18728 + }, + { + "epoch": 0.8773598163676395, + "grad_norm": 0.5928901284537201, + "learning_rate": 3.1117008666470046e-06, + "loss": 0.3321, + "step": 18729 + }, + { + "epoch": 0.8774066613575678, + "grad_norm": 0.5739556857034193, + "learning_rate": 3.111516979381732e-06, + "loss": 0.3188, + "step": 18730 + }, + { + "epoch": 0.8774535063474961, + "grad_norm": 0.5930056938131806, + "learning_rate": 3.1113330885973335e-06, + "loss": 0.3106, + "step": 18731 + }, + { + "epoch": 0.8775003513374244, + "grad_norm": 0.6177776332652145, + "learning_rate": 3.1111491942948667e-06, + "loss": 0.3341, + "step": 18732 + }, + { + "epoch": 0.8775471963273528, + "grad_norm": 0.6520967923170893, + "learning_rate": 3.110965296475389e-06, + "loss": 0.3085, + "step": 18733 + }, + { + "epoch": 0.8775940413172811, + "grad_norm": 0.5814106160735382, + "learning_rate": 3.1107813951399602e-06, + "loss": 0.3089, + "step": 18734 + }, + { + "epoch": 0.8776408863072095, + "grad_norm": 0.6335638603579282, + "learning_rate": 3.1105974902896386e-06, + "loss": 0.324, + "step": 18735 + }, + { + "epoch": 0.8776877312971377, + "grad_norm": 0.5846268319362877, + "learning_rate": 3.110413581925481e-06, + "loss": 0.3045, + "step": 18736 + }, + { + "epoch": 0.8777345762870661, + "grad_norm": 0.6030959022256175, + "learning_rate": 3.110229670048547e-06, + "loss": 0.3081, + "step": 18737 + }, + { + "epoch": 0.8777814212769944, + "grad_norm": 0.6199170206065405, + "learning_rate": 3.1100457546598946e-06, + "loss": 0.3455, + "step": 18738 + }, + { + "epoch": 0.8778282662669228, + "grad_norm": 0.5816724707503294, + "learning_rate": 3.1098618357605825e-06, + "loss": 0.3197, + "step": 18739 + }, + { + "epoch": 0.877875111256851, + "grad_norm": 0.6141531161402654, + "learning_rate": 3.109677913351668e-06, + "loss": 0.3056, + "step": 18740 + }, + { + "epoch": 0.8779219562467794, + "grad_norm": 0.6079790859312102, + "learning_rate": 3.109493987434211e-06, + "loss": 0.3224, + "step": 18741 + }, + { + "epoch": 0.8779688012367077, + "grad_norm": 0.6067004598239131, + "learning_rate": 3.1093100580092694e-06, + "loss": 0.3261, + "step": 18742 + }, + { + "epoch": 0.8780156462266361, + "grad_norm": 0.5499476924569104, + "learning_rate": 3.109126125077901e-06, + "loss": 0.3092, + "step": 18743 + }, + { + "epoch": 0.8780624912165644, + "grad_norm": 0.585154735137867, + "learning_rate": 3.108942188641165e-06, + "loss": 0.312, + "step": 18744 + }, + { + "epoch": 0.8781093362064927, + "grad_norm": 0.6076555077137026, + "learning_rate": 3.10875824870012e-06, + "loss": 0.3238, + "step": 18745 + }, + { + "epoch": 0.878156181196421, + "grad_norm": 0.5738230818478025, + "learning_rate": 3.1085743052558243e-06, + "loss": 0.2993, + "step": 18746 + }, + { + "epoch": 0.8782030261863494, + "grad_norm": 0.5690972643471484, + "learning_rate": 3.1083903583093366e-06, + "loss": 0.3145, + "step": 18747 + }, + { + "epoch": 0.8782498711762777, + "grad_norm": 0.601784292333255, + "learning_rate": 3.1082064078617148e-06, + "loss": 0.2987, + "step": 18748 + }, + { + "epoch": 0.878296716166206, + "grad_norm": 0.5622491398879812, + "learning_rate": 3.1080224539140186e-06, + "loss": 0.2975, + "step": 18749 + }, + { + "epoch": 0.8783435611561343, + "grad_norm": 0.5859473608766357, + "learning_rate": 3.107838496467306e-06, + "loss": 0.309, + "step": 18750 + }, + { + "epoch": 0.8783904061460627, + "grad_norm": 0.550648366904725, + "learning_rate": 3.1076545355226344e-06, + "loss": 0.3085, + "step": 18751 + }, + { + "epoch": 0.878437251135991, + "grad_norm": 0.5797266090372224, + "learning_rate": 3.107470571081065e-06, + "loss": 0.3309, + "step": 18752 + }, + { + "epoch": 0.8784840961259194, + "grad_norm": 0.5775246276710482, + "learning_rate": 3.1072866031436548e-06, + "loss": 0.3074, + "step": 18753 + }, + { + "epoch": 0.8785309411158476, + "grad_norm": 0.6102457169270187, + "learning_rate": 3.1071026317114626e-06, + "loss": 0.3165, + "step": 18754 + }, + { + "epoch": 0.878577786105776, + "grad_norm": 0.5631952432941437, + "learning_rate": 3.106918656785547e-06, + "loss": 0.2995, + "step": 18755 + }, + { + "epoch": 0.8786246310957043, + "grad_norm": 0.5590166593986499, + "learning_rate": 3.1067346783669678e-06, + "loss": 0.3001, + "step": 18756 + }, + { + "epoch": 0.8786714760856327, + "grad_norm": 0.6090882134057654, + "learning_rate": 3.1065506964567828e-06, + "loss": 0.3046, + "step": 18757 + }, + { + "epoch": 0.8787183210755609, + "grad_norm": 0.5856852177050508, + "learning_rate": 3.1063667110560508e-06, + "loss": 0.3095, + "step": 18758 + }, + { + "epoch": 0.8787651660654893, + "grad_norm": 0.6455468385209266, + "learning_rate": 3.1061827221658306e-06, + "loss": 0.3199, + "step": 18759 + }, + { + "epoch": 0.8788120110554176, + "grad_norm": 0.5647997861156269, + "learning_rate": 3.1059987297871824e-06, + "loss": 0.3333, + "step": 18760 + }, + { + "epoch": 0.878858856045346, + "grad_norm": 0.582858176755859, + "learning_rate": 3.1058147339211627e-06, + "loss": 0.3113, + "step": 18761 + }, + { + "epoch": 0.8789057010352743, + "grad_norm": 0.6777371731999368, + "learning_rate": 3.105630734568832e-06, + "loss": 0.3325, + "step": 18762 + }, + { + "epoch": 0.8789525460252026, + "grad_norm": 0.5916711108698128, + "learning_rate": 3.105446731731248e-06, + "loss": 0.3244, + "step": 18763 + }, + { + "epoch": 0.8789993910151309, + "grad_norm": 0.5795829442247566, + "learning_rate": 3.10526272540947e-06, + "loss": 0.3264, + "step": 18764 + }, + { + "epoch": 0.8790462360050593, + "grad_norm": 0.596302182734665, + "learning_rate": 3.1050787156045584e-06, + "loss": 0.3132, + "step": 18765 + }, + { + "epoch": 0.8790930809949876, + "grad_norm": 0.5532774949798331, + "learning_rate": 3.10489470231757e-06, + "loss": 0.3133, + "step": 18766 + }, + { + "epoch": 0.8791399259849159, + "grad_norm": 0.5645576119647061, + "learning_rate": 3.104710685549565e-06, + "loss": 0.2988, + "step": 18767 + }, + { + "epoch": 0.8791867709748442, + "grad_norm": 0.5702776361630105, + "learning_rate": 3.104526665301602e-06, + "loss": 0.3303, + "step": 18768 + }, + { + "epoch": 0.8792336159647726, + "grad_norm": 0.6218962899637441, + "learning_rate": 3.1043426415747395e-06, + "loss": 0.3146, + "step": 18769 + }, + { + "epoch": 0.8792804609547009, + "grad_norm": 0.5872236788314632, + "learning_rate": 3.1041586143700376e-06, + "loss": 0.3361, + "step": 18770 + }, + { + "epoch": 0.8793273059446293, + "grad_norm": 0.6152534992021688, + "learning_rate": 3.1039745836885544e-06, + "loss": 0.3269, + "step": 18771 + }, + { + "epoch": 0.8793741509345575, + "grad_norm": 0.5695488577736826, + "learning_rate": 3.103790549531349e-06, + "loss": 0.3164, + "step": 18772 + }, + { + "epoch": 0.8794209959244859, + "grad_norm": 0.5701635270752785, + "learning_rate": 3.1036065118994806e-06, + "loss": 0.3208, + "step": 18773 + }, + { + "epoch": 0.8794678409144142, + "grad_norm": 0.5808897010175167, + "learning_rate": 3.1034224707940097e-06, + "loss": 0.3199, + "step": 18774 + }, + { + "epoch": 0.8795146859043426, + "grad_norm": 0.6098323921294034, + "learning_rate": 3.103238426215993e-06, + "loss": 0.3022, + "step": 18775 + }, + { + "epoch": 0.8795615308942708, + "grad_norm": 0.5170630400595915, + "learning_rate": 3.1030543781664906e-06, + "loss": 0.2991, + "step": 18776 + }, + { + "epoch": 0.8796083758841992, + "grad_norm": 0.5539257160772358, + "learning_rate": 3.102870326646563e-06, + "loss": 0.3186, + "step": 18777 + }, + { + "epoch": 0.8796552208741275, + "grad_norm": 0.5612442394538598, + "learning_rate": 3.1026862716572677e-06, + "loss": 0.3046, + "step": 18778 + }, + { + "epoch": 0.8797020658640559, + "grad_norm": 0.6109845329316675, + "learning_rate": 3.1025022131996637e-06, + "loss": 0.3195, + "step": 18779 + }, + { + "epoch": 0.8797489108539842, + "grad_norm": 0.6089756930487856, + "learning_rate": 3.102318151274812e-06, + "loss": 0.328, + "step": 18780 + }, + { + "epoch": 0.8797957558439125, + "grad_norm": 0.5654798371342621, + "learning_rate": 3.1021340858837702e-06, + "loss": 0.3115, + "step": 18781 + }, + { + "epoch": 0.8798426008338408, + "grad_norm": 0.5978427269365935, + "learning_rate": 3.101950017027597e-06, + "loss": 0.3315, + "step": 18782 + }, + { + "epoch": 0.8798894458237692, + "grad_norm": 0.6072920180209392, + "learning_rate": 3.101765944707354e-06, + "loss": 0.34, + "step": 18783 + }, + { + "epoch": 0.8799362908136975, + "grad_norm": 0.5550521278878247, + "learning_rate": 3.1015818689240994e-06, + "loss": 0.3088, + "step": 18784 + }, + { + "epoch": 0.8799831358036257, + "grad_norm": 0.5363203866283055, + "learning_rate": 3.1013977896788914e-06, + "loss": 0.2998, + "step": 18785 + }, + { + "epoch": 0.8800299807935541, + "grad_norm": 0.5573293475756528, + "learning_rate": 3.101213706972791e-06, + "loss": 0.307, + "step": 18786 + }, + { + "epoch": 0.8800768257834825, + "grad_norm": 0.6506181107675543, + "learning_rate": 3.101029620806857e-06, + "loss": 0.3364, + "step": 18787 + }, + { + "epoch": 0.8801236707734108, + "grad_norm": 0.5805963804994365, + "learning_rate": 3.100845531182148e-06, + "loss": 0.3083, + "step": 18788 + }, + { + "epoch": 0.8801705157633392, + "grad_norm": 0.6204916937028635, + "learning_rate": 3.100661438099724e-06, + "loss": 0.3055, + "step": 18789 + }, + { + "epoch": 0.8802173607532674, + "grad_norm": 0.6033941048711015, + "learning_rate": 3.100477341560645e-06, + "loss": 0.316, + "step": 18790 + }, + { + "epoch": 0.8802642057431957, + "grad_norm": 0.6138823109082426, + "learning_rate": 3.1002932415659693e-06, + "loss": 0.3082, + "step": 18791 + }, + { + "epoch": 0.8803110507331241, + "grad_norm": 0.6261704718074702, + "learning_rate": 3.100109138116757e-06, + "loss": 0.3101, + "step": 18792 + }, + { + "epoch": 0.8803578957230525, + "grad_norm": 0.5953570096217856, + "learning_rate": 3.0999250312140677e-06, + "loss": 0.3372, + "step": 18793 + }, + { + "epoch": 0.8804047407129807, + "grad_norm": 0.5969868043356297, + "learning_rate": 3.09974092085896e-06, + "loss": 0.319, + "step": 18794 + }, + { + "epoch": 0.880451585702909, + "grad_norm": 0.5668169298919508, + "learning_rate": 3.0995568070524945e-06, + "loss": 0.3235, + "step": 18795 + }, + { + "epoch": 0.8804984306928374, + "grad_norm": 0.5562849414907266, + "learning_rate": 3.0993726897957305e-06, + "loss": 0.3104, + "step": 18796 + }, + { + "epoch": 0.8805452756827657, + "grad_norm": 0.604811286536307, + "learning_rate": 3.0991885690897265e-06, + "loss": 0.3106, + "step": 18797 + }, + { + "epoch": 0.8805921206726941, + "grad_norm": 0.5885948712605509, + "learning_rate": 3.099004444935544e-06, + "loss": 0.325, + "step": 18798 + }, + { + "epoch": 0.8806389656626223, + "grad_norm": 0.7807980663356942, + "learning_rate": 3.0988203173342407e-06, + "loss": 0.3313, + "step": 18799 + }, + { + "epoch": 0.8806858106525507, + "grad_norm": 0.6231615191151483, + "learning_rate": 3.098636186286877e-06, + "loss": 0.3046, + "step": 18800 + }, + { + "epoch": 0.880732655642479, + "grad_norm": 0.5866370081439197, + "learning_rate": 3.0984520517945125e-06, + "loss": 0.2968, + "step": 18801 + }, + { + "epoch": 0.8807795006324074, + "grad_norm": 0.635527882745114, + "learning_rate": 3.0982679138582074e-06, + "loss": 0.295, + "step": 18802 + }, + { + "epoch": 0.8808263456223356, + "grad_norm": 0.6132496201828868, + "learning_rate": 3.0980837724790205e-06, + "loss": 0.3228, + "step": 18803 + }, + { + "epoch": 0.880873190612264, + "grad_norm": 0.5963557535231794, + "learning_rate": 3.097899627658011e-06, + "loss": 0.3086, + "step": 18804 + }, + { + "epoch": 0.8809200356021923, + "grad_norm": 0.6316886580885944, + "learning_rate": 3.097715479396241e-06, + "loss": 0.341, + "step": 18805 + }, + { + "epoch": 0.8809668805921207, + "grad_norm": 0.6189935981719801, + "learning_rate": 3.0975313276947676e-06, + "loss": 0.3321, + "step": 18806 + }, + { + "epoch": 0.881013725582049, + "grad_norm": 0.5900653598997104, + "learning_rate": 3.097347172554651e-06, + "loss": 0.3024, + "step": 18807 + }, + { + "epoch": 0.8810605705719773, + "grad_norm": 0.6436990371547884, + "learning_rate": 3.097163013976953e-06, + "loss": 0.3126, + "step": 18808 + }, + { + "epoch": 0.8811074155619056, + "grad_norm": 0.5794917142661343, + "learning_rate": 3.0969788519627315e-06, + "loss": 0.3102, + "step": 18809 + }, + { + "epoch": 0.881154260551834, + "grad_norm": 0.5854100064010249, + "learning_rate": 3.096794686513046e-06, + "loss": 0.3122, + "step": 18810 + }, + { + "epoch": 0.8812011055417623, + "grad_norm": 0.6158422777327867, + "learning_rate": 3.096610517628958e-06, + "loss": 0.308, + "step": 18811 + }, + { + "epoch": 0.8812479505316906, + "grad_norm": 0.6053805930595437, + "learning_rate": 3.0964263453115263e-06, + "loss": 0.3102, + "step": 18812 + }, + { + "epoch": 0.8812947955216189, + "grad_norm": 0.5665820120444429, + "learning_rate": 3.0962421695618108e-06, + "loss": 0.3224, + "step": 18813 + }, + { + "epoch": 0.8813416405115473, + "grad_norm": 0.5673749337201413, + "learning_rate": 3.0960579903808715e-06, + "loss": 0.3044, + "step": 18814 + }, + { + "epoch": 0.8813884855014756, + "grad_norm": 0.6964297878188311, + "learning_rate": 3.0958738077697677e-06, + "loss": 0.3091, + "step": 18815 + }, + { + "epoch": 0.881435330491404, + "grad_norm": 0.6461712350736406, + "learning_rate": 3.0956896217295603e-06, + "loss": 0.3378, + "step": 18816 + }, + { + "epoch": 0.8814821754813322, + "grad_norm": 0.609096431174646, + "learning_rate": 3.095505432261309e-06, + "loss": 0.3231, + "step": 18817 + }, + { + "epoch": 0.8815290204712606, + "grad_norm": 0.6085294073591101, + "learning_rate": 3.095321239366073e-06, + "loss": 0.3282, + "step": 18818 + }, + { + "epoch": 0.8815758654611889, + "grad_norm": 0.5717042296922016, + "learning_rate": 3.0951370430449135e-06, + "loss": 0.3247, + "step": 18819 + }, + { + "epoch": 0.8816227104511173, + "grad_norm": 0.5614277807479244, + "learning_rate": 3.09495284329889e-06, + "loss": 0.2942, + "step": 18820 + }, + { + "epoch": 0.8816695554410455, + "grad_norm": 0.6164055679798648, + "learning_rate": 3.094768640129062e-06, + "loss": 0.3088, + "step": 18821 + }, + { + "epoch": 0.8817164004309739, + "grad_norm": 0.6205826381741623, + "learning_rate": 3.0945844335364905e-06, + "loss": 0.3187, + "step": 18822 + }, + { + "epoch": 0.8817632454209022, + "grad_norm": 0.5852130314957824, + "learning_rate": 3.094400223522235e-06, + "loss": 0.3113, + "step": 18823 + }, + { + "epoch": 0.8818100904108306, + "grad_norm": 0.5740727307074912, + "learning_rate": 3.0942160100873547e-06, + "loss": 0.3056, + "step": 18824 + }, + { + "epoch": 0.8818569354007589, + "grad_norm": 0.5733814131535593, + "learning_rate": 3.094031793232911e-06, + "loss": 0.3262, + "step": 18825 + }, + { + "epoch": 0.8819037803906872, + "grad_norm": 0.5762394599066333, + "learning_rate": 3.093847572959964e-06, + "loss": 0.298, + "step": 18826 + }, + { + "epoch": 0.8819506253806155, + "grad_norm": 0.5941449486933527, + "learning_rate": 3.0936633492695734e-06, + "loss": 0.3177, + "step": 18827 + }, + { + "epoch": 0.8819974703705439, + "grad_norm": 0.5953259055786149, + "learning_rate": 3.0934791221627993e-06, + "loss": 0.3358, + "step": 18828 + }, + { + "epoch": 0.8820443153604722, + "grad_norm": 0.5076714524380318, + "learning_rate": 3.0932948916407024e-06, + "loss": 0.298, + "step": 18829 + }, + { + "epoch": 0.8820911603504005, + "grad_norm": 0.5776209185102135, + "learning_rate": 3.0931106577043423e-06, + "loss": 0.3146, + "step": 18830 + }, + { + "epoch": 0.8821380053403288, + "grad_norm": 0.5789114077184612, + "learning_rate": 3.0929264203547787e-06, + "loss": 0.3206, + "step": 18831 + }, + { + "epoch": 0.8821848503302572, + "grad_norm": 0.5457186762207387, + "learning_rate": 3.0927421795930733e-06, + "loss": 0.2963, + "step": 18832 + }, + { + "epoch": 0.8822316953201855, + "grad_norm": 0.563818799950678, + "learning_rate": 3.092557935420286e-06, + "loss": 0.3273, + "step": 18833 + }, + { + "epoch": 0.8822785403101139, + "grad_norm": 0.6087955705647563, + "learning_rate": 3.092373687837476e-06, + "loss": 0.3274, + "step": 18834 + }, + { + "epoch": 0.8823253853000421, + "grad_norm": 0.6176427148763018, + "learning_rate": 3.0921894368457044e-06, + "loss": 0.3359, + "step": 18835 + }, + { + "epoch": 0.8823722302899705, + "grad_norm": 0.5441233318192105, + "learning_rate": 3.0920051824460316e-06, + "loss": 0.3055, + "step": 18836 + }, + { + "epoch": 0.8824190752798988, + "grad_norm": 0.581291115885075, + "learning_rate": 3.091820924639518e-06, + "loss": 0.3247, + "step": 18837 + }, + { + "epoch": 0.8824659202698272, + "grad_norm": 0.5950841540010294, + "learning_rate": 3.091636663427224e-06, + "loss": 0.33, + "step": 18838 + }, + { + "epoch": 0.8825127652597554, + "grad_norm": 0.5535633132387912, + "learning_rate": 3.0914523988102085e-06, + "loss": 0.3013, + "step": 18839 + }, + { + "epoch": 0.8825596102496838, + "grad_norm": 0.5956738973558182, + "learning_rate": 3.0912681307895344e-06, + "loss": 0.3111, + "step": 18840 + }, + { + "epoch": 0.8826064552396121, + "grad_norm": 0.5860209490689856, + "learning_rate": 3.09108385936626e-06, + "loss": 0.3194, + "step": 18841 + }, + { + "epoch": 0.8826533002295405, + "grad_norm": 0.5704056544179975, + "learning_rate": 3.0908995845414464e-06, + "loss": 0.3127, + "step": 18842 + }, + { + "epoch": 0.8827001452194688, + "grad_norm": 0.5966815205402225, + "learning_rate": 3.0907153063161544e-06, + "loss": 0.3239, + "step": 18843 + }, + { + "epoch": 0.8827469902093971, + "grad_norm": 0.5768440893985541, + "learning_rate": 3.090531024691445e-06, + "loss": 0.2953, + "step": 18844 + }, + { + "epoch": 0.8827938351993254, + "grad_norm": 0.6125499690792687, + "learning_rate": 3.0903467396683773e-06, + "loss": 0.3113, + "step": 18845 + }, + { + "epoch": 0.8828406801892538, + "grad_norm": 0.6844034056154644, + "learning_rate": 3.090162451248012e-06, + "loss": 0.3068, + "step": 18846 + }, + { + "epoch": 0.8828875251791821, + "grad_norm": 0.6684060987089552, + "learning_rate": 3.089978159431411e-06, + "loss": 0.3285, + "step": 18847 + }, + { + "epoch": 0.8829343701691104, + "grad_norm": 0.5973694053673214, + "learning_rate": 3.089793864219634e-06, + "loss": 0.313, + "step": 18848 + }, + { + "epoch": 0.8829812151590387, + "grad_norm": 0.6274234291378511, + "learning_rate": 3.089609565613741e-06, + "loss": 0.2986, + "step": 18849 + }, + { + "epoch": 0.8830280601489671, + "grad_norm": 0.5760768706116347, + "learning_rate": 3.0894252636147937e-06, + "loss": 0.3108, + "step": 18850 + }, + { + "epoch": 0.8830749051388954, + "grad_norm": 0.593437378345461, + "learning_rate": 3.089240958223852e-06, + "loss": 0.3094, + "step": 18851 + }, + { + "epoch": 0.8831217501288238, + "grad_norm": 0.5405158668734763, + "learning_rate": 3.0890566494419767e-06, + "loss": 0.307, + "step": 18852 + }, + { + "epoch": 0.883168595118752, + "grad_norm": 0.5817658626695194, + "learning_rate": 3.0888723372702278e-06, + "loss": 0.2993, + "step": 18853 + }, + { + "epoch": 0.8832154401086804, + "grad_norm": 0.5450148772306842, + "learning_rate": 3.0886880217096677e-06, + "loss": 0.3012, + "step": 18854 + }, + { + "epoch": 0.8832622850986087, + "grad_norm": 0.6260578705309267, + "learning_rate": 3.088503702761355e-06, + "loss": 0.3254, + "step": 18855 + }, + { + "epoch": 0.8833091300885371, + "grad_norm": 0.5445700292256274, + "learning_rate": 3.0883193804263516e-06, + "loss": 0.2929, + "step": 18856 + }, + { + "epoch": 0.8833559750784653, + "grad_norm": 0.5580700009647619, + "learning_rate": 3.0881350547057182e-06, + "loss": 0.3085, + "step": 18857 + }, + { + "epoch": 0.8834028200683937, + "grad_norm": 0.5920511811704946, + "learning_rate": 3.087950725600516e-06, + "loss": 0.3203, + "step": 18858 + }, + { + "epoch": 0.883449665058322, + "grad_norm": 0.570174412092438, + "learning_rate": 3.087766393111804e-06, + "loss": 0.3214, + "step": 18859 + }, + { + "epoch": 0.8834965100482504, + "grad_norm": 0.6061107542746494, + "learning_rate": 3.087582057240645e-06, + "loss": 0.3216, + "step": 18860 + }, + { + "epoch": 0.8835433550381787, + "grad_norm": 0.6235820418871597, + "learning_rate": 3.0873977179880986e-06, + "loss": 0.3206, + "step": 18861 + }, + { + "epoch": 0.883590200028107, + "grad_norm": 0.5748146208030255, + "learning_rate": 3.087213375355226e-06, + "loss": 0.3357, + "step": 18862 + }, + { + "epoch": 0.8836370450180353, + "grad_norm": 0.5509612932521705, + "learning_rate": 3.087029029343088e-06, + "loss": 0.3048, + "step": 18863 + }, + { + "epoch": 0.8836838900079637, + "grad_norm": 0.5914978973510471, + "learning_rate": 3.086844679952745e-06, + "loss": 0.3214, + "step": 18864 + }, + { + "epoch": 0.883730734997892, + "grad_norm": 0.6224662890880531, + "learning_rate": 3.0866603271852594e-06, + "loss": 0.3378, + "step": 18865 + }, + { + "epoch": 0.8837775799878202, + "grad_norm": 0.6069057308249082, + "learning_rate": 3.0864759710416907e-06, + "loss": 0.3364, + "step": 18866 + }, + { + "epoch": 0.8838244249777486, + "grad_norm": 0.5507768581965661, + "learning_rate": 3.0862916115231e-06, + "loss": 0.2951, + "step": 18867 + }, + { + "epoch": 0.883871269967677, + "grad_norm": 0.6012075830080202, + "learning_rate": 3.0861072486305487e-06, + "loss": 0.3274, + "step": 18868 + }, + { + "epoch": 0.8839181149576053, + "grad_norm": 0.6045843288729784, + "learning_rate": 3.085922882365097e-06, + "loss": 0.3244, + "step": 18869 + }, + { + "epoch": 0.8839649599475337, + "grad_norm": 0.5886301421812993, + "learning_rate": 3.0857385127278066e-06, + "loss": 0.2965, + "step": 18870 + }, + { + "epoch": 0.8840118049374619, + "grad_norm": 0.6207420493710104, + "learning_rate": 3.0855541397197386e-06, + "loss": 0.3537, + "step": 18871 + }, + { + "epoch": 0.8840586499273902, + "grad_norm": 0.6123888030745652, + "learning_rate": 3.085369763341954e-06, + "loss": 0.3209, + "step": 18872 + }, + { + "epoch": 0.8841054949173186, + "grad_norm": 0.6358508965609405, + "learning_rate": 3.0851853835955126e-06, + "loss": 0.3227, + "step": 18873 + }, + { + "epoch": 0.884152339907247, + "grad_norm": 0.5550813008043757, + "learning_rate": 3.0850010004814763e-06, + "loss": 0.2899, + "step": 18874 + }, + { + "epoch": 0.8841991848971752, + "grad_norm": 0.5820793660529576, + "learning_rate": 3.084816614000907e-06, + "loss": 0.3183, + "step": 18875 + }, + { + "epoch": 0.8842460298871035, + "grad_norm": 0.5536097889873827, + "learning_rate": 3.084632224154865e-06, + "loss": 0.2993, + "step": 18876 + }, + { + "epoch": 0.8842928748770319, + "grad_norm": 0.6711270584429629, + "learning_rate": 3.084447830944411e-06, + "loss": 0.334, + "step": 18877 + }, + { + "epoch": 0.8843397198669602, + "grad_norm": 0.5591496499106735, + "learning_rate": 3.084263434370607e-06, + "loss": 0.31, + "step": 18878 + }, + { + "epoch": 0.8843865648568886, + "grad_norm": 0.6032765003591835, + "learning_rate": 3.084079034434514e-06, + "loss": 0.34, + "step": 18879 + }, + { + "epoch": 0.8844334098468168, + "grad_norm": 0.5765888114208473, + "learning_rate": 3.0838946311371926e-06, + "loss": 0.3179, + "step": 18880 + }, + { + "epoch": 0.8844802548367452, + "grad_norm": 0.6381879408672106, + "learning_rate": 3.0837102244797044e-06, + "loss": 0.3206, + "step": 18881 + }, + { + "epoch": 0.8845270998266735, + "grad_norm": 0.5776648747549874, + "learning_rate": 3.0835258144631106e-06, + "loss": 0.3091, + "step": 18882 + }, + { + "epoch": 0.8845739448166019, + "grad_norm": 0.5274122996395695, + "learning_rate": 3.083341401088472e-06, + "loss": 0.2951, + "step": 18883 + }, + { + "epoch": 0.8846207898065301, + "grad_norm": 0.5392894099858813, + "learning_rate": 3.083156984356851e-06, + "loss": 0.288, + "step": 18884 + }, + { + "epoch": 0.8846676347964585, + "grad_norm": 0.649037606202141, + "learning_rate": 3.0829725642693077e-06, + "loss": 0.3459, + "step": 18885 + }, + { + "epoch": 0.8847144797863868, + "grad_norm": 0.6175542352311523, + "learning_rate": 3.0827881408269038e-06, + "loss": 0.3133, + "step": 18886 + }, + { + "epoch": 0.8847613247763152, + "grad_norm": 0.6113400428109279, + "learning_rate": 3.082603714030701e-06, + "loss": 0.3207, + "step": 18887 + }, + { + "epoch": 0.8848081697662435, + "grad_norm": 0.6040322074804892, + "learning_rate": 3.08241928388176e-06, + "loss": 0.3163, + "step": 18888 + }, + { + "epoch": 0.8848550147561718, + "grad_norm": 0.6055048437746163, + "learning_rate": 3.0822348503811427e-06, + "loss": 0.3235, + "step": 18889 + }, + { + "epoch": 0.8849018597461001, + "grad_norm": 0.5853403376842232, + "learning_rate": 3.0820504135299103e-06, + "loss": 0.34, + "step": 18890 + }, + { + "epoch": 0.8849487047360285, + "grad_norm": 0.5973707917138484, + "learning_rate": 3.081865973329123e-06, + "loss": 0.2973, + "step": 18891 + }, + { + "epoch": 0.8849955497259568, + "grad_norm": 0.5328904827379255, + "learning_rate": 3.081681529779844e-06, + "loss": 0.2925, + "step": 18892 + }, + { + "epoch": 0.8850423947158851, + "grad_norm": 0.5882201713658864, + "learning_rate": 3.081497082883134e-06, + "loss": 0.3313, + "step": 18893 + }, + { + "epoch": 0.8850892397058134, + "grad_norm": 0.5917563111253772, + "learning_rate": 3.0813126326400542e-06, + "loss": 0.3232, + "step": 18894 + }, + { + "epoch": 0.8851360846957418, + "grad_norm": 0.5451550784037488, + "learning_rate": 3.0811281790516663e-06, + "loss": 0.2979, + "step": 18895 + }, + { + "epoch": 0.8851829296856701, + "grad_norm": 0.5802664626682229, + "learning_rate": 3.0809437221190323e-06, + "loss": 0.3057, + "step": 18896 + }, + { + "epoch": 0.8852297746755985, + "grad_norm": 0.5795825821023268, + "learning_rate": 3.0807592618432125e-06, + "loss": 0.333, + "step": 18897 + }, + { + "epoch": 0.8852766196655267, + "grad_norm": 0.5639997827930009, + "learning_rate": 3.0805747982252693e-06, + "loss": 0.3094, + "step": 18898 + }, + { + "epoch": 0.8853234646554551, + "grad_norm": 0.5762530956511741, + "learning_rate": 3.0803903312662643e-06, + "loss": 0.3159, + "step": 18899 + }, + { + "epoch": 0.8853703096453834, + "grad_norm": 0.6262533067465017, + "learning_rate": 3.0802058609672594e-06, + "loss": 0.3148, + "step": 18900 + }, + { + "epoch": 0.8854171546353118, + "grad_norm": 0.6062764496714789, + "learning_rate": 3.0800213873293145e-06, + "loss": 0.2967, + "step": 18901 + }, + { + "epoch": 0.88546399962524, + "grad_norm": 0.6061602564327354, + "learning_rate": 3.079836910353493e-06, + "loss": 0.3255, + "step": 18902 + }, + { + "epoch": 0.8855108446151684, + "grad_norm": 0.56649917716994, + "learning_rate": 3.0796524300408553e-06, + "loss": 0.2942, + "step": 18903 + }, + { + "epoch": 0.8855576896050967, + "grad_norm": 0.5929307505244067, + "learning_rate": 3.0794679463924637e-06, + "loss": 0.3191, + "step": 18904 + }, + { + "epoch": 0.8856045345950251, + "grad_norm": 0.6523872543521254, + "learning_rate": 3.07928345940938e-06, + "loss": 0.3134, + "step": 18905 + }, + { + "epoch": 0.8856513795849534, + "grad_norm": 0.5949998232065444, + "learning_rate": 3.0790989690926652e-06, + "loss": 0.3161, + "step": 18906 + }, + { + "epoch": 0.8856982245748817, + "grad_norm": 0.6024400869064025, + "learning_rate": 3.078914475443382e-06, + "loss": 0.3348, + "step": 18907 + }, + { + "epoch": 0.88574506956481, + "grad_norm": 0.5395346703719871, + "learning_rate": 3.0787299784625913e-06, + "loss": 0.3138, + "step": 18908 + }, + { + "epoch": 0.8857919145547384, + "grad_norm": 0.5521285632124019, + "learning_rate": 3.078545478151354e-06, + "loss": 0.3267, + "step": 18909 + }, + { + "epoch": 0.8858387595446667, + "grad_norm": 0.5562205280015793, + "learning_rate": 3.078360974510734e-06, + "loss": 0.3182, + "step": 18910 + }, + { + "epoch": 0.885885604534595, + "grad_norm": 0.5492254707741593, + "learning_rate": 3.078176467541792e-06, + "loss": 0.3033, + "step": 18911 + }, + { + "epoch": 0.8859324495245233, + "grad_norm": 0.5913845475694282, + "learning_rate": 3.077991957245589e-06, + "loss": 0.3203, + "step": 18912 + }, + { + "epoch": 0.8859792945144517, + "grad_norm": 0.6399736538048383, + "learning_rate": 3.0778074436231875e-06, + "loss": 0.3228, + "step": 18913 + }, + { + "epoch": 0.88602613950438, + "grad_norm": 0.6093129587156049, + "learning_rate": 3.0776229266756506e-06, + "loss": 0.3035, + "step": 18914 + }, + { + "epoch": 0.8860729844943084, + "grad_norm": 0.5792909683658726, + "learning_rate": 3.077438406404038e-06, + "loss": 0.3256, + "step": 18915 + }, + { + "epoch": 0.8861198294842366, + "grad_norm": 0.5849720111882842, + "learning_rate": 3.0772538828094124e-06, + "loss": 0.3172, + "step": 18916 + }, + { + "epoch": 0.886166674474165, + "grad_norm": 0.5900740945342381, + "learning_rate": 3.0770693558928367e-06, + "loss": 0.3425, + "step": 18917 + }, + { + "epoch": 0.8862135194640933, + "grad_norm": 0.5987912357683188, + "learning_rate": 3.0768848256553715e-06, + "loss": 0.3042, + "step": 18918 + }, + { + "epoch": 0.8862603644540217, + "grad_norm": 0.5742823589347508, + "learning_rate": 3.0767002920980783e-06, + "loss": 0.3349, + "step": 18919 + }, + { + "epoch": 0.8863072094439499, + "grad_norm": 0.6093299956874663, + "learning_rate": 3.0765157552220202e-06, + "loss": 0.324, + "step": 18920 + }, + { + "epoch": 0.8863540544338783, + "grad_norm": 0.5847766569396791, + "learning_rate": 3.07633121502826e-06, + "loss": 0.3222, + "step": 18921 + }, + { + "epoch": 0.8864008994238066, + "grad_norm": 0.5765036813911806, + "learning_rate": 3.0761466715178574e-06, + "loss": 0.3038, + "step": 18922 + }, + { + "epoch": 0.886447744413735, + "grad_norm": 0.5544201535366574, + "learning_rate": 3.0759621246918753e-06, + "loss": 0.3015, + "step": 18923 + }, + { + "epoch": 0.8864945894036633, + "grad_norm": 0.5418226468420996, + "learning_rate": 3.075777574551377e-06, + "loss": 0.2831, + "step": 18924 + }, + { + "epoch": 0.8865414343935916, + "grad_norm": 0.5862561893135138, + "learning_rate": 3.0755930210974226e-06, + "loss": 0.3314, + "step": 18925 + }, + { + "epoch": 0.8865882793835199, + "grad_norm": 0.5570823040707893, + "learning_rate": 3.075408464331075e-06, + "loss": 0.3229, + "step": 18926 + }, + { + "epoch": 0.8866351243734483, + "grad_norm": 0.5731073381806349, + "learning_rate": 3.0752239042533964e-06, + "loss": 0.2923, + "step": 18927 + }, + { + "epoch": 0.8866819693633766, + "grad_norm": 0.5367330253743372, + "learning_rate": 3.0750393408654493e-06, + "loss": 0.2974, + "step": 18928 + }, + { + "epoch": 0.8867288143533049, + "grad_norm": 0.5800271360901333, + "learning_rate": 3.074854774168295e-06, + "loss": 0.3042, + "step": 18929 + }, + { + "epoch": 0.8867756593432332, + "grad_norm": 0.5439895526823542, + "learning_rate": 3.074670204162996e-06, + "loss": 0.3058, + "step": 18930 + }, + { + "epoch": 0.8868225043331616, + "grad_norm": 0.5684478247896937, + "learning_rate": 3.0744856308506143e-06, + "loss": 0.2993, + "step": 18931 + }, + { + "epoch": 0.8868693493230899, + "grad_norm": 0.5817423650917651, + "learning_rate": 3.074301054232212e-06, + "loss": 0.303, + "step": 18932 + }, + { + "epoch": 0.8869161943130183, + "grad_norm": 0.6463386244683819, + "learning_rate": 3.0741164743088525e-06, + "loss": 0.322, + "step": 18933 + }, + { + "epoch": 0.8869630393029465, + "grad_norm": 0.6312741402813475, + "learning_rate": 3.0739318910815956e-06, + "loss": 0.3311, + "step": 18934 + }, + { + "epoch": 0.8870098842928749, + "grad_norm": 0.6130827296758669, + "learning_rate": 3.073747304551506e-06, + "loss": 0.3397, + "step": 18935 + }, + { + "epoch": 0.8870567292828032, + "grad_norm": 0.5930364951440614, + "learning_rate": 3.0735627147196444e-06, + "loss": 0.3284, + "step": 18936 + }, + { + "epoch": 0.8871035742727316, + "grad_norm": 0.5560698413103459, + "learning_rate": 3.0733781215870733e-06, + "loss": 0.3235, + "step": 18937 + }, + { + "epoch": 0.8871504192626598, + "grad_norm": 0.609619220070655, + "learning_rate": 3.0731935251548556e-06, + "loss": 0.3195, + "step": 18938 + }, + { + "epoch": 0.8871972642525882, + "grad_norm": 0.6224965673035364, + "learning_rate": 3.0730089254240536e-06, + "loss": 0.3149, + "step": 18939 + }, + { + "epoch": 0.8872441092425165, + "grad_norm": 0.648287402193717, + "learning_rate": 3.0728243223957283e-06, + "loss": 0.3127, + "step": 18940 + }, + { + "epoch": 0.8872909542324449, + "grad_norm": 0.5645086543347584, + "learning_rate": 3.0726397160709438e-06, + "loss": 0.3128, + "step": 18941 + }, + { + "epoch": 0.8873377992223732, + "grad_norm": 0.651834612328456, + "learning_rate": 3.072455106450761e-06, + "loss": 0.3136, + "step": 18942 + }, + { + "epoch": 0.8873846442123015, + "grad_norm": 0.5533974695468397, + "learning_rate": 3.0722704935362437e-06, + "loss": 0.3138, + "step": 18943 + }, + { + "epoch": 0.8874314892022298, + "grad_norm": 0.58793465148873, + "learning_rate": 3.0720858773284524e-06, + "loss": 0.3283, + "step": 18944 + }, + { + "epoch": 0.8874783341921582, + "grad_norm": 0.5959776799552015, + "learning_rate": 3.0719012578284524e-06, + "loss": 0.3142, + "step": 18945 + }, + { + "epoch": 0.8875251791820865, + "grad_norm": 0.6417169811711759, + "learning_rate": 3.071716635037303e-06, + "loss": 0.3253, + "step": 18946 + }, + { + "epoch": 0.8875720241720148, + "grad_norm": 0.5836681975398693, + "learning_rate": 3.071532008956068e-06, + "loss": 0.315, + "step": 18947 + }, + { + "epoch": 0.8876188691619431, + "grad_norm": 0.5830651225669936, + "learning_rate": 3.0713473795858107e-06, + "loss": 0.3066, + "step": 18948 + }, + { + "epoch": 0.8876657141518715, + "grad_norm": 0.5377818263911828, + "learning_rate": 3.071162746927593e-06, + "loss": 0.298, + "step": 18949 + }, + { + "epoch": 0.8877125591417998, + "grad_norm": 0.6204829366575451, + "learning_rate": 3.070978110982476e-06, + "loss": 0.3306, + "step": 18950 + }, + { + "epoch": 0.8877594041317282, + "grad_norm": 0.5788272423259933, + "learning_rate": 3.070793471751525e-06, + "loss": 0.3124, + "step": 18951 + }, + { + "epoch": 0.8878062491216564, + "grad_norm": 0.5851837266063293, + "learning_rate": 3.0706088292358e-06, + "loss": 0.339, + "step": 18952 + }, + { + "epoch": 0.8878530941115848, + "grad_norm": 0.5883036045684493, + "learning_rate": 3.0704241834363647e-06, + "loss": 0.3006, + "step": 18953 + }, + { + "epoch": 0.8878999391015131, + "grad_norm": 0.6476422929892609, + "learning_rate": 3.0702395343542816e-06, + "loss": 0.3279, + "step": 18954 + }, + { + "epoch": 0.8879467840914415, + "grad_norm": 0.5990003028813415, + "learning_rate": 3.0700548819906133e-06, + "loss": 0.3028, + "step": 18955 + }, + { + "epoch": 0.8879936290813697, + "grad_norm": 0.6116321553753484, + "learning_rate": 3.069870226346423e-06, + "loss": 0.3027, + "step": 18956 + }, + { + "epoch": 0.888040474071298, + "grad_norm": 0.6143757869077238, + "learning_rate": 3.0696855674227726e-06, + "loss": 0.3141, + "step": 18957 + }, + { + "epoch": 0.8880873190612264, + "grad_norm": 0.5672871791051409, + "learning_rate": 3.0695009052207247e-06, + "loss": 0.3234, + "step": 18958 + }, + { + "epoch": 0.8881341640511548, + "grad_norm": 0.5641485966923374, + "learning_rate": 3.0693162397413424e-06, + "loss": 0.297, + "step": 18959 + }, + { + "epoch": 0.8881810090410831, + "grad_norm": 0.5215408396160357, + "learning_rate": 3.0691315709856884e-06, + "loss": 0.2954, + "step": 18960 + }, + { + "epoch": 0.8882278540310113, + "grad_norm": 0.589038839156227, + "learning_rate": 3.0689468989548243e-06, + "loss": 0.3177, + "step": 18961 + }, + { + "epoch": 0.8882746990209397, + "grad_norm": 0.6489157409416606, + "learning_rate": 3.068762223649815e-06, + "loss": 0.3343, + "step": 18962 + }, + { + "epoch": 0.888321544010868, + "grad_norm": 0.5927741292274376, + "learning_rate": 3.0685775450717216e-06, + "loss": 0.3402, + "step": 18963 + }, + { + "epoch": 0.8883683890007964, + "grad_norm": 0.5936540123637122, + "learning_rate": 3.068392863221607e-06, + "loss": 0.3239, + "step": 18964 + }, + { + "epoch": 0.8884152339907246, + "grad_norm": 0.6038970380452001, + "learning_rate": 3.0682081781005343e-06, + "loss": 0.323, + "step": 18965 + }, + { + "epoch": 0.888462078980653, + "grad_norm": 0.6098041582829404, + "learning_rate": 3.068023489709567e-06, + "loss": 0.3296, + "step": 18966 + }, + { + "epoch": 0.8885089239705813, + "grad_norm": 0.6154754501880947, + "learning_rate": 3.067838798049767e-06, + "loss": 0.3411, + "step": 18967 + }, + { + "epoch": 0.8885557689605097, + "grad_norm": 0.5627255879396862, + "learning_rate": 3.0676541031221967e-06, + "loss": 0.3023, + "step": 18968 + }, + { + "epoch": 0.888602613950438, + "grad_norm": 0.6067466338097044, + "learning_rate": 3.0674694049279206e-06, + "loss": 0.3126, + "step": 18969 + }, + { + "epoch": 0.8886494589403663, + "grad_norm": 0.5899927176046335, + "learning_rate": 3.067284703468001e-06, + "loss": 0.3371, + "step": 18970 + }, + { + "epoch": 0.8886963039302946, + "grad_norm": 0.5846289226797685, + "learning_rate": 3.0670999987434997e-06, + "loss": 0.3312, + "step": 18971 + }, + { + "epoch": 0.888743148920223, + "grad_norm": 0.6438165958614129, + "learning_rate": 3.0669152907554805e-06, + "loss": 0.3208, + "step": 18972 + }, + { + "epoch": 0.8887899939101513, + "grad_norm": 0.5868364048354259, + "learning_rate": 3.0667305795050073e-06, + "loss": 0.3181, + "step": 18973 + }, + { + "epoch": 0.8888368389000796, + "grad_norm": 0.5902990139852149, + "learning_rate": 3.0665458649931408e-06, + "loss": 0.32, + "step": 18974 + }, + { + "epoch": 0.8888836838900079, + "grad_norm": 0.5854086319268494, + "learning_rate": 3.0663611472209458e-06, + "loss": 0.3125, + "step": 18975 + }, + { + "epoch": 0.8889305288799363, + "grad_norm": 0.650338222941491, + "learning_rate": 3.0661764261894844e-06, + "loss": 0.32, + "step": 18976 + }, + { + "epoch": 0.8889773738698646, + "grad_norm": 0.5811714893235762, + "learning_rate": 3.0659917018998203e-06, + "loss": 0.3038, + "step": 18977 + }, + { + "epoch": 0.889024218859793, + "grad_norm": 0.5784927372017263, + "learning_rate": 3.0658069743530163e-06, + "loss": 0.3201, + "step": 18978 + }, + { + "epoch": 0.8890710638497212, + "grad_norm": 0.5836998487141659, + "learning_rate": 3.065622243550135e-06, + "loss": 0.3297, + "step": 18979 + }, + { + "epoch": 0.8891179088396496, + "grad_norm": 0.5874557211672659, + "learning_rate": 3.06543750949224e-06, + "loss": 0.3075, + "step": 18980 + }, + { + "epoch": 0.8891647538295779, + "grad_norm": 0.6258446654586566, + "learning_rate": 3.065252772180395e-06, + "loss": 0.3153, + "step": 18981 + }, + { + "epoch": 0.8892115988195063, + "grad_norm": 0.614580664275337, + "learning_rate": 3.065068031615661e-06, + "loss": 0.3324, + "step": 18982 + }, + { + "epoch": 0.8892584438094345, + "grad_norm": 0.5626952080389915, + "learning_rate": 3.064883287799103e-06, + "loss": 0.2995, + "step": 18983 + }, + { + "epoch": 0.8893052887993629, + "grad_norm": 0.6081785940504052, + "learning_rate": 3.0646985407317846e-06, + "loss": 0.3104, + "step": 18984 + }, + { + "epoch": 0.8893521337892912, + "grad_norm": 0.6179307264082183, + "learning_rate": 3.064513790414767e-06, + "loss": 0.321, + "step": 18985 + }, + { + "epoch": 0.8893989787792196, + "grad_norm": 0.5648504098859104, + "learning_rate": 3.064329036849114e-06, + "loss": 0.3236, + "step": 18986 + }, + { + "epoch": 0.8894458237691479, + "grad_norm": 0.5844963793978096, + "learning_rate": 3.0641442800358906e-06, + "loss": 0.3212, + "step": 18987 + }, + { + "epoch": 0.8894926687590762, + "grad_norm": 0.6516732265237878, + "learning_rate": 3.063959519976158e-06, + "loss": 0.2889, + "step": 18988 + }, + { + "epoch": 0.8895395137490045, + "grad_norm": 0.6427899269811748, + "learning_rate": 3.06377475667098e-06, + "loss": 0.339, + "step": 18989 + }, + { + "epoch": 0.8895863587389329, + "grad_norm": 0.6202381585874165, + "learning_rate": 3.06358999012142e-06, + "loss": 0.3203, + "step": 18990 + }, + { + "epoch": 0.8896332037288612, + "grad_norm": 0.5729640967783913, + "learning_rate": 3.063405220328542e-06, + "loss": 0.3087, + "step": 18991 + }, + { + "epoch": 0.8896800487187895, + "grad_norm": 0.5992459683431203, + "learning_rate": 3.0632204472934075e-06, + "loss": 0.3348, + "step": 18992 + }, + { + "epoch": 0.8897268937087178, + "grad_norm": 0.5851328555849165, + "learning_rate": 3.0630356710170805e-06, + "loss": 0.3152, + "step": 18993 + }, + { + "epoch": 0.8897737386986462, + "grad_norm": 0.5669238483368676, + "learning_rate": 3.0628508915006267e-06, + "loss": 0.319, + "step": 18994 + }, + { + "epoch": 0.8898205836885745, + "grad_norm": 0.591041815544659, + "learning_rate": 3.062666108745106e-06, + "loss": 0.2899, + "step": 18995 + }, + { + "epoch": 0.8898674286785029, + "grad_norm": 0.5705920948181608, + "learning_rate": 3.062481322751583e-06, + "loss": 0.32, + "step": 18996 + }, + { + "epoch": 0.8899142736684311, + "grad_norm": 0.6087941910660329, + "learning_rate": 3.0622965335211217e-06, + "loss": 0.3211, + "step": 18997 + }, + { + "epoch": 0.8899611186583595, + "grad_norm": 0.6205777630459093, + "learning_rate": 3.062111741054786e-06, + "loss": 0.3242, + "step": 18998 + }, + { + "epoch": 0.8900079636482878, + "grad_norm": 0.6001680176774166, + "learning_rate": 3.0619269453536376e-06, + "loss": 0.3269, + "step": 18999 + }, + { + "epoch": 0.8900548086382162, + "grad_norm": 0.5984915931838406, + "learning_rate": 3.0617421464187414e-06, + "loss": 0.3174, + "step": 19000 + }, + { + "epoch": 0.8901016536281444, + "grad_norm": 0.6060191196136034, + "learning_rate": 3.0615573442511604e-06, + "loss": 0.3109, + "step": 19001 + }, + { + "epoch": 0.8901484986180728, + "grad_norm": 0.5532341624590864, + "learning_rate": 3.0613725388519573e-06, + "loss": 0.2987, + "step": 19002 + }, + { + "epoch": 0.8901953436080011, + "grad_norm": 0.5807438340766251, + "learning_rate": 3.0611877302221973e-06, + "loss": 0.3243, + "step": 19003 + }, + { + "epoch": 0.8902421885979295, + "grad_norm": 0.6123425705153631, + "learning_rate": 3.061002918362942e-06, + "loss": 0.3209, + "step": 19004 + }, + { + "epoch": 0.8902890335878578, + "grad_norm": 0.6158052091178411, + "learning_rate": 3.0608181032752566e-06, + "loss": 0.3227, + "step": 19005 + }, + { + "epoch": 0.8903358785777861, + "grad_norm": 0.5799983920130952, + "learning_rate": 3.0606332849602038e-06, + "loss": 0.2967, + "step": 19006 + }, + { + "epoch": 0.8903827235677144, + "grad_norm": 0.5692130673530722, + "learning_rate": 3.060448463418847e-06, + "loss": 0.3152, + "step": 19007 + }, + { + "epoch": 0.8904295685576428, + "grad_norm": 0.623071080090104, + "learning_rate": 3.0602636386522507e-06, + "loss": 0.3326, + "step": 19008 + }, + { + "epoch": 0.8904764135475711, + "grad_norm": 0.6093338610561704, + "learning_rate": 3.0600788106614774e-06, + "loss": 0.3312, + "step": 19009 + }, + { + "epoch": 0.8905232585374994, + "grad_norm": 0.612082019030023, + "learning_rate": 3.0598939794475917e-06, + "loss": 0.3006, + "step": 19010 + }, + { + "epoch": 0.8905701035274277, + "grad_norm": 0.5705436963602925, + "learning_rate": 3.0597091450116566e-06, + "loss": 0.3191, + "step": 19011 + }, + { + "epoch": 0.8906169485173561, + "grad_norm": 0.6387942744440902, + "learning_rate": 3.059524307354737e-06, + "loss": 0.3287, + "step": 19012 + }, + { + "epoch": 0.8906637935072844, + "grad_norm": 0.5570558705040465, + "learning_rate": 3.059339466477894e-06, + "loss": 0.2961, + "step": 19013 + }, + { + "epoch": 0.8907106384972128, + "grad_norm": 0.5994386000686251, + "learning_rate": 3.0591546223821938e-06, + "loss": 0.337, + "step": 19014 + }, + { + "epoch": 0.890757483487141, + "grad_norm": 0.6033833421015493, + "learning_rate": 3.0589697750687003e-06, + "loss": 0.3183, + "step": 19015 + }, + { + "epoch": 0.8908043284770694, + "grad_norm": 0.5530327169425721, + "learning_rate": 3.0587849245384747e-06, + "loss": 0.3256, + "step": 19016 + }, + { + "epoch": 0.8908511734669977, + "grad_norm": 0.6101547785770812, + "learning_rate": 3.058600070792583e-06, + "loss": 0.3184, + "step": 19017 + }, + { + "epoch": 0.8908980184569261, + "grad_norm": 0.6100483496896575, + "learning_rate": 3.0584152138320877e-06, + "loss": 0.2961, + "step": 19018 + }, + { + "epoch": 0.8909448634468543, + "grad_norm": 0.6237485592831308, + "learning_rate": 3.0582303536580545e-06, + "loss": 0.3323, + "step": 19019 + }, + { + "epoch": 0.8909917084367827, + "grad_norm": 0.5874241721934271, + "learning_rate": 3.0580454902715446e-06, + "loss": 0.3004, + "step": 19020 + }, + { + "epoch": 0.891038553426711, + "grad_norm": 0.6347117075682267, + "learning_rate": 3.0578606236736237e-06, + "loss": 0.3285, + "step": 19021 + }, + { + "epoch": 0.8910853984166394, + "grad_norm": 0.5897303293256515, + "learning_rate": 3.057675753865355e-06, + "loss": 0.3247, + "step": 19022 + }, + { + "epoch": 0.8911322434065677, + "grad_norm": 0.5986534443996209, + "learning_rate": 3.057490880847803e-06, + "loss": 0.3252, + "step": 19023 + }, + { + "epoch": 0.891179088396496, + "grad_norm": 0.5746910079184111, + "learning_rate": 3.0573060046220306e-06, + "loss": 0.316, + "step": 19024 + }, + { + "epoch": 0.8912259333864243, + "grad_norm": 0.6690742443969361, + "learning_rate": 3.057121125189102e-06, + "loss": 0.3343, + "step": 19025 + }, + { + "epoch": 0.8912727783763527, + "grad_norm": 0.57795787707093, + "learning_rate": 3.0569362425500813e-06, + "loss": 0.3151, + "step": 19026 + }, + { + "epoch": 0.891319623366281, + "grad_norm": 0.5970781984436039, + "learning_rate": 3.056751356706033e-06, + "loss": 0.3113, + "step": 19027 + }, + { + "epoch": 0.8913664683562093, + "grad_norm": 0.6568686615131029, + "learning_rate": 3.05656646765802e-06, + "loss": 0.3246, + "step": 19028 + }, + { + "epoch": 0.8914133133461376, + "grad_norm": 0.5624358357490115, + "learning_rate": 3.056381575407108e-06, + "loss": 0.2918, + "step": 19029 + }, + { + "epoch": 0.891460158336066, + "grad_norm": 0.572663411895746, + "learning_rate": 3.056196679954359e-06, + "loss": 0.3154, + "step": 19030 + }, + { + "epoch": 0.8915070033259943, + "grad_norm": 0.5216205460460891, + "learning_rate": 3.0560117813008376e-06, + "loss": 0.3008, + "step": 19031 + }, + { + "epoch": 0.8915538483159227, + "grad_norm": 0.5494254132698586, + "learning_rate": 3.0558268794476093e-06, + "loss": 0.3136, + "step": 19032 + }, + { + "epoch": 0.8916006933058509, + "grad_norm": 0.6196387488687061, + "learning_rate": 3.0556419743957368e-06, + "loss": 0.3135, + "step": 19033 + }, + { + "epoch": 0.8916475382957793, + "grad_norm": 0.5449471836740861, + "learning_rate": 3.0554570661462836e-06, + "loss": 0.2964, + "step": 19034 + }, + { + "epoch": 0.8916943832857076, + "grad_norm": 0.552928256045457, + "learning_rate": 3.055272154700314e-06, + "loss": 0.3203, + "step": 19035 + }, + { + "epoch": 0.891741228275636, + "grad_norm": 0.6039674431547251, + "learning_rate": 3.0550872400588943e-06, + "loss": 0.339, + "step": 19036 + }, + { + "epoch": 0.8917880732655642, + "grad_norm": 0.6008668273067682, + "learning_rate": 3.054902322223086e-06, + "loss": 0.3159, + "step": 19037 + }, + { + "epoch": 0.8918349182554925, + "grad_norm": 0.5881056376849805, + "learning_rate": 3.0547174011939545e-06, + "loss": 0.3211, + "step": 19038 + }, + { + "epoch": 0.8918817632454209, + "grad_norm": 0.5520649857605616, + "learning_rate": 3.0545324769725643e-06, + "loss": 0.2975, + "step": 19039 + }, + { + "epoch": 0.8919286082353493, + "grad_norm": 0.6089018674002824, + "learning_rate": 3.054347549559979e-06, + "loss": 0.3336, + "step": 19040 + }, + { + "epoch": 0.8919754532252776, + "grad_norm": 0.5898686167830621, + "learning_rate": 3.054162618957262e-06, + "loss": 0.3335, + "step": 19041 + }, + { + "epoch": 0.8920222982152058, + "grad_norm": 0.5564771352875411, + "learning_rate": 3.0539776851654795e-06, + "loss": 0.305, + "step": 19042 + }, + { + "epoch": 0.8920691432051342, + "grad_norm": 0.584701231938981, + "learning_rate": 3.0537927481856944e-06, + "loss": 0.3001, + "step": 19043 + }, + { + "epoch": 0.8921159881950625, + "grad_norm": 0.5708777885961956, + "learning_rate": 3.053607808018971e-06, + "loss": 0.3295, + "step": 19044 + }, + { + "epoch": 0.8921628331849909, + "grad_norm": 0.5870313874207185, + "learning_rate": 3.0534228646663733e-06, + "loss": 0.3267, + "step": 19045 + }, + { + "epoch": 0.8922096781749191, + "grad_norm": 0.6162233938886059, + "learning_rate": 3.0532379181289666e-06, + "loss": 0.3462, + "step": 19046 + }, + { + "epoch": 0.8922565231648475, + "grad_norm": 0.55140544182993, + "learning_rate": 3.0530529684078147e-06, + "loss": 0.3081, + "step": 19047 + }, + { + "epoch": 0.8923033681547758, + "grad_norm": 0.6151235038237006, + "learning_rate": 3.0528680155039826e-06, + "loss": 0.3224, + "step": 19048 + }, + { + "epoch": 0.8923502131447042, + "grad_norm": 0.6092379459833114, + "learning_rate": 3.052683059418533e-06, + "loss": 0.3285, + "step": 19049 + }, + { + "epoch": 0.8923970581346325, + "grad_norm": 0.5950018723696162, + "learning_rate": 3.052498100152532e-06, + "loss": 0.3269, + "step": 19050 + }, + { + "epoch": 0.8924439031245608, + "grad_norm": 0.617405063468353, + "learning_rate": 3.052313137707043e-06, + "loss": 0.3403, + "step": 19051 + }, + { + "epoch": 0.8924907481144891, + "grad_norm": 0.6072693069365065, + "learning_rate": 3.052128172083131e-06, + "loss": 0.3368, + "step": 19052 + }, + { + "epoch": 0.8925375931044175, + "grad_norm": 0.6503335939446606, + "learning_rate": 3.051943203281859e-06, + "loss": 0.3363, + "step": 19053 + }, + { + "epoch": 0.8925844380943458, + "grad_norm": 0.5601267404231993, + "learning_rate": 3.0517582313042936e-06, + "loss": 0.3023, + "step": 19054 + }, + { + "epoch": 0.8926312830842741, + "grad_norm": 0.5207627220741529, + "learning_rate": 3.0515732561514987e-06, + "loss": 0.2945, + "step": 19055 + }, + { + "epoch": 0.8926781280742024, + "grad_norm": 0.5580424359036308, + "learning_rate": 3.0513882778245373e-06, + "loss": 0.3053, + "step": 19056 + }, + { + "epoch": 0.8927249730641308, + "grad_norm": 0.5864050190253951, + "learning_rate": 3.0512032963244757e-06, + "loss": 0.3176, + "step": 19057 + }, + { + "epoch": 0.8927718180540591, + "grad_norm": 0.580327421513939, + "learning_rate": 3.0510183116523777e-06, + "loss": 0.3086, + "step": 19058 + }, + { + "epoch": 0.8928186630439875, + "grad_norm": 0.561388741412369, + "learning_rate": 3.0508333238093073e-06, + "loss": 0.3071, + "step": 19059 + }, + { + "epoch": 0.8928655080339157, + "grad_norm": 0.5772429799000683, + "learning_rate": 3.05064833279633e-06, + "loss": 0.2952, + "step": 19060 + }, + { + "epoch": 0.8929123530238441, + "grad_norm": 0.6526961362047842, + "learning_rate": 3.0504633386145097e-06, + "loss": 0.3253, + "step": 19061 + }, + { + "epoch": 0.8929591980137724, + "grad_norm": 0.6696997546746782, + "learning_rate": 3.050278341264911e-06, + "loss": 0.3264, + "step": 19062 + }, + { + "epoch": 0.8930060430037008, + "grad_norm": 0.5638210868172813, + "learning_rate": 3.050093340748599e-06, + "loss": 0.325, + "step": 19063 + }, + { + "epoch": 0.893052887993629, + "grad_norm": 0.630839978928046, + "learning_rate": 3.049908337066639e-06, + "loss": 0.3202, + "step": 19064 + }, + { + "epoch": 0.8930997329835574, + "grad_norm": 0.5645830976603103, + "learning_rate": 3.0497233302200934e-06, + "loss": 0.3146, + "step": 19065 + }, + { + "epoch": 0.8931465779734857, + "grad_norm": 0.6235066602152746, + "learning_rate": 3.049538320210028e-06, + "loss": 0.3102, + "step": 19066 + }, + { + "epoch": 0.8931934229634141, + "grad_norm": 0.6055185663376956, + "learning_rate": 3.049353307037509e-06, + "loss": 0.3319, + "step": 19067 + }, + { + "epoch": 0.8932402679533424, + "grad_norm": 0.5924980183014805, + "learning_rate": 3.0491682907035993e-06, + "loss": 0.33, + "step": 19068 + }, + { + "epoch": 0.8932871129432707, + "grad_norm": 0.5289442069488787, + "learning_rate": 3.048983271209363e-06, + "loss": 0.3044, + "step": 19069 + }, + { + "epoch": 0.893333957933199, + "grad_norm": 0.5855794739779823, + "learning_rate": 3.0487982485558674e-06, + "loss": 0.3257, + "step": 19070 + }, + { + "epoch": 0.8933808029231274, + "grad_norm": 0.5461384523970644, + "learning_rate": 3.0486132227441754e-06, + "loss": 0.2987, + "step": 19071 + }, + { + "epoch": 0.8934276479130557, + "grad_norm": 0.5984509945425553, + "learning_rate": 3.048428193775352e-06, + "loss": 0.3078, + "step": 19072 + }, + { + "epoch": 0.893474492902984, + "grad_norm": 0.5805216995326471, + "learning_rate": 3.0482431616504625e-06, + "loss": 0.324, + "step": 19073 + }, + { + "epoch": 0.8935213378929123, + "grad_norm": 0.5702994431844959, + "learning_rate": 3.048058126370571e-06, + "loss": 0.3044, + "step": 19074 + }, + { + "epoch": 0.8935681828828407, + "grad_norm": 0.6144104201204804, + "learning_rate": 3.047873087936743e-06, + "loss": 0.3163, + "step": 19075 + }, + { + "epoch": 0.893615027872769, + "grad_norm": 0.5744594230558756, + "learning_rate": 3.047688046350043e-06, + "loss": 0.3119, + "step": 19076 + }, + { + "epoch": 0.8936618728626974, + "grad_norm": 0.575395447580257, + "learning_rate": 3.047503001611536e-06, + "loss": 0.3185, + "step": 19077 + }, + { + "epoch": 0.8937087178526256, + "grad_norm": 0.5919917864924213, + "learning_rate": 3.047317953722287e-06, + "loss": 0.3146, + "step": 19078 + }, + { + "epoch": 0.893755562842554, + "grad_norm": 0.6024023516580296, + "learning_rate": 3.0471329026833605e-06, + "loss": 0.3116, + "step": 19079 + }, + { + "epoch": 0.8938024078324823, + "grad_norm": 0.5679117377582555, + "learning_rate": 3.0469478484958217e-06, + "loss": 0.3103, + "step": 19080 + }, + { + "epoch": 0.8938492528224107, + "grad_norm": 0.601525616868127, + "learning_rate": 3.0467627911607355e-06, + "loss": 0.3138, + "step": 19081 + }, + { + "epoch": 0.8938960978123389, + "grad_norm": 1.3650552691392939, + "learning_rate": 3.046577730679167e-06, + "loss": 0.3065, + "step": 19082 + }, + { + "epoch": 0.8939429428022673, + "grad_norm": 0.5550225539250085, + "learning_rate": 3.0463926670521806e-06, + "loss": 0.2959, + "step": 19083 + }, + { + "epoch": 0.8939897877921956, + "grad_norm": 0.597783383122267, + "learning_rate": 3.0462076002808416e-06, + "loss": 0.3076, + "step": 19084 + }, + { + "epoch": 0.894036632782124, + "grad_norm": 0.5795029354059796, + "learning_rate": 3.0460225303662167e-06, + "loss": 0.3129, + "step": 19085 + }, + { + "epoch": 0.8940834777720523, + "grad_norm": 0.5872064115528337, + "learning_rate": 3.0458374573093673e-06, + "loss": 0.3181, + "step": 19086 + }, + { + "epoch": 0.8941303227619806, + "grad_norm": 0.6151105699121475, + "learning_rate": 3.0456523811113614e-06, + "loss": 0.3259, + "step": 19087 + }, + { + "epoch": 0.8941771677519089, + "grad_norm": 0.6216968134013314, + "learning_rate": 3.0454673017732638e-06, + "loss": 0.3279, + "step": 19088 + }, + { + "epoch": 0.8942240127418373, + "grad_norm": 0.5948559582445078, + "learning_rate": 3.0452822192961382e-06, + "loss": 0.334, + "step": 19089 + }, + { + "epoch": 0.8942708577317656, + "grad_norm": 0.5576706947866359, + "learning_rate": 3.045097133681051e-06, + "loss": 0.3185, + "step": 19090 + }, + { + "epoch": 0.8943177027216939, + "grad_norm": 0.5841858842273536, + "learning_rate": 3.0449120449290663e-06, + "loss": 0.3221, + "step": 19091 + }, + { + "epoch": 0.8943645477116222, + "grad_norm": 0.5843086406714323, + "learning_rate": 3.0447269530412506e-06, + "loss": 0.3351, + "step": 19092 + }, + { + "epoch": 0.8944113927015506, + "grad_norm": 0.5553747711977369, + "learning_rate": 3.044541858018667e-06, + "loss": 0.3198, + "step": 19093 + }, + { + "epoch": 0.8944582376914789, + "grad_norm": 0.5690525367792828, + "learning_rate": 3.0443567598623824e-06, + "loss": 0.3152, + "step": 19094 + }, + { + "epoch": 0.8945050826814073, + "grad_norm": 0.5785416563939147, + "learning_rate": 3.0441716585734615e-06, + "loss": 0.3212, + "step": 19095 + }, + { + "epoch": 0.8945519276713355, + "grad_norm": 0.6242556575193051, + "learning_rate": 3.0439865541529696e-06, + "loss": 0.3132, + "step": 19096 + }, + { + "epoch": 0.8945987726612639, + "grad_norm": 0.5554930329594961, + "learning_rate": 3.0438014466019714e-06, + "loss": 0.3122, + "step": 19097 + }, + { + "epoch": 0.8946456176511922, + "grad_norm": 0.6391137473399994, + "learning_rate": 3.0436163359215333e-06, + "loss": 0.3538, + "step": 19098 + }, + { + "epoch": 0.8946924626411206, + "grad_norm": 0.6222070151759437, + "learning_rate": 3.0434312221127192e-06, + "loss": 0.3434, + "step": 19099 + }, + { + "epoch": 0.8947393076310488, + "grad_norm": 0.5471966804127215, + "learning_rate": 3.0432461051765956e-06, + "loss": 0.3024, + "step": 19100 + }, + { + "epoch": 0.8947861526209772, + "grad_norm": 0.6019240283507394, + "learning_rate": 3.043060985114226e-06, + "loss": 0.3103, + "step": 19101 + }, + { + "epoch": 0.8948329976109055, + "grad_norm": 0.6744522917677314, + "learning_rate": 3.042875861926678e-06, + "loss": 0.336, + "step": 19102 + }, + { + "epoch": 0.8948798426008339, + "grad_norm": 0.5808248475983562, + "learning_rate": 3.042690735615016e-06, + "loss": 0.3051, + "step": 19103 + }, + { + "epoch": 0.8949266875907622, + "grad_norm": 0.5410580447414682, + "learning_rate": 3.0425056061803044e-06, + "loss": 0.2935, + "step": 19104 + }, + { + "epoch": 0.8949735325806905, + "grad_norm": 0.5689108316407046, + "learning_rate": 3.0423204736236095e-06, + "loss": 0.3138, + "step": 19105 + }, + { + "epoch": 0.8950203775706188, + "grad_norm": 0.6091826790832967, + "learning_rate": 3.042135337945997e-06, + "loss": 0.33, + "step": 19106 + }, + { + "epoch": 0.8950672225605472, + "grad_norm": 0.5878157954654465, + "learning_rate": 3.0419501991485316e-06, + "loss": 0.3219, + "step": 19107 + }, + { + "epoch": 0.8951140675504755, + "grad_norm": 0.6071407531080983, + "learning_rate": 3.041765057232279e-06, + "loss": 0.3233, + "step": 19108 + }, + { + "epoch": 0.8951609125404038, + "grad_norm": 0.5922546427539692, + "learning_rate": 3.0415799121983046e-06, + "loss": 0.312, + "step": 19109 + }, + { + "epoch": 0.8952077575303321, + "grad_norm": 0.5673481769954617, + "learning_rate": 3.0413947640476747e-06, + "loss": 0.3223, + "step": 19110 + }, + { + "epoch": 0.8952546025202605, + "grad_norm": 0.6543820498588702, + "learning_rate": 3.0412096127814535e-06, + "loss": 0.3453, + "step": 19111 + }, + { + "epoch": 0.8953014475101888, + "grad_norm": 0.5982951134693559, + "learning_rate": 3.041024458400707e-06, + "loss": 0.3147, + "step": 19112 + }, + { + "epoch": 0.8953482925001172, + "grad_norm": 0.6047861838565142, + "learning_rate": 3.0408393009065006e-06, + "loss": 0.3361, + "step": 19113 + }, + { + "epoch": 0.8953951374900454, + "grad_norm": 0.6517121374346477, + "learning_rate": 3.0406541402998997e-06, + "loss": 0.3429, + "step": 19114 + }, + { + "epoch": 0.8954419824799738, + "grad_norm": 0.5899538381323578, + "learning_rate": 3.0404689765819706e-06, + "loss": 0.3107, + "step": 19115 + }, + { + "epoch": 0.8954888274699021, + "grad_norm": 0.5369401050605103, + "learning_rate": 3.040283809753778e-06, + "loss": 0.3218, + "step": 19116 + }, + { + "epoch": 0.8955356724598305, + "grad_norm": 0.5716128603438377, + "learning_rate": 3.0400986398163883e-06, + "loss": 0.3332, + "step": 19117 + }, + { + "epoch": 0.8955825174497587, + "grad_norm": 0.5487653731227624, + "learning_rate": 3.0399134667708667e-06, + "loss": 0.3056, + "step": 19118 + }, + { + "epoch": 0.895629362439687, + "grad_norm": 0.5473860129045661, + "learning_rate": 3.0397282906182784e-06, + "loss": 0.2963, + "step": 19119 + }, + { + "epoch": 0.8956762074296154, + "grad_norm": 0.6331844834716621, + "learning_rate": 3.0395431113596897e-06, + "loss": 0.3235, + "step": 19120 + }, + { + "epoch": 0.8957230524195438, + "grad_norm": 0.5719545678951279, + "learning_rate": 3.039357928996166e-06, + "loss": 0.3196, + "step": 19121 + }, + { + "epoch": 0.8957698974094721, + "grad_norm": 0.5455433192257042, + "learning_rate": 3.0391727435287726e-06, + "loss": 0.3159, + "step": 19122 + }, + { + "epoch": 0.8958167423994003, + "grad_norm": 0.6264111895791729, + "learning_rate": 3.0389875549585757e-06, + "loss": 0.3316, + "step": 19123 + }, + { + "epoch": 0.8958635873893287, + "grad_norm": 0.5983325489784109, + "learning_rate": 3.0388023632866413e-06, + "loss": 0.3367, + "step": 19124 + }, + { + "epoch": 0.895910432379257, + "grad_norm": 0.6178385316453374, + "learning_rate": 3.0386171685140347e-06, + "loss": 0.3274, + "step": 19125 + }, + { + "epoch": 0.8959572773691854, + "grad_norm": 0.576584526576553, + "learning_rate": 3.0384319706418207e-06, + "loss": 0.3249, + "step": 19126 + }, + { + "epoch": 0.8960041223591136, + "grad_norm": 0.6661298866758655, + "learning_rate": 3.038246769671067e-06, + "loss": 0.3323, + "step": 19127 + }, + { + "epoch": 0.896050967349042, + "grad_norm": 0.5725282861878197, + "learning_rate": 3.038061565602839e-06, + "loss": 0.327, + "step": 19128 + }, + { + "epoch": 0.8960978123389703, + "grad_norm": 0.6041882529816075, + "learning_rate": 3.0378763584382004e-06, + "loss": 0.3135, + "step": 19129 + }, + { + "epoch": 0.8961446573288987, + "grad_norm": 0.5876688274089376, + "learning_rate": 3.0376911481782194e-06, + "loss": 0.3254, + "step": 19130 + }, + { + "epoch": 0.896191502318827, + "grad_norm": 0.5970851801274955, + "learning_rate": 3.0375059348239617e-06, + "loss": 0.3203, + "step": 19131 + }, + { + "epoch": 0.8962383473087553, + "grad_norm": 0.6103236929630321, + "learning_rate": 3.0373207183764906e-06, + "loss": 0.3124, + "step": 19132 + }, + { + "epoch": 0.8962851922986836, + "grad_norm": 0.5690179973652317, + "learning_rate": 3.0371354988368752e-06, + "loss": 0.2915, + "step": 19133 + }, + { + "epoch": 0.896332037288612, + "grad_norm": 0.570085115009912, + "learning_rate": 3.0369502762061805e-06, + "loss": 0.3136, + "step": 19134 + }, + { + "epoch": 0.8963788822785403, + "grad_norm": 0.5608077442202652, + "learning_rate": 3.03676505048547e-06, + "loss": 0.3122, + "step": 19135 + }, + { + "epoch": 0.8964257272684686, + "grad_norm": 0.6304160823886967, + "learning_rate": 3.0365798216758123e-06, + "loss": 0.3257, + "step": 19136 + }, + { + "epoch": 0.8964725722583969, + "grad_norm": 0.6424340584220247, + "learning_rate": 3.0363945897782734e-06, + "loss": 0.3344, + "step": 19137 + }, + { + "epoch": 0.8965194172483253, + "grad_norm": 0.5581345773194736, + "learning_rate": 3.036209354793918e-06, + "loss": 0.3112, + "step": 19138 + }, + { + "epoch": 0.8965662622382536, + "grad_norm": 0.5646058234287958, + "learning_rate": 3.0360241167238126e-06, + "loss": 0.311, + "step": 19139 + }, + { + "epoch": 0.896613107228182, + "grad_norm": 0.5812435127176767, + "learning_rate": 3.0358388755690227e-06, + "loss": 0.3056, + "step": 19140 + }, + { + "epoch": 0.8966599522181102, + "grad_norm": 0.5960680372568457, + "learning_rate": 3.0356536313306156e-06, + "loss": 0.3094, + "step": 19141 + }, + { + "epoch": 0.8967067972080386, + "grad_norm": 0.5788993440062384, + "learning_rate": 3.0354683840096554e-06, + "loss": 0.2914, + "step": 19142 + }, + { + "epoch": 0.8967536421979669, + "grad_norm": 0.5573432933488418, + "learning_rate": 3.0352831336072105e-06, + "loss": 0.2934, + "step": 19143 + }, + { + "epoch": 0.8968004871878953, + "grad_norm": 0.61762569012705, + "learning_rate": 3.0350978801243443e-06, + "loss": 0.3279, + "step": 19144 + }, + { + "epoch": 0.8968473321778235, + "grad_norm": 0.5835792327766129, + "learning_rate": 3.034912623562125e-06, + "loss": 0.2997, + "step": 19145 + }, + { + "epoch": 0.8968941771677519, + "grad_norm": 0.5654414772508722, + "learning_rate": 3.0347273639216186e-06, + "loss": 0.3113, + "step": 19146 + }, + { + "epoch": 0.8969410221576802, + "grad_norm": 0.5675689775921278, + "learning_rate": 3.0345421012038894e-06, + "loss": 0.3111, + "step": 19147 + }, + { + "epoch": 0.8969878671476086, + "grad_norm": 0.5967502032784534, + "learning_rate": 3.034356835410006e-06, + "loss": 0.3104, + "step": 19148 + }, + { + "epoch": 0.8970347121375369, + "grad_norm": 0.582103885214716, + "learning_rate": 3.0341715665410326e-06, + "loss": 0.3108, + "step": 19149 + }, + { + "epoch": 0.8970815571274652, + "grad_norm": 0.5800895445051071, + "learning_rate": 3.033986294598036e-06, + "loss": 0.313, + "step": 19150 + }, + { + "epoch": 0.8971284021173935, + "grad_norm": 0.5887743375281911, + "learning_rate": 3.033801019582083e-06, + "loss": 0.3065, + "step": 19151 + }, + { + "epoch": 0.8971752471073219, + "grad_norm": 0.5817942209180603, + "learning_rate": 3.033615741494239e-06, + "loss": 0.3242, + "step": 19152 + }, + { + "epoch": 0.8972220920972502, + "grad_norm": 0.5393886468319938, + "learning_rate": 3.0334304603355703e-06, + "loss": 0.2796, + "step": 19153 + }, + { + "epoch": 0.8972689370871785, + "grad_norm": 0.568454213086298, + "learning_rate": 3.033245176107143e-06, + "loss": 0.3235, + "step": 19154 + }, + { + "epoch": 0.8973157820771068, + "grad_norm": 0.5785078439673316, + "learning_rate": 3.033059888810025e-06, + "loss": 0.3002, + "step": 19155 + }, + { + "epoch": 0.8973626270670352, + "grad_norm": 0.5975801981599558, + "learning_rate": 3.03287459844528e-06, + "loss": 0.3063, + "step": 19156 + }, + { + "epoch": 0.8974094720569635, + "grad_norm": 0.5516537654448461, + "learning_rate": 3.032689305013976e-06, + "loss": 0.3208, + "step": 19157 + }, + { + "epoch": 0.8974563170468919, + "grad_norm": 0.6198984094293545, + "learning_rate": 3.0325040085171796e-06, + "loss": 0.3326, + "step": 19158 + }, + { + "epoch": 0.8975031620368201, + "grad_norm": 0.5994007326502023, + "learning_rate": 3.0323187089559566e-06, + "loss": 0.3175, + "step": 19159 + }, + { + "epoch": 0.8975500070267485, + "grad_norm": 0.5976965240395282, + "learning_rate": 3.0321334063313716e-06, + "loss": 0.3263, + "step": 19160 + }, + { + "epoch": 0.8975968520166768, + "grad_norm": 0.6500108709953482, + "learning_rate": 3.031948100644494e-06, + "loss": 0.3207, + "step": 19161 + }, + { + "epoch": 0.8976436970066052, + "grad_norm": 0.7007004815216035, + "learning_rate": 3.031762791896389e-06, + "loss": 0.3232, + "step": 19162 + }, + { + "epoch": 0.8976905419965334, + "grad_norm": 0.593815035954561, + "learning_rate": 3.0315774800881215e-06, + "loss": 0.3138, + "step": 19163 + }, + { + "epoch": 0.8977373869864618, + "grad_norm": 0.5586638278051498, + "learning_rate": 3.0313921652207607e-06, + "loss": 0.3172, + "step": 19164 + }, + { + "epoch": 0.8977842319763901, + "grad_norm": 0.6020477596294659, + "learning_rate": 3.0312068472953704e-06, + "loss": 0.3348, + "step": 19165 + }, + { + "epoch": 0.8978310769663185, + "grad_norm": 0.561875776077522, + "learning_rate": 3.0310215263130183e-06, + "loss": 0.312, + "step": 19166 + }, + { + "epoch": 0.8978779219562468, + "grad_norm": 0.5584122092251664, + "learning_rate": 3.0308362022747717e-06, + "loss": 0.3122, + "step": 19167 + }, + { + "epoch": 0.8979247669461751, + "grad_norm": 0.6453899635950102, + "learning_rate": 3.0306508751816948e-06, + "loss": 0.3124, + "step": 19168 + }, + { + "epoch": 0.8979716119361034, + "grad_norm": 0.6105778354153487, + "learning_rate": 3.030465545034857e-06, + "loss": 0.2923, + "step": 19169 + }, + { + "epoch": 0.8980184569260318, + "grad_norm": 0.5615532488847819, + "learning_rate": 3.0302802118353226e-06, + "loss": 0.3059, + "step": 19170 + }, + { + "epoch": 0.8980653019159601, + "grad_norm": 0.5444266398011367, + "learning_rate": 3.0300948755841585e-06, + "loss": 0.3118, + "step": 19171 + }, + { + "epoch": 0.8981121469058884, + "grad_norm": 0.5181721105654762, + "learning_rate": 3.029909536282432e-06, + "loss": 0.3027, + "step": 19172 + }, + { + "epoch": 0.8981589918958167, + "grad_norm": 0.6274376444776699, + "learning_rate": 3.0297241939312094e-06, + "loss": 0.3106, + "step": 19173 + }, + { + "epoch": 0.8982058368857451, + "grad_norm": 0.6051980468128628, + "learning_rate": 3.029538848531557e-06, + "loss": 0.3464, + "step": 19174 + }, + { + "epoch": 0.8982526818756734, + "grad_norm": 0.6414886954602923, + "learning_rate": 3.0293535000845407e-06, + "loss": 0.3214, + "step": 19175 + }, + { + "epoch": 0.8982995268656018, + "grad_norm": 0.5937351224758813, + "learning_rate": 3.02916814859123e-06, + "loss": 0.3059, + "step": 19176 + }, + { + "epoch": 0.89834637185553, + "grad_norm": 0.594541405103265, + "learning_rate": 3.028982794052688e-06, + "loss": 0.3202, + "step": 19177 + }, + { + "epoch": 0.8983932168454584, + "grad_norm": 0.6189943151042224, + "learning_rate": 3.028797436469984e-06, + "loss": 0.3257, + "step": 19178 + }, + { + "epoch": 0.8984400618353867, + "grad_norm": 0.5946363224971098, + "learning_rate": 3.0286120758441835e-06, + "loss": 0.2978, + "step": 19179 + }, + { + "epoch": 0.8984869068253151, + "grad_norm": 0.6098936713347483, + "learning_rate": 3.028426712176353e-06, + "loss": 0.3239, + "step": 19180 + }, + { + "epoch": 0.8985337518152433, + "grad_norm": 0.6025223771336505, + "learning_rate": 3.0282413454675597e-06, + "loss": 0.3248, + "step": 19181 + }, + { + "epoch": 0.8985805968051717, + "grad_norm": 0.5890021627874361, + "learning_rate": 3.0280559757188704e-06, + "loss": 0.3114, + "step": 19182 + }, + { + "epoch": 0.8986274417951, + "grad_norm": 0.5911975927996574, + "learning_rate": 3.0278706029313514e-06, + "loss": 0.3364, + "step": 19183 + }, + { + "epoch": 0.8986742867850284, + "grad_norm": 0.595005412477383, + "learning_rate": 3.02768522710607e-06, + "loss": 0.3297, + "step": 19184 + }, + { + "epoch": 0.8987211317749567, + "grad_norm": 0.6685902760598706, + "learning_rate": 3.0274998482440926e-06, + "loss": 0.3351, + "step": 19185 + }, + { + "epoch": 0.898767976764885, + "grad_norm": 0.6602761220356436, + "learning_rate": 3.027314466346486e-06, + "loss": 0.3196, + "step": 19186 + }, + { + "epoch": 0.8988148217548133, + "grad_norm": 0.5530987867920718, + "learning_rate": 3.0271290814143174e-06, + "loss": 0.3047, + "step": 19187 + }, + { + "epoch": 0.8988616667447417, + "grad_norm": 0.6024369729475377, + "learning_rate": 3.0269436934486536e-06, + "loss": 0.3165, + "step": 19188 + }, + { + "epoch": 0.89890851173467, + "grad_norm": 0.5981180513386907, + "learning_rate": 3.026758302450561e-06, + "loss": 0.3097, + "step": 19189 + }, + { + "epoch": 0.8989553567245983, + "grad_norm": 0.6097866870525034, + "learning_rate": 3.026572908421107e-06, + "loss": 0.3191, + "step": 19190 + }, + { + "epoch": 0.8990022017145266, + "grad_norm": 0.5524661682457414, + "learning_rate": 3.026387511361358e-06, + "loss": 0.3074, + "step": 19191 + }, + { + "epoch": 0.899049046704455, + "grad_norm": 0.5582786478591223, + "learning_rate": 3.026202111272381e-06, + "loss": 0.3128, + "step": 19192 + }, + { + "epoch": 0.8990958916943833, + "grad_norm": 0.62299928445976, + "learning_rate": 3.0260167081552433e-06, + "loss": 0.3158, + "step": 19193 + }, + { + "epoch": 0.8991427366843117, + "grad_norm": 0.5817944040526316, + "learning_rate": 3.025831302011012e-06, + "loss": 0.331, + "step": 19194 + }, + { + "epoch": 0.8991895816742399, + "grad_norm": 0.6034153892477646, + "learning_rate": 3.0256458928407534e-06, + "loss": 0.3102, + "step": 19195 + }, + { + "epoch": 0.8992364266641683, + "grad_norm": 0.6084422612191301, + "learning_rate": 3.0254604806455347e-06, + "loss": 0.3173, + "step": 19196 + }, + { + "epoch": 0.8992832716540966, + "grad_norm": 0.54132363781203, + "learning_rate": 3.0252750654264236e-06, + "loss": 0.2943, + "step": 19197 + }, + { + "epoch": 0.899330116644025, + "grad_norm": 0.6623203434155175, + "learning_rate": 3.0250896471844858e-06, + "loss": 0.3325, + "step": 19198 + }, + { + "epoch": 0.8993769616339532, + "grad_norm": 0.6372066826482679, + "learning_rate": 3.0249042259207896e-06, + "loss": 0.3126, + "step": 19199 + }, + { + "epoch": 0.8994238066238815, + "grad_norm": 0.5936690617291212, + "learning_rate": 3.0247188016364008e-06, + "loss": 0.31, + "step": 19200 + }, + { + "epoch": 0.8994706516138099, + "grad_norm": 0.571703894574075, + "learning_rate": 3.0245333743323877e-06, + "loss": 0.3112, + "step": 19201 + }, + { + "epoch": 0.8995174966037383, + "grad_norm": 0.551986343018951, + "learning_rate": 3.0243479440098165e-06, + "loss": 0.3043, + "step": 19202 + }, + { + "epoch": 0.8995643415936666, + "grad_norm": 0.5741560945647668, + "learning_rate": 3.0241625106697552e-06, + "loss": 0.3277, + "step": 19203 + }, + { + "epoch": 0.8996111865835948, + "grad_norm": 0.544572980386039, + "learning_rate": 3.0239770743132707e-06, + "loss": 0.2988, + "step": 19204 + }, + { + "epoch": 0.8996580315735232, + "grad_norm": 0.5777075616358064, + "learning_rate": 3.0237916349414294e-06, + "loss": 0.3162, + "step": 19205 + }, + { + "epoch": 0.8997048765634515, + "grad_norm": 0.6000647611437979, + "learning_rate": 3.0236061925552985e-06, + "loss": 0.3054, + "step": 19206 + }, + { + "epoch": 0.8997517215533799, + "grad_norm": 0.6371524592894376, + "learning_rate": 3.0234207471559457e-06, + "loss": 0.3345, + "step": 19207 + }, + { + "epoch": 0.8997985665433081, + "grad_norm": 0.5800827953335215, + "learning_rate": 3.0232352987444387e-06, + "loss": 0.3111, + "step": 19208 + }, + { + "epoch": 0.8998454115332365, + "grad_norm": 0.6103016136927603, + "learning_rate": 3.0230498473218426e-06, + "loss": 0.3011, + "step": 19209 + }, + { + "epoch": 0.8998922565231648, + "grad_norm": 0.6018990050927079, + "learning_rate": 3.022864392889228e-06, + "loss": 0.313, + "step": 19210 + }, + { + "epoch": 0.8999391015130932, + "grad_norm": 0.572983232796292, + "learning_rate": 3.0226789354476593e-06, + "loss": 0.3039, + "step": 19211 + }, + { + "epoch": 0.8999859465030215, + "grad_norm": 0.6271287175693661, + "learning_rate": 3.0224934749982043e-06, + "loss": 0.3132, + "step": 19212 + }, + { + "epoch": 0.9000327914929498, + "grad_norm": 0.5678329663245998, + "learning_rate": 3.022308011541931e-06, + "loss": 0.304, + "step": 19213 + }, + { + "epoch": 0.9000796364828781, + "grad_norm": 0.5607495652522675, + "learning_rate": 3.0221225450799063e-06, + "loss": 0.302, + "step": 19214 + }, + { + "epoch": 0.9001264814728065, + "grad_norm": 0.5778983113274402, + "learning_rate": 3.0219370756131982e-06, + "loss": 0.3107, + "step": 19215 + }, + { + "epoch": 0.9001733264627348, + "grad_norm": 0.5552068104348887, + "learning_rate": 3.0217516031428728e-06, + "loss": 0.3054, + "step": 19216 + }, + { + "epoch": 0.9002201714526631, + "grad_norm": 0.586033702472853, + "learning_rate": 3.021566127669998e-06, + "loss": 0.2908, + "step": 19217 + }, + { + "epoch": 0.9002670164425914, + "grad_norm": 0.5693599233405998, + "learning_rate": 3.0213806491956417e-06, + "loss": 0.3015, + "step": 19218 + }, + { + "epoch": 0.9003138614325198, + "grad_norm": 0.6349259136559644, + "learning_rate": 3.0211951677208707e-06, + "loss": 0.328, + "step": 19219 + }, + { + "epoch": 0.9003607064224481, + "grad_norm": 0.6398953771774473, + "learning_rate": 3.021009683246752e-06, + "loss": 0.321, + "step": 19220 + }, + { + "epoch": 0.9004075514123765, + "grad_norm": 0.5971921575567728, + "learning_rate": 3.0208241957743543e-06, + "loss": 0.3009, + "step": 19221 + }, + { + "epoch": 0.9004543964023047, + "grad_norm": 0.6044806629842892, + "learning_rate": 3.020638705304744e-06, + "loss": 0.3282, + "step": 19222 + }, + { + "epoch": 0.9005012413922331, + "grad_norm": 0.5796744438943768, + "learning_rate": 3.0204532118389886e-06, + "loss": 0.3166, + "step": 19223 + }, + { + "epoch": 0.9005480863821614, + "grad_norm": 0.5732971290310093, + "learning_rate": 3.020267715378155e-06, + "loss": 0.2879, + "step": 19224 + }, + { + "epoch": 0.9005949313720898, + "grad_norm": 0.6236343650374275, + "learning_rate": 3.0200822159233134e-06, + "loss": 0.3342, + "step": 19225 + }, + { + "epoch": 0.900641776362018, + "grad_norm": 0.5421255848543594, + "learning_rate": 3.0198967134755276e-06, + "loss": 0.304, + "step": 19226 + }, + { + "epoch": 0.9006886213519464, + "grad_norm": 0.6237993697340667, + "learning_rate": 3.019711208035867e-06, + "loss": 0.321, + "step": 19227 + }, + { + "epoch": 0.9007354663418747, + "grad_norm": 0.6163956149734443, + "learning_rate": 3.0195256996054e-06, + "loss": 0.3078, + "step": 19228 + }, + { + "epoch": 0.9007823113318031, + "grad_norm": 0.5610478757912358, + "learning_rate": 3.019340188185193e-06, + "loss": 0.3209, + "step": 19229 + }, + { + "epoch": 0.9008291563217314, + "grad_norm": 0.6221314860262438, + "learning_rate": 3.0191546737763134e-06, + "loss": 0.3443, + "step": 19230 + }, + { + "epoch": 0.9008760013116597, + "grad_norm": 0.6070940771838587, + "learning_rate": 3.018969156379829e-06, + "loss": 0.312, + "step": 19231 + }, + { + "epoch": 0.900922846301588, + "grad_norm": 0.6212525750223615, + "learning_rate": 3.018783635996808e-06, + "loss": 0.3044, + "step": 19232 + }, + { + "epoch": 0.9009696912915164, + "grad_norm": 0.5920140160659613, + "learning_rate": 3.018598112628317e-06, + "loss": 0.3171, + "step": 19233 + }, + { + "epoch": 0.9010165362814447, + "grad_norm": 0.5626649830898518, + "learning_rate": 3.018412586275425e-06, + "loss": 0.2991, + "step": 19234 + }, + { + "epoch": 0.901063381271373, + "grad_norm": 0.5910574734179646, + "learning_rate": 3.0182270569391982e-06, + "loss": 0.3185, + "step": 19235 + }, + { + "epoch": 0.9011102262613013, + "grad_norm": 0.5565931270663398, + "learning_rate": 3.018041524620705e-06, + "loss": 0.294, + "step": 19236 + }, + { + "epoch": 0.9011570712512297, + "grad_norm": 0.6095552458736752, + "learning_rate": 3.0178559893210135e-06, + "loss": 0.3163, + "step": 19237 + }, + { + "epoch": 0.901203916241158, + "grad_norm": 0.5794638696108139, + "learning_rate": 3.0176704510411904e-06, + "loss": 0.3115, + "step": 19238 + }, + { + "epoch": 0.9012507612310864, + "grad_norm": 0.6034609648826014, + "learning_rate": 3.0174849097823045e-06, + "loss": 0.3087, + "step": 19239 + }, + { + "epoch": 0.9012976062210146, + "grad_norm": 0.5946062051640566, + "learning_rate": 3.0172993655454223e-06, + "loss": 0.3164, + "step": 19240 + }, + { + "epoch": 0.901344451210943, + "grad_norm": 0.5520394271151988, + "learning_rate": 3.017113818331613e-06, + "loss": 0.3131, + "step": 19241 + }, + { + "epoch": 0.9013912962008713, + "grad_norm": 0.5849883802003386, + "learning_rate": 3.016928268141943e-06, + "loss": 0.3289, + "step": 19242 + }, + { + "epoch": 0.9014381411907997, + "grad_norm": 0.5364794416630079, + "learning_rate": 3.0167427149774815e-06, + "loss": 0.3173, + "step": 19243 + }, + { + "epoch": 0.9014849861807279, + "grad_norm": 0.5425023169221611, + "learning_rate": 3.016557158839295e-06, + "loss": 0.3143, + "step": 19244 + }, + { + "epoch": 0.9015318311706563, + "grad_norm": 0.5750722695710829, + "learning_rate": 3.0163715997284513e-06, + "loss": 0.3279, + "step": 19245 + }, + { + "epoch": 0.9015786761605846, + "grad_norm": 0.5341778570977685, + "learning_rate": 3.0161860376460207e-06, + "loss": 0.305, + "step": 19246 + }, + { + "epoch": 0.901625521150513, + "grad_norm": 0.5988852931005749, + "learning_rate": 3.0160004725930675e-06, + "loss": 0.3349, + "step": 19247 + }, + { + "epoch": 0.9016723661404413, + "grad_norm": 0.6113586537984609, + "learning_rate": 3.015814904570661e-06, + "loss": 0.3275, + "step": 19248 + }, + { + "epoch": 0.9017192111303696, + "grad_norm": 0.5922879479512351, + "learning_rate": 3.0156293335798703e-06, + "loss": 0.3173, + "step": 19249 + }, + { + "epoch": 0.9017660561202979, + "grad_norm": 0.5960264010602643, + "learning_rate": 3.015443759621763e-06, + "loss": 0.3192, + "step": 19250 + }, + { + "epoch": 0.9018129011102263, + "grad_norm": 0.5904941395857263, + "learning_rate": 3.015258182697405e-06, + "loss": 0.3166, + "step": 19251 + }, + { + "epoch": 0.9018597461001546, + "grad_norm": 0.5935963539572259, + "learning_rate": 3.0150726028078662e-06, + "loss": 0.3203, + "step": 19252 + }, + { + "epoch": 0.9019065910900829, + "grad_norm": 0.6208245490405105, + "learning_rate": 3.0148870199542145e-06, + "loss": 0.3264, + "step": 19253 + }, + { + "epoch": 0.9019534360800112, + "grad_norm": 0.6152467030647611, + "learning_rate": 3.014701434137517e-06, + "loss": 0.3378, + "step": 19254 + }, + { + "epoch": 0.9020002810699396, + "grad_norm": 0.5632431934001271, + "learning_rate": 3.0145158453588424e-06, + "loss": 0.3052, + "step": 19255 + }, + { + "epoch": 0.9020471260598679, + "grad_norm": 0.6714964362500148, + "learning_rate": 3.0143302536192578e-06, + "loss": 0.3551, + "step": 19256 + }, + { + "epoch": 0.9020939710497963, + "grad_norm": 0.6542514387532267, + "learning_rate": 3.0141446589198325e-06, + "loss": 0.3314, + "step": 19257 + }, + { + "epoch": 0.9021408160397245, + "grad_norm": 0.5645086337549652, + "learning_rate": 3.0139590612616336e-06, + "loss": 0.3156, + "step": 19258 + }, + { + "epoch": 0.9021876610296529, + "grad_norm": 0.6054817368219941, + "learning_rate": 3.0137734606457293e-06, + "loss": 0.3423, + "step": 19259 + }, + { + "epoch": 0.9022345060195812, + "grad_norm": 0.5772530506418321, + "learning_rate": 3.0135878570731884e-06, + "loss": 0.3167, + "step": 19260 + }, + { + "epoch": 0.9022813510095096, + "grad_norm": 0.5579026248019605, + "learning_rate": 3.0134022505450777e-06, + "loss": 0.3062, + "step": 19261 + }, + { + "epoch": 0.9023281959994378, + "grad_norm": 0.5854788866552866, + "learning_rate": 3.013216641062467e-06, + "loss": 0.3204, + "step": 19262 + }, + { + "epoch": 0.9023750409893662, + "grad_norm": 0.605585882599958, + "learning_rate": 3.0130310286264225e-06, + "loss": 0.3264, + "step": 19263 + }, + { + "epoch": 0.9024218859792945, + "grad_norm": 0.6287354263276635, + "learning_rate": 3.0128454132380142e-06, + "loss": 0.3424, + "step": 19264 + }, + { + "epoch": 0.9024687309692229, + "grad_norm": 0.6519117652878498, + "learning_rate": 3.0126597948983094e-06, + "loss": 0.3402, + "step": 19265 + }, + { + "epoch": 0.9025155759591512, + "grad_norm": 0.5791220637188949, + "learning_rate": 3.0124741736083757e-06, + "loss": 0.3168, + "step": 19266 + }, + { + "epoch": 0.9025624209490795, + "grad_norm": 0.6181113319572146, + "learning_rate": 3.012288549369283e-06, + "loss": 0.3063, + "step": 19267 + }, + { + "epoch": 0.9026092659390078, + "grad_norm": 0.5939366526403539, + "learning_rate": 3.012102922182098e-06, + "loss": 0.3266, + "step": 19268 + }, + { + "epoch": 0.9026561109289362, + "grad_norm": 0.5885201703415482, + "learning_rate": 3.011917292047889e-06, + "loss": 0.3217, + "step": 19269 + }, + { + "epoch": 0.9027029559188645, + "grad_norm": 0.6237122586834177, + "learning_rate": 3.0117316589677247e-06, + "loss": 0.322, + "step": 19270 + }, + { + "epoch": 0.9027498009087928, + "grad_norm": 0.5854638874732716, + "learning_rate": 3.011546022942674e-06, + "loss": 0.3099, + "step": 19271 + }, + { + "epoch": 0.9027966458987211, + "grad_norm": 0.601884860744431, + "learning_rate": 3.0113603839738038e-06, + "loss": 0.3005, + "step": 19272 + }, + { + "epoch": 0.9028434908886495, + "grad_norm": 0.6138296776757257, + "learning_rate": 3.011174742062183e-06, + "loss": 0.3393, + "step": 19273 + }, + { + "epoch": 0.9028903358785778, + "grad_norm": 0.5430012470272529, + "learning_rate": 3.0109890972088807e-06, + "loss": 0.2993, + "step": 19274 + }, + { + "epoch": 0.9029371808685062, + "grad_norm": 0.540736348795068, + "learning_rate": 3.010803449414964e-06, + "loss": 0.2909, + "step": 19275 + }, + { + "epoch": 0.9029840258584344, + "grad_norm": 0.6124523937087576, + "learning_rate": 3.0106177986815015e-06, + "loss": 0.3425, + "step": 19276 + }, + { + "epoch": 0.9030308708483628, + "grad_norm": 0.58270363741053, + "learning_rate": 3.010432145009563e-06, + "loss": 0.3257, + "step": 19277 + }, + { + "epoch": 0.9030777158382911, + "grad_norm": 0.6451237638352986, + "learning_rate": 3.010246488400215e-06, + "loss": 0.3494, + "step": 19278 + }, + { + "epoch": 0.9031245608282195, + "grad_norm": 0.5855875873664039, + "learning_rate": 3.010060828854527e-06, + "loss": 0.3483, + "step": 19279 + }, + { + "epoch": 0.9031714058181477, + "grad_norm": 0.6323891810707388, + "learning_rate": 3.0098751663735675e-06, + "loss": 0.3201, + "step": 19280 + }, + { + "epoch": 0.903218250808076, + "grad_norm": 0.6199129748779542, + "learning_rate": 3.0096895009584044e-06, + "loss": 0.326, + "step": 19281 + }, + { + "epoch": 0.9032650957980044, + "grad_norm": 0.7492338143309719, + "learning_rate": 3.0095038326101056e-06, + "loss": 0.3282, + "step": 19282 + }, + { + "epoch": 0.9033119407879328, + "grad_norm": 0.5886388379500358, + "learning_rate": 3.0093181613297413e-06, + "loss": 0.3111, + "step": 19283 + }, + { + "epoch": 0.9033587857778611, + "grad_norm": 0.6139320496765975, + "learning_rate": 3.009132487118378e-06, + "loss": 0.3214, + "step": 19284 + }, + { + "epoch": 0.9034056307677893, + "grad_norm": 0.604375851325939, + "learning_rate": 3.008946809977086e-06, + "loss": 0.3267, + "step": 19285 + }, + { + "epoch": 0.9034524757577177, + "grad_norm": 0.5361635810160732, + "learning_rate": 3.0087611299069332e-06, + "loss": 0.3026, + "step": 19286 + }, + { + "epoch": 0.903499320747646, + "grad_norm": 0.5995336464565276, + "learning_rate": 3.0085754469089866e-06, + "loss": 0.3067, + "step": 19287 + }, + { + "epoch": 0.9035461657375744, + "grad_norm": 0.6084914292411047, + "learning_rate": 3.0083897609843175e-06, + "loss": 0.3324, + "step": 19288 + }, + { + "epoch": 0.9035930107275026, + "grad_norm": 0.6324003326679982, + "learning_rate": 3.0082040721339928e-06, + "loss": 0.3207, + "step": 19289 + }, + { + "epoch": 0.903639855717431, + "grad_norm": 0.6073897359707043, + "learning_rate": 3.0080183803590814e-06, + "loss": 0.3015, + "step": 19290 + }, + { + "epoch": 0.9036867007073593, + "grad_norm": 0.5723748999053744, + "learning_rate": 3.0078326856606515e-06, + "loss": 0.3028, + "step": 19291 + }, + { + "epoch": 0.9037335456972877, + "grad_norm": 0.5987751573058796, + "learning_rate": 3.0076469880397724e-06, + "loss": 0.3147, + "step": 19292 + }, + { + "epoch": 0.903780390687216, + "grad_norm": 0.5698468795189616, + "learning_rate": 3.0074612874975123e-06, + "loss": 0.2948, + "step": 19293 + }, + { + "epoch": 0.9038272356771443, + "grad_norm": 0.5853704821352097, + "learning_rate": 3.00727558403494e-06, + "loss": 0.3091, + "step": 19294 + }, + { + "epoch": 0.9038740806670726, + "grad_norm": 0.6091976763654328, + "learning_rate": 3.007089877653125e-06, + "loss": 0.3115, + "step": 19295 + }, + { + "epoch": 0.903920925657001, + "grad_norm": 0.5489722690008942, + "learning_rate": 3.0069041683531342e-06, + "loss": 0.295, + "step": 19296 + }, + { + "epoch": 0.9039677706469293, + "grad_norm": 0.5700655217143309, + "learning_rate": 3.0067184561360373e-06, + "loss": 0.3243, + "step": 19297 + }, + { + "epoch": 0.9040146156368576, + "grad_norm": 0.6384064717812447, + "learning_rate": 3.0065327410029036e-06, + "loss": 0.3385, + "step": 19298 + }, + { + "epoch": 0.9040614606267859, + "grad_norm": 0.5909647255021677, + "learning_rate": 3.006347022954802e-06, + "loss": 0.301, + "step": 19299 + }, + { + "epoch": 0.9041083056167143, + "grad_norm": 0.5482803068330984, + "learning_rate": 3.0061613019927995e-06, + "loss": 0.2906, + "step": 19300 + }, + { + "epoch": 0.9041551506066426, + "grad_norm": 0.561848565999287, + "learning_rate": 3.0059755781179657e-06, + "loss": 0.3168, + "step": 19301 + }, + { + "epoch": 0.904201995596571, + "grad_norm": 0.5967346575333236, + "learning_rate": 3.0057898513313705e-06, + "loss": 0.3147, + "step": 19302 + }, + { + "epoch": 0.9042488405864992, + "grad_norm": 0.621868496045352, + "learning_rate": 3.005604121634081e-06, + "loss": 0.3258, + "step": 19303 + }, + { + "epoch": 0.9042956855764276, + "grad_norm": 0.5913043865237276, + "learning_rate": 3.0054183890271675e-06, + "loss": 0.3203, + "step": 19304 + }, + { + "epoch": 0.9043425305663559, + "grad_norm": 0.6010408755474613, + "learning_rate": 3.0052326535116975e-06, + "loss": 0.312, + "step": 19305 + }, + { + "epoch": 0.9043893755562843, + "grad_norm": 0.5983472035827567, + "learning_rate": 3.005046915088741e-06, + "loss": 0.302, + "step": 19306 + }, + { + "epoch": 0.9044362205462125, + "grad_norm": 0.565568262801332, + "learning_rate": 3.004861173759367e-06, + "loss": 0.3294, + "step": 19307 + }, + { + "epoch": 0.9044830655361409, + "grad_norm": 0.5977437774302425, + "learning_rate": 3.004675429524643e-06, + "loss": 0.2985, + "step": 19308 + }, + { + "epoch": 0.9045299105260692, + "grad_norm": 0.6111254116947673, + "learning_rate": 3.004489682385639e-06, + "loss": 0.3149, + "step": 19309 + }, + { + "epoch": 0.9045767555159976, + "grad_norm": 0.5894953446349303, + "learning_rate": 3.004303932343424e-06, + "loss": 0.3122, + "step": 19310 + }, + { + "epoch": 0.9046236005059259, + "grad_norm": 0.5820078575380208, + "learning_rate": 3.0041181793990658e-06, + "loss": 0.3042, + "step": 19311 + }, + { + "epoch": 0.9046704454958542, + "grad_norm": 0.5273086470887566, + "learning_rate": 3.003932423553635e-06, + "loss": 0.2956, + "step": 19312 + }, + { + "epoch": 0.9047172904857825, + "grad_norm": 0.6594219050748653, + "learning_rate": 3.0037466648082e-06, + "loss": 0.3067, + "step": 19313 + }, + { + "epoch": 0.9047641354757109, + "grad_norm": 0.5912583180038556, + "learning_rate": 3.0035609031638285e-06, + "loss": 0.2945, + "step": 19314 + }, + { + "epoch": 0.9048109804656392, + "grad_norm": 0.6408315054527153, + "learning_rate": 3.003375138621591e-06, + "loss": 0.325, + "step": 19315 + }, + { + "epoch": 0.9048578254555675, + "grad_norm": 0.6043867229065188, + "learning_rate": 3.0031893711825568e-06, + "loss": 0.3297, + "step": 19316 + }, + { + "epoch": 0.9049046704454958, + "grad_norm": 0.6044276921457054, + "learning_rate": 3.003003600847793e-06, + "loss": 0.3217, + "step": 19317 + }, + { + "epoch": 0.9049515154354242, + "grad_norm": 0.6355239871227972, + "learning_rate": 3.0028178276183707e-06, + "loss": 0.3268, + "step": 19318 + }, + { + "epoch": 0.9049983604253525, + "grad_norm": 0.6388104568143105, + "learning_rate": 3.0026320514953577e-06, + "loss": 0.3329, + "step": 19319 + }, + { + "epoch": 0.9050452054152809, + "grad_norm": 0.6358241700433775, + "learning_rate": 3.0024462724798242e-06, + "loss": 0.3406, + "step": 19320 + }, + { + "epoch": 0.9050920504052091, + "grad_norm": 0.605059931678825, + "learning_rate": 3.0022604905728383e-06, + "loss": 0.323, + "step": 19321 + }, + { + "epoch": 0.9051388953951375, + "grad_norm": 0.6195076847703403, + "learning_rate": 3.0020747057754697e-06, + "loss": 0.3086, + "step": 19322 + }, + { + "epoch": 0.9051857403850658, + "grad_norm": 0.6076163247523316, + "learning_rate": 3.001888918088788e-06, + "loss": 0.3435, + "step": 19323 + }, + { + "epoch": 0.9052325853749942, + "grad_norm": 0.622829712302093, + "learning_rate": 3.00170312751386e-06, + "loss": 0.348, + "step": 19324 + }, + { + "epoch": 0.9052794303649224, + "grad_norm": 0.605503556540306, + "learning_rate": 3.001517334051758e-06, + "loss": 0.3196, + "step": 19325 + }, + { + "epoch": 0.9053262753548508, + "grad_norm": 0.6138941889834051, + "learning_rate": 3.001331537703549e-06, + "loss": 0.319, + "step": 19326 + }, + { + "epoch": 0.9053731203447791, + "grad_norm": 0.5850711746706165, + "learning_rate": 3.0011457384703037e-06, + "loss": 0.3119, + "step": 19327 + }, + { + "epoch": 0.9054199653347075, + "grad_norm": 0.607234192220424, + "learning_rate": 3.00095993635309e-06, + "loss": 0.3194, + "step": 19328 + }, + { + "epoch": 0.9054668103246358, + "grad_norm": 0.5586152092711942, + "learning_rate": 3.000774131352978e-06, + "loss": 0.3125, + "step": 19329 + }, + { + "epoch": 0.9055136553145641, + "grad_norm": 0.5523801087423853, + "learning_rate": 3.0005883234710374e-06, + "loss": 0.3089, + "step": 19330 + }, + { + "epoch": 0.9055605003044924, + "grad_norm": 0.6017393851367993, + "learning_rate": 3.000402512708336e-06, + "loss": 0.3232, + "step": 19331 + }, + { + "epoch": 0.9056073452944208, + "grad_norm": 0.5658124884322692, + "learning_rate": 3.0002166990659443e-06, + "loss": 0.328, + "step": 19332 + }, + { + "epoch": 0.9056541902843491, + "grad_norm": 0.583655747361331, + "learning_rate": 3.000030882544931e-06, + "loss": 0.3135, + "step": 19333 + }, + { + "epoch": 0.9057010352742774, + "grad_norm": 0.6041983228122433, + "learning_rate": 2.9998450631463653e-06, + "loss": 0.3315, + "step": 19334 + }, + { + "epoch": 0.9057478802642057, + "grad_norm": 0.6758165159145032, + "learning_rate": 2.9996592408713176e-06, + "loss": 0.3196, + "step": 19335 + }, + { + "epoch": 0.9057947252541341, + "grad_norm": 0.5752260521801885, + "learning_rate": 2.999473415720856e-06, + "loss": 0.3206, + "step": 19336 + }, + { + "epoch": 0.9058415702440624, + "grad_norm": 0.5932699653001788, + "learning_rate": 2.999287587696051e-06, + "loss": 0.3294, + "step": 19337 + }, + { + "epoch": 0.9058884152339908, + "grad_norm": 0.6040281770883069, + "learning_rate": 2.9991017567979713e-06, + "loss": 0.3359, + "step": 19338 + }, + { + "epoch": 0.905935260223919, + "grad_norm": 1.03457486383002, + "learning_rate": 2.998915923027686e-06, + "loss": 0.3377, + "step": 19339 + }, + { + "epoch": 0.9059821052138474, + "grad_norm": 0.6025363747838256, + "learning_rate": 2.9987300863862655e-06, + "loss": 0.2896, + "step": 19340 + }, + { + "epoch": 0.9060289502037757, + "grad_norm": 0.5879473488167474, + "learning_rate": 2.9985442468747783e-06, + "loss": 0.3044, + "step": 19341 + }, + { + "epoch": 0.9060757951937041, + "grad_norm": 0.5677692128498804, + "learning_rate": 2.9983584044942944e-06, + "loss": 0.3226, + "step": 19342 + }, + { + "epoch": 0.9061226401836323, + "grad_norm": 0.6035987203946224, + "learning_rate": 2.9981725592458827e-06, + "loss": 0.3244, + "step": 19343 + }, + { + "epoch": 0.9061694851735607, + "grad_norm": 0.574613745582974, + "learning_rate": 2.9979867111306143e-06, + "loss": 0.3401, + "step": 19344 + }, + { + "epoch": 0.906216330163489, + "grad_norm": 0.5549427546829093, + "learning_rate": 2.997800860149557e-06, + "loss": 0.3179, + "step": 19345 + }, + { + "epoch": 0.9062631751534174, + "grad_norm": 0.5890795556542039, + "learning_rate": 2.9976150063037802e-06, + "loss": 0.2963, + "step": 19346 + }, + { + "epoch": 0.9063100201433457, + "grad_norm": 0.6331911153452413, + "learning_rate": 2.9974291495943546e-06, + "loss": 0.3316, + "step": 19347 + }, + { + "epoch": 0.906356865133274, + "grad_norm": 0.5932912967034455, + "learning_rate": 2.997243290022349e-06, + "loss": 0.3105, + "step": 19348 + }, + { + "epoch": 0.9064037101232023, + "grad_norm": 0.6203257250786629, + "learning_rate": 2.9970574275888335e-06, + "loss": 0.3277, + "step": 19349 + }, + { + "epoch": 0.9064505551131307, + "grad_norm": 0.6191602914749791, + "learning_rate": 2.9968715622948776e-06, + "loss": 0.3241, + "step": 19350 + }, + { + "epoch": 0.906497400103059, + "grad_norm": 0.5738438712759378, + "learning_rate": 2.9966856941415507e-06, + "loss": 0.3189, + "step": 19351 + }, + { + "epoch": 0.9065442450929873, + "grad_norm": 0.6352095684499427, + "learning_rate": 2.996499823129922e-06, + "loss": 0.3206, + "step": 19352 + }, + { + "epoch": 0.9065910900829156, + "grad_norm": 0.647642742528912, + "learning_rate": 2.996313949261062e-06, + "loss": 0.3252, + "step": 19353 + }, + { + "epoch": 0.906637935072844, + "grad_norm": 0.5772750899885413, + "learning_rate": 2.99612807253604e-06, + "loss": 0.3171, + "step": 19354 + }, + { + "epoch": 0.9066847800627723, + "grad_norm": 0.5525047877658557, + "learning_rate": 2.9959421929559255e-06, + "loss": 0.3109, + "step": 19355 + }, + { + "epoch": 0.9067316250527007, + "grad_norm": 0.561969441280809, + "learning_rate": 2.995756310521789e-06, + "loss": 0.3075, + "step": 19356 + }, + { + "epoch": 0.9067784700426289, + "grad_norm": 0.5453788769298207, + "learning_rate": 2.9955704252346984e-06, + "loss": 0.3168, + "step": 19357 + }, + { + "epoch": 0.9068253150325573, + "grad_norm": 0.6048572877920316, + "learning_rate": 2.995384537095725e-06, + "loss": 0.3182, + "step": 19358 + }, + { + "epoch": 0.9068721600224856, + "grad_norm": 0.5956961017173112, + "learning_rate": 2.9951986461059385e-06, + "loss": 0.3127, + "step": 19359 + }, + { + "epoch": 0.906919005012414, + "grad_norm": 0.6399991888473959, + "learning_rate": 2.9950127522664073e-06, + "loss": 0.3185, + "step": 19360 + }, + { + "epoch": 0.9069658500023422, + "grad_norm": 0.5923235779844017, + "learning_rate": 2.9948268555782035e-06, + "loss": 0.3194, + "step": 19361 + }, + { + "epoch": 0.9070126949922706, + "grad_norm": 0.5798947216438206, + "learning_rate": 2.994640956042395e-06, + "loss": 0.3109, + "step": 19362 + }, + { + "epoch": 0.9070595399821989, + "grad_norm": 0.6023876059039626, + "learning_rate": 2.9944550536600515e-06, + "loss": 0.3256, + "step": 19363 + }, + { + "epoch": 0.9071063849721273, + "grad_norm": 0.6498848866072194, + "learning_rate": 2.994269148432243e-06, + "loss": 0.3486, + "step": 19364 + }, + { + "epoch": 0.9071532299620556, + "grad_norm": 0.56888682632021, + "learning_rate": 2.9940832403600416e-06, + "loss": 0.3034, + "step": 19365 + }, + { + "epoch": 0.9072000749519838, + "grad_norm": 0.600121161372846, + "learning_rate": 2.9938973294445146e-06, + "loss": 0.3122, + "step": 19366 + }, + { + "epoch": 0.9072469199419122, + "grad_norm": 0.6103248072435611, + "learning_rate": 2.9937114156867318e-06, + "loss": 0.3153, + "step": 19367 + }, + { + "epoch": 0.9072937649318406, + "grad_norm": 0.58256377765928, + "learning_rate": 2.9935254990877645e-06, + "loss": 0.3032, + "step": 19368 + }, + { + "epoch": 0.9073406099217689, + "grad_norm": 0.6196208908585351, + "learning_rate": 2.993339579648682e-06, + "loss": 0.3267, + "step": 19369 + }, + { + "epoch": 0.9073874549116971, + "grad_norm": 0.5788293022970353, + "learning_rate": 2.993153657370554e-06, + "loss": 0.3203, + "step": 19370 + }, + { + "epoch": 0.9074342999016255, + "grad_norm": 0.6341830431740048, + "learning_rate": 2.992967732254451e-06, + "loss": 0.3445, + "step": 19371 + }, + { + "epoch": 0.9074811448915538, + "grad_norm": 0.5766244534014063, + "learning_rate": 2.992781804301443e-06, + "loss": 0.2981, + "step": 19372 + }, + { + "epoch": 0.9075279898814822, + "grad_norm": 0.5535046063975471, + "learning_rate": 2.992595873512598e-06, + "loss": 0.3217, + "step": 19373 + }, + { + "epoch": 0.9075748348714106, + "grad_norm": 0.5613821149616591, + "learning_rate": 2.9924099398889895e-06, + "loss": 0.323, + "step": 19374 + }, + { + "epoch": 0.9076216798613388, + "grad_norm": 0.6179991969870657, + "learning_rate": 2.9922240034316843e-06, + "loss": 0.3115, + "step": 19375 + }, + { + "epoch": 0.9076685248512671, + "grad_norm": 0.565954474925705, + "learning_rate": 2.992038064141754e-06, + "loss": 0.3014, + "step": 19376 + }, + { + "epoch": 0.9077153698411955, + "grad_norm": 0.5877524720331125, + "learning_rate": 2.9918521220202685e-06, + "loss": 0.3055, + "step": 19377 + }, + { + "epoch": 0.9077622148311238, + "grad_norm": 0.576456312774558, + "learning_rate": 2.991666177068297e-06, + "loss": 0.3209, + "step": 19378 + }, + { + "epoch": 0.9078090598210521, + "grad_norm": 0.6337277497445001, + "learning_rate": 2.991480229286911e-06, + "loss": 0.3322, + "step": 19379 + }, + { + "epoch": 0.9078559048109804, + "grad_norm": 0.6149073635361182, + "learning_rate": 2.9912942786771794e-06, + "loss": 0.3104, + "step": 19380 + }, + { + "epoch": 0.9079027498009088, + "grad_norm": 0.6000163652106008, + "learning_rate": 2.991108325240173e-06, + "loss": 0.3124, + "step": 19381 + }, + { + "epoch": 0.9079495947908371, + "grad_norm": 0.5874373186354933, + "learning_rate": 2.9909223689769613e-06, + "loss": 0.3106, + "step": 19382 + }, + { + "epoch": 0.9079964397807655, + "grad_norm": 0.5550896125176192, + "learning_rate": 2.990736409888615e-06, + "loss": 0.3198, + "step": 19383 + }, + { + "epoch": 0.9080432847706937, + "grad_norm": 0.5635011054673501, + "learning_rate": 2.990550447976204e-06, + "loss": 0.3033, + "step": 19384 + }, + { + "epoch": 0.9080901297606221, + "grad_norm": 0.552274786151439, + "learning_rate": 2.9903644832407973e-06, + "loss": 0.2907, + "step": 19385 + }, + { + "epoch": 0.9081369747505504, + "grad_norm": 0.6368390227130162, + "learning_rate": 2.990178515683468e-06, + "loss": 0.3286, + "step": 19386 + }, + { + "epoch": 0.9081838197404788, + "grad_norm": 0.5853018514836019, + "learning_rate": 2.9899925453052835e-06, + "loss": 0.3083, + "step": 19387 + }, + { + "epoch": 0.908230664730407, + "grad_norm": 0.5819107685713543, + "learning_rate": 2.9898065721073155e-06, + "loss": 0.3234, + "step": 19388 + }, + { + "epoch": 0.9082775097203354, + "grad_norm": 0.5454128328864769, + "learning_rate": 2.9896205960906337e-06, + "loss": 0.2933, + "step": 19389 + }, + { + "epoch": 0.9083243547102637, + "grad_norm": 0.5588300184468272, + "learning_rate": 2.9894346172563086e-06, + "loss": 0.3245, + "step": 19390 + }, + { + "epoch": 0.9083711997001921, + "grad_norm": 0.6306697300296751, + "learning_rate": 2.98924863560541e-06, + "loss": 0.3186, + "step": 19391 + }, + { + "epoch": 0.9084180446901204, + "grad_norm": 0.5849855867867171, + "learning_rate": 2.9890626511390087e-06, + "loss": 0.3065, + "step": 19392 + }, + { + "epoch": 0.9084648896800487, + "grad_norm": 0.592161969434728, + "learning_rate": 2.9888766638581753e-06, + "loss": 0.323, + "step": 19393 + }, + { + "epoch": 0.908511734669977, + "grad_norm": 0.5956571074321564, + "learning_rate": 2.988690673763978e-06, + "loss": 0.312, + "step": 19394 + }, + { + "epoch": 0.9085585796599054, + "grad_norm": 0.5390531456119874, + "learning_rate": 2.988504680857489e-06, + "loss": 0.2922, + "step": 19395 + }, + { + "epoch": 0.9086054246498337, + "grad_norm": 0.5546918796884502, + "learning_rate": 2.9883186851397792e-06, + "loss": 0.2946, + "step": 19396 + }, + { + "epoch": 0.908652269639762, + "grad_norm": 0.5826993344022107, + "learning_rate": 2.9881326866119186e-06, + "loss": 0.3198, + "step": 19397 + }, + { + "epoch": 0.9086991146296903, + "grad_norm": 0.5663371639620736, + "learning_rate": 2.987946685274976e-06, + "loss": 0.3237, + "step": 19398 + }, + { + "epoch": 0.9087459596196187, + "grad_norm": 0.5571449885255393, + "learning_rate": 2.987760681130023e-06, + "loss": 0.3097, + "step": 19399 + }, + { + "epoch": 0.908792804609547, + "grad_norm": 0.6284799969580739, + "learning_rate": 2.987574674178131e-06, + "loss": 0.3418, + "step": 19400 + }, + { + "epoch": 0.9088396495994754, + "grad_norm": 0.5824617321735592, + "learning_rate": 2.9873886644203676e-06, + "loss": 0.2981, + "step": 19401 + }, + { + "epoch": 0.9088864945894036, + "grad_norm": 0.6146096683765845, + "learning_rate": 2.987202651857806e-06, + "loss": 0.3167, + "step": 19402 + }, + { + "epoch": 0.908933339579332, + "grad_norm": 0.5680176547677442, + "learning_rate": 2.9870166364915147e-06, + "loss": 0.303, + "step": 19403 + }, + { + "epoch": 0.9089801845692603, + "grad_norm": 0.6051595754166901, + "learning_rate": 2.986830618322566e-06, + "loss": 0.3096, + "step": 19404 + }, + { + "epoch": 0.9090270295591887, + "grad_norm": 0.5776734871658584, + "learning_rate": 2.9866445973520297e-06, + "loss": 0.3135, + "step": 19405 + }, + { + "epoch": 0.9090738745491169, + "grad_norm": 0.5862265355945274, + "learning_rate": 2.986458573580975e-06, + "loss": 0.2965, + "step": 19406 + }, + { + "epoch": 0.9091207195390453, + "grad_norm": 0.6138698074739599, + "learning_rate": 2.986272547010474e-06, + "loss": 0.3218, + "step": 19407 + }, + { + "epoch": 0.9091675645289736, + "grad_norm": 0.6092854281008863, + "learning_rate": 2.986086517641597e-06, + "loss": 0.3177, + "step": 19408 + }, + { + "epoch": 0.909214409518902, + "grad_norm": 0.5740282145006894, + "learning_rate": 2.985900485475414e-06, + "loss": 0.2983, + "step": 19409 + }, + { + "epoch": 0.9092612545088303, + "grad_norm": 0.5961863830618686, + "learning_rate": 2.9857144505129955e-06, + "loss": 0.3201, + "step": 19410 + }, + { + "epoch": 0.9093080994987586, + "grad_norm": 0.6290939925119566, + "learning_rate": 2.985528412755413e-06, + "loss": 0.3235, + "step": 19411 + }, + { + "epoch": 0.9093549444886869, + "grad_norm": 0.535225623706555, + "learning_rate": 2.9853423722037356e-06, + "loss": 0.3246, + "step": 19412 + }, + { + "epoch": 0.9094017894786153, + "grad_norm": 0.615637823403909, + "learning_rate": 2.985156328859035e-06, + "loss": 0.3189, + "step": 19413 + }, + { + "epoch": 0.9094486344685436, + "grad_norm": 0.5924705126116412, + "learning_rate": 2.984970282722383e-06, + "loss": 0.339, + "step": 19414 + }, + { + "epoch": 0.9094954794584719, + "grad_norm": 0.5681145761735537, + "learning_rate": 2.9847842337948477e-06, + "loss": 0.3337, + "step": 19415 + }, + { + "epoch": 0.9095423244484002, + "grad_norm": 0.5744146446566474, + "learning_rate": 2.984598182077501e-06, + "loss": 0.3136, + "step": 19416 + }, + { + "epoch": 0.9095891694383286, + "grad_norm": 0.5750537412809802, + "learning_rate": 2.9844121275714137e-06, + "loss": 0.3008, + "step": 19417 + }, + { + "epoch": 0.9096360144282569, + "grad_norm": 0.5948936196405713, + "learning_rate": 2.984226070277657e-06, + "loss": 0.3104, + "step": 19418 + }, + { + "epoch": 0.9096828594181853, + "grad_norm": 0.6072607875939376, + "learning_rate": 2.9840400101973e-06, + "loss": 0.316, + "step": 19419 + }, + { + "epoch": 0.9097297044081135, + "grad_norm": 0.5995218417972996, + "learning_rate": 2.9838539473314155e-06, + "loss": 0.3124, + "step": 19420 + }, + { + "epoch": 0.9097765493980419, + "grad_norm": 0.5501360721041203, + "learning_rate": 2.9836678816810727e-06, + "loss": 0.298, + "step": 19421 + }, + { + "epoch": 0.9098233943879702, + "grad_norm": 0.5969898947659537, + "learning_rate": 2.983481813247342e-06, + "loss": 0.3218, + "step": 19422 + }, + { + "epoch": 0.9098702393778986, + "grad_norm": 0.6112610812502828, + "learning_rate": 2.9832957420312956e-06, + "loss": 0.3087, + "step": 19423 + }, + { + "epoch": 0.9099170843678268, + "grad_norm": 0.5856136974294844, + "learning_rate": 2.983109668034004e-06, + "loss": 0.3112, + "step": 19424 + }, + { + "epoch": 0.9099639293577552, + "grad_norm": 0.5754891927983654, + "learning_rate": 2.9829235912565375e-06, + "loss": 0.2937, + "step": 19425 + }, + { + "epoch": 0.9100107743476835, + "grad_norm": 0.6092177661569179, + "learning_rate": 2.982737511699967e-06, + "loss": 0.3189, + "step": 19426 + }, + { + "epoch": 0.9100576193376119, + "grad_norm": 0.6457107170678611, + "learning_rate": 2.982551429365363e-06, + "loss": 0.3343, + "step": 19427 + }, + { + "epoch": 0.9101044643275402, + "grad_norm": 0.6034876957911091, + "learning_rate": 2.9823653442537974e-06, + "loss": 0.3273, + "step": 19428 + }, + { + "epoch": 0.9101513093174685, + "grad_norm": 0.6013291674204087, + "learning_rate": 2.982179256366341e-06, + "loss": 0.3185, + "step": 19429 + }, + { + "epoch": 0.9101981543073968, + "grad_norm": 0.6004437925443078, + "learning_rate": 2.9819931657040628e-06, + "loss": 0.3276, + "step": 19430 + }, + { + "epoch": 0.9102449992973252, + "grad_norm": 0.6573667763181243, + "learning_rate": 2.9818070722680358e-06, + "loss": 0.3302, + "step": 19431 + }, + { + "epoch": 0.9102918442872535, + "grad_norm": 0.6059590136665757, + "learning_rate": 2.9816209760593307e-06, + "loss": 0.3267, + "step": 19432 + }, + { + "epoch": 0.9103386892771818, + "grad_norm": 0.5817399091201134, + "learning_rate": 2.981434877079017e-06, + "loss": 0.304, + "step": 19433 + }, + { + "epoch": 0.9103855342671101, + "grad_norm": 0.6011902629742948, + "learning_rate": 2.9812487753281668e-06, + "loss": 0.3274, + "step": 19434 + }, + { + "epoch": 0.9104323792570385, + "grad_norm": 0.5870135820304536, + "learning_rate": 2.9810626708078518e-06, + "loss": 0.3312, + "step": 19435 + }, + { + "epoch": 0.9104792242469668, + "grad_norm": 0.5828547509381574, + "learning_rate": 2.9808765635191413e-06, + "loss": 0.3208, + "step": 19436 + }, + { + "epoch": 0.9105260692368952, + "grad_norm": 0.5847412406567062, + "learning_rate": 2.9806904534631065e-06, + "loss": 0.3086, + "step": 19437 + }, + { + "epoch": 0.9105729142268234, + "grad_norm": 0.6045655276114951, + "learning_rate": 2.9805043406408196e-06, + "loss": 0.3292, + "step": 19438 + }, + { + "epoch": 0.9106197592167518, + "grad_norm": 0.590176550448826, + "learning_rate": 2.980318225053351e-06, + "loss": 0.3357, + "step": 19439 + }, + { + "epoch": 0.9106666042066801, + "grad_norm": 0.5788029293769487, + "learning_rate": 2.9801321067017713e-06, + "loss": 0.32, + "step": 19440 + }, + { + "epoch": 0.9107134491966085, + "grad_norm": 0.5942517898568015, + "learning_rate": 2.9799459855871525e-06, + "loss": 0.3259, + "step": 19441 + }, + { + "epoch": 0.9107602941865367, + "grad_norm": 0.593087892447658, + "learning_rate": 2.979759861710565e-06, + "loss": 0.3143, + "step": 19442 + }, + { + "epoch": 0.910807139176465, + "grad_norm": 0.5942504310760007, + "learning_rate": 2.9795737350730802e-06, + "loss": 0.3256, + "step": 19443 + }, + { + "epoch": 0.9108539841663934, + "grad_norm": 0.5785998298152698, + "learning_rate": 2.979387605675769e-06, + "loss": 0.3235, + "step": 19444 + }, + { + "epoch": 0.9109008291563218, + "grad_norm": 0.5571605924499108, + "learning_rate": 2.979201473519702e-06, + "loss": 0.3162, + "step": 19445 + }, + { + "epoch": 0.9109476741462501, + "grad_norm": 0.648032863876365, + "learning_rate": 2.979015338605952e-06, + "loss": 0.3393, + "step": 19446 + }, + { + "epoch": 0.9109945191361783, + "grad_norm": 0.5874871707087882, + "learning_rate": 2.978829200935589e-06, + "loss": 0.3013, + "step": 19447 + }, + { + "epoch": 0.9110413641261067, + "grad_norm": 0.6293624324415388, + "learning_rate": 2.9786430605096835e-06, + "loss": 0.3373, + "step": 19448 + }, + { + "epoch": 0.911088209116035, + "grad_norm": 0.5512281860060101, + "learning_rate": 2.978456917329308e-06, + "loss": 0.2951, + "step": 19449 + }, + { + "epoch": 0.9111350541059634, + "grad_norm": 0.6213874527389616, + "learning_rate": 2.9782707713955334e-06, + "loss": 0.3435, + "step": 19450 + }, + { + "epoch": 0.9111818990958916, + "grad_norm": 0.5504465448211308, + "learning_rate": 2.97808462270943e-06, + "loss": 0.3368, + "step": 19451 + }, + { + "epoch": 0.91122874408582, + "grad_norm": 0.6376143276666316, + "learning_rate": 2.9778984712720703e-06, + "loss": 0.3334, + "step": 19452 + }, + { + "epoch": 0.9112755890757483, + "grad_norm": 0.6252346098162537, + "learning_rate": 2.977712317084526e-06, + "loss": 0.3281, + "step": 19453 + }, + { + "epoch": 0.9113224340656767, + "grad_norm": 0.6081747794803078, + "learning_rate": 2.9775261601478656e-06, + "loss": 0.2984, + "step": 19454 + }, + { + "epoch": 0.911369279055605, + "grad_norm": 0.6282006489012272, + "learning_rate": 2.9773400004631624e-06, + "loss": 0.3098, + "step": 19455 + }, + { + "epoch": 0.9114161240455333, + "grad_norm": 0.608955308594348, + "learning_rate": 2.977153838031489e-06, + "loss": 0.3028, + "step": 19456 + }, + { + "epoch": 0.9114629690354616, + "grad_norm": 0.5731066834475107, + "learning_rate": 2.976967672853914e-06, + "loss": 0.329, + "step": 19457 + }, + { + "epoch": 0.91150981402539, + "grad_norm": 0.6003174838927037, + "learning_rate": 2.9767815049315095e-06, + "loss": 0.3222, + "step": 19458 + }, + { + "epoch": 0.9115566590153183, + "grad_norm": 0.586257734776787, + "learning_rate": 2.976595334265348e-06, + "loss": 0.306, + "step": 19459 + }, + { + "epoch": 0.9116035040052466, + "grad_norm": 0.5752813432865176, + "learning_rate": 2.9764091608565004e-06, + "loss": 0.3098, + "step": 19460 + }, + { + "epoch": 0.9116503489951749, + "grad_norm": 0.6055679902466157, + "learning_rate": 2.976222984706037e-06, + "loss": 0.3143, + "step": 19461 + }, + { + "epoch": 0.9116971939851033, + "grad_norm": 0.5872908435667235, + "learning_rate": 2.9760368058150307e-06, + "loss": 0.3218, + "step": 19462 + }, + { + "epoch": 0.9117440389750316, + "grad_norm": 0.5336368115366913, + "learning_rate": 2.9758506241845517e-06, + "loss": 0.3191, + "step": 19463 + }, + { + "epoch": 0.91179088396496, + "grad_norm": 0.5969215734359257, + "learning_rate": 2.975664439815672e-06, + "loss": 0.321, + "step": 19464 + }, + { + "epoch": 0.9118377289548882, + "grad_norm": 0.5794962984601912, + "learning_rate": 2.9754782527094634e-06, + "loss": 0.3002, + "step": 19465 + }, + { + "epoch": 0.9118845739448166, + "grad_norm": 0.6112265938421274, + "learning_rate": 2.9752920628669966e-06, + "loss": 0.3218, + "step": 19466 + }, + { + "epoch": 0.9119314189347449, + "grad_norm": 0.6157902250733411, + "learning_rate": 2.9751058702893444e-06, + "loss": 0.3318, + "step": 19467 + }, + { + "epoch": 0.9119782639246733, + "grad_norm": 0.6033723831068156, + "learning_rate": 2.974919674977576e-06, + "loss": 0.3278, + "step": 19468 + }, + { + "epoch": 0.9120251089146015, + "grad_norm": 0.6101500994547474, + "learning_rate": 2.9747334769327654e-06, + "loss": 0.3198, + "step": 19469 + }, + { + "epoch": 0.9120719539045299, + "grad_norm": 0.5843359372069896, + "learning_rate": 2.974547276155982e-06, + "loss": 0.3218, + "step": 19470 + }, + { + "epoch": 0.9121187988944582, + "grad_norm": 0.5740402796460911, + "learning_rate": 2.9743610726482984e-06, + "loss": 0.3122, + "step": 19471 + }, + { + "epoch": 0.9121656438843866, + "grad_norm": 0.5723460996985225, + "learning_rate": 2.9741748664107868e-06, + "loss": 0.3366, + "step": 19472 + }, + { + "epoch": 0.9122124888743149, + "grad_norm": 0.5777460351534293, + "learning_rate": 2.973988657444517e-06, + "loss": 0.3187, + "step": 19473 + }, + { + "epoch": 0.9122593338642432, + "grad_norm": 0.6249927257923719, + "learning_rate": 2.973802445750562e-06, + "loss": 0.3192, + "step": 19474 + }, + { + "epoch": 0.9123061788541715, + "grad_norm": 0.6300800573227198, + "learning_rate": 2.9736162313299938e-06, + "loss": 0.3108, + "step": 19475 + }, + { + "epoch": 0.9123530238440999, + "grad_norm": 0.5752802035094075, + "learning_rate": 2.9734300141838824e-06, + "loss": 0.3061, + "step": 19476 + }, + { + "epoch": 0.9123998688340282, + "grad_norm": 0.663751070159736, + "learning_rate": 2.9732437943133e-06, + "loss": 0.343, + "step": 19477 + }, + { + "epoch": 0.9124467138239565, + "grad_norm": 0.6147355082375179, + "learning_rate": 2.9730575717193193e-06, + "loss": 0.3185, + "step": 19478 + }, + { + "epoch": 0.9124935588138848, + "grad_norm": 0.5584496932134206, + "learning_rate": 2.9728713464030106e-06, + "loss": 0.306, + "step": 19479 + }, + { + "epoch": 0.9125404038038132, + "grad_norm": 0.5921573177267457, + "learning_rate": 2.9726851183654464e-06, + "loss": 0.3312, + "step": 19480 + }, + { + "epoch": 0.9125872487937415, + "grad_norm": 0.5857539682879904, + "learning_rate": 2.972498887607699e-06, + "loss": 0.3087, + "step": 19481 + }, + { + "epoch": 0.9126340937836699, + "grad_norm": 0.5651339383898828, + "learning_rate": 2.9723126541308376e-06, + "loss": 0.3036, + "step": 19482 + }, + { + "epoch": 0.9126809387735981, + "grad_norm": 0.5206008384587879, + "learning_rate": 2.972126417935936e-06, + "loss": 0.2903, + "step": 19483 + }, + { + "epoch": 0.9127277837635265, + "grad_norm": 0.5965264162454962, + "learning_rate": 2.9719401790240664e-06, + "loss": 0.3309, + "step": 19484 + }, + { + "epoch": 0.9127746287534548, + "grad_norm": 0.6077420621463349, + "learning_rate": 2.971753937396299e-06, + "loss": 0.3215, + "step": 19485 + }, + { + "epoch": 0.9128214737433832, + "grad_norm": 0.5687003090196557, + "learning_rate": 2.971567693053706e-06, + "loss": 0.3136, + "step": 19486 + }, + { + "epoch": 0.9128683187333114, + "grad_norm": 0.5973574091726414, + "learning_rate": 2.9713814459973605e-06, + "loss": 0.3218, + "step": 19487 + }, + { + "epoch": 0.9129151637232398, + "grad_norm": 0.6150987420913441, + "learning_rate": 2.9711951962283325e-06, + "loss": 0.3192, + "step": 19488 + }, + { + "epoch": 0.9129620087131681, + "grad_norm": 0.5974935798935516, + "learning_rate": 2.9710089437476946e-06, + "loss": 0.326, + "step": 19489 + }, + { + "epoch": 0.9130088537030965, + "grad_norm": 0.5974725640742922, + "learning_rate": 2.970822688556519e-06, + "loss": 0.3213, + "step": 19490 + }, + { + "epoch": 0.9130556986930248, + "grad_norm": 0.5819663973735203, + "learning_rate": 2.9706364306558773e-06, + "loss": 0.3261, + "step": 19491 + }, + { + "epoch": 0.9131025436829531, + "grad_norm": 0.6305348383030508, + "learning_rate": 2.97045017004684e-06, + "loss": 0.3014, + "step": 19492 + }, + { + "epoch": 0.9131493886728814, + "grad_norm": 0.5756291304236273, + "learning_rate": 2.9702639067304815e-06, + "loss": 0.3358, + "step": 19493 + }, + { + "epoch": 0.9131962336628098, + "grad_norm": 0.543629874480038, + "learning_rate": 2.9700776407078717e-06, + "loss": 0.3176, + "step": 19494 + }, + { + "epoch": 0.9132430786527381, + "grad_norm": 0.5785654112266446, + "learning_rate": 2.969891371980084e-06, + "loss": 0.3226, + "step": 19495 + }, + { + "epoch": 0.9132899236426664, + "grad_norm": 0.5709194552213314, + "learning_rate": 2.969705100548189e-06, + "loss": 0.3181, + "step": 19496 + }, + { + "epoch": 0.9133367686325947, + "grad_norm": 0.5771645878776297, + "learning_rate": 2.969518826413259e-06, + "loss": 0.3266, + "step": 19497 + }, + { + "epoch": 0.9133836136225231, + "grad_norm": 0.5755852164476571, + "learning_rate": 2.9693325495763664e-06, + "loss": 0.3174, + "step": 19498 + }, + { + "epoch": 0.9134304586124514, + "grad_norm": 0.610926948166168, + "learning_rate": 2.969146270038583e-06, + "loss": 0.3145, + "step": 19499 + }, + { + "epoch": 0.9134773036023798, + "grad_norm": 0.5976762298251452, + "learning_rate": 2.9689599878009807e-06, + "loss": 0.3138, + "step": 19500 + }, + { + "epoch": 0.913524148592308, + "grad_norm": 0.5477555836613633, + "learning_rate": 2.9687737028646317e-06, + "loss": 0.3168, + "step": 19501 + }, + { + "epoch": 0.9135709935822364, + "grad_norm": 0.6053936563845209, + "learning_rate": 2.968587415230608e-06, + "loss": 0.33, + "step": 19502 + }, + { + "epoch": 0.9136178385721647, + "grad_norm": 0.6225645581185165, + "learning_rate": 2.968401124899981e-06, + "loss": 0.319, + "step": 19503 + }, + { + "epoch": 0.9136646835620931, + "grad_norm": 0.6791787407268136, + "learning_rate": 2.968214831873823e-06, + "loss": 0.3096, + "step": 19504 + }, + { + "epoch": 0.9137115285520213, + "grad_norm": 0.6128883631174719, + "learning_rate": 2.9680285361532075e-06, + "loss": 0.3237, + "step": 19505 + }, + { + "epoch": 0.9137583735419497, + "grad_norm": 0.5689639917256635, + "learning_rate": 2.967842237739204e-06, + "loss": 0.3034, + "step": 19506 + }, + { + "epoch": 0.913805218531878, + "grad_norm": 0.6163359651036864, + "learning_rate": 2.9676559366328867e-06, + "loss": 0.3173, + "step": 19507 + }, + { + "epoch": 0.9138520635218064, + "grad_norm": 0.5972210270028583, + "learning_rate": 2.967469632835327e-06, + "loss": 0.3229, + "step": 19508 + }, + { + "epoch": 0.9138989085117347, + "grad_norm": 0.6681057082710686, + "learning_rate": 2.9672833263475976e-06, + "loss": 0.3364, + "step": 19509 + }, + { + "epoch": 0.913945753501663, + "grad_norm": 0.5471909019936133, + "learning_rate": 2.967097017170769e-06, + "loss": 0.2889, + "step": 19510 + }, + { + "epoch": 0.9139925984915913, + "grad_norm": 0.6035273488535138, + "learning_rate": 2.9669107053059154e-06, + "loss": 0.3209, + "step": 19511 + }, + { + "epoch": 0.9140394434815197, + "grad_norm": 0.5696348933717882, + "learning_rate": 2.9667243907541076e-06, + "loss": 0.3165, + "step": 19512 + }, + { + "epoch": 0.914086288471448, + "grad_norm": 0.626778749942676, + "learning_rate": 2.9665380735164184e-06, + "loss": 0.3335, + "step": 19513 + }, + { + "epoch": 0.9141331334613763, + "grad_norm": 0.6219569419183983, + "learning_rate": 2.9663517535939203e-06, + "loss": 0.3267, + "step": 19514 + }, + { + "epoch": 0.9141799784513046, + "grad_norm": 0.5733640706376016, + "learning_rate": 2.9661654309876834e-06, + "loss": 0.3258, + "step": 19515 + }, + { + "epoch": 0.914226823441233, + "grad_norm": 0.5747600731251362, + "learning_rate": 2.965979105698783e-06, + "loss": 0.3229, + "step": 19516 + }, + { + "epoch": 0.9142736684311613, + "grad_norm": 0.5877323258471987, + "learning_rate": 2.9657927777282904e-06, + "loss": 0.3226, + "step": 19517 + }, + { + "epoch": 0.9143205134210897, + "grad_norm": 0.5640002459742208, + "learning_rate": 2.965606447077276e-06, + "loss": 0.317, + "step": 19518 + }, + { + "epoch": 0.9143673584110179, + "grad_norm": 0.6150442826671936, + "learning_rate": 2.9654201137468146e-06, + "loss": 0.3044, + "step": 19519 + }, + { + "epoch": 0.9144142034009463, + "grad_norm": 0.5955061683955595, + "learning_rate": 2.9652337777379775e-06, + "loss": 0.3177, + "step": 19520 + }, + { + "epoch": 0.9144610483908746, + "grad_norm": 0.5617512107419385, + "learning_rate": 2.9650474390518365e-06, + "loss": 0.2908, + "step": 19521 + }, + { + "epoch": 0.914507893380803, + "grad_norm": 0.5850069503142685, + "learning_rate": 2.9648610976894645e-06, + "loss": 0.3078, + "step": 19522 + }, + { + "epoch": 0.9145547383707312, + "grad_norm": 0.6137463922408976, + "learning_rate": 2.9646747536519337e-06, + "loss": 0.3144, + "step": 19523 + }, + { + "epoch": 0.9146015833606596, + "grad_norm": 0.5831246117931322, + "learning_rate": 2.964488406940316e-06, + "loss": 0.3008, + "step": 19524 + }, + { + "epoch": 0.9146484283505879, + "grad_norm": 0.6896666029494286, + "learning_rate": 2.964302057555685e-06, + "loss": 0.3159, + "step": 19525 + }, + { + "epoch": 0.9146952733405163, + "grad_norm": 0.5926324761424681, + "learning_rate": 2.9641157054991123e-06, + "loss": 0.3126, + "step": 19526 + }, + { + "epoch": 0.9147421183304446, + "grad_norm": 0.616314079352874, + "learning_rate": 2.96392935077167e-06, + "loss": 0.3165, + "step": 19527 + }, + { + "epoch": 0.9147889633203728, + "grad_norm": 0.6525578215372867, + "learning_rate": 2.9637429933744306e-06, + "loss": 0.3223, + "step": 19528 + }, + { + "epoch": 0.9148358083103012, + "grad_norm": 0.6049975424825959, + "learning_rate": 2.963556633308467e-06, + "loss": 0.3195, + "step": 19529 + }, + { + "epoch": 0.9148826533002296, + "grad_norm": 0.6210935179479814, + "learning_rate": 2.9633702705748523e-06, + "loss": 0.3135, + "step": 19530 + }, + { + "epoch": 0.9149294982901579, + "grad_norm": 0.5487133892088261, + "learning_rate": 2.9631839051746567e-06, + "loss": 0.3047, + "step": 19531 + }, + { + "epoch": 0.9149763432800861, + "grad_norm": 0.56229095246498, + "learning_rate": 2.962997537108955e-06, + "loss": 0.3137, + "step": 19532 + }, + { + "epoch": 0.9150231882700145, + "grad_norm": 0.5604781775322871, + "learning_rate": 2.9628111663788197e-06, + "loss": 0.306, + "step": 19533 + }, + { + "epoch": 0.9150700332599428, + "grad_norm": 0.6032355348132539, + "learning_rate": 2.962624792985321e-06, + "loss": 0.3134, + "step": 19534 + }, + { + "epoch": 0.9151168782498712, + "grad_norm": 0.5778777930932494, + "learning_rate": 2.9624384169295333e-06, + "loss": 0.3199, + "step": 19535 + }, + { + "epoch": 0.9151637232397996, + "grad_norm": 0.5804809280704667, + "learning_rate": 2.9622520382125293e-06, + "loss": 0.3349, + "step": 19536 + }, + { + "epoch": 0.9152105682297278, + "grad_norm": 0.5656908565352637, + "learning_rate": 2.9620656568353807e-06, + "loss": 0.2911, + "step": 19537 + }, + { + "epoch": 0.9152574132196561, + "grad_norm": 0.6150996822005664, + "learning_rate": 2.96187927279916e-06, + "loss": 0.2975, + "step": 19538 + }, + { + "epoch": 0.9153042582095845, + "grad_norm": 0.5895968242971683, + "learning_rate": 2.961692886104941e-06, + "loss": 0.3136, + "step": 19539 + }, + { + "epoch": 0.9153511031995128, + "grad_norm": 0.5970224362464926, + "learning_rate": 2.9615064967537947e-06, + "loss": 0.3121, + "step": 19540 + }, + { + "epoch": 0.9153979481894411, + "grad_norm": 0.5970807190629254, + "learning_rate": 2.961320104746795e-06, + "loss": 0.3241, + "step": 19541 + }, + { + "epoch": 0.9154447931793694, + "grad_norm": 0.6072464707530788, + "learning_rate": 2.961133710085014e-06, + "loss": 0.3198, + "step": 19542 + }, + { + "epoch": 0.9154916381692978, + "grad_norm": 0.5707270682143278, + "learning_rate": 2.960947312769524e-06, + "loss": 0.3154, + "step": 19543 + }, + { + "epoch": 0.9155384831592261, + "grad_norm": 0.6119762579316823, + "learning_rate": 2.960760912801398e-06, + "loss": 0.322, + "step": 19544 + }, + { + "epoch": 0.9155853281491545, + "grad_norm": 0.5394462692908271, + "learning_rate": 2.9605745101817095e-06, + "loss": 0.2951, + "step": 19545 + }, + { + "epoch": 0.9156321731390827, + "grad_norm": 0.595661538167313, + "learning_rate": 2.9603881049115295e-06, + "loss": 0.319, + "step": 19546 + }, + { + "epoch": 0.9156790181290111, + "grad_norm": 0.5654171115456122, + "learning_rate": 2.960201696991933e-06, + "loss": 0.3173, + "step": 19547 + }, + { + "epoch": 0.9157258631189394, + "grad_norm": 0.6203878054433953, + "learning_rate": 2.9600152864239906e-06, + "loss": 0.3282, + "step": 19548 + }, + { + "epoch": 0.9157727081088678, + "grad_norm": 0.6277102745721371, + "learning_rate": 2.9598288732087755e-06, + "loss": 0.3073, + "step": 19549 + }, + { + "epoch": 0.915819553098796, + "grad_norm": 0.6034882341798381, + "learning_rate": 2.9596424573473616e-06, + "loss": 0.3327, + "step": 19550 + }, + { + "epoch": 0.9158663980887244, + "grad_norm": 0.5623054860487373, + "learning_rate": 2.9594560388408206e-06, + "loss": 0.3051, + "step": 19551 + }, + { + "epoch": 0.9159132430786527, + "grad_norm": 0.6041014439204784, + "learning_rate": 2.9592696176902247e-06, + "loss": 0.3193, + "step": 19552 + }, + { + "epoch": 0.9159600880685811, + "grad_norm": 0.5844525959215309, + "learning_rate": 2.959083193896648e-06, + "loss": 0.3322, + "step": 19553 + }, + { + "epoch": 0.9160069330585094, + "grad_norm": 0.5405287473421617, + "learning_rate": 2.958896767461164e-06, + "loss": 0.3061, + "step": 19554 + }, + { + "epoch": 0.9160537780484377, + "grad_norm": 0.5795675174086554, + "learning_rate": 2.9587103383848432e-06, + "loss": 0.3193, + "step": 19555 + }, + { + "epoch": 0.916100623038366, + "grad_norm": 0.647735945862086, + "learning_rate": 2.95852390666876e-06, + "loss": 0.3249, + "step": 19556 + }, + { + "epoch": 0.9161474680282944, + "grad_norm": 0.6201186641843827, + "learning_rate": 2.9583374723139876e-06, + "loss": 0.3415, + "step": 19557 + }, + { + "epoch": 0.9161943130182227, + "grad_norm": 0.5475504049476478, + "learning_rate": 2.9581510353215975e-06, + "loss": 0.3089, + "step": 19558 + }, + { + "epoch": 0.916241158008151, + "grad_norm": 0.5817607341434521, + "learning_rate": 2.957964595692663e-06, + "loss": 0.3223, + "step": 19559 + }, + { + "epoch": 0.9162880029980793, + "grad_norm": 0.5806090827785503, + "learning_rate": 2.9577781534282583e-06, + "loss": 0.2931, + "step": 19560 + }, + { + "epoch": 0.9163348479880077, + "grad_norm": 0.6293335750167997, + "learning_rate": 2.957591708529455e-06, + "loss": 0.3379, + "step": 19561 + }, + { + "epoch": 0.916381692977936, + "grad_norm": 0.6056537614600139, + "learning_rate": 2.9574052609973265e-06, + "loss": 0.3297, + "step": 19562 + }, + { + "epoch": 0.9164285379678644, + "grad_norm": 0.5576708369786181, + "learning_rate": 2.957218810832946e-06, + "loss": 0.303, + "step": 19563 + }, + { + "epoch": 0.9164753829577926, + "grad_norm": 0.5846451235229486, + "learning_rate": 2.9570323580373856e-06, + "loss": 0.3204, + "step": 19564 + }, + { + "epoch": 0.916522227947721, + "grad_norm": 0.5607564762456682, + "learning_rate": 2.9568459026117192e-06, + "loss": 0.306, + "step": 19565 + }, + { + "epoch": 0.9165690729376493, + "grad_norm": 0.5608282980719506, + "learning_rate": 2.9566594445570197e-06, + "loss": 0.2885, + "step": 19566 + }, + { + "epoch": 0.9166159179275777, + "grad_norm": 0.5954568137402916, + "learning_rate": 2.956472983874359e-06, + "loss": 0.3103, + "step": 19567 + }, + { + "epoch": 0.9166627629175059, + "grad_norm": 0.5685451643435381, + "learning_rate": 2.9562865205648115e-06, + "loss": 0.3029, + "step": 19568 + }, + { + "epoch": 0.9167096079074343, + "grad_norm": 0.5387380800302906, + "learning_rate": 2.95610005462945e-06, + "loss": 0.2951, + "step": 19569 + }, + { + "epoch": 0.9167564528973626, + "grad_norm": 0.6504741978110982, + "learning_rate": 2.9559135860693465e-06, + "loss": 0.3269, + "step": 19570 + }, + { + "epoch": 0.916803297887291, + "grad_norm": 0.6015558265171567, + "learning_rate": 2.955727114885576e-06, + "loss": 0.3351, + "step": 19571 + }, + { + "epoch": 0.9168501428772193, + "grad_norm": 0.6133267510295145, + "learning_rate": 2.9555406410792106e-06, + "loss": 0.3289, + "step": 19572 + }, + { + "epoch": 0.9168969878671476, + "grad_norm": 0.5706896307020362, + "learning_rate": 2.9553541646513216e-06, + "loss": 0.3029, + "step": 19573 + }, + { + "epoch": 0.9169438328570759, + "grad_norm": 0.5865118676504097, + "learning_rate": 2.9551676856029846e-06, + "loss": 0.3273, + "step": 19574 + }, + { + "epoch": 0.9169906778470043, + "grad_norm": 0.5886874080619421, + "learning_rate": 2.9549812039352732e-06, + "loss": 0.3119, + "step": 19575 + }, + { + "epoch": 0.9170375228369326, + "grad_norm": 0.6065364160131563, + "learning_rate": 2.954794719649258e-06, + "loss": 0.3015, + "step": 19576 + }, + { + "epoch": 0.9170843678268609, + "grad_norm": 0.5678522377257907, + "learning_rate": 2.9546082327460135e-06, + "loss": 0.3035, + "step": 19577 + }, + { + "epoch": 0.9171312128167892, + "grad_norm": 0.6160460677632463, + "learning_rate": 2.9544217432266137e-06, + "loss": 0.3157, + "step": 19578 + }, + { + "epoch": 0.9171780578067176, + "grad_norm": 0.5729213254961483, + "learning_rate": 2.9542352510921306e-06, + "loss": 0.2921, + "step": 19579 + }, + { + "epoch": 0.9172249027966459, + "grad_norm": 0.5705767960186678, + "learning_rate": 2.9540487563436377e-06, + "loss": 0.299, + "step": 19580 + }, + { + "epoch": 0.9172717477865743, + "grad_norm": 0.5865208240728462, + "learning_rate": 2.9538622589822087e-06, + "loss": 0.3218, + "step": 19581 + }, + { + "epoch": 0.9173185927765025, + "grad_norm": 0.6055845896431641, + "learning_rate": 2.953675759008916e-06, + "loss": 0.3241, + "step": 19582 + }, + { + "epoch": 0.9173654377664309, + "grad_norm": 0.614342227094684, + "learning_rate": 2.9534892564248334e-06, + "loss": 0.3228, + "step": 19583 + }, + { + "epoch": 0.9174122827563592, + "grad_norm": 0.569255589965193, + "learning_rate": 2.9533027512310347e-06, + "loss": 0.3444, + "step": 19584 + }, + { + "epoch": 0.9174591277462876, + "grad_norm": 0.5646867615766447, + "learning_rate": 2.9531162434285914e-06, + "loss": 0.2951, + "step": 19585 + }, + { + "epoch": 0.9175059727362158, + "grad_norm": 0.5867572695840998, + "learning_rate": 2.952929733018579e-06, + "loss": 0.3016, + "step": 19586 + }, + { + "epoch": 0.9175528177261442, + "grad_norm": 0.6100598868657269, + "learning_rate": 2.95274322000207e-06, + "loss": 0.2989, + "step": 19587 + }, + { + "epoch": 0.9175996627160725, + "grad_norm": 0.5450663402731776, + "learning_rate": 2.952556704380137e-06, + "loss": 0.3098, + "step": 19588 + }, + { + "epoch": 0.9176465077060009, + "grad_norm": 0.5954126726418947, + "learning_rate": 2.952370186153854e-06, + "loss": 0.3408, + "step": 19589 + }, + { + "epoch": 0.9176933526959292, + "grad_norm": 0.6147900420184467, + "learning_rate": 2.952183665324294e-06, + "loss": 0.3296, + "step": 19590 + }, + { + "epoch": 0.9177401976858575, + "grad_norm": 0.5571779001007635, + "learning_rate": 2.951997141892531e-06, + "loss": 0.3049, + "step": 19591 + }, + { + "epoch": 0.9177870426757858, + "grad_norm": 0.5854274299241471, + "learning_rate": 2.9518106158596384e-06, + "loss": 0.3256, + "step": 19592 + }, + { + "epoch": 0.9178338876657142, + "grad_norm": 0.6195744010163702, + "learning_rate": 2.951624087226689e-06, + "loss": 0.3024, + "step": 19593 + }, + { + "epoch": 0.9178807326556425, + "grad_norm": 0.5704068735590312, + "learning_rate": 2.951437555994756e-06, + "loss": 0.3186, + "step": 19594 + }, + { + "epoch": 0.9179275776455708, + "grad_norm": 0.6030836703489487, + "learning_rate": 2.951251022164913e-06, + "loss": 0.326, + "step": 19595 + }, + { + "epoch": 0.9179744226354991, + "grad_norm": 0.6015647150455199, + "learning_rate": 2.951064485738235e-06, + "loss": 0.3054, + "step": 19596 + }, + { + "epoch": 0.9180212676254275, + "grad_norm": 0.5776161043520099, + "learning_rate": 2.950877946715794e-06, + "loss": 0.3128, + "step": 19597 + }, + { + "epoch": 0.9180681126153558, + "grad_norm": 0.5599879855525738, + "learning_rate": 2.950691405098663e-06, + "loss": 0.316, + "step": 19598 + }, + { + "epoch": 0.9181149576052842, + "grad_norm": 0.641801671204957, + "learning_rate": 2.9505048608879173e-06, + "loss": 0.3196, + "step": 19599 + }, + { + "epoch": 0.9181618025952124, + "grad_norm": 0.5654382418195434, + "learning_rate": 2.950318314084629e-06, + "loss": 0.3032, + "step": 19600 + }, + { + "epoch": 0.9182086475851408, + "grad_norm": 0.5486885466160606, + "learning_rate": 2.950131764689871e-06, + "loss": 0.3125, + "step": 19601 + }, + { + "epoch": 0.9182554925750691, + "grad_norm": 0.6031429409717541, + "learning_rate": 2.9499452127047183e-06, + "loss": 0.3201, + "step": 19602 + }, + { + "epoch": 0.9183023375649975, + "grad_norm": 0.5611566652360578, + "learning_rate": 2.949758658130245e-06, + "loss": 0.3079, + "step": 19603 + }, + { + "epoch": 0.9183491825549257, + "grad_norm": 0.617872300409092, + "learning_rate": 2.9495721009675227e-06, + "loss": 0.3258, + "step": 19604 + }, + { + "epoch": 0.918396027544854, + "grad_norm": 0.5971594850931881, + "learning_rate": 2.9493855412176257e-06, + "loss": 0.314, + "step": 19605 + }, + { + "epoch": 0.9184428725347824, + "grad_norm": 0.5584461955139683, + "learning_rate": 2.9491989788816287e-06, + "loss": 0.3005, + "step": 19606 + }, + { + "epoch": 0.9184897175247108, + "grad_norm": 0.6097101692482282, + "learning_rate": 2.9490124139606045e-06, + "loss": 0.3282, + "step": 19607 + }, + { + "epoch": 0.9185365625146391, + "grad_norm": 0.6184876068866705, + "learning_rate": 2.9488258464556258e-06, + "loss": 0.3419, + "step": 19608 + }, + { + "epoch": 0.9185834075045674, + "grad_norm": 0.5849484265539738, + "learning_rate": 2.948639276367768e-06, + "loss": 0.3169, + "step": 19609 + }, + { + "epoch": 0.9186302524944957, + "grad_norm": 0.6151607459469496, + "learning_rate": 2.9484527036981037e-06, + "loss": 0.3245, + "step": 19610 + }, + { + "epoch": 0.918677097484424, + "grad_norm": 0.5592351554253707, + "learning_rate": 2.9482661284477066e-06, + "loss": 0.2922, + "step": 19611 + }, + { + "epoch": 0.9187239424743524, + "grad_norm": 0.5799088823009769, + "learning_rate": 2.948079550617651e-06, + "loss": 0.2939, + "step": 19612 + }, + { + "epoch": 0.9187707874642806, + "grad_norm": 0.670013148875168, + "learning_rate": 2.94789297020901e-06, + "loss": 0.3068, + "step": 19613 + }, + { + "epoch": 0.918817632454209, + "grad_norm": 0.5607661437396021, + "learning_rate": 2.9477063872228575e-06, + "loss": 0.2858, + "step": 19614 + }, + { + "epoch": 0.9188644774441374, + "grad_norm": 0.6178857301314279, + "learning_rate": 2.9475198016602667e-06, + "loss": 0.3083, + "step": 19615 + }, + { + "epoch": 0.9189113224340657, + "grad_norm": 0.5608585673833002, + "learning_rate": 2.9473332135223125e-06, + "loss": 0.3149, + "step": 19616 + }, + { + "epoch": 0.918958167423994, + "grad_norm": 0.5746321917507367, + "learning_rate": 2.947146622810068e-06, + "loss": 0.3144, + "step": 19617 + }, + { + "epoch": 0.9190050124139223, + "grad_norm": 0.5994604887765841, + "learning_rate": 2.9469600295246074e-06, + "loss": 0.3225, + "step": 19618 + }, + { + "epoch": 0.9190518574038506, + "grad_norm": 0.534022202988829, + "learning_rate": 2.946773433667004e-06, + "loss": 0.297, + "step": 19619 + }, + { + "epoch": 0.919098702393779, + "grad_norm": 0.5799052029475434, + "learning_rate": 2.946586835238332e-06, + "loss": 0.3151, + "step": 19620 + }, + { + "epoch": 0.9191455473837074, + "grad_norm": 0.605560423464005, + "learning_rate": 2.9464002342396647e-06, + "loss": 0.3196, + "step": 19621 + }, + { + "epoch": 0.9191923923736356, + "grad_norm": 0.6279982653085806, + "learning_rate": 2.9462136306720757e-06, + "loss": 0.3338, + "step": 19622 + }, + { + "epoch": 0.9192392373635639, + "grad_norm": 0.6292694157905313, + "learning_rate": 2.9460270245366394e-06, + "loss": 0.3149, + "step": 19623 + }, + { + "epoch": 0.9192860823534923, + "grad_norm": 0.5880298494637626, + "learning_rate": 2.945840415834431e-06, + "loss": 0.303, + "step": 19624 + }, + { + "epoch": 0.9193329273434206, + "grad_norm": 0.556504976296627, + "learning_rate": 2.9456538045665225e-06, + "loss": 0.3002, + "step": 19625 + }, + { + "epoch": 0.919379772333349, + "grad_norm": 0.5959353843562972, + "learning_rate": 2.9454671907339877e-06, + "loss": 0.3137, + "step": 19626 + }, + { + "epoch": 0.9194266173232772, + "grad_norm": 0.5378313751315386, + "learning_rate": 2.9452805743379014e-06, + "loss": 0.3141, + "step": 19627 + }, + { + "epoch": 0.9194734623132056, + "grad_norm": 0.6065828504553465, + "learning_rate": 2.945093955379338e-06, + "loss": 0.3551, + "step": 19628 + }, + { + "epoch": 0.9195203073031339, + "grad_norm": 0.5643259476724395, + "learning_rate": 2.94490733385937e-06, + "loss": 0.3119, + "step": 19629 + }, + { + "epoch": 0.9195671522930623, + "grad_norm": 0.5687046556504535, + "learning_rate": 2.9447207097790725e-06, + "loss": 0.3074, + "step": 19630 + }, + { + "epoch": 0.9196139972829905, + "grad_norm": 0.5896633971887196, + "learning_rate": 2.944534083139519e-06, + "loss": 0.3258, + "step": 19631 + }, + { + "epoch": 0.9196608422729189, + "grad_norm": 0.5920460938457753, + "learning_rate": 2.9443474539417837e-06, + "loss": 0.3167, + "step": 19632 + }, + { + "epoch": 0.9197076872628472, + "grad_norm": 0.5922671819202039, + "learning_rate": 2.9441608221869407e-06, + "loss": 0.3004, + "step": 19633 + }, + { + "epoch": 0.9197545322527756, + "grad_norm": 0.6637904477052244, + "learning_rate": 2.943974187876063e-06, + "loss": 0.3452, + "step": 19634 + }, + { + "epoch": 0.9198013772427039, + "grad_norm": 0.595566110739136, + "learning_rate": 2.943787551010226e-06, + "loss": 0.3206, + "step": 19635 + }, + { + "epoch": 0.9198482222326322, + "grad_norm": 0.5939671297804003, + "learning_rate": 2.9436009115905036e-06, + "loss": 0.3171, + "step": 19636 + }, + { + "epoch": 0.9198950672225605, + "grad_norm": 0.6503983319346859, + "learning_rate": 2.9434142696179686e-06, + "loss": 0.3393, + "step": 19637 + }, + { + "epoch": 0.9199419122124889, + "grad_norm": 0.6219228406955537, + "learning_rate": 2.9432276250936964e-06, + "loss": 0.3139, + "step": 19638 + }, + { + "epoch": 0.9199887572024172, + "grad_norm": 0.5837821282265149, + "learning_rate": 2.9430409780187607e-06, + "loss": 0.3245, + "step": 19639 + }, + { + "epoch": 0.9200356021923455, + "grad_norm": 0.5812904244949275, + "learning_rate": 2.942854328394235e-06, + "loss": 0.3014, + "step": 19640 + }, + { + "epoch": 0.9200824471822738, + "grad_norm": 0.6119715229985395, + "learning_rate": 2.942667676221194e-06, + "loss": 0.3174, + "step": 19641 + }, + { + "epoch": 0.9201292921722022, + "grad_norm": 0.6212718059516248, + "learning_rate": 2.942481021500713e-06, + "loss": 0.3184, + "step": 19642 + }, + { + "epoch": 0.9201761371621305, + "grad_norm": 0.5528410321239193, + "learning_rate": 2.942294364233863e-06, + "loss": 0.3158, + "step": 19643 + }, + { + "epoch": 0.9202229821520589, + "grad_norm": 0.5525277692124708, + "learning_rate": 2.942107704421721e-06, + "loss": 0.2944, + "step": 19644 + }, + { + "epoch": 0.9202698271419871, + "grad_norm": 0.5740071153081132, + "learning_rate": 2.941921042065361e-06, + "loss": 0.3139, + "step": 19645 + }, + { + "epoch": 0.9203166721319155, + "grad_norm": 0.5982404889112193, + "learning_rate": 2.9417343771658556e-06, + "loss": 0.3336, + "step": 19646 + }, + { + "epoch": 0.9203635171218438, + "grad_norm": 0.603542629927689, + "learning_rate": 2.94154770972428e-06, + "loss": 0.3375, + "step": 19647 + }, + { + "epoch": 0.9204103621117722, + "grad_norm": 0.5892941909364462, + "learning_rate": 2.9413610397417088e-06, + "loss": 0.3381, + "step": 19648 + }, + { + "epoch": 0.9204572071017004, + "grad_norm": 0.6393296250166802, + "learning_rate": 2.9411743672192157e-06, + "loss": 0.3247, + "step": 19649 + }, + { + "epoch": 0.9205040520916288, + "grad_norm": 0.576349555635767, + "learning_rate": 2.940987692157874e-06, + "loss": 0.3055, + "step": 19650 + }, + { + "epoch": 0.9205508970815571, + "grad_norm": 0.6401009923900529, + "learning_rate": 2.9408010145587596e-06, + "loss": 0.3291, + "step": 19651 + }, + { + "epoch": 0.9205977420714855, + "grad_norm": 0.652096790040078, + "learning_rate": 2.9406143344229464e-06, + "loss": 0.3323, + "step": 19652 + }, + { + "epoch": 0.9206445870614138, + "grad_norm": 0.6040913795245668, + "learning_rate": 2.9404276517515083e-06, + "loss": 0.3132, + "step": 19653 + }, + { + "epoch": 0.9206914320513421, + "grad_norm": 0.5927168607927487, + "learning_rate": 2.94024096654552e-06, + "loss": 0.3188, + "step": 19654 + }, + { + "epoch": 0.9207382770412704, + "grad_norm": 0.6128104724077342, + "learning_rate": 2.9400542788060547e-06, + "loss": 0.3143, + "step": 19655 + }, + { + "epoch": 0.9207851220311988, + "grad_norm": 0.5801627508759916, + "learning_rate": 2.9398675885341887e-06, + "loss": 0.3136, + "step": 19656 + }, + { + "epoch": 0.9208319670211271, + "grad_norm": 0.5704740422950858, + "learning_rate": 2.9396808957309948e-06, + "loss": 0.3183, + "step": 19657 + }, + { + "epoch": 0.9208788120110554, + "grad_norm": 0.576165961835408, + "learning_rate": 2.9394942003975474e-06, + "loss": 0.304, + "step": 19658 + }, + { + "epoch": 0.9209256570009837, + "grad_norm": 0.5638950663277372, + "learning_rate": 2.9393075025349223e-06, + "loss": 0.3075, + "step": 19659 + }, + { + "epoch": 0.9209725019909121, + "grad_norm": 0.5724295637874992, + "learning_rate": 2.9391208021441923e-06, + "loss": 0.3116, + "step": 19660 + }, + { + "epoch": 0.9210193469808404, + "grad_norm": 0.6509570265464573, + "learning_rate": 2.9389340992264327e-06, + "loss": 0.3208, + "step": 19661 + }, + { + "epoch": 0.9210661919707688, + "grad_norm": 0.5607514326896178, + "learning_rate": 2.9387473937827175e-06, + "loss": 0.3213, + "step": 19662 + }, + { + "epoch": 0.921113036960697, + "grad_norm": 0.6292840005266883, + "learning_rate": 2.938560685814122e-06, + "loss": 0.3306, + "step": 19663 + }, + { + "epoch": 0.9211598819506254, + "grad_norm": 0.5650185168898929, + "learning_rate": 2.938373975321719e-06, + "loss": 0.3215, + "step": 19664 + }, + { + "epoch": 0.9212067269405537, + "grad_norm": 0.640872760983033, + "learning_rate": 2.938187262306584e-06, + "loss": 0.351, + "step": 19665 + }, + { + "epoch": 0.9212535719304821, + "grad_norm": 0.5478050877699812, + "learning_rate": 2.9380005467697926e-06, + "loss": 0.2983, + "step": 19666 + }, + { + "epoch": 0.9213004169204103, + "grad_norm": 0.6030414900808744, + "learning_rate": 2.937813828712417e-06, + "loss": 0.3327, + "step": 19667 + }, + { + "epoch": 0.9213472619103387, + "grad_norm": 0.5645645820317108, + "learning_rate": 2.937627108135533e-06, + "loss": 0.3084, + "step": 19668 + }, + { + "epoch": 0.921394106900267, + "grad_norm": 0.5953010648684602, + "learning_rate": 2.937440385040215e-06, + "loss": 0.3146, + "step": 19669 + }, + { + "epoch": 0.9214409518901954, + "grad_norm": 0.6624878719481981, + "learning_rate": 2.937253659427538e-06, + "loss": 0.3033, + "step": 19670 + }, + { + "epoch": 0.9214877968801237, + "grad_norm": 0.5805439489615122, + "learning_rate": 2.9370669312985755e-06, + "loss": 0.3225, + "step": 19671 + }, + { + "epoch": 0.921534641870052, + "grad_norm": 0.573330408219151, + "learning_rate": 2.9368802006544028e-06, + "loss": 0.319, + "step": 19672 + }, + { + "epoch": 0.9215814868599803, + "grad_norm": 0.5484448734283204, + "learning_rate": 2.9366934674960952e-06, + "loss": 0.2982, + "step": 19673 + }, + { + "epoch": 0.9216283318499087, + "grad_norm": 0.5972679463532682, + "learning_rate": 2.936506731824725e-06, + "loss": 0.3283, + "step": 19674 + }, + { + "epoch": 0.921675176839837, + "grad_norm": 0.5749592288745716, + "learning_rate": 2.936319993641369e-06, + "loss": 0.316, + "step": 19675 + }, + { + "epoch": 0.9217220218297653, + "grad_norm": 0.5792070886883128, + "learning_rate": 2.9361332529471015e-06, + "loss": 0.3109, + "step": 19676 + }, + { + "epoch": 0.9217688668196936, + "grad_norm": 0.5928879695442402, + "learning_rate": 2.9359465097429963e-06, + "loss": 0.3157, + "step": 19677 + }, + { + "epoch": 0.921815711809622, + "grad_norm": 0.5949535645446397, + "learning_rate": 2.935759764030128e-06, + "loss": 0.3092, + "step": 19678 + }, + { + "epoch": 0.9218625567995503, + "grad_norm": 0.7005501494712774, + "learning_rate": 2.935573015809573e-06, + "loss": 0.3339, + "step": 19679 + }, + { + "epoch": 0.9219094017894787, + "grad_norm": 0.6186629423002745, + "learning_rate": 2.9353862650824044e-06, + "loss": 0.316, + "step": 19680 + }, + { + "epoch": 0.9219562467794069, + "grad_norm": 0.5945194773273604, + "learning_rate": 2.9351995118496968e-06, + "loss": 0.3295, + "step": 19681 + }, + { + "epoch": 0.9220030917693353, + "grad_norm": 0.6316566802415438, + "learning_rate": 2.9350127561125253e-06, + "loss": 0.3188, + "step": 19682 + }, + { + "epoch": 0.9220499367592636, + "grad_norm": 0.5735092111094694, + "learning_rate": 2.934825997871965e-06, + "loss": 0.3136, + "step": 19683 + }, + { + "epoch": 0.922096781749192, + "grad_norm": 0.5422849646351868, + "learning_rate": 2.9346392371290905e-06, + "loss": 0.2813, + "step": 19684 + }, + { + "epoch": 0.9221436267391202, + "grad_norm": 0.5737648144734074, + "learning_rate": 2.9344524738849765e-06, + "loss": 0.3089, + "step": 19685 + }, + { + "epoch": 0.9221904717290486, + "grad_norm": 0.5789774688703815, + "learning_rate": 2.9342657081406974e-06, + "loss": 0.3316, + "step": 19686 + }, + { + "epoch": 0.9222373167189769, + "grad_norm": 0.6139678934050242, + "learning_rate": 2.9340789398973284e-06, + "loss": 0.3018, + "step": 19687 + }, + { + "epoch": 0.9222841617089053, + "grad_norm": 0.5775667385194352, + "learning_rate": 2.9338921691559445e-06, + "loss": 0.3102, + "step": 19688 + }, + { + "epoch": 0.9223310066988336, + "grad_norm": 0.5953380438503754, + "learning_rate": 2.9337053959176198e-06, + "loss": 0.2945, + "step": 19689 + }, + { + "epoch": 0.9223778516887619, + "grad_norm": 0.6469714510286666, + "learning_rate": 2.93351862018343e-06, + "loss": 0.3307, + "step": 19690 + }, + { + "epoch": 0.9224246966786902, + "grad_norm": 0.609264076651461, + "learning_rate": 2.933331841954449e-06, + "loss": 0.3405, + "step": 19691 + }, + { + "epoch": 0.9224715416686186, + "grad_norm": 0.6420683613254443, + "learning_rate": 2.9331450612317527e-06, + "loss": 0.3397, + "step": 19692 + }, + { + "epoch": 0.9225183866585469, + "grad_norm": 0.5885526883463916, + "learning_rate": 2.9329582780164144e-06, + "loss": 0.3044, + "step": 19693 + }, + { + "epoch": 0.9225652316484751, + "grad_norm": 0.6047098410665538, + "learning_rate": 2.9327714923095114e-06, + "loss": 0.3222, + "step": 19694 + }, + { + "epoch": 0.9226120766384035, + "grad_norm": 0.5551939341566405, + "learning_rate": 2.9325847041121163e-06, + "loss": 0.3106, + "step": 19695 + }, + { + "epoch": 0.9226589216283319, + "grad_norm": 0.5209354844364649, + "learning_rate": 2.932397913425305e-06, + "loss": 0.288, + "step": 19696 + }, + { + "epoch": 0.9227057666182602, + "grad_norm": 0.5944098805465903, + "learning_rate": 2.932211120250153e-06, + "loss": 0.3317, + "step": 19697 + }, + { + "epoch": 0.9227526116081886, + "grad_norm": 0.5750080144327687, + "learning_rate": 2.9320243245877343e-06, + "loss": 0.3135, + "step": 19698 + }, + { + "epoch": 0.9227994565981168, + "grad_norm": 0.571030152347696, + "learning_rate": 2.9318375264391243e-06, + "loss": 0.3171, + "step": 19699 + }, + { + "epoch": 0.9228463015880451, + "grad_norm": 0.6325748701894806, + "learning_rate": 2.931650725805397e-06, + "loss": 0.3118, + "step": 19700 + }, + { + "epoch": 0.9228931465779735, + "grad_norm": 0.57728526326557, + "learning_rate": 2.9314639226876294e-06, + "loss": 0.2956, + "step": 19701 + }, + { + "epoch": 0.9229399915679019, + "grad_norm": 0.5692099913346668, + "learning_rate": 2.9312771170868947e-06, + "loss": 0.303, + "step": 19702 + }, + { + "epoch": 0.9229868365578301, + "grad_norm": 0.6083413231193773, + "learning_rate": 2.931090309004269e-06, + "loss": 0.2981, + "step": 19703 + }, + { + "epoch": 0.9230336815477584, + "grad_norm": 0.538886141266904, + "learning_rate": 2.930903498440827e-06, + "loss": 0.3083, + "step": 19704 + }, + { + "epoch": 0.9230805265376868, + "grad_norm": 0.5870723682451444, + "learning_rate": 2.930716685397643e-06, + "loss": 0.2873, + "step": 19705 + }, + { + "epoch": 0.9231273715276151, + "grad_norm": 0.6064191710308252, + "learning_rate": 2.930529869875794e-06, + "loss": 0.3181, + "step": 19706 + }, + { + "epoch": 0.9231742165175435, + "grad_norm": 0.552077224477648, + "learning_rate": 2.9303430518763527e-06, + "loss": 0.2903, + "step": 19707 + }, + { + "epoch": 0.9232210615074717, + "grad_norm": 0.565981277688748, + "learning_rate": 2.9301562314003955e-06, + "loss": 0.2872, + "step": 19708 + }, + { + "epoch": 0.9232679064974001, + "grad_norm": 0.6071290903443235, + "learning_rate": 2.9299694084489977e-06, + "loss": 0.3186, + "step": 19709 + }, + { + "epoch": 0.9233147514873284, + "grad_norm": 0.5919098837619056, + "learning_rate": 2.9297825830232336e-06, + "loss": 0.3223, + "step": 19710 + }, + { + "epoch": 0.9233615964772568, + "grad_norm": 0.5742704722464951, + "learning_rate": 2.9295957551241787e-06, + "loss": 0.3154, + "step": 19711 + }, + { + "epoch": 0.923408441467185, + "grad_norm": 0.5926545327853865, + "learning_rate": 2.929408924752909e-06, + "loss": 0.3281, + "step": 19712 + }, + { + "epoch": 0.9234552864571134, + "grad_norm": 0.5683101365206694, + "learning_rate": 2.9292220919104973e-06, + "loss": 0.3007, + "step": 19713 + }, + { + "epoch": 0.9235021314470417, + "grad_norm": 0.6004091781358158, + "learning_rate": 2.929035256598021e-06, + "loss": 0.2973, + "step": 19714 + }, + { + "epoch": 0.9235489764369701, + "grad_norm": 0.5885385760054314, + "learning_rate": 2.928848418816556e-06, + "loss": 0.3334, + "step": 19715 + }, + { + "epoch": 0.9235958214268984, + "grad_norm": 0.5931263533326899, + "learning_rate": 2.9286615785671747e-06, + "loss": 0.3253, + "step": 19716 + }, + { + "epoch": 0.9236426664168267, + "grad_norm": 0.5773848333664829, + "learning_rate": 2.9284747358509534e-06, + "loss": 0.312, + "step": 19717 + }, + { + "epoch": 0.923689511406755, + "grad_norm": 0.5846850795085055, + "learning_rate": 2.9282878906689687e-06, + "loss": 0.3323, + "step": 19718 + }, + { + "epoch": 0.9237363563966834, + "grad_norm": 0.5814832637535696, + "learning_rate": 2.9281010430222952e-06, + "loss": 0.2958, + "step": 19719 + }, + { + "epoch": 0.9237832013866117, + "grad_norm": 0.5823024033671779, + "learning_rate": 2.9279141929120065e-06, + "loss": 0.3168, + "step": 19720 + }, + { + "epoch": 0.92383004637654, + "grad_norm": 0.577890008956307, + "learning_rate": 2.9277273403391804e-06, + "loss": 0.3074, + "step": 19721 + }, + { + "epoch": 0.9238768913664683, + "grad_norm": 0.593649092660891, + "learning_rate": 2.9275404853048905e-06, + "loss": 0.3231, + "step": 19722 + }, + { + "epoch": 0.9239237363563967, + "grad_norm": 0.5684845563816657, + "learning_rate": 2.927353627810212e-06, + "loss": 0.3286, + "step": 19723 + }, + { + "epoch": 0.923970581346325, + "grad_norm": 0.5520292323524472, + "learning_rate": 2.927166767856221e-06, + "loss": 0.2957, + "step": 19724 + }, + { + "epoch": 0.9240174263362534, + "grad_norm": 0.5779610224197882, + "learning_rate": 2.926979905443993e-06, + "loss": 0.303, + "step": 19725 + }, + { + "epoch": 0.9240642713261816, + "grad_norm": 0.6299073627082238, + "learning_rate": 2.9267930405746024e-06, + "loss": 0.3054, + "step": 19726 + }, + { + "epoch": 0.92411111631611, + "grad_norm": 0.6356759908285078, + "learning_rate": 2.9266061732491263e-06, + "loss": 0.3271, + "step": 19727 + }, + { + "epoch": 0.9241579613060383, + "grad_norm": 0.6240216541998137, + "learning_rate": 2.926419303468638e-06, + "loss": 0.3196, + "step": 19728 + }, + { + "epoch": 0.9242048062959667, + "grad_norm": 0.6006388650775362, + "learning_rate": 2.926232431234214e-06, + "loss": 0.3153, + "step": 19729 + }, + { + "epoch": 0.9242516512858949, + "grad_norm": 0.5617306457828174, + "learning_rate": 2.9260455565469293e-06, + "loss": 0.3092, + "step": 19730 + }, + { + "epoch": 0.9242984962758233, + "grad_norm": 0.6470271532283726, + "learning_rate": 2.925858679407859e-06, + "loss": 0.3287, + "step": 19731 + }, + { + "epoch": 0.9243453412657516, + "grad_norm": 0.5514826607192653, + "learning_rate": 2.92567179981808e-06, + "loss": 0.297, + "step": 19732 + }, + { + "epoch": 0.92439218625568, + "grad_norm": 0.6453881592479223, + "learning_rate": 2.9254849177786664e-06, + "loss": 0.3168, + "step": 19733 + }, + { + "epoch": 0.9244390312456083, + "grad_norm": 0.6745588963040716, + "learning_rate": 2.925298033290694e-06, + "loss": 0.3561, + "step": 19734 + }, + { + "epoch": 0.9244858762355366, + "grad_norm": 0.5809897061788022, + "learning_rate": 2.9251111463552377e-06, + "loss": 0.3293, + "step": 19735 + }, + { + "epoch": 0.9245327212254649, + "grad_norm": 0.6073777912907592, + "learning_rate": 2.9249242569733756e-06, + "loss": 0.3332, + "step": 19736 + }, + { + "epoch": 0.9245795662153933, + "grad_norm": 0.6037774099736195, + "learning_rate": 2.9247373651461793e-06, + "loss": 0.3151, + "step": 19737 + }, + { + "epoch": 0.9246264112053216, + "grad_norm": 0.5542216138722973, + "learning_rate": 2.9245504708747263e-06, + "loss": 0.3057, + "step": 19738 + }, + { + "epoch": 0.9246732561952499, + "grad_norm": 0.5527752861097343, + "learning_rate": 2.9243635741600927e-06, + "loss": 0.2869, + "step": 19739 + }, + { + "epoch": 0.9247201011851782, + "grad_norm": 0.5790365460134087, + "learning_rate": 2.9241766750033535e-06, + "loss": 0.3208, + "step": 19740 + }, + { + "epoch": 0.9247669461751066, + "grad_norm": 0.5666022070597718, + "learning_rate": 2.9239897734055835e-06, + "loss": 0.2931, + "step": 19741 + }, + { + "epoch": 0.9248137911650349, + "grad_norm": 0.5276732678497266, + "learning_rate": 2.9238028693678588e-06, + "loss": 0.2877, + "step": 19742 + }, + { + "epoch": 0.9248606361549633, + "grad_norm": 0.60686286495472, + "learning_rate": 2.9236159628912565e-06, + "loss": 0.3183, + "step": 19743 + }, + { + "epoch": 0.9249074811448915, + "grad_norm": 0.6190489209320241, + "learning_rate": 2.9234290539768497e-06, + "loss": 0.3097, + "step": 19744 + }, + { + "epoch": 0.9249543261348199, + "grad_norm": 0.5986120827906427, + "learning_rate": 2.9232421426257147e-06, + "loss": 0.3115, + "step": 19745 + }, + { + "epoch": 0.9250011711247482, + "grad_norm": 0.6020662208752886, + "learning_rate": 2.9230552288389283e-06, + "loss": 0.3289, + "step": 19746 + }, + { + "epoch": 0.9250480161146766, + "grad_norm": 0.597416044804714, + "learning_rate": 2.9228683126175656e-06, + "loss": 0.3155, + "step": 19747 + }, + { + "epoch": 0.9250948611046048, + "grad_norm": 0.6003997399932766, + "learning_rate": 2.9226813939627014e-06, + "loss": 0.3137, + "step": 19748 + }, + { + "epoch": 0.9251417060945332, + "grad_norm": 0.5718996314794286, + "learning_rate": 2.922494472875412e-06, + "loss": 0.3118, + "step": 19749 + }, + { + "epoch": 0.9251885510844615, + "grad_norm": 0.5594451700948572, + "learning_rate": 2.9223075493567742e-06, + "loss": 0.3036, + "step": 19750 + }, + { + "epoch": 0.9252353960743899, + "grad_norm": 0.630814123759818, + "learning_rate": 2.9221206234078615e-06, + "loss": 0.3284, + "step": 19751 + }, + { + "epoch": 0.9252822410643182, + "grad_norm": 0.6352214005610559, + "learning_rate": 2.921933695029751e-06, + "loss": 0.3328, + "step": 19752 + }, + { + "epoch": 0.9253290860542465, + "grad_norm": 0.6235451947930978, + "learning_rate": 2.921746764223518e-06, + "loss": 0.3262, + "step": 19753 + }, + { + "epoch": 0.9253759310441748, + "grad_norm": 0.5520337226913509, + "learning_rate": 2.9215598309902386e-06, + "loss": 0.2952, + "step": 19754 + }, + { + "epoch": 0.9254227760341032, + "grad_norm": 0.6265526160451729, + "learning_rate": 2.9213728953309884e-06, + "loss": 0.3319, + "step": 19755 + }, + { + "epoch": 0.9254696210240315, + "grad_norm": 0.5696741459195289, + "learning_rate": 2.9211859572468426e-06, + "loss": 0.3076, + "step": 19756 + }, + { + "epoch": 0.9255164660139598, + "grad_norm": 0.6444622703163747, + "learning_rate": 2.920999016738878e-06, + "loss": 0.3077, + "step": 19757 + }, + { + "epoch": 0.9255633110038881, + "grad_norm": 0.6004172220721123, + "learning_rate": 2.9208120738081704e-06, + "loss": 0.3063, + "step": 19758 + }, + { + "epoch": 0.9256101559938165, + "grad_norm": 0.6387293467659046, + "learning_rate": 2.9206251284557936e-06, + "loss": 0.3154, + "step": 19759 + }, + { + "epoch": 0.9256570009837448, + "grad_norm": 0.5713586228279911, + "learning_rate": 2.920438180682826e-06, + "loss": 0.2998, + "step": 19760 + }, + { + "epoch": 0.9257038459736732, + "grad_norm": 0.5414642100374095, + "learning_rate": 2.920251230490342e-06, + "loss": 0.2935, + "step": 19761 + }, + { + "epoch": 0.9257506909636014, + "grad_norm": 0.5811287171357483, + "learning_rate": 2.9200642778794177e-06, + "loss": 0.3336, + "step": 19762 + }, + { + "epoch": 0.9257975359535298, + "grad_norm": 0.5431569038641674, + "learning_rate": 2.919877322851129e-06, + "loss": 0.2984, + "step": 19763 + }, + { + "epoch": 0.9258443809434581, + "grad_norm": 0.5850233963701182, + "learning_rate": 2.9196903654065524e-06, + "loss": 0.3234, + "step": 19764 + }, + { + "epoch": 0.9258912259333865, + "grad_norm": 0.6046223255368223, + "learning_rate": 2.9195034055467624e-06, + "loss": 0.3231, + "step": 19765 + }, + { + "epoch": 0.9259380709233147, + "grad_norm": 0.6439088622914563, + "learning_rate": 2.919316443272836e-06, + "loss": 0.3375, + "step": 19766 + }, + { + "epoch": 0.9259849159132431, + "grad_norm": 0.5591298201471466, + "learning_rate": 2.919129478585849e-06, + "loss": 0.3011, + "step": 19767 + }, + { + "epoch": 0.9260317609031714, + "grad_norm": 0.6819888953255885, + "learning_rate": 2.918942511486878e-06, + "loss": 0.3541, + "step": 19768 + }, + { + "epoch": 0.9260786058930998, + "grad_norm": 0.5772487417990071, + "learning_rate": 2.918755541976997e-06, + "loss": 0.3164, + "step": 19769 + }, + { + "epoch": 0.9261254508830281, + "grad_norm": 0.5599424800609109, + "learning_rate": 2.9185685700572836e-06, + "loss": 0.3176, + "step": 19770 + }, + { + "epoch": 0.9261722958729564, + "grad_norm": 0.6993628194027989, + "learning_rate": 2.918381595728813e-06, + "loss": 0.3206, + "step": 19771 + }, + { + "epoch": 0.9262191408628847, + "grad_norm": 0.5912392868974545, + "learning_rate": 2.918194618992662e-06, + "loss": 0.3174, + "step": 19772 + }, + { + "epoch": 0.9262659858528131, + "grad_norm": 0.6018950935617816, + "learning_rate": 2.918007639849906e-06, + "loss": 0.3182, + "step": 19773 + }, + { + "epoch": 0.9263128308427414, + "grad_norm": 0.564139297291616, + "learning_rate": 2.91782065830162e-06, + "loss": 0.3166, + "step": 19774 + }, + { + "epoch": 0.9263596758326696, + "grad_norm": 0.6426878212563479, + "learning_rate": 2.917633674348882e-06, + "loss": 0.3221, + "step": 19775 + }, + { + "epoch": 0.926406520822598, + "grad_norm": 0.5294590989401152, + "learning_rate": 2.9174466879927678e-06, + "loss": 0.2865, + "step": 19776 + }, + { + "epoch": 0.9264533658125264, + "grad_norm": 0.6261455095292597, + "learning_rate": 2.9172596992343516e-06, + "loss": 0.3518, + "step": 19777 + }, + { + "epoch": 0.9265002108024547, + "grad_norm": 0.5560432575771487, + "learning_rate": 2.9170727080747114e-06, + "loss": 0.299, + "step": 19778 + }, + { + "epoch": 0.9265470557923831, + "grad_norm": 0.6140131760540757, + "learning_rate": 2.916885714514923e-06, + "loss": 0.3346, + "step": 19779 + }, + { + "epoch": 0.9265939007823113, + "grad_norm": 0.6025955236016985, + "learning_rate": 2.916698718556061e-06, + "loss": 0.3095, + "step": 19780 + }, + { + "epoch": 0.9266407457722396, + "grad_norm": 0.5885429548712098, + "learning_rate": 2.916511720199203e-06, + "loss": 0.3147, + "step": 19781 + }, + { + "epoch": 0.926687590762168, + "grad_norm": 0.5845205218270955, + "learning_rate": 2.9163247194454253e-06, + "loss": 0.3147, + "step": 19782 + }, + { + "epoch": 0.9267344357520964, + "grad_norm": 0.5471133103859878, + "learning_rate": 2.9161377162958025e-06, + "loss": 0.3118, + "step": 19783 + }, + { + "epoch": 0.9267812807420246, + "grad_norm": 0.5703104872908096, + "learning_rate": 2.9159507107514125e-06, + "loss": 0.3024, + "step": 19784 + }, + { + "epoch": 0.926828125731953, + "grad_norm": 0.5627402240813911, + "learning_rate": 2.915763702813331e-06, + "loss": 0.3308, + "step": 19785 + }, + { + "epoch": 0.9268749707218813, + "grad_norm": 0.618983945009236, + "learning_rate": 2.915576692482633e-06, + "loss": 0.3052, + "step": 19786 + }, + { + "epoch": 0.9269218157118096, + "grad_norm": 0.6079518238145868, + "learning_rate": 2.915389679760396e-06, + "loss": 0.3174, + "step": 19787 + }, + { + "epoch": 0.926968660701738, + "grad_norm": 0.5598891373450938, + "learning_rate": 2.9152026646476956e-06, + "loss": 0.3237, + "step": 19788 + }, + { + "epoch": 0.9270155056916662, + "grad_norm": 0.6480051265642073, + "learning_rate": 2.9150156471456085e-06, + "loss": 0.3344, + "step": 19789 + }, + { + "epoch": 0.9270623506815946, + "grad_norm": 0.5965563356871673, + "learning_rate": 2.914828627255211e-06, + "loss": 0.3206, + "step": 19790 + }, + { + "epoch": 0.927109195671523, + "grad_norm": 0.5307634388894855, + "learning_rate": 2.9146416049775782e-06, + "loss": 0.2988, + "step": 19791 + }, + { + "epoch": 0.9271560406614513, + "grad_norm": 0.5729990449626617, + "learning_rate": 2.9144545803137882e-06, + "loss": 0.2979, + "step": 19792 + }, + { + "epoch": 0.9272028856513795, + "grad_norm": 0.6335404475750946, + "learning_rate": 2.914267553264915e-06, + "loss": 0.3302, + "step": 19793 + }, + { + "epoch": 0.9272497306413079, + "grad_norm": 0.5947607218621422, + "learning_rate": 2.914080523832037e-06, + "loss": 0.3172, + "step": 19794 + }, + { + "epoch": 0.9272965756312362, + "grad_norm": 0.635996579371631, + "learning_rate": 2.9138934920162286e-06, + "loss": 0.3282, + "step": 19795 + }, + { + "epoch": 0.9273434206211646, + "grad_norm": 0.5624535527866817, + "learning_rate": 2.9137064578185686e-06, + "loss": 0.3293, + "step": 19796 + }, + { + "epoch": 0.927390265611093, + "grad_norm": 0.6192969079090246, + "learning_rate": 2.9135194212401315e-06, + "loss": 0.3329, + "step": 19797 + }, + { + "epoch": 0.9274371106010212, + "grad_norm": 0.5617596374446694, + "learning_rate": 2.913332382281994e-06, + "loss": 0.3184, + "step": 19798 + }, + { + "epoch": 0.9274839555909495, + "grad_norm": 0.5855518001929975, + "learning_rate": 2.913145340945232e-06, + "loss": 0.3274, + "step": 19799 + }, + { + "epoch": 0.9275308005808779, + "grad_norm": 0.6038780392824875, + "learning_rate": 2.9129582972309233e-06, + "loss": 0.3351, + "step": 19800 + }, + { + "epoch": 0.9275776455708062, + "grad_norm": 0.5826514543975666, + "learning_rate": 2.9127712511401423e-06, + "loss": 0.3196, + "step": 19801 + }, + { + "epoch": 0.9276244905607345, + "grad_norm": 0.5666317911285174, + "learning_rate": 2.9125842026739675e-06, + "loss": 0.3175, + "step": 19802 + }, + { + "epoch": 0.9276713355506628, + "grad_norm": 0.5606793732044447, + "learning_rate": 2.9123971518334743e-06, + "loss": 0.2897, + "step": 19803 + }, + { + "epoch": 0.9277181805405912, + "grad_norm": 0.5627239829642852, + "learning_rate": 2.9122100986197383e-06, + "loss": 0.3159, + "step": 19804 + }, + { + "epoch": 0.9277650255305195, + "grad_norm": 0.5698432498212793, + "learning_rate": 2.912023043033837e-06, + "loss": 0.3164, + "step": 19805 + }, + { + "epoch": 0.9278118705204479, + "grad_norm": 0.5391105470607306, + "learning_rate": 2.911835985076847e-06, + "loss": 0.3141, + "step": 19806 + }, + { + "epoch": 0.9278587155103761, + "grad_norm": 0.5793246419669659, + "learning_rate": 2.9116489247498446e-06, + "loss": 0.3078, + "step": 19807 + }, + { + "epoch": 0.9279055605003045, + "grad_norm": 0.6137893124293905, + "learning_rate": 2.9114618620539055e-06, + "loss": 0.3397, + "step": 19808 + }, + { + "epoch": 0.9279524054902328, + "grad_norm": 0.6143898773862956, + "learning_rate": 2.9112747969901074e-06, + "loss": 0.3137, + "step": 19809 + }, + { + "epoch": 0.9279992504801612, + "grad_norm": 0.5873713321274572, + "learning_rate": 2.9110877295595256e-06, + "loss": 0.337, + "step": 19810 + }, + { + "epoch": 0.9280460954700894, + "grad_norm": 0.616056669931253, + "learning_rate": 2.9109006597632376e-06, + "loss": 0.3196, + "step": 19811 + }, + { + "epoch": 0.9280929404600178, + "grad_norm": 0.5733314232008462, + "learning_rate": 2.910713587602319e-06, + "loss": 0.2932, + "step": 19812 + }, + { + "epoch": 0.9281397854499461, + "grad_norm": 0.6130336194339238, + "learning_rate": 2.910526513077848e-06, + "loss": 0.3145, + "step": 19813 + }, + { + "epoch": 0.9281866304398745, + "grad_norm": 0.596334431634364, + "learning_rate": 2.9103394361909e-06, + "loss": 0.3093, + "step": 19814 + }, + { + "epoch": 0.9282334754298028, + "grad_norm": 0.5340067352966514, + "learning_rate": 2.9101523569425504e-06, + "loss": 0.2922, + "step": 19815 + }, + { + "epoch": 0.9282803204197311, + "grad_norm": 0.612033741971492, + "learning_rate": 2.909965275333878e-06, + "loss": 0.343, + "step": 19816 + }, + { + "epoch": 0.9283271654096594, + "grad_norm": 0.586247528349062, + "learning_rate": 2.909778191365959e-06, + "loss": 0.3218, + "step": 19817 + }, + { + "epoch": 0.9283740103995878, + "grad_norm": 0.5652681715308748, + "learning_rate": 2.9095911050398684e-06, + "loss": 0.3053, + "step": 19818 + }, + { + "epoch": 0.9284208553895161, + "grad_norm": 0.6230020101135688, + "learning_rate": 2.909404016356685e-06, + "loss": 0.318, + "step": 19819 + }, + { + "epoch": 0.9284677003794444, + "grad_norm": 0.512362297170457, + "learning_rate": 2.909216925317484e-06, + "loss": 0.2911, + "step": 19820 + }, + { + "epoch": 0.9285145453693727, + "grad_norm": 0.6042496034140813, + "learning_rate": 2.9090298319233416e-06, + "loss": 0.3206, + "step": 19821 + }, + { + "epoch": 0.9285613903593011, + "grad_norm": 0.6260743376651376, + "learning_rate": 2.9088427361753363e-06, + "loss": 0.3, + "step": 19822 + }, + { + "epoch": 0.9286082353492294, + "grad_norm": 0.5891191477954086, + "learning_rate": 2.9086556380745436e-06, + "loss": 0.317, + "step": 19823 + }, + { + "epoch": 0.9286550803391578, + "grad_norm": 0.5947170370183491, + "learning_rate": 2.90846853762204e-06, + "loss": 0.3106, + "step": 19824 + }, + { + "epoch": 0.928701925329086, + "grad_norm": 0.5782495240117063, + "learning_rate": 2.9082814348189038e-06, + "loss": 0.3026, + "step": 19825 + }, + { + "epoch": 0.9287487703190144, + "grad_norm": 0.6182935740848077, + "learning_rate": 2.908094329666209e-06, + "loss": 0.3166, + "step": 19826 + }, + { + "epoch": 0.9287956153089427, + "grad_norm": 0.6132197012185006, + "learning_rate": 2.907907222165035e-06, + "loss": 0.3102, + "step": 19827 + }, + { + "epoch": 0.9288424602988711, + "grad_norm": 0.5512389160778097, + "learning_rate": 2.9077201123164573e-06, + "loss": 0.3043, + "step": 19828 + }, + { + "epoch": 0.9288893052887993, + "grad_norm": 0.5697880232859548, + "learning_rate": 2.9075330001215526e-06, + "loss": 0.3015, + "step": 19829 + }, + { + "epoch": 0.9289361502787277, + "grad_norm": 0.6674865245279119, + "learning_rate": 2.9073458855813975e-06, + "loss": 0.3291, + "step": 19830 + }, + { + "epoch": 0.928982995268656, + "grad_norm": 0.5623276474450962, + "learning_rate": 2.90715876869707e-06, + "loss": 0.3174, + "step": 19831 + }, + { + "epoch": 0.9290298402585844, + "grad_norm": 0.5465929233032816, + "learning_rate": 2.9069716494696453e-06, + "loss": 0.2994, + "step": 19832 + }, + { + "epoch": 0.9290766852485127, + "grad_norm": 0.5667281025751321, + "learning_rate": 2.9067845279002018e-06, + "loss": 0.3177, + "step": 19833 + }, + { + "epoch": 0.929123530238441, + "grad_norm": 0.5463187106339935, + "learning_rate": 2.9065974039898154e-06, + "loss": 0.3029, + "step": 19834 + }, + { + "epoch": 0.9291703752283693, + "grad_norm": 0.5321529601619355, + "learning_rate": 2.9064102777395632e-06, + "loss": 0.2991, + "step": 19835 + }, + { + "epoch": 0.9292172202182977, + "grad_norm": 0.5659260403544542, + "learning_rate": 2.906223149150521e-06, + "loss": 0.301, + "step": 19836 + }, + { + "epoch": 0.929264065208226, + "grad_norm": 0.5869863203622333, + "learning_rate": 2.906036018223768e-06, + "loss": 0.3217, + "step": 19837 + }, + { + "epoch": 0.9293109101981543, + "grad_norm": 0.5958605791338283, + "learning_rate": 2.9058488849603796e-06, + "loss": 0.3227, + "step": 19838 + }, + { + "epoch": 0.9293577551880826, + "grad_norm": 0.56823596300694, + "learning_rate": 2.905661749361432e-06, + "loss": 0.3019, + "step": 19839 + }, + { + "epoch": 0.929404600178011, + "grad_norm": 0.5941590016715295, + "learning_rate": 2.9054746114280035e-06, + "loss": 0.3084, + "step": 19840 + }, + { + "epoch": 0.9294514451679393, + "grad_norm": 0.5944409845183316, + "learning_rate": 2.9052874711611712e-06, + "loss": 0.3208, + "step": 19841 + }, + { + "epoch": 0.9294982901578677, + "grad_norm": 0.5820902753194411, + "learning_rate": 2.9051003285620104e-06, + "loss": 0.3198, + "step": 19842 + }, + { + "epoch": 0.9295451351477959, + "grad_norm": 0.6542730647652294, + "learning_rate": 2.904913183631599e-06, + "loss": 0.3328, + "step": 19843 + }, + { + "epoch": 0.9295919801377243, + "grad_norm": 0.6597324116703978, + "learning_rate": 2.9047260363710143e-06, + "loss": 0.3424, + "step": 19844 + }, + { + "epoch": 0.9296388251276526, + "grad_norm": 0.6215676409308294, + "learning_rate": 2.9045388867813334e-06, + "loss": 0.3305, + "step": 19845 + }, + { + "epoch": 0.929685670117581, + "grad_norm": 0.5869739678627146, + "learning_rate": 2.904351734863633e-06, + "loss": 0.3228, + "step": 19846 + }, + { + "epoch": 0.9297325151075092, + "grad_norm": 0.5937965181563154, + "learning_rate": 2.9041645806189887e-06, + "loss": 0.3103, + "step": 19847 + }, + { + "epoch": 0.9297793600974376, + "grad_norm": 0.6193969106966235, + "learning_rate": 2.9039774240484803e-06, + "loss": 0.3036, + "step": 19848 + }, + { + "epoch": 0.9298262050873659, + "grad_norm": 0.5837304508028432, + "learning_rate": 2.9037902651531823e-06, + "loss": 0.313, + "step": 19849 + }, + { + "epoch": 0.9298730500772943, + "grad_norm": 0.5982455608324562, + "learning_rate": 2.9036031039341735e-06, + "loss": 0.3228, + "step": 19850 + }, + { + "epoch": 0.9299198950672226, + "grad_norm": 0.5700033926197609, + "learning_rate": 2.90341594039253e-06, + "loss": 0.313, + "step": 19851 + }, + { + "epoch": 0.9299667400571509, + "grad_norm": 0.6134005504016268, + "learning_rate": 2.903228774529329e-06, + "loss": 0.3326, + "step": 19852 + }, + { + "epoch": 0.9300135850470792, + "grad_norm": 0.5810220340238569, + "learning_rate": 2.903041606345648e-06, + "loss": 0.3012, + "step": 19853 + }, + { + "epoch": 0.9300604300370076, + "grad_norm": 0.5668760285828445, + "learning_rate": 2.902854435842563e-06, + "loss": 0.3012, + "step": 19854 + }, + { + "epoch": 0.9301072750269359, + "grad_norm": 0.5942337059687177, + "learning_rate": 2.902667263021154e-06, + "loss": 0.3017, + "step": 19855 + }, + { + "epoch": 0.9301541200168641, + "grad_norm": 0.5950804701466926, + "learning_rate": 2.9024800878824944e-06, + "loss": 0.3211, + "step": 19856 + }, + { + "epoch": 0.9302009650067925, + "grad_norm": 0.5683496864554974, + "learning_rate": 2.902292910427663e-06, + "loss": 0.3036, + "step": 19857 + }, + { + "epoch": 0.9302478099967209, + "grad_norm": 0.5228237917421219, + "learning_rate": 2.9021057306577376e-06, + "loss": 0.3032, + "step": 19858 + }, + { + "epoch": 0.9302946549866492, + "grad_norm": 0.5404317327886298, + "learning_rate": 2.901918548573795e-06, + "loss": 0.2983, + "step": 19859 + }, + { + "epoch": 0.9303414999765776, + "grad_norm": 0.6381527553138554, + "learning_rate": 2.9017313641769117e-06, + "loss": 0.3175, + "step": 19860 + }, + { + "epoch": 0.9303883449665058, + "grad_norm": 0.6000306669039489, + "learning_rate": 2.901544177468166e-06, + "loss": 0.3148, + "step": 19861 + }, + { + "epoch": 0.9304351899564341, + "grad_norm": 0.5563025434353966, + "learning_rate": 2.9013569884486337e-06, + "loss": 0.2933, + "step": 19862 + }, + { + "epoch": 0.9304820349463625, + "grad_norm": 0.6325057348953398, + "learning_rate": 2.901169797119393e-06, + "loss": 0.3228, + "step": 19863 + }, + { + "epoch": 0.9305288799362909, + "grad_norm": 0.606032754818899, + "learning_rate": 2.9009826034815212e-06, + "loss": 0.32, + "step": 19864 + }, + { + "epoch": 0.9305757249262191, + "grad_norm": 0.5678620413639413, + "learning_rate": 2.900795407536095e-06, + "loss": 0.333, + "step": 19865 + }, + { + "epoch": 0.9306225699161474, + "grad_norm": 0.5503218169802364, + "learning_rate": 2.9006082092841925e-06, + "loss": 0.2771, + "step": 19866 + }, + { + "epoch": 0.9306694149060758, + "grad_norm": 0.6160848725417646, + "learning_rate": 2.9004210087268904e-06, + "loss": 0.3204, + "step": 19867 + }, + { + "epoch": 0.9307162598960041, + "grad_norm": 0.5694626305601512, + "learning_rate": 2.9002338058652652e-06, + "loss": 0.3034, + "step": 19868 + }, + { + "epoch": 0.9307631048859325, + "grad_norm": 0.5747962415358105, + "learning_rate": 2.9000466007003957e-06, + "loss": 0.3076, + "step": 19869 + }, + { + "epoch": 0.9308099498758607, + "grad_norm": 0.6002636422945627, + "learning_rate": 2.8998593932333584e-06, + "loss": 0.3263, + "step": 19870 + }, + { + "epoch": 0.9308567948657891, + "grad_norm": 0.6285747660795363, + "learning_rate": 2.8996721834652307e-06, + "loss": 0.3217, + "step": 19871 + }, + { + "epoch": 0.9309036398557174, + "grad_norm": 0.6005319661418109, + "learning_rate": 2.89948497139709e-06, + "loss": 0.3161, + "step": 19872 + }, + { + "epoch": 0.9309504848456458, + "grad_norm": 0.5425504113754619, + "learning_rate": 2.899297757030014e-06, + "loss": 0.3081, + "step": 19873 + }, + { + "epoch": 0.930997329835574, + "grad_norm": 0.6366890721198938, + "learning_rate": 2.8991105403650797e-06, + "loss": 0.3255, + "step": 19874 + }, + { + "epoch": 0.9310441748255024, + "grad_norm": 0.602810944549172, + "learning_rate": 2.898923321403364e-06, + "loss": 0.32, + "step": 19875 + }, + { + "epoch": 0.9310910198154307, + "grad_norm": 0.6192433451675999, + "learning_rate": 2.898736100145946e-06, + "loss": 0.3265, + "step": 19876 + }, + { + "epoch": 0.9311378648053591, + "grad_norm": 0.56787929724671, + "learning_rate": 2.898548876593901e-06, + "loss": 0.3174, + "step": 19877 + }, + { + "epoch": 0.9311847097952874, + "grad_norm": 0.6249963576245248, + "learning_rate": 2.8983616507483075e-06, + "loss": 0.3242, + "step": 19878 + }, + { + "epoch": 0.9312315547852157, + "grad_norm": 0.591550600454406, + "learning_rate": 2.898174422610243e-06, + "loss": 0.3314, + "step": 19879 + }, + { + "epoch": 0.931278399775144, + "grad_norm": 0.5667944660511891, + "learning_rate": 2.897987192180785e-06, + "loss": 0.3078, + "step": 19880 + }, + { + "epoch": 0.9313252447650724, + "grad_norm": 0.5788103558103265, + "learning_rate": 2.897799959461011e-06, + "loss": 0.3326, + "step": 19881 + }, + { + "epoch": 0.9313720897550007, + "grad_norm": 0.5506866313323404, + "learning_rate": 2.897612724451997e-06, + "loss": 0.2877, + "step": 19882 + }, + { + "epoch": 0.931418934744929, + "grad_norm": 0.6339631832786732, + "learning_rate": 2.897425487154823e-06, + "loss": 0.3301, + "step": 19883 + }, + { + "epoch": 0.9314657797348573, + "grad_norm": 0.5638447097916005, + "learning_rate": 2.8972382475705647e-06, + "loss": 0.3274, + "step": 19884 + }, + { + "epoch": 0.9315126247247857, + "grad_norm": 0.5852417009961837, + "learning_rate": 2.897051005700299e-06, + "loss": 0.3354, + "step": 19885 + }, + { + "epoch": 0.931559469714714, + "grad_norm": 0.5725193887693767, + "learning_rate": 2.8968637615451063e-06, + "loss": 0.302, + "step": 19886 + }, + { + "epoch": 0.9316063147046424, + "grad_norm": 0.5809178844187082, + "learning_rate": 2.8966765151060614e-06, + "loss": 0.3128, + "step": 19887 + }, + { + "epoch": 0.9316531596945706, + "grad_norm": 0.6562860551070365, + "learning_rate": 2.8964892663842437e-06, + "loss": 0.3368, + "step": 19888 + }, + { + "epoch": 0.931700004684499, + "grad_norm": 0.5988418024012108, + "learning_rate": 2.8963020153807294e-06, + "loss": 0.3237, + "step": 19889 + }, + { + "epoch": 0.9317468496744273, + "grad_norm": 0.5893844021910138, + "learning_rate": 2.896114762096597e-06, + "loss": 0.3038, + "step": 19890 + }, + { + "epoch": 0.9317936946643557, + "grad_norm": 0.5804925816717089, + "learning_rate": 2.8959275065329228e-06, + "loss": 0.3311, + "step": 19891 + }, + { + "epoch": 0.9318405396542839, + "grad_norm": 0.5512707130605862, + "learning_rate": 2.895740248690786e-06, + "loss": 0.3057, + "step": 19892 + }, + { + "epoch": 0.9318873846442123, + "grad_norm": 0.5617199198535837, + "learning_rate": 2.895552988571263e-06, + "loss": 0.3174, + "step": 19893 + }, + { + "epoch": 0.9319342296341406, + "grad_norm": 0.5588161404857401, + "learning_rate": 2.8953657261754326e-06, + "loss": 0.31, + "step": 19894 + }, + { + "epoch": 0.931981074624069, + "grad_norm": 0.6547628371577363, + "learning_rate": 2.8951784615043716e-06, + "loss": 0.3176, + "step": 19895 + }, + { + "epoch": 0.9320279196139973, + "grad_norm": 0.6102547302532285, + "learning_rate": 2.8949911945591576e-06, + "loss": 0.3341, + "step": 19896 + }, + { + "epoch": 0.9320747646039256, + "grad_norm": 0.5973425008985216, + "learning_rate": 2.894803925340869e-06, + "loss": 0.3175, + "step": 19897 + }, + { + "epoch": 0.9321216095938539, + "grad_norm": 0.5566376425077697, + "learning_rate": 2.8946166538505832e-06, + "loss": 0.3039, + "step": 19898 + }, + { + "epoch": 0.9321684545837823, + "grad_norm": 0.5862301653029722, + "learning_rate": 2.894429380089377e-06, + "loss": 0.3246, + "step": 19899 + }, + { + "epoch": 0.9322152995737106, + "grad_norm": 0.6714007309980813, + "learning_rate": 2.8942421040583297e-06, + "loss": 0.2968, + "step": 19900 + }, + { + "epoch": 0.9322621445636389, + "grad_norm": 0.5836075067882358, + "learning_rate": 2.894054825758518e-06, + "loss": 0.3083, + "step": 19901 + }, + { + "epoch": 0.9323089895535672, + "grad_norm": 0.5217489523128817, + "learning_rate": 2.8938675451910185e-06, + "loss": 0.3134, + "step": 19902 + }, + { + "epoch": 0.9323558345434956, + "grad_norm": 0.5343696261227986, + "learning_rate": 2.893680262356911e-06, + "loss": 0.2947, + "step": 19903 + }, + { + "epoch": 0.9324026795334239, + "grad_norm": 0.5960951446758718, + "learning_rate": 2.893492977257274e-06, + "loss": 0.3074, + "step": 19904 + }, + { + "epoch": 0.9324495245233523, + "grad_norm": 0.574089997809287, + "learning_rate": 2.8933056898931824e-06, + "loss": 0.28, + "step": 19905 + }, + { + "epoch": 0.9324963695132805, + "grad_norm": 0.5700918693741119, + "learning_rate": 2.8931184002657152e-06, + "loss": 0.3129, + "step": 19906 + }, + { + "epoch": 0.9325432145032089, + "grad_norm": 0.590653525841209, + "learning_rate": 2.892931108375951e-06, + "loss": 0.3021, + "step": 19907 + }, + { + "epoch": 0.9325900594931372, + "grad_norm": 0.6060264012681137, + "learning_rate": 2.892743814224967e-06, + "loss": 0.3051, + "step": 19908 + }, + { + "epoch": 0.9326369044830656, + "grad_norm": 0.6601097968762338, + "learning_rate": 2.892556517813841e-06, + "loss": 0.3595, + "step": 19909 + }, + { + "epoch": 0.9326837494729938, + "grad_norm": 0.6521749202526288, + "learning_rate": 2.8923692191436507e-06, + "loss": 0.3183, + "step": 19910 + }, + { + "epoch": 0.9327305944629222, + "grad_norm": 0.5501043716005406, + "learning_rate": 2.892181918215475e-06, + "loss": 0.2964, + "step": 19911 + }, + { + "epoch": 0.9327774394528505, + "grad_norm": 0.6164668523045895, + "learning_rate": 2.8919946150303895e-06, + "loss": 0.3395, + "step": 19912 + }, + { + "epoch": 0.9328242844427789, + "grad_norm": 0.6247180255317423, + "learning_rate": 2.8918073095894745e-06, + "loss": 0.3114, + "step": 19913 + }, + { + "epoch": 0.9328711294327072, + "grad_norm": 0.5792019569438553, + "learning_rate": 2.891620001893806e-06, + "loss": 0.3038, + "step": 19914 + }, + { + "epoch": 0.9329179744226355, + "grad_norm": 0.6349936131117312, + "learning_rate": 2.8914326919444643e-06, + "loss": 0.3189, + "step": 19915 + }, + { + "epoch": 0.9329648194125638, + "grad_norm": 0.6617557553236607, + "learning_rate": 2.891245379742525e-06, + "loss": 0.3295, + "step": 19916 + }, + { + "epoch": 0.9330116644024922, + "grad_norm": 0.5828866595283931, + "learning_rate": 2.891058065289067e-06, + "loss": 0.3219, + "step": 19917 + }, + { + "epoch": 0.9330585093924205, + "grad_norm": 0.5710683990993773, + "learning_rate": 2.890870748585168e-06, + "loss": 0.3236, + "step": 19918 + }, + { + "epoch": 0.9331053543823488, + "grad_norm": 0.6298492232436026, + "learning_rate": 2.8906834296319065e-06, + "loss": 0.3351, + "step": 19919 + }, + { + "epoch": 0.9331521993722771, + "grad_norm": 0.598064591858883, + "learning_rate": 2.890496108430359e-06, + "loss": 0.297, + "step": 19920 + }, + { + "epoch": 0.9331990443622055, + "grad_norm": 0.5865313023925756, + "learning_rate": 2.890308784981606e-06, + "loss": 0.3232, + "step": 19921 + }, + { + "epoch": 0.9332458893521338, + "grad_norm": 0.5777988861029304, + "learning_rate": 2.8901214592867233e-06, + "loss": 0.3051, + "step": 19922 + }, + { + "epoch": 0.9332927343420622, + "grad_norm": 0.5968901053970573, + "learning_rate": 2.889934131346789e-06, + "loss": 0.3002, + "step": 19923 + }, + { + "epoch": 0.9333395793319904, + "grad_norm": 0.5948899969708453, + "learning_rate": 2.889746801162882e-06, + "loss": 0.3259, + "step": 19924 + }, + { + "epoch": 0.9333864243219188, + "grad_norm": 0.5288774605844111, + "learning_rate": 2.8895594687360816e-06, + "loss": 0.2894, + "step": 19925 + }, + { + "epoch": 0.9334332693118471, + "grad_norm": 0.6886861182861167, + "learning_rate": 2.889372134067463e-06, + "loss": 0.3339, + "step": 19926 + }, + { + "epoch": 0.9334801143017755, + "grad_norm": 0.6215683293013631, + "learning_rate": 2.8891847971581055e-06, + "loss": 0.3453, + "step": 19927 + }, + { + "epoch": 0.9335269592917037, + "grad_norm": 0.5922164133821577, + "learning_rate": 2.888997458009088e-06, + "loss": 0.3311, + "step": 19928 + }, + { + "epoch": 0.9335738042816321, + "grad_norm": 0.5727086715453116, + "learning_rate": 2.8888101166214876e-06, + "loss": 0.3033, + "step": 19929 + }, + { + "epoch": 0.9336206492715604, + "grad_norm": 0.655993704713643, + "learning_rate": 2.8886227729963824e-06, + "loss": 0.3654, + "step": 19930 + }, + { + "epoch": 0.9336674942614888, + "grad_norm": 0.5915012080919698, + "learning_rate": 2.8884354271348515e-06, + "loss": 0.3174, + "step": 19931 + }, + { + "epoch": 0.9337143392514171, + "grad_norm": 0.6018262810529188, + "learning_rate": 2.8882480790379722e-06, + "loss": 0.3277, + "step": 19932 + }, + { + "epoch": 0.9337611842413454, + "grad_norm": 0.5745410757357674, + "learning_rate": 2.8880607287068225e-06, + "loss": 0.3201, + "step": 19933 + }, + { + "epoch": 0.9338080292312737, + "grad_norm": 0.5479567518734019, + "learning_rate": 2.887873376142481e-06, + "loss": 0.3009, + "step": 19934 + }, + { + "epoch": 0.9338548742212021, + "grad_norm": 0.5656350738117308, + "learning_rate": 2.8876860213460256e-06, + "loss": 0.3189, + "step": 19935 + }, + { + "epoch": 0.9339017192111304, + "grad_norm": 0.6221606568357289, + "learning_rate": 2.8874986643185345e-06, + "loss": 0.3094, + "step": 19936 + }, + { + "epoch": 0.9339485642010587, + "grad_norm": 0.6634496928069595, + "learning_rate": 2.8873113050610864e-06, + "loss": 0.3408, + "step": 19937 + }, + { + "epoch": 0.933995409190987, + "grad_norm": 0.633823278773052, + "learning_rate": 2.8871239435747584e-06, + "loss": 0.3118, + "step": 19938 + }, + { + "epoch": 0.9340422541809154, + "grad_norm": 0.6250726879895728, + "learning_rate": 2.8869365798606304e-06, + "loss": 0.3208, + "step": 19939 + }, + { + "epoch": 0.9340890991708437, + "grad_norm": 0.620288533852069, + "learning_rate": 2.886749213919779e-06, + "loss": 0.3158, + "step": 19940 + }, + { + "epoch": 0.9341359441607721, + "grad_norm": 0.6531436664267534, + "learning_rate": 2.886561845753283e-06, + "loss": 0.3643, + "step": 19941 + }, + { + "epoch": 0.9341827891507003, + "grad_norm": 0.5636051410428744, + "learning_rate": 2.8863744753622208e-06, + "loss": 0.2992, + "step": 19942 + }, + { + "epoch": 0.9342296341406287, + "grad_norm": 0.5956162439278939, + "learning_rate": 2.88618710274767e-06, + "loss": 0.322, + "step": 19943 + }, + { + "epoch": 0.934276479130557, + "grad_norm": 0.61414563515264, + "learning_rate": 2.8859997279107103e-06, + "loss": 0.3434, + "step": 19944 + }, + { + "epoch": 0.9343233241204854, + "grad_norm": 0.5800455971570562, + "learning_rate": 2.8858123508524193e-06, + "loss": 0.2884, + "step": 19945 + }, + { + "epoch": 0.9343701691104136, + "grad_norm": 0.5873972624577373, + "learning_rate": 2.885624971573875e-06, + "loss": 0.3109, + "step": 19946 + }, + { + "epoch": 0.934417014100342, + "grad_norm": 0.5757033217386656, + "learning_rate": 2.885437590076156e-06, + "loss": 0.3078, + "step": 19947 + }, + { + "epoch": 0.9344638590902703, + "grad_norm": 0.5581088947258733, + "learning_rate": 2.88525020636034e-06, + "loss": 0.2846, + "step": 19948 + }, + { + "epoch": 0.9345107040801987, + "grad_norm": 0.6215287373303273, + "learning_rate": 2.8850628204275068e-06, + "loss": 0.3364, + "step": 19949 + }, + { + "epoch": 0.934557549070127, + "grad_norm": 0.6560855352852776, + "learning_rate": 2.884875432278733e-06, + "loss": 0.3214, + "step": 19950 + }, + { + "epoch": 0.9346043940600552, + "grad_norm": 0.6276990927070281, + "learning_rate": 2.8846880419150978e-06, + "loss": 0.3336, + "step": 19951 + }, + { + "epoch": 0.9346512390499836, + "grad_norm": 0.6377562307217668, + "learning_rate": 2.8845006493376796e-06, + "loss": 0.3309, + "step": 19952 + }, + { + "epoch": 0.934698084039912, + "grad_norm": 0.5965089590307815, + "learning_rate": 2.884313254547558e-06, + "loss": 0.2945, + "step": 19953 + }, + { + "epoch": 0.9347449290298403, + "grad_norm": 0.5936701294648856, + "learning_rate": 2.884125857545809e-06, + "loss": 0.3071, + "step": 19954 + }, + { + "epoch": 0.9347917740197685, + "grad_norm": 0.6614808093415958, + "learning_rate": 2.883938458333513e-06, + "loss": 0.3285, + "step": 19955 + }, + { + "epoch": 0.9348386190096969, + "grad_norm": 0.5891720836196185, + "learning_rate": 2.8837510569117473e-06, + "loss": 0.3321, + "step": 19956 + }, + { + "epoch": 0.9348854639996252, + "grad_norm": 0.57917272906091, + "learning_rate": 2.883563653281591e-06, + "loss": 0.3154, + "step": 19957 + }, + { + "epoch": 0.9349323089895536, + "grad_norm": 0.6003179547861031, + "learning_rate": 2.883376247444121e-06, + "loss": 0.3283, + "step": 19958 + }, + { + "epoch": 0.934979153979482, + "grad_norm": 0.5737383525889297, + "learning_rate": 2.8831888394004188e-06, + "loss": 0.3128, + "step": 19959 + }, + { + "epoch": 0.9350259989694102, + "grad_norm": 0.5836738226920937, + "learning_rate": 2.88300142915156e-06, + "loss": 0.3119, + "step": 19960 + }, + { + "epoch": 0.9350728439593385, + "grad_norm": 0.5736992470472718, + "learning_rate": 2.8828140166986245e-06, + "loss": 0.3118, + "step": 19961 + }, + { + "epoch": 0.9351196889492669, + "grad_norm": 0.5599821641602772, + "learning_rate": 2.8826266020426908e-06, + "loss": 0.3125, + "step": 19962 + }, + { + "epoch": 0.9351665339391952, + "grad_norm": 0.5909072958713982, + "learning_rate": 2.8824391851848377e-06, + "loss": 0.3056, + "step": 19963 + }, + { + "epoch": 0.9352133789291235, + "grad_norm": 0.621596044708717, + "learning_rate": 2.882251766126142e-06, + "loss": 0.3223, + "step": 19964 + }, + { + "epoch": 0.9352602239190518, + "grad_norm": 0.5636942744049369, + "learning_rate": 2.882064344867684e-06, + "loss": 0.31, + "step": 19965 + }, + { + "epoch": 0.9353070689089802, + "grad_norm": 0.593283474261525, + "learning_rate": 2.881876921410541e-06, + "loss": 0.3481, + "step": 19966 + }, + { + "epoch": 0.9353539138989085, + "grad_norm": 0.6023090581651117, + "learning_rate": 2.881689495755794e-06, + "loss": 0.3325, + "step": 19967 + }, + { + "epoch": 0.9354007588888369, + "grad_norm": 0.604924509433823, + "learning_rate": 2.8815020679045192e-06, + "loss": 0.3237, + "step": 19968 + }, + { + "epoch": 0.9354476038787651, + "grad_norm": 0.5690056664260806, + "learning_rate": 2.881314637857795e-06, + "loss": 0.3082, + "step": 19969 + }, + { + "epoch": 0.9354944488686935, + "grad_norm": 0.5360156563239339, + "learning_rate": 2.8811272056167015e-06, + "loss": 0.2945, + "step": 19970 + }, + { + "epoch": 0.9355412938586218, + "grad_norm": 0.5427171939575929, + "learning_rate": 2.8809397711823173e-06, + "loss": 0.3033, + "step": 19971 + }, + { + "epoch": 0.9355881388485502, + "grad_norm": 0.5538284217956132, + "learning_rate": 2.88075233455572e-06, + "loss": 0.3223, + "step": 19972 + }, + { + "epoch": 0.9356349838384784, + "grad_norm": 0.5920670236908787, + "learning_rate": 2.880564895737988e-06, + "loss": 0.3119, + "step": 19973 + }, + { + "epoch": 0.9356818288284068, + "grad_norm": 0.5583148820596286, + "learning_rate": 2.8803774547302014e-06, + "loss": 0.3039, + "step": 19974 + }, + { + "epoch": 0.9357286738183351, + "grad_norm": 0.5676777064921595, + "learning_rate": 2.880190011533438e-06, + "loss": 0.3087, + "step": 19975 + }, + { + "epoch": 0.9357755188082635, + "grad_norm": 0.5865061011899878, + "learning_rate": 2.8800025661487764e-06, + "loss": 0.3281, + "step": 19976 + }, + { + "epoch": 0.9358223637981918, + "grad_norm": 0.6012237226521785, + "learning_rate": 2.879815118577296e-06, + "loss": 0.3406, + "step": 19977 + }, + { + "epoch": 0.9358692087881201, + "grad_norm": 0.6163117801418132, + "learning_rate": 2.879627668820075e-06, + "loss": 0.3267, + "step": 19978 + }, + { + "epoch": 0.9359160537780484, + "grad_norm": 0.6152010620408971, + "learning_rate": 2.879440216878192e-06, + "loss": 0.3038, + "step": 19979 + }, + { + "epoch": 0.9359628987679768, + "grad_norm": 0.5753678219203726, + "learning_rate": 2.879252762752726e-06, + "loss": 0.3221, + "step": 19980 + }, + { + "epoch": 0.9360097437579051, + "grad_norm": 0.5852510131654718, + "learning_rate": 2.879065306444756e-06, + "loss": 0.3202, + "step": 19981 + }, + { + "epoch": 0.9360565887478334, + "grad_norm": 0.5898293381513776, + "learning_rate": 2.87887784795536e-06, + "loss": 0.3351, + "step": 19982 + }, + { + "epoch": 0.9361034337377617, + "grad_norm": 0.6207380845288912, + "learning_rate": 2.878690387285617e-06, + "loss": 0.3076, + "step": 19983 + }, + { + "epoch": 0.9361502787276901, + "grad_norm": 0.5663358659975368, + "learning_rate": 2.8785029244366057e-06, + "loss": 0.2999, + "step": 19984 + }, + { + "epoch": 0.9361971237176184, + "grad_norm": 0.6103747327132605, + "learning_rate": 2.8783154594094065e-06, + "loss": 0.3239, + "step": 19985 + }, + { + "epoch": 0.9362439687075468, + "grad_norm": 0.583664674374496, + "learning_rate": 2.8781279922050962e-06, + "loss": 0.3264, + "step": 19986 + }, + { + "epoch": 0.936290813697475, + "grad_norm": 0.6150183345035082, + "learning_rate": 2.877940522824754e-06, + "loss": 0.3192, + "step": 19987 + }, + { + "epoch": 0.9363376586874034, + "grad_norm": 0.6617776812500796, + "learning_rate": 2.8777530512694597e-06, + "loss": 0.3377, + "step": 19988 + }, + { + "epoch": 0.9363845036773317, + "grad_norm": 0.6438487930159543, + "learning_rate": 2.8775655775402916e-06, + "loss": 0.3147, + "step": 19989 + }, + { + "epoch": 0.9364313486672601, + "grad_norm": 0.5901765752068803, + "learning_rate": 2.8773781016383277e-06, + "loss": 0.332, + "step": 19990 + }, + { + "epoch": 0.9364781936571883, + "grad_norm": 0.5743149293046208, + "learning_rate": 2.8771906235646484e-06, + "loss": 0.319, + "step": 19991 + }, + { + "epoch": 0.9365250386471167, + "grad_norm": 0.6042412544429628, + "learning_rate": 2.877003143320332e-06, + "loss": 0.2874, + "step": 19992 + }, + { + "epoch": 0.936571883637045, + "grad_norm": 0.6252831296149397, + "learning_rate": 2.8768156609064567e-06, + "loss": 0.3165, + "step": 19993 + }, + { + "epoch": 0.9366187286269734, + "grad_norm": 0.5643354311298083, + "learning_rate": 2.8766281763241018e-06, + "loss": 0.3186, + "step": 19994 + }, + { + "epoch": 0.9366655736169017, + "grad_norm": 0.5841628506635425, + "learning_rate": 2.8764406895743474e-06, + "loss": 0.3182, + "step": 19995 + }, + { + "epoch": 0.93671241860683, + "grad_norm": 0.5874729068601923, + "learning_rate": 2.8762532006582703e-06, + "loss": 0.3086, + "step": 19996 + }, + { + "epoch": 0.9367592635967583, + "grad_norm": 0.58034479836818, + "learning_rate": 2.8760657095769513e-06, + "loss": 0.2948, + "step": 19997 + }, + { + "epoch": 0.9368061085866867, + "grad_norm": 0.5585846675451865, + "learning_rate": 2.8758782163314686e-06, + "loss": 0.302, + "step": 19998 + }, + { + "epoch": 0.936852953576615, + "grad_norm": 0.5601221542224304, + "learning_rate": 2.8756907209229013e-06, + "loss": 0.3008, + "step": 19999 + }, + { + "epoch": 0.9368997985665433, + "grad_norm": 0.5702457292832357, + "learning_rate": 2.8755032233523278e-06, + "loss": 0.315, + "step": 20000 + }, + { + "epoch": 0.9369466435564716, + "grad_norm": 0.5927979707125425, + "learning_rate": 2.875315723620828e-06, + "loss": 0.314, + "step": 20001 + }, + { + "epoch": 0.9369934885464, + "grad_norm": 0.6201051616749649, + "learning_rate": 2.875128221729481e-06, + "loss": 0.3275, + "step": 20002 + }, + { + "epoch": 0.9370403335363283, + "grad_norm": 0.5710193329166505, + "learning_rate": 2.874940717679364e-06, + "loss": 0.3136, + "step": 20003 + }, + { + "epoch": 0.9370871785262567, + "grad_norm": 0.5688382192638936, + "learning_rate": 2.8747532114715587e-06, + "loss": 0.3321, + "step": 20004 + }, + { + "epoch": 0.9371340235161849, + "grad_norm": 0.5548399240667163, + "learning_rate": 2.8745657031071422e-06, + "loss": 0.3109, + "step": 20005 + }, + { + "epoch": 0.9371808685061133, + "grad_norm": 0.5501376135767637, + "learning_rate": 2.874378192587195e-06, + "loss": 0.3059, + "step": 20006 + }, + { + "epoch": 0.9372277134960416, + "grad_norm": 0.596645240041601, + "learning_rate": 2.874190679912795e-06, + "loss": 0.3225, + "step": 20007 + }, + { + "epoch": 0.93727455848597, + "grad_norm": 0.6241836788339893, + "learning_rate": 2.874003165085021e-06, + "loss": 0.3145, + "step": 20008 + }, + { + "epoch": 0.9373214034758982, + "grad_norm": 0.6185833747833709, + "learning_rate": 2.8738156481049532e-06, + "loss": 0.3094, + "step": 20009 + }, + { + "epoch": 0.9373682484658266, + "grad_norm": 0.5501075236370205, + "learning_rate": 2.8736281289736706e-06, + "loss": 0.301, + "step": 20010 + }, + { + "epoch": 0.9374150934557549, + "grad_norm": 0.6073932884067695, + "learning_rate": 2.8734406076922516e-06, + "loss": 0.323, + "step": 20011 + }, + { + "epoch": 0.9374619384456833, + "grad_norm": 0.6241398577028833, + "learning_rate": 2.8732530842617765e-06, + "loss": 0.3286, + "step": 20012 + }, + { + "epoch": 0.9375087834356116, + "grad_norm": 0.5775695665182752, + "learning_rate": 2.873065558683323e-06, + "loss": 0.3138, + "step": 20013 + }, + { + "epoch": 0.9375556284255399, + "grad_norm": 0.6155121969048106, + "learning_rate": 2.872878030957971e-06, + "loss": 0.3084, + "step": 20014 + }, + { + "epoch": 0.9376024734154682, + "grad_norm": 0.5361870997417671, + "learning_rate": 2.8726905010868e-06, + "loss": 0.3172, + "step": 20015 + }, + { + "epoch": 0.9376493184053966, + "grad_norm": 0.5642922741459382, + "learning_rate": 2.8725029690708887e-06, + "loss": 0.321, + "step": 20016 + }, + { + "epoch": 0.9376961633953249, + "grad_norm": 0.6310092859867102, + "learning_rate": 2.872315434911317e-06, + "loss": 0.3245, + "step": 20017 + }, + { + "epoch": 0.9377430083852532, + "grad_norm": 0.5865455250508604, + "learning_rate": 2.8721278986091626e-06, + "loss": 0.3131, + "step": 20018 + }, + { + "epoch": 0.9377898533751815, + "grad_norm": 0.567379930880065, + "learning_rate": 2.871940360165506e-06, + "loss": 0.3004, + "step": 20019 + }, + { + "epoch": 0.9378366983651099, + "grad_norm": 0.5353590246444072, + "learning_rate": 2.8717528195814265e-06, + "loss": 0.318, + "step": 20020 + }, + { + "epoch": 0.9378835433550382, + "grad_norm": 0.6589370018859013, + "learning_rate": 2.871565276858003e-06, + "loss": 0.3162, + "step": 20021 + }, + { + "epoch": 0.9379303883449666, + "grad_norm": 0.5705682207567597, + "learning_rate": 2.871377731996313e-06, + "loss": 0.3311, + "step": 20022 + }, + { + "epoch": 0.9379772333348948, + "grad_norm": 0.6353495513852037, + "learning_rate": 2.87119018499744e-06, + "loss": 0.323, + "step": 20023 + }, + { + "epoch": 0.9380240783248232, + "grad_norm": 0.5755757253630966, + "learning_rate": 2.871002635862459e-06, + "loss": 0.2873, + "step": 20024 + }, + { + "epoch": 0.9380709233147515, + "grad_norm": 0.5685776011016522, + "learning_rate": 2.870815084592452e-06, + "loss": 0.3017, + "step": 20025 + }, + { + "epoch": 0.9381177683046799, + "grad_norm": 0.5973375083271154, + "learning_rate": 2.8706275311884967e-06, + "loss": 0.3126, + "step": 20026 + }, + { + "epoch": 0.9381646132946081, + "grad_norm": 0.5772525147404752, + "learning_rate": 2.8704399756516745e-06, + "loss": 0.3184, + "step": 20027 + }, + { + "epoch": 0.9382114582845364, + "grad_norm": 0.6231063843900646, + "learning_rate": 2.8702524179830617e-06, + "loss": 0.3304, + "step": 20028 + }, + { + "epoch": 0.9382583032744648, + "grad_norm": 0.6540149048244921, + "learning_rate": 2.8700648581837406e-06, + "loss": 0.3405, + "step": 20029 + }, + { + "epoch": 0.9383051482643932, + "grad_norm": 0.5980470941507895, + "learning_rate": 2.8698772962547884e-06, + "loss": 0.2977, + "step": 20030 + }, + { + "epoch": 0.9383519932543215, + "grad_norm": 0.557241572253655, + "learning_rate": 2.869689732197285e-06, + "loss": 0.2983, + "step": 20031 + }, + { + "epoch": 0.9383988382442497, + "grad_norm": 0.6121253618891772, + "learning_rate": 2.8695021660123114e-06, + "loss": 0.3184, + "step": 20032 + }, + { + "epoch": 0.9384456832341781, + "grad_norm": 0.5431501637956047, + "learning_rate": 2.8693145977009453e-06, + "loss": 0.3049, + "step": 20033 + }, + { + "epoch": 0.9384925282241064, + "grad_norm": 0.5930199251363621, + "learning_rate": 2.869127027264265e-06, + "loss": 0.2998, + "step": 20034 + }, + { + "epoch": 0.9385393732140348, + "grad_norm": 0.548650216978175, + "learning_rate": 2.8689394547033532e-06, + "loss": 0.313, + "step": 20035 + }, + { + "epoch": 0.938586218203963, + "grad_norm": 0.5967713040712745, + "learning_rate": 2.8687518800192864e-06, + "loss": 0.3279, + "step": 20036 + }, + { + "epoch": 0.9386330631938914, + "grad_norm": 0.6337795475364337, + "learning_rate": 2.8685643032131463e-06, + "loss": 0.3218, + "step": 20037 + }, + { + "epoch": 0.9386799081838197, + "grad_norm": 0.6492278672256535, + "learning_rate": 2.868376724286011e-06, + "loss": 0.3459, + "step": 20038 + }, + { + "epoch": 0.9387267531737481, + "grad_norm": 0.5131071363061541, + "learning_rate": 2.86818914323896e-06, + "loss": 0.2999, + "step": 20039 + }, + { + "epoch": 0.9387735981636764, + "grad_norm": 0.6093606547079844, + "learning_rate": 2.8680015600730727e-06, + "loss": 0.327, + "step": 20040 + }, + { + "epoch": 0.9388204431536047, + "grad_norm": 0.6111746141274373, + "learning_rate": 2.8678139747894296e-06, + "loss": 0.3008, + "step": 20041 + }, + { + "epoch": 0.938867288143533, + "grad_norm": 0.6362155260805287, + "learning_rate": 2.8676263873891087e-06, + "loss": 0.3121, + "step": 20042 + }, + { + "epoch": 0.9389141331334614, + "grad_norm": 0.6156772541530031, + "learning_rate": 2.8674387978731905e-06, + "loss": 0.32, + "step": 20043 + }, + { + "epoch": 0.9389609781233897, + "grad_norm": 0.6563649786761526, + "learning_rate": 2.8672512062427553e-06, + "loss": 0.3376, + "step": 20044 + }, + { + "epoch": 0.939007823113318, + "grad_norm": 0.6088149701190712, + "learning_rate": 2.867063612498881e-06, + "loss": 0.3161, + "step": 20045 + }, + { + "epoch": 0.9390546681032463, + "grad_norm": 0.5792467832352918, + "learning_rate": 2.8668760166426476e-06, + "loss": 0.2893, + "step": 20046 + }, + { + "epoch": 0.9391015130931747, + "grad_norm": 0.6119940117158803, + "learning_rate": 2.8666884186751354e-06, + "loss": 0.3367, + "step": 20047 + }, + { + "epoch": 0.939148358083103, + "grad_norm": 0.5597243112169843, + "learning_rate": 2.8665008185974236e-06, + "loss": 0.3022, + "step": 20048 + }, + { + "epoch": 0.9391952030730314, + "grad_norm": 0.5766510718335575, + "learning_rate": 2.8663132164105912e-06, + "loss": 0.3136, + "step": 20049 + }, + { + "epoch": 0.9392420480629596, + "grad_norm": 0.630989388778704, + "learning_rate": 2.866125612115719e-06, + "loss": 0.3051, + "step": 20050 + }, + { + "epoch": 0.939288893052888, + "grad_norm": 0.6247372508458987, + "learning_rate": 2.8659380057138854e-06, + "loss": 0.3199, + "step": 20051 + }, + { + "epoch": 0.9393357380428163, + "grad_norm": 0.5660123338877319, + "learning_rate": 2.8657503972061705e-06, + "loss": 0.314, + "step": 20052 + }, + { + "epoch": 0.9393825830327447, + "grad_norm": 0.7916447903079554, + "learning_rate": 2.865562786593654e-06, + "loss": 0.3018, + "step": 20053 + }, + { + "epoch": 0.9394294280226729, + "grad_norm": 0.6084982618831559, + "learning_rate": 2.8653751738774153e-06, + "loss": 0.3152, + "step": 20054 + }, + { + "epoch": 0.9394762730126013, + "grad_norm": 0.6211630689929247, + "learning_rate": 2.8651875590585355e-06, + "loss": 0.3376, + "step": 20055 + }, + { + "epoch": 0.9395231180025296, + "grad_norm": 0.615865192155834, + "learning_rate": 2.8649999421380923e-06, + "loss": 0.3238, + "step": 20056 + }, + { + "epoch": 0.939569962992458, + "grad_norm": 0.5585387240215373, + "learning_rate": 2.864812323117166e-06, + "loss": 0.3076, + "step": 20057 + }, + { + "epoch": 0.9396168079823863, + "grad_norm": 0.6170727385784371, + "learning_rate": 2.8646247019968364e-06, + "loss": 0.3134, + "step": 20058 + }, + { + "epoch": 0.9396636529723146, + "grad_norm": 0.5418952523579899, + "learning_rate": 2.864437078778184e-06, + "loss": 0.2997, + "step": 20059 + }, + { + "epoch": 0.9397104979622429, + "grad_norm": 0.5776121785099578, + "learning_rate": 2.864249453462287e-06, + "loss": 0.3159, + "step": 20060 + }, + { + "epoch": 0.9397573429521713, + "grad_norm": 0.5440060114345021, + "learning_rate": 2.8640618260502266e-06, + "loss": 0.2949, + "step": 20061 + }, + { + "epoch": 0.9398041879420996, + "grad_norm": 0.6117290055047463, + "learning_rate": 2.8638741965430817e-06, + "loss": 0.3196, + "step": 20062 + }, + { + "epoch": 0.9398510329320279, + "grad_norm": 0.6612764888369406, + "learning_rate": 2.863686564941932e-06, + "loss": 0.3391, + "step": 20063 + }, + { + "epoch": 0.9398978779219562, + "grad_norm": 0.6287065350680285, + "learning_rate": 2.8634989312478573e-06, + "loss": 0.2912, + "step": 20064 + }, + { + "epoch": 0.9399447229118846, + "grad_norm": 0.5965018159754004, + "learning_rate": 2.863311295461939e-06, + "loss": 0.3194, + "step": 20065 + }, + { + "epoch": 0.9399915679018129, + "grad_norm": 0.5941599622076779, + "learning_rate": 2.863123657585254e-06, + "loss": 0.3021, + "step": 20066 + }, + { + "epoch": 0.9400384128917413, + "grad_norm": 0.5504394637916524, + "learning_rate": 2.8629360176188836e-06, + "loss": 0.3156, + "step": 20067 + }, + { + "epoch": 0.9400852578816695, + "grad_norm": 0.6074671217096564, + "learning_rate": 2.8627483755639083e-06, + "loss": 0.3239, + "step": 20068 + }, + { + "epoch": 0.9401321028715979, + "grad_norm": 0.609025124024236, + "learning_rate": 2.8625607314214077e-06, + "loss": 0.298, + "step": 20069 + }, + { + "epoch": 0.9401789478615262, + "grad_norm": 0.5677361839826789, + "learning_rate": 2.8623730851924603e-06, + "loss": 0.3133, + "step": 20070 + }, + { + "epoch": 0.9402257928514546, + "grad_norm": 0.6323197085532256, + "learning_rate": 2.862185436878147e-06, + "loss": 0.3267, + "step": 20071 + }, + { + "epoch": 0.9402726378413828, + "grad_norm": 0.5375528312147131, + "learning_rate": 2.8619977864795482e-06, + "loss": 0.2872, + "step": 20072 + }, + { + "epoch": 0.9403194828313112, + "grad_norm": 0.5509637772372339, + "learning_rate": 2.8618101339977422e-06, + "loss": 0.3257, + "step": 20073 + }, + { + "epoch": 0.9403663278212395, + "grad_norm": 0.6624885022018144, + "learning_rate": 2.861622479433811e-06, + "loss": 0.3472, + "step": 20074 + }, + { + "epoch": 0.9404131728111679, + "grad_norm": 0.6091673999070656, + "learning_rate": 2.861434822788832e-06, + "loss": 0.3262, + "step": 20075 + }, + { + "epoch": 0.9404600178010962, + "grad_norm": 0.5843411370997639, + "learning_rate": 2.8612471640638873e-06, + "loss": 0.3173, + "step": 20076 + }, + { + "epoch": 0.9405068627910245, + "grad_norm": 0.5918580959744116, + "learning_rate": 2.861059503260056e-06, + "loss": 0.3172, + "step": 20077 + }, + { + "epoch": 0.9405537077809528, + "grad_norm": 0.5973051720661823, + "learning_rate": 2.8608718403784174e-06, + "loss": 0.3084, + "step": 20078 + }, + { + "epoch": 0.9406005527708812, + "grad_norm": 0.576493050140911, + "learning_rate": 2.860684175420053e-06, + "loss": 0.3061, + "step": 20079 + }, + { + "epoch": 0.9406473977608095, + "grad_norm": 0.5706044219030464, + "learning_rate": 2.8604965083860404e-06, + "loss": 0.3114, + "step": 20080 + }, + { + "epoch": 0.9406942427507378, + "grad_norm": 0.5917208220911281, + "learning_rate": 2.860308839277462e-06, + "loss": 0.3206, + "step": 20081 + }, + { + "epoch": 0.9407410877406661, + "grad_norm": 0.6017129801008194, + "learning_rate": 2.8601211680953967e-06, + "loss": 0.3082, + "step": 20082 + }, + { + "epoch": 0.9407879327305945, + "grad_norm": 0.5889395711143992, + "learning_rate": 2.8599334948409242e-06, + "loss": 0.3191, + "step": 20083 + }, + { + "epoch": 0.9408347777205228, + "grad_norm": 0.6022968230210856, + "learning_rate": 2.859745819515126e-06, + "loss": 0.3174, + "step": 20084 + }, + { + "epoch": 0.9408816227104512, + "grad_norm": 0.6121415372089858, + "learning_rate": 2.8595581421190794e-06, + "loss": 0.3321, + "step": 20085 + }, + { + "epoch": 0.9409284677003794, + "grad_norm": 0.5711443283539198, + "learning_rate": 2.859370462653867e-06, + "loss": 0.318, + "step": 20086 + }, + { + "epoch": 0.9409753126903078, + "grad_norm": 0.5953150168392728, + "learning_rate": 2.859182781120568e-06, + "loss": 0.3078, + "step": 20087 + }, + { + "epoch": 0.9410221576802361, + "grad_norm": 0.5558252363417536, + "learning_rate": 2.858995097520262e-06, + "loss": 0.2952, + "step": 20088 + }, + { + "epoch": 0.9410690026701645, + "grad_norm": 0.5823233747067723, + "learning_rate": 2.85880741185403e-06, + "loss": 0.3244, + "step": 20089 + }, + { + "epoch": 0.9411158476600927, + "grad_norm": 0.5966144126454747, + "learning_rate": 2.8586197241229515e-06, + "loss": 0.3185, + "step": 20090 + }, + { + "epoch": 0.9411626926500211, + "grad_norm": 0.6190324855938387, + "learning_rate": 2.858432034328106e-06, + "loss": 0.3319, + "step": 20091 + }, + { + "epoch": 0.9412095376399494, + "grad_norm": 0.6183368740383401, + "learning_rate": 2.858244342470574e-06, + "loss": 0.3208, + "step": 20092 + }, + { + "epoch": 0.9412563826298778, + "grad_norm": 0.601031472699299, + "learning_rate": 2.858056648551437e-06, + "loss": 0.325, + "step": 20093 + }, + { + "epoch": 0.9413032276198061, + "grad_norm": 0.6126207599120448, + "learning_rate": 2.857868952571773e-06, + "loss": 0.32, + "step": 20094 + }, + { + "epoch": 0.9413500726097344, + "grad_norm": 0.6159104257948846, + "learning_rate": 2.857681254532663e-06, + "loss": 0.3221, + "step": 20095 + }, + { + "epoch": 0.9413969175996627, + "grad_norm": 0.6149224281617611, + "learning_rate": 2.857493554435188e-06, + "loss": 0.3236, + "step": 20096 + }, + { + "epoch": 0.9414437625895911, + "grad_norm": 0.5273160555967117, + "learning_rate": 2.8573058522804274e-06, + "loss": 0.2888, + "step": 20097 + }, + { + "epoch": 0.9414906075795194, + "grad_norm": 0.6172174357880235, + "learning_rate": 2.857118148069461e-06, + "loss": 0.3108, + "step": 20098 + }, + { + "epoch": 0.9415374525694477, + "grad_norm": 0.646110869055833, + "learning_rate": 2.85693044180337e-06, + "loss": 0.3288, + "step": 20099 + }, + { + "epoch": 0.941584297559376, + "grad_norm": 0.6110156879069393, + "learning_rate": 2.8567427334832336e-06, + "loss": 0.307, + "step": 20100 + }, + { + "epoch": 0.9416311425493044, + "grad_norm": 0.5821793602779954, + "learning_rate": 2.8565550231101324e-06, + "loss": 0.3203, + "step": 20101 + }, + { + "epoch": 0.9416779875392327, + "grad_norm": 0.6073135492510274, + "learning_rate": 2.856367310685147e-06, + "loss": 0.3304, + "step": 20102 + }, + { + "epoch": 0.9417248325291611, + "grad_norm": 0.5487525499563041, + "learning_rate": 2.856179596209357e-06, + "loss": 0.2876, + "step": 20103 + }, + { + "epoch": 0.9417716775190893, + "grad_norm": 0.5588597126835426, + "learning_rate": 2.8559918796838424e-06, + "loss": 0.3038, + "step": 20104 + }, + { + "epoch": 0.9418185225090177, + "grad_norm": 0.6285984621335797, + "learning_rate": 2.8558041611096853e-06, + "loss": 0.3086, + "step": 20105 + }, + { + "epoch": 0.941865367498946, + "grad_norm": 0.6299739070760563, + "learning_rate": 2.855616440487963e-06, + "loss": 0.3117, + "step": 20106 + }, + { + "epoch": 0.9419122124888744, + "grad_norm": 0.6136364376775109, + "learning_rate": 2.8554287178197594e-06, + "loss": 0.3166, + "step": 20107 + }, + { + "epoch": 0.9419590574788026, + "grad_norm": 0.5822291106358196, + "learning_rate": 2.855240993106152e-06, + "loss": 0.3024, + "step": 20108 + }, + { + "epoch": 0.942005902468731, + "grad_norm": 0.5834267567248426, + "learning_rate": 2.8550532663482213e-06, + "loss": 0.3067, + "step": 20109 + }, + { + "epoch": 0.9420527474586593, + "grad_norm": 0.644072522693559, + "learning_rate": 2.8548655375470492e-06, + "loss": 0.3211, + "step": 20110 + }, + { + "epoch": 0.9420995924485877, + "grad_norm": 0.5761472365876182, + "learning_rate": 2.854677806703715e-06, + "loss": 0.3281, + "step": 20111 + }, + { + "epoch": 0.942146437438516, + "grad_norm": 0.5635222062088698, + "learning_rate": 2.854490073819299e-06, + "loss": 0.3055, + "step": 20112 + }, + { + "epoch": 0.9421932824284442, + "grad_norm": 0.5505252794391108, + "learning_rate": 2.854302338894881e-06, + "loss": 0.3073, + "step": 20113 + }, + { + "epoch": 0.9422401274183726, + "grad_norm": 0.6486967113813829, + "learning_rate": 2.8541146019315434e-06, + "loss": 0.3262, + "step": 20114 + }, + { + "epoch": 0.942286972408301, + "grad_norm": 0.619049997287001, + "learning_rate": 2.853926862930364e-06, + "loss": 0.3243, + "step": 20115 + }, + { + "epoch": 0.9423338173982293, + "grad_norm": 0.5835968225778773, + "learning_rate": 2.853739121892425e-06, + "loss": 0.3279, + "step": 20116 + }, + { + "epoch": 0.9423806623881575, + "grad_norm": 0.603216359263515, + "learning_rate": 2.853551378818807e-06, + "loss": 0.3343, + "step": 20117 + }, + { + "epoch": 0.9424275073780859, + "grad_norm": 0.5704101199704974, + "learning_rate": 2.8533636337105893e-06, + "loss": 0.303, + "step": 20118 + }, + { + "epoch": 0.9424743523680142, + "grad_norm": 0.5784104846238234, + "learning_rate": 2.853175886568852e-06, + "loss": 0.3015, + "step": 20119 + }, + { + "epoch": 0.9425211973579426, + "grad_norm": 0.5820879874100043, + "learning_rate": 2.852988137394677e-06, + "loss": 0.2938, + "step": 20120 + }, + { + "epoch": 0.942568042347871, + "grad_norm": 0.5642269878277792, + "learning_rate": 2.8528003861891433e-06, + "loss": 0.281, + "step": 20121 + }, + { + "epoch": 0.9426148873377992, + "grad_norm": 0.5944461824341688, + "learning_rate": 2.8526126329533323e-06, + "loss": 0.3297, + "step": 20122 + }, + { + "epoch": 0.9426617323277275, + "grad_norm": 0.5569290657650854, + "learning_rate": 2.8524248776883247e-06, + "loss": 0.323, + "step": 20123 + }, + { + "epoch": 0.9427085773176559, + "grad_norm": 0.550666638955399, + "learning_rate": 2.8522371203951997e-06, + "loss": 0.3122, + "step": 20124 + }, + { + "epoch": 0.9427554223075842, + "grad_norm": 0.5777475589440955, + "learning_rate": 2.8520493610750393e-06, + "loss": 0.3142, + "step": 20125 + }, + { + "epoch": 0.9428022672975125, + "grad_norm": 0.647526644564679, + "learning_rate": 2.8518615997289234e-06, + "loss": 0.3363, + "step": 20126 + }, + { + "epoch": 0.9428491122874408, + "grad_norm": 0.5605250026873696, + "learning_rate": 2.8516738363579317e-06, + "loss": 0.3062, + "step": 20127 + }, + { + "epoch": 0.9428959572773692, + "grad_norm": 0.6255150834902131, + "learning_rate": 2.8514860709631457e-06, + "loss": 0.3356, + "step": 20128 + }, + { + "epoch": 0.9429428022672975, + "grad_norm": 0.6090215094949176, + "learning_rate": 2.8512983035456463e-06, + "loss": 0.3096, + "step": 20129 + }, + { + "epoch": 0.9429896472572259, + "grad_norm": 0.5590204970854135, + "learning_rate": 2.8511105341065127e-06, + "loss": 0.3134, + "step": 20130 + }, + { + "epoch": 0.9430364922471541, + "grad_norm": 0.594635107440921, + "learning_rate": 2.850922762646827e-06, + "loss": 0.3239, + "step": 20131 + }, + { + "epoch": 0.9430833372370825, + "grad_norm": 0.6011146874881855, + "learning_rate": 2.8507349891676683e-06, + "loss": 0.3129, + "step": 20132 + }, + { + "epoch": 0.9431301822270108, + "grad_norm": 0.5561736962448355, + "learning_rate": 2.850547213670118e-06, + "loss": 0.2992, + "step": 20133 + }, + { + "epoch": 0.9431770272169392, + "grad_norm": 0.5986166727154026, + "learning_rate": 2.8503594361552566e-06, + "loss": 0.329, + "step": 20134 + }, + { + "epoch": 0.9432238722068674, + "grad_norm": 0.6385834255685737, + "learning_rate": 2.8501716566241654e-06, + "loss": 0.3406, + "step": 20135 + }, + { + "epoch": 0.9432707171967958, + "grad_norm": 0.6102682896694217, + "learning_rate": 2.8499838750779235e-06, + "loss": 0.3041, + "step": 20136 + }, + { + "epoch": 0.9433175621867241, + "grad_norm": 0.5584843280032538, + "learning_rate": 2.8497960915176122e-06, + "loss": 0.299, + "step": 20137 + }, + { + "epoch": 0.9433644071766525, + "grad_norm": 0.5467136367941497, + "learning_rate": 2.849608305944313e-06, + "loss": 0.3169, + "step": 20138 + }, + { + "epoch": 0.9434112521665808, + "grad_norm": 0.5735325997399034, + "learning_rate": 2.8494205183591054e-06, + "loss": 0.3191, + "step": 20139 + }, + { + "epoch": 0.9434580971565091, + "grad_norm": 0.5510913366768729, + "learning_rate": 2.8492327287630707e-06, + "loss": 0.308, + "step": 20140 + }, + { + "epoch": 0.9435049421464374, + "grad_norm": 0.631761836786217, + "learning_rate": 2.84904493715729e-06, + "loss": 0.3096, + "step": 20141 + }, + { + "epoch": 0.9435517871363658, + "grad_norm": 0.6139548114953913, + "learning_rate": 2.848857143542843e-06, + "loss": 0.3233, + "step": 20142 + }, + { + "epoch": 0.9435986321262941, + "grad_norm": 0.618126760561465, + "learning_rate": 2.8486693479208106e-06, + "loss": 0.3256, + "step": 20143 + }, + { + "epoch": 0.9436454771162224, + "grad_norm": 0.5819999076154767, + "learning_rate": 2.8484815502922735e-06, + "loss": 0.3112, + "step": 20144 + }, + { + "epoch": 0.9436923221061507, + "grad_norm": 0.5528057191878872, + "learning_rate": 2.848293750658313e-06, + "loss": 0.3, + "step": 20145 + }, + { + "epoch": 0.9437391670960791, + "grad_norm": 0.5942073033563082, + "learning_rate": 2.8481059490200096e-06, + "loss": 0.3032, + "step": 20146 + }, + { + "epoch": 0.9437860120860074, + "grad_norm": 0.5875039177143001, + "learning_rate": 2.8479181453784437e-06, + "loss": 0.3091, + "step": 20147 + }, + { + "epoch": 0.9438328570759358, + "grad_norm": 0.5944838639168281, + "learning_rate": 2.847730339734697e-06, + "loss": 0.3206, + "step": 20148 + }, + { + "epoch": 0.943879702065864, + "grad_norm": 0.5964062887582473, + "learning_rate": 2.8475425320898486e-06, + "loss": 0.3297, + "step": 20149 + }, + { + "epoch": 0.9439265470557924, + "grad_norm": 0.5946019177507703, + "learning_rate": 2.8473547224449806e-06, + "loss": 0.3309, + "step": 20150 + }, + { + "epoch": 0.9439733920457207, + "grad_norm": 0.5776449500177099, + "learning_rate": 2.8471669108011742e-06, + "loss": 0.3171, + "step": 20151 + }, + { + "epoch": 0.9440202370356491, + "grad_norm": 0.5892583189688008, + "learning_rate": 2.846979097159509e-06, + "loss": 0.313, + "step": 20152 + }, + { + "epoch": 0.9440670820255773, + "grad_norm": 0.6036619355062482, + "learning_rate": 2.846791281521066e-06, + "loss": 0.302, + "step": 20153 + }, + { + "epoch": 0.9441139270155057, + "grad_norm": 0.5661601406921624, + "learning_rate": 2.8466034638869265e-06, + "loss": 0.3081, + "step": 20154 + }, + { + "epoch": 0.944160772005434, + "grad_norm": 0.6071329565714948, + "learning_rate": 2.846415644258171e-06, + "loss": 0.3208, + "step": 20155 + }, + { + "epoch": 0.9442076169953624, + "grad_norm": 0.6806148066410017, + "learning_rate": 2.8462278226358806e-06, + "loss": 0.3575, + "step": 20156 + }, + { + "epoch": 0.9442544619852907, + "grad_norm": 0.62194123646164, + "learning_rate": 2.8460399990211367e-06, + "loss": 0.3282, + "step": 20157 + }, + { + "epoch": 0.944301306975219, + "grad_norm": 0.640131524846857, + "learning_rate": 2.8458521734150192e-06, + "loss": 0.3542, + "step": 20158 + }, + { + "epoch": 0.9443481519651473, + "grad_norm": 0.5330740989913894, + "learning_rate": 2.8456643458186093e-06, + "loss": 0.3147, + "step": 20159 + }, + { + "epoch": 0.9443949969550757, + "grad_norm": 0.5778509865649286, + "learning_rate": 2.8454765162329883e-06, + "loss": 0.3301, + "step": 20160 + }, + { + "epoch": 0.944441841945004, + "grad_norm": 0.6136446480209972, + "learning_rate": 2.8452886846592357e-06, + "loss": 0.3116, + "step": 20161 + }, + { + "epoch": 0.9444886869349323, + "grad_norm": 0.5576105709622695, + "learning_rate": 2.8451008510984344e-06, + "loss": 0.2818, + "step": 20162 + }, + { + "epoch": 0.9445355319248606, + "grad_norm": 0.5868299690539196, + "learning_rate": 2.8449130155516653e-06, + "loss": 0.3156, + "step": 20163 + }, + { + "epoch": 0.944582376914789, + "grad_norm": 0.5461275393836174, + "learning_rate": 2.844725178020007e-06, + "loss": 0.3092, + "step": 20164 + }, + { + "epoch": 0.9446292219047173, + "grad_norm": 0.569058586050927, + "learning_rate": 2.844537338504542e-06, + "loss": 0.3227, + "step": 20165 + }, + { + "epoch": 0.9446760668946457, + "grad_norm": 0.5673250710473483, + "learning_rate": 2.844349497006352e-06, + "loss": 0.3067, + "step": 20166 + }, + { + "epoch": 0.9447229118845739, + "grad_norm": 0.6055622797091659, + "learning_rate": 2.844161653526517e-06, + "loss": 0.3137, + "step": 20167 + }, + { + "epoch": 0.9447697568745023, + "grad_norm": 0.6042133244136377, + "learning_rate": 2.8439738080661178e-06, + "loss": 0.3259, + "step": 20168 + }, + { + "epoch": 0.9448166018644306, + "grad_norm": 0.5617259687386066, + "learning_rate": 2.8437859606262365e-06, + "loss": 0.2948, + "step": 20169 + }, + { + "epoch": 0.944863446854359, + "grad_norm": 0.6140884973744282, + "learning_rate": 2.843598111207953e-06, + "loss": 0.3351, + "step": 20170 + }, + { + "epoch": 0.9449102918442872, + "grad_norm": 0.5731669256643313, + "learning_rate": 2.843410259812348e-06, + "loss": 0.2982, + "step": 20171 + }, + { + "epoch": 0.9449571368342156, + "grad_norm": 0.5659375392068443, + "learning_rate": 2.8432224064405045e-06, + "loss": 0.3024, + "step": 20172 + }, + { + "epoch": 0.9450039818241439, + "grad_norm": 0.5753243289954961, + "learning_rate": 2.843034551093502e-06, + "loss": 0.3222, + "step": 20173 + }, + { + "epoch": 0.9450508268140723, + "grad_norm": 0.554900674153035, + "learning_rate": 2.842846693772421e-06, + "loss": 0.2906, + "step": 20174 + }, + { + "epoch": 0.9450976718040006, + "grad_norm": 0.6173401570372377, + "learning_rate": 2.8426588344783444e-06, + "loss": 0.3093, + "step": 20175 + }, + { + "epoch": 0.9451445167939289, + "grad_norm": 0.6094148361413397, + "learning_rate": 2.842470973212352e-06, + "loss": 0.3241, + "step": 20176 + }, + { + "epoch": 0.9451913617838572, + "grad_norm": 0.6577633326710858, + "learning_rate": 2.8422831099755256e-06, + "loss": 0.3414, + "step": 20177 + }, + { + "epoch": 0.9452382067737856, + "grad_norm": 0.6132599592679079, + "learning_rate": 2.8420952447689454e-06, + "loss": 0.3216, + "step": 20178 + }, + { + "epoch": 0.9452850517637139, + "grad_norm": 0.561408269285325, + "learning_rate": 2.841907377593693e-06, + "loss": 0.3051, + "step": 20179 + }, + { + "epoch": 0.9453318967536422, + "grad_norm": 0.5842889572335794, + "learning_rate": 2.8417195084508497e-06, + "loss": 0.2971, + "step": 20180 + }, + { + "epoch": 0.9453787417435705, + "grad_norm": 0.5446428934603771, + "learning_rate": 2.841531637341497e-06, + "loss": 0.298, + "step": 20181 + }, + { + "epoch": 0.9454255867334989, + "grad_norm": 0.5033869617987048, + "learning_rate": 2.841343764266715e-06, + "loss": 0.2989, + "step": 20182 + }, + { + "epoch": 0.9454724317234272, + "grad_norm": 0.5904597981297944, + "learning_rate": 2.8411558892275854e-06, + "loss": 0.3064, + "step": 20183 + }, + { + "epoch": 0.9455192767133556, + "grad_norm": 0.5986409089113859, + "learning_rate": 2.8409680122251905e-06, + "loss": 0.3362, + "step": 20184 + }, + { + "epoch": 0.9455661217032838, + "grad_norm": 0.6107695067929155, + "learning_rate": 2.840780133260609e-06, + "loss": 0.3275, + "step": 20185 + }, + { + "epoch": 0.9456129666932122, + "grad_norm": 0.6017886009838422, + "learning_rate": 2.8405922523349234e-06, + "loss": 0.315, + "step": 20186 + }, + { + "epoch": 0.9456598116831405, + "grad_norm": 0.5625770124631113, + "learning_rate": 2.8404043694492163e-06, + "loss": 0.3062, + "step": 20187 + }, + { + "epoch": 0.9457066566730689, + "grad_norm": 0.5731758495802943, + "learning_rate": 2.8402164846045664e-06, + "loss": 0.3187, + "step": 20188 + }, + { + "epoch": 0.9457535016629971, + "grad_norm": 0.5567543596060569, + "learning_rate": 2.8400285978020566e-06, + "loss": 0.3133, + "step": 20189 + }, + { + "epoch": 0.9458003466529254, + "grad_norm": 0.6299490083986374, + "learning_rate": 2.839840709042768e-06, + "loss": 0.3339, + "step": 20190 + }, + { + "epoch": 0.9458471916428538, + "grad_norm": 0.5825686111500717, + "learning_rate": 2.8396528183277817e-06, + "loss": 0.3088, + "step": 20191 + }, + { + "epoch": 0.9458940366327822, + "grad_norm": 0.6648254713143851, + "learning_rate": 2.839464925658178e-06, + "loss": 0.317, + "step": 20192 + }, + { + "epoch": 0.9459408816227105, + "grad_norm": 0.6123655363734201, + "learning_rate": 2.839277031035039e-06, + "loss": 0.3462, + "step": 20193 + }, + { + "epoch": 0.9459877266126387, + "grad_norm": 0.5922296459873525, + "learning_rate": 2.839089134459446e-06, + "loss": 0.3237, + "step": 20194 + }, + { + "epoch": 0.9460345716025671, + "grad_norm": 0.5775580618334042, + "learning_rate": 2.8389012359324808e-06, + "loss": 0.2974, + "step": 20195 + }, + { + "epoch": 0.9460814165924954, + "grad_norm": 0.5941939685897942, + "learning_rate": 2.838713335455224e-06, + "loss": 0.3266, + "step": 20196 + }, + { + "epoch": 0.9461282615824238, + "grad_norm": 0.6164182868526, + "learning_rate": 2.838525433028757e-06, + "loss": 0.3167, + "step": 20197 + }, + { + "epoch": 0.946175106572352, + "grad_norm": 0.5850032635976402, + "learning_rate": 2.8383375286541616e-06, + "loss": 0.3219, + "step": 20198 + }, + { + "epoch": 0.9462219515622804, + "grad_norm": 0.6116252609548366, + "learning_rate": 2.8381496223325182e-06, + "loss": 0.3194, + "step": 20199 + }, + { + "epoch": 0.9462687965522087, + "grad_norm": 0.5413735416307711, + "learning_rate": 2.837961714064909e-06, + "loss": 0.3201, + "step": 20200 + }, + { + "epoch": 0.9463156415421371, + "grad_norm": 0.5727529694029537, + "learning_rate": 2.837773803852415e-06, + "loss": 0.3094, + "step": 20201 + }, + { + "epoch": 0.9463624865320655, + "grad_norm": 0.6669273743520474, + "learning_rate": 2.837585891696118e-06, + "loss": 0.3429, + "step": 20202 + }, + { + "epoch": 0.9464093315219937, + "grad_norm": 0.5705497603902225, + "learning_rate": 2.8373979775970984e-06, + "loss": 0.328, + "step": 20203 + }, + { + "epoch": 0.946456176511922, + "grad_norm": 0.5963473605535757, + "learning_rate": 2.837210061556438e-06, + "loss": 0.3058, + "step": 20204 + }, + { + "epoch": 0.9465030215018504, + "grad_norm": 0.5542860664600384, + "learning_rate": 2.8370221435752197e-06, + "loss": 0.2933, + "step": 20205 + }, + { + "epoch": 0.9465498664917787, + "grad_norm": 0.6091729897979639, + "learning_rate": 2.8368342236545225e-06, + "loss": 0.3104, + "step": 20206 + }, + { + "epoch": 0.946596711481707, + "grad_norm": 0.5523231849717488, + "learning_rate": 2.8366463017954294e-06, + "loss": 0.3134, + "step": 20207 + }, + { + "epoch": 0.9466435564716353, + "grad_norm": 0.5736643848171296, + "learning_rate": 2.836458377999022e-06, + "loss": 0.3186, + "step": 20208 + }, + { + "epoch": 0.9466904014615637, + "grad_norm": 0.5883713787791652, + "learning_rate": 2.8362704522663802e-06, + "loss": 0.3077, + "step": 20209 + }, + { + "epoch": 0.946737246451492, + "grad_norm": 0.6194851178165793, + "learning_rate": 2.8360825245985867e-06, + "loss": 0.3261, + "step": 20210 + }, + { + "epoch": 0.9467840914414204, + "grad_norm": 0.5910799438130928, + "learning_rate": 2.835894594996723e-06, + "loss": 0.3361, + "step": 20211 + }, + { + "epoch": 0.9468309364313486, + "grad_norm": 0.6053962028697566, + "learning_rate": 2.835706663461871e-06, + "loss": 0.3216, + "step": 20212 + }, + { + "epoch": 0.946877781421277, + "grad_norm": 0.6051495186215822, + "learning_rate": 2.8355187299951103e-06, + "loss": 0.333, + "step": 20213 + }, + { + "epoch": 0.9469246264112053, + "grad_norm": 0.6099371868379024, + "learning_rate": 2.835330794597524e-06, + "loss": 0.3048, + "step": 20214 + }, + { + "epoch": 0.9469714714011337, + "grad_norm": 0.6081131549378379, + "learning_rate": 2.8351428572701935e-06, + "loss": 0.3044, + "step": 20215 + }, + { + "epoch": 0.9470183163910619, + "grad_norm": 0.586347933101336, + "learning_rate": 2.8349549180142006e-06, + "loss": 0.3139, + "step": 20216 + }, + { + "epoch": 0.9470651613809903, + "grad_norm": 0.5792696974501761, + "learning_rate": 2.834766976830625e-06, + "loss": 0.2992, + "step": 20217 + }, + { + "epoch": 0.9471120063709186, + "grad_norm": 0.5990323257558002, + "learning_rate": 2.834579033720551e-06, + "loss": 0.3208, + "step": 20218 + }, + { + "epoch": 0.947158851360847, + "grad_norm": 0.5915796420659429, + "learning_rate": 2.834391088685058e-06, + "loss": 0.3325, + "step": 20219 + }, + { + "epoch": 0.9472056963507753, + "grad_norm": 0.5879986723414579, + "learning_rate": 2.834203141725228e-06, + "loss": 0.3126, + "step": 20220 + }, + { + "epoch": 0.9472525413407036, + "grad_norm": 0.5980383772065573, + "learning_rate": 2.834015192842144e-06, + "loss": 0.3265, + "step": 20221 + }, + { + "epoch": 0.9472993863306319, + "grad_norm": 0.6239921731177519, + "learning_rate": 2.8338272420368864e-06, + "loss": 0.3215, + "step": 20222 + }, + { + "epoch": 0.9473462313205603, + "grad_norm": 0.6062306760925313, + "learning_rate": 2.833639289310536e-06, + "loss": 0.3439, + "step": 20223 + }, + { + "epoch": 0.9473930763104886, + "grad_norm": 0.6156405345063739, + "learning_rate": 2.833451334664176e-06, + "loss": 0.3167, + "step": 20224 + }, + { + "epoch": 0.9474399213004169, + "grad_norm": 0.5567898688720839, + "learning_rate": 2.833263378098886e-06, + "loss": 0.2823, + "step": 20225 + }, + { + "epoch": 0.9474867662903452, + "grad_norm": 0.6090510387683341, + "learning_rate": 2.8330754196157505e-06, + "loss": 0.3168, + "step": 20226 + }, + { + "epoch": 0.9475336112802736, + "grad_norm": 0.6175721036258857, + "learning_rate": 2.8328874592158494e-06, + "loss": 0.3193, + "step": 20227 + }, + { + "epoch": 0.9475804562702019, + "grad_norm": 0.5955975028880782, + "learning_rate": 2.8326994969002646e-06, + "loss": 0.3126, + "step": 20228 + }, + { + "epoch": 0.9476273012601303, + "grad_norm": 0.6228781271932657, + "learning_rate": 2.832511532670078e-06, + "loss": 0.3037, + "step": 20229 + }, + { + "epoch": 0.9476741462500585, + "grad_norm": 0.5726963100766367, + "learning_rate": 2.832323566526371e-06, + "loss": 0.3202, + "step": 20230 + }, + { + "epoch": 0.9477209912399869, + "grad_norm": 0.5275868928816031, + "learning_rate": 2.832135598470225e-06, + "loss": 0.3039, + "step": 20231 + }, + { + "epoch": 0.9477678362299152, + "grad_norm": 0.5915558277305075, + "learning_rate": 2.831947628502722e-06, + "loss": 0.3253, + "step": 20232 + }, + { + "epoch": 0.9478146812198436, + "grad_norm": 0.6566928099422832, + "learning_rate": 2.831759656624945e-06, + "loss": 0.3099, + "step": 20233 + }, + { + "epoch": 0.9478615262097718, + "grad_norm": 0.6038094263155066, + "learning_rate": 2.8315716828379736e-06, + "loss": 0.3228, + "step": 20234 + }, + { + "epoch": 0.9479083711997002, + "grad_norm": 0.5998675584231977, + "learning_rate": 2.83138370714289e-06, + "loss": 0.3226, + "step": 20235 + }, + { + "epoch": 0.9479552161896285, + "grad_norm": 0.6149041454319512, + "learning_rate": 2.8311957295407778e-06, + "loss": 0.3044, + "step": 20236 + }, + { + "epoch": 0.9480020611795569, + "grad_norm": 0.571514650412363, + "learning_rate": 2.831007750032717e-06, + "loss": 0.3075, + "step": 20237 + }, + { + "epoch": 0.9480489061694852, + "grad_norm": 0.5554619987678826, + "learning_rate": 2.830819768619789e-06, + "loss": 0.3026, + "step": 20238 + }, + { + "epoch": 0.9480957511594135, + "grad_norm": 0.5691182570739949, + "learning_rate": 2.8306317853030773e-06, + "loss": 0.3108, + "step": 20239 + }, + { + "epoch": 0.9481425961493418, + "grad_norm": 0.6032366587591576, + "learning_rate": 2.8304438000836624e-06, + "loss": 0.3152, + "step": 20240 + }, + { + "epoch": 0.9481894411392702, + "grad_norm": 0.5935628938737094, + "learning_rate": 2.8302558129626263e-06, + "loss": 0.3149, + "step": 20241 + }, + { + "epoch": 0.9482362861291985, + "grad_norm": 0.5908857441404409, + "learning_rate": 2.8300678239410516e-06, + "loss": 0.3203, + "step": 20242 + }, + { + "epoch": 0.9482831311191268, + "grad_norm": 0.556300813615896, + "learning_rate": 2.8298798330200194e-06, + "loss": 0.3062, + "step": 20243 + }, + { + "epoch": 0.9483299761090551, + "grad_norm": 0.5815146682320892, + "learning_rate": 2.8296918402006107e-06, + "loss": 0.3151, + "step": 20244 + }, + { + "epoch": 0.9483768210989835, + "grad_norm": 0.5403943840773101, + "learning_rate": 2.8295038454839095e-06, + "loss": 0.3241, + "step": 20245 + }, + { + "epoch": 0.9484236660889118, + "grad_norm": 0.6253212940525844, + "learning_rate": 2.829315848870996e-06, + "loss": 0.3121, + "step": 20246 + }, + { + "epoch": 0.9484705110788402, + "grad_norm": 0.5383581814962741, + "learning_rate": 2.8291278503629525e-06, + "loss": 0.3014, + "step": 20247 + }, + { + "epoch": 0.9485173560687684, + "grad_norm": 0.584484348601955, + "learning_rate": 2.8289398499608612e-06, + "loss": 0.3101, + "step": 20248 + }, + { + "epoch": 0.9485642010586968, + "grad_norm": 0.572408715639876, + "learning_rate": 2.8287518476658033e-06, + "loss": 0.3281, + "step": 20249 + }, + { + "epoch": 0.9486110460486251, + "grad_norm": 0.5694234101592491, + "learning_rate": 2.8285638434788613e-06, + "loss": 0.314, + "step": 20250 + }, + { + "epoch": 0.9486578910385535, + "grad_norm": 0.6329259204633393, + "learning_rate": 2.8283758374011178e-06, + "loss": 0.335, + "step": 20251 + }, + { + "epoch": 0.9487047360284817, + "grad_norm": 0.6629664897901941, + "learning_rate": 2.828187829433653e-06, + "loss": 0.3006, + "step": 20252 + }, + { + "epoch": 0.9487515810184101, + "grad_norm": 0.5819411380537989, + "learning_rate": 2.827999819577549e-06, + "loss": 0.307, + "step": 20253 + }, + { + "epoch": 0.9487984260083384, + "grad_norm": 0.5859314825338026, + "learning_rate": 2.82781180783389e-06, + "loss": 0.3197, + "step": 20254 + }, + { + "epoch": 0.9488452709982668, + "grad_norm": 0.5544582817700764, + "learning_rate": 2.827623794203756e-06, + "loss": 0.3091, + "step": 20255 + }, + { + "epoch": 0.9488921159881951, + "grad_norm": 0.5702979676909191, + "learning_rate": 2.827435778688229e-06, + "loss": 0.318, + "step": 20256 + }, + { + "epoch": 0.9489389609781234, + "grad_norm": 0.6114470322807773, + "learning_rate": 2.8272477612883916e-06, + "loss": 0.3042, + "step": 20257 + }, + { + "epoch": 0.9489858059680517, + "grad_norm": 0.6095060055926382, + "learning_rate": 2.827059742005326e-06, + "loss": 0.3231, + "step": 20258 + }, + { + "epoch": 0.9490326509579801, + "grad_norm": 0.594287716976732, + "learning_rate": 2.826871720840113e-06, + "loss": 0.3126, + "step": 20259 + }, + { + "epoch": 0.9490794959479084, + "grad_norm": 0.6755780766943895, + "learning_rate": 2.8266836977938355e-06, + "loss": 0.3364, + "step": 20260 + }, + { + "epoch": 0.9491263409378367, + "grad_norm": 0.6396604537490709, + "learning_rate": 2.8264956728675764e-06, + "loss": 0.3228, + "step": 20261 + }, + { + "epoch": 0.949173185927765, + "grad_norm": 0.5690678414576956, + "learning_rate": 2.8263076460624157e-06, + "loss": 0.3137, + "step": 20262 + }, + { + "epoch": 0.9492200309176934, + "grad_norm": 0.5736019524917643, + "learning_rate": 2.826119617379437e-06, + "loss": 0.2977, + "step": 20263 + }, + { + "epoch": 0.9492668759076217, + "grad_norm": 0.6038496992416011, + "learning_rate": 2.825931586819721e-06, + "loss": 0.306, + "step": 20264 + }, + { + "epoch": 0.9493137208975501, + "grad_norm": 0.5583955453987013, + "learning_rate": 2.825743554384352e-06, + "loss": 0.2992, + "step": 20265 + }, + { + "epoch": 0.9493605658874783, + "grad_norm": 0.6341411039402205, + "learning_rate": 2.82555552007441e-06, + "loss": 0.3258, + "step": 20266 + }, + { + "epoch": 0.9494074108774067, + "grad_norm": 0.6209048308491598, + "learning_rate": 2.825367483890978e-06, + "loss": 0.2932, + "step": 20267 + }, + { + "epoch": 0.949454255867335, + "grad_norm": 0.6163678384404472, + "learning_rate": 2.825179445835138e-06, + "loss": 0.3226, + "step": 20268 + }, + { + "epoch": 0.9495011008572634, + "grad_norm": 0.639751780338151, + "learning_rate": 2.824991405907972e-06, + "loss": 0.3378, + "step": 20269 + }, + { + "epoch": 0.9495479458471916, + "grad_norm": 0.5611440449659887, + "learning_rate": 2.8248033641105615e-06, + "loss": 0.3019, + "step": 20270 + }, + { + "epoch": 0.94959479083712, + "grad_norm": 0.595881254225869, + "learning_rate": 2.8246153204439902e-06, + "loss": 0.3089, + "step": 20271 + }, + { + "epoch": 0.9496416358270483, + "grad_norm": 0.5833738795561033, + "learning_rate": 2.8244272749093395e-06, + "loss": 0.3174, + "step": 20272 + }, + { + "epoch": 0.9496884808169767, + "grad_norm": 0.5827338528108486, + "learning_rate": 2.82423922750769e-06, + "loss": 0.3183, + "step": 20273 + }, + { + "epoch": 0.949735325806905, + "grad_norm": 0.5360136350230269, + "learning_rate": 2.8240511782401257e-06, + "loss": 0.2787, + "step": 20274 + }, + { + "epoch": 0.9497821707968332, + "grad_norm": 0.5730367740130629, + "learning_rate": 2.8238631271077294e-06, + "loss": 0.3059, + "step": 20275 + }, + { + "epoch": 0.9498290157867616, + "grad_norm": 0.5585207877927891, + "learning_rate": 2.823675074111581e-06, + "loss": 0.323, + "step": 20276 + }, + { + "epoch": 0.94987586077669, + "grad_norm": 0.577630873829898, + "learning_rate": 2.823487019252764e-06, + "loss": 0.3191, + "step": 20277 + }, + { + "epoch": 0.9499227057666183, + "grad_norm": 0.6295182548671, + "learning_rate": 2.823298962532361e-06, + "loss": 0.3105, + "step": 20278 + }, + { + "epoch": 0.9499695507565465, + "grad_norm": 0.6512903667440546, + "learning_rate": 2.823110903951454e-06, + "loss": 0.2963, + "step": 20279 + }, + { + "epoch": 0.9500163957464749, + "grad_norm": 0.5675097131829198, + "learning_rate": 2.822922843511124e-06, + "loss": 0.3242, + "step": 20280 + }, + { + "epoch": 0.9500632407364032, + "grad_norm": 0.6122670976500885, + "learning_rate": 2.822734781212455e-06, + "loss": 0.3299, + "step": 20281 + }, + { + "epoch": 0.9501100857263316, + "grad_norm": 0.6175350987066667, + "learning_rate": 2.8225467170565285e-06, + "loss": 0.305, + "step": 20282 + }, + { + "epoch": 0.95015693071626, + "grad_norm": 0.5622650657724954, + "learning_rate": 2.8223586510444255e-06, + "loss": 0.3152, + "step": 20283 + }, + { + "epoch": 0.9502037757061882, + "grad_norm": 0.6137736363816444, + "learning_rate": 2.8221705831772306e-06, + "loss": 0.3272, + "step": 20284 + }, + { + "epoch": 0.9502506206961165, + "grad_norm": 0.5533614517438951, + "learning_rate": 2.8219825134560246e-06, + "loss": 0.308, + "step": 20285 + }, + { + "epoch": 0.9502974656860449, + "grad_norm": 0.6304126245052742, + "learning_rate": 2.821794441881891e-06, + "loss": 0.3226, + "step": 20286 + }, + { + "epoch": 0.9503443106759732, + "grad_norm": 0.5894811322655524, + "learning_rate": 2.8216063684559096e-06, + "loss": 0.3117, + "step": 20287 + }, + { + "epoch": 0.9503911556659015, + "grad_norm": 0.6023590373591969, + "learning_rate": 2.821418293179166e-06, + "loss": 0.3327, + "step": 20288 + }, + { + "epoch": 0.9504380006558298, + "grad_norm": 0.6082961933882777, + "learning_rate": 2.8212302160527406e-06, + "loss": 0.3201, + "step": 20289 + }, + { + "epoch": 0.9504848456457582, + "grad_norm": 0.5732369727399409, + "learning_rate": 2.8210421370777153e-06, + "loss": 0.3111, + "step": 20290 + }, + { + "epoch": 0.9505316906356865, + "grad_norm": 0.6057752351070153, + "learning_rate": 2.8208540562551735e-06, + "loss": 0.3068, + "step": 20291 + }, + { + "epoch": 0.9505785356256149, + "grad_norm": 0.6272980516004611, + "learning_rate": 2.820665973586198e-06, + "loss": 0.3389, + "step": 20292 + }, + { + "epoch": 0.9506253806155431, + "grad_norm": 0.5335526472969504, + "learning_rate": 2.820477889071869e-06, + "loss": 0.3061, + "step": 20293 + }, + { + "epoch": 0.9506722256054715, + "grad_norm": 0.570248444424051, + "learning_rate": 2.820289802713271e-06, + "loss": 0.3149, + "step": 20294 + }, + { + "epoch": 0.9507190705953998, + "grad_norm": 0.5686050972086965, + "learning_rate": 2.8201017145114855e-06, + "loss": 0.3321, + "step": 20295 + }, + { + "epoch": 0.9507659155853282, + "grad_norm": 0.5810175014911597, + "learning_rate": 2.8199136244675962e-06, + "loss": 0.3124, + "step": 20296 + }, + { + "epoch": 0.9508127605752564, + "grad_norm": 0.5811667196192476, + "learning_rate": 2.8197255325826834e-06, + "loss": 0.3116, + "step": 20297 + }, + { + "epoch": 0.9508596055651848, + "grad_norm": 0.5912484582849635, + "learning_rate": 2.8195374388578304e-06, + "loss": 0.2983, + "step": 20298 + }, + { + "epoch": 0.9509064505551131, + "grad_norm": 0.6126815497174436, + "learning_rate": 2.8193493432941197e-06, + "loss": 0.3339, + "step": 20299 + }, + { + "epoch": 0.9509532955450415, + "grad_norm": 0.5816207268418481, + "learning_rate": 2.819161245892635e-06, + "loss": 0.332, + "step": 20300 + }, + { + "epoch": 0.9510001405349698, + "grad_norm": 0.6134342011885019, + "learning_rate": 2.818973146654456e-06, + "loss": 0.3346, + "step": 20301 + }, + { + "epoch": 0.9510469855248981, + "grad_norm": 0.5834489718416747, + "learning_rate": 2.8187850455806672e-06, + "loss": 0.3078, + "step": 20302 + }, + { + "epoch": 0.9510938305148264, + "grad_norm": 0.6720095444779974, + "learning_rate": 2.8185969426723516e-06, + "loss": 0.3252, + "step": 20303 + }, + { + "epoch": 0.9511406755047548, + "grad_norm": 0.6083260112061012, + "learning_rate": 2.8184088379305895e-06, + "loss": 0.3106, + "step": 20304 + }, + { + "epoch": 0.9511875204946831, + "grad_norm": 0.582031144335339, + "learning_rate": 2.818220731356465e-06, + "loss": 0.3071, + "step": 20305 + }, + { + "epoch": 0.9512343654846114, + "grad_norm": 0.5447162640975902, + "learning_rate": 2.8180326229510595e-06, + "loss": 0.3078, + "step": 20306 + }, + { + "epoch": 0.9512812104745397, + "grad_norm": 0.5667842975244454, + "learning_rate": 2.817844512715457e-06, + "loss": 0.2869, + "step": 20307 + }, + { + "epoch": 0.9513280554644681, + "grad_norm": 0.566354597151354, + "learning_rate": 2.817656400650739e-06, + "loss": 0.3191, + "step": 20308 + }, + { + "epoch": 0.9513749004543964, + "grad_norm": 0.6210474481124922, + "learning_rate": 2.8174682867579883e-06, + "loss": 0.3075, + "step": 20309 + }, + { + "epoch": 0.9514217454443248, + "grad_norm": 0.6254509313792033, + "learning_rate": 2.8172801710382874e-06, + "loss": 0.3228, + "step": 20310 + }, + { + "epoch": 0.951468590434253, + "grad_norm": 0.6064022061442125, + "learning_rate": 2.817092053492718e-06, + "loss": 0.3363, + "step": 20311 + }, + { + "epoch": 0.9515154354241814, + "grad_norm": 0.570128534589266, + "learning_rate": 2.8169039341223647e-06, + "loss": 0.3365, + "step": 20312 + }, + { + "epoch": 0.9515622804141097, + "grad_norm": 0.6966701018979132, + "learning_rate": 2.816715812928309e-06, + "loss": 0.327, + "step": 20313 + }, + { + "epoch": 0.9516091254040381, + "grad_norm": 0.5857857728850219, + "learning_rate": 2.8165276899116328e-06, + "loss": 0.3147, + "step": 20314 + }, + { + "epoch": 0.9516559703939663, + "grad_norm": 0.5600681742778661, + "learning_rate": 2.8163395650734194e-06, + "loss": 0.3067, + "step": 20315 + }, + { + "epoch": 0.9517028153838947, + "grad_norm": 0.5403359178482863, + "learning_rate": 2.816151438414751e-06, + "loss": 0.318, + "step": 20316 + }, + { + "epoch": 0.951749660373823, + "grad_norm": 0.6275908631029986, + "learning_rate": 2.8159633099367113e-06, + "loss": 0.3085, + "step": 20317 + }, + { + "epoch": 0.9517965053637514, + "grad_norm": 0.5452881677790888, + "learning_rate": 2.815775179640382e-06, + "loss": 0.3119, + "step": 20318 + }, + { + "epoch": 0.9518433503536797, + "grad_norm": 0.6170985498447947, + "learning_rate": 2.8155870475268447e-06, + "loss": 0.3128, + "step": 20319 + }, + { + "epoch": 0.951890195343608, + "grad_norm": 0.5941501178893516, + "learning_rate": 2.815398913597185e-06, + "loss": 0.3058, + "step": 20320 + }, + { + "epoch": 0.9519370403335363, + "grad_norm": 0.567715316353305, + "learning_rate": 2.815210777852483e-06, + "loss": 0.314, + "step": 20321 + }, + { + "epoch": 0.9519838853234647, + "grad_norm": 0.62249380437124, + "learning_rate": 2.815022640293822e-06, + "loss": 0.3215, + "step": 20322 + }, + { + "epoch": 0.952030730313393, + "grad_norm": 0.5450320732370681, + "learning_rate": 2.814834500922285e-06, + "loss": 0.322, + "step": 20323 + }, + { + "epoch": 0.9520775753033213, + "grad_norm": 0.6105948484869275, + "learning_rate": 2.814646359738955e-06, + "loss": 0.3234, + "step": 20324 + }, + { + "epoch": 0.9521244202932496, + "grad_norm": 0.641966051714189, + "learning_rate": 2.8144582167449138e-06, + "loss": 0.3226, + "step": 20325 + }, + { + "epoch": 0.952171265283178, + "grad_norm": 0.6074135945378165, + "learning_rate": 2.814270071941244e-06, + "loss": 0.3415, + "step": 20326 + }, + { + "epoch": 0.9522181102731063, + "grad_norm": 0.587577150341746, + "learning_rate": 2.8140819253290302e-06, + "loss": 0.3178, + "step": 20327 + }, + { + "epoch": 0.9522649552630347, + "grad_norm": 0.5914339585307605, + "learning_rate": 2.813893776909354e-06, + "loss": 0.3223, + "step": 20328 + }, + { + "epoch": 0.9523118002529629, + "grad_norm": 0.587193147769487, + "learning_rate": 2.813705626683297e-06, + "loss": 0.317, + "step": 20329 + }, + { + "epoch": 0.9523586452428913, + "grad_norm": 0.586046710017186, + "learning_rate": 2.8135174746519434e-06, + "loss": 0.3164, + "step": 20330 + }, + { + "epoch": 0.9524054902328196, + "grad_norm": 0.6419715785180045, + "learning_rate": 2.8133293208163753e-06, + "loss": 0.303, + "step": 20331 + }, + { + "epoch": 0.952452335222748, + "grad_norm": 0.5984207089139276, + "learning_rate": 2.8131411651776757e-06, + "loss": 0.3262, + "step": 20332 + }, + { + "epoch": 0.9524991802126762, + "grad_norm": 0.644551271338419, + "learning_rate": 2.812953007736928e-06, + "loss": 0.3223, + "step": 20333 + }, + { + "epoch": 0.9525460252026046, + "grad_norm": 0.6039072328595199, + "learning_rate": 2.8127648484952135e-06, + "loss": 0.3359, + "step": 20334 + }, + { + "epoch": 0.9525928701925329, + "grad_norm": 0.5581028235422815, + "learning_rate": 2.8125766874536164e-06, + "loss": 0.3132, + "step": 20335 + }, + { + "epoch": 0.9526397151824613, + "grad_norm": 0.5905249367975561, + "learning_rate": 2.8123885246132195e-06, + "loss": 0.3066, + "step": 20336 + }, + { + "epoch": 0.9526865601723896, + "grad_norm": 0.5661695727156211, + "learning_rate": 2.812200359975104e-06, + "loss": 0.3181, + "step": 20337 + }, + { + "epoch": 0.9527334051623179, + "grad_norm": 0.5397025029281816, + "learning_rate": 2.812012193540355e-06, + "loss": 0.298, + "step": 20338 + }, + { + "epoch": 0.9527802501522462, + "grad_norm": 0.5996555671000847, + "learning_rate": 2.8118240253100544e-06, + "loss": 0.3292, + "step": 20339 + }, + { + "epoch": 0.9528270951421746, + "grad_norm": 0.5402060283618413, + "learning_rate": 2.8116358552852833e-06, + "loss": 0.3031, + "step": 20340 + }, + { + "epoch": 0.9528739401321029, + "grad_norm": 0.5875460428984354, + "learning_rate": 2.811447683467128e-06, + "loss": 0.3153, + "step": 20341 + }, + { + "epoch": 0.9529207851220312, + "grad_norm": 0.6432578109287951, + "learning_rate": 2.8112595098566684e-06, + "loss": 0.3269, + "step": 20342 + }, + { + "epoch": 0.9529676301119595, + "grad_norm": 0.6476773296060476, + "learning_rate": 2.811071334454989e-06, + "loss": 0.3265, + "step": 20343 + }, + { + "epoch": 0.9530144751018879, + "grad_norm": 0.627761424578787, + "learning_rate": 2.8108831572631717e-06, + "loss": 0.3344, + "step": 20344 + }, + { + "epoch": 0.9530613200918162, + "grad_norm": 0.5701476459025536, + "learning_rate": 2.810694978282301e-06, + "loss": 0.3052, + "step": 20345 + }, + { + "epoch": 0.9531081650817446, + "grad_norm": 0.5505337129141542, + "learning_rate": 2.8105067975134583e-06, + "loss": 0.3099, + "step": 20346 + }, + { + "epoch": 0.9531550100716728, + "grad_norm": 0.5868145185912583, + "learning_rate": 2.8103186149577266e-06, + "loss": 0.3106, + "step": 20347 + }, + { + "epoch": 0.9532018550616012, + "grad_norm": 0.5985574432825563, + "learning_rate": 2.81013043061619e-06, + "loss": 0.3347, + "step": 20348 + }, + { + "epoch": 0.9532487000515295, + "grad_norm": 0.6351129335338312, + "learning_rate": 2.80994224448993e-06, + "loss": 0.3202, + "step": 20349 + }, + { + "epoch": 0.9532955450414579, + "grad_norm": 0.5747303307874196, + "learning_rate": 2.8097540565800303e-06, + "loss": 0.3254, + "step": 20350 + }, + { + "epoch": 0.9533423900313861, + "grad_norm": 0.6662117660014367, + "learning_rate": 2.8095658668875743e-06, + "loss": 0.343, + "step": 20351 + }, + { + "epoch": 0.9533892350213145, + "grad_norm": 0.6323743020040818, + "learning_rate": 2.809377675413645e-06, + "loss": 0.3353, + "step": 20352 + }, + { + "epoch": 0.9534360800112428, + "grad_norm": 0.5938156732264948, + "learning_rate": 2.8091894821593242e-06, + "loss": 0.3439, + "step": 20353 + }, + { + "epoch": 0.9534829250011712, + "grad_norm": 0.5868431271174505, + "learning_rate": 2.809001287125695e-06, + "loss": 0.3256, + "step": 20354 + }, + { + "epoch": 0.9535297699910995, + "grad_norm": 0.5975019912206081, + "learning_rate": 2.808813090313842e-06, + "loss": 0.3237, + "step": 20355 + }, + { + "epoch": 0.9535766149810277, + "grad_norm": 0.6047508883531155, + "learning_rate": 2.8086248917248475e-06, + "loss": 0.307, + "step": 20356 + }, + { + "epoch": 0.9536234599709561, + "grad_norm": 0.5712708563542788, + "learning_rate": 2.8084366913597936e-06, + "loss": 0.305, + "step": 20357 + }, + { + "epoch": 0.9536703049608845, + "grad_norm": 0.5560775893844265, + "learning_rate": 2.808248489219765e-06, + "loss": 0.2964, + "step": 20358 + }, + { + "epoch": 0.9537171499508128, + "grad_norm": 0.6177995750708979, + "learning_rate": 2.808060285305843e-06, + "loss": 0.331, + "step": 20359 + }, + { + "epoch": 0.953763994940741, + "grad_norm": 0.6327754849454574, + "learning_rate": 2.8078720796191115e-06, + "loss": 0.3235, + "step": 20360 + }, + { + "epoch": 0.9538108399306694, + "grad_norm": 0.6125563976291886, + "learning_rate": 2.8076838721606544e-06, + "loss": 0.32, + "step": 20361 + }, + { + "epoch": 0.9538576849205977, + "grad_norm": 0.5963921488920821, + "learning_rate": 2.8074956629315532e-06, + "loss": 0.2925, + "step": 20362 + }, + { + "epoch": 0.9539045299105261, + "grad_norm": 0.6194815682690211, + "learning_rate": 2.8073074519328914e-06, + "loss": 0.346, + "step": 20363 + }, + { + "epoch": 0.9539513749004545, + "grad_norm": 0.5865015778303525, + "learning_rate": 2.807119239165753e-06, + "loss": 0.3211, + "step": 20364 + }, + { + "epoch": 0.9539982198903827, + "grad_norm": 0.5752815000888861, + "learning_rate": 2.8069310246312205e-06, + "loss": 0.3039, + "step": 20365 + }, + { + "epoch": 0.954045064880311, + "grad_norm": 0.5655402948635776, + "learning_rate": 2.806742808330377e-06, + "loss": 0.3214, + "step": 20366 + }, + { + "epoch": 0.9540919098702394, + "grad_norm": 0.6136309116595926, + "learning_rate": 2.8065545902643066e-06, + "loss": 0.328, + "step": 20367 + }, + { + "epoch": 0.9541387548601677, + "grad_norm": 0.6050236625481088, + "learning_rate": 2.8063663704340905e-06, + "loss": 0.3209, + "step": 20368 + }, + { + "epoch": 0.954185599850096, + "grad_norm": 0.6000565347596218, + "learning_rate": 2.806178148840813e-06, + "loss": 0.3033, + "step": 20369 + }, + { + "epoch": 0.9542324448400243, + "grad_norm": 0.5530589689059627, + "learning_rate": 2.805989925485558e-06, + "loss": 0.291, + "step": 20370 + }, + { + "epoch": 0.9542792898299527, + "grad_norm": 0.6274067380346493, + "learning_rate": 2.8058017003694073e-06, + "loss": 0.3199, + "step": 20371 + }, + { + "epoch": 0.954326134819881, + "grad_norm": 0.6116572878859834, + "learning_rate": 2.8056134734934436e-06, + "loss": 0.3181, + "step": 20372 + }, + { + "epoch": 0.9543729798098094, + "grad_norm": 0.5966235884380123, + "learning_rate": 2.8054252448587533e-06, + "loss": 0.3132, + "step": 20373 + }, + { + "epoch": 0.9544198247997376, + "grad_norm": 0.6137768945440887, + "learning_rate": 2.8052370144664166e-06, + "loss": 0.3292, + "step": 20374 + }, + { + "epoch": 0.954466669789666, + "grad_norm": 0.6565740002430065, + "learning_rate": 2.8050487823175166e-06, + "loss": 0.3575, + "step": 20375 + }, + { + "epoch": 0.9545135147795943, + "grad_norm": 0.600913355995813, + "learning_rate": 2.8048605484131387e-06, + "loss": 0.323, + "step": 20376 + }, + { + "epoch": 0.9545603597695227, + "grad_norm": 0.5884775354688888, + "learning_rate": 2.804672312754365e-06, + "loss": 0.3317, + "step": 20377 + }, + { + "epoch": 0.9546072047594509, + "grad_norm": 0.5632919404581993, + "learning_rate": 2.804484075342278e-06, + "loss": 0.3134, + "step": 20378 + }, + { + "epoch": 0.9546540497493793, + "grad_norm": 0.6008255090428316, + "learning_rate": 2.8042958361779626e-06, + "loss": 0.3287, + "step": 20379 + }, + { + "epoch": 0.9547008947393076, + "grad_norm": 0.5802220840371883, + "learning_rate": 2.8041075952625007e-06, + "loss": 0.3097, + "step": 20380 + }, + { + "epoch": 0.954747739729236, + "grad_norm": 0.6025578310422859, + "learning_rate": 2.8039193525969755e-06, + "loss": 0.317, + "step": 20381 + }, + { + "epoch": 0.9547945847191642, + "grad_norm": 0.6081497133939875, + "learning_rate": 2.8037311081824714e-06, + "loss": 0.2978, + "step": 20382 + }, + { + "epoch": 0.9548414297090926, + "grad_norm": 0.5744483840080308, + "learning_rate": 2.803542862020071e-06, + "loss": 0.3029, + "step": 20383 + }, + { + "epoch": 0.9548882746990209, + "grad_norm": 0.5750459904564109, + "learning_rate": 2.8033546141108576e-06, + "loss": 0.3348, + "step": 20384 + }, + { + "epoch": 0.9549351196889493, + "grad_norm": 0.6031201644580215, + "learning_rate": 2.8031663644559147e-06, + "loss": 0.3119, + "step": 20385 + }, + { + "epoch": 0.9549819646788776, + "grad_norm": 0.5578471896235313, + "learning_rate": 2.8029781130563254e-06, + "loss": 0.2967, + "step": 20386 + }, + { + "epoch": 0.9550288096688059, + "grad_norm": 0.6212987874039269, + "learning_rate": 2.8027898599131738e-06, + "loss": 0.3173, + "step": 20387 + }, + { + "epoch": 0.9550756546587342, + "grad_norm": 0.5745812503006842, + "learning_rate": 2.8026016050275424e-06, + "loss": 0.3245, + "step": 20388 + }, + { + "epoch": 0.9551224996486626, + "grad_norm": 0.5567696073200122, + "learning_rate": 2.8024133484005146e-06, + "loss": 0.2988, + "step": 20389 + }, + { + "epoch": 0.9551693446385909, + "grad_norm": 0.6031391695676361, + "learning_rate": 2.8022250900331743e-06, + "loss": 0.3131, + "step": 20390 + }, + { + "epoch": 0.9552161896285192, + "grad_norm": 0.6099566261359689, + "learning_rate": 2.802036829926604e-06, + "loss": 0.317, + "step": 20391 + }, + { + "epoch": 0.9552630346184475, + "grad_norm": 0.5817047829050682, + "learning_rate": 2.801848568081888e-06, + "loss": 0.3287, + "step": 20392 + }, + { + "epoch": 0.9553098796083759, + "grad_norm": 0.58423063195851, + "learning_rate": 2.801660304500109e-06, + "loss": 0.3157, + "step": 20393 + }, + { + "epoch": 0.9553567245983042, + "grad_norm": 0.6054311397162786, + "learning_rate": 2.801472039182352e-06, + "loss": 0.3315, + "step": 20394 + }, + { + "epoch": 0.9554035695882326, + "grad_norm": 0.5887740434456098, + "learning_rate": 2.8012837721296983e-06, + "loss": 0.317, + "step": 20395 + }, + { + "epoch": 0.9554504145781608, + "grad_norm": 0.578980338526459, + "learning_rate": 2.8010955033432314e-06, + "loss": 0.3142, + "step": 20396 + }, + { + "epoch": 0.9554972595680892, + "grad_norm": 0.6156659983984957, + "learning_rate": 2.8009072328240366e-06, + "loss": 0.3176, + "step": 20397 + }, + { + "epoch": 0.9555441045580175, + "grad_norm": 0.6348804502356298, + "learning_rate": 2.8007189605731964e-06, + "loss": 0.3427, + "step": 20398 + }, + { + "epoch": 0.9555909495479459, + "grad_norm": 0.5504052316505987, + "learning_rate": 2.800530686591794e-06, + "loss": 0.3071, + "step": 20399 + }, + { + "epoch": 0.9556377945378741, + "grad_norm": 0.5839040154970246, + "learning_rate": 2.8003424108809126e-06, + "loss": 0.3188, + "step": 20400 + }, + { + "epoch": 0.9556846395278025, + "grad_norm": 0.6344114201859203, + "learning_rate": 2.8001541334416365e-06, + "loss": 0.3038, + "step": 20401 + }, + { + "epoch": 0.9557314845177308, + "grad_norm": 0.6027050352202822, + "learning_rate": 2.7999658542750484e-06, + "loss": 0.3147, + "step": 20402 + }, + { + "epoch": 0.9557783295076592, + "grad_norm": 0.5927544307613258, + "learning_rate": 2.799777573382233e-06, + "loss": 0.3035, + "step": 20403 + }, + { + "epoch": 0.9558251744975875, + "grad_norm": 0.5867773798703981, + "learning_rate": 2.799589290764272e-06, + "loss": 0.3269, + "step": 20404 + }, + { + "epoch": 0.9558720194875158, + "grad_norm": 0.5717732638991166, + "learning_rate": 2.799401006422251e-06, + "loss": 0.3011, + "step": 20405 + }, + { + "epoch": 0.9559188644774441, + "grad_norm": 0.6359247950005865, + "learning_rate": 2.7992127203572516e-06, + "loss": 0.3355, + "step": 20406 + }, + { + "epoch": 0.9559657094673725, + "grad_norm": 0.5670029130627625, + "learning_rate": 2.7990244325703584e-06, + "loss": 0.3117, + "step": 20407 + }, + { + "epoch": 0.9560125544573008, + "grad_norm": 0.6162272822315789, + "learning_rate": 2.7988361430626547e-06, + "loss": 0.3288, + "step": 20408 + }, + { + "epoch": 0.9560593994472291, + "grad_norm": 0.5767008492852795, + "learning_rate": 2.798647851835225e-06, + "loss": 0.318, + "step": 20409 + }, + { + "epoch": 0.9561062444371574, + "grad_norm": 0.5650948572943325, + "learning_rate": 2.7984595588891504e-06, + "loss": 0.3198, + "step": 20410 + }, + { + "epoch": 0.9561530894270858, + "grad_norm": 0.5860596148341294, + "learning_rate": 2.7982712642255173e-06, + "loss": 0.2998, + "step": 20411 + }, + { + "epoch": 0.9561999344170141, + "grad_norm": 0.5665728609758793, + "learning_rate": 2.798082967845408e-06, + "loss": 0.3086, + "step": 20412 + }, + { + "epoch": 0.9562467794069425, + "grad_norm": 0.5903818015866926, + "learning_rate": 2.7978946697499053e-06, + "loss": 0.3065, + "step": 20413 + }, + { + "epoch": 0.9562936243968707, + "grad_norm": 0.6416145034933346, + "learning_rate": 2.797706369940094e-06, + "loss": 0.33, + "step": 20414 + }, + { + "epoch": 0.9563404693867991, + "grad_norm": 0.5821163305732256, + "learning_rate": 2.7975180684170585e-06, + "loss": 0.3088, + "step": 20415 + }, + { + "epoch": 0.9563873143767274, + "grad_norm": 0.6395086924780541, + "learning_rate": 2.7973297651818797e-06, + "loss": 0.3181, + "step": 20416 + }, + { + "epoch": 0.9564341593666558, + "grad_norm": 0.6239667505525194, + "learning_rate": 2.7971414602356428e-06, + "loss": 0.3345, + "step": 20417 + }, + { + "epoch": 0.956481004356584, + "grad_norm": 0.5822999795339259, + "learning_rate": 2.7969531535794327e-06, + "loss": 0.3311, + "step": 20418 + }, + { + "epoch": 0.9565278493465124, + "grad_norm": 0.5952848906384104, + "learning_rate": 2.7967648452143314e-06, + "loss": 0.3163, + "step": 20419 + }, + { + "epoch": 0.9565746943364407, + "grad_norm": 0.6129879840505865, + "learning_rate": 2.7965765351414225e-06, + "loss": 0.3232, + "step": 20420 + }, + { + "epoch": 0.9566215393263691, + "grad_norm": 0.5990930703171519, + "learning_rate": 2.79638822336179e-06, + "loss": 0.315, + "step": 20421 + }, + { + "epoch": 0.9566683843162974, + "grad_norm": 0.5861365120348685, + "learning_rate": 2.796199909876519e-06, + "loss": 0.3258, + "step": 20422 + }, + { + "epoch": 0.9567152293062257, + "grad_norm": 0.5923963992757721, + "learning_rate": 2.7960115946866912e-06, + "loss": 0.3216, + "step": 20423 + }, + { + "epoch": 0.956762074296154, + "grad_norm": 0.5823697308614434, + "learning_rate": 2.795823277793391e-06, + "loss": 0.2897, + "step": 20424 + }, + { + "epoch": 0.9568089192860824, + "grad_norm": 0.571885761925729, + "learning_rate": 2.7956349591977024e-06, + "loss": 0.3082, + "step": 20425 + }, + { + "epoch": 0.9568557642760107, + "grad_norm": 0.5820549257514387, + "learning_rate": 2.795446638900709e-06, + "loss": 0.308, + "step": 20426 + }, + { + "epoch": 0.956902609265939, + "grad_norm": 0.5718187158178933, + "learning_rate": 2.7952583169034938e-06, + "loss": 0.3041, + "step": 20427 + }, + { + "epoch": 0.9569494542558673, + "grad_norm": 0.5768305125971247, + "learning_rate": 2.795069993207142e-06, + "loss": 0.2965, + "step": 20428 + }, + { + "epoch": 0.9569962992457957, + "grad_norm": 0.5694619047289625, + "learning_rate": 2.794881667812736e-06, + "loss": 0.3197, + "step": 20429 + }, + { + "epoch": 0.957043144235724, + "grad_norm": 0.5626037041539929, + "learning_rate": 2.7946933407213605e-06, + "loss": 0.3201, + "step": 20430 + }, + { + "epoch": 0.9570899892256524, + "grad_norm": 0.6167709314729151, + "learning_rate": 2.7945050119340988e-06, + "loss": 0.3253, + "step": 20431 + }, + { + "epoch": 0.9571368342155806, + "grad_norm": 0.6098608803020354, + "learning_rate": 2.794316681452035e-06, + "loss": 0.3087, + "step": 20432 + }, + { + "epoch": 0.957183679205509, + "grad_norm": 0.6054296785835098, + "learning_rate": 2.794128349276252e-06, + "loss": 0.3297, + "step": 20433 + }, + { + "epoch": 0.9572305241954373, + "grad_norm": 0.5488394206965209, + "learning_rate": 2.793940015407835e-06, + "loss": 0.3038, + "step": 20434 + }, + { + "epoch": 0.9572773691853657, + "grad_norm": 0.6028533009741001, + "learning_rate": 2.7937516798478664e-06, + "loss": 0.3349, + "step": 20435 + }, + { + "epoch": 0.9573242141752939, + "grad_norm": 0.610724666694171, + "learning_rate": 2.7935633425974314e-06, + "loss": 0.3308, + "step": 20436 + }, + { + "epoch": 0.9573710591652222, + "grad_norm": 0.590449783735045, + "learning_rate": 2.793375003657613e-06, + "loss": 0.2789, + "step": 20437 + }, + { + "epoch": 0.9574179041551506, + "grad_norm": 0.5951663957701477, + "learning_rate": 2.7931866630294948e-06, + "loss": 0.3217, + "step": 20438 + }, + { + "epoch": 0.957464749145079, + "grad_norm": 0.6135324915954229, + "learning_rate": 2.792998320714161e-06, + "loss": 0.3258, + "step": 20439 + }, + { + "epoch": 0.9575115941350073, + "grad_norm": 0.5799371944458795, + "learning_rate": 2.792809976712696e-06, + "loss": 0.2997, + "step": 20440 + }, + { + "epoch": 0.9575584391249355, + "grad_norm": 0.6069555042915783, + "learning_rate": 2.7926216310261826e-06, + "loss": 0.3356, + "step": 20441 + }, + { + "epoch": 0.9576052841148639, + "grad_norm": 0.5815111906486736, + "learning_rate": 2.7924332836557054e-06, + "loss": 0.3246, + "step": 20442 + }, + { + "epoch": 0.9576521291047922, + "grad_norm": 0.579997796595207, + "learning_rate": 2.7922449346023487e-06, + "loss": 0.3301, + "step": 20443 + }, + { + "epoch": 0.9576989740947206, + "grad_norm": 0.5929946793635485, + "learning_rate": 2.7920565838671954e-06, + "loss": 0.3082, + "step": 20444 + }, + { + "epoch": 0.9577458190846488, + "grad_norm": 0.5995445955598503, + "learning_rate": 2.7918682314513296e-06, + "loss": 0.3178, + "step": 20445 + }, + { + "epoch": 0.9577926640745772, + "grad_norm": 0.6001507537536978, + "learning_rate": 2.791679877355836e-06, + "loss": 0.3154, + "step": 20446 + }, + { + "epoch": 0.9578395090645055, + "grad_norm": 0.5994329934863588, + "learning_rate": 2.7914915215817985e-06, + "loss": 0.3124, + "step": 20447 + }, + { + "epoch": 0.9578863540544339, + "grad_norm": 0.6297542090739513, + "learning_rate": 2.7913031641302994e-06, + "loss": 0.3245, + "step": 20448 + }, + { + "epoch": 0.9579331990443622, + "grad_norm": 0.5565388854600587, + "learning_rate": 2.7911148050024243e-06, + "loss": 0.2984, + "step": 20449 + }, + { + "epoch": 0.9579800440342905, + "grad_norm": 0.573661141728061, + "learning_rate": 2.7909264441992573e-06, + "loss": 0.3124, + "step": 20450 + }, + { + "epoch": 0.9580268890242188, + "grad_norm": 0.5903814883555855, + "learning_rate": 2.790738081721881e-06, + "loss": 0.3258, + "step": 20451 + }, + { + "epoch": 0.9580737340141472, + "grad_norm": 0.5878875891887596, + "learning_rate": 2.79054971757138e-06, + "loss": 0.3229, + "step": 20452 + }, + { + "epoch": 0.9581205790040755, + "grad_norm": 0.5930788844663343, + "learning_rate": 2.7903613517488395e-06, + "loss": 0.3487, + "step": 20453 + }, + { + "epoch": 0.9581674239940038, + "grad_norm": 0.5287600612702402, + "learning_rate": 2.790172984255341e-06, + "loss": 0.3054, + "step": 20454 + }, + { + "epoch": 0.9582142689839321, + "grad_norm": 0.6013282382086851, + "learning_rate": 2.7899846150919706e-06, + "loss": 0.3305, + "step": 20455 + }, + { + "epoch": 0.9582611139738605, + "grad_norm": 0.5873572234112273, + "learning_rate": 2.7897962442598107e-06, + "loss": 0.316, + "step": 20456 + }, + { + "epoch": 0.9583079589637888, + "grad_norm": 0.5626744611852862, + "learning_rate": 2.789607871759948e-06, + "loss": 0.3176, + "step": 20457 + }, + { + "epoch": 0.9583548039537172, + "grad_norm": 0.5607519156859915, + "learning_rate": 2.7894194975934637e-06, + "loss": 0.2879, + "step": 20458 + }, + { + "epoch": 0.9584016489436454, + "grad_norm": 0.5601472540265119, + "learning_rate": 2.7892311217614424e-06, + "loss": 0.3081, + "step": 20459 + }, + { + "epoch": 0.9584484939335738, + "grad_norm": 0.6323686276553091, + "learning_rate": 2.7890427442649696e-06, + "loss": 0.3215, + "step": 20460 + }, + { + "epoch": 0.9584953389235021, + "grad_norm": 0.6110386510979323, + "learning_rate": 2.7888543651051287e-06, + "loss": 0.2977, + "step": 20461 + }, + { + "epoch": 0.9585421839134305, + "grad_norm": 0.553876620840561, + "learning_rate": 2.7886659842830025e-06, + "loss": 0.3208, + "step": 20462 + }, + { + "epoch": 0.9585890289033587, + "grad_norm": 0.6227146004824582, + "learning_rate": 2.7884776017996766e-06, + "loss": 0.3102, + "step": 20463 + }, + { + "epoch": 0.9586358738932871, + "grad_norm": 0.5864972797270176, + "learning_rate": 2.788289217656235e-06, + "loss": 0.325, + "step": 20464 + }, + { + "epoch": 0.9586827188832154, + "grad_norm": 0.5684780028638814, + "learning_rate": 2.788100831853761e-06, + "loss": 0.301, + "step": 20465 + }, + { + "epoch": 0.9587295638731438, + "grad_norm": 0.5535959424029825, + "learning_rate": 2.787912444393339e-06, + "loss": 0.3009, + "step": 20466 + }, + { + "epoch": 0.9587764088630721, + "grad_norm": 0.6252635507328645, + "learning_rate": 2.7877240552760537e-06, + "loss": 0.3139, + "step": 20467 + }, + { + "epoch": 0.9588232538530004, + "grad_norm": 0.6167941955807242, + "learning_rate": 2.7875356645029887e-06, + "loss": 0.3155, + "step": 20468 + }, + { + "epoch": 0.9588700988429287, + "grad_norm": 0.5862682195483495, + "learning_rate": 2.7873472720752275e-06, + "loss": 0.3312, + "step": 20469 + }, + { + "epoch": 0.9589169438328571, + "grad_norm": 0.5574369122410794, + "learning_rate": 2.787158877993856e-06, + "loss": 0.2934, + "step": 20470 + }, + { + "epoch": 0.9589637888227854, + "grad_norm": 0.6167472328133767, + "learning_rate": 2.786970482259957e-06, + "loss": 0.3244, + "step": 20471 + }, + { + "epoch": 0.9590106338127137, + "grad_norm": 0.5940114723984782, + "learning_rate": 2.786782084874615e-06, + "loss": 0.3087, + "step": 20472 + }, + { + "epoch": 0.959057478802642, + "grad_norm": 0.5682066373248077, + "learning_rate": 2.7865936858389137e-06, + "loss": 0.3141, + "step": 20473 + }, + { + "epoch": 0.9591043237925704, + "grad_norm": 0.5881337549158651, + "learning_rate": 2.7864052851539374e-06, + "loss": 0.3045, + "step": 20474 + }, + { + "epoch": 0.9591511687824987, + "grad_norm": 0.5493411964485513, + "learning_rate": 2.786216882820772e-06, + "loss": 0.3104, + "step": 20475 + }, + { + "epoch": 0.9591980137724271, + "grad_norm": 0.6214259687870888, + "learning_rate": 2.7860284788405e-06, + "loss": 0.3168, + "step": 20476 + }, + { + "epoch": 0.9592448587623553, + "grad_norm": 0.5767810106716018, + "learning_rate": 2.7858400732142054e-06, + "loss": 0.3073, + "step": 20477 + }, + { + "epoch": 0.9592917037522837, + "grad_norm": 0.5241432029161892, + "learning_rate": 2.785651665942973e-06, + "loss": 0.292, + "step": 20478 + }, + { + "epoch": 0.959338548742212, + "grad_norm": 0.6152991584216057, + "learning_rate": 2.7854632570278884e-06, + "loss": 0.3039, + "step": 20479 + }, + { + "epoch": 0.9593853937321404, + "grad_norm": 0.5870515620346936, + "learning_rate": 2.785274846470033e-06, + "loss": 0.3043, + "step": 20480 + }, + { + "epoch": 0.9594322387220686, + "grad_norm": 0.577199621170215, + "learning_rate": 2.785086434270493e-06, + "loss": 0.3129, + "step": 20481 + }, + { + "epoch": 0.959479083711997, + "grad_norm": 0.6155892990227707, + "learning_rate": 2.7848980204303527e-06, + "loss": 0.3113, + "step": 20482 + }, + { + "epoch": 0.9595259287019253, + "grad_norm": 0.5752860369807454, + "learning_rate": 2.784709604950695e-06, + "loss": 0.3201, + "step": 20483 + }, + { + "epoch": 0.9595727736918537, + "grad_norm": 0.5864608841086772, + "learning_rate": 2.7845211878326055e-06, + "loss": 0.2947, + "step": 20484 + }, + { + "epoch": 0.959619618681782, + "grad_norm": 0.5900685193675651, + "learning_rate": 2.784332769077169e-06, + "loss": 0.3076, + "step": 20485 + }, + { + "epoch": 0.9596664636717103, + "grad_norm": 0.5387373200319984, + "learning_rate": 2.7841443486854674e-06, + "loss": 0.2848, + "step": 20486 + }, + { + "epoch": 0.9597133086616386, + "grad_norm": 0.6164307737339076, + "learning_rate": 2.783955926658587e-06, + "loss": 0.3186, + "step": 20487 + }, + { + "epoch": 0.959760153651567, + "grad_norm": 0.5539946877440326, + "learning_rate": 2.783767502997612e-06, + "loss": 0.3228, + "step": 20488 + }, + { + "epoch": 0.9598069986414953, + "grad_norm": 0.620176886035313, + "learning_rate": 2.7835790777036266e-06, + "loss": 0.3194, + "step": 20489 + }, + { + "epoch": 0.9598538436314236, + "grad_norm": 0.5901914283287953, + "learning_rate": 2.7833906507777135e-06, + "loss": 0.3033, + "step": 20490 + }, + { + "epoch": 0.9599006886213519, + "grad_norm": 0.6072537575733775, + "learning_rate": 2.7832022222209592e-06, + "loss": 0.3429, + "step": 20491 + }, + { + "epoch": 0.9599475336112803, + "grad_norm": 0.6249594966808393, + "learning_rate": 2.783013792034448e-06, + "loss": 0.3075, + "step": 20492 + }, + { + "epoch": 0.9599943786012086, + "grad_norm": 0.6246816432409925, + "learning_rate": 2.782825360219263e-06, + "loss": 0.3196, + "step": 20493 + }, + { + "epoch": 0.960041223591137, + "grad_norm": 0.6147133167039414, + "learning_rate": 2.7826369267764884e-06, + "loss": 0.343, + "step": 20494 + }, + { + "epoch": 0.9600880685810652, + "grad_norm": 0.5821144353262648, + "learning_rate": 2.7824484917072103e-06, + "loss": 0.303, + "step": 20495 + }, + { + "epoch": 0.9601349135709936, + "grad_norm": 0.5906112388399214, + "learning_rate": 2.7822600550125124e-06, + "loss": 0.3129, + "step": 20496 + }, + { + "epoch": 0.9601817585609219, + "grad_norm": 0.586464744056472, + "learning_rate": 2.7820716166934773e-06, + "loss": 0.3148, + "step": 20497 + }, + { + "epoch": 0.9602286035508503, + "grad_norm": 0.5664662968470932, + "learning_rate": 2.7818831767511924e-06, + "loss": 0.3094, + "step": 20498 + }, + { + "epoch": 0.9602754485407785, + "grad_norm": 0.6097413072191029, + "learning_rate": 2.7816947351867403e-06, + "loss": 0.3421, + "step": 20499 + }, + { + "epoch": 0.9603222935307069, + "grad_norm": 0.5586266181567889, + "learning_rate": 2.7815062920012052e-06, + "loss": 0.3075, + "step": 20500 + }, + { + "epoch": 0.9603691385206352, + "grad_norm": 0.5711347581849039, + "learning_rate": 2.781317847195673e-06, + "loss": 0.3008, + "step": 20501 + }, + { + "epoch": 0.9604159835105636, + "grad_norm": 0.5870341085042645, + "learning_rate": 2.7811294007712267e-06, + "loss": 0.3176, + "step": 20502 + }, + { + "epoch": 0.9604628285004919, + "grad_norm": 0.6306964370715075, + "learning_rate": 2.7809409527289517e-06, + "loss": 0.3341, + "step": 20503 + }, + { + "epoch": 0.9605096734904202, + "grad_norm": 0.607007164092429, + "learning_rate": 2.780752503069932e-06, + "loss": 0.3098, + "step": 20504 + }, + { + "epoch": 0.9605565184803485, + "grad_norm": 0.5917219555633276, + "learning_rate": 2.780564051795252e-06, + "loss": 0.292, + "step": 20505 + }, + { + "epoch": 0.9606033634702769, + "grad_norm": 0.6190333099368771, + "learning_rate": 2.7803755989059973e-06, + "loss": 0.3125, + "step": 20506 + }, + { + "epoch": 0.9606502084602052, + "grad_norm": 0.6202702557493928, + "learning_rate": 2.780187144403251e-06, + "loss": 0.3058, + "step": 20507 + }, + { + "epoch": 0.9606970534501335, + "grad_norm": 0.6148569053044343, + "learning_rate": 2.7799986882880975e-06, + "loss": 0.3035, + "step": 20508 + }, + { + "epoch": 0.9607438984400618, + "grad_norm": 0.5800103191543869, + "learning_rate": 2.7798102305616228e-06, + "loss": 0.2937, + "step": 20509 + }, + { + "epoch": 0.9607907434299902, + "grad_norm": 0.5852451100416411, + "learning_rate": 2.7796217712249096e-06, + "loss": 0.3116, + "step": 20510 + }, + { + "epoch": 0.9608375884199185, + "grad_norm": 0.6254322603513046, + "learning_rate": 2.779433310279044e-06, + "loss": 0.3428, + "step": 20511 + }, + { + "epoch": 0.9608844334098469, + "grad_norm": 0.5753523414605342, + "learning_rate": 2.7792448477251095e-06, + "loss": 0.3142, + "step": 20512 + }, + { + "epoch": 0.9609312783997751, + "grad_norm": 0.5981306290353542, + "learning_rate": 2.779056383564192e-06, + "loss": 0.3331, + "step": 20513 + }, + { + "epoch": 0.9609781233897035, + "grad_norm": 0.5805435544348325, + "learning_rate": 2.778867917797374e-06, + "loss": 0.3086, + "step": 20514 + }, + { + "epoch": 0.9610249683796318, + "grad_norm": 0.5845623547022009, + "learning_rate": 2.7786794504257416e-06, + "loss": 0.3253, + "step": 20515 + }, + { + "epoch": 0.9610718133695602, + "grad_norm": 0.5745737636409045, + "learning_rate": 2.7784909814503792e-06, + "loss": 0.3037, + "step": 20516 + }, + { + "epoch": 0.9611186583594884, + "grad_norm": 0.596878042072873, + "learning_rate": 2.7783025108723717e-06, + "loss": 0.3097, + "step": 20517 + }, + { + "epoch": 0.9611655033494167, + "grad_norm": 0.6033648293432422, + "learning_rate": 2.778114038692802e-06, + "loss": 0.3297, + "step": 20518 + }, + { + "epoch": 0.9612123483393451, + "grad_norm": 0.5529476913566573, + "learning_rate": 2.777925564912757e-06, + "loss": 0.3068, + "step": 20519 + }, + { + "epoch": 0.9612591933292735, + "grad_norm": 0.6197637971455197, + "learning_rate": 2.77773708953332e-06, + "loss": 0.3321, + "step": 20520 + }, + { + "epoch": 0.9613060383192018, + "grad_norm": 0.6502032013100856, + "learning_rate": 2.777548612555575e-06, + "loss": 0.3186, + "step": 20521 + }, + { + "epoch": 0.96135288330913, + "grad_norm": 0.5421419953036956, + "learning_rate": 2.7773601339806088e-06, + "loss": 0.2999, + "step": 20522 + }, + { + "epoch": 0.9613997282990584, + "grad_norm": 0.5771141542340557, + "learning_rate": 2.777171653809504e-06, + "loss": 0.3128, + "step": 20523 + }, + { + "epoch": 0.9614465732889867, + "grad_norm": 0.5553687917387188, + "learning_rate": 2.776983172043346e-06, + "loss": 0.3149, + "step": 20524 + }, + { + "epoch": 0.9614934182789151, + "grad_norm": 0.5872447903975407, + "learning_rate": 2.7767946886832198e-06, + "loss": 0.3173, + "step": 20525 + }, + { + "epoch": 0.9615402632688433, + "grad_norm": 0.5828876570235314, + "learning_rate": 2.776606203730209e-06, + "loss": 0.3002, + "step": 20526 + }, + { + "epoch": 0.9615871082587717, + "grad_norm": 0.6151060199256974, + "learning_rate": 2.7764177171853994e-06, + "loss": 0.3189, + "step": 20527 + }, + { + "epoch": 0.9616339532487, + "grad_norm": 0.6389200726871365, + "learning_rate": 2.776229229049876e-06, + "loss": 0.3313, + "step": 20528 + }, + { + "epoch": 0.9616807982386284, + "grad_norm": 0.601606106206983, + "learning_rate": 2.7760407393247218e-06, + "loss": 0.3095, + "step": 20529 + }, + { + "epoch": 0.9617276432285568, + "grad_norm": 0.5771266473608516, + "learning_rate": 2.7758522480110233e-06, + "loss": 0.309, + "step": 20530 + }, + { + "epoch": 0.961774488218485, + "grad_norm": 0.6263674670729636, + "learning_rate": 2.7756637551098643e-06, + "loss": 0.3445, + "step": 20531 + }, + { + "epoch": 0.9618213332084133, + "grad_norm": 0.5693501007735453, + "learning_rate": 2.775475260622329e-06, + "loss": 0.3126, + "step": 20532 + }, + { + "epoch": 0.9618681781983417, + "grad_norm": 0.565009866051459, + "learning_rate": 2.775286764549503e-06, + "loss": 0.3369, + "step": 20533 + }, + { + "epoch": 0.96191502318827, + "grad_norm": 0.5846743686303592, + "learning_rate": 2.775098266892472e-06, + "loss": 0.327, + "step": 20534 + }, + { + "epoch": 0.9619618681781983, + "grad_norm": 0.5883142335487994, + "learning_rate": 2.7749097676523186e-06, + "loss": 0.2934, + "step": 20535 + }, + { + "epoch": 0.9620087131681266, + "grad_norm": 0.5758803426758804, + "learning_rate": 2.774721266830128e-06, + "loss": 0.3222, + "step": 20536 + }, + { + "epoch": 0.962055558158055, + "grad_norm": 0.6466104177171264, + "learning_rate": 2.774532764426987e-06, + "loss": 0.3375, + "step": 20537 + }, + { + "epoch": 0.9621024031479833, + "grad_norm": 0.6548839080124371, + "learning_rate": 2.7743442604439786e-06, + "loss": 0.3345, + "step": 20538 + }, + { + "epoch": 0.9621492481379117, + "grad_norm": 0.5483547748678487, + "learning_rate": 2.7741557548821868e-06, + "loss": 0.2886, + "step": 20539 + }, + { + "epoch": 0.9621960931278399, + "grad_norm": 0.6129990823266944, + "learning_rate": 2.7739672477426987e-06, + "loss": 0.3251, + "step": 20540 + }, + { + "epoch": 0.9622429381177683, + "grad_norm": 0.5842495718543647, + "learning_rate": 2.773778739026598e-06, + "loss": 0.2959, + "step": 20541 + }, + { + "epoch": 0.9622897831076966, + "grad_norm": 0.6197495764149412, + "learning_rate": 2.7735902287349685e-06, + "loss": 0.343, + "step": 20542 + }, + { + "epoch": 0.962336628097625, + "grad_norm": 0.627928713951916, + "learning_rate": 2.773401716868897e-06, + "loss": 0.3065, + "step": 20543 + }, + { + "epoch": 0.9623834730875532, + "grad_norm": 0.6189411286297083, + "learning_rate": 2.773213203429466e-06, + "loss": 0.3041, + "step": 20544 + }, + { + "epoch": 0.9624303180774816, + "grad_norm": 0.5714665766885961, + "learning_rate": 2.773024688417763e-06, + "loss": 0.2978, + "step": 20545 + }, + { + "epoch": 0.9624771630674099, + "grad_norm": 0.5951621596992968, + "learning_rate": 2.7728361718348716e-06, + "loss": 0.3114, + "step": 20546 + }, + { + "epoch": 0.9625240080573383, + "grad_norm": 0.5738040558602355, + "learning_rate": 2.772647653681876e-06, + "loss": 0.3049, + "step": 20547 + }, + { + "epoch": 0.9625708530472666, + "grad_norm": 0.5790159105328395, + "learning_rate": 2.7724591339598616e-06, + "loss": 0.3199, + "step": 20548 + }, + { + "epoch": 0.9626176980371949, + "grad_norm": 0.60659893178284, + "learning_rate": 2.772270612669914e-06, + "loss": 0.3093, + "step": 20549 + }, + { + "epoch": 0.9626645430271232, + "grad_norm": 0.5870574444595112, + "learning_rate": 2.7720820898131163e-06, + "loss": 0.3009, + "step": 20550 + }, + { + "epoch": 0.9627113880170516, + "grad_norm": 0.6347705659450708, + "learning_rate": 2.7718935653905554e-06, + "loss": 0.3412, + "step": 20551 + }, + { + "epoch": 0.9627582330069799, + "grad_norm": 0.5519305442465978, + "learning_rate": 2.771705039403316e-06, + "loss": 0.3139, + "step": 20552 + }, + { + "epoch": 0.9628050779969082, + "grad_norm": 0.5614849955625159, + "learning_rate": 2.771516511852481e-06, + "loss": 0.2958, + "step": 20553 + }, + { + "epoch": 0.9628519229868365, + "grad_norm": 0.5996583732656651, + "learning_rate": 2.771327982739137e-06, + "loss": 0.3086, + "step": 20554 + }, + { + "epoch": 0.9628987679767649, + "grad_norm": 0.6119395542802772, + "learning_rate": 2.771139452064369e-06, + "loss": 0.3376, + "step": 20555 + }, + { + "epoch": 0.9629456129666932, + "grad_norm": 0.5543951393571697, + "learning_rate": 2.7709509198292624e-06, + "loss": 0.3012, + "step": 20556 + }, + { + "epoch": 0.9629924579566216, + "grad_norm": 0.5792393525512949, + "learning_rate": 2.7707623860349e-06, + "loss": 0.3213, + "step": 20557 + }, + { + "epoch": 0.9630393029465498, + "grad_norm": 0.6642423539895076, + "learning_rate": 2.770573850682369e-06, + "loss": 0.3325, + "step": 20558 + }, + { + "epoch": 0.9630861479364782, + "grad_norm": 0.5524953751840501, + "learning_rate": 2.7703853137727534e-06, + "loss": 0.3037, + "step": 20559 + }, + { + "epoch": 0.9631329929264065, + "grad_norm": 0.5627995467092033, + "learning_rate": 2.7701967753071376e-06, + "loss": 0.3072, + "step": 20560 + }, + { + "epoch": 0.9631798379163349, + "grad_norm": 0.6014466120670895, + "learning_rate": 2.7700082352866076e-06, + "loss": 0.3386, + "step": 20561 + }, + { + "epoch": 0.9632266829062631, + "grad_norm": 0.6011705383119942, + "learning_rate": 2.7698196937122483e-06, + "loss": 0.3243, + "step": 20562 + }, + { + "epoch": 0.9632735278961915, + "grad_norm": 0.5757009452066442, + "learning_rate": 2.769631150585144e-06, + "loss": 0.3021, + "step": 20563 + }, + { + "epoch": 0.9633203728861198, + "grad_norm": 0.6001137098230663, + "learning_rate": 2.7694426059063807e-06, + "loss": 0.3485, + "step": 20564 + }, + { + "epoch": 0.9633672178760482, + "grad_norm": 0.6037891193832066, + "learning_rate": 2.769254059677043e-06, + "loss": 0.3284, + "step": 20565 + }, + { + "epoch": 0.9634140628659765, + "grad_norm": 0.6060879240485629, + "learning_rate": 2.7690655118982156e-06, + "loss": 0.298, + "step": 20566 + }, + { + "epoch": 0.9634609078559048, + "grad_norm": 0.561325996092399, + "learning_rate": 2.768876962570984e-06, + "loss": 0.3116, + "step": 20567 + }, + { + "epoch": 0.9635077528458331, + "grad_norm": 0.6089923265769871, + "learning_rate": 2.768688411696433e-06, + "loss": 0.3274, + "step": 20568 + }, + { + "epoch": 0.9635545978357615, + "grad_norm": 0.6105906930944213, + "learning_rate": 2.768499859275648e-06, + "loss": 0.2944, + "step": 20569 + }, + { + "epoch": 0.9636014428256898, + "grad_norm": 0.6214260276008996, + "learning_rate": 2.7683113053097126e-06, + "loss": 0.3328, + "step": 20570 + }, + { + "epoch": 0.9636482878156181, + "grad_norm": 0.5901969535051074, + "learning_rate": 2.7681227497997144e-06, + "loss": 0.3256, + "step": 20571 + }, + { + "epoch": 0.9636951328055464, + "grad_norm": 0.5671960960582029, + "learning_rate": 2.7679341927467368e-06, + "loss": 0.285, + "step": 20572 + }, + { + "epoch": 0.9637419777954748, + "grad_norm": 0.6026139085395315, + "learning_rate": 2.767745634151865e-06, + "loss": 0.3009, + "step": 20573 + }, + { + "epoch": 0.9637888227854031, + "grad_norm": 0.6526007910467903, + "learning_rate": 2.7675570740161846e-06, + "loss": 0.3002, + "step": 20574 + }, + { + "epoch": 0.9638356677753315, + "grad_norm": 0.6067800356107244, + "learning_rate": 2.76736851234078e-06, + "loss": 0.3259, + "step": 20575 + }, + { + "epoch": 0.9638825127652597, + "grad_norm": 0.6110282079215955, + "learning_rate": 2.7671799491267377e-06, + "loss": 0.3257, + "step": 20576 + }, + { + "epoch": 0.9639293577551881, + "grad_norm": 0.5792593823372647, + "learning_rate": 2.7669913843751416e-06, + "loss": 0.3156, + "step": 20577 + }, + { + "epoch": 0.9639762027451164, + "grad_norm": 0.5881356433284661, + "learning_rate": 2.7668028180870764e-06, + "loss": 0.3071, + "step": 20578 + }, + { + "epoch": 0.9640230477350448, + "grad_norm": 0.6064589290882368, + "learning_rate": 2.766614250263629e-06, + "loss": 0.3166, + "step": 20579 + }, + { + "epoch": 0.964069892724973, + "grad_norm": 0.6018383826831358, + "learning_rate": 2.766425680905884e-06, + "loss": 0.3249, + "step": 20580 + }, + { + "epoch": 0.9641167377149014, + "grad_norm": 0.5969735883376259, + "learning_rate": 2.7662371100149247e-06, + "loss": 0.3148, + "step": 20581 + }, + { + "epoch": 0.9641635827048297, + "grad_norm": 0.5901644333425512, + "learning_rate": 2.7660485375918385e-06, + "loss": 0.2968, + "step": 20582 + }, + { + "epoch": 0.9642104276947581, + "grad_norm": 0.5691444371803653, + "learning_rate": 2.7658599636377106e-06, + "loss": 0.3077, + "step": 20583 + }, + { + "epoch": 0.9642572726846864, + "grad_norm": 0.5587957980674466, + "learning_rate": 2.765671388153624e-06, + "loss": 0.3238, + "step": 20584 + }, + { + "epoch": 0.9643041176746147, + "grad_norm": 0.5880536168279483, + "learning_rate": 2.7654828111406655e-06, + "loss": 0.3215, + "step": 20585 + }, + { + "epoch": 0.964350962664543, + "grad_norm": 0.6491243054354435, + "learning_rate": 2.765294232599921e-06, + "loss": 0.3356, + "step": 20586 + }, + { + "epoch": 0.9643978076544714, + "grad_norm": 0.6219839439132604, + "learning_rate": 2.7651056525324747e-06, + "loss": 0.3214, + "step": 20587 + }, + { + "epoch": 0.9644446526443997, + "grad_norm": 0.5958000965571055, + "learning_rate": 2.764917070939412e-06, + "loss": 0.3271, + "step": 20588 + }, + { + "epoch": 0.964491497634328, + "grad_norm": 0.5963843163737597, + "learning_rate": 2.7647284878218177e-06, + "loss": 0.3057, + "step": 20589 + }, + { + "epoch": 0.9645383426242563, + "grad_norm": 0.572651423890803, + "learning_rate": 2.7645399031807784e-06, + "loss": 0.3033, + "step": 20590 + }, + { + "epoch": 0.9645851876141847, + "grad_norm": 0.6178359036940304, + "learning_rate": 2.7643513170173774e-06, + "loss": 0.3294, + "step": 20591 + }, + { + "epoch": 0.964632032604113, + "grad_norm": 0.5773232867807931, + "learning_rate": 2.7641627293327018e-06, + "loss": 0.3005, + "step": 20592 + }, + { + "epoch": 0.9646788775940414, + "grad_norm": 0.5703659837366478, + "learning_rate": 2.763974140127836e-06, + "loss": 0.3037, + "step": 20593 + }, + { + "epoch": 0.9647257225839696, + "grad_norm": 0.6381950170566881, + "learning_rate": 2.7637855494038646e-06, + "loss": 0.3231, + "step": 20594 + }, + { + "epoch": 0.964772567573898, + "grad_norm": 0.6281183165260764, + "learning_rate": 2.7635969571618743e-06, + "loss": 0.3155, + "step": 20595 + }, + { + "epoch": 0.9648194125638263, + "grad_norm": 0.6673603880981784, + "learning_rate": 2.763408363402949e-06, + "loss": 0.3136, + "step": 20596 + }, + { + "epoch": 0.9648662575537547, + "grad_norm": 0.609092225012115, + "learning_rate": 2.7632197681281758e-06, + "loss": 0.3244, + "step": 20597 + }, + { + "epoch": 0.9649131025436829, + "grad_norm": 0.5911850268255633, + "learning_rate": 2.763031171338639e-06, + "loss": 0.2944, + "step": 20598 + }, + { + "epoch": 0.9649599475336113, + "grad_norm": 0.6539090591911909, + "learning_rate": 2.762842573035423e-06, + "loss": 0.3189, + "step": 20599 + }, + { + "epoch": 0.9650067925235396, + "grad_norm": 0.5401029805376887, + "learning_rate": 2.7626539732196145e-06, + "loss": 0.3123, + "step": 20600 + }, + { + "epoch": 0.965053637513468, + "grad_norm": 0.6294139768499271, + "learning_rate": 2.762465371892299e-06, + "loss": 0.3233, + "step": 20601 + }, + { + "epoch": 0.9651004825033963, + "grad_norm": 0.5656551668592862, + "learning_rate": 2.7622767690545605e-06, + "loss": 0.3143, + "step": 20602 + }, + { + "epoch": 0.9651473274933245, + "grad_norm": 0.5977061571032873, + "learning_rate": 2.7620881647074847e-06, + "loss": 0.316, + "step": 20603 + }, + { + "epoch": 0.9651941724832529, + "grad_norm": 0.6157495279235703, + "learning_rate": 2.7618995588521584e-06, + "loss": 0.3119, + "step": 20604 + }, + { + "epoch": 0.9652410174731813, + "grad_norm": 0.5692393122948523, + "learning_rate": 2.7617109514896657e-06, + "loss": 0.2959, + "step": 20605 + }, + { + "epoch": 0.9652878624631096, + "grad_norm": 0.5881052746324589, + "learning_rate": 2.7615223426210917e-06, + "loss": 0.2996, + "step": 20606 + }, + { + "epoch": 0.9653347074530378, + "grad_norm": 0.6156816684327256, + "learning_rate": 2.761333732247523e-06, + "loss": 0.3457, + "step": 20607 + }, + { + "epoch": 0.9653815524429662, + "grad_norm": 0.5728611617664632, + "learning_rate": 2.7611451203700445e-06, + "loss": 0.3112, + "step": 20608 + }, + { + "epoch": 0.9654283974328945, + "grad_norm": 0.6288010423810121, + "learning_rate": 2.7609565069897405e-06, + "loss": 0.332, + "step": 20609 + }, + { + "epoch": 0.9654752424228229, + "grad_norm": 0.598244917973874, + "learning_rate": 2.760767892107698e-06, + "loss": 0.3082, + "step": 20610 + }, + { + "epoch": 0.9655220874127513, + "grad_norm": 0.6150676711110805, + "learning_rate": 2.7605792757250024e-06, + "loss": 0.3093, + "step": 20611 + }, + { + "epoch": 0.9655689324026795, + "grad_norm": 0.6338562118669039, + "learning_rate": 2.7603906578427374e-06, + "loss": 0.3107, + "step": 20612 + }, + { + "epoch": 0.9656157773926078, + "grad_norm": 0.6179876168891468, + "learning_rate": 2.7602020384619902e-06, + "loss": 0.3251, + "step": 20613 + }, + { + "epoch": 0.9656626223825362, + "grad_norm": 0.5922068735886511, + "learning_rate": 2.7600134175838455e-06, + "loss": 0.3194, + "step": 20614 + }, + { + "epoch": 0.9657094673724645, + "grad_norm": 0.6183084032955012, + "learning_rate": 2.759824795209389e-06, + "loss": 0.322, + "step": 20615 + }, + { + "epoch": 0.9657563123623928, + "grad_norm": 0.596233704172374, + "learning_rate": 2.759636171339707e-06, + "loss": 0.314, + "step": 20616 + }, + { + "epoch": 0.9658031573523211, + "grad_norm": 0.6244414126244745, + "learning_rate": 2.759447545975883e-06, + "loss": 0.3068, + "step": 20617 + }, + { + "epoch": 0.9658500023422495, + "grad_norm": 0.5840343323166935, + "learning_rate": 2.759258919119004e-06, + "loss": 0.3133, + "step": 20618 + }, + { + "epoch": 0.9658968473321778, + "grad_norm": 0.7196635113743598, + "learning_rate": 2.759070290770155e-06, + "loss": 0.3239, + "step": 20619 + }, + { + "epoch": 0.9659436923221062, + "grad_norm": 0.5593039152037751, + "learning_rate": 2.7588816609304216e-06, + "loss": 0.3103, + "step": 20620 + }, + { + "epoch": 0.9659905373120344, + "grad_norm": 0.6056233491878271, + "learning_rate": 2.758693029600889e-06, + "loss": 0.3123, + "step": 20621 + }, + { + "epoch": 0.9660373823019628, + "grad_norm": 0.5639528521555824, + "learning_rate": 2.7585043967826434e-06, + "loss": 0.3165, + "step": 20622 + }, + { + "epoch": 0.9660842272918911, + "grad_norm": 0.5673400685140358, + "learning_rate": 2.75831576247677e-06, + "loss": 0.2872, + "step": 20623 + }, + { + "epoch": 0.9661310722818195, + "grad_norm": 0.550678440934413, + "learning_rate": 2.7581271266843533e-06, + "loss": 0.296, + "step": 20624 + }, + { + "epoch": 0.9661779172717477, + "grad_norm": 0.5738495540409001, + "learning_rate": 2.757938489406481e-06, + "loss": 0.3156, + "step": 20625 + }, + { + "epoch": 0.9662247622616761, + "grad_norm": 0.5799633524556808, + "learning_rate": 2.7577498506442372e-06, + "loss": 0.2965, + "step": 20626 + }, + { + "epoch": 0.9662716072516044, + "grad_norm": 0.5840069427951241, + "learning_rate": 2.7575612103987083e-06, + "loss": 0.3143, + "step": 20627 + }, + { + "epoch": 0.9663184522415328, + "grad_norm": 0.5987008654736473, + "learning_rate": 2.7573725686709785e-06, + "loss": 0.3147, + "step": 20628 + }, + { + "epoch": 0.9663652972314611, + "grad_norm": 0.5608799940779486, + "learning_rate": 2.7571839254621353e-06, + "loss": 0.3126, + "step": 20629 + }, + { + "epoch": 0.9664121422213894, + "grad_norm": 0.5871472568018087, + "learning_rate": 2.756995280773262e-06, + "loss": 0.3259, + "step": 20630 + }, + { + "epoch": 0.9664589872113177, + "grad_norm": 0.5432605074629983, + "learning_rate": 2.7568066346054458e-06, + "loss": 0.2873, + "step": 20631 + }, + { + "epoch": 0.9665058322012461, + "grad_norm": 0.5536842375193048, + "learning_rate": 2.7566179869597733e-06, + "loss": 0.3041, + "step": 20632 + }, + { + "epoch": 0.9665526771911744, + "grad_norm": 0.5933316968369147, + "learning_rate": 2.756429337837327e-06, + "loss": 0.3131, + "step": 20633 + }, + { + "epoch": 0.9665995221811027, + "grad_norm": 0.6178667836757183, + "learning_rate": 2.756240687239195e-06, + "loss": 0.3364, + "step": 20634 + }, + { + "epoch": 0.966646367171031, + "grad_norm": 0.589637944271031, + "learning_rate": 2.7560520351664622e-06, + "loss": 0.3281, + "step": 20635 + }, + { + "epoch": 0.9666932121609594, + "grad_norm": 0.6511923413409659, + "learning_rate": 2.7558633816202147e-06, + "loss": 0.3305, + "step": 20636 + }, + { + "epoch": 0.9667400571508877, + "grad_norm": 0.5810426329359861, + "learning_rate": 2.755674726601537e-06, + "loss": 0.3086, + "step": 20637 + }, + { + "epoch": 0.9667869021408161, + "grad_norm": 0.5594126261066479, + "learning_rate": 2.755486070111516e-06, + "loss": 0.3079, + "step": 20638 + }, + { + "epoch": 0.9668337471307443, + "grad_norm": 0.5478607272403342, + "learning_rate": 2.755297412151237e-06, + "loss": 0.3073, + "step": 20639 + }, + { + "epoch": 0.9668805921206727, + "grad_norm": 0.6050384830311187, + "learning_rate": 2.7551087527217852e-06, + "loss": 0.3253, + "step": 20640 + }, + { + "epoch": 0.966927437110601, + "grad_norm": 0.6075852634461452, + "learning_rate": 2.7549200918242467e-06, + "loss": 0.3144, + "step": 20641 + }, + { + "epoch": 0.9669742821005294, + "grad_norm": 0.5723950105520881, + "learning_rate": 2.7547314294597077e-06, + "loss": 0.3115, + "step": 20642 + }, + { + "epoch": 0.9670211270904576, + "grad_norm": 0.6176145910073162, + "learning_rate": 2.754542765629253e-06, + "loss": 0.3415, + "step": 20643 + }, + { + "epoch": 0.967067972080386, + "grad_norm": 0.5906615738300733, + "learning_rate": 2.7543541003339683e-06, + "loss": 0.3014, + "step": 20644 + }, + { + "epoch": 0.9671148170703143, + "grad_norm": 0.5788349916127915, + "learning_rate": 2.7541654335749394e-06, + "loss": 0.3019, + "step": 20645 + }, + { + "epoch": 0.9671616620602427, + "grad_norm": 0.5879279489752485, + "learning_rate": 2.7539767653532535e-06, + "loss": 0.3275, + "step": 20646 + }, + { + "epoch": 0.967208507050171, + "grad_norm": 0.6117209886632133, + "learning_rate": 2.753788095669995e-06, + "loss": 0.3285, + "step": 20647 + }, + { + "epoch": 0.9672553520400993, + "grad_norm": 0.6007806555693949, + "learning_rate": 2.7535994245262487e-06, + "loss": 0.3051, + "step": 20648 + }, + { + "epoch": 0.9673021970300276, + "grad_norm": 0.6595624487662792, + "learning_rate": 2.7534107519231023e-06, + "loss": 0.3329, + "step": 20649 + }, + { + "epoch": 0.967349042019956, + "grad_norm": 0.5743032538839735, + "learning_rate": 2.753222077861641e-06, + "loss": 0.3283, + "step": 20650 + }, + { + "epoch": 0.9673958870098843, + "grad_norm": 0.6143309872414768, + "learning_rate": 2.753033402342949e-06, + "loss": 0.3135, + "step": 20651 + }, + { + "epoch": 0.9674427319998126, + "grad_norm": 0.632037689165628, + "learning_rate": 2.752844725368114e-06, + "loss": 0.3333, + "step": 20652 + }, + { + "epoch": 0.9674895769897409, + "grad_norm": 0.5583702963948629, + "learning_rate": 2.752656046938222e-06, + "loss": 0.2788, + "step": 20653 + }, + { + "epoch": 0.9675364219796693, + "grad_norm": 0.6177651510238568, + "learning_rate": 2.752467367054357e-06, + "loss": 0.3121, + "step": 20654 + }, + { + "epoch": 0.9675832669695976, + "grad_norm": 0.6098175778252184, + "learning_rate": 2.7522786857176054e-06, + "loss": 0.3291, + "step": 20655 + }, + { + "epoch": 0.967630111959526, + "grad_norm": 0.646521788409563, + "learning_rate": 2.7520900029290544e-06, + "loss": 0.31, + "step": 20656 + }, + { + "epoch": 0.9676769569494542, + "grad_norm": 0.5804229517259054, + "learning_rate": 2.7519013186897887e-06, + "loss": 0.3225, + "step": 20657 + }, + { + "epoch": 0.9677238019393826, + "grad_norm": 0.5759808421789513, + "learning_rate": 2.751712633000893e-06, + "loss": 0.3169, + "step": 20658 + }, + { + "epoch": 0.9677706469293109, + "grad_norm": 0.6177309357182346, + "learning_rate": 2.751523945863456e-06, + "loss": 0.3296, + "step": 20659 + }, + { + "epoch": 0.9678174919192393, + "grad_norm": 0.6167686696066279, + "learning_rate": 2.7513352572785613e-06, + "loss": 0.3161, + "step": 20660 + }, + { + "epoch": 0.9678643369091675, + "grad_norm": 0.6015502194423369, + "learning_rate": 2.751146567247295e-06, + "loss": 0.3009, + "step": 20661 + }, + { + "epoch": 0.9679111818990959, + "grad_norm": 0.5664970037482301, + "learning_rate": 2.7509578757707434e-06, + "loss": 0.3167, + "step": 20662 + }, + { + "epoch": 0.9679580268890242, + "grad_norm": 0.5318694754962738, + "learning_rate": 2.7507691828499927e-06, + "loss": 0.2926, + "step": 20663 + }, + { + "epoch": 0.9680048718789526, + "grad_norm": 0.6365321501492834, + "learning_rate": 2.7505804884861282e-06, + "loss": 0.3363, + "step": 20664 + }, + { + "epoch": 0.9680517168688809, + "grad_norm": 0.6133319370565866, + "learning_rate": 2.750391792680236e-06, + "loss": 0.3144, + "step": 20665 + }, + { + "epoch": 0.9680985618588092, + "grad_norm": 0.5487682368508436, + "learning_rate": 2.750203095433401e-06, + "loss": 0.3157, + "step": 20666 + }, + { + "epoch": 0.9681454068487375, + "grad_norm": 0.5951324163191575, + "learning_rate": 2.7500143967467113e-06, + "loss": 0.3243, + "step": 20667 + }, + { + "epoch": 0.9681922518386659, + "grad_norm": 0.614115872100399, + "learning_rate": 2.7498256966212516e-06, + "loss": 0.3197, + "step": 20668 + }, + { + "epoch": 0.9682390968285942, + "grad_norm": 0.6128338237909496, + "learning_rate": 2.749636995058107e-06, + "loss": 0.3128, + "step": 20669 + }, + { + "epoch": 0.9682859418185225, + "grad_norm": 0.6253897835032346, + "learning_rate": 2.749448292058365e-06, + "loss": 0.3358, + "step": 20670 + }, + { + "epoch": 0.9683327868084508, + "grad_norm": 0.6159312767729483, + "learning_rate": 2.749259587623111e-06, + "loss": 0.3251, + "step": 20671 + }, + { + "epoch": 0.9683796317983792, + "grad_norm": 0.5677027859345651, + "learning_rate": 2.7490708817534297e-06, + "loss": 0.3229, + "step": 20672 + }, + { + "epoch": 0.9684264767883075, + "grad_norm": 0.5809405115191033, + "learning_rate": 2.748882174450408e-06, + "loss": 0.3317, + "step": 20673 + }, + { + "epoch": 0.9684733217782359, + "grad_norm": 0.5802931919093188, + "learning_rate": 2.7486934657151333e-06, + "loss": 0.3169, + "step": 20674 + }, + { + "epoch": 0.9685201667681641, + "grad_norm": 0.5681780642661678, + "learning_rate": 2.748504755548689e-06, + "loss": 0.3108, + "step": 20675 + }, + { + "epoch": 0.9685670117580925, + "grad_norm": 0.5795398084878018, + "learning_rate": 2.7483160439521623e-06, + "loss": 0.3177, + "step": 20676 + }, + { + "epoch": 0.9686138567480208, + "grad_norm": 0.6318965323924307, + "learning_rate": 2.74812733092664e-06, + "loss": 0.3343, + "step": 20677 + }, + { + "epoch": 0.9686607017379492, + "grad_norm": 0.5564699102968237, + "learning_rate": 2.747938616473207e-06, + "loss": 0.2762, + "step": 20678 + }, + { + "epoch": 0.9687075467278774, + "grad_norm": 0.5759948984152035, + "learning_rate": 2.747749900592949e-06, + "loss": 0.3086, + "step": 20679 + }, + { + "epoch": 0.9687543917178058, + "grad_norm": 0.6128190683911336, + "learning_rate": 2.7475611832869532e-06, + "loss": 0.3299, + "step": 20680 + }, + { + "epoch": 0.9688012367077341, + "grad_norm": 0.575718868969497, + "learning_rate": 2.7473724645563048e-06, + "loss": 0.314, + "step": 20681 + }, + { + "epoch": 0.9688480816976625, + "grad_norm": 0.5752543554519476, + "learning_rate": 2.74718374440209e-06, + "loss": 0.3096, + "step": 20682 + }, + { + "epoch": 0.9688949266875908, + "grad_norm": 0.6301790906275617, + "learning_rate": 2.746995022825395e-06, + "loss": 0.3175, + "step": 20683 + }, + { + "epoch": 0.968941771677519, + "grad_norm": 0.5713448629563589, + "learning_rate": 2.7468062998273053e-06, + "loss": 0.3046, + "step": 20684 + }, + { + "epoch": 0.9689886166674474, + "grad_norm": 0.610327800940176, + "learning_rate": 2.746617575408908e-06, + "loss": 0.3164, + "step": 20685 + }, + { + "epoch": 0.9690354616573758, + "grad_norm": 0.5891841561478226, + "learning_rate": 2.7464288495712886e-06, + "loss": 0.3082, + "step": 20686 + }, + { + "epoch": 0.9690823066473041, + "grad_norm": 0.6112035054047712, + "learning_rate": 2.7462401223155326e-06, + "loss": 0.3346, + "step": 20687 + }, + { + "epoch": 0.9691291516372323, + "grad_norm": 0.5895183218708623, + "learning_rate": 2.746051393642727e-06, + "loss": 0.3165, + "step": 20688 + }, + { + "epoch": 0.9691759966271607, + "grad_norm": 0.627480247686366, + "learning_rate": 2.7458626635539575e-06, + "loss": 0.317, + "step": 20689 + }, + { + "epoch": 0.969222841617089, + "grad_norm": 0.5727923773229089, + "learning_rate": 2.7456739320503096e-06, + "loss": 0.3227, + "step": 20690 + }, + { + "epoch": 0.9692696866070174, + "grad_norm": 0.585829134377975, + "learning_rate": 2.7454851991328703e-06, + "loss": 0.3006, + "step": 20691 + }, + { + "epoch": 0.9693165315969458, + "grad_norm": 0.6034489844379374, + "learning_rate": 2.745296464802725e-06, + "loss": 0.3362, + "step": 20692 + }, + { + "epoch": 0.969363376586874, + "grad_norm": 0.5516169941922173, + "learning_rate": 2.7451077290609607e-06, + "loss": 0.299, + "step": 20693 + }, + { + "epoch": 0.9694102215768023, + "grad_norm": 0.623454604104921, + "learning_rate": 2.7449189919086628e-06, + "loss": 0.3333, + "step": 20694 + }, + { + "epoch": 0.9694570665667307, + "grad_norm": 0.6122760967686176, + "learning_rate": 2.7447302533469177e-06, + "loss": 0.3121, + "step": 20695 + }, + { + "epoch": 0.969503911556659, + "grad_norm": 0.5993030814078326, + "learning_rate": 2.744541513376812e-06, + "loss": 0.3239, + "step": 20696 + }, + { + "epoch": 0.9695507565465873, + "grad_norm": 0.646256586534453, + "learning_rate": 2.7443527719994305e-06, + "loss": 0.3498, + "step": 20697 + }, + { + "epoch": 0.9695976015365156, + "grad_norm": 0.6012286141265039, + "learning_rate": 2.744164029215861e-06, + "loss": 0.3162, + "step": 20698 + }, + { + "epoch": 0.969644446526444, + "grad_norm": 0.5872807115170671, + "learning_rate": 2.7439752850271884e-06, + "loss": 0.3107, + "step": 20699 + }, + { + "epoch": 0.9696912915163723, + "grad_norm": 0.583259162183967, + "learning_rate": 2.7437865394344994e-06, + "loss": 0.3042, + "step": 20700 + }, + { + "epoch": 0.9697381365063007, + "grad_norm": 0.6390257818084756, + "learning_rate": 2.7435977924388794e-06, + "loss": 0.3121, + "step": 20701 + }, + { + "epoch": 0.9697849814962289, + "grad_norm": 0.5878820615685244, + "learning_rate": 2.7434090440414174e-06, + "loss": 0.3219, + "step": 20702 + }, + { + "epoch": 0.9698318264861573, + "grad_norm": 0.5954347406664527, + "learning_rate": 2.743220294243196e-06, + "loss": 0.2987, + "step": 20703 + }, + { + "epoch": 0.9698786714760856, + "grad_norm": 0.5988350868677664, + "learning_rate": 2.7430315430453023e-06, + "loss": 0.3029, + "step": 20704 + }, + { + "epoch": 0.969925516466014, + "grad_norm": 0.5933446939935013, + "learning_rate": 2.7428427904488248e-06, + "loss": 0.3183, + "step": 20705 + }, + { + "epoch": 0.9699723614559422, + "grad_norm": 0.5828560702157661, + "learning_rate": 2.742654036454847e-06, + "loss": 0.3318, + "step": 20706 + }, + { + "epoch": 0.9700192064458706, + "grad_norm": 0.6350557225884079, + "learning_rate": 2.7424652810644564e-06, + "loss": 0.3336, + "step": 20707 + }, + { + "epoch": 0.9700660514357989, + "grad_norm": 0.6099648243473452, + "learning_rate": 2.74227652427874e-06, + "loss": 0.3386, + "step": 20708 + }, + { + "epoch": 0.9701128964257273, + "grad_norm": 0.5969732116250611, + "learning_rate": 2.742087766098782e-06, + "loss": 0.3222, + "step": 20709 + }, + { + "epoch": 0.9701597414156556, + "grad_norm": 0.5921041800765424, + "learning_rate": 2.7418990065256694e-06, + "loss": 0.3126, + "step": 20710 + }, + { + "epoch": 0.9702065864055839, + "grad_norm": 0.597667726827178, + "learning_rate": 2.74171024556049e-06, + "loss": 0.3084, + "step": 20711 + }, + { + "epoch": 0.9702534313955122, + "grad_norm": 0.6229120135960471, + "learning_rate": 2.7415214832043285e-06, + "loss": 0.3452, + "step": 20712 + }, + { + "epoch": 0.9703002763854406, + "grad_norm": 0.5628340006760935, + "learning_rate": 2.741332719458271e-06, + "loss": 0.3097, + "step": 20713 + }, + { + "epoch": 0.9703471213753689, + "grad_norm": 0.5812019691064385, + "learning_rate": 2.741143954323405e-06, + "loss": 0.3265, + "step": 20714 + }, + { + "epoch": 0.9703939663652972, + "grad_norm": 0.5886625123121243, + "learning_rate": 2.7409551878008157e-06, + "loss": 0.3274, + "step": 20715 + }, + { + "epoch": 0.9704408113552255, + "grad_norm": 0.6010870103048853, + "learning_rate": 2.74076641989159e-06, + "loss": 0.3285, + "step": 20716 + }, + { + "epoch": 0.9704876563451539, + "grad_norm": 0.5416163213644655, + "learning_rate": 2.7405776505968144e-06, + "loss": 0.2913, + "step": 20717 + }, + { + "epoch": 0.9705345013350822, + "grad_norm": 0.6069107826647894, + "learning_rate": 2.7403888799175743e-06, + "loss": 0.3149, + "step": 20718 + }, + { + "epoch": 0.9705813463250106, + "grad_norm": 0.5971358325299697, + "learning_rate": 2.7402001078549575e-06, + "loss": 0.3082, + "step": 20719 + }, + { + "epoch": 0.9706281913149388, + "grad_norm": 0.6253186501479028, + "learning_rate": 2.740011334410049e-06, + "loss": 0.3134, + "step": 20720 + }, + { + "epoch": 0.9706750363048672, + "grad_norm": 0.6321263451684158, + "learning_rate": 2.739822559583935e-06, + "loss": 0.3298, + "step": 20721 + }, + { + "epoch": 0.9707218812947955, + "grad_norm": 0.6315513774485682, + "learning_rate": 2.7396337833777024e-06, + "loss": 0.3346, + "step": 20722 + }, + { + "epoch": 0.9707687262847239, + "grad_norm": 0.6704986720602228, + "learning_rate": 2.739445005792439e-06, + "loss": 0.3187, + "step": 20723 + }, + { + "epoch": 0.9708155712746521, + "grad_norm": 0.5765698746496388, + "learning_rate": 2.7392562268292287e-06, + "loss": 0.3097, + "step": 20724 + }, + { + "epoch": 0.9708624162645805, + "grad_norm": 0.5643718042505342, + "learning_rate": 2.7390674464891586e-06, + "loss": 0.2979, + "step": 20725 + }, + { + "epoch": 0.9709092612545088, + "grad_norm": 0.5949794577283744, + "learning_rate": 2.7388786647733168e-06, + "loss": 0.3273, + "step": 20726 + }, + { + "epoch": 0.9709561062444372, + "grad_norm": 0.5986510163561836, + "learning_rate": 2.738689881682787e-06, + "loss": 0.3215, + "step": 20727 + }, + { + "epoch": 0.9710029512343655, + "grad_norm": 0.617142053255026, + "learning_rate": 2.7385010972186575e-06, + "loss": 0.3192, + "step": 20728 + }, + { + "epoch": 0.9710497962242938, + "grad_norm": 0.61323366205026, + "learning_rate": 2.7383123113820144e-06, + "loss": 0.3414, + "step": 20729 + }, + { + "epoch": 0.9710966412142221, + "grad_norm": 0.6354407998223844, + "learning_rate": 2.7381235241739436e-06, + "loss": 0.3407, + "step": 20730 + }, + { + "epoch": 0.9711434862041505, + "grad_norm": 0.5796143433669722, + "learning_rate": 2.7379347355955315e-06, + "loss": 0.2987, + "step": 20731 + }, + { + "epoch": 0.9711903311940788, + "grad_norm": 0.5866176911900705, + "learning_rate": 2.737745945647865e-06, + "loss": 0.3264, + "step": 20732 + }, + { + "epoch": 0.9712371761840071, + "grad_norm": 0.6108041950082242, + "learning_rate": 2.7375571543320306e-06, + "loss": 0.3121, + "step": 20733 + }, + { + "epoch": 0.9712840211739354, + "grad_norm": 0.6282829392202085, + "learning_rate": 2.7373683616491137e-06, + "loss": 0.3297, + "step": 20734 + }, + { + "epoch": 0.9713308661638638, + "grad_norm": 0.6247908499401917, + "learning_rate": 2.7371795676002024e-06, + "loss": 0.317, + "step": 20735 + }, + { + "epoch": 0.9713777111537921, + "grad_norm": 0.5929896177541307, + "learning_rate": 2.7369907721863813e-06, + "loss": 0.3179, + "step": 20736 + }, + { + "epoch": 0.9714245561437205, + "grad_norm": 0.591741494848544, + "learning_rate": 2.7368019754087394e-06, + "loss": 0.3331, + "step": 20737 + }, + { + "epoch": 0.9714714011336487, + "grad_norm": 0.6468421655223034, + "learning_rate": 2.736613177268361e-06, + "loss": 0.342, + "step": 20738 + }, + { + "epoch": 0.9715182461235771, + "grad_norm": 0.5936001316384184, + "learning_rate": 2.7364243777663323e-06, + "loss": 0.3252, + "step": 20739 + }, + { + "epoch": 0.9715650911135054, + "grad_norm": 0.572411061369773, + "learning_rate": 2.736235576903742e-06, + "loss": 0.3055, + "step": 20740 + }, + { + "epoch": 0.9716119361034338, + "grad_norm": 0.5848586733624845, + "learning_rate": 2.736046774681675e-06, + "loss": 0.3306, + "step": 20741 + }, + { + "epoch": 0.971658781093362, + "grad_norm": 0.5609911717220628, + "learning_rate": 2.7358579711012175e-06, + "loss": 0.2866, + "step": 20742 + }, + { + "epoch": 0.9717056260832904, + "grad_norm": 0.5977866349437393, + "learning_rate": 2.7356691661634567e-06, + "loss": 0.306, + "step": 20743 + }, + { + "epoch": 0.9717524710732187, + "grad_norm": 0.6001826732043997, + "learning_rate": 2.73548035986948e-06, + "loss": 0.2861, + "step": 20744 + }, + { + "epoch": 0.9717993160631471, + "grad_norm": 0.6184654990307002, + "learning_rate": 2.7352915522203723e-06, + "loss": 0.3311, + "step": 20745 + }, + { + "epoch": 0.9718461610530754, + "grad_norm": 0.6113365469324341, + "learning_rate": 2.7351027432172205e-06, + "loss": 0.335, + "step": 20746 + }, + { + "epoch": 0.9718930060430037, + "grad_norm": 0.6059281271295501, + "learning_rate": 2.7349139328611123e-06, + "loss": 0.3183, + "step": 20747 + }, + { + "epoch": 0.971939851032932, + "grad_norm": 0.6225799670989413, + "learning_rate": 2.734725121153133e-06, + "loss": 0.3241, + "step": 20748 + }, + { + "epoch": 0.9719866960228604, + "grad_norm": 0.6320623455823096, + "learning_rate": 2.73453630809437e-06, + "loss": 0.3224, + "step": 20749 + }, + { + "epoch": 0.9720335410127887, + "grad_norm": 0.645831056087807, + "learning_rate": 2.7343474936859095e-06, + "loss": 0.3195, + "step": 20750 + }, + { + "epoch": 0.972080386002717, + "grad_norm": 0.7282662109054242, + "learning_rate": 2.7341586779288376e-06, + "loss": 0.3166, + "step": 20751 + }, + { + "epoch": 0.9721272309926453, + "grad_norm": 0.5847698860803318, + "learning_rate": 2.7339698608242413e-06, + "loss": 0.3151, + "step": 20752 + }, + { + "epoch": 0.9721740759825737, + "grad_norm": 0.6187510434354917, + "learning_rate": 2.7337810423732083e-06, + "loss": 0.3137, + "step": 20753 + }, + { + "epoch": 0.972220920972502, + "grad_norm": 0.5589334796343722, + "learning_rate": 2.733592222576823e-06, + "loss": 0.2975, + "step": 20754 + }, + { + "epoch": 0.9722677659624304, + "grad_norm": 0.5825182454842455, + "learning_rate": 2.7334034014361736e-06, + "loss": 0.3123, + "step": 20755 + }, + { + "epoch": 0.9723146109523586, + "grad_norm": 0.6457524481446102, + "learning_rate": 2.7332145789523468e-06, + "loss": 0.3329, + "step": 20756 + }, + { + "epoch": 0.972361455942287, + "grad_norm": 0.5506338446725388, + "learning_rate": 2.7330257551264276e-06, + "loss": 0.3065, + "step": 20757 + }, + { + "epoch": 0.9724083009322153, + "grad_norm": 0.6055622518753219, + "learning_rate": 2.732836929959505e-06, + "loss": 0.337, + "step": 20758 + }, + { + "epoch": 0.9724551459221437, + "grad_norm": 0.6034753717934097, + "learning_rate": 2.7326481034526637e-06, + "loss": 0.3012, + "step": 20759 + }, + { + "epoch": 0.9725019909120719, + "grad_norm": 0.590496625006882, + "learning_rate": 2.7324592756069904e-06, + "loss": 0.3234, + "step": 20760 + }, + { + "epoch": 0.9725488359020003, + "grad_norm": 0.6143940243476289, + "learning_rate": 2.732270446423574e-06, + "loss": 0.3367, + "step": 20761 + }, + { + "epoch": 0.9725956808919286, + "grad_norm": 0.5822739801945362, + "learning_rate": 2.732081615903498e-06, + "loss": 0.3117, + "step": 20762 + }, + { + "epoch": 0.972642525881857, + "grad_norm": 0.5949417525832638, + "learning_rate": 2.7318927840478516e-06, + "loss": 0.3184, + "step": 20763 + }, + { + "epoch": 0.9726893708717853, + "grad_norm": 0.5751892320617404, + "learning_rate": 2.7317039508577197e-06, + "loss": 0.3031, + "step": 20764 + }, + { + "epoch": 0.9727362158617135, + "grad_norm": 0.6435205757782625, + "learning_rate": 2.7315151163341907e-06, + "loss": 0.3196, + "step": 20765 + }, + { + "epoch": 0.9727830608516419, + "grad_norm": 0.5829337596101349, + "learning_rate": 2.73132628047835e-06, + "loss": 0.3147, + "step": 20766 + }, + { + "epoch": 0.9728299058415703, + "grad_norm": 0.6266568154341852, + "learning_rate": 2.7311374432912844e-06, + "loss": 0.3333, + "step": 20767 + }, + { + "epoch": 0.9728767508314986, + "grad_norm": 0.6059507707858014, + "learning_rate": 2.7309486047740815e-06, + "loss": 0.3073, + "step": 20768 + }, + { + "epoch": 0.9729235958214268, + "grad_norm": 0.5846936797394537, + "learning_rate": 2.730759764927827e-06, + "loss": 0.3134, + "step": 20769 + }, + { + "epoch": 0.9729704408113552, + "grad_norm": 0.6109254484789697, + "learning_rate": 2.730570923753608e-06, + "loss": 0.3164, + "step": 20770 + }, + { + "epoch": 0.9730172858012835, + "grad_norm": 0.5990842132843186, + "learning_rate": 2.730382081252511e-06, + "loss": 0.3387, + "step": 20771 + }, + { + "epoch": 0.9730641307912119, + "grad_norm": 0.6040930253519252, + "learning_rate": 2.730193237425625e-06, + "loss": 0.3192, + "step": 20772 + }, + { + "epoch": 0.9731109757811403, + "grad_norm": 0.5974220699711579, + "learning_rate": 2.7300043922740325e-06, + "loss": 0.3237, + "step": 20773 + }, + { + "epoch": 0.9731578207710685, + "grad_norm": 0.5816706388680798, + "learning_rate": 2.729815545798823e-06, + "loss": 0.3159, + "step": 20774 + }, + { + "epoch": 0.9732046657609968, + "grad_norm": 0.6621439924986428, + "learning_rate": 2.729626698001083e-06, + "loss": 0.3241, + "step": 20775 + }, + { + "epoch": 0.9732515107509252, + "grad_norm": 0.6122212205554456, + "learning_rate": 2.7294378488818995e-06, + "loss": 0.3308, + "step": 20776 + }, + { + "epoch": 0.9732983557408535, + "grad_norm": 0.5841920367930917, + "learning_rate": 2.729248998442358e-06, + "loss": 0.3035, + "step": 20777 + }, + { + "epoch": 0.9733452007307818, + "grad_norm": 0.5638742528306501, + "learning_rate": 2.729060146683547e-06, + "loss": 0.32, + "step": 20778 + }, + { + "epoch": 0.9733920457207101, + "grad_norm": 0.6014203647450355, + "learning_rate": 2.7288712936065524e-06, + "loss": 0.3147, + "step": 20779 + }, + { + "epoch": 0.9734388907106385, + "grad_norm": 0.5401323513292839, + "learning_rate": 2.7286824392124606e-06, + "loss": 0.2767, + "step": 20780 + }, + { + "epoch": 0.9734857357005668, + "grad_norm": 0.5799142701909185, + "learning_rate": 2.728493583502359e-06, + "loss": 0.3026, + "step": 20781 + }, + { + "epoch": 0.9735325806904952, + "grad_norm": 0.6066458720928207, + "learning_rate": 2.728304726477334e-06, + "loss": 0.306, + "step": 20782 + }, + { + "epoch": 0.9735794256804234, + "grad_norm": 0.6028731483885621, + "learning_rate": 2.7281158681384727e-06, + "loss": 0.3156, + "step": 20783 + }, + { + "epoch": 0.9736262706703518, + "grad_norm": 0.5992398831912656, + "learning_rate": 2.7279270084868626e-06, + "loss": 0.2938, + "step": 20784 + }, + { + "epoch": 0.9736731156602801, + "grad_norm": 0.6101703368508449, + "learning_rate": 2.7277381475235885e-06, + "loss": 0.3221, + "step": 20785 + }, + { + "epoch": 0.9737199606502085, + "grad_norm": 0.55532707369166, + "learning_rate": 2.7275492852497397e-06, + "loss": 0.3092, + "step": 20786 + }, + { + "epoch": 0.9737668056401367, + "grad_norm": 0.5467263255761293, + "learning_rate": 2.7273604216664026e-06, + "loss": 0.2944, + "step": 20787 + }, + { + "epoch": 0.9738136506300651, + "grad_norm": 0.6111185819860128, + "learning_rate": 2.7271715567746617e-06, + "loss": 0.3274, + "step": 20788 + }, + { + "epoch": 0.9738604956199934, + "grad_norm": 0.6323999892878538, + "learning_rate": 2.7269826905756067e-06, + "loss": 0.3404, + "step": 20789 + }, + { + "epoch": 0.9739073406099218, + "grad_norm": 0.6048967328470304, + "learning_rate": 2.7267938230703232e-06, + "loss": 0.3158, + "step": 20790 + }, + { + "epoch": 0.9739541855998501, + "grad_norm": 0.6379237509624035, + "learning_rate": 2.7266049542598975e-06, + "loss": 0.3104, + "step": 20791 + }, + { + "epoch": 0.9740010305897784, + "grad_norm": 0.5888267008255602, + "learning_rate": 2.726416084145418e-06, + "loss": 0.3128, + "step": 20792 + }, + { + "epoch": 0.9740478755797067, + "grad_norm": 0.555049666123393, + "learning_rate": 2.7262272127279713e-06, + "loss": 0.3171, + "step": 20793 + }, + { + "epoch": 0.9740947205696351, + "grad_norm": 0.5709193846632791, + "learning_rate": 2.726038340008643e-06, + "loss": 0.3112, + "step": 20794 + }, + { + "epoch": 0.9741415655595634, + "grad_norm": 0.5636982197502756, + "learning_rate": 2.725849465988521e-06, + "loss": 0.2842, + "step": 20795 + }, + { + "epoch": 0.9741884105494917, + "grad_norm": 0.5827262543173278, + "learning_rate": 2.7256605906686923e-06, + "loss": 0.3331, + "step": 20796 + }, + { + "epoch": 0.97423525553942, + "grad_norm": 0.5852947050126138, + "learning_rate": 2.7254717140502435e-06, + "loss": 0.2992, + "step": 20797 + }, + { + "epoch": 0.9742821005293484, + "grad_norm": 0.6127100239442709, + "learning_rate": 2.7252828361342615e-06, + "loss": 0.3379, + "step": 20798 + }, + { + "epoch": 0.9743289455192767, + "grad_norm": 0.6335474052932003, + "learning_rate": 2.7250939569218336e-06, + "loss": 0.3251, + "step": 20799 + }, + { + "epoch": 0.9743757905092051, + "grad_norm": 0.6176861191514972, + "learning_rate": 2.7249050764140472e-06, + "loss": 0.3213, + "step": 20800 + }, + { + "epoch": 0.9744226354991333, + "grad_norm": 0.5573102915139913, + "learning_rate": 2.7247161946119873e-06, + "loss": 0.3219, + "step": 20801 + }, + { + "epoch": 0.9744694804890617, + "grad_norm": 0.5908113944810547, + "learning_rate": 2.724527311516743e-06, + "loss": 0.3149, + "step": 20802 + }, + { + "epoch": 0.97451632547899, + "grad_norm": 0.6388240273474796, + "learning_rate": 2.7243384271294004e-06, + "loss": 0.3094, + "step": 20803 + }, + { + "epoch": 0.9745631704689184, + "grad_norm": 0.6045644451533424, + "learning_rate": 2.7241495414510462e-06, + "loss": 0.3291, + "step": 20804 + }, + { + "epoch": 0.9746100154588466, + "grad_norm": 0.6099175055778154, + "learning_rate": 2.7239606544827684e-06, + "loss": 0.3242, + "step": 20805 + }, + { + "epoch": 0.974656860448775, + "grad_norm": 0.5918690864503329, + "learning_rate": 2.7237717662256525e-06, + "loss": 0.3223, + "step": 20806 + }, + { + "epoch": 0.9747037054387033, + "grad_norm": 0.603584087345641, + "learning_rate": 2.723582876680787e-06, + "loss": 0.3159, + "step": 20807 + }, + { + "epoch": 0.9747505504286317, + "grad_norm": 0.5697982909080223, + "learning_rate": 2.7233939858492582e-06, + "loss": 0.3102, + "step": 20808 + }, + { + "epoch": 0.97479739541856, + "grad_norm": 0.6767518859292452, + "learning_rate": 2.7232050937321526e-06, + "loss": 0.3305, + "step": 20809 + }, + { + "epoch": 0.9748442404084883, + "grad_norm": 0.6106281071310724, + "learning_rate": 2.7230162003305583e-06, + "loss": 0.3212, + "step": 20810 + }, + { + "epoch": 0.9748910853984166, + "grad_norm": 0.6574931344766276, + "learning_rate": 2.7228273056455618e-06, + "loss": 0.3435, + "step": 20811 + }, + { + "epoch": 0.974937930388345, + "grad_norm": 0.5768621431199497, + "learning_rate": 2.7226384096782492e-06, + "loss": 0.3063, + "step": 20812 + }, + { + "epoch": 0.9749847753782733, + "grad_norm": 0.5994316044440795, + "learning_rate": 2.722449512429709e-06, + "loss": 0.3155, + "step": 20813 + }, + { + "epoch": 0.9750316203682016, + "grad_norm": 0.6361207696335947, + "learning_rate": 2.7222606139010287e-06, + "loss": 0.3157, + "step": 20814 + }, + { + "epoch": 0.9750784653581299, + "grad_norm": 0.6034393506669947, + "learning_rate": 2.722071714093294e-06, + "loss": 0.3285, + "step": 20815 + }, + { + "epoch": 0.9751253103480583, + "grad_norm": 0.5411050070881772, + "learning_rate": 2.7218828130075915e-06, + "loss": 0.3047, + "step": 20816 + }, + { + "epoch": 0.9751721553379866, + "grad_norm": 0.6257135285796117, + "learning_rate": 2.7216939106450104e-06, + "loss": 0.3453, + "step": 20817 + }, + { + "epoch": 0.975219000327915, + "grad_norm": 0.6213035093156689, + "learning_rate": 2.7215050070066363e-06, + "loss": 0.3039, + "step": 20818 + }, + { + "epoch": 0.9752658453178432, + "grad_norm": 0.5757219924903293, + "learning_rate": 2.721316102093556e-06, + "loss": 0.3084, + "step": 20819 + }, + { + "epoch": 0.9753126903077716, + "grad_norm": 0.6166572951456746, + "learning_rate": 2.7211271959068574e-06, + "loss": 0.3026, + "step": 20820 + }, + { + "epoch": 0.9753595352976999, + "grad_norm": 0.623811222259514, + "learning_rate": 2.7209382884476277e-06, + "loss": 0.3144, + "step": 20821 + }, + { + "epoch": 0.9754063802876283, + "grad_norm": 0.5754462046818337, + "learning_rate": 2.7207493797169525e-06, + "loss": 0.3251, + "step": 20822 + }, + { + "epoch": 0.9754532252775565, + "grad_norm": 0.6088874957360474, + "learning_rate": 2.7205604697159215e-06, + "loss": 0.3151, + "step": 20823 + }, + { + "epoch": 0.9755000702674849, + "grad_norm": 0.5488356265529648, + "learning_rate": 2.7203715584456196e-06, + "loss": 0.3084, + "step": 20824 + }, + { + "epoch": 0.9755469152574132, + "grad_norm": 0.5969541036657772, + "learning_rate": 2.7201826459071353e-06, + "loss": 0.3201, + "step": 20825 + }, + { + "epoch": 0.9755937602473416, + "grad_norm": 0.6340908220198767, + "learning_rate": 2.719993732101555e-06, + "loss": 0.3281, + "step": 20826 + }, + { + "epoch": 0.9756406052372699, + "grad_norm": 0.585926054482069, + "learning_rate": 2.719804817029966e-06, + "loss": 0.3036, + "step": 20827 + }, + { + "epoch": 0.9756874502271982, + "grad_norm": 0.6133298220720815, + "learning_rate": 2.719615900693456e-06, + "loss": 0.3219, + "step": 20828 + }, + { + "epoch": 0.9757342952171265, + "grad_norm": 0.5763402416628665, + "learning_rate": 2.719426983093111e-06, + "loss": 0.3129, + "step": 20829 + }, + { + "epoch": 0.9757811402070549, + "grad_norm": 0.6255155416336033, + "learning_rate": 2.719238064230019e-06, + "loss": 0.3302, + "step": 20830 + }, + { + "epoch": 0.9758279851969832, + "grad_norm": 0.601333703561789, + "learning_rate": 2.7190491441052672e-06, + "loss": 0.3167, + "step": 20831 + }, + { + "epoch": 0.9758748301869115, + "grad_norm": 0.6372450049244336, + "learning_rate": 2.7188602227199427e-06, + "loss": 0.3303, + "step": 20832 + }, + { + "epoch": 0.9759216751768398, + "grad_norm": 0.5587248539813785, + "learning_rate": 2.7186713000751327e-06, + "loss": 0.2978, + "step": 20833 + }, + { + "epoch": 0.9759685201667682, + "grad_norm": 0.6498162495449875, + "learning_rate": 2.718482376171924e-06, + "loss": 0.3372, + "step": 20834 + }, + { + "epoch": 0.9760153651566965, + "grad_norm": 0.598360220035277, + "learning_rate": 2.7182934510114044e-06, + "loss": 0.3331, + "step": 20835 + }, + { + "epoch": 0.9760622101466249, + "grad_norm": 0.5792945816523616, + "learning_rate": 2.718104524594661e-06, + "loss": 0.31, + "step": 20836 + }, + { + "epoch": 0.9761090551365531, + "grad_norm": 0.5625372369058307, + "learning_rate": 2.7179155969227807e-06, + "loss": 0.3099, + "step": 20837 + }, + { + "epoch": 0.9761559001264815, + "grad_norm": 0.645378732350229, + "learning_rate": 2.7177266679968507e-06, + "loss": 0.3562, + "step": 20838 + }, + { + "epoch": 0.9762027451164098, + "grad_norm": 0.6309809978032306, + "learning_rate": 2.717537737817959e-06, + "loss": 0.3196, + "step": 20839 + }, + { + "epoch": 0.9762495901063382, + "grad_norm": 0.5653232914788267, + "learning_rate": 2.717348806387191e-06, + "loss": 0.3252, + "step": 20840 + }, + { + "epoch": 0.9762964350962664, + "grad_norm": 0.6199378311974948, + "learning_rate": 2.717159873705636e-06, + "loss": 0.3367, + "step": 20841 + }, + { + "epoch": 0.9763432800861948, + "grad_norm": 0.5870296484625972, + "learning_rate": 2.7169709397743814e-06, + "loss": 0.3218, + "step": 20842 + }, + { + "epoch": 0.9763901250761231, + "grad_norm": 0.5908998893922962, + "learning_rate": 2.7167820045945124e-06, + "loss": 0.3101, + "step": 20843 + }, + { + "epoch": 0.9764369700660515, + "grad_norm": 0.5301736768106331, + "learning_rate": 2.7165930681671177e-06, + "loss": 0.3094, + "step": 20844 + }, + { + "epoch": 0.9764838150559798, + "grad_norm": 0.5737274737439911, + "learning_rate": 2.7164041304932848e-06, + "loss": 0.3171, + "step": 20845 + }, + { + "epoch": 0.976530660045908, + "grad_norm": 0.567815274168336, + "learning_rate": 2.7162151915741004e-06, + "loss": 0.3206, + "step": 20846 + }, + { + "epoch": 0.9765775050358364, + "grad_norm": 0.5809997437386174, + "learning_rate": 2.7160262514106517e-06, + "loss": 0.3043, + "step": 20847 + }, + { + "epoch": 0.9766243500257648, + "grad_norm": 0.6088928987767025, + "learning_rate": 2.7158373100040263e-06, + "loss": 0.3199, + "step": 20848 + }, + { + "epoch": 0.9766711950156931, + "grad_norm": 0.5631489590788278, + "learning_rate": 2.715648367355312e-06, + "loss": 0.3009, + "step": 20849 + }, + { + "epoch": 0.9767180400056213, + "grad_norm": 0.5594836029326989, + "learning_rate": 2.715459423465594e-06, + "loss": 0.3038, + "step": 20850 + }, + { + "epoch": 0.9767648849955497, + "grad_norm": 0.5641504349292522, + "learning_rate": 2.7152704783359624e-06, + "loss": 0.3237, + "step": 20851 + }, + { + "epoch": 0.976811729985478, + "grad_norm": 0.602221826050301, + "learning_rate": 2.715081531967503e-06, + "loss": 0.3102, + "step": 20852 + }, + { + "epoch": 0.9768585749754064, + "grad_norm": 0.6205122849562414, + "learning_rate": 2.714892584361304e-06, + "loss": 0.3181, + "step": 20853 + }, + { + "epoch": 0.9769054199653348, + "grad_norm": 0.6537463811199962, + "learning_rate": 2.7147036355184513e-06, + "loss": 0.3389, + "step": 20854 + }, + { + "epoch": 0.976952264955263, + "grad_norm": 0.6170354424904951, + "learning_rate": 2.7145146854400334e-06, + "loss": 0.3123, + "step": 20855 + }, + { + "epoch": 0.9769991099451913, + "grad_norm": 0.5985109690779733, + "learning_rate": 2.714325734127138e-06, + "loss": 0.3286, + "step": 20856 + }, + { + "epoch": 0.9770459549351197, + "grad_norm": 0.5935218162710398, + "learning_rate": 2.714136781580851e-06, + "loss": 0.3136, + "step": 20857 + }, + { + "epoch": 0.977092799925048, + "grad_norm": 0.5990466529250674, + "learning_rate": 2.713947827802261e-06, + "loss": 0.3242, + "step": 20858 + }, + { + "epoch": 0.9771396449149763, + "grad_norm": 0.5879430143635315, + "learning_rate": 2.7137588727924553e-06, + "loss": 0.2897, + "step": 20859 + }, + { + "epoch": 0.9771864899049046, + "grad_norm": 0.5878071455247881, + "learning_rate": 2.7135699165525207e-06, + "loss": 0.3023, + "step": 20860 + }, + { + "epoch": 0.977233334894833, + "grad_norm": 0.5173428959553682, + "learning_rate": 2.7133809590835448e-06, + "loss": 0.2868, + "step": 20861 + }, + { + "epoch": 0.9772801798847613, + "grad_norm": 0.5897877724931897, + "learning_rate": 2.7131920003866145e-06, + "loss": 0.3167, + "step": 20862 + }, + { + "epoch": 0.9773270248746897, + "grad_norm": 0.598830966789461, + "learning_rate": 2.7130030404628198e-06, + "loss": 0.308, + "step": 20863 + }, + { + "epoch": 0.9773738698646179, + "grad_norm": 0.5841525878414542, + "learning_rate": 2.7128140793132447e-06, + "loss": 0.3107, + "step": 20864 + }, + { + "epoch": 0.9774207148545463, + "grad_norm": 0.6796359922920169, + "learning_rate": 2.7126251169389777e-06, + "loss": 0.3424, + "step": 20865 + }, + { + "epoch": 0.9774675598444746, + "grad_norm": 0.6339988469282467, + "learning_rate": 2.7124361533411076e-06, + "loss": 0.311, + "step": 20866 + }, + { + "epoch": 0.977514404834403, + "grad_norm": 0.6005985433118096, + "learning_rate": 2.7122471885207204e-06, + "loss": 0.3079, + "step": 20867 + }, + { + "epoch": 0.9775612498243312, + "grad_norm": 0.5993242309326425, + "learning_rate": 2.7120582224789036e-06, + "loss": 0.3185, + "step": 20868 + }, + { + "epoch": 0.9776080948142596, + "grad_norm": 0.6196522104248978, + "learning_rate": 2.7118692552167458e-06, + "loss": 0.3164, + "step": 20869 + }, + { + "epoch": 0.9776549398041879, + "grad_norm": 0.5667505833687555, + "learning_rate": 2.7116802867353335e-06, + "loss": 0.3139, + "step": 20870 + }, + { + "epoch": 0.9777017847941163, + "grad_norm": 0.6722264752140886, + "learning_rate": 2.7114913170357536e-06, + "loss": 0.3464, + "step": 20871 + }, + { + "epoch": 0.9777486297840446, + "grad_norm": 0.5776533179609746, + "learning_rate": 2.711302346119095e-06, + "loss": 0.3015, + "step": 20872 + }, + { + "epoch": 0.9777954747739729, + "grad_norm": 0.6116692262163077, + "learning_rate": 2.7111133739864447e-06, + "loss": 0.3076, + "step": 20873 + }, + { + "epoch": 0.9778423197639012, + "grad_norm": 0.5732596754050314, + "learning_rate": 2.7109244006388896e-06, + "loss": 0.2973, + "step": 20874 + }, + { + "epoch": 0.9778891647538296, + "grad_norm": 0.5980539071313401, + "learning_rate": 2.710735426077518e-06, + "loss": 0.3264, + "step": 20875 + }, + { + "epoch": 0.9779360097437579, + "grad_norm": 0.589534411981248, + "learning_rate": 2.7105464503034164e-06, + "loss": 0.3185, + "step": 20876 + }, + { + "epoch": 0.9779828547336862, + "grad_norm": 0.6249019897975129, + "learning_rate": 2.710357473317673e-06, + "loss": 0.3385, + "step": 20877 + }, + { + "epoch": 0.9780296997236145, + "grad_norm": 0.5722116036203337, + "learning_rate": 2.7101684951213757e-06, + "loss": 0.2988, + "step": 20878 + }, + { + "epoch": 0.9780765447135429, + "grad_norm": 0.5886993212341946, + "learning_rate": 2.709979515715611e-06, + "loss": 0.3241, + "step": 20879 + }, + { + "epoch": 0.9781233897034712, + "grad_norm": 0.5350188311907375, + "learning_rate": 2.7097905351014673e-06, + "loss": 0.2916, + "step": 20880 + }, + { + "epoch": 0.9781702346933996, + "grad_norm": 0.6963472218905209, + "learning_rate": 2.709601553280032e-06, + "loss": 0.3379, + "step": 20881 + }, + { + "epoch": 0.9782170796833278, + "grad_norm": 0.6421205699545739, + "learning_rate": 2.7094125702523916e-06, + "loss": 0.3274, + "step": 20882 + }, + { + "epoch": 0.9782639246732562, + "grad_norm": 0.6122437805435152, + "learning_rate": 2.7092235860196347e-06, + "loss": 0.3199, + "step": 20883 + }, + { + "epoch": 0.9783107696631845, + "grad_norm": 0.6175707953242624, + "learning_rate": 2.70903460058285e-06, + "loss": 0.3152, + "step": 20884 + }, + { + "epoch": 0.9783576146531129, + "grad_norm": 0.5835598590270028, + "learning_rate": 2.7088456139431225e-06, + "loss": 0.3122, + "step": 20885 + }, + { + "epoch": 0.9784044596430411, + "grad_norm": 0.5640611266403186, + "learning_rate": 2.7086566261015406e-06, + "loss": 0.2976, + "step": 20886 + }, + { + "epoch": 0.9784513046329695, + "grad_norm": 0.62489046697028, + "learning_rate": 2.708467637059193e-06, + "loss": 0.3075, + "step": 20887 + }, + { + "epoch": 0.9784981496228978, + "grad_norm": 0.593206751886542, + "learning_rate": 2.7082786468171664e-06, + "loss": 0.3206, + "step": 20888 + }, + { + "epoch": 0.9785449946128262, + "grad_norm": 0.6720976978620196, + "learning_rate": 2.7080896553765483e-06, + "loss": 0.3149, + "step": 20889 + }, + { + "epoch": 0.9785918396027545, + "grad_norm": 0.5972043917477086, + "learning_rate": 2.707900662738427e-06, + "loss": 0.3092, + "step": 20890 + }, + { + "epoch": 0.9786386845926828, + "grad_norm": 0.6384451691780462, + "learning_rate": 2.7077116689038895e-06, + "loss": 0.3343, + "step": 20891 + }, + { + "epoch": 0.9786855295826111, + "grad_norm": 0.5639207883915469, + "learning_rate": 2.7075226738740227e-06, + "loss": 0.3153, + "step": 20892 + }, + { + "epoch": 0.9787323745725395, + "grad_norm": 0.5695341971121811, + "learning_rate": 2.7073336776499165e-06, + "loss": 0.321, + "step": 20893 + }, + { + "epoch": 0.9787792195624678, + "grad_norm": 0.5611584850488524, + "learning_rate": 2.7071446802326564e-06, + "loss": 0.3032, + "step": 20894 + }, + { + "epoch": 0.9788260645523961, + "grad_norm": 0.5761069835052913, + "learning_rate": 2.7069556816233304e-06, + "loss": 0.3267, + "step": 20895 + }, + { + "epoch": 0.9788729095423244, + "grad_norm": 0.5737837335301429, + "learning_rate": 2.7067666818230266e-06, + "loss": 0.306, + "step": 20896 + }, + { + "epoch": 0.9789197545322528, + "grad_norm": 0.5471323613854584, + "learning_rate": 2.7065776808328333e-06, + "loss": 0.2951, + "step": 20897 + }, + { + "epoch": 0.9789665995221811, + "grad_norm": 0.6436433573099254, + "learning_rate": 2.706388678653837e-06, + "loss": 0.3237, + "step": 20898 + }, + { + "epoch": 0.9790134445121095, + "grad_norm": 0.6155246034199051, + "learning_rate": 2.7061996752871245e-06, + "loss": 0.3184, + "step": 20899 + }, + { + "epoch": 0.9790602895020377, + "grad_norm": 0.5819672179054874, + "learning_rate": 2.7060106707337863e-06, + "loss": 0.3177, + "step": 20900 + }, + { + "epoch": 0.9791071344919661, + "grad_norm": 0.5962744918563413, + "learning_rate": 2.7058216649949078e-06, + "loss": 0.3213, + "step": 20901 + }, + { + "epoch": 0.9791539794818944, + "grad_norm": 0.6154776545083475, + "learning_rate": 2.705632658071577e-06, + "loss": 0.3122, + "step": 20902 + }, + { + "epoch": 0.9792008244718228, + "grad_norm": 0.5771173651840378, + "learning_rate": 2.705443649964883e-06, + "loss": 0.2907, + "step": 20903 + }, + { + "epoch": 0.979247669461751, + "grad_norm": 0.5981172354132397, + "learning_rate": 2.705254640675911e-06, + "loss": 0.3187, + "step": 20904 + }, + { + "epoch": 0.9792945144516794, + "grad_norm": 0.5142420783998927, + "learning_rate": 2.7050656302057517e-06, + "loss": 0.2999, + "step": 20905 + }, + { + "epoch": 0.9793413594416077, + "grad_norm": 0.5597674615298818, + "learning_rate": 2.7048766185554904e-06, + "loss": 0.3052, + "step": 20906 + }, + { + "epoch": 0.9793882044315361, + "grad_norm": 0.5645006847786705, + "learning_rate": 2.704687605726215e-06, + "loss": 0.3008, + "step": 20907 + }, + { + "epoch": 0.9794350494214644, + "grad_norm": 0.6129861534047742, + "learning_rate": 2.704498591719015e-06, + "loss": 0.324, + "step": 20908 + }, + { + "epoch": 0.9794818944113927, + "grad_norm": 0.5741078377284975, + "learning_rate": 2.704309576534977e-06, + "loss": 0.2929, + "step": 20909 + }, + { + "epoch": 0.979528739401321, + "grad_norm": 0.5897422702072488, + "learning_rate": 2.704120560175188e-06, + "loss": 0.306, + "step": 20910 + }, + { + "epoch": 0.9795755843912494, + "grad_norm": 0.6311728697928384, + "learning_rate": 2.7039315426407363e-06, + "loss": 0.3422, + "step": 20911 + }, + { + "epoch": 0.9796224293811777, + "grad_norm": 0.6043333562267698, + "learning_rate": 2.703742523932711e-06, + "loss": 0.3262, + "step": 20912 + }, + { + "epoch": 0.979669274371106, + "grad_norm": 0.560488938454853, + "learning_rate": 2.7035535040521977e-06, + "loss": 0.3057, + "step": 20913 + }, + { + "epoch": 0.9797161193610343, + "grad_norm": 0.6579074661321356, + "learning_rate": 2.7033644830002853e-06, + "loss": 0.321, + "step": 20914 + }, + { + "epoch": 0.9797629643509627, + "grad_norm": 0.579321152664734, + "learning_rate": 2.7031754607780617e-06, + "loss": 0.3111, + "step": 20915 + }, + { + "epoch": 0.979809809340891, + "grad_norm": 0.5776586183752619, + "learning_rate": 2.7029864373866142e-06, + "loss": 0.3193, + "step": 20916 + }, + { + "epoch": 0.9798566543308194, + "grad_norm": 0.612663465446753, + "learning_rate": 2.702797412827031e-06, + "loss": 0.312, + "step": 20917 + }, + { + "epoch": 0.9799034993207476, + "grad_norm": 0.5982027871519044, + "learning_rate": 2.7026083871003995e-06, + "loss": 0.3133, + "step": 20918 + }, + { + "epoch": 0.979950344310676, + "grad_norm": 0.5793751269264449, + "learning_rate": 2.702419360207808e-06, + "loss": 0.2983, + "step": 20919 + }, + { + "epoch": 0.9799971893006043, + "grad_norm": 0.6030218750259247, + "learning_rate": 2.702230332150343e-06, + "loss": 0.3086, + "step": 20920 + }, + { + "epoch": 0.9800440342905327, + "grad_norm": 0.623568175722694, + "learning_rate": 2.702041302929094e-06, + "loss": 0.3159, + "step": 20921 + }, + { + "epoch": 0.9800908792804609, + "grad_norm": 0.5680268950890422, + "learning_rate": 2.7018522725451483e-06, + "loss": 0.3233, + "step": 20922 + }, + { + "epoch": 0.9801377242703893, + "grad_norm": 0.6974010239442582, + "learning_rate": 2.701663240999593e-06, + "loss": 0.3124, + "step": 20923 + }, + { + "epoch": 0.9801845692603176, + "grad_norm": 0.6364087872455131, + "learning_rate": 2.7014742082935167e-06, + "loss": 0.3274, + "step": 20924 + }, + { + "epoch": 0.980231414250246, + "grad_norm": 0.5910250787721433, + "learning_rate": 2.7012851744280067e-06, + "loss": 0.321, + "step": 20925 + }, + { + "epoch": 0.9802782592401743, + "grad_norm": 0.5750738211474261, + "learning_rate": 2.701096139404151e-06, + "loss": 0.3047, + "step": 20926 + }, + { + "epoch": 0.9803251042301026, + "grad_norm": 0.5509333775305658, + "learning_rate": 2.7009071032230385e-06, + "loss": 0.2884, + "step": 20927 + }, + { + "epoch": 0.9803719492200309, + "grad_norm": 0.6333142045446782, + "learning_rate": 2.700718065885755e-06, + "loss": 0.3295, + "step": 20928 + }, + { + "epoch": 0.9804187942099593, + "grad_norm": 0.6140964689169179, + "learning_rate": 2.7005290273933902e-06, + "loss": 0.3185, + "step": 20929 + }, + { + "epoch": 0.9804656391998876, + "grad_norm": 0.6355489458530635, + "learning_rate": 2.700339987747031e-06, + "loss": 0.3105, + "step": 20930 + }, + { + "epoch": 0.9805124841898158, + "grad_norm": 0.5722597371628388, + "learning_rate": 2.7001509469477656e-06, + "loss": 0.3119, + "step": 20931 + }, + { + "epoch": 0.9805593291797442, + "grad_norm": 0.5921500330742941, + "learning_rate": 2.6999619049966815e-06, + "loss": 0.3241, + "step": 20932 + }, + { + "epoch": 0.9806061741696726, + "grad_norm": 0.6144993219421607, + "learning_rate": 2.6997728618948675e-06, + "loss": 0.311, + "step": 20933 + }, + { + "epoch": 0.9806530191596009, + "grad_norm": 0.5704716875927394, + "learning_rate": 2.6995838176434105e-06, + "loss": 0.3083, + "step": 20934 + }, + { + "epoch": 0.9806998641495293, + "grad_norm": 0.6012201936323656, + "learning_rate": 2.6993947722433983e-06, + "loss": 0.3371, + "step": 20935 + }, + { + "epoch": 0.9807467091394575, + "grad_norm": 0.5877410610668053, + "learning_rate": 2.6992057256959196e-06, + "loss": 0.3277, + "step": 20936 + }, + { + "epoch": 0.9807935541293858, + "grad_norm": 0.6016591925413439, + "learning_rate": 2.6990166780020626e-06, + "loss": 0.3324, + "step": 20937 + }, + { + "epoch": 0.9808403991193142, + "grad_norm": 0.598868938683012, + "learning_rate": 2.698827629162914e-06, + "loss": 0.3121, + "step": 20938 + }, + { + "epoch": 0.9808872441092426, + "grad_norm": 0.6121547083762491, + "learning_rate": 2.6986385791795627e-06, + "loss": 0.3305, + "step": 20939 + }, + { + "epoch": 0.9809340890991708, + "grad_norm": 0.5433947558080975, + "learning_rate": 2.6984495280530966e-06, + "loss": 0.3169, + "step": 20940 + }, + { + "epoch": 0.9809809340890991, + "grad_norm": 0.5777965829068158, + "learning_rate": 2.6982604757846028e-06, + "loss": 0.3075, + "step": 20941 + }, + { + "epoch": 0.9810277790790275, + "grad_norm": 0.579944987771698, + "learning_rate": 2.6980714223751703e-06, + "loss": 0.3151, + "step": 20942 + }, + { + "epoch": 0.9810746240689558, + "grad_norm": 0.6082827068425584, + "learning_rate": 2.697882367825886e-06, + "loss": 0.327, + "step": 20943 + }, + { + "epoch": 0.9811214690588842, + "grad_norm": 0.5994105411826349, + "learning_rate": 2.6976933121378384e-06, + "loss": 0.3319, + "step": 20944 + }, + { + "epoch": 0.9811683140488124, + "grad_norm": 0.5886034253336366, + "learning_rate": 2.6975042553121163e-06, + "loss": 0.3189, + "step": 20945 + }, + { + "epoch": 0.9812151590387408, + "grad_norm": 0.5284252502491297, + "learning_rate": 2.697315197349806e-06, + "loss": 0.2752, + "step": 20946 + }, + { + "epoch": 0.9812620040286691, + "grad_norm": 0.579053386879222, + "learning_rate": 2.6971261382519963e-06, + "loss": 0.3042, + "step": 20947 + }, + { + "epoch": 0.9813088490185975, + "grad_norm": 0.5766716200652672, + "learning_rate": 2.6969370780197757e-06, + "loss": 0.3151, + "step": 20948 + }, + { + "epoch": 0.9813556940085257, + "grad_norm": 0.5875421423926788, + "learning_rate": 2.6967480166542314e-06, + "loss": 0.3073, + "step": 20949 + }, + { + "epoch": 0.9814025389984541, + "grad_norm": 0.5932478759530716, + "learning_rate": 2.6965589541564518e-06, + "loss": 0.312, + "step": 20950 + }, + { + "epoch": 0.9814493839883824, + "grad_norm": 0.5544075311187445, + "learning_rate": 2.696369890527525e-06, + "loss": 0.3074, + "step": 20951 + }, + { + "epoch": 0.9814962289783108, + "grad_norm": 0.6568815832729848, + "learning_rate": 2.6961808257685383e-06, + "loss": 0.3257, + "step": 20952 + }, + { + "epoch": 0.9815430739682391, + "grad_norm": 0.5779022104010002, + "learning_rate": 2.6959917598805805e-06, + "loss": 0.3057, + "step": 20953 + }, + { + "epoch": 0.9815899189581674, + "grad_norm": 0.6011119990602894, + "learning_rate": 2.6958026928647403e-06, + "loss": 0.3157, + "step": 20954 + }, + { + "epoch": 0.9816367639480957, + "grad_norm": 0.6288770766681144, + "learning_rate": 2.6956136247221036e-06, + "loss": 0.328, + "step": 20955 + }, + { + "epoch": 0.9816836089380241, + "grad_norm": 0.587396961569824, + "learning_rate": 2.69542455545376e-06, + "loss": 0.3288, + "step": 20956 + }, + { + "epoch": 0.9817304539279524, + "grad_norm": 0.6032311894213861, + "learning_rate": 2.6952354850607974e-06, + "loss": 0.3015, + "step": 20957 + }, + { + "epoch": 0.9817772989178807, + "grad_norm": 0.5755948300060105, + "learning_rate": 2.695046413544304e-06, + "loss": 0.3174, + "step": 20958 + }, + { + "epoch": 0.981824143907809, + "grad_norm": 0.5504238087738633, + "learning_rate": 2.6948573409053665e-06, + "loss": 0.3271, + "step": 20959 + }, + { + "epoch": 0.9818709888977374, + "grad_norm": 0.6268271443131052, + "learning_rate": 2.6946682671450747e-06, + "loss": 0.3052, + "step": 20960 + }, + { + "epoch": 0.9819178338876657, + "grad_norm": 0.5700102392964504, + "learning_rate": 2.694479192264516e-06, + "loss": 0.2961, + "step": 20961 + }, + { + "epoch": 0.9819646788775941, + "grad_norm": 0.61360345797036, + "learning_rate": 2.6942901162647778e-06, + "loss": 0.3193, + "step": 20962 + }, + { + "epoch": 0.9820115238675223, + "grad_norm": 0.5887808938341973, + "learning_rate": 2.6941010391469494e-06, + "loss": 0.2982, + "step": 20963 + }, + { + "epoch": 0.9820583688574507, + "grad_norm": 0.6169413535352554, + "learning_rate": 2.6939119609121176e-06, + "loss": 0.3218, + "step": 20964 + }, + { + "epoch": 0.982105213847379, + "grad_norm": 0.5323654777854434, + "learning_rate": 2.6937228815613724e-06, + "loss": 0.3143, + "step": 20965 + }, + { + "epoch": 0.9821520588373074, + "grad_norm": 0.6486122331393135, + "learning_rate": 2.6935338010957997e-06, + "loss": 0.3294, + "step": 20966 + }, + { + "epoch": 0.9821989038272356, + "grad_norm": 0.5924753921213886, + "learning_rate": 2.693344719516489e-06, + "loss": 0.3055, + "step": 20967 + }, + { + "epoch": 0.982245748817164, + "grad_norm": 0.6185088838407592, + "learning_rate": 2.693155636824528e-06, + "loss": 0.2987, + "step": 20968 + }, + { + "epoch": 0.9822925938070923, + "grad_norm": 0.6666458462878316, + "learning_rate": 2.6929665530210047e-06, + "loss": 0.326, + "step": 20969 + }, + { + "epoch": 0.9823394387970207, + "grad_norm": 0.5822322178717172, + "learning_rate": 2.692777468107008e-06, + "loss": 0.3177, + "step": 20970 + }, + { + "epoch": 0.982386283786949, + "grad_norm": 0.5728179211414834, + "learning_rate": 2.6925883820836253e-06, + "loss": 0.3028, + "step": 20971 + }, + { + "epoch": 0.9824331287768773, + "grad_norm": 0.5655146030338977, + "learning_rate": 2.692399294951944e-06, + "loss": 0.2959, + "step": 20972 + }, + { + "epoch": 0.9824799737668056, + "grad_norm": 0.6101463664659523, + "learning_rate": 2.692210206713054e-06, + "loss": 0.3162, + "step": 20973 + }, + { + "epoch": 0.982526818756734, + "grad_norm": 0.5832125220168038, + "learning_rate": 2.692021117368042e-06, + "loss": 0.3222, + "step": 20974 + }, + { + "epoch": 0.9825736637466623, + "grad_norm": 0.5711176032316649, + "learning_rate": 2.691832026917997e-06, + "loss": 0.3192, + "step": 20975 + }, + { + "epoch": 0.9826205087365906, + "grad_norm": 0.6172249313742593, + "learning_rate": 2.691642935364007e-06, + "loss": 0.3242, + "step": 20976 + }, + { + "epoch": 0.9826673537265189, + "grad_norm": 0.5837232691051338, + "learning_rate": 2.6914538427071595e-06, + "loss": 0.3114, + "step": 20977 + }, + { + "epoch": 0.9827141987164473, + "grad_norm": 0.5739883299927645, + "learning_rate": 2.691264748948544e-06, + "loss": 0.2988, + "step": 20978 + }, + { + "epoch": 0.9827610437063756, + "grad_norm": 0.5630196919604396, + "learning_rate": 2.691075654089248e-06, + "loss": 0.3047, + "step": 20979 + }, + { + "epoch": 0.982807888696304, + "grad_norm": 0.5689047766133555, + "learning_rate": 2.690886558130359e-06, + "loss": 0.3003, + "step": 20980 + }, + { + "epoch": 0.9828547336862322, + "grad_norm": 0.5801103491905264, + "learning_rate": 2.690697461072966e-06, + "loss": 0.3133, + "step": 20981 + }, + { + "epoch": 0.9829015786761606, + "grad_norm": 0.5672637027441398, + "learning_rate": 2.690508362918157e-06, + "loss": 0.3183, + "step": 20982 + }, + { + "epoch": 0.9829484236660889, + "grad_norm": 0.597104198812939, + "learning_rate": 2.6903192636670207e-06, + "loss": 0.3134, + "step": 20983 + }, + { + "epoch": 0.9829952686560173, + "grad_norm": 0.5523163332929429, + "learning_rate": 2.690130163320644e-06, + "loss": 0.3009, + "step": 20984 + }, + { + "epoch": 0.9830421136459455, + "grad_norm": 0.6250400062182484, + "learning_rate": 2.689941061880117e-06, + "loss": 0.3001, + "step": 20985 + }, + { + "epoch": 0.9830889586358739, + "grad_norm": 0.6455298287134722, + "learning_rate": 2.6897519593465267e-06, + "loss": 0.3247, + "step": 20986 + }, + { + "epoch": 0.9831358036258022, + "grad_norm": 0.6112509332376417, + "learning_rate": 2.6895628557209607e-06, + "loss": 0.2987, + "step": 20987 + }, + { + "epoch": 0.9831826486157306, + "grad_norm": 0.5766895883258111, + "learning_rate": 2.6893737510045094e-06, + "loss": 0.2988, + "step": 20988 + }, + { + "epoch": 0.9832294936056589, + "grad_norm": 0.6218021997981966, + "learning_rate": 2.6891846451982588e-06, + "loss": 0.3237, + "step": 20989 + }, + { + "epoch": 0.9832763385955872, + "grad_norm": 0.6421851582738, + "learning_rate": 2.6889955383032985e-06, + "loss": 0.3346, + "step": 20990 + }, + { + "epoch": 0.9833231835855155, + "grad_norm": 0.631879844551887, + "learning_rate": 2.688806430320716e-06, + "loss": 0.334, + "step": 20991 + }, + { + "epoch": 0.9833700285754439, + "grad_norm": 0.6327680187610304, + "learning_rate": 2.688617321251601e-06, + "loss": 0.3162, + "step": 20992 + }, + { + "epoch": 0.9834168735653722, + "grad_norm": 0.5885305147608094, + "learning_rate": 2.6884282110970394e-06, + "loss": 0.3131, + "step": 20993 + }, + { + "epoch": 0.9834637185553005, + "grad_norm": 0.6254337130206932, + "learning_rate": 2.688239099858122e-06, + "loss": 0.3398, + "step": 20994 + }, + { + "epoch": 0.9835105635452288, + "grad_norm": 0.6069339889229416, + "learning_rate": 2.688049987535935e-06, + "loss": 0.3311, + "step": 20995 + }, + { + "epoch": 0.9835574085351572, + "grad_norm": 0.5788939530481941, + "learning_rate": 2.6878608741315682e-06, + "loss": 0.3035, + "step": 20996 + }, + { + "epoch": 0.9836042535250855, + "grad_norm": 0.6411578816671517, + "learning_rate": 2.687671759646109e-06, + "loss": 0.3095, + "step": 20997 + }, + { + "epoch": 0.9836510985150139, + "grad_norm": 0.7007881758226676, + "learning_rate": 2.687482644080646e-06, + "loss": 0.3156, + "step": 20998 + }, + { + "epoch": 0.9836979435049421, + "grad_norm": 0.7398731697185391, + "learning_rate": 2.6872935274362673e-06, + "loss": 0.3428, + "step": 20999 + }, + { + "epoch": 0.9837447884948705, + "grad_norm": 0.5560701941840791, + "learning_rate": 2.6871044097140618e-06, + "loss": 0.289, + "step": 21000 + }, + { + "epoch": 0.9837916334847988, + "grad_norm": 0.6125249035454007, + "learning_rate": 2.686915290915117e-06, + "loss": 0.3074, + "step": 21001 + }, + { + "epoch": 0.9838384784747272, + "grad_norm": 0.5635127529031223, + "learning_rate": 2.6867261710405214e-06, + "loss": 0.3175, + "step": 21002 + }, + { + "epoch": 0.9838853234646554, + "grad_norm": 0.642818081975461, + "learning_rate": 2.686537050091365e-06, + "loss": 0.3204, + "step": 21003 + }, + { + "epoch": 0.9839321684545838, + "grad_norm": 0.5763002119633409, + "learning_rate": 2.686347928068733e-06, + "loss": 0.2932, + "step": 21004 + }, + { + "epoch": 0.9839790134445121, + "grad_norm": 0.6021929734315139, + "learning_rate": 2.6861588049737163e-06, + "loss": 0.3106, + "step": 21005 + }, + { + "epoch": 0.9840258584344405, + "grad_norm": 0.6199782565410443, + "learning_rate": 2.685969680807403e-06, + "loss": 0.3289, + "step": 21006 + }, + { + "epoch": 0.9840727034243688, + "grad_norm": 0.638747372147842, + "learning_rate": 2.685780555570881e-06, + "loss": 0.3152, + "step": 21007 + }, + { + "epoch": 0.984119548414297, + "grad_norm": 0.6326184185748585, + "learning_rate": 2.6855914292652372e-06, + "loss": 0.3262, + "step": 21008 + }, + { + "epoch": 0.9841663934042254, + "grad_norm": 0.6419982545063474, + "learning_rate": 2.6854023018915627e-06, + "loss": 0.3286, + "step": 21009 + }, + { + "epoch": 0.9842132383941538, + "grad_norm": 0.6132607196177721, + "learning_rate": 2.6852131734509446e-06, + "loss": 0.3318, + "step": 21010 + }, + { + "epoch": 0.9842600833840821, + "grad_norm": 0.5818783182310883, + "learning_rate": 2.6850240439444703e-06, + "loss": 0.3001, + "step": 21011 + }, + { + "epoch": 0.9843069283740103, + "grad_norm": 0.6146392563022097, + "learning_rate": 2.68483491337323e-06, + "loss": 0.3084, + "step": 21012 + }, + { + "epoch": 0.9843537733639387, + "grad_norm": 0.6184701358657505, + "learning_rate": 2.684645781738311e-06, + "loss": 0.3021, + "step": 21013 + }, + { + "epoch": 0.984400618353867, + "grad_norm": 0.7123156496511844, + "learning_rate": 2.6844566490408012e-06, + "loss": 0.3249, + "step": 21014 + }, + { + "epoch": 0.9844474633437954, + "grad_norm": 0.6054510757575654, + "learning_rate": 2.6842675152817904e-06, + "loss": 0.311, + "step": 21015 + }, + { + "epoch": 0.9844943083337238, + "grad_norm": 0.5715676489099517, + "learning_rate": 2.684078380462366e-06, + "loss": 0.3063, + "step": 21016 + }, + { + "epoch": 0.984541153323652, + "grad_norm": 0.5808376826375794, + "learning_rate": 2.6838892445836173e-06, + "loss": 0.2955, + "step": 21017 + }, + { + "epoch": 0.9845879983135803, + "grad_norm": 0.6232950212693182, + "learning_rate": 2.6837001076466325e-06, + "loss": 0.3159, + "step": 21018 + }, + { + "epoch": 0.9846348433035087, + "grad_norm": 0.5675017412846157, + "learning_rate": 2.6835109696524984e-06, + "loss": 0.309, + "step": 21019 + }, + { + "epoch": 0.984681688293437, + "grad_norm": 0.5990719302348039, + "learning_rate": 2.683321830602306e-06, + "loss": 0.2897, + "step": 21020 + }, + { + "epoch": 0.9847285332833653, + "grad_norm": 0.576103966194844, + "learning_rate": 2.6831326904971423e-06, + "loss": 0.311, + "step": 21021 + }, + { + "epoch": 0.9847753782732936, + "grad_norm": 0.5368081231855356, + "learning_rate": 2.682943549338096e-06, + "loss": 0.2872, + "step": 21022 + }, + { + "epoch": 0.984822223263222, + "grad_norm": 0.5619872413041115, + "learning_rate": 2.6827544071262548e-06, + "loss": 0.3005, + "step": 21023 + }, + { + "epoch": 0.9848690682531503, + "grad_norm": 0.5478057510659204, + "learning_rate": 2.6825652638627092e-06, + "loss": 0.2932, + "step": 21024 + }, + { + "epoch": 0.9849159132430787, + "grad_norm": 0.6466755065923817, + "learning_rate": 2.6823761195485455e-06, + "loss": 0.3009, + "step": 21025 + }, + { + "epoch": 0.9849627582330069, + "grad_norm": 0.5812448474233969, + "learning_rate": 2.6821869741848525e-06, + "loss": 0.2964, + "step": 21026 + }, + { + "epoch": 0.9850096032229353, + "grad_norm": 0.5747144508660674, + "learning_rate": 2.6819978277727204e-06, + "loss": 0.3133, + "step": 21027 + }, + { + "epoch": 0.9850564482128636, + "grad_norm": 0.5846324336585211, + "learning_rate": 2.6818086803132366e-06, + "loss": 0.3125, + "step": 21028 + }, + { + "epoch": 0.985103293202792, + "grad_norm": 0.5704628947029045, + "learning_rate": 2.681619531807489e-06, + "loss": 0.3087, + "step": 21029 + }, + { + "epoch": 0.9851501381927202, + "grad_norm": 0.5917626616819663, + "learning_rate": 2.681430382256567e-06, + "loss": 0.3184, + "step": 21030 + }, + { + "epoch": 0.9851969831826486, + "grad_norm": 0.5505943330705758, + "learning_rate": 2.681241231661559e-06, + "loss": 0.2957, + "step": 21031 + }, + { + "epoch": 0.9852438281725769, + "grad_norm": 0.5777788548689424, + "learning_rate": 2.6810520800235525e-06, + "loss": 0.3041, + "step": 21032 + }, + { + "epoch": 0.9852906731625053, + "grad_norm": 0.59889838801137, + "learning_rate": 2.6808629273436365e-06, + "loss": 0.3488, + "step": 21033 + }, + { + "epoch": 0.9853375181524336, + "grad_norm": 0.602190257628338, + "learning_rate": 2.6806737736229005e-06, + "loss": 0.326, + "step": 21034 + }, + { + "epoch": 0.9853843631423619, + "grad_norm": 0.5727176909403755, + "learning_rate": 2.6804846188624323e-06, + "loss": 0.3048, + "step": 21035 + }, + { + "epoch": 0.9854312081322902, + "grad_norm": 0.5442469482765416, + "learning_rate": 2.68029546306332e-06, + "loss": 0.3059, + "step": 21036 + }, + { + "epoch": 0.9854780531222186, + "grad_norm": 0.5875377886288832, + "learning_rate": 2.6801063062266535e-06, + "loss": 0.3013, + "step": 21037 + }, + { + "epoch": 0.9855248981121469, + "grad_norm": 0.618473144187077, + "learning_rate": 2.67991714835352e-06, + "loss": 0.329, + "step": 21038 + }, + { + "epoch": 0.9855717431020752, + "grad_norm": 0.6007783579370795, + "learning_rate": 2.6797279894450084e-06, + "loss": 0.3155, + "step": 21039 + }, + { + "epoch": 0.9856185880920035, + "grad_norm": 0.6053546646642415, + "learning_rate": 2.679538829502208e-06, + "loss": 0.3032, + "step": 21040 + }, + { + "epoch": 0.9856654330819319, + "grad_norm": 0.5686402870787507, + "learning_rate": 2.679349668526206e-06, + "loss": 0.3148, + "step": 21041 + }, + { + "epoch": 0.9857122780718602, + "grad_norm": 0.5980124167404663, + "learning_rate": 2.679160506518092e-06, + "loss": 0.3063, + "step": 21042 + }, + { + "epoch": 0.9857591230617886, + "grad_norm": 0.5796440176355488, + "learning_rate": 2.6789713434789544e-06, + "loss": 0.3373, + "step": 21043 + }, + { + "epoch": 0.9858059680517168, + "grad_norm": 0.632806911868032, + "learning_rate": 2.6787821794098813e-06, + "loss": 0.3233, + "step": 21044 + }, + { + "epoch": 0.9858528130416452, + "grad_norm": 0.583865640695673, + "learning_rate": 2.6785930143119626e-06, + "loss": 0.3159, + "step": 21045 + }, + { + "epoch": 0.9858996580315735, + "grad_norm": 0.5868879220141852, + "learning_rate": 2.6784038481862855e-06, + "loss": 0.3, + "step": 21046 + }, + { + "epoch": 0.9859465030215019, + "grad_norm": 0.5868500516524638, + "learning_rate": 2.6782146810339387e-06, + "loss": 0.3091, + "step": 21047 + }, + { + "epoch": 0.9859933480114301, + "grad_norm": 0.541413855919606, + "learning_rate": 2.6780255128560117e-06, + "loss": 0.3263, + "step": 21048 + }, + { + "epoch": 0.9860401930013585, + "grad_norm": 0.5777365948722698, + "learning_rate": 2.6778363436535925e-06, + "loss": 0.3338, + "step": 21049 + }, + { + "epoch": 0.9860870379912868, + "grad_norm": 0.6704890487986583, + "learning_rate": 2.6776471734277698e-06, + "loss": 0.3471, + "step": 21050 + }, + { + "epoch": 0.9861338829812152, + "grad_norm": 0.5788507555395372, + "learning_rate": 2.6774580021796317e-06, + "loss": 0.3073, + "step": 21051 + }, + { + "epoch": 0.9861807279711435, + "grad_norm": 0.6008663172346501, + "learning_rate": 2.677268829910269e-06, + "loss": 0.322, + "step": 21052 + }, + { + "epoch": 0.9862275729610718, + "grad_norm": 0.6775740332049494, + "learning_rate": 2.677079656620767e-06, + "loss": 0.357, + "step": 21053 + }, + { + "epoch": 0.9862744179510001, + "grad_norm": 0.639685344828087, + "learning_rate": 2.6768904823122165e-06, + "loss": 0.3204, + "step": 21054 + }, + { + "epoch": 0.9863212629409285, + "grad_norm": 0.6078279928839675, + "learning_rate": 2.6767013069857062e-06, + "loss": 0.3238, + "step": 21055 + }, + { + "epoch": 0.9863681079308568, + "grad_norm": 0.6524493389612265, + "learning_rate": 2.676512130642324e-06, + "loss": 0.3335, + "step": 21056 + }, + { + "epoch": 0.9864149529207851, + "grad_norm": 0.6040750310398606, + "learning_rate": 2.676322953283159e-06, + "loss": 0.3163, + "step": 21057 + }, + { + "epoch": 0.9864617979107134, + "grad_norm": 0.6004716679649786, + "learning_rate": 2.6761337749092996e-06, + "loss": 0.308, + "step": 21058 + }, + { + "epoch": 0.9865086429006418, + "grad_norm": 0.5776431393908403, + "learning_rate": 2.675944595521835e-06, + "loss": 0.3223, + "step": 21059 + }, + { + "epoch": 0.9865554878905701, + "grad_norm": 0.6151626002409921, + "learning_rate": 2.675755415121853e-06, + "loss": 0.3191, + "step": 21060 + }, + { + "epoch": 0.9866023328804985, + "grad_norm": 0.5538345365947062, + "learning_rate": 2.675566233710443e-06, + "loss": 0.3311, + "step": 21061 + }, + { + "epoch": 0.9866491778704267, + "grad_norm": 0.6287404680893606, + "learning_rate": 2.6753770512886935e-06, + "loss": 0.3225, + "step": 21062 + }, + { + "epoch": 0.9866960228603551, + "grad_norm": 0.5885355860675976, + "learning_rate": 2.6751878678576924e-06, + "loss": 0.3119, + "step": 21063 + }, + { + "epoch": 0.9867428678502834, + "grad_norm": 0.6149564967422749, + "learning_rate": 2.6749986834185306e-06, + "loss": 0.3342, + "step": 21064 + }, + { + "epoch": 0.9867897128402118, + "grad_norm": 0.62218461731735, + "learning_rate": 2.6748094979722937e-06, + "loss": 0.3241, + "step": 21065 + }, + { + "epoch": 0.98683655783014, + "grad_norm": 0.6113830019795509, + "learning_rate": 2.6746203115200734e-06, + "loss": 0.3177, + "step": 21066 + }, + { + "epoch": 0.9868834028200684, + "grad_norm": 0.6338497290591397, + "learning_rate": 2.6744311240629566e-06, + "loss": 0.3203, + "step": 21067 + }, + { + "epoch": 0.9869302478099967, + "grad_norm": 0.6083137061564127, + "learning_rate": 2.674241935602032e-06, + "loss": 0.3, + "step": 21068 + }, + { + "epoch": 0.9869770927999251, + "grad_norm": 0.6235343228689321, + "learning_rate": 2.67405274613839e-06, + "loss": 0.3166, + "step": 21069 + }, + { + "epoch": 0.9870239377898534, + "grad_norm": 0.6216161939715388, + "learning_rate": 2.673863555673118e-06, + "loss": 0.3295, + "step": 21070 + }, + { + "epoch": 0.9870707827797817, + "grad_norm": 0.5758751044832799, + "learning_rate": 2.6736743642073038e-06, + "loss": 0.3194, + "step": 21071 + }, + { + "epoch": 0.98711762776971, + "grad_norm": 0.5818165424861391, + "learning_rate": 2.673485171742038e-06, + "loss": 0.2974, + "step": 21072 + }, + { + "epoch": 0.9871644727596384, + "grad_norm": 0.6143692501690872, + "learning_rate": 2.673295978278409e-06, + "loss": 0.3268, + "step": 21073 + }, + { + "epoch": 0.9872113177495667, + "grad_norm": 0.6225035809904891, + "learning_rate": 2.6731067838175045e-06, + "loss": 0.3322, + "step": 21074 + }, + { + "epoch": 0.987258162739495, + "grad_norm": 0.640229129680489, + "learning_rate": 2.6729175883604143e-06, + "loss": 0.3035, + "step": 21075 + }, + { + "epoch": 0.9873050077294233, + "grad_norm": 0.628168356391996, + "learning_rate": 2.672728391908227e-06, + "loss": 0.3176, + "step": 21076 + }, + { + "epoch": 0.9873518527193517, + "grad_norm": 0.6442733776628504, + "learning_rate": 2.672539194462031e-06, + "loss": 0.3084, + "step": 21077 + }, + { + "epoch": 0.98739869770928, + "grad_norm": 0.6554789515838513, + "learning_rate": 2.6723499960229155e-06, + "loss": 0.3322, + "step": 21078 + }, + { + "epoch": 0.9874455426992084, + "grad_norm": 0.595604924777211, + "learning_rate": 2.672160796591969e-06, + "loss": 0.3152, + "step": 21079 + }, + { + "epoch": 0.9874923876891366, + "grad_norm": 0.6251085394245933, + "learning_rate": 2.67197159617028e-06, + "loss": 0.3147, + "step": 21080 + }, + { + "epoch": 0.987539232679065, + "grad_norm": 0.5815230036402941, + "learning_rate": 2.6717823947589384e-06, + "loss": 0.3155, + "step": 21081 + }, + { + "epoch": 0.9875860776689933, + "grad_norm": 0.5472711034599289, + "learning_rate": 2.671593192359032e-06, + "loss": 0.2917, + "step": 21082 + }, + { + "epoch": 0.9876329226589217, + "grad_norm": 0.5793293801659912, + "learning_rate": 2.6714039889716497e-06, + "loss": 0.2934, + "step": 21083 + }, + { + "epoch": 0.9876797676488499, + "grad_norm": 0.5902703058716382, + "learning_rate": 2.67121478459788e-06, + "loss": 0.319, + "step": 21084 + }, + { + "epoch": 0.9877266126387783, + "grad_norm": 0.592529549441887, + "learning_rate": 2.6710255792388134e-06, + "loss": 0.3026, + "step": 21085 + }, + { + "epoch": 0.9877734576287066, + "grad_norm": 0.5908513614296614, + "learning_rate": 2.6708363728955367e-06, + "loss": 0.3184, + "step": 21086 + }, + { + "epoch": 0.987820302618635, + "grad_norm": 0.6244647241605266, + "learning_rate": 2.6706471655691403e-06, + "loss": 0.3087, + "step": 21087 + }, + { + "epoch": 0.9878671476085633, + "grad_norm": 0.6191034599199634, + "learning_rate": 2.6704579572607125e-06, + "loss": 0.3189, + "step": 21088 + }, + { + "epoch": 0.9879139925984916, + "grad_norm": 0.5946969040474381, + "learning_rate": 2.6702687479713413e-06, + "loss": 0.3202, + "step": 21089 + }, + { + "epoch": 0.9879608375884199, + "grad_norm": 0.5593326908657885, + "learning_rate": 2.6700795377021165e-06, + "loss": 0.3032, + "step": 21090 + }, + { + "epoch": 0.9880076825783483, + "grad_norm": 0.5975001305497368, + "learning_rate": 2.669890326454127e-06, + "loss": 0.3228, + "step": 21091 + }, + { + "epoch": 0.9880545275682766, + "grad_norm": 0.5425716424344479, + "learning_rate": 2.6697011142284605e-06, + "loss": 0.3093, + "step": 21092 + }, + { + "epoch": 0.9881013725582048, + "grad_norm": 0.6316284633059525, + "learning_rate": 2.669511901026207e-06, + "loss": 0.3258, + "step": 21093 + }, + { + "epoch": 0.9881482175481332, + "grad_norm": 0.5939683212703444, + "learning_rate": 2.6693226868484563e-06, + "loss": 0.308, + "step": 21094 + }, + { + "epoch": 0.9881950625380616, + "grad_norm": 0.6053271617540553, + "learning_rate": 2.6691334716962945e-06, + "loss": 0.3236, + "step": 21095 + }, + { + "epoch": 0.9882419075279899, + "grad_norm": 0.539496294035511, + "learning_rate": 2.6689442555708126e-06, + "loss": 0.3073, + "step": 21096 + }, + { + "epoch": 0.9882887525179183, + "grad_norm": 0.6365119762681409, + "learning_rate": 2.6687550384731e-06, + "loss": 0.3405, + "step": 21097 + }, + { + "epoch": 0.9883355975078465, + "grad_norm": 0.5841694384401551, + "learning_rate": 2.6685658204042438e-06, + "loss": 0.3236, + "step": 21098 + }, + { + "epoch": 0.9883824424977748, + "grad_norm": 0.6090789055989307, + "learning_rate": 2.668376601365333e-06, + "loss": 0.3314, + "step": 21099 + }, + { + "epoch": 0.9884292874877032, + "grad_norm": 0.6490901292261609, + "learning_rate": 2.668187381357458e-06, + "loss": 0.3249, + "step": 21100 + }, + { + "epoch": 0.9884761324776316, + "grad_norm": 0.5794624313314228, + "learning_rate": 2.6679981603817067e-06, + "loss": 0.311, + "step": 21101 + }, + { + "epoch": 0.9885229774675598, + "grad_norm": 0.5816480108636728, + "learning_rate": 2.667808938439168e-06, + "loss": 0.3068, + "step": 21102 + }, + { + "epoch": 0.9885698224574881, + "grad_norm": 0.5450701378165044, + "learning_rate": 2.667619715530931e-06, + "loss": 0.317, + "step": 21103 + }, + { + "epoch": 0.9886166674474165, + "grad_norm": 0.5870763388076309, + "learning_rate": 2.667430491658085e-06, + "loss": 0.3262, + "step": 21104 + }, + { + "epoch": 0.9886635124373448, + "grad_norm": 0.5766810945783043, + "learning_rate": 2.667241266821719e-06, + "loss": 0.3022, + "step": 21105 + }, + { + "epoch": 0.9887103574272732, + "grad_norm": 0.571633034829416, + "learning_rate": 2.6670520410229205e-06, + "loss": 0.3071, + "step": 21106 + }, + { + "epoch": 0.9887572024172014, + "grad_norm": 0.5635789307215182, + "learning_rate": 2.66686281426278e-06, + "loss": 0.3216, + "step": 21107 + }, + { + "epoch": 0.9888040474071298, + "grad_norm": 0.6013812040175319, + "learning_rate": 2.6666735865423865e-06, + "loss": 0.3174, + "step": 21108 + }, + { + "epoch": 0.9888508923970581, + "grad_norm": 0.6509025707655028, + "learning_rate": 2.6664843578628273e-06, + "loss": 0.3319, + "step": 21109 + }, + { + "epoch": 0.9888977373869865, + "grad_norm": 0.5650946810386195, + "learning_rate": 2.6662951282251938e-06, + "loss": 0.3211, + "step": 21110 + }, + { + "epoch": 0.9889445823769147, + "grad_norm": 0.5304288722507852, + "learning_rate": 2.6661058976305727e-06, + "loss": 0.2881, + "step": 21111 + }, + { + "epoch": 0.9889914273668431, + "grad_norm": 0.633982261900003, + "learning_rate": 2.6659166660800538e-06, + "loss": 0.3124, + "step": 21112 + }, + { + "epoch": 0.9890382723567714, + "grad_norm": 0.5774897241346095, + "learning_rate": 2.6657274335747267e-06, + "loss": 0.3139, + "step": 21113 + }, + { + "epoch": 0.9890851173466998, + "grad_norm": 0.5577241957816322, + "learning_rate": 2.6655382001156795e-06, + "loss": 0.3089, + "step": 21114 + }, + { + "epoch": 0.9891319623366281, + "grad_norm": 0.5811682150382056, + "learning_rate": 2.6653489657040015e-06, + "loss": 0.306, + "step": 21115 + }, + { + "epoch": 0.9891788073265564, + "grad_norm": 0.5702716263275392, + "learning_rate": 2.6651597303407823e-06, + "loss": 0.3076, + "step": 21116 + }, + { + "epoch": 0.9892256523164847, + "grad_norm": 0.5800090135453646, + "learning_rate": 2.66497049402711e-06, + "loss": 0.3102, + "step": 21117 + }, + { + "epoch": 0.9892724973064131, + "grad_norm": 0.5515089424353471, + "learning_rate": 2.664781256764074e-06, + "loss": 0.2845, + "step": 21118 + }, + { + "epoch": 0.9893193422963414, + "grad_norm": 0.6212577763733631, + "learning_rate": 2.6645920185527635e-06, + "loss": 0.3321, + "step": 21119 + }, + { + "epoch": 0.9893661872862697, + "grad_norm": 0.6194878932146289, + "learning_rate": 2.6644027793942666e-06, + "loss": 0.3279, + "step": 21120 + }, + { + "epoch": 0.989413032276198, + "grad_norm": 0.5829028475732352, + "learning_rate": 2.6642135392896733e-06, + "loss": 0.3007, + "step": 21121 + }, + { + "epoch": 0.9894598772661264, + "grad_norm": 0.5945988353339076, + "learning_rate": 2.6640242982400734e-06, + "loss": 0.3359, + "step": 21122 + }, + { + "epoch": 0.9895067222560547, + "grad_norm": 0.6042555087159086, + "learning_rate": 2.663835056246554e-06, + "loss": 0.3254, + "step": 21123 + }, + { + "epoch": 0.9895535672459831, + "grad_norm": 0.5557236514033244, + "learning_rate": 2.6636458133102045e-06, + "loss": 0.3225, + "step": 21124 + }, + { + "epoch": 0.9896004122359113, + "grad_norm": 0.592345722375716, + "learning_rate": 2.663456569432115e-06, + "loss": 0.3055, + "step": 21125 + }, + { + "epoch": 0.9896472572258397, + "grad_norm": 0.5825083856596287, + "learning_rate": 2.6632673246133743e-06, + "loss": 0.2998, + "step": 21126 + }, + { + "epoch": 0.989694102215768, + "grad_norm": 0.6391267498995075, + "learning_rate": 2.6630780788550707e-06, + "loss": 0.3183, + "step": 21127 + }, + { + "epoch": 0.9897409472056964, + "grad_norm": 0.6014298162447436, + "learning_rate": 2.6628888321582945e-06, + "loss": 0.3248, + "step": 21128 + }, + { + "epoch": 0.9897877921956246, + "grad_norm": 0.6315682700838519, + "learning_rate": 2.662699584524134e-06, + "loss": 0.3209, + "step": 21129 + }, + { + "epoch": 0.989834637185553, + "grad_norm": 0.6012302106400665, + "learning_rate": 2.6625103359536776e-06, + "loss": 0.3402, + "step": 21130 + }, + { + "epoch": 0.9898814821754813, + "grad_norm": 0.5883545936543472, + "learning_rate": 2.662321086448015e-06, + "loss": 0.3164, + "step": 21131 + }, + { + "epoch": 0.9899283271654097, + "grad_norm": 0.5816505942531123, + "learning_rate": 2.662131836008236e-06, + "loss": 0.3184, + "step": 21132 + }, + { + "epoch": 0.989975172155338, + "grad_norm": 0.5612701963588743, + "learning_rate": 2.6619425846354278e-06, + "loss": 0.3056, + "step": 21133 + }, + { + "epoch": 0.9900220171452663, + "grad_norm": 0.6618597206449404, + "learning_rate": 2.6617533323306823e-06, + "loss": 0.3278, + "step": 21134 + }, + { + "epoch": 0.9900688621351946, + "grad_norm": 0.6070583244160579, + "learning_rate": 2.6615640790950857e-06, + "loss": 0.3143, + "step": 21135 + }, + { + "epoch": 0.990115707125123, + "grad_norm": 0.5880555354255188, + "learning_rate": 2.6613748249297294e-06, + "loss": 0.3136, + "step": 21136 + }, + { + "epoch": 0.9901625521150513, + "grad_norm": 0.6099480626577258, + "learning_rate": 2.6611855698357014e-06, + "loss": 0.3064, + "step": 21137 + }, + { + "epoch": 0.9902093971049796, + "grad_norm": 0.6267314016167221, + "learning_rate": 2.6609963138140902e-06, + "loss": 0.3162, + "step": 21138 + }, + { + "epoch": 0.9902562420949079, + "grad_norm": 0.6148715725288583, + "learning_rate": 2.6608070568659868e-06, + "loss": 0.3331, + "step": 21139 + }, + { + "epoch": 0.9903030870848363, + "grad_norm": 0.5895019617171895, + "learning_rate": 2.6606177989924786e-06, + "loss": 0.3056, + "step": 21140 + }, + { + "epoch": 0.9903499320747646, + "grad_norm": 0.5446308558445132, + "learning_rate": 2.660428540194655e-06, + "loss": 0.2989, + "step": 21141 + }, + { + "epoch": 0.990396777064693, + "grad_norm": 0.6311565735049138, + "learning_rate": 2.6602392804736055e-06, + "loss": 0.3257, + "step": 21142 + }, + { + "epoch": 0.9904436220546212, + "grad_norm": 0.5718555630605293, + "learning_rate": 2.6600500198304207e-06, + "loss": 0.3101, + "step": 21143 + }, + { + "epoch": 0.9904904670445496, + "grad_norm": 0.5782476875549514, + "learning_rate": 2.6598607582661868e-06, + "loss": 0.3179, + "step": 21144 + }, + { + "epoch": 0.9905373120344779, + "grad_norm": 0.5776698140368595, + "learning_rate": 2.659671495781994e-06, + "loss": 0.2938, + "step": 21145 + }, + { + "epoch": 0.9905841570244063, + "grad_norm": 0.5881325291504084, + "learning_rate": 2.6594822323789334e-06, + "loss": 0.2928, + "step": 21146 + }, + { + "epoch": 0.9906310020143345, + "grad_norm": 0.7396418687078272, + "learning_rate": 2.6592929680580916e-06, + "loss": 0.3298, + "step": 21147 + }, + { + "epoch": 0.9906778470042629, + "grad_norm": 0.6232666454771738, + "learning_rate": 2.659103702820559e-06, + "loss": 0.3059, + "step": 21148 + }, + { + "epoch": 0.9907246919941912, + "grad_norm": 0.661546018369013, + "learning_rate": 2.6589144366674246e-06, + "loss": 0.3116, + "step": 21149 + }, + { + "epoch": 0.9907715369841196, + "grad_norm": 0.6341308019070315, + "learning_rate": 2.658725169599778e-06, + "loss": 0.3231, + "step": 21150 + }, + { + "epoch": 0.9908183819740479, + "grad_norm": 0.6029560005096777, + "learning_rate": 2.6585359016187073e-06, + "loss": 0.331, + "step": 21151 + }, + { + "epoch": 0.9908652269639762, + "grad_norm": 0.630652034546029, + "learning_rate": 2.6583466327253027e-06, + "loss": 0.3244, + "step": 21152 + }, + { + "epoch": 0.9909120719539045, + "grad_norm": 0.5898669750001445, + "learning_rate": 2.658157362920653e-06, + "loss": 0.3302, + "step": 21153 + }, + { + "epoch": 0.9909589169438329, + "grad_norm": 0.5890535563434954, + "learning_rate": 2.657968092205847e-06, + "loss": 0.3179, + "step": 21154 + }, + { + "epoch": 0.9910057619337612, + "grad_norm": 0.5967868484941526, + "learning_rate": 2.6577788205819748e-06, + "loss": 0.3277, + "step": 21155 + }, + { + "epoch": 0.9910526069236895, + "grad_norm": 0.6124058217199095, + "learning_rate": 2.6575895480501244e-06, + "loss": 0.3409, + "step": 21156 + }, + { + "epoch": 0.9910994519136178, + "grad_norm": 0.5719907955854193, + "learning_rate": 2.657400274611387e-06, + "loss": 0.3044, + "step": 21157 + }, + { + "epoch": 0.9911462969035462, + "grad_norm": 0.6066484984336638, + "learning_rate": 2.65721100026685e-06, + "loss": 0.3161, + "step": 21158 + }, + { + "epoch": 0.9911931418934745, + "grad_norm": 0.5843829745604806, + "learning_rate": 2.6570217250176027e-06, + "loss": 0.2948, + "step": 21159 + }, + { + "epoch": 0.9912399868834029, + "grad_norm": 0.5729943337995216, + "learning_rate": 2.6568324488647356e-06, + "loss": 0.2951, + "step": 21160 + }, + { + "epoch": 0.9912868318733311, + "grad_norm": 0.5573372917171278, + "learning_rate": 2.656643171809337e-06, + "loss": 0.3007, + "step": 21161 + }, + { + "epoch": 0.9913336768632595, + "grad_norm": 0.6172589432804387, + "learning_rate": 2.6564538938524957e-06, + "loss": 0.326, + "step": 21162 + }, + { + "epoch": 0.9913805218531878, + "grad_norm": 0.5953791038979879, + "learning_rate": 2.656264614995302e-06, + "loss": 0.3217, + "step": 21163 + }, + { + "epoch": 0.9914273668431162, + "grad_norm": 0.6543054390706514, + "learning_rate": 2.6560753352388454e-06, + "loss": 0.3313, + "step": 21164 + }, + { + "epoch": 0.9914742118330444, + "grad_norm": 0.5718380089817569, + "learning_rate": 2.6558860545842137e-06, + "loss": 0.304, + "step": 21165 + }, + { + "epoch": 0.9915210568229728, + "grad_norm": 0.6353694656080716, + "learning_rate": 2.6556967730324967e-06, + "loss": 0.3234, + "step": 21166 + }, + { + "epoch": 0.9915679018129011, + "grad_norm": 0.644421382545504, + "learning_rate": 2.6555074905847844e-06, + "loss": 0.3371, + "step": 21167 + }, + { + "epoch": 0.9916147468028295, + "grad_norm": 0.5923830637820217, + "learning_rate": 2.6553182072421658e-06, + "loss": 0.3303, + "step": 21168 + }, + { + "epoch": 0.9916615917927578, + "grad_norm": 0.6069193953636077, + "learning_rate": 2.655128923005729e-06, + "loss": 0.3028, + "step": 21169 + }, + { + "epoch": 0.991708436782686, + "grad_norm": 0.5802584013218328, + "learning_rate": 2.654939637876565e-06, + "loss": 0.3101, + "step": 21170 + }, + { + "epoch": 0.9917552817726144, + "grad_norm": 0.5880230548324874, + "learning_rate": 2.654750351855763e-06, + "loss": 0.3084, + "step": 21171 + }, + { + "epoch": 0.9918021267625428, + "grad_norm": 0.5581540508248931, + "learning_rate": 2.6545610649444097e-06, + "loss": 0.3131, + "step": 21172 + }, + { + "epoch": 0.9918489717524711, + "grad_norm": 0.6096994317968747, + "learning_rate": 2.6543717771435974e-06, + "loss": 0.3219, + "step": 21173 + }, + { + "epoch": 0.9918958167423993, + "grad_norm": 0.5649851475334274, + "learning_rate": 2.6541824884544145e-06, + "loss": 0.2962, + "step": 21174 + }, + { + "epoch": 0.9919426617323277, + "grad_norm": 0.6034678270270013, + "learning_rate": 2.65399319887795e-06, + "loss": 0.3087, + "step": 21175 + }, + { + "epoch": 0.991989506722256, + "grad_norm": 0.6158840438641433, + "learning_rate": 2.653803908415293e-06, + "loss": 0.3294, + "step": 21176 + }, + { + "epoch": 0.9920363517121844, + "grad_norm": 0.5913204112176245, + "learning_rate": 2.653614617067534e-06, + "loss": 0.3151, + "step": 21177 + }, + { + "epoch": 0.9920831967021128, + "grad_norm": 0.5934350115049273, + "learning_rate": 2.653425324835761e-06, + "loss": 0.3022, + "step": 21178 + }, + { + "epoch": 0.992130041692041, + "grad_norm": 0.6276681286830835, + "learning_rate": 2.6532360317210633e-06, + "loss": 0.3379, + "step": 21179 + }, + { + "epoch": 0.9921768866819693, + "grad_norm": 0.5917796184698273, + "learning_rate": 2.6530467377245315e-06, + "loss": 0.2979, + "step": 21180 + }, + { + "epoch": 0.9922237316718977, + "grad_norm": 0.6088208038106953, + "learning_rate": 2.652857442847254e-06, + "loss": 0.3159, + "step": 21181 + }, + { + "epoch": 0.992270576661826, + "grad_norm": 0.5669248006671506, + "learning_rate": 2.6526681470903198e-06, + "loss": 0.2892, + "step": 21182 + }, + { + "epoch": 0.9923174216517543, + "grad_norm": 0.5744949236723377, + "learning_rate": 2.65247885045482e-06, + "loss": 0.296, + "step": 21183 + }, + { + "epoch": 0.9923642666416826, + "grad_norm": 0.6099105078641828, + "learning_rate": 2.652289552941841e-06, + "loss": 0.3063, + "step": 21184 + }, + { + "epoch": 0.992411111631611, + "grad_norm": 0.5975697730267455, + "learning_rate": 2.6521002545524754e-06, + "loss": 0.2971, + "step": 21185 + }, + { + "epoch": 0.9924579566215394, + "grad_norm": 0.6063485867117965, + "learning_rate": 2.651910955287811e-06, + "loss": 0.3333, + "step": 21186 + }, + { + "epoch": 0.9925048016114677, + "grad_norm": 0.5489355874639943, + "learning_rate": 2.651721655148936e-06, + "loss": 0.3062, + "step": 21187 + }, + { + "epoch": 0.9925516466013959, + "grad_norm": 0.5649137752063657, + "learning_rate": 2.651532354136942e-06, + "loss": 0.3083, + "step": 21188 + }, + { + "epoch": 0.9925984915913243, + "grad_norm": 0.6146229482283262, + "learning_rate": 2.6513430522529175e-06, + "loss": 0.3223, + "step": 21189 + }, + { + "epoch": 0.9926453365812526, + "grad_norm": 0.5819000265139908, + "learning_rate": 2.651153749497951e-06, + "loss": 0.307, + "step": 21190 + }, + { + "epoch": 0.992692181571181, + "grad_norm": 0.5476081283825236, + "learning_rate": 2.650964445873133e-06, + "loss": 0.3021, + "step": 21191 + }, + { + "epoch": 0.9927390265611092, + "grad_norm": 0.6277992813269758, + "learning_rate": 2.6507751413795534e-06, + "loss": 0.316, + "step": 21192 + }, + { + "epoch": 0.9927858715510376, + "grad_norm": 0.5656911398966792, + "learning_rate": 2.6505858360182997e-06, + "loss": 0.3135, + "step": 21193 + }, + { + "epoch": 0.9928327165409659, + "grad_norm": 0.6293856384871953, + "learning_rate": 2.6503965297904622e-06, + "loss": 0.3296, + "step": 21194 + }, + { + "epoch": 0.9928795615308943, + "grad_norm": 0.5606985965342974, + "learning_rate": 2.6502072226971316e-06, + "loss": 0.3003, + "step": 21195 + }, + { + "epoch": 0.9929264065208226, + "grad_norm": 0.6162025503101837, + "learning_rate": 2.650017914739396e-06, + "loss": 0.3547, + "step": 21196 + }, + { + "epoch": 0.9929732515107509, + "grad_norm": 0.6014796324499726, + "learning_rate": 2.6498286059183437e-06, + "loss": 0.3456, + "step": 21197 + }, + { + "epoch": 0.9930200965006792, + "grad_norm": 0.6042483101157371, + "learning_rate": 2.6496392962350663e-06, + "loss": 0.3159, + "step": 21198 + }, + { + "epoch": 0.9930669414906076, + "grad_norm": 0.577854071791434, + "learning_rate": 2.6494499856906524e-06, + "loss": 0.3049, + "step": 21199 + }, + { + "epoch": 0.9931137864805359, + "grad_norm": 0.6879296023423418, + "learning_rate": 2.6492606742861913e-06, + "loss": 0.344, + "step": 21200 + }, + { + "epoch": 0.9931606314704642, + "grad_norm": 0.6140936797196644, + "learning_rate": 2.6490713620227728e-06, + "loss": 0.3114, + "step": 21201 + }, + { + "epoch": 0.9932074764603925, + "grad_norm": 0.5988218831881669, + "learning_rate": 2.648882048901486e-06, + "loss": 0.3088, + "step": 21202 + }, + { + "epoch": 0.9932543214503209, + "grad_norm": 0.5984150931330474, + "learning_rate": 2.64869273492342e-06, + "loss": 0.3301, + "step": 21203 + }, + { + "epoch": 0.9933011664402492, + "grad_norm": 0.6386982967705125, + "learning_rate": 2.6485034200896655e-06, + "loss": 0.3203, + "step": 21204 + }, + { + "epoch": 0.9933480114301776, + "grad_norm": 0.6302679547979825, + "learning_rate": 2.64831410440131e-06, + "loss": 0.3367, + "step": 21205 + }, + { + "epoch": 0.9933948564201058, + "grad_norm": 0.6112526810845805, + "learning_rate": 2.6481247878594453e-06, + "loss": 0.3285, + "step": 21206 + }, + { + "epoch": 0.9934417014100342, + "grad_norm": 0.6118090653826478, + "learning_rate": 2.647935470465159e-06, + "loss": 0.3292, + "step": 21207 + }, + { + "epoch": 0.9934885463999625, + "grad_norm": 0.6593169130235726, + "learning_rate": 2.647746152219541e-06, + "loss": 0.3335, + "step": 21208 + }, + { + "epoch": 0.9935353913898909, + "grad_norm": 0.5674735629082565, + "learning_rate": 2.647556833123682e-06, + "loss": 0.2929, + "step": 21209 + }, + { + "epoch": 0.9935822363798191, + "grad_norm": 0.600316227115769, + "learning_rate": 2.64736751317867e-06, + "loss": 0.2926, + "step": 21210 + }, + { + "epoch": 0.9936290813697475, + "grad_norm": 0.5785469223746481, + "learning_rate": 2.6471781923855943e-06, + "loss": 0.3086, + "step": 21211 + }, + { + "epoch": 0.9936759263596758, + "grad_norm": 0.5956132089397075, + "learning_rate": 2.6469888707455447e-06, + "loss": 0.3266, + "step": 21212 + }, + { + "epoch": 0.9937227713496042, + "grad_norm": 0.569578676314079, + "learning_rate": 2.6467995482596133e-06, + "loss": 0.3129, + "step": 21213 + }, + { + "epoch": 0.9937696163395325, + "grad_norm": 0.6181853108552666, + "learning_rate": 2.6466102249288856e-06, + "loss": 0.3415, + "step": 21214 + }, + { + "epoch": 0.9938164613294608, + "grad_norm": 0.5539387397487274, + "learning_rate": 2.6464209007544533e-06, + "loss": 0.3087, + "step": 21215 + }, + { + "epoch": 0.9938633063193891, + "grad_norm": 0.6473179021636172, + "learning_rate": 2.6462315757374058e-06, + "loss": 0.3162, + "step": 21216 + }, + { + "epoch": 0.9939101513093175, + "grad_norm": 0.5746934675399749, + "learning_rate": 2.6460422498788323e-06, + "loss": 0.3329, + "step": 21217 + }, + { + "epoch": 0.9939569962992458, + "grad_norm": 0.5731023753888831, + "learning_rate": 2.6458529231798225e-06, + "loss": 0.3039, + "step": 21218 + }, + { + "epoch": 0.9940038412891741, + "grad_norm": 0.574408543047169, + "learning_rate": 2.6456635956414658e-06, + "loss": 0.2984, + "step": 21219 + }, + { + "epoch": 0.9940506862791024, + "grad_norm": 0.6218117128779928, + "learning_rate": 2.6454742672648514e-06, + "loss": 0.2977, + "step": 21220 + }, + { + "epoch": 0.9940975312690308, + "grad_norm": 0.649830773840231, + "learning_rate": 2.6452849380510686e-06, + "loss": 0.3431, + "step": 21221 + }, + { + "epoch": 0.9941443762589591, + "grad_norm": 0.6526867148896945, + "learning_rate": 2.6450956080012088e-06, + "loss": 0.3291, + "step": 21222 + }, + { + "epoch": 0.9941912212488875, + "grad_norm": 0.5811210830892365, + "learning_rate": 2.6449062771163595e-06, + "loss": 0.3099, + "step": 21223 + }, + { + "epoch": 0.9942380662388157, + "grad_norm": 0.5709335679134533, + "learning_rate": 2.6447169453976106e-06, + "loss": 0.3014, + "step": 21224 + }, + { + "epoch": 0.9942849112287441, + "grad_norm": 0.6448509232251257, + "learning_rate": 2.644527612846053e-06, + "loss": 0.3434, + "step": 21225 + }, + { + "epoch": 0.9943317562186724, + "grad_norm": 0.5576034817238849, + "learning_rate": 2.6443382794627746e-06, + "loss": 0.3064, + "step": 21226 + }, + { + "epoch": 0.9943786012086008, + "grad_norm": 0.5983743984202411, + "learning_rate": 2.644148945248866e-06, + "loss": 0.3139, + "step": 21227 + }, + { + "epoch": 0.994425446198529, + "grad_norm": 0.5734596115205485, + "learning_rate": 2.6439596102054167e-06, + "loss": 0.307, + "step": 21228 + }, + { + "epoch": 0.9944722911884574, + "grad_norm": 0.6115455068594645, + "learning_rate": 2.6437702743335154e-06, + "loss": 0.3256, + "step": 21229 + }, + { + "epoch": 0.9945191361783857, + "grad_norm": 0.6428069865215267, + "learning_rate": 2.6435809376342525e-06, + "loss": 0.3228, + "step": 21230 + }, + { + "epoch": 0.9945659811683141, + "grad_norm": 0.5677273899545865, + "learning_rate": 2.643391600108718e-06, + "loss": 0.3079, + "step": 21231 + }, + { + "epoch": 0.9946128261582424, + "grad_norm": 0.5661365947922004, + "learning_rate": 2.643202261758e-06, + "loss": 0.3151, + "step": 21232 + }, + { + "epoch": 0.9946596711481707, + "grad_norm": 0.5937828008771194, + "learning_rate": 2.643012922583189e-06, + "loss": 0.3099, + "step": 21233 + }, + { + "epoch": 0.994706516138099, + "grad_norm": 0.6650801269837866, + "learning_rate": 2.6428235825853753e-06, + "loss": 0.3442, + "step": 21234 + }, + { + "epoch": 0.9947533611280274, + "grad_norm": 0.5627588736420275, + "learning_rate": 2.6426342417656474e-06, + "loss": 0.3164, + "step": 21235 + }, + { + "epoch": 0.9948002061179557, + "grad_norm": 0.6921524643714658, + "learning_rate": 2.642444900125095e-06, + "loss": 0.3245, + "step": 21236 + }, + { + "epoch": 0.994847051107884, + "grad_norm": 0.6330732001238774, + "learning_rate": 2.6422555576648083e-06, + "loss": 0.3486, + "step": 21237 + }, + { + "epoch": 0.9948938960978123, + "grad_norm": 0.6141127624624445, + "learning_rate": 2.642066214385877e-06, + "loss": 0.3238, + "step": 21238 + }, + { + "epoch": 0.9949407410877407, + "grad_norm": 0.6013688954129454, + "learning_rate": 2.64187687028939e-06, + "loss": 0.3082, + "step": 21239 + }, + { + "epoch": 0.994987586077669, + "grad_norm": 0.5658434718455599, + "learning_rate": 2.6416875253764375e-06, + "loss": 0.303, + "step": 21240 + }, + { + "epoch": 0.9950344310675974, + "grad_norm": 0.5728728536919974, + "learning_rate": 2.641498179648109e-06, + "loss": 0.3171, + "step": 21241 + }, + { + "epoch": 0.9950812760575256, + "grad_norm": 0.6252142133492414, + "learning_rate": 2.6413088331054926e-06, + "loss": 0.323, + "step": 21242 + }, + { + "epoch": 0.995128121047454, + "grad_norm": 0.5593707017344229, + "learning_rate": 2.6411194857496803e-06, + "loss": 0.3048, + "step": 21243 + }, + { + "epoch": 0.9951749660373823, + "grad_norm": 0.6358170602213455, + "learning_rate": 2.6409301375817612e-06, + "loss": 0.329, + "step": 21244 + }, + { + "epoch": 0.9952218110273107, + "grad_norm": 0.5673873226231925, + "learning_rate": 2.6407407886028246e-06, + "loss": 0.3029, + "step": 21245 + }, + { + "epoch": 0.9952686560172389, + "grad_norm": 0.6044986024019859, + "learning_rate": 2.6405514388139597e-06, + "loss": 0.3271, + "step": 21246 + }, + { + "epoch": 0.9953155010071673, + "grad_norm": 0.565223596968674, + "learning_rate": 2.6403620882162567e-06, + "loss": 0.2975, + "step": 21247 + }, + { + "epoch": 0.9953623459970956, + "grad_norm": 0.5766916850261238, + "learning_rate": 2.6401727368108053e-06, + "loss": 0.3059, + "step": 21248 + }, + { + "epoch": 0.995409190987024, + "grad_norm": 0.5822248512859446, + "learning_rate": 2.639983384598695e-06, + "loss": 0.3235, + "step": 21249 + }, + { + "epoch": 0.9954560359769523, + "grad_norm": 0.6249299555095076, + "learning_rate": 2.6397940315810156e-06, + "loss": 0.304, + "step": 21250 + }, + { + "epoch": 0.9955028809668806, + "grad_norm": 0.586852548154072, + "learning_rate": 2.639604677758857e-06, + "loss": 0.3122, + "step": 21251 + }, + { + "epoch": 0.9955497259568089, + "grad_norm": 0.6150377776329865, + "learning_rate": 2.6394153231333076e-06, + "loss": 0.3168, + "step": 21252 + }, + { + "epoch": 0.9955965709467373, + "grad_norm": 0.6028005497856509, + "learning_rate": 2.639225967705459e-06, + "loss": 0.3091, + "step": 21253 + }, + { + "epoch": 0.9956434159366656, + "grad_norm": 0.6036147672645008, + "learning_rate": 2.6390366114763993e-06, + "loss": 0.3097, + "step": 21254 + }, + { + "epoch": 0.9956902609265939, + "grad_norm": 0.5601491190457736, + "learning_rate": 2.638847254447219e-06, + "loss": 0.3107, + "step": 21255 + }, + { + "epoch": 0.9957371059165222, + "grad_norm": 0.5914512263641781, + "learning_rate": 2.638657896619008e-06, + "loss": 0.3081, + "step": 21256 + }, + { + "epoch": 0.9957839509064506, + "grad_norm": 0.6325997353568664, + "learning_rate": 2.6384685379928555e-06, + "loss": 0.3128, + "step": 21257 + }, + { + "epoch": 0.9958307958963789, + "grad_norm": 0.5537862705685342, + "learning_rate": 2.6382791785698514e-06, + "loss": 0.3077, + "step": 21258 + }, + { + "epoch": 0.9958776408863073, + "grad_norm": 0.5759609021896352, + "learning_rate": 2.638089818351085e-06, + "loss": 0.3146, + "step": 21259 + }, + { + "epoch": 0.9959244858762355, + "grad_norm": 0.6051417321085353, + "learning_rate": 2.6379004573376464e-06, + "loss": 0.3291, + "step": 21260 + }, + { + "epoch": 0.9959713308661639, + "grad_norm": 0.5977491923159644, + "learning_rate": 2.6377110955306258e-06, + "loss": 0.3326, + "step": 21261 + }, + { + "epoch": 0.9960181758560922, + "grad_norm": 0.5756211773141994, + "learning_rate": 2.6375217329311126e-06, + "loss": 0.321, + "step": 21262 + }, + { + "epoch": 0.9960650208460206, + "grad_norm": 0.5770591432347305, + "learning_rate": 2.637332369540196e-06, + "loss": 0.3243, + "step": 21263 + }, + { + "epoch": 0.9961118658359488, + "grad_norm": 0.535645184406193, + "learning_rate": 2.637143005358965e-06, + "loss": 0.2973, + "step": 21264 + }, + { + "epoch": 0.9961587108258771, + "grad_norm": 0.6448469559886073, + "learning_rate": 2.6369536403885127e-06, + "loss": 0.3252, + "step": 21265 + }, + { + "epoch": 0.9962055558158055, + "grad_norm": 0.5678225871484768, + "learning_rate": 2.6367642746299254e-06, + "loss": 0.2953, + "step": 21266 + }, + { + "epoch": 0.9962524008057339, + "grad_norm": 0.5912327735776295, + "learning_rate": 2.636574908084294e-06, + "loss": 0.2872, + "step": 21267 + }, + { + "epoch": 0.9962992457956622, + "grad_norm": 0.6478360207692264, + "learning_rate": 2.636385540752709e-06, + "loss": 0.3076, + "step": 21268 + }, + { + "epoch": 0.9963460907855904, + "grad_norm": 0.6649302785836358, + "learning_rate": 2.636196172636259e-06, + "loss": 0.3411, + "step": 21269 + }, + { + "epoch": 0.9963929357755188, + "grad_norm": 0.5985700445806128, + "learning_rate": 2.6360068037360344e-06, + "loss": 0.3247, + "step": 21270 + }, + { + "epoch": 0.9964397807654471, + "grad_norm": 0.5672563771824637, + "learning_rate": 2.6358174340531244e-06, + "loss": 0.3076, + "step": 21271 + }, + { + "epoch": 0.9964866257553755, + "grad_norm": 0.6220809153301703, + "learning_rate": 2.63562806358862e-06, + "loss": 0.3229, + "step": 21272 + }, + { + "epoch": 0.9965334707453037, + "grad_norm": 0.5968907703368161, + "learning_rate": 2.6354386923436095e-06, + "loss": 0.3202, + "step": 21273 + }, + { + "epoch": 0.9965803157352321, + "grad_norm": 0.6781182288382551, + "learning_rate": 2.6352493203191836e-06, + "loss": 0.3085, + "step": 21274 + }, + { + "epoch": 0.9966271607251604, + "grad_norm": 0.6180236950540221, + "learning_rate": 2.635059947516432e-06, + "loss": 0.2943, + "step": 21275 + }, + { + "epoch": 0.9966740057150888, + "grad_norm": 0.6009782590400828, + "learning_rate": 2.634870573936444e-06, + "loss": 0.3264, + "step": 21276 + }, + { + "epoch": 0.9967208507050171, + "grad_norm": 0.5841831714154131, + "learning_rate": 2.63468119958031e-06, + "loss": 0.3207, + "step": 21277 + }, + { + "epoch": 0.9967676956949454, + "grad_norm": 0.6451728988696295, + "learning_rate": 2.6344918244491192e-06, + "loss": 0.3258, + "step": 21278 + }, + { + "epoch": 0.9968145406848737, + "grad_norm": 0.5905742095636636, + "learning_rate": 2.6343024485439623e-06, + "loss": 0.3032, + "step": 21279 + }, + { + "epoch": 0.9968613856748021, + "grad_norm": 0.5541534651655476, + "learning_rate": 2.6341130718659285e-06, + "loss": 0.2878, + "step": 21280 + }, + { + "epoch": 0.9969082306647304, + "grad_norm": 0.604192499485572, + "learning_rate": 2.633923694416107e-06, + "loss": 0.321, + "step": 21281 + }, + { + "epoch": 0.9969550756546587, + "grad_norm": 0.6012447930611844, + "learning_rate": 2.6337343161955886e-06, + "loss": 0.3332, + "step": 21282 + }, + { + "epoch": 0.997001920644587, + "grad_norm": 0.5788099859350169, + "learning_rate": 2.633544937205464e-06, + "loss": 0.3128, + "step": 21283 + }, + { + "epoch": 0.9970487656345154, + "grad_norm": 0.6943105364627788, + "learning_rate": 2.6333555574468206e-06, + "loss": 0.3237, + "step": 21284 + }, + { + "epoch": 0.9970956106244437, + "grad_norm": 0.5685651682857589, + "learning_rate": 2.633166176920749e-06, + "loss": 0.2989, + "step": 21285 + }, + { + "epoch": 0.9971424556143721, + "grad_norm": 0.5761202747960795, + "learning_rate": 2.6329767956283407e-06, + "loss": 0.3107, + "step": 21286 + }, + { + "epoch": 0.9971893006043003, + "grad_norm": 0.5870123487037111, + "learning_rate": 2.632787413570684e-06, + "loss": 0.3101, + "step": 21287 + }, + { + "epoch": 0.9972361455942287, + "grad_norm": 0.6254988621872815, + "learning_rate": 2.6325980307488684e-06, + "loss": 0.294, + "step": 21288 + }, + { + "epoch": 0.997282990584157, + "grad_norm": 0.6427108234550561, + "learning_rate": 2.632408647163986e-06, + "loss": 0.333, + "step": 21289 + }, + { + "epoch": 0.9973298355740854, + "grad_norm": 0.5642116420432493, + "learning_rate": 2.632219262817124e-06, + "loss": 0.3202, + "step": 21290 + }, + { + "epoch": 0.9973766805640136, + "grad_norm": 0.5878724467184347, + "learning_rate": 2.6320298777093734e-06, + "loss": 0.3249, + "step": 21291 + }, + { + "epoch": 0.997423525553942, + "grad_norm": 0.5654367518379544, + "learning_rate": 2.6318404918418245e-06, + "loss": 0.3024, + "step": 21292 + }, + { + "epoch": 0.9974703705438703, + "grad_norm": 0.5853166834352296, + "learning_rate": 2.631651105215566e-06, + "loss": 0.2991, + "step": 21293 + }, + { + "epoch": 0.9975172155337987, + "grad_norm": 0.6083915516302961, + "learning_rate": 2.6314617178316894e-06, + "loss": 0.3334, + "step": 21294 + }, + { + "epoch": 0.997564060523727, + "grad_norm": 0.5867529697371738, + "learning_rate": 2.631272329691283e-06, + "loss": 0.31, + "step": 21295 + }, + { + "epoch": 0.9976109055136553, + "grad_norm": 0.5559546264171559, + "learning_rate": 2.6310829407954377e-06, + "loss": 0.2932, + "step": 21296 + }, + { + "epoch": 0.9976577505035836, + "grad_norm": 0.6521261717906727, + "learning_rate": 2.6308935511452425e-06, + "loss": 0.3277, + "step": 21297 + }, + { + "epoch": 0.997704595493512, + "grad_norm": 0.560897384218934, + "learning_rate": 2.630704160741788e-06, + "loss": 0.3041, + "step": 21298 + }, + { + "epoch": 0.9977514404834403, + "grad_norm": 0.6027252564202099, + "learning_rate": 2.6305147695861643e-06, + "loss": 0.3193, + "step": 21299 + }, + { + "epoch": 0.9977982854733686, + "grad_norm": 0.6014140796681434, + "learning_rate": 2.6303253776794607e-06, + "loss": 0.3109, + "step": 21300 + }, + { + "epoch": 0.9978451304632969, + "grad_norm": 0.6364476742854593, + "learning_rate": 2.6301359850227674e-06, + "loss": 0.3229, + "step": 21301 + }, + { + "epoch": 0.9978919754532253, + "grad_norm": 0.6158690266103739, + "learning_rate": 2.629946591617174e-06, + "loss": 0.3254, + "step": 21302 + }, + { + "epoch": 0.9979388204431536, + "grad_norm": 0.5257795988427705, + "learning_rate": 2.6297571974637703e-06, + "loss": 0.302, + "step": 21303 + }, + { + "epoch": 0.997985665433082, + "grad_norm": 0.6130384715949845, + "learning_rate": 2.629567802563648e-06, + "loss": 0.328, + "step": 21304 + }, + { + "epoch": 0.9980325104230102, + "grad_norm": 0.6070444678091378, + "learning_rate": 2.629378406917894e-06, + "loss": 0.3015, + "step": 21305 + }, + { + "epoch": 0.9980793554129386, + "grad_norm": 0.617220785262487, + "learning_rate": 2.6291890105275998e-06, + "loss": 0.327, + "step": 21306 + }, + { + "epoch": 0.9981262004028669, + "grad_norm": 0.5713026204453382, + "learning_rate": 2.6289996133938563e-06, + "loss": 0.325, + "step": 21307 + }, + { + "epoch": 0.9981730453927953, + "grad_norm": 0.583625437359098, + "learning_rate": 2.628810215517752e-06, + "loss": 0.2881, + "step": 21308 + }, + { + "epoch": 0.9982198903827235, + "grad_norm": 0.5917694320757414, + "learning_rate": 2.628620816900377e-06, + "loss": 0.3496, + "step": 21309 + }, + { + "epoch": 0.9982667353726519, + "grad_norm": 0.6028694708273685, + "learning_rate": 2.6284314175428215e-06, + "loss": 0.3161, + "step": 21310 + }, + { + "epoch": 0.9983135803625802, + "grad_norm": 0.5902883337535765, + "learning_rate": 2.6282420174461767e-06, + "loss": 0.3121, + "step": 21311 + }, + { + "epoch": 0.9983604253525086, + "grad_norm": 0.613016952752628, + "learning_rate": 2.6280526166115294e-06, + "loss": 0.321, + "step": 21312 + }, + { + "epoch": 0.9984072703424369, + "grad_norm": 0.5886638029300909, + "learning_rate": 2.6278632150399723e-06, + "loss": 0.3148, + "step": 21313 + }, + { + "epoch": 0.9984541153323652, + "grad_norm": 0.5960034657835527, + "learning_rate": 2.6276738127325947e-06, + "loss": 0.2958, + "step": 21314 + }, + { + "epoch": 0.9985009603222935, + "grad_norm": 0.5450681301033272, + "learning_rate": 2.6274844096904868e-06, + "loss": 0.3019, + "step": 21315 + }, + { + "epoch": 0.9985478053122219, + "grad_norm": 0.5852353134980778, + "learning_rate": 2.6272950059147374e-06, + "loss": 0.3014, + "step": 21316 + }, + { + "epoch": 0.9985946503021502, + "grad_norm": 0.6045866974997831, + "learning_rate": 2.627105601406438e-06, + "loss": 0.3059, + "step": 21317 + }, + { + "epoch": 0.9986414952920785, + "grad_norm": 0.58736867350428, + "learning_rate": 2.626916196166677e-06, + "loss": 0.2914, + "step": 21318 + }, + { + "epoch": 0.9986883402820068, + "grad_norm": 0.5883613312403677, + "learning_rate": 2.6267267901965453e-06, + "loss": 0.2998, + "step": 21319 + }, + { + "epoch": 0.9987351852719352, + "grad_norm": 0.6253075184908741, + "learning_rate": 2.6265373834971337e-06, + "loss": 0.3334, + "step": 21320 + }, + { + "epoch": 0.9987820302618635, + "grad_norm": 0.6069799400917593, + "learning_rate": 2.6263479760695305e-06, + "loss": 0.3046, + "step": 21321 + }, + { + "epoch": 0.9988288752517919, + "grad_norm": 0.6028041179725744, + "learning_rate": 2.626158567914826e-06, + "loss": 0.3453, + "step": 21322 + }, + { + "epoch": 0.9988757202417201, + "grad_norm": 0.6349009378554056, + "learning_rate": 2.6259691590341117e-06, + "loss": 0.3164, + "step": 21323 + }, + { + "epoch": 0.9989225652316485, + "grad_norm": 0.5746337994764481, + "learning_rate": 2.625779749428476e-06, + "loss": 0.2974, + "step": 21324 + }, + { + "epoch": 0.9989694102215768, + "grad_norm": 0.6357363461126505, + "learning_rate": 2.6255903390990094e-06, + "loss": 0.3206, + "step": 21325 + }, + { + "epoch": 0.9990162552115052, + "grad_norm": 0.5988970464924327, + "learning_rate": 2.6254009280468022e-06, + "loss": 0.2976, + "step": 21326 + }, + { + "epoch": 0.9990631002014334, + "grad_norm": 0.6188267733433048, + "learning_rate": 2.6252115162729442e-06, + "loss": 0.3392, + "step": 21327 + }, + { + "epoch": 0.9991099451913618, + "grad_norm": 0.6034348352909391, + "learning_rate": 2.625022103778525e-06, + "loss": 0.3063, + "step": 21328 + }, + { + "epoch": 0.9991567901812901, + "grad_norm": 0.6516186337846519, + "learning_rate": 2.6248326905646356e-06, + "loss": 0.339, + "step": 21329 + }, + { + "epoch": 0.9992036351712185, + "grad_norm": 0.5917300863371502, + "learning_rate": 2.624643276632365e-06, + "loss": 0.32, + "step": 21330 + }, + { + "epoch": 0.9992504801611468, + "grad_norm": 0.5648172151757263, + "learning_rate": 2.624453861982803e-06, + "loss": 0.2989, + "step": 21331 + }, + { + "epoch": 0.999297325151075, + "grad_norm": 0.6306752314709015, + "learning_rate": 2.624264446617042e-06, + "loss": 0.3414, + "step": 21332 + }, + { + "epoch": 0.9993441701410034, + "grad_norm": 0.5730072662724722, + "learning_rate": 2.624075030536169e-06, + "loss": 0.3204, + "step": 21333 + }, + { + "epoch": 0.9993910151309318, + "grad_norm": 0.5702339712020589, + "learning_rate": 2.6238856137412756e-06, + "loss": 0.3179, + "step": 21334 + }, + { + "epoch": 0.9994378601208601, + "grad_norm": 0.6254280507968396, + "learning_rate": 2.623696196233452e-06, + "loss": 0.3276, + "step": 21335 + }, + { + "epoch": 0.9994847051107884, + "grad_norm": 0.6145178397207334, + "learning_rate": 2.6235067780137878e-06, + "loss": 0.3009, + "step": 21336 + }, + { + "epoch": 0.9995315501007167, + "grad_norm": 0.5621264174664204, + "learning_rate": 2.6233173590833725e-06, + "loss": 0.3088, + "step": 21337 + }, + { + "epoch": 0.999578395090645, + "grad_norm": 0.608873400578698, + "learning_rate": 2.623127939443298e-06, + "loss": 0.3204, + "step": 21338 + }, + { + "epoch": 0.9996252400805734, + "grad_norm": 0.6504355700601218, + "learning_rate": 2.6229385190946525e-06, + "loss": 0.3416, + "step": 21339 + }, + { + "epoch": 0.9996720850705018, + "grad_norm": 0.5246786170345673, + "learning_rate": 2.6227490980385268e-06, + "loss": 0.2884, + "step": 21340 + }, + { + "epoch": 0.99971893006043, + "grad_norm": 0.654194535953376, + "learning_rate": 2.6225596762760107e-06, + "loss": 0.347, + "step": 21341 + }, + { + "epoch": 0.9997657750503584, + "grad_norm": 0.5804934267801942, + "learning_rate": 2.6223702538081953e-06, + "loss": 0.31, + "step": 21342 + }, + { + "epoch": 0.9998126200402867, + "grad_norm": 0.6077270488282179, + "learning_rate": 2.6221808306361685e-06, + "loss": 0.3303, + "step": 21343 + }, + { + "epoch": 0.999859465030215, + "grad_norm": 0.5627794664847283, + "learning_rate": 2.6219914067610227e-06, + "loss": 0.3137, + "step": 21344 + }, + { + "epoch": 0.9999063100201433, + "grad_norm": 0.5914836509135921, + "learning_rate": 2.621801982183846e-06, + "loss": 0.3243, + "step": 21345 + }, + { + "epoch": 0.9999531550100716, + "grad_norm": 0.6497334614118613, + "learning_rate": 2.6216125569057305e-06, + "loss": 0.3455, + "step": 21346 + }, + { + "epoch": 1.0, + "grad_norm": 0.5925572660636661, + "learning_rate": 2.6214231309277656e-06, + "loss": 0.3404, + "step": 21347 + }, + { + "epoch": 1.0000468449899282, + "grad_norm": 0.7621683541790276, + "learning_rate": 2.62123370425104e-06, + "loss": 0.3065, + "step": 21348 + }, + { + "epoch": 1.0000936899798567, + "grad_norm": 0.6452814344281902, + "learning_rate": 2.6210442768766453e-06, + "loss": 0.2751, + "step": 21349 + }, + { + "epoch": 1.000140534969785, + "grad_norm": 0.6623822476048196, + "learning_rate": 2.6208548488056718e-06, + "loss": 0.2655, + "step": 21350 + }, + { + "epoch": 1.0001873799597134, + "grad_norm": 0.6722763794832238, + "learning_rate": 2.620665420039208e-06, + "loss": 0.2785, + "step": 21351 + }, + { + "epoch": 1.0002342249496416, + "grad_norm": 0.6301510218092059, + "learning_rate": 2.620475990578345e-06, + "loss": 0.2907, + "step": 21352 + }, + { + "epoch": 1.00028106993957, + "grad_norm": 0.6515464775838649, + "learning_rate": 2.6202865604241747e-06, + "loss": 0.2855, + "step": 21353 + }, + { + "epoch": 1.0003279149294984, + "grad_norm": 0.5934511416635357, + "learning_rate": 2.620097129577784e-06, + "loss": 0.2771, + "step": 21354 + }, + { + "epoch": 1.0003747599194266, + "grad_norm": 0.5929206939460382, + "learning_rate": 2.6199076980402644e-06, + "loss": 0.2839, + "step": 21355 + }, + { + "epoch": 1.0004216049093548, + "grad_norm": 0.562104201587405, + "learning_rate": 2.6197182658127063e-06, + "loss": 0.2689, + "step": 21356 + }, + { + "epoch": 1.0004684498992833, + "grad_norm": 0.5868383897843054, + "learning_rate": 2.6195288328962003e-06, + "loss": 0.2791, + "step": 21357 + }, + { + "epoch": 1.0005152948892115, + "grad_norm": 0.590886450912142, + "learning_rate": 2.6193393992918354e-06, + "loss": 0.2839, + "step": 21358 + }, + { + "epoch": 1.00056213987914, + "grad_norm": 0.6360721602139459, + "learning_rate": 2.6191499650007025e-06, + "loss": 0.2862, + "step": 21359 + }, + { + "epoch": 1.0006089848690682, + "grad_norm": 0.6083402397294084, + "learning_rate": 2.6189605300238914e-06, + "loss": 0.2852, + "step": 21360 + }, + { + "epoch": 1.0006558298589965, + "grad_norm": 0.6409385844575117, + "learning_rate": 2.618771094362492e-06, + "loss": 0.2859, + "step": 21361 + }, + { + "epoch": 1.000702674848925, + "grad_norm": 0.6495743725836046, + "learning_rate": 2.6185816580175954e-06, + "loss": 0.282, + "step": 21362 + }, + { + "epoch": 1.0007495198388532, + "grad_norm": 0.6721244669185126, + "learning_rate": 2.618392220990291e-06, + "loss": 0.2802, + "step": 21363 + }, + { + "epoch": 1.0007963648287816, + "grad_norm": 0.600968557461043, + "learning_rate": 2.6182027832816688e-06, + "loss": 0.2809, + "step": 21364 + }, + { + "epoch": 1.00084320981871, + "grad_norm": 0.6081989461590142, + "learning_rate": 2.6180133448928196e-06, + "loss": 0.2903, + "step": 21365 + }, + { + "epoch": 1.0008900548086381, + "grad_norm": 0.5962887813623428, + "learning_rate": 2.617823905824833e-06, + "loss": 0.27, + "step": 21366 + }, + { + "epoch": 1.0009368997985666, + "grad_norm": 0.596940738614943, + "learning_rate": 2.6176344660787995e-06, + "loss": 0.2928, + "step": 21367 + }, + { + "epoch": 1.0009837447884948, + "grad_norm": 0.5516713385228559, + "learning_rate": 2.6174450256558096e-06, + "loss": 0.2764, + "step": 21368 + }, + { + "epoch": 1.0010305897784233, + "grad_norm": 0.5924111919806592, + "learning_rate": 2.6172555845569527e-06, + "loss": 0.2774, + "step": 21369 + }, + { + "epoch": 1.0010774347683515, + "grad_norm": 0.6600233402729188, + "learning_rate": 2.6170661427833192e-06, + "loss": 0.2917, + "step": 21370 + }, + { + "epoch": 1.0011242797582798, + "grad_norm": 0.6083158117031688, + "learning_rate": 2.6168767003360002e-06, + "loss": 0.2906, + "step": 21371 + }, + { + "epoch": 1.0011711247482082, + "grad_norm": 0.5781676128053397, + "learning_rate": 2.6166872572160846e-06, + "loss": 0.2815, + "step": 21372 + }, + { + "epoch": 1.0012179697381365, + "grad_norm": 0.6363212207314644, + "learning_rate": 2.616497813424663e-06, + "loss": 0.2863, + "step": 21373 + }, + { + "epoch": 1.0012648147280647, + "grad_norm": 0.6162337189608341, + "learning_rate": 2.616308368962826e-06, + "loss": 0.275, + "step": 21374 + }, + { + "epoch": 1.0013116597179932, + "grad_norm": 0.554226637323728, + "learning_rate": 2.6161189238316635e-06, + "loss": 0.2684, + "step": 21375 + }, + { + "epoch": 1.0013585047079214, + "grad_norm": 0.5890759396746714, + "learning_rate": 2.615929478032266e-06, + "loss": 0.2884, + "step": 21376 + }, + { + "epoch": 1.00140534969785, + "grad_norm": 0.5915313128838672, + "learning_rate": 2.6157400315657234e-06, + "loss": 0.2829, + "step": 21377 + }, + { + "epoch": 1.0014521946877781, + "grad_norm": 0.6281918502988475, + "learning_rate": 2.6155505844331264e-06, + "loss": 0.2816, + "step": 21378 + }, + { + "epoch": 1.0014990396777064, + "grad_norm": 0.6171045605629709, + "learning_rate": 2.615361136635564e-06, + "loss": 0.2738, + "step": 21379 + }, + { + "epoch": 1.0015458846676348, + "grad_norm": 0.580393423374886, + "learning_rate": 2.615171688174128e-06, + "loss": 0.2743, + "step": 21380 + }, + { + "epoch": 1.001592729657563, + "grad_norm": 0.5489444640379922, + "learning_rate": 2.6149822390499076e-06, + "loss": 0.2645, + "step": 21381 + }, + { + "epoch": 1.0016395746474915, + "grad_norm": 0.5654956554695513, + "learning_rate": 2.614792789263993e-06, + "loss": 0.2616, + "step": 21382 + }, + { + "epoch": 1.0016864196374198, + "grad_norm": 0.609475185296607, + "learning_rate": 2.614603338817475e-06, + "loss": 0.2758, + "step": 21383 + }, + { + "epoch": 1.001733264627348, + "grad_norm": 0.6247094932810627, + "learning_rate": 2.6144138877114433e-06, + "loss": 0.2789, + "step": 21384 + }, + { + "epoch": 1.0017801096172765, + "grad_norm": 0.5885017359671056, + "learning_rate": 2.6142244359469893e-06, + "loss": 0.2714, + "step": 21385 + }, + { + "epoch": 1.0018269546072047, + "grad_norm": 0.5584660381877136, + "learning_rate": 2.6140349835252018e-06, + "loss": 0.2696, + "step": 21386 + }, + { + "epoch": 1.0018737995971332, + "grad_norm": 0.612944598430084, + "learning_rate": 2.6138455304471716e-06, + "loss": 0.292, + "step": 21387 + }, + { + "epoch": 1.0019206445870614, + "grad_norm": 0.6444917102938598, + "learning_rate": 2.613656076713989e-06, + "loss": 0.2898, + "step": 21388 + }, + { + "epoch": 1.0019674895769897, + "grad_norm": 0.638658770240823, + "learning_rate": 2.613466622326744e-06, + "loss": 0.296, + "step": 21389 + }, + { + "epoch": 1.0020143345669181, + "grad_norm": 0.6021928898170886, + "learning_rate": 2.6132771672865274e-06, + "loss": 0.291, + "step": 21390 + }, + { + "epoch": 1.0020611795568464, + "grad_norm": 0.6063935934336643, + "learning_rate": 2.61308771159443e-06, + "loss": 0.2802, + "step": 21391 + }, + { + "epoch": 1.0021080245467746, + "grad_norm": 0.5705442945942418, + "learning_rate": 2.6128982552515397e-06, + "loss": 0.2732, + "step": 21392 + }, + { + "epoch": 1.002154869536703, + "grad_norm": 0.5738122801510729, + "learning_rate": 2.6127087982589493e-06, + "loss": 0.2716, + "step": 21393 + }, + { + "epoch": 1.0022017145266313, + "grad_norm": 0.5697702433375164, + "learning_rate": 2.6125193406177473e-06, + "loss": 0.2707, + "step": 21394 + }, + { + "epoch": 1.0022485595165598, + "grad_norm": 0.583949688425172, + "learning_rate": 2.6123298823290254e-06, + "loss": 0.2773, + "step": 21395 + }, + { + "epoch": 1.002295404506488, + "grad_norm": 0.5762072264593651, + "learning_rate": 2.6121404233938734e-06, + "loss": 0.2621, + "step": 21396 + }, + { + "epoch": 1.0023422494964163, + "grad_norm": 0.6073203039942336, + "learning_rate": 2.611950963813381e-06, + "loss": 0.2974, + "step": 21397 + }, + { + "epoch": 1.0023890944863447, + "grad_norm": 0.5784534531447744, + "learning_rate": 2.6117615035886395e-06, + "loss": 0.2769, + "step": 21398 + }, + { + "epoch": 1.002435939476273, + "grad_norm": 0.5825719026729944, + "learning_rate": 2.6115720427207388e-06, + "loss": 0.2748, + "step": 21399 + }, + { + "epoch": 1.0024827844662014, + "grad_norm": 0.575690001108276, + "learning_rate": 2.611382581210768e-06, + "loss": 0.2569, + "step": 21400 + }, + { + "epoch": 1.0025296294561297, + "grad_norm": 0.589616860493083, + "learning_rate": 2.611193119059818e-06, + "loss": 0.2879, + "step": 21401 + }, + { + "epoch": 1.002576474446058, + "grad_norm": 0.5723757948213621, + "learning_rate": 2.611003656268982e-06, + "loss": 0.2858, + "step": 21402 + }, + { + "epoch": 1.0026233194359864, + "grad_norm": 0.605695352520867, + "learning_rate": 2.6108141928393456e-06, + "loss": 0.2841, + "step": 21403 + }, + { + "epoch": 1.0026701644259146, + "grad_norm": 0.5871127439310732, + "learning_rate": 2.6106247287720023e-06, + "loss": 0.2639, + "step": 21404 + }, + { + "epoch": 1.002717009415843, + "grad_norm": 0.5506756708855276, + "learning_rate": 2.6104352640680414e-06, + "loss": 0.274, + "step": 21405 + }, + { + "epoch": 1.0027638544057713, + "grad_norm": 0.5812333232331687, + "learning_rate": 2.6102457987285533e-06, + "loss": 0.2904, + "step": 21406 + }, + { + "epoch": 1.0028106993956996, + "grad_norm": 0.6189056641065335, + "learning_rate": 2.610056332754628e-06, + "loss": 0.2816, + "step": 21407 + }, + { + "epoch": 1.002857544385628, + "grad_norm": 0.6067541019718526, + "learning_rate": 2.6098668661473563e-06, + "loss": 0.2859, + "step": 21408 + }, + { + "epoch": 1.0029043893755563, + "grad_norm": 0.580309079045611, + "learning_rate": 2.609677398907829e-06, + "loss": 0.2835, + "step": 21409 + }, + { + "epoch": 1.0029512343654845, + "grad_norm": 0.6216073699692082, + "learning_rate": 2.6094879310371353e-06, + "loss": 0.2888, + "step": 21410 + }, + { + "epoch": 1.002998079355413, + "grad_norm": 0.5830879352184215, + "learning_rate": 2.609298462536366e-06, + "loss": 0.2799, + "step": 21411 + }, + { + "epoch": 1.0030449243453412, + "grad_norm": 0.6076639830122114, + "learning_rate": 2.6091089934066123e-06, + "loss": 0.2988, + "step": 21412 + }, + { + "epoch": 1.0030917693352697, + "grad_norm": 0.5898203582107507, + "learning_rate": 2.6089195236489622e-06, + "loss": 0.2921, + "step": 21413 + }, + { + "epoch": 1.003138614325198, + "grad_norm": 0.5846424033437457, + "learning_rate": 2.6087300532645087e-06, + "loss": 0.2818, + "step": 21414 + }, + { + "epoch": 1.0031854593151261, + "grad_norm": 0.6333824113166386, + "learning_rate": 2.608540582254341e-06, + "loss": 0.293, + "step": 21415 + }, + { + "epoch": 1.0032323043050546, + "grad_norm": 0.5384497783843007, + "learning_rate": 2.6083511106195497e-06, + "loss": 0.2514, + "step": 21416 + }, + { + "epoch": 1.0032791492949829, + "grad_norm": 0.6494738136148223, + "learning_rate": 2.6081616383612247e-06, + "loss": 0.2842, + "step": 21417 + }, + { + "epoch": 1.0033259942849113, + "grad_norm": 0.5678476451266093, + "learning_rate": 2.607972165480456e-06, + "loss": 0.2805, + "step": 21418 + }, + { + "epoch": 1.0033728392748396, + "grad_norm": 0.5578010198019967, + "learning_rate": 2.607782691978336e-06, + "loss": 0.2668, + "step": 21419 + }, + { + "epoch": 1.0034196842647678, + "grad_norm": 0.5786992978242369, + "learning_rate": 2.607593217855953e-06, + "loss": 0.2786, + "step": 21420 + }, + { + "epoch": 1.0034665292546963, + "grad_norm": 0.591157957000169, + "learning_rate": 2.6074037431143977e-06, + "loss": 0.2907, + "step": 21421 + }, + { + "epoch": 1.0035133742446245, + "grad_norm": 0.5900154808635065, + "learning_rate": 2.6072142677547614e-06, + "loss": 0.2737, + "step": 21422 + }, + { + "epoch": 1.003560219234553, + "grad_norm": 0.580419081366838, + "learning_rate": 2.6070247917781345e-06, + "loss": 0.2764, + "step": 21423 + }, + { + "epoch": 1.0036070642244812, + "grad_norm": 0.6162703053662256, + "learning_rate": 2.6068353151856057e-06, + "loss": 0.2815, + "step": 21424 + }, + { + "epoch": 1.0036539092144094, + "grad_norm": 0.58839066331712, + "learning_rate": 2.6066458379782666e-06, + "loss": 0.2723, + "step": 21425 + }, + { + "epoch": 1.003700754204338, + "grad_norm": 0.5760413242902857, + "learning_rate": 2.606456360157208e-06, + "loss": 0.2725, + "step": 21426 + }, + { + "epoch": 1.0037475991942661, + "grad_norm": 0.5938864687877342, + "learning_rate": 2.60626688172352e-06, + "loss": 0.2838, + "step": 21427 + }, + { + "epoch": 1.0037944441841944, + "grad_norm": 0.6366007603759142, + "learning_rate": 2.606077402678292e-06, + "loss": 0.298, + "step": 21428 + }, + { + "epoch": 1.0038412891741229, + "grad_norm": 0.5691569127246459, + "learning_rate": 2.605887923022616e-06, + "loss": 0.2875, + "step": 21429 + }, + { + "epoch": 1.003888134164051, + "grad_norm": 0.6098610320634014, + "learning_rate": 2.6056984427575816e-06, + "loss": 0.2832, + "step": 21430 + }, + { + "epoch": 1.0039349791539796, + "grad_norm": 0.5871346153279215, + "learning_rate": 2.6055089618842783e-06, + "loss": 0.2831, + "step": 21431 + }, + { + "epoch": 1.0039818241439078, + "grad_norm": 0.5842957251118422, + "learning_rate": 2.6053194804037986e-06, + "loss": 0.293, + "step": 21432 + }, + { + "epoch": 1.004028669133836, + "grad_norm": 0.6308502723976379, + "learning_rate": 2.6051299983172312e-06, + "loss": 0.2852, + "step": 21433 + }, + { + "epoch": 1.0040755141237645, + "grad_norm": 0.5981065979180445, + "learning_rate": 2.6049405156256668e-06, + "loss": 0.2802, + "step": 21434 + }, + { + "epoch": 1.0041223591136927, + "grad_norm": 0.5999552721144881, + "learning_rate": 2.6047510323301966e-06, + "loss": 0.2841, + "step": 21435 + }, + { + "epoch": 1.0041692041036212, + "grad_norm": 0.5567591981806329, + "learning_rate": 2.6045615484319096e-06, + "loss": 0.2622, + "step": 21436 + }, + { + "epoch": 1.0042160490935494, + "grad_norm": 0.5718803037668873, + "learning_rate": 2.604372063931898e-06, + "loss": 0.2876, + "step": 21437 + }, + { + "epoch": 1.0042628940834777, + "grad_norm": 0.6417719842063629, + "learning_rate": 2.604182578831252e-06, + "loss": 0.2639, + "step": 21438 + }, + { + "epoch": 1.0043097390734061, + "grad_norm": 0.6054897941397254, + "learning_rate": 2.6039930931310604e-06, + "loss": 0.2671, + "step": 21439 + }, + { + "epoch": 1.0043565840633344, + "grad_norm": 0.5500629165717439, + "learning_rate": 2.603803606832415e-06, + "loss": 0.2644, + "step": 21440 + }, + { + "epoch": 1.0044034290532629, + "grad_norm": 0.554167661335181, + "learning_rate": 2.6036141199364064e-06, + "loss": 0.2807, + "step": 21441 + }, + { + "epoch": 1.004450274043191, + "grad_norm": 0.5915527659481743, + "learning_rate": 2.603424632444124e-06, + "loss": 0.2797, + "step": 21442 + }, + { + "epoch": 1.0044971190331193, + "grad_norm": 0.550807902821988, + "learning_rate": 2.6032351443566584e-06, + "loss": 0.2621, + "step": 21443 + }, + { + "epoch": 1.0045439640230478, + "grad_norm": 0.5781126009101997, + "learning_rate": 2.603045655675102e-06, + "loss": 0.2799, + "step": 21444 + }, + { + "epoch": 1.004590809012976, + "grad_norm": 0.5750630801517375, + "learning_rate": 2.6028561664005425e-06, + "loss": 0.2684, + "step": 21445 + }, + { + "epoch": 1.0046376540029043, + "grad_norm": 0.6505500273191692, + "learning_rate": 2.6026666765340715e-06, + "loss": 0.2875, + "step": 21446 + }, + { + "epoch": 1.0046844989928327, + "grad_norm": 0.5909704528999197, + "learning_rate": 2.60247718607678e-06, + "loss": 0.2753, + "step": 21447 + }, + { + "epoch": 1.004731343982761, + "grad_norm": 0.603126573152084, + "learning_rate": 2.602287695029759e-06, + "loss": 0.2842, + "step": 21448 + }, + { + "epoch": 1.0047781889726894, + "grad_norm": 0.6073147691624754, + "learning_rate": 2.602098203394096e-06, + "loss": 0.2966, + "step": 21449 + }, + { + "epoch": 1.0048250339626177, + "grad_norm": 0.5496458088509357, + "learning_rate": 2.601908711170885e-06, + "loss": 0.2597, + "step": 21450 + }, + { + "epoch": 1.004871878952546, + "grad_norm": 0.5710188017963577, + "learning_rate": 2.601719218361215e-06, + "loss": 0.2852, + "step": 21451 + }, + { + "epoch": 1.0049187239424744, + "grad_norm": 0.5993721522448535, + "learning_rate": 2.601529724966176e-06, + "loss": 0.2488, + "step": 21452 + }, + { + "epoch": 1.0049655689324026, + "grad_norm": 0.5970863115442636, + "learning_rate": 2.6013402309868586e-06, + "loss": 0.2639, + "step": 21453 + }, + { + "epoch": 1.005012413922331, + "grad_norm": 0.6191269758618422, + "learning_rate": 2.6011507364243544e-06, + "loss": 0.2804, + "step": 21454 + }, + { + "epoch": 1.0050592589122593, + "grad_norm": 0.6061141095889278, + "learning_rate": 2.6009612412797526e-06, + "loss": 0.2942, + "step": 21455 + }, + { + "epoch": 1.0051061039021876, + "grad_norm": 0.6174344531803915, + "learning_rate": 2.6007717455541442e-06, + "loss": 0.2849, + "step": 21456 + }, + { + "epoch": 1.005152948892116, + "grad_norm": 0.570935397784869, + "learning_rate": 2.60058224924862e-06, + "loss": 0.2757, + "step": 21457 + }, + { + "epoch": 1.0051997938820443, + "grad_norm": 0.5916410568832178, + "learning_rate": 2.6003927523642704e-06, + "loss": 0.2844, + "step": 21458 + }, + { + "epoch": 1.0052466388719727, + "grad_norm": 0.5698155882057766, + "learning_rate": 2.600203254902185e-06, + "loss": 0.2829, + "step": 21459 + }, + { + "epoch": 1.005293483861901, + "grad_norm": 0.5916554648618924, + "learning_rate": 2.600013756863456e-06, + "loss": 0.2798, + "step": 21460 + }, + { + "epoch": 1.0053403288518292, + "grad_norm": 0.553507677834582, + "learning_rate": 2.5998242582491727e-06, + "loss": 0.2694, + "step": 21461 + }, + { + "epoch": 1.0053871738417577, + "grad_norm": 0.5777071908712633, + "learning_rate": 2.5996347590604253e-06, + "loss": 0.27, + "step": 21462 + }, + { + "epoch": 1.005434018831686, + "grad_norm": 0.5777704171859686, + "learning_rate": 2.5994452592983055e-06, + "loss": 0.2657, + "step": 21463 + }, + { + "epoch": 1.0054808638216142, + "grad_norm": 0.5400837702306492, + "learning_rate": 2.599255758963903e-06, + "loss": 0.2766, + "step": 21464 + }, + { + "epoch": 1.0055277088115426, + "grad_norm": 0.5636400240790542, + "learning_rate": 2.5990662580583085e-06, + "loss": 0.2764, + "step": 21465 + }, + { + "epoch": 1.0055745538014709, + "grad_norm": 0.5986224312336009, + "learning_rate": 2.5988767565826127e-06, + "loss": 0.28, + "step": 21466 + }, + { + "epoch": 1.0056213987913993, + "grad_norm": 0.5641099527498186, + "learning_rate": 2.598687254537906e-06, + "loss": 0.2825, + "step": 21467 + }, + { + "epoch": 1.0056682437813276, + "grad_norm": 0.5923517659431384, + "learning_rate": 2.598497751925279e-06, + "loss": 0.2495, + "step": 21468 + }, + { + "epoch": 1.0057150887712558, + "grad_norm": 0.5734931663637441, + "learning_rate": 2.5983082487458227e-06, + "loss": 0.275, + "step": 21469 + }, + { + "epoch": 1.0057619337611843, + "grad_norm": 0.5934667581046824, + "learning_rate": 2.5981187450006262e-06, + "loss": 0.2753, + "step": 21470 + }, + { + "epoch": 1.0058087787511125, + "grad_norm": 0.5634181076039196, + "learning_rate": 2.5979292406907807e-06, + "loss": 0.275, + "step": 21471 + }, + { + "epoch": 1.005855623741041, + "grad_norm": 0.5938274006623083, + "learning_rate": 2.597739735817379e-06, + "loss": 0.2891, + "step": 21472 + }, + { + "epoch": 1.0059024687309692, + "grad_norm": 0.5751178042640662, + "learning_rate": 2.5975502303815075e-06, + "loss": 0.2632, + "step": 21473 + }, + { + "epoch": 1.0059493137208975, + "grad_norm": 0.5532023665786715, + "learning_rate": 2.5973607243842596e-06, + "loss": 0.2586, + "step": 21474 + }, + { + "epoch": 1.005996158710826, + "grad_norm": 0.5814381381471735, + "learning_rate": 2.597171217826726e-06, + "loss": 0.2785, + "step": 21475 + }, + { + "epoch": 1.0060430037007542, + "grad_norm": 0.5670507843890359, + "learning_rate": 2.596981710709996e-06, + "loss": 0.2728, + "step": 21476 + }, + { + "epoch": 1.0060898486906826, + "grad_norm": 0.6051742447280076, + "learning_rate": 2.59679220303516e-06, + "loss": 0.2857, + "step": 21477 + }, + { + "epoch": 1.0061366936806109, + "grad_norm": 0.5995862266560684, + "learning_rate": 2.59660269480331e-06, + "loss": 0.2846, + "step": 21478 + }, + { + "epoch": 1.0061835386705391, + "grad_norm": 0.5608173316171454, + "learning_rate": 2.5964131860155354e-06, + "loss": 0.2743, + "step": 21479 + }, + { + "epoch": 1.0062303836604676, + "grad_norm": 0.5708419961014086, + "learning_rate": 2.5962236766729276e-06, + "loss": 0.2585, + "step": 21480 + }, + { + "epoch": 1.0062772286503958, + "grad_norm": 0.6311582155446115, + "learning_rate": 2.5960341667765766e-06, + "loss": 0.2969, + "step": 21481 + }, + { + "epoch": 1.006324073640324, + "grad_norm": 0.6419563608413341, + "learning_rate": 2.595844656327573e-06, + "loss": 0.2868, + "step": 21482 + }, + { + "epoch": 1.0063709186302525, + "grad_norm": 0.5759892311312336, + "learning_rate": 2.5956551453270066e-06, + "loss": 0.2599, + "step": 21483 + }, + { + "epoch": 1.0064177636201808, + "grad_norm": 0.5879242616144386, + "learning_rate": 2.5954656337759704e-06, + "loss": 0.2617, + "step": 21484 + }, + { + "epoch": 1.0064646086101092, + "grad_norm": 0.6054422610360221, + "learning_rate": 2.5952761216755524e-06, + "loss": 0.277, + "step": 21485 + }, + { + "epoch": 1.0065114536000375, + "grad_norm": 0.582636651651883, + "learning_rate": 2.5950866090268446e-06, + "loss": 0.2668, + "step": 21486 + }, + { + "epoch": 1.0065582985899657, + "grad_norm": 0.5481302297981152, + "learning_rate": 2.5948970958309373e-06, + "loss": 0.263, + "step": 21487 + }, + { + "epoch": 1.0066051435798942, + "grad_norm": 0.5861444797800168, + "learning_rate": 2.5947075820889208e-06, + "loss": 0.2775, + "step": 21488 + }, + { + "epoch": 1.0066519885698224, + "grad_norm": 0.5941741211801688, + "learning_rate": 2.594518067801887e-06, + "loss": 0.2801, + "step": 21489 + }, + { + "epoch": 1.0066988335597509, + "grad_norm": 0.5646469302714154, + "learning_rate": 2.5943285529709243e-06, + "loss": 0.2667, + "step": 21490 + }, + { + "epoch": 1.0067456785496791, + "grad_norm": 0.6024731077392902, + "learning_rate": 2.5941390375971247e-06, + "loss": 0.2589, + "step": 21491 + }, + { + "epoch": 1.0067925235396074, + "grad_norm": 0.615909762157642, + "learning_rate": 2.5939495216815784e-06, + "loss": 0.2796, + "step": 21492 + }, + { + "epoch": 1.0068393685295358, + "grad_norm": 0.6090296395945998, + "learning_rate": 2.5937600052253774e-06, + "loss": 0.2781, + "step": 21493 + }, + { + "epoch": 1.006886213519464, + "grad_norm": 0.5446008836214866, + "learning_rate": 2.5935704882296096e-06, + "loss": 0.2555, + "step": 21494 + }, + { + "epoch": 1.0069330585093925, + "grad_norm": 0.6180632598722665, + "learning_rate": 2.5933809706953677e-06, + "loss": 0.2878, + "step": 21495 + }, + { + "epoch": 1.0069799034993208, + "grad_norm": 0.5385363643059204, + "learning_rate": 2.5931914526237423e-06, + "loss": 0.2612, + "step": 21496 + }, + { + "epoch": 1.007026748489249, + "grad_norm": 0.5954476564405571, + "learning_rate": 2.5930019340158234e-06, + "loss": 0.2815, + "step": 21497 + }, + { + "epoch": 1.0070735934791775, + "grad_norm": 0.5697450703440975, + "learning_rate": 2.5928124148727013e-06, + "loss": 0.288, + "step": 21498 + }, + { + "epoch": 1.0071204384691057, + "grad_norm": 0.5821671998254192, + "learning_rate": 2.5926228951954674e-06, + "loss": 0.2675, + "step": 21499 + }, + { + "epoch": 1.007167283459034, + "grad_norm": 0.6235944643789101, + "learning_rate": 2.592433374985212e-06, + "loss": 0.2975, + "step": 21500 + }, + { + "epoch": 1.0072141284489624, + "grad_norm": 0.5713401880992023, + "learning_rate": 2.5922438542430256e-06, + "loss": 0.283, + "step": 21501 + }, + { + "epoch": 1.0072609734388906, + "grad_norm": 0.6487494870513466, + "learning_rate": 2.592054332969999e-06, + "loss": 0.2807, + "step": 21502 + }, + { + "epoch": 1.0073078184288191, + "grad_norm": 0.6423663741693927, + "learning_rate": 2.5918648111672234e-06, + "loss": 0.2888, + "step": 21503 + }, + { + "epoch": 1.0073546634187474, + "grad_norm": 0.5784184295933986, + "learning_rate": 2.5916752888357876e-06, + "loss": 0.2521, + "step": 21504 + }, + { + "epoch": 1.0074015084086756, + "grad_norm": 0.5877894180549523, + "learning_rate": 2.591485765976785e-06, + "loss": 0.2643, + "step": 21505 + }, + { + "epoch": 1.007448353398604, + "grad_norm": 0.546795214034399, + "learning_rate": 2.5912962425913033e-06, + "loss": 0.2785, + "step": 21506 + }, + { + "epoch": 1.0074951983885323, + "grad_norm": 0.5982724328555229, + "learning_rate": 2.591106718680436e-06, + "loss": 0.2887, + "step": 21507 + }, + { + "epoch": 1.0075420433784608, + "grad_norm": 0.5595245356330213, + "learning_rate": 2.5909171942452727e-06, + "loss": 0.2602, + "step": 21508 + }, + { + "epoch": 1.007588888368389, + "grad_norm": 0.5677393122082051, + "learning_rate": 2.5907276692869027e-06, + "loss": 0.2721, + "step": 21509 + }, + { + "epoch": 1.0076357333583172, + "grad_norm": 0.6307518418781827, + "learning_rate": 2.590538143806418e-06, + "loss": 0.2732, + "step": 21510 + }, + { + "epoch": 1.0076825783482457, + "grad_norm": 0.6107078502516854, + "learning_rate": 2.5903486178049092e-06, + "loss": 0.2953, + "step": 21511 + }, + { + "epoch": 1.007729423338174, + "grad_norm": 0.5682235319563682, + "learning_rate": 2.590159091283467e-06, + "loss": 0.2664, + "step": 21512 + }, + { + "epoch": 1.0077762683281024, + "grad_norm": 0.6073738681463896, + "learning_rate": 2.589969564243181e-06, + "loss": 0.2871, + "step": 21513 + }, + { + "epoch": 1.0078231133180307, + "grad_norm": 0.5701684278288811, + "learning_rate": 2.589780036685144e-06, + "loss": 0.2758, + "step": 21514 + }, + { + "epoch": 1.007869958307959, + "grad_norm": 0.5757123969077393, + "learning_rate": 2.589590508610445e-06, + "loss": 0.2684, + "step": 21515 + }, + { + "epoch": 1.0079168032978874, + "grad_norm": 0.60430886539235, + "learning_rate": 2.5894009800201745e-06, + "loss": 0.2855, + "step": 21516 + }, + { + "epoch": 1.0079636482878156, + "grad_norm": 0.5437358018834882, + "learning_rate": 2.589211450915425e-06, + "loss": 0.2667, + "step": 21517 + }, + { + "epoch": 1.0080104932777438, + "grad_norm": 0.5905541504842265, + "learning_rate": 2.5890219212972856e-06, + "loss": 0.2743, + "step": 21518 + }, + { + "epoch": 1.0080573382676723, + "grad_norm": 0.6017004031325847, + "learning_rate": 2.588832391166847e-06, + "loss": 0.2828, + "step": 21519 + }, + { + "epoch": 1.0081041832576005, + "grad_norm": 0.5723174376450602, + "learning_rate": 2.588642860525201e-06, + "loss": 0.2793, + "step": 21520 + }, + { + "epoch": 1.008151028247529, + "grad_norm": 0.5700429464785224, + "learning_rate": 2.5884533293734372e-06, + "loss": 0.2719, + "step": 21521 + }, + { + "epoch": 1.0081978732374572, + "grad_norm": 0.5490990791096536, + "learning_rate": 2.5882637977126462e-06, + "loss": 0.2646, + "step": 21522 + }, + { + "epoch": 1.0082447182273855, + "grad_norm": 0.6071188373097215, + "learning_rate": 2.5880742655439196e-06, + "loss": 0.3033, + "step": 21523 + }, + { + "epoch": 1.008291563217314, + "grad_norm": 0.603050310761784, + "learning_rate": 2.587884732868348e-06, + "loss": 0.2839, + "step": 21524 + }, + { + "epoch": 1.0083384082072422, + "grad_norm": 0.5720579928021144, + "learning_rate": 2.5876951996870215e-06, + "loss": 0.28, + "step": 21525 + }, + { + "epoch": 1.0083852531971707, + "grad_norm": 0.537286442881661, + "learning_rate": 2.587505666001031e-06, + "loss": 0.2744, + "step": 21526 + }, + { + "epoch": 1.008432098187099, + "grad_norm": 0.6192289802995181, + "learning_rate": 2.587316131811468e-06, + "loss": 0.2794, + "step": 21527 + }, + { + "epoch": 1.0084789431770271, + "grad_norm": 0.5998495221489198, + "learning_rate": 2.5871265971194227e-06, + "loss": 0.2913, + "step": 21528 + }, + { + "epoch": 1.0085257881669556, + "grad_norm": 0.654339887110915, + "learning_rate": 2.5869370619259847e-06, + "loss": 0.2927, + "step": 21529 + }, + { + "epoch": 1.0085726331568838, + "grad_norm": 0.5692729462849858, + "learning_rate": 2.586747526232247e-06, + "loss": 0.2667, + "step": 21530 + }, + { + "epoch": 1.0086194781468123, + "grad_norm": 0.5763085419457868, + "learning_rate": 2.586557990039299e-06, + "loss": 0.3002, + "step": 21531 + }, + { + "epoch": 1.0086663231367405, + "grad_norm": 0.5329172727434601, + "learning_rate": 2.5863684533482306e-06, + "loss": 0.2636, + "step": 21532 + }, + { + "epoch": 1.0087131681266688, + "grad_norm": 0.5995125781696751, + "learning_rate": 2.586178916160134e-06, + "loss": 0.286, + "step": 21533 + }, + { + "epoch": 1.0087600131165972, + "grad_norm": 0.5535075599929422, + "learning_rate": 2.585989378476099e-06, + "loss": 0.2641, + "step": 21534 + }, + { + "epoch": 1.0088068581065255, + "grad_norm": 0.6187124965215689, + "learning_rate": 2.5857998402972172e-06, + "loss": 0.3061, + "step": 21535 + }, + { + "epoch": 1.0088537030964537, + "grad_norm": 0.5631585998219092, + "learning_rate": 2.585610301624579e-06, + "loss": 0.2708, + "step": 21536 + }, + { + "epoch": 1.0089005480863822, + "grad_norm": 0.5916523913157558, + "learning_rate": 2.585420762459275e-06, + "loss": 0.2718, + "step": 21537 + }, + { + "epoch": 1.0089473930763104, + "grad_norm": 0.5943879921778011, + "learning_rate": 2.585231222802396e-06, + "loss": 0.2879, + "step": 21538 + }, + { + "epoch": 1.008994238066239, + "grad_norm": 0.6061696785200017, + "learning_rate": 2.585041682655032e-06, + "loss": 0.2641, + "step": 21539 + }, + { + "epoch": 1.0090410830561671, + "grad_norm": 0.5522905508472515, + "learning_rate": 2.584852142018275e-06, + "loss": 0.2788, + "step": 21540 + }, + { + "epoch": 1.0090879280460954, + "grad_norm": 0.6221863093344113, + "learning_rate": 2.5846626008932147e-06, + "loss": 0.284, + "step": 21541 + }, + { + "epoch": 1.0091347730360238, + "grad_norm": 0.6422300214045638, + "learning_rate": 2.584473059280944e-06, + "loss": 0.2879, + "step": 21542 + }, + { + "epoch": 1.009181618025952, + "grad_norm": 0.6102570317998017, + "learning_rate": 2.5842835171825502e-06, + "loss": 0.2874, + "step": 21543 + }, + { + "epoch": 1.0092284630158805, + "grad_norm": 0.5632450472298144, + "learning_rate": 2.5840939745991266e-06, + "loss": 0.2747, + "step": 21544 + }, + { + "epoch": 1.0092753080058088, + "grad_norm": 0.5696671151114231, + "learning_rate": 2.583904431531764e-06, + "loss": 0.2681, + "step": 21545 + }, + { + "epoch": 1.009322152995737, + "grad_norm": 0.6022980881852236, + "learning_rate": 2.583714887981552e-06, + "loss": 0.281, + "step": 21546 + }, + { + "epoch": 1.0093689979856655, + "grad_norm": 0.5959589243024137, + "learning_rate": 2.5835253439495816e-06, + "loss": 0.2639, + "step": 21547 + }, + { + "epoch": 1.0094158429755937, + "grad_norm": 0.6021802673811641, + "learning_rate": 2.583335799436944e-06, + "loss": 0.28, + "step": 21548 + }, + { + "epoch": 1.0094626879655222, + "grad_norm": 0.573254164116356, + "learning_rate": 2.5831462544447306e-06, + "loss": 0.2709, + "step": 21549 + }, + { + "epoch": 1.0095095329554504, + "grad_norm": 0.6052384453109538, + "learning_rate": 2.5829567089740305e-06, + "loss": 0.2772, + "step": 21550 + }, + { + "epoch": 1.0095563779453787, + "grad_norm": 0.5701083962900574, + "learning_rate": 2.5827671630259354e-06, + "loss": 0.2708, + "step": 21551 + }, + { + "epoch": 1.0096032229353071, + "grad_norm": 0.5748570759209107, + "learning_rate": 2.5825776166015363e-06, + "loss": 0.28, + "step": 21552 + }, + { + "epoch": 1.0096500679252354, + "grad_norm": 0.5766918665917555, + "learning_rate": 2.5823880697019238e-06, + "loss": 0.2748, + "step": 21553 + }, + { + "epoch": 1.0096969129151636, + "grad_norm": 0.5738192546943963, + "learning_rate": 2.582198522328189e-06, + "loss": 0.2677, + "step": 21554 + }, + { + "epoch": 1.009743757905092, + "grad_norm": 0.6613941149970992, + "learning_rate": 2.5820089744814215e-06, + "loss": 0.3016, + "step": 21555 + }, + { + "epoch": 1.0097906028950203, + "grad_norm": 0.6124518128294726, + "learning_rate": 2.5818194261627133e-06, + "loss": 0.2941, + "step": 21556 + }, + { + "epoch": 1.0098374478849488, + "grad_norm": 0.6021160112027285, + "learning_rate": 2.581629877373155e-06, + "loss": 0.2756, + "step": 21557 + }, + { + "epoch": 1.009884292874877, + "grad_norm": 0.6269104895579765, + "learning_rate": 2.581440328113837e-06, + "loss": 0.2759, + "step": 21558 + }, + { + "epoch": 1.0099311378648053, + "grad_norm": 0.5889431747764353, + "learning_rate": 2.5812507783858507e-06, + "loss": 0.2704, + "step": 21559 + }, + { + "epoch": 1.0099779828547337, + "grad_norm": 0.5905630718247242, + "learning_rate": 2.581061228190287e-06, + "loss": 0.2815, + "step": 21560 + }, + { + "epoch": 1.010024827844662, + "grad_norm": 0.601792734950296, + "learning_rate": 2.5808716775282354e-06, + "loss": 0.2962, + "step": 21561 + }, + { + "epoch": 1.0100716728345904, + "grad_norm": 0.608755389883279, + "learning_rate": 2.580682126400788e-06, + "loss": 0.293, + "step": 21562 + }, + { + "epoch": 1.0101185178245187, + "grad_norm": 0.6337054868036596, + "learning_rate": 2.580492574809036e-06, + "loss": 0.2867, + "step": 21563 + }, + { + "epoch": 1.010165362814447, + "grad_norm": 0.5433089655111073, + "learning_rate": 2.5803030227540684e-06, + "loss": 0.278, + "step": 21564 + }, + { + "epoch": 1.0102122078043754, + "grad_norm": 0.5954750937784717, + "learning_rate": 2.5801134702369774e-06, + "loss": 0.2863, + "step": 21565 + }, + { + "epoch": 1.0102590527943036, + "grad_norm": 0.5152682237318426, + "learning_rate": 2.5799239172588536e-06, + "loss": 0.2475, + "step": 21566 + }, + { + "epoch": 1.010305897784232, + "grad_norm": 0.5600168861109737, + "learning_rate": 2.579734363820788e-06, + "loss": 0.2682, + "step": 21567 + }, + { + "epoch": 1.0103527427741603, + "grad_norm": 0.6232078364952713, + "learning_rate": 2.5795448099238705e-06, + "loss": 0.263, + "step": 21568 + }, + { + "epoch": 1.0103995877640886, + "grad_norm": 0.5954607703623643, + "learning_rate": 2.5793552555691933e-06, + "loss": 0.2923, + "step": 21569 + }, + { + "epoch": 1.010446432754017, + "grad_norm": 0.5651349036310547, + "learning_rate": 2.579165700757846e-06, + "loss": 0.2581, + "step": 21570 + }, + { + "epoch": 1.0104932777439453, + "grad_norm": 0.5889591639553686, + "learning_rate": 2.57897614549092e-06, + "loss": 0.2665, + "step": 21571 + }, + { + "epoch": 1.0105401227338735, + "grad_norm": 0.6548733717864943, + "learning_rate": 2.5787865897695067e-06, + "loss": 0.2831, + "step": 21572 + }, + { + "epoch": 1.010586967723802, + "grad_norm": 0.6540144158478717, + "learning_rate": 2.5785970335946962e-06, + "loss": 0.2856, + "step": 21573 + }, + { + "epoch": 1.0106338127137302, + "grad_norm": 0.6797206730865459, + "learning_rate": 2.5784074769675795e-06, + "loss": 0.2799, + "step": 21574 + }, + { + "epoch": 1.0106806577036587, + "grad_norm": 0.5547116911284428, + "learning_rate": 2.578217919889247e-06, + "loss": 0.265, + "step": 21575 + }, + { + "epoch": 1.010727502693587, + "grad_norm": 0.6216956459880175, + "learning_rate": 2.5780283623607906e-06, + "loss": 0.2867, + "step": 21576 + }, + { + "epoch": 1.0107743476835152, + "grad_norm": 0.5673391191559252, + "learning_rate": 2.5778388043833e-06, + "loss": 0.277, + "step": 21577 + }, + { + "epoch": 1.0108211926734436, + "grad_norm": 0.5858058735274785, + "learning_rate": 2.577649245957868e-06, + "loss": 0.257, + "step": 21578 + }, + { + "epoch": 1.0108680376633719, + "grad_norm": 0.6160783496934136, + "learning_rate": 2.5774596870855824e-06, + "loss": 0.278, + "step": 21579 + }, + { + "epoch": 1.0109148826533003, + "grad_norm": 0.5905505629111903, + "learning_rate": 2.5772701277675364e-06, + "loss": 0.2869, + "step": 21580 + }, + { + "epoch": 1.0109617276432286, + "grad_norm": 0.6012177614541873, + "learning_rate": 2.5770805680048206e-06, + "loss": 0.2598, + "step": 21581 + }, + { + "epoch": 1.0110085726331568, + "grad_norm": 0.60271656624072, + "learning_rate": 2.5768910077985254e-06, + "loss": 0.2826, + "step": 21582 + }, + { + "epoch": 1.0110554176230853, + "grad_norm": 0.5982905783401983, + "learning_rate": 2.5767014471497407e-06, + "loss": 0.2821, + "step": 21583 + }, + { + "epoch": 1.0111022626130135, + "grad_norm": 0.5967519010621534, + "learning_rate": 2.57651188605956e-06, + "loss": 0.2754, + "step": 21584 + }, + { + "epoch": 1.011149107602942, + "grad_norm": 0.5726098911267379, + "learning_rate": 2.576322324529072e-06, + "loss": 0.2821, + "step": 21585 + }, + { + "epoch": 1.0111959525928702, + "grad_norm": 0.6171353776387122, + "learning_rate": 2.576132762559368e-06, + "loss": 0.2859, + "step": 21586 + }, + { + "epoch": 1.0112427975827984, + "grad_norm": 0.5634858589334082, + "learning_rate": 2.5759432001515395e-06, + "loss": 0.2587, + "step": 21587 + }, + { + "epoch": 1.011289642572727, + "grad_norm": 0.5562023920920972, + "learning_rate": 2.575753637306677e-06, + "loss": 0.2639, + "step": 21588 + }, + { + "epoch": 1.0113364875626552, + "grad_norm": 0.5739190808858095, + "learning_rate": 2.575564074025871e-06, + "loss": 0.2707, + "step": 21589 + }, + { + "epoch": 1.0113833325525834, + "grad_norm": 0.5702667691555203, + "learning_rate": 2.5753745103102127e-06, + "loss": 0.2776, + "step": 21590 + }, + { + "epoch": 1.0114301775425119, + "grad_norm": 0.5509759283183285, + "learning_rate": 2.5751849461607937e-06, + "loss": 0.2711, + "step": 21591 + }, + { + "epoch": 1.01147702253244, + "grad_norm": 0.5893988191732122, + "learning_rate": 2.574995381578703e-06, + "loss": 0.2796, + "step": 21592 + }, + { + "epoch": 1.0115238675223686, + "grad_norm": 0.6430481260715255, + "learning_rate": 2.574805816565033e-06, + "loss": 0.282, + "step": 21593 + }, + { + "epoch": 1.0115707125122968, + "grad_norm": 0.6042958693583571, + "learning_rate": 2.574616251120876e-06, + "loss": 0.2766, + "step": 21594 + }, + { + "epoch": 1.011617557502225, + "grad_norm": 0.5797317656312763, + "learning_rate": 2.5744266852473192e-06, + "loss": 0.2647, + "step": 21595 + }, + { + "epoch": 1.0116644024921535, + "grad_norm": 0.6351800898252445, + "learning_rate": 2.574237118945456e-06, + "loss": 0.2972, + "step": 21596 + }, + { + "epoch": 1.0117112474820817, + "grad_norm": 0.5851310814575389, + "learning_rate": 2.5740475522163775e-06, + "loss": 0.2644, + "step": 21597 + }, + { + "epoch": 1.0117580924720102, + "grad_norm": 0.576821323927488, + "learning_rate": 2.573857985061174e-06, + "loss": 0.273, + "step": 21598 + }, + { + "epoch": 1.0118049374619384, + "grad_norm": 0.5979376902331479, + "learning_rate": 2.5736684174809357e-06, + "loss": 0.281, + "step": 21599 + }, + { + "epoch": 1.0118517824518667, + "grad_norm": 0.5926791531950689, + "learning_rate": 2.573478849476755e-06, + "loss": 0.2976, + "step": 21600 + }, + { + "epoch": 1.0118986274417952, + "grad_norm": 0.6237656988645676, + "learning_rate": 2.5732892810497212e-06, + "loss": 0.2749, + "step": 21601 + }, + { + "epoch": 1.0119454724317234, + "grad_norm": 0.5300564163171011, + "learning_rate": 2.573099712200926e-06, + "loss": 0.2606, + "step": 21602 + }, + { + "epoch": 1.0119923174216519, + "grad_norm": 0.5831435572527719, + "learning_rate": 2.5729101429314607e-06, + "loss": 0.2667, + "step": 21603 + }, + { + "epoch": 1.01203916241158, + "grad_norm": 0.5954874122325464, + "learning_rate": 2.5727205732424153e-06, + "loss": 0.2775, + "step": 21604 + }, + { + "epoch": 1.0120860074015083, + "grad_norm": 0.5997294855337162, + "learning_rate": 2.5725310031348822e-06, + "loss": 0.2812, + "step": 21605 + }, + { + "epoch": 1.0121328523914368, + "grad_norm": 0.5939342094333955, + "learning_rate": 2.5723414326099513e-06, + "loss": 0.2721, + "step": 21606 + }, + { + "epoch": 1.012179697381365, + "grad_norm": 0.6034034702727773, + "learning_rate": 2.572151861668713e-06, + "loss": 0.2908, + "step": 21607 + }, + { + "epoch": 1.0122265423712933, + "grad_norm": 0.5610502124553578, + "learning_rate": 2.5719622903122594e-06, + "loss": 0.263, + "step": 21608 + }, + { + "epoch": 1.0122733873612217, + "grad_norm": 0.6433600203238965, + "learning_rate": 2.5717727185416806e-06, + "loss": 0.2776, + "step": 21609 + }, + { + "epoch": 1.01232023235115, + "grad_norm": 0.6043030251110585, + "learning_rate": 2.5715831463580676e-06, + "loss": 0.2769, + "step": 21610 + }, + { + "epoch": 1.0123670773410784, + "grad_norm": 0.5996308935717216, + "learning_rate": 2.5713935737625113e-06, + "loss": 0.2801, + "step": 21611 + }, + { + "epoch": 1.0124139223310067, + "grad_norm": 0.6046442622263475, + "learning_rate": 2.5712040007561045e-06, + "loss": 0.2877, + "step": 21612 + }, + { + "epoch": 1.012460767320935, + "grad_norm": 0.5463843726217708, + "learning_rate": 2.571014427339935e-06, + "loss": 0.2714, + "step": 21613 + }, + { + "epoch": 1.0125076123108634, + "grad_norm": 0.5449954945871178, + "learning_rate": 2.570824853515096e-06, + "loss": 0.2714, + "step": 21614 + }, + { + "epoch": 1.0125544573007916, + "grad_norm": 0.6113871057036716, + "learning_rate": 2.570635279282678e-06, + "loss": 0.2913, + "step": 21615 + }, + { + "epoch": 1.01260130229072, + "grad_norm": 0.6341390862009204, + "learning_rate": 2.5704457046437715e-06, + "loss": 0.2912, + "step": 21616 + }, + { + "epoch": 1.0126481472806483, + "grad_norm": 0.5906093302882303, + "learning_rate": 2.570256129599467e-06, + "loss": 0.2672, + "step": 21617 + }, + { + "epoch": 1.0126949922705766, + "grad_norm": 0.5724635968949408, + "learning_rate": 2.5700665541508568e-06, + "loss": 0.2708, + "step": 21618 + }, + { + "epoch": 1.012741837260505, + "grad_norm": 0.5715145810274338, + "learning_rate": 2.5698769782990313e-06, + "loss": 0.2924, + "step": 21619 + }, + { + "epoch": 1.0127886822504333, + "grad_norm": 0.599726737490771, + "learning_rate": 2.5696874020450808e-06, + "loss": 0.2768, + "step": 21620 + }, + { + "epoch": 1.0128355272403617, + "grad_norm": 0.6019210015646107, + "learning_rate": 2.569497825390098e-06, + "loss": 0.2904, + "step": 21621 + }, + { + "epoch": 1.01288237223029, + "grad_norm": 0.6000219251641167, + "learning_rate": 2.5693082483351718e-06, + "loss": 0.2907, + "step": 21622 + }, + { + "epoch": 1.0129292172202182, + "grad_norm": 0.6062257354733186, + "learning_rate": 2.569118670881393e-06, + "loss": 0.2888, + "step": 21623 + }, + { + "epoch": 1.0129760622101467, + "grad_norm": 0.5558456868415828, + "learning_rate": 2.5689290930298554e-06, + "loss": 0.27, + "step": 21624 + }, + { + "epoch": 1.013022907200075, + "grad_norm": 0.7045334179220958, + "learning_rate": 2.5687395147816467e-06, + "loss": 0.2958, + "step": 21625 + }, + { + "epoch": 1.0130697521900032, + "grad_norm": 0.5612340224265813, + "learning_rate": 2.568549936137861e-06, + "loss": 0.2611, + "step": 21626 + }, + { + "epoch": 1.0131165971799316, + "grad_norm": 0.5584453092805928, + "learning_rate": 2.568360357099587e-06, + "loss": 0.2594, + "step": 21627 + }, + { + "epoch": 1.0131634421698599, + "grad_norm": 0.6340958831492545, + "learning_rate": 2.5681707776679156e-06, + "loss": 0.2916, + "step": 21628 + }, + { + "epoch": 1.0132102871597883, + "grad_norm": 0.6046843313181076, + "learning_rate": 2.567981197843939e-06, + "loss": 0.292, + "step": 21629 + }, + { + "epoch": 1.0132571321497166, + "grad_norm": 0.62416465956352, + "learning_rate": 2.567791617628748e-06, + "loss": 0.2934, + "step": 21630 + }, + { + "epoch": 1.0133039771396448, + "grad_norm": 0.5539577031161593, + "learning_rate": 2.5676020370234326e-06, + "loss": 0.2666, + "step": 21631 + }, + { + "epoch": 1.0133508221295733, + "grad_norm": 0.5515640123818222, + "learning_rate": 2.5674124560290846e-06, + "loss": 0.2765, + "step": 21632 + }, + { + "epoch": 1.0133976671195015, + "grad_norm": 0.5641850351391231, + "learning_rate": 2.567222874646796e-06, + "loss": 0.2711, + "step": 21633 + }, + { + "epoch": 1.01344451210943, + "grad_norm": 0.6126910732876808, + "learning_rate": 2.5670332928776555e-06, + "loss": 0.2887, + "step": 21634 + }, + { + "epoch": 1.0134913570993582, + "grad_norm": 0.6104492027841214, + "learning_rate": 2.566843710722755e-06, + "loss": 0.2789, + "step": 21635 + }, + { + "epoch": 1.0135382020892865, + "grad_norm": 0.6467606495369552, + "learning_rate": 2.566654128183187e-06, + "loss": 0.2849, + "step": 21636 + }, + { + "epoch": 1.013585047079215, + "grad_norm": 0.5673088739732103, + "learning_rate": 2.5664645452600407e-06, + "loss": 0.2809, + "step": 21637 + }, + { + "epoch": 1.0136318920691432, + "grad_norm": 0.6666627552837661, + "learning_rate": 2.566274961954407e-06, + "loss": 0.2811, + "step": 21638 + }, + { + "epoch": 1.0136787370590716, + "grad_norm": 0.6354815607089214, + "learning_rate": 2.5660853782673782e-06, + "loss": 0.2962, + "step": 21639 + }, + { + "epoch": 1.0137255820489999, + "grad_norm": 0.6027255400381653, + "learning_rate": 2.5658957942000447e-06, + "loss": 0.2708, + "step": 21640 + }, + { + "epoch": 1.0137724270389281, + "grad_norm": 0.5947733971119908, + "learning_rate": 2.565706209753497e-06, + "loss": 0.2826, + "step": 21641 + }, + { + "epoch": 1.0138192720288566, + "grad_norm": 0.5472677335479232, + "learning_rate": 2.565516624928827e-06, + "loss": 0.2624, + "step": 21642 + }, + { + "epoch": 1.0138661170187848, + "grad_norm": 0.5854016267504236, + "learning_rate": 2.565327039727125e-06, + "loss": 0.285, + "step": 21643 + }, + { + "epoch": 1.013912962008713, + "grad_norm": 0.6027438431510045, + "learning_rate": 2.565137454149482e-06, + "loss": 0.2883, + "step": 21644 + }, + { + "epoch": 1.0139598069986415, + "grad_norm": 0.5941592577860794, + "learning_rate": 2.5649478681969904e-06, + "loss": 0.2918, + "step": 21645 + }, + { + "epoch": 1.0140066519885698, + "grad_norm": 0.5725393442539393, + "learning_rate": 2.564758281870739e-06, + "loss": 0.2467, + "step": 21646 + }, + { + "epoch": 1.0140534969784982, + "grad_norm": 0.6007389673990841, + "learning_rate": 2.5645686951718207e-06, + "loss": 0.2841, + "step": 21647 + }, + { + "epoch": 1.0141003419684265, + "grad_norm": 0.6287867504256456, + "learning_rate": 2.5643791081013255e-06, + "loss": 0.2921, + "step": 21648 + }, + { + "epoch": 1.0141471869583547, + "grad_norm": 0.5916749590891505, + "learning_rate": 2.5641895206603452e-06, + "loss": 0.279, + "step": 21649 + }, + { + "epoch": 1.0141940319482832, + "grad_norm": 0.6546362301816994, + "learning_rate": 2.5639999328499697e-06, + "loss": 0.2887, + "step": 21650 + }, + { + "epoch": 1.0142408769382114, + "grad_norm": 0.555770025499186, + "learning_rate": 2.5638103446712907e-06, + "loss": 0.2734, + "step": 21651 + }, + { + "epoch": 1.0142877219281399, + "grad_norm": 0.6051203073982923, + "learning_rate": 2.5636207561253996e-06, + "loss": 0.2918, + "step": 21652 + }, + { + "epoch": 1.0143345669180681, + "grad_norm": 0.5793440552421141, + "learning_rate": 2.5634311672133866e-06, + "loss": 0.2723, + "step": 21653 + }, + { + "epoch": 1.0143814119079964, + "grad_norm": 0.5284568385642646, + "learning_rate": 2.5632415779363435e-06, + "loss": 0.2707, + "step": 21654 + }, + { + "epoch": 1.0144282568979248, + "grad_norm": 0.5673368770420776, + "learning_rate": 2.563051988295361e-06, + "loss": 0.2918, + "step": 21655 + }, + { + "epoch": 1.014475101887853, + "grad_norm": 0.5440649098941585, + "learning_rate": 2.5628623982915295e-06, + "loss": 0.2826, + "step": 21656 + }, + { + "epoch": 1.0145219468777815, + "grad_norm": 0.6064262159477974, + "learning_rate": 2.5626728079259415e-06, + "loss": 0.2826, + "step": 21657 + }, + { + "epoch": 1.0145687918677098, + "grad_norm": 0.5807947698319869, + "learning_rate": 2.5624832171996876e-06, + "loss": 0.2814, + "step": 21658 + }, + { + "epoch": 1.014615636857638, + "grad_norm": 0.5659423741316337, + "learning_rate": 2.5622936261138577e-06, + "loss": 0.2736, + "step": 21659 + }, + { + "epoch": 1.0146624818475665, + "grad_norm": 0.6075819327019991, + "learning_rate": 2.5621040346695436e-06, + "loss": 0.2741, + "step": 21660 + }, + { + "epoch": 1.0147093268374947, + "grad_norm": 0.5883933179351566, + "learning_rate": 2.561914442867837e-06, + "loss": 0.2584, + "step": 21661 + }, + { + "epoch": 1.014756171827423, + "grad_norm": 0.5884907697396515, + "learning_rate": 2.5617248507098273e-06, + "loss": 0.287, + "step": 21662 + }, + { + "epoch": 1.0148030168173514, + "grad_norm": 0.6266725153089039, + "learning_rate": 2.5615352581966075e-06, + "loss": 0.287, + "step": 21663 + }, + { + "epoch": 1.0148498618072797, + "grad_norm": 0.5546308013433962, + "learning_rate": 2.561345665329268e-06, + "loss": 0.2676, + "step": 21664 + }, + { + "epoch": 1.0148967067972081, + "grad_norm": 0.6007831820264785, + "learning_rate": 2.5611560721088993e-06, + "loss": 0.2914, + "step": 21665 + }, + { + "epoch": 1.0149435517871364, + "grad_norm": 0.6168379479332301, + "learning_rate": 2.560966478536592e-06, + "loss": 0.2917, + "step": 21666 + }, + { + "epoch": 1.0149903967770646, + "grad_norm": 0.5397417543218644, + "learning_rate": 2.5607768846134383e-06, + "loss": 0.2627, + "step": 21667 + }, + { + "epoch": 1.015037241766993, + "grad_norm": 0.6081777373428919, + "learning_rate": 2.56058729034053e-06, + "loss": 0.2989, + "step": 21668 + }, + { + "epoch": 1.0150840867569213, + "grad_norm": 0.5866486680724188, + "learning_rate": 2.560397695718956e-06, + "loss": 0.2798, + "step": 21669 + }, + { + "epoch": 1.0151309317468498, + "grad_norm": 0.5452288731059104, + "learning_rate": 2.5602081007498086e-06, + "loss": 0.2711, + "step": 21670 + }, + { + "epoch": 1.015177776736778, + "grad_norm": 0.5464746420636273, + "learning_rate": 2.560018505434179e-06, + "loss": 0.2567, + "step": 21671 + }, + { + "epoch": 1.0152246217267062, + "grad_norm": 0.5962739596000153, + "learning_rate": 2.559828909773157e-06, + "loss": 0.2771, + "step": 21672 + }, + { + "epoch": 1.0152714667166347, + "grad_norm": 0.5328959059772101, + "learning_rate": 2.559639313767836e-06, + "loss": 0.2488, + "step": 21673 + }, + { + "epoch": 1.015318311706563, + "grad_norm": 0.6183938405367624, + "learning_rate": 2.559449717419305e-06, + "loss": 0.2939, + "step": 21674 + }, + { + "epoch": 1.0153651566964914, + "grad_norm": 0.6767518797848762, + "learning_rate": 2.5592601207286562e-06, + "loss": 0.2988, + "step": 21675 + }, + { + "epoch": 1.0154120016864197, + "grad_norm": 0.5884856900252861, + "learning_rate": 2.5590705236969803e-06, + "loss": 0.2908, + "step": 21676 + }, + { + "epoch": 1.015458846676348, + "grad_norm": 0.5884341356079934, + "learning_rate": 2.5588809263253676e-06, + "loss": 0.2675, + "step": 21677 + }, + { + "epoch": 1.0155056916662764, + "grad_norm": 0.597533541013571, + "learning_rate": 2.558691328614911e-06, + "loss": 0.2733, + "step": 21678 + }, + { + "epoch": 1.0155525366562046, + "grad_norm": 0.6181256383259526, + "learning_rate": 2.5585017305667005e-06, + "loss": 0.2881, + "step": 21679 + }, + { + "epoch": 1.0155993816461328, + "grad_norm": 0.5980408034602156, + "learning_rate": 2.558312132181826e-06, + "loss": 0.2697, + "step": 21680 + }, + { + "epoch": 1.0156462266360613, + "grad_norm": 0.6194162459104091, + "learning_rate": 2.5581225334613806e-06, + "loss": 0.2698, + "step": 21681 + }, + { + "epoch": 1.0156930716259895, + "grad_norm": 0.5686794169613806, + "learning_rate": 2.557932934406455e-06, + "loss": 0.2647, + "step": 21682 + }, + { + "epoch": 1.015739916615918, + "grad_norm": 0.5856280962565504, + "learning_rate": 2.5577433350181395e-06, + "loss": 0.2833, + "step": 21683 + }, + { + "epoch": 1.0157867616058462, + "grad_norm": 0.6423541791834575, + "learning_rate": 2.5575537352975257e-06, + "loss": 0.285, + "step": 21684 + }, + { + "epoch": 1.0158336065957745, + "grad_norm": 0.6238498604167833, + "learning_rate": 2.5573641352457047e-06, + "loss": 0.2826, + "step": 21685 + }, + { + "epoch": 1.015880451585703, + "grad_norm": 0.5736325925121216, + "learning_rate": 2.5571745348637678e-06, + "loss": 0.2616, + "step": 21686 + }, + { + "epoch": 1.0159272965756312, + "grad_norm": 0.7148742325470049, + "learning_rate": 2.556984934152805e-06, + "loss": 0.3068, + "step": 21687 + }, + { + "epoch": 1.0159741415655597, + "grad_norm": 0.5278870727246775, + "learning_rate": 2.556795333113909e-06, + "loss": 0.2593, + "step": 21688 + }, + { + "epoch": 1.016020986555488, + "grad_norm": 0.6644129174229022, + "learning_rate": 2.55660573174817e-06, + "loss": 0.2593, + "step": 21689 + }, + { + "epoch": 1.0160678315454161, + "grad_norm": 0.5956603840097737, + "learning_rate": 2.5564161300566793e-06, + "loss": 0.2703, + "step": 21690 + }, + { + "epoch": 1.0161146765353446, + "grad_norm": 0.5918381938467919, + "learning_rate": 2.5562265280405274e-06, + "loss": 0.2848, + "step": 21691 + }, + { + "epoch": 1.0161615215252728, + "grad_norm": 0.6638001099124291, + "learning_rate": 2.5560369257008064e-06, + "loss": 0.2728, + "step": 21692 + }, + { + "epoch": 1.0162083665152013, + "grad_norm": 0.5630091490722329, + "learning_rate": 2.5558473230386073e-06, + "loss": 0.2798, + "step": 21693 + }, + { + "epoch": 1.0162552115051295, + "grad_norm": 0.5655911991375363, + "learning_rate": 2.55565772005502e-06, + "loss": 0.2615, + "step": 21694 + }, + { + "epoch": 1.0163020564950578, + "grad_norm": 0.5434858635620511, + "learning_rate": 2.555468116751137e-06, + "loss": 0.2595, + "step": 21695 + }, + { + "epoch": 1.0163489014849862, + "grad_norm": 0.5821396728742987, + "learning_rate": 2.5552785131280496e-06, + "loss": 0.2755, + "step": 21696 + }, + { + "epoch": 1.0163957464749145, + "grad_norm": 0.6657533324824221, + "learning_rate": 2.5550889091868476e-06, + "loss": 0.2967, + "step": 21697 + }, + { + "epoch": 1.0164425914648427, + "grad_norm": 0.6259604599959072, + "learning_rate": 2.5548993049286224e-06, + "loss": 0.276, + "step": 21698 + }, + { + "epoch": 1.0164894364547712, + "grad_norm": 0.6382848379328047, + "learning_rate": 2.554709700354466e-06, + "loss": 0.2945, + "step": 21699 + }, + { + "epoch": 1.0165362814446994, + "grad_norm": 0.6055902751428706, + "learning_rate": 2.554520095465469e-06, + "loss": 0.2957, + "step": 21700 + }, + { + "epoch": 1.016583126434628, + "grad_norm": 0.6020027478501768, + "learning_rate": 2.5543304902627218e-06, + "loss": 0.2864, + "step": 21701 + }, + { + "epoch": 1.0166299714245561, + "grad_norm": 0.5417181804590481, + "learning_rate": 2.554140884747317e-06, + "loss": 0.2599, + "step": 21702 + }, + { + "epoch": 1.0166768164144844, + "grad_norm": 0.619763709041575, + "learning_rate": 2.553951278920346e-06, + "loss": 0.2937, + "step": 21703 + }, + { + "epoch": 1.0167236614044128, + "grad_norm": 0.5916871827415524, + "learning_rate": 2.553761672782897e-06, + "loss": 0.2717, + "step": 21704 + }, + { + "epoch": 1.016770506394341, + "grad_norm": 0.6161190348503607, + "learning_rate": 2.5535720663360634e-06, + "loss": 0.2879, + "step": 21705 + }, + { + "epoch": 1.0168173513842695, + "grad_norm": 0.5573466892196534, + "learning_rate": 2.553382459580937e-06, + "loss": 0.2797, + "step": 21706 + }, + { + "epoch": 1.0168641963741978, + "grad_norm": 0.585176457484397, + "learning_rate": 2.5531928525186077e-06, + "loss": 0.2785, + "step": 21707 + }, + { + "epoch": 1.016911041364126, + "grad_norm": 0.595093482262348, + "learning_rate": 2.553003245150167e-06, + "loss": 0.2652, + "step": 21708 + }, + { + "epoch": 1.0169578863540545, + "grad_norm": 0.6024800866262261, + "learning_rate": 2.5528136374767052e-06, + "loss": 0.2773, + "step": 21709 + }, + { + "epoch": 1.0170047313439827, + "grad_norm": 0.5864258179164551, + "learning_rate": 2.5526240294993155e-06, + "loss": 0.2914, + "step": 21710 + }, + { + "epoch": 1.0170515763339112, + "grad_norm": 0.5364679330148795, + "learning_rate": 2.5524344212190865e-06, + "loss": 0.257, + "step": 21711 + }, + { + "epoch": 1.0170984213238394, + "grad_norm": 0.5518366020329739, + "learning_rate": 2.5522448126371113e-06, + "loss": 0.2588, + "step": 21712 + }, + { + "epoch": 1.0171452663137677, + "grad_norm": 0.5649285179720618, + "learning_rate": 2.55205520375448e-06, + "loss": 0.2808, + "step": 21713 + }, + { + "epoch": 1.0171921113036961, + "grad_norm": 0.5787405601179647, + "learning_rate": 2.551865594572284e-06, + "loss": 0.2774, + "step": 21714 + }, + { + "epoch": 1.0172389562936244, + "grad_norm": 0.58722452888306, + "learning_rate": 2.551675985091614e-06, + "loss": 0.2775, + "step": 21715 + }, + { + "epoch": 1.0172858012835526, + "grad_norm": 0.6007431391509497, + "learning_rate": 2.5514863753135626e-06, + "loss": 0.273, + "step": 21716 + }, + { + "epoch": 1.017332646273481, + "grad_norm": 0.5745155291005036, + "learning_rate": 2.55129676523922e-06, + "loss": 0.277, + "step": 21717 + }, + { + "epoch": 1.0173794912634093, + "grad_norm": 0.6137531201554696, + "learning_rate": 2.551107154869677e-06, + "loss": 0.2685, + "step": 21718 + }, + { + "epoch": 1.0174263362533378, + "grad_norm": 0.6453189431883604, + "learning_rate": 2.5509175442060256e-06, + "loss": 0.3053, + "step": 21719 + }, + { + "epoch": 1.017473181243266, + "grad_norm": 0.5871753966717352, + "learning_rate": 2.5507279332493567e-06, + "loss": 0.2794, + "step": 21720 + }, + { + "epoch": 1.0175200262331943, + "grad_norm": 0.6087568666245128, + "learning_rate": 2.5505383220007605e-06, + "loss": 0.2732, + "step": 21721 + }, + { + "epoch": 1.0175668712231227, + "grad_norm": 0.5975402565954407, + "learning_rate": 2.5503487104613296e-06, + "loss": 0.2699, + "step": 21722 + }, + { + "epoch": 1.017613716213051, + "grad_norm": 0.5678877787382328, + "learning_rate": 2.550159098632154e-06, + "loss": 0.2823, + "step": 21723 + }, + { + "epoch": 1.0176605612029794, + "grad_norm": 0.5924807958514199, + "learning_rate": 2.549969486514326e-06, + "loss": 0.2631, + "step": 21724 + }, + { + "epoch": 1.0177074061929077, + "grad_norm": 0.604633065008511, + "learning_rate": 2.5497798741089362e-06, + "loss": 0.2777, + "step": 21725 + }, + { + "epoch": 1.017754251182836, + "grad_norm": 0.5739415572050609, + "learning_rate": 2.5495902614170746e-06, + "loss": 0.2807, + "step": 21726 + }, + { + "epoch": 1.0178010961727644, + "grad_norm": 0.5892986135783874, + "learning_rate": 2.549400648439835e-06, + "loss": 0.2881, + "step": 21727 + }, + { + "epoch": 1.0178479411626926, + "grad_norm": 0.5397735886060043, + "learning_rate": 2.5492110351783068e-06, + "loss": 0.2581, + "step": 21728 + }, + { + "epoch": 1.017894786152621, + "grad_norm": 0.5963183925600284, + "learning_rate": 2.549021421633581e-06, + "loss": 0.2749, + "step": 21729 + }, + { + "epoch": 1.0179416311425493, + "grad_norm": 0.5704841282407267, + "learning_rate": 2.548831807806749e-06, + "loss": 0.2598, + "step": 21730 + }, + { + "epoch": 1.0179884761324776, + "grad_norm": 0.5614631866794818, + "learning_rate": 2.5486421936989027e-06, + "loss": 0.2624, + "step": 21731 + }, + { + "epoch": 1.018035321122406, + "grad_norm": 0.5676683355204346, + "learning_rate": 2.548452579311132e-06, + "loss": 0.2771, + "step": 21732 + }, + { + "epoch": 1.0180821661123343, + "grad_norm": 0.5282653260895505, + "learning_rate": 2.5482629646445295e-06, + "loss": 0.2528, + "step": 21733 + }, + { + "epoch": 1.0181290111022625, + "grad_norm": 0.5864372247292594, + "learning_rate": 2.5480733497001862e-06, + "loss": 0.2533, + "step": 21734 + }, + { + "epoch": 1.018175856092191, + "grad_norm": 0.6259370403782539, + "learning_rate": 2.5478837344791922e-06, + "loss": 0.2909, + "step": 21735 + }, + { + "epoch": 1.0182227010821192, + "grad_norm": 0.6961931452667106, + "learning_rate": 2.5476941189826393e-06, + "loss": 0.291, + "step": 21736 + }, + { + "epoch": 1.0182695460720477, + "grad_norm": 0.604588783611533, + "learning_rate": 2.547504503211619e-06, + "loss": 0.2847, + "step": 21737 + }, + { + "epoch": 1.018316391061976, + "grad_norm": 0.5949376664893246, + "learning_rate": 2.5473148871672226e-06, + "loss": 0.2759, + "step": 21738 + }, + { + "epoch": 1.0183632360519042, + "grad_norm": 0.5544073215401368, + "learning_rate": 2.5471252708505406e-06, + "loss": 0.2676, + "step": 21739 + }, + { + "epoch": 1.0184100810418326, + "grad_norm": 0.6053390874410373, + "learning_rate": 2.5469356542626646e-06, + "loss": 0.2894, + "step": 21740 + }, + { + "epoch": 1.0184569260317609, + "grad_norm": 0.5731041957591542, + "learning_rate": 2.5467460374046854e-06, + "loss": 0.2903, + "step": 21741 + }, + { + "epoch": 1.0185037710216893, + "grad_norm": 0.6194402618668018, + "learning_rate": 2.5465564202776944e-06, + "loss": 0.2874, + "step": 21742 + }, + { + "epoch": 1.0185506160116176, + "grad_norm": 0.5677631477041064, + "learning_rate": 2.5463668028827836e-06, + "loss": 0.2792, + "step": 21743 + }, + { + "epoch": 1.0185974610015458, + "grad_norm": 0.6184702742636099, + "learning_rate": 2.5461771852210426e-06, + "loss": 0.2944, + "step": 21744 + }, + { + "epoch": 1.0186443059914743, + "grad_norm": 0.5625871051444465, + "learning_rate": 2.545987567293564e-06, + "loss": 0.2788, + "step": 21745 + }, + { + "epoch": 1.0186911509814025, + "grad_norm": 0.5559920674087648, + "learning_rate": 2.545797949101439e-06, + "loss": 0.2666, + "step": 21746 + }, + { + "epoch": 1.018737995971331, + "grad_norm": 0.5910781440978652, + "learning_rate": 2.5456083306457573e-06, + "loss": 0.2857, + "step": 21747 + }, + { + "epoch": 1.0187848409612592, + "grad_norm": 0.6109756114262153, + "learning_rate": 2.545418711927612e-06, + "loss": 0.2936, + "step": 21748 + }, + { + "epoch": 1.0188316859511874, + "grad_norm": 0.5821781613237236, + "learning_rate": 2.545229092948093e-06, + "loss": 0.2675, + "step": 21749 + }, + { + "epoch": 1.018878530941116, + "grad_norm": 0.5586484094729456, + "learning_rate": 2.5450394737082916e-06, + "loss": 0.2543, + "step": 21750 + }, + { + "epoch": 1.0189253759310442, + "grad_norm": 0.5939758522311899, + "learning_rate": 2.5448498542092997e-06, + "loss": 0.2653, + "step": 21751 + }, + { + "epoch": 1.0189722209209724, + "grad_norm": 0.6012346425682508, + "learning_rate": 2.5446602344522088e-06, + "loss": 0.28, + "step": 21752 + }, + { + "epoch": 1.0190190659109009, + "grad_norm": 0.5821264521743381, + "learning_rate": 2.5444706144381086e-06, + "loss": 0.2771, + "step": 21753 + }, + { + "epoch": 1.019065910900829, + "grad_norm": 0.5521137606293011, + "learning_rate": 2.5442809941680914e-06, + "loss": 0.2805, + "step": 21754 + }, + { + "epoch": 1.0191127558907576, + "grad_norm": 0.5513330951053863, + "learning_rate": 2.544091373643248e-06, + "loss": 0.2662, + "step": 21755 + }, + { + "epoch": 1.0191596008806858, + "grad_norm": 0.5460502082327675, + "learning_rate": 2.54390175286467e-06, + "loss": 0.2593, + "step": 21756 + }, + { + "epoch": 1.019206445870614, + "grad_norm": 0.6157648399991309, + "learning_rate": 2.543712131833449e-06, + "loss": 0.2878, + "step": 21757 + }, + { + "epoch": 1.0192532908605425, + "grad_norm": 0.5963919724668926, + "learning_rate": 2.543522510550675e-06, + "loss": 0.2804, + "step": 21758 + }, + { + "epoch": 1.0193001358504707, + "grad_norm": 0.644234096196462, + "learning_rate": 2.543332889017441e-06, + "loss": 0.275, + "step": 21759 + }, + { + "epoch": 1.0193469808403992, + "grad_norm": 0.5923547636541971, + "learning_rate": 2.5431432672348354e-06, + "loss": 0.2704, + "step": 21760 + }, + { + "epoch": 1.0193938258303274, + "grad_norm": 0.578044352684706, + "learning_rate": 2.5429536452039523e-06, + "loss": 0.2675, + "step": 21761 + }, + { + "epoch": 1.0194406708202557, + "grad_norm": 0.5822022281175502, + "learning_rate": 2.5427640229258817e-06, + "loss": 0.2785, + "step": 21762 + }, + { + "epoch": 1.0194875158101842, + "grad_norm": 0.5578882688858394, + "learning_rate": 2.5425744004017146e-06, + "loss": 0.2573, + "step": 21763 + }, + { + "epoch": 1.0195343608001124, + "grad_norm": 0.6315341672111039, + "learning_rate": 2.5423847776325423e-06, + "loss": 0.2769, + "step": 21764 + }, + { + "epoch": 1.0195812057900409, + "grad_norm": 0.5615271105381987, + "learning_rate": 2.5421951546194564e-06, + "loss": 0.2687, + "step": 21765 + }, + { + "epoch": 1.019628050779969, + "grad_norm": 0.5575729207105853, + "learning_rate": 2.5420055313635485e-06, + "loss": 0.2672, + "step": 21766 + }, + { + "epoch": 1.0196748957698973, + "grad_norm": 0.6144547891952015, + "learning_rate": 2.541815907865909e-06, + "loss": 0.2895, + "step": 21767 + }, + { + "epoch": 1.0197217407598258, + "grad_norm": 0.5550499904148118, + "learning_rate": 2.5416262841276292e-06, + "loss": 0.2703, + "step": 21768 + }, + { + "epoch": 1.019768585749754, + "grad_norm": 0.5707018545751888, + "learning_rate": 2.5414366601498007e-06, + "loss": 0.2787, + "step": 21769 + }, + { + "epoch": 1.0198154307396823, + "grad_norm": 0.5936925507424133, + "learning_rate": 2.541247035933515e-06, + "loss": 0.2924, + "step": 21770 + }, + { + "epoch": 1.0198622757296107, + "grad_norm": 0.5949704587955622, + "learning_rate": 2.541057411479862e-06, + "loss": 0.2731, + "step": 21771 + }, + { + "epoch": 1.019909120719539, + "grad_norm": 0.6100973996430641, + "learning_rate": 2.5408677867899344e-06, + "loss": 0.2789, + "step": 21772 + }, + { + "epoch": 1.0199559657094674, + "grad_norm": 0.5435489463092835, + "learning_rate": 2.540678161864824e-06, + "loss": 0.2659, + "step": 21773 + }, + { + "epoch": 1.0200028106993957, + "grad_norm": 0.5812227102678337, + "learning_rate": 2.54048853670562e-06, + "loss": 0.2751, + "step": 21774 + }, + { + "epoch": 1.020049655689324, + "grad_norm": 0.5439524040876517, + "learning_rate": 2.540298911313414e-06, + "loss": 0.2695, + "step": 21775 + }, + { + "epoch": 1.0200965006792524, + "grad_norm": 0.6525424969854786, + "learning_rate": 2.5401092856892995e-06, + "loss": 0.2735, + "step": 21776 + }, + { + "epoch": 1.0201433456691806, + "grad_norm": 0.5776602117151328, + "learning_rate": 2.539919659834365e-06, + "loss": 0.2725, + "step": 21777 + }, + { + "epoch": 1.020190190659109, + "grad_norm": 0.5845436575725871, + "learning_rate": 2.5397300337497034e-06, + "loss": 0.2807, + "step": 21778 + }, + { + "epoch": 1.0202370356490373, + "grad_norm": 0.6633305918914674, + "learning_rate": 2.5395404074364055e-06, + "loss": 0.3044, + "step": 21779 + }, + { + "epoch": 1.0202838806389656, + "grad_norm": 0.5864460013469297, + "learning_rate": 2.5393507808955624e-06, + "loss": 0.2762, + "step": 21780 + }, + { + "epoch": 1.020330725628894, + "grad_norm": 0.5417710756266659, + "learning_rate": 2.539161154128265e-06, + "loss": 0.2597, + "step": 21781 + }, + { + "epoch": 1.0203775706188223, + "grad_norm": 0.5557635220291937, + "learning_rate": 2.5389715271356052e-06, + "loss": 0.2645, + "step": 21782 + }, + { + "epoch": 1.0204244156087507, + "grad_norm": 0.6369869187990089, + "learning_rate": 2.538781899918675e-06, + "loss": 0.2821, + "step": 21783 + }, + { + "epoch": 1.020471260598679, + "grad_norm": 0.5563867096437919, + "learning_rate": 2.5385922724785635e-06, + "loss": 0.2627, + "step": 21784 + }, + { + "epoch": 1.0205181055886072, + "grad_norm": 0.5718584746439889, + "learning_rate": 2.5384026448163627e-06, + "loss": 0.2707, + "step": 21785 + }, + { + "epoch": 1.0205649505785357, + "grad_norm": 0.6207839087874962, + "learning_rate": 2.538213016933166e-06, + "loss": 0.289, + "step": 21786 + }, + { + "epoch": 1.020611795568464, + "grad_norm": 0.6259082882496929, + "learning_rate": 2.538023388830062e-06, + "loss": 0.2916, + "step": 21787 + }, + { + "epoch": 1.0206586405583922, + "grad_norm": 0.5670711206470964, + "learning_rate": 2.537833760508143e-06, + "loss": 0.2646, + "step": 21788 + }, + { + "epoch": 1.0207054855483206, + "grad_norm": 0.5838943063095567, + "learning_rate": 2.5376441319685e-06, + "loss": 0.2924, + "step": 21789 + }, + { + "epoch": 1.0207523305382489, + "grad_norm": 0.609786678230567, + "learning_rate": 2.5374545032122255e-06, + "loss": 0.2819, + "step": 21790 + }, + { + "epoch": 1.0207991755281773, + "grad_norm": 0.5486117586641006, + "learning_rate": 2.537264874240409e-06, + "loss": 0.2823, + "step": 21791 + }, + { + "epoch": 1.0208460205181056, + "grad_norm": 0.6139647297118027, + "learning_rate": 2.5370752450541425e-06, + "loss": 0.2887, + "step": 21792 + }, + { + "epoch": 1.0208928655080338, + "grad_norm": 0.6742666643075691, + "learning_rate": 2.5368856156545174e-06, + "loss": 0.2793, + "step": 21793 + }, + { + "epoch": 1.0209397104979623, + "grad_norm": 0.6270457591734119, + "learning_rate": 2.536695986042625e-06, + "loss": 0.2837, + "step": 21794 + }, + { + "epoch": 1.0209865554878905, + "grad_norm": 0.6009765605849421, + "learning_rate": 2.5365063562195564e-06, + "loss": 0.2915, + "step": 21795 + }, + { + "epoch": 1.021033400477819, + "grad_norm": 0.5745528908277645, + "learning_rate": 2.5363167261864024e-06, + "loss": 0.2723, + "step": 21796 + }, + { + "epoch": 1.0210802454677472, + "grad_norm": 0.5863175347685481, + "learning_rate": 2.5361270959442553e-06, + "loss": 0.2689, + "step": 21797 + }, + { + "epoch": 1.0211270904576755, + "grad_norm": 0.6507783537765685, + "learning_rate": 2.535937465494206e-06, + "loss": 0.3093, + "step": 21798 + }, + { + "epoch": 1.021173935447604, + "grad_norm": 0.6316206794077269, + "learning_rate": 2.535747834837345e-06, + "loss": 0.2922, + "step": 21799 + }, + { + "epoch": 1.0212207804375322, + "grad_norm": 0.6184743955269604, + "learning_rate": 2.5355582039747647e-06, + "loss": 0.281, + "step": 21800 + }, + { + "epoch": 1.0212676254274606, + "grad_norm": 0.6295204734999597, + "learning_rate": 2.5353685729075554e-06, + "loss": 0.2844, + "step": 21801 + }, + { + "epoch": 1.0213144704173889, + "grad_norm": 0.5807757204562413, + "learning_rate": 2.5351789416368087e-06, + "loss": 0.2426, + "step": 21802 + }, + { + "epoch": 1.0213613154073171, + "grad_norm": 0.574598142489127, + "learning_rate": 2.5349893101636165e-06, + "loss": 0.2661, + "step": 21803 + }, + { + "epoch": 1.0214081603972456, + "grad_norm": 0.5947264965582203, + "learning_rate": 2.5347996784890706e-06, + "loss": 0.2871, + "step": 21804 + }, + { + "epoch": 1.0214550053871738, + "grad_norm": 0.6934099409747836, + "learning_rate": 2.53461004661426e-06, + "loss": 0.2927, + "step": 21805 + }, + { + "epoch": 1.021501850377102, + "grad_norm": 0.6110074156897946, + "learning_rate": 2.534420414540277e-06, + "loss": 0.2786, + "step": 21806 + }, + { + "epoch": 1.0215486953670305, + "grad_norm": 0.5788287343631494, + "learning_rate": 2.5342307822682137e-06, + "loss": 0.2627, + "step": 21807 + }, + { + "epoch": 1.0215955403569588, + "grad_norm": 0.6216214576311627, + "learning_rate": 2.534041149799161e-06, + "loss": 0.2859, + "step": 21808 + }, + { + "epoch": 1.0216423853468872, + "grad_norm": 0.5774521198268766, + "learning_rate": 2.53385151713421e-06, + "loss": 0.2742, + "step": 21809 + }, + { + "epoch": 1.0216892303368155, + "grad_norm": 0.5942079284858665, + "learning_rate": 2.533661884274452e-06, + "loss": 0.2717, + "step": 21810 + }, + { + "epoch": 1.0217360753267437, + "grad_norm": 0.5818233293237468, + "learning_rate": 2.533472251220978e-06, + "loss": 0.2566, + "step": 21811 + }, + { + "epoch": 1.0217829203166722, + "grad_norm": 0.590986940652247, + "learning_rate": 2.5332826179748798e-06, + "loss": 0.2792, + "step": 21812 + }, + { + "epoch": 1.0218297653066004, + "grad_norm": 0.6060632431191245, + "learning_rate": 2.5330929845372488e-06, + "loss": 0.2832, + "step": 21813 + }, + { + "epoch": 1.0218766102965289, + "grad_norm": 0.634974675310292, + "learning_rate": 2.5329033509091755e-06, + "loss": 0.3035, + "step": 21814 + }, + { + "epoch": 1.0219234552864571, + "grad_norm": 0.547792130694545, + "learning_rate": 2.532713717091752e-06, + "loss": 0.2737, + "step": 21815 + }, + { + "epoch": 1.0219703002763854, + "grad_norm": 0.6082567759851151, + "learning_rate": 2.5325240830860693e-06, + "loss": 0.2725, + "step": 21816 + }, + { + "epoch": 1.0220171452663138, + "grad_norm": 0.6313259734759501, + "learning_rate": 2.532334448893218e-06, + "loss": 0.2857, + "step": 21817 + }, + { + "epoch": 1.022063990256242, + "grad_norm": 0.5886873079586882, + "learning_rate": 2.5321448145142913e-06, + "loss": 0.2702, + "step": 21818 + }, + { + "epoch": 1.0221108352461705, + "grad_norm": 0.605135138185015, + "learning_rate": 2.531955179950379e-06, + "loss": 0.2749, + "step": 21819 + }, + { + "epoch": 1.0221576802360988, + "grad_norm": 0.5811925291974839, + "learning_rate": 2.531765545202572e-06, + "loss": 0.2642, + "step": 21820 + }, + { + "epoch": 1.022204525226027, + "grad_norm": 0.6138467233098471, + "learning_rate": 2.5315759102719616e-06, + "loss": 0.2834, + "step": 21821 + }, + { + "epoch": 1.0222513702159555, + "grad_norm": 0.6413816295554747, + "learning_rate": 2.531386275159642e-06, + "loss": 0.2836, + "step": 21822 + }, + { + "epoch": 1.0222982152058837, + "grad_norm": 0.6198444798497482, + "learning_rate": 2.5311966398667004e-06, + "loss": 0.2851, + "step": 21823 + }, + { + "epoch": 1.022345060195812, + "grad_norm": 0.5633791748776208, + "learning_rate": 2.5310070043942303e-06, + "loss": 0.2781, + "step": 21824 + }, + { + "epoch": 1.0223919051857404, + "grad_norm": 0.6168910885955577, + "learning_rate": 2.5308173687433234e-06, + "loss": 0.2655, + "step": 21825 + }, + { + "epoch": 1.0224387501756687, + "grad_norm": 0.6057594852263659, + "learning_rate": 2.5306277329150698e-06, + "loss": 0.271, + "step": 21826 + }, + { + "epoch": 1.0224855951655971, + "grad_norm": 0.5630661018984642, + "learning_rate": 2.5304380969105613e-06, + "loss": 0.2599, + "step": 21827 + }, + { + "epoch": 1.0225324401555254, + "grad_norm": 0.5957230262776195, + "learning_rate": 2.530248460730889e-06, + "loss": 0.2809, + "step": 21828 + }, + { + "epoch": 1.0225792851454536, + "grad_norm": 0.5528769603622337, + "learning_rate": 2.530058824377145e-06, + "loss": 0.2647, + "step": 21829 + }, + { + "epoch": 1.022626130135382, + "grad_norm": 0.5389313155166933, + "learning_rate": 2.5298691878504196e-06, + "loss": 0.2668, + "step": 21830 + }, + { + "epoch": 1.0226729751253103, + "grad_norm": 0.5782153325272136, + "learning_rate": 2.5296795511518045e-06, + "loss": 0.2703, + "step": 21831 + }, + { + "epoch": 1.0227198201152388, + "grad_norm": 0.6175290612407575, + "learning_rate": 2.529489914282392e-06, + "loss": 0.2658, + "step": 21832 + }, + { + "epoch": 1.022766665105167, + "grad_norm": 0.5881545736611135, + "learning_rate": 2.5293002772432714e-06, + "loss": 0.2996, + "step": 21833 + }, + { + "epoch": 1.0228135100950952, + "grad_norm": 0.5822338430416993, + "learning_rate": 2.5291106400355353e-06, + "loss": 0.2773, + "step": 21834 + }, + { + "epoch": 1.0228603550850237, + "grad_norm": 0.6117628386401496, + "learning_rate": 2.5289210026602747e-06, + "loss": 0.2626, + "step": 21835 + }, + { + "epoch": 1.022907200074952, + "grad_norm": 0.5785477576498466, + "learning_rate": 2.5287313651185806e-06, + "loss": 0.2752, + "step": 21836 + }, + { + "epoch": 1.0229540450648804, + "grad_norm": 0.5794565064834026, + "learning_rate": 2.528541727411546e-06, + "loss": 0.2789, + "step": 21837 + }, + { + "epoch": 1.0230008900548087, + "grad_norm": 0.6960393142316135, + "learning_rate": 2.5283520895402596e-06, + "loss": 0.3012, + "step": 21838 + }, + { + "epoch": 1.023047735044737, + "grad_norm": 0.6003449700177513, + "learning_rate": 2.5281624515058147e-06, + "loss": 0.2786, + "step": 21839 + }, + { + "epoch": 1.0230945800346654, + "grad_norm": 0.5582446620707344, + "learning_rate": 2.527972813309302e-06, + "loss": 0.2688, + "step": 21840 + }, + { + "epoch": 1.0231414250245936, + "grad_norm": 0.5818411400042861, + "learning_rate": 2.527783174951812e-06, + "loss": 0.2772, + "step": 21841 + }, + { + "epoch": 1.0231882700145218, + "grad_norm": 0.5934634370937879, + "learning_rate": 2.5275935364344373e-06, + "loss": 0.2852, + "step": 21842 + }, + { + "epoch": 1.0232351150044503, + "grad_norm": 0.5844733882823573, + "learning_rate": 2.52740389775827e-06, + "loss": 0.2618, + "step": 21843 + }, + { + "epoch": 1.0232819599943785, + "grad_norm": 0.6171042457077083, + "learning_rate": 2.527214258924399e-06, + "loss": 0.2866, + "step": 21844 + }, + { + "epoch": 1.023328804984307, + "grad_norm": 0.550292322047167, + "learning_rate": 2.527024619933916e-06, + "loss": 0.2621, + "step": 21845 + }, + { + "epoch": 1.0233756499742352, + "grad_norm": 0.6437884938180509, + "learning_rate": 2.526834980787914e-06, + "loss": 0.2884, + "step": 21846 + }, + { + "epoch": 1.0234224949641635, + "grad_norm": 0.5647527546903003, + "learning_rate": 2.5266453414874837e-06, + "loss": 0.2738, + "step": 21847 + }, + { + "epoch": 1.023469339954092, + "grad_norm": 0.6231982204538865, + "learning_rate": 2.5264557020337156e-06, + "loss": 0.283, + "step": 21848 + }, + { + "epoch": 1.0235161849440202, + "grad_norm": 0.6141698219218208, + "learning_rate": 2.526266062427702e-06, + "loss": 0.2883, + "step": 21849 + }, + { + "epoch": 1.0235630299339487, + "grad_norm": 0.6540330472412236, + "learning_rate": 2.526076422670534e-06, + "loss": 0.2952, + "step": 21850 + }, + { + "epoch": 1.023609874923877, + "grad_norm": 0.5988311504072582, + "learning_rate": 2.5258867827633016e-06, + "loss": 0.2879, + "step": 21851 + }, + { + "epoch": 1.0236567199138051, + "grad_norm": 0.5804647300142597, + "learning_rate": 2.5256971427070974e-06, + "loss": 0.2675, + "step": 21852 + }, + { + "epoch": 1.0237035649037336, + "grad_norm": 0.5895588805823547, + "learning_rate": 2.525507502503014e-06, + "loss": 0.2672, + "step": 21853 + }, + { + "epoch": 1.0237504098936618, + "grad_norm": 0.569203403831279, + "learning_rate": 2.5253178621521407e-06, + "loss": 0.2762, + "step": 21854 + }, + { + "epoch": 1.0237972548835903, + "grad_norm": 0.6164904146342481, + "learning_rate": 2.5251282216555687e-06, + "loss": 0.2686, + "step": 21855 + }, + { + "epoch": 1.0238440998735185, + "grad_norm": 0.6037872262410735, + "learning_rate": 2.5249385810143913e-06, + "loss": 0.2899, + "step": 21856 + }, + { + "epoch": 1.0238909448634468, + "grad_norm": 0.6006181110269223, + "learning_rate": 2.524748940229698e-06, + "loss": 0.2775, + "step": 21857 + }, + { + "epoch": 1.0239377898533752, + "grad_norm": 0.5698704443000254, + "learning_rate": 2.5245592993025803e-06, + "loss": 0.2762, + "step": 21858 + }, + { + "epoch": 1.0239846348433035, + "grad_norm": 0.6269966724642403, + "learning_rate": 2.524369658234131e-06, + "loss": 0.28, + "step": 21859 + }, + { + "epoch": 1.0240314798332317, + "grad_norm": 0.5377577201454237, + "learning_rate": 2.52418001702544e-06, + "loss": 0.2519, + "step": 21860 + }, + { + "epoch": 1.0240783248231602, + "grad_norm": 0.5684679814577313, + "learning_rate": 2.5239903756775986e-06, + "loss": 0.2809, + "step": 21861 + }, + { + "epoch": 1.0241251698130884, + "grad_norm": 0.5417205137364886, + "learning_rate": 2.5238007341916994e-06, + "loss": 0.2597, + "step": 21862 + }, + { + "epoch": 1.024172014803017, + "grad_norm": 0.5485525807642936, + "learning_rate": 2.5236110925688325e-06, + "loss": 0.2727, + "step": 21863 + }, + { + "epoch": 1.0242188597929451, + "grad_norm": 0.6237296931075234, + "learning_rate": 2.52342145081009e-06, + "loss": 0.268, + "step": 21864 + }, + { + "epoch": 1.0242657047828734, + "grad_norm": 0.5833357671306936, + "learning_rate": 2.523231808916563e-06, + "loss": 0.2687, + "step": 21865 + }, + { + "epoch": 1.0243125497728018, + "grad_norm": 0.5379762198224926, + "learning_rate": 2.5230421668893423e-06, + "loss": 0.2708, + "step": 21866 + }, + { + "epoch": 1.02435939476273, + "grad_norm": 0.5483405451308925, + "learning_rate": 2.52285252472952e-06, + "loss": 0.2677, + "step": 21867 + }, + { + "epoch": 1.0244062397526585, + "grad_norm": 0.5766213968756954, + "learning_rate": 2.5226628824381873e-06, + "loss": 0.2634, + "step": 21868 + }, + { + "epoch": 1.0244530847425868, + "grad_norm": 0.5668672771061537, + "learning_rate": 2.522473240016435e-06, + "loss": 0.2622, + "step": 21869 + }, + { + "epoch": 1.024499929732515, + "grad_norm": 0.5973447456385652, + "learning_rate": 2.5222835974653553e-06, + "loss": 0.2792, + "step": 21870 + }, + { + "epoch": 1.0245467747224435, + "grad_norm": 0.5543052813170632, + "learning_rate": 2.5220939547860392e-06, + "loss": 0.2678, + "step": 21871 + }, + { + "epoch": 1.0245936197123717, + "grad_norm": 0.6036241407605962, + "learning_rate": 2.521904311979577e-06, + "loss": 0.2982, + "step": 21872 + }, + { + "epoch": 1.0246404647023002, + "grad_norm": 0.5898205156784402, + "learning_rate": 2.5217146690470612e-06, + "loss": 0.2718, + "step": 21873 + }, + { + "epoch": 1.0246873096922284, + "grad_norm": 0.6213093324632294, + "learning_rate": 2.521525025989584e-06, + "loss": 0.2802, + "step": 21874 + }, + { + "epoch": 1.0247341546821567, + "grad_norm": 0.5820359025615705, + "learning_rate": 2.5213353828082347e-06, + "loss": 0.2636, + "step": 21875 + }, + { + "epoch": 1.0247809996720851, + "grad_norm": 0.6182308207946278, + "learning_rate": 2.521145739504106e-06, + "loss": 0.2876, + "step": 21876 + }, + { + "epoch": 1.0248278446620134, + "grad_norm": 0.5534811858434935, + "learning_rate": 2.520956096078289e-06, + "loss": 0.2798, + "step": 21877 + }, + { + "epoch": 1.0248746896519416, + "grad_norm": 0.5889966848843138, + "learning_rate": 2.5207664525318748e-06, + "loss": 0.2788, + "step": 21878 + }, + { + "epoch": 1.02492153464187, + "grad_norm": 0.575358315162396, + "learning_rate": 2.520576808865955e-06, + "loss": 0.2601, + "step": 21879 + }, + { + "epoch": 1.0249683796317983, + "grad_norm": 0.5515264565945164, + "learning_rate": 2.520387165081621e-06, + "loss": 0.2703, + "step": 21880 + }, + { + "epoch": 1.0250152246217268, + "grad_norm": 0.5768094815070317, + "learning_rate": 2.5201975211799634e-06, + "loss": 0.2799, + "step": 21881 + }, + { + "epoch": 1.025062069611655, + "grad_norm": 0.6026517638387436, + "learning_rate": 2.520007877162074e-06, + "loss": 0.2833, + "step": 21882 + }, + { + "epoch": 1.0251089146015833, + "grad_norm": 0.6442490004202832, + "learning_rate": 2.519818233029045e-06, + "loss": 0.2994, + "step": 21883 + }, + { + "epoch": 1.0251557595915117, + "grad_norm": 0.6344868529605922, + "learning_rate": 2.5196285887819668e-06, + "loss": 0.2818, + "step": 21884 + }, + { + "epoch": 1.02520260458144, + "grad_norm": 0.6257450135969541, + "learning_rate": 2.5194389444219306e-06, + "loss": 0.2766, + "step": 21885 + }, + { + "epoch": 1.0252494495713684, + "grad_norm": 0.5882967275507891, + "learning_rate": 2.519249299950029e-06, + "loss": 0.2675, + "step": 21886 + }, + { + "epoch": 1.0252962945612967, + "grad_norm": 0.5619641673254425, + "learning_rate": 2.519059655367352e-06, + "loss": 0.2753, + "step": 21887 + }, + { + "epoch": 1.025343139551225, + "grad_norm": 0.6229945782232152, + "learning_rate": 2.518870010674992e-06, + "loss": 0.3005, + "step": 21888 + }, + { + "epoch": 1.0253899845411534, + "grad_norm": 0.6214437528186119, + "learning_rate": 2.5186803658740393e-06, + "loss": 0.2895, + "step": 21889 + }, + { + "epoch": 1.0254368295310816, + "grad_norm": 0.6068508332353566, + "learning_rate": 2.5184907209655857e-06, + "loss": 0.2879, + "step": 21890 + }, + { + "epoch": 1.02548367452101, + "grad_norm": 0.5807322353791817, + "learning_rate": 2.518301075950722e-06, + "loss": 0.2738, + "step": 21891 + }, + { + "epoch": 1.0255305195109383, + "grad_norm": 0.6168639936964375, + "learning_rate": 2.5181114308305423e-06, + "loss": 0.2884, + "step": 21892 + }, + { + "epoch": 1.0255773645008666, + "grad_norm": 0.6039722405014367, + "learning_rate": 2.517921785606134e-06, + "loss": 0.2731, + "step": 21893 + }, + { + "epoch": 1.025624209490795, + "grad_norm": 0.5806541391250667, + "learning_rate": 2.51773214027859e-06, + "loss": 0.282, + "step": 21894 + }, + { + "epoch": 1.0256710544807233, + "grad_norm": 0.6220787012057019, + "learning_rate": 2.5175424948490034e-06, + "loss": 0.2887, + "step": 21895 + }, + { + "epoch": 1.0257178994706515, + "grad_norm": 0.6150215449454521, + "learning_rate": 2.517352849318464e-06, + "loss": 0.2879, + "step": 21896 + }, + { + "epoch": 1.02576474446058, + "grad_norm": 0.551313719252999, + "learning_rate": 2.517163203688063e-06, + "loss": 0.2523, + "step": 21897 + }, + { + "epoch": 1.0258115894505082, + "grad_norm": 0.5780877287973174, + "learning_rate": 2.516973557958892e-06, + "loss": 0.2787, + "step": 21898 + }, + { + "epoch": 1.0258584344404367, + "grad_norm": 0.5550896839569683, + "learning_rate": 2.5167839121320426e-06, + "loss": 0.2459, + "step": 21899 + }, + { + "epoch": 1.025905279430365, + "grad_norm": 0.5711054677414404, + "learning_rate": 2.5165942662086055e-06, + "loss": 0.2628, + "step": 21900 + }, + { + "epoch": 1.0259521244202932, + "grad_norm": 0.6084059404784115, + "learning_rate": 2.5164046201896735e-06, + "loss": 0.2673, + "step": 21901 + }, + { + "epoch": 1.0259989694102216, + "grad_norm": 0.6657249658757153, + "learning_rate": 2.5162149740763364e-06, + "loss": 0.2948, + "step": 21902 + }, + { + "epoch": 1.0260458144001499, + "grad_norm": 0.5990648025547626, + "learning_rate": 2.516025327869686e-06, + "loss": 0.2752, + "step": 21903 + }, + { + "epoch": 1.0260926593900783, + "grad_norm": 0.6275247626864693, + "learning_rate": 2.5158356815708145e-06, + "loss": 0.2694, + "step": 21904 + }, + { + "epoch": 1.0261395043800066, + "grad_norm": 0.5285269763554906, + "learning_rate": 2.5156460351808122e-06, + "loss": 0.2513, + "step": 21905 + }, + { + "epoch": 1.0261863493699348, + "grad_norm": 0.564762821600289, + "learning_rate": 2.515456388700771e-06, + "loss": 0.2695, + "step": 21906 + }, + { + "epoch": 1.0262331943598633, + "grad_norm": 0.5867748408050623, + "learning_rate": 2.5152667421317824e-06, + "loss": 0.2738, + "step": 21907 + }, + { + "epoch": 1.0262800393497915, + "grad_norm": 0.5913520282338701, + "learning_rate": 2.515077095474937e-06, + "loss": 0.2887, + "step": 21908 + }, + { + "epoch": 1.02632688433972, + "grad_norm": 0.6031831472834229, + "learning_rate": 2.5148874487313274e-06, + "loss": 0.2834, + "step": 21909 + }, + { + "epoch": 1.0263737293296482, + "grad_norm": 0.5987860252871307, + "learning_rate": 2.514697801902044e-06, + "loss": 0.2595, + "step": 21910 + }, + { + "epoch": 1.0264205743195765, + "grad_norm": 0.5533867605633613, + "learning_rate": 2.514508154988178e-06, + "loss": 0.2673, + "step": 21911 + }, + { + "epoch": 1.026467419309505, + "grad_norm": 0.6076174005366172, + "learning_rate": 2.514318507990821e-06, + "loss": 0.2618, + "step": 21912 + }, + { + "epoch": 1.0265142642994332, + "grad_norm": 0.6354700482972357, + "learning_rate": 2.514128860911066e-06, + "loss": 0.2922, + "step": 21913 + }, + { + "epoch": 1.0265611092893614, + "grad_norm": 0.6252468625394724, + "learning_rate": 2.513939213750002e-06, + "loss": 0.2846, + "step": 21914 + }, + { + "epoch": 1.0266079542792899, + "grad_norm": 0.5531289261501537, + "learning_rate": 2.513749566508721e-06, + "loss": 0.266, + "step": 21915 + }, + { + "epoch": 1.026654799269218, + "grad_norm": 0.5625502966790624, + "learning_rate": 2.513559919188315e-06, + "loss": 0.2855, + "step": 21916 + }, + { + "epoch": 1.0267016442591466, + "grad_norm": 0.5805928975546224, + "learning_rate": 2.513370271789876e-06, + "loss": 0.2717, + "step": 21917 + }, + { + "epoch": 1.0267484892490748, + "grad_norm": 0.6277240024205055, + "learning_rate": 2.513180624314493e-06, + "loss": 0.2719, + "step": 21918 + }, + { + "epoch": 1.026795334239003, + "grad_norm": 0.5249959410494311, + "learning_rate": 2.5129909767632595e-06, + "loss": 0.2562, + "step": 21919 + }, + { + "epoch": 1.0268421792289315, + "grad_norm": 0.5776996986614021, + "learning_rate": 2.512801329137266e-06, + "loss": 0.2835, + "step": 21920 + }, + { + "epoch": 1.0268890242188597, + "grad_norm": 0.6113875425097735, + "learning_rate": 2.512611681437604e-06, + "loss": 0.285, + "step": 21921 + }, + { + "epoch": 1.0269358692087882, + "grad_norm": 0.5676006702540136, + "learning_rate": 2.5124220336653648e-06, + "loss": 0.2738, + "step": 21922 + }, + { + "epoch": 1.0269827141987165, + "grad_norm": 0.5990366330614395, + "learning_rate": 2.5122323858216413e-06, + "loss": 0.2775, + "step": 21923 + }, + { + "epoch": 1.0270295591886447, + "grad_norm": 0.5878164742654908, + "learning_rate": 2.512042737907522e-06, + "loss": 0.2873, + "step": 21924 + }, + { + "epoch": 1.0270764041785732, + "grad_norm": 0.5732241574801912, + "learning_rate": 2.5118530899241e-06, + "loss": 0.2679, + "step": 21925 + }, + { + "epoch": 1.0271232491685014, + "grad_norm": 0.6049867135630841, + "learning_rate": 2.5116634418724667e-06, + "loss": 0.2867, + "step": 21926 + }, + { + "epoch": 1.0271700941584299, + "grad_norm": 0.5606369457605124, + "learning_rate": 2.5114737937537138e-06, + "loss": 0.2684, + "step": 21927 + }, + { + "epoch": 1.027216939148358, + "grad_norm": 0.5979764231986039, + "learning_rate": 2.511284145568931e-06, + "loss": 0.2647, + "step": 21928 + }, + { + "epoch": 1.0272637841382863, + "grad_norm": 0.6019695818679869, + "learning_rate": 2.5110944973192114e-06, + "loss": 0.2801, + "step": 21929 + }, + { + "epoch": 1.0273106291282148, + "grad_norm": 0.5725228414338056, + "learning_rate": 2.510904849005646e-06, + "loss": 0.2779, + "step": 21930 + }, + { + "epoch": 1.027357474118143, + "grad_norm": 0.5803296820521676, + "learning_rate": 2.510715200629325e-06, + "loss": 0.2988, + "step": 21931 + }, + { + "epoch": 1.0274043191080713, + "grad_norm": 0.5661942182801173, + "learning_rate": 2.5105255521913412e-06, + "loss": 0.2721, + "step": 21932 + }, + { + "epoch": 1.0274511640979997, + "grad_norm": 0.5925632517877379, + "learning_rate": 2.5103359036927855e-06, + "loss": 0.2929, + "step": 21933 + }, + { + "epoch": 1.027498009087928, + "grad_norm": 0.5481424451386389, + "learning_rate": 2.5101462551347495e-06, + "loss": 0.2612, + "step": 21934 + }, + { + "epoch": 1.0275448540778565, + "grad_norm": 0.5548408842104408, + "learning_rate": 2.5099566065183245e-06, + "loss": 0.2795, + "step": 21935 + }, + { + "epoch": 1.0275916990677847, + "grad_norm": 0.595642254588714, + "learning_rate": 2.509766957844601e-06, + "loss": 0.258, + "step": 21936 + }, + { + "epoch": 1.027638544057713, + "grad_norm": 0.5823729258746617, + "learning_rate": 2.509577309114672e-06, + "loss": 0.2762, + "step": 21937 + }, + { + "epoch": 1.0276853890476414, + "grad_norm": 0.6212045962134544, + "learning_rate": 2.5093876603296273e-06, + "loss": 0.2773, + "step": 21938 + }, + { + "epoch": 1.0277322340375696, + "grad_norm": 0.6134277335187966, + "learning_rate": 2.509198011490559e-06, + "loss": 0.2784, + "step": 21939 + }, + { + "epoch": 1.027779079027498, + "grad_norm": 0.5788337637461887, + "learning_rate": 2.509008362598559e-06, + "loss": 0.2686, + "step": 21940 + }, + { + "epoch": 1.0278259240174263, + "grad_norm": 0.5642571097715017, + "learning_rate": 2.5088187136547184e-06, + "loss": 0.2712, + "step": 21941 + }, + { + "epoch": 1.0278727690073546, + "grad_norm": 0.6527282383397126, + "learning_rate": 2.508629064660127e-06, + "loss": 0.293, + "step": 21942 + }, + { + "epoch": 1.027919613997283, + "grad_norm": 0.613413048558124, + "learning_rate": 2.508439415615878e-06, + "loss": 0.2799, + "step": 21943 + }, + { + "epoch": 1.0279664589872113, + "grad_norm": 0.5998651944471993, + "learning_rate": 2.508249766523063e-06, + "loss": 0.2804, + "step": 21944 + }, + { + "epoch": 1.0280133039771397, + "grad_norm": 0.6053296619599577, + "learning_rate": 2.5080601173827723e-06, + "loss": 0.2653, + "step": 21945 + }, + { + "epoch": 1.028060148967068, + "grad_norm": 0.59757427320398, + "learning_rate": 2.5078704681960968e-06, + "loss": 0.2724, + "step": 21946 + }, + { + "epoch": 1.0281069939569962, + "grad_norm": 0.5892398491569665, + "learning_rate": 2.50768081896413e-06, + "loss": 0.2743, + "step": 21947 + }, + { + "epoch": 1.0281538389469247, + "grad_norm": 0.5712015918132975, + "learning_rate": 2.5074911696879616e-06, + "loss": 0.2632, + "step": 21948 + }, + { + "epoch": 1.028200683936853, + "grad_norm": 0.5835144323434767, + "learning_rate": 2.507301520368683e-06, + "loss": 0.2694, + "step": 21949 + }, + { + "epoch": 1.0282475289267812, + "grad_norm": 0.5381180500661656, + "learning_rate": 2.5071118710073866e-06, + "loss": 0.2554, + "step": 21950 + }, + { + "epoch": 1.0282943739167096, + "grad_norm": 0.5921494890836625, + "learning_rate": 2.5069222216051632e-06, + "loss": 0.2788, + "step": 21951 + }, + { + "epoch": 1.0283412189066379, + "grad_norm": 0.5690087322630797, + "learning_rate": 2.506732572163103e-06, + "loss": 0.273, + "step": 21952 + }, + { + "epoch": 1.0283880638965663, + "grad_norm": 0.5923109577741134, + "learning_rate": 2.5065429226822996e-06, + "loss": 0.2738, + "step": 21953 + }, + { + "epoch": 1.0284349088864946, + "grad_norm": 0.5900296142075347, + "learning_rate": 2.506353273163843e-06, + "loss": 0.2785, + "step": 21954 + }, + { + "epoch": 1.0284817538764228, + "grad_norm": 0.6195647692441957, + "learning_rate": 2.506163623608825e-06, + "loss": 0.3059, + "step": 21955 + }, + { + "epoch": 1.0285285988663513, + "grad_norm": 0.6632244639167529, + "learning_rate": 2.505973974018337e-06, + "loss": 0.2829, + "step": 21956 + }, + { + "epoch": 1.0285754438562795, + "grad_norm": 0.5604292827853646, + "learning_rate": 2.5057843243934698e-06, + "loss": 0.2715, + "step": 21957 + }, + { + "epoch": 1.028622288846208, + "grad_norm": 0.6015489942958175, + "learning_rate": 2.5055946747353162e-06, + "loss": 0.2887, + "step": 21958 + }, + { + "epoch": 1.0286691338361362, + "grad_norm": 0.552322391152086, + "learning_rate": 2.5054050250449657e-06, + "loss": 0.2716, + "step": 21959 + }, + { + "epoch": 1.0287159788260645, + "grad_norm": 0.5777563381106785, + "learning_rate": 2.505215375323511e-06, + "loss": 0.2785, + "step": 21960 + }, + { + "epoch": 1.028762823815993, + "grad_norm": 0.5383622480586276, + "learning_rate": 2.5050257255720426e-06, + "loss": 0.2688, + "step": 21961 + }, + { + "epoch": 1.0288096688059212, + "grad_norm": 0.5954201648234007, + "learning_rate": 2.504836075791654e-06, + "loss": 0.2841, + "step": 21962 + }, + { + "epoch": 1.0288565137958496, + "grad_norm": 0.6547788548217838, + "learning_rate": 2.5046464259834335e-06, + "loss": 0.3004, + "step": 21963 + }, + { + "epoch": 1.0289033587857779, + "grad_norm": 0.5656876195943332, + "learning_rate": 2.504456776148474e-06, + "loss": 0.2682, + "step": 21964 + }, + { + "epoch": 1.0289502037757061, + "grad_norm": 0.5952239737736631, + "learning_rate": 2.504267126287868e-06, + "loss": 0.2812, + "step": 21965 + }, + { + "epoch": 1.0289970487656346, + "grad_norm": 0.5756926052833363, + "learning_rate": 2.5040774764027048e-06, + "loss": 0.2523, + "step": 21966 + }, + { + "epoch": 1.0290438937555628, + "grad_norm": 0.5882631173198773, + "learning_rate": 2.503887826494077e-06, + "loss": 0.2869, + "step": 21967 + }, + { + "epoch": 1.029090738745491, + "grad_norm": 0.5478943208357075, + "learning_rate": 2.5036981765630757e-06, + "loss": 0.2656, + "step": 21968 + }, + { + "epoch": 1.0291375837354195, + "grad_norm": 0.5874056453677782, + "learning_rate": 2.503508526610793e-06, + "loss": 0.2734, + "step": 21969 + }, + { + "epoch": 1.0291844287253478, + "grad_norm": 0.5736532163915578, + "learning_rate": 2.503318876638318e-06, + "loss": 0.27, + "step": 21970 + }, + { + "epoch": 1.0292312737152762, + "grad_norm": 0.6328319440989936, + "learning_rate": 2.503129226646745e-06, + "loss": 0.2802, + "step": 21971 + }, + { + "epoch": 1.0292781187052045, + "grad_norm": 0.5906915493256357, + "learning_rate": 2.502939576637164e-06, + "loss": 0.2742, + "step": 21972 + }, + { + "epoch": 1.0293249636951327, + "grad_norm": 0.577374850581382, + "learning_rate": 2.5027499266106657e-06, + "loss": 0.2762, + "step": 21973 + }, + { + "epoch": 1.0293718086850612, + "grad_norm": 0.6301752408128323, + "learning_rate": 2.5025602765683433e-06, + "loss": 0.2885, + "step": 21974 + }, + { + "epoch": 1.0294186536749894, + "grad_norm": 0.6112357802915684, + "learning_rate": 2.5023706265112864e-06, + "loss": 0.2667, + "step": 21975 + }, + { + "epoch": 1.0294654986649179, + "grad_norm": 0.5490128952331609, + "learning_rate": 2.5021809764405875e-06, + "loss": 0.2637, + "step": 21976 + }, + { + "epoch": 1.0295123436548461, + "grad_norm": 0.6574345735088767, + "learning_rate": 2.5019913263573374e-06, + "loss": 0.2797, + "step": 21977 + }, + { + "epoch": 1.0295591886447744, + "grad_norm": 0.5980383035349056, + "learning_rate": 2.501801676262628e-06, + "loss": 0.2642, + "step": 21978 + }, + { + "epoch": 1.0296060336347028, + "grad_norm": 0.6206564557241113, + "learning_rate": 2.5016120261575504e-06, + "loss": 0.2815, + "step": 21979 + }, + { + "epoch": 1.029652878624631, + "grad_norm": 0.6374835626229638, + "learning_rate": 2.501422376043196e-06, + "loss": 0.2901, + "step": 21980 + }, + { + "epoch": 1.0296997236145595, + "grad_norm": 0.5783002310545445, + "learning_rate": 2.501232725920656e-06, + "loss": 0.2668, + "step": 21981 + }, + { + "epoch": 1.0297465686044878, + "grad_norm": 0.5728927759131056, + "learning_rate": 2.5010430757910216e-06, + "loss": 0.2733, + "step": 21982 + }, + { + "epoch": 1.029793413594416, + "grad_norm": 0.5863332753457101, + "learning_rate": 2.500853425655386e-06, + "loss": 0.2689, + "step": 21983 + }, + { + "epoch": 1.0298402585843445, + "grad_norm": 0.5770372182656245, + "learning_rate": 2.5006637755148372e-06, + "loss": 0.2727, + "step": 21984 + }, + { + "epoch": 1.0298871035742727, + "grad_norm": 0.5674844098146203, + "learning_rate": 2.500474125370469e-06, + "loss": 0.2673, + "step": 21985 + }, + { + "epoch": 1.029933948564201, + "grad_norm": 0.6131332455979938, + "learning_rate": 2.500284475223373e-06, + "loss": 0.2948, + "step": 21986 + }, + { + "epoch": 1.0299807935541294, + "grad_norm": 0.5882513702003513, + "learning_rate": 2.50009482507464e-06, + "loss": 0.2874, + "step": 21987 + }, + { + "epoch": 1.0300276385440577, + "grad_norm": 0.564038886045953, + "learning_rate": 2.499905174925361e-06, + "loss": 0.2777, + "step": 21988 + }, + { + "epoch": 1.0300744835339861, + "grad_norm": 0.6132776959367024, + "learning_rate": 2.4997155247766275e-06, + "loss": 0.2884, + "step": 21989 + }, + { + "epoch": 1.0301213285239144, + "grad_norm": 0.5853528198157572, + "learning_rate": 2.499525874629532e-06, + "loss": 0.2724, + "step": 21990 + }, + { + "epoch": 1.0301681735138426, + "grad_norm": 0.6046533848317046, + "learning_rate": 2.4993362244851636e-06, + "loss": 0.2807, + "step": 21991 + }, + { + "epoch": 1.030215018503771, + "grad_norm": 0.6156234885079782, + "learning_rate": 2.4991465743446155e-06, + "loss": 0.2873, + "step": 21992 + }, + { + "epoch": 1.0302618634936993, + "grad_norm": 0.5651007289127986, + "learning_rate": 2.498956924208979e-06, + "loss": 0.2784, + "step": 21993 + }, + { + "epoch": 1.0303087084836278, + "grad_norm": 0.5817026831214158, + "learning_rate": 2.4987672740793456e-06, + "loss": 0.2855, + "step": 21994 + }, + { + "epoch": 1.030355553473556, + "grad_norm": 0.5931274753736484, + "learning_rate": 2.4985776239568054e-06, + "loss": 0.271, + "step": 21995 + }, + { + "epoch": 1.0304023984634842, + "grad_norm": 0.5848802049993437, + "learning_rate": 2.4983879738424505e-06, + "loss": 0.2859, + "step": 21996 + }, + { + "epoch": 1.0304492434534127, + "grad_norm": 0.5964584829514104, + "learning_rate": 2.4981983237373727e-06, + "loss": 0.28, + "step": 21997 + }, + { + "epoch": 1.030496088443341, + "grad_norm": 0.6142923528800784, + "learning_rate": 2.498008673642663e-06, + "loss": 0.2878, + "step": 21998 + }, + { + "epoch": 1.0305429334332694, + "grad_norm": 0.5880472721517955, + "learning_rate": 2.497819023559413e-06, + "loss": 0.2606, + "step": 21999 + }, + { + "epoch": 1.0305897784231977, + "grad_norm": 0.5885111945588576, + "learning_rate": 2.4976293734887145e-06, + "loss": 0.2914, + "step": 22000 + }, + { + "epoch": 1.030636623413126, + "grad_norm": 0.6374627687917714, + "learning_rate": 2.497439723431658e-06, + "loss": 0.2948, + "step": 22001 + }, + { + "epoch": 1.0306834684030544, + "grad_norm": 0.5771160813090983, + "learning_rate": 2.4972500733893347e-06, + "loss": 0.2777, + "step": 22002 + }, + { + "epoch": 1.0307303133929826, + "grad_norm": 0.5783531850650494, + "learning_rate": 2.4970604233628366e-06, + "loss": 0.2805, + "step": 22003 + }, + { + "epoch": 1.0307771583829108, + "grad_norm": 0.5638551771984593, + "learning_rate": 2.496870773353256e-06, + "loss": 0.2681, + "step": 22004 + }, + { + "epoch": 1.0308240033728393, + "grad_norm": 0.5898203414631354, + "learning_rate": 2.4966811233616818e-06, + "loss": 0.2772, + "step": 22005 + }, + { + "epoch": 1.0308708483627675, + "grad_norm": 0.5875200956400413, + "learning_rate": 2.4964914733892087e-06, + "loss": 0.2794, + "step": 22006 + }, + { + "epoch": 1.030917693352696, + "grad_norm": 0.5643131821676672, + "learning_rate": 2.496301823436925e-06, + "loss": 0.2649, + "step": 22007 + }, + { + "epoch": 1.0309645383426242, + "grad_norm": 0.5715051630168656, + "learning_rate": 2.496112173505924e-06, + "loss": 0.2745, + "step": 22008 + }, + { + "epoch": 1.0310113833325525, + "grad_norm": 0.6142880046747718, + "learning_rate": 2.495922523597296e-06, + "loss": 0.2781, + "step": 22009 + }, + { + "epoch": 1.031058228322481, + "grad_norm": 0.6392198729126151, + "learning_rate": 2.495732873712133e-06, + "loss": 0.2697, + "step": 22010 + }, + { + "epoch": 1.0311050733124092, + "grad_norm": 0.6401771918461401, + "learning_rate": 2.4955432238515263e-06, + "loss": 0.2805, + "step": 22011 + }, + { + "epoch": 1.0311519183023377, + "grad_norm": 0.5957234567466955, + "learning_rate": 2.4953535740165678e-06, + "loss": 0.2755, + "step": 22012 + }, + { + "epoch": 1.031198763292266, + "grad_norm": 0.6002483515664147, + "learning_rate": 2.4951639242083475e-06, + "loss": 0.2814, + "step": 22013 + }, + { + "epoch": 1.0312456082821941, + "grad_norm": 0.5810476162627463, + "learning_rate": 2.4949742744279578e-06, + "loss": 0.2891, + "step": 22014 + }, + { + "epoch": 1.0312924532721226, + "grad_norm": 0.5763513247694138, + "learning_rate": 2.49478462467649e-06, + "loss": 0.2668, + "step": 22015 + }, + { + "epoch": 1.0313392982620508, + "grad_norm": 0.5766300514785752, + "learning_rate": 2.4945949749550347e-06, + "loss": 0.273, + "step": 22016 + }, + { + "epoch": 1.0313861432519793, + "grad_norm": 0.5681413297688663, + "learning_rate": 2.494405325264685e-06, + "loss": 0.2797, + "step": 22017 + }, + { + "epoch": 1.0314329882419075, + "grad_norm": 0.5725855838453113, + "learning_rate": 2.4942156756065315e-06, + "loss": 0.2641, + "step": 22018 + }, + { + "epoch": 1.0314798332318358, + "grad_norm": 0.579991792767761, + "learning_rate": 2.494026025981664e-06, + "loss": 0.2811, + "step": 22019 + }, + { + "epoch": 1.0315266782217642, + "grad_norm": 0.5623589982731084, + "learning_rate": 2.493836376391176e-06, + "loss": 0.2618, + "step": 22020 + }, + { + "epoch": 1.0315735232116925, + "grad_norm": 0.5945033468830521, + "learning_rate": 2.493646726836158e-06, + "loss": 0.2702, + "step": 22021 + }, + { + "epoch": 1.0316203682016207, + "grad_norm": 0.5540908283489019, + "learning_rate": 2.4934570773177013e-06, + "loss": 0.2664, + "step": 22022 + }, + { + "epoch": 1.0316672131915492, + "grad_norm": 0.5434105839961588, + "learning_rate": 2.493267427836897e-06, + "loss": 0.2705, + "step": 22023 + }, + { + "epoch": 1.0317140581814774, + "grad_norm": 0.6060371520432555, + "learning_rate": 2.493077778394838e-06, + "loss": 0.2912, + "step": 22024 + }, + { + "epoch": 1.031760903171406, + "grad_norm": 0.6624704218884075, + "learning_rate": 2.492888128992615e-06, + "loss": 0.2879, + "step": 22025 + }, + { + "epoch": 1.0318077481613341, + "grad_norm": 0.597663714044621, + "learning_rate": 2.4926984796313178e-06, + "loss": 0.2713, + "step": 22026 + }, + { + "epoch": 1.0318545931512624, + "grad_norm": 0.5866377536032479, + "learning_rate": 2.492508830312039e-06, + "loss": 0.2878, + "step": 22027 + }, + { + "epoch": 1.0319014381411908, + "grad_norm": 0.6231889994807841, + "learning_rate": 2.492319181035871e-06, + "loss": 0.2875, + "step": 22028 + }, + { + "epoch": 1.031948283131119, + "grad_norm": 0.5503185807943016, + "learning_rate": 2.492129531803903e-06, + "loss": 0.2749, + "step": 22029 + }, + { + "epoch": 1.0319951281210475, + "grad_norm": 0.5714484295938191, + "learning_rate": 2.491939882617228e-06, + "loss": 0.2653, + "step": 22030 + }, + { + "epoch": 1.0320419731109758, + "grad_norm": 0.5901101352545437, + "learning_rate": 2.4917502334769382e-06, + "loss": 0.2818, + "step": 22031 + }, + { + "epoch": 1.032088818100904, + "grad_norm": 0.54922950875188, + "learning_rate": 2.4915605843841224e-06, + "loss": 0.2715, + "step": 22032 + }, + { + "epoch": 1.0321356630908325, + "grad_norm": 0.5825794225454407, + "learning_rate": 2.4913709353398734e-06, + "loss": 0.2792, + "step": 22033 + }, + { + "epoch": 1.0321825080807607, + "grad_norm": 0.5640272755941069, + "learning_rate": 2.4911812863452824e-06, + "loss": 0.2568, + "step": 22034 + }, + { + "epoch": 1.0322293530706892, + "grad_norm": 0.6377956137966553, + "learning_rate": 2.4909916374014415e-06, + "loss": 0.2767, + "step": 22035 + }, + { + "epoch": 1.0322761980606174, + "grad_norm": 0.6362888735789833, + "learning_rate": 2.4908019885094407e-06, + "loss": 0.2932, + "step": 22036 + }, + { + "epoch": 1.0323230430505457, + "grad_norm": 0.5836627199372793, + "learning_rate": 2.4906123396703735e-06, + "loss": 0.2796, + "step": 22037 + }, + { + "epoch": 1.0323698880404741, + "grad_norm": 0.5680074663762499, + "learning_rate": 2.490422690885329e-06, + "loss": 0.2747, + "step": 22038 + }, + { + "epoch": 1.0324167330304024, + "grad_norm": 0.5863275248393989, + "learning_rate": 2.4902330421553997e-06, + "loss": 0.2717, + "step": 22039 + }, + { + "epoch": 1.0324635780203306, + "grad_norm": 0.5601280300046103, + "learning_rate": 2.4900433934816764e-06, + "loss": 0.2611, + "step": 22040 + }, + { + "epoch": 1.032510423010259, + "grad_norm": 0.6106323463008838, + "learning_rate": 2.4898537448652505e-06, + "loss": 0.2892, + "step": 22041 + }, + { + "epoch": 1.0325572680001873, + "grad_norm": 0.6148119349127981, + "learning_rate": 2.489664096307215e-06, + "loss": 0.28, + "step": 22042 + }, + { + "epoch": 1.0326041129901158, + "grad_norm": 0.6454788185263954, + "learning_rate": 2.4894744478086596e-06, + "loss": 0.2828, + "step": 22043 + }, + { + "epoch": 1.032650957980044, + "grad_norm": 0.637138369074407, + "learning_rate": 2.4892847993706757e-06, + "loss": 0.3074, + "step": 22044 + }, + { + "epoch": 1.0326978029699723, + "grad_norm": 0.6181352099015428, + "learning_rate": 2.489095150994355e-06, + "loss": 0.289, + "step": 22045 + }, + { + "epoch": 1.0327446479599007, + "grad_norm": 0.5830473290604988, + "learning_rate": 2.4889055026807894e-06, + "loss": 0.2814, + "step": 22046 + }, + { + "epoch": 1.032791492949829, + "grad_norm": 0.6152996694847417, + "learning_rate": 2.4887158544310696e-06, + "loss": 0.2856, + "step": 22047 + }, + { + "epoch": 1.0328383379397574, + "grad_norm": 0.6337526766303945, + "learning_rate": 2.4885262062462866e-06, + "loss": 0.2828, + "step": 22048 + }, + { + "epoch": 1.0328851829296857, + "grad_norm": 0.6105248337900873, + "learning_rate": 2.488336558127534e-06, + "loss": 0.2921, + "step": 22049 + }, + { + "epoch": 1.032932027919614, + "grad_norm": 0.6252209947960073, + "learning_rate": 2.488146910075901e-06, + "loss": 0.2781, + "step": 22050 + }, + { + "epoch": 1.0329788729095424, + "grad_norm": 0.5642416034710691, + "learning_rate": 2.487957262092479e-06, + "loss": 0.2682, + "step": 22051 + }, + { + "epoch": 1.0330257178994706, + "grad_norm": 0.5686914825453726, + "learning_rate": 2.4877676141783595e-06, + "loss": 0.2829, + "step": 22052 + }, + { + "epoch": 1.033072562889399, + "grad_norm": 0.5861318640214587, + "learning_rate": 2.4875779663346352e-06, + "loss": 0.2705, + "step": 22053 + }, + { + "epoch": 1.0331194078793273, + "grad_norm": 0.5733032056722964, + "learning_rate": 2.4873883185623963e-06, + "loss": 0.2578, + "step": 22054 + }, + { + "epoch": 1.0331662528692556, + "grad_norm": 0.5794434660773686, + "learning_rate": 2.4871986708627354e-06, + "loss": 0.267, + "step": 22055 + }, + { + "epoch": 1.033213097859184, + "grad_norm": 0.5805190591853784, + "learning_rate": 2.4870090232367414e-06, + "loss": 0.2885, + "step": 22056 + }, + { + "epoch": 1.0332599428491123, + "grad_norm": 0.58172588761621, + "learning_rate": 2.486819375685508e-06, + "loss": 0.2745, + "step": 22057 + }, + { + "epoch": 1.0333067878390405, + "grad_norm": 0.5796851728095987, + "learning_rate": 2.4866297282101255e-06, + "loss": 0.2771, + "step": 22058 + }, + { + "epoch": 1.033353632828969, + "grad_norm": 0.5768729697289838, + "learning_rate": 2.4864400808116856e-06, + "loss": 0.2747, + "step": 22059 + }, + { + "epoch": 1.0334004778188972, + "grad_norm": 0.5641133499724519, + "learning_rate": 2.4862504334912794e-06, + "loss": 0.2701, + "step": 22060 + }, + { + "epoch": 1.0334473228088257, + "grad_norm": 0.5623290651076103, + "learning_rate": 2.4860607862499997e-06, + "loss": 0.2732, + "step": 22061 + }, + { + "epoch": 1.033494167798754, + "grad_norm": 0.6024166345010837, + "learning_rate": 2.4858711390889353e-06, + "loss": 0.294, + "step": 22062 + }, + { + "epoch": 1.0335410127886822, + "grad_norm": 0.5853038620326795, + "learning_rate": 2.4856814920091797e-06, + "loss": 0.2752, + "step": 22063 + }, + { + "epoch": 1.0335878577786106, + "grad_norm": 0.610616027109301, + "learning_rate": 2.485491845011823e-06, + "loss": 0.2863, + "step": 22064 + }, + { + "epoch": 1.0336347027685389, + "grad_norm": 0.5912092242303513, + "learning_rate": 2.485302198097957e-06, + "loss": 0.2595, + "step": 22065 + }, + { + "epoch": 1.0336815477584673, + "grad_norm": 0.5918021693010284, + "learning_rate": 2.4851125512686734e-06, + "loss": 0.2697, + "step": 22066 + }, + { + "epoch": 1.0337283927483956, + "grad_norm": 0.5653706375072204, + "learning_rate": 2.4849229045250645e-06, + "loss": 0.2658, + "step": 22067 + }, + { + "epoch": 1.0337752377383238, + "grad_norm": 0.5846861080910705, + "learning_rate": 2.484733257868219e-06, + "loss": 0.2747, + "step": 22068 + }, + { + "epoch": 1.0338220827282523, + "grad_norm": 0.5779550869703086, + "learning_rate": 2.4845436112992296e-06, + "loss": 0.2595, + "step": 22069 + }, + { + "epoch": 1.0338689277181805, + "grad_norm": 0.5776920320537554, + "learning_rate": 2.4843539648191886e-06, + "loss": 0.2775, + "step": 22070 + }, + { + "epoch": 1.033915772708109, + "grad_norm": 0.5707246792259972, + "learning_rate": 2.484164318429186e-06, + "loss": 0.2724, + "step": 22071 + }, + { + "epoch": 1.0339626176980372, + "grad_norm": 0.5954189068729528, + "learning_rate": 2.483974672130314e-06, + "loss": 0.2737, + "step": 22072 + }, + { + "epoch": 1.0340094626879655, + "grad_norm": 0.6238077086708236, + "learning_rate": 2.4837850259236645e-06, + "loss": 0.2807, + "step": 22073 + }, + { + "epoch": 1.034056307677894, + "grad_norm": 0.5824603254040336, + "learning_rate": 2.483595379810328e-06, + "loss": 0.2561, + "step": 22074 + }, + { + "epoch": 1.0341031526678222, + "grad_norm": 0.5833130260896636, + "learning_rate": 2.483405733791395e-06, + "loss": 0.2855, + "step": 22075 + }, + { + "epoch": 1.0341499976577504, + "grad_norm": 0.6239307248078297, + "learning_rate": 2.483216087867958e-06, + "loss": 0.2841, + "step": 22076 + }, + { + "epoch": 1.0341968426476789, + "grad_norm": 0.6147328463170249, + "learning_rate": 2.483026442041109e-06, + "loss": 0.2929, + "step": 22077 + }, + { + "epoch": 1.034243687637607, + "grad_norm": 0.568352065684242, + "learning_rate": 2.4828367963119375e-06, + "loss": 0.2675, + "step": 22078 + }, + { + "epoch": 1.0342905326275356, + "grad_norm": 0.6043907426521464, + "learning_rate": 2.4826471506815374e-06, + "loss": 0.2928, + "step": 22079 + }, + { + "epoch": 1.0343373776174638, + "grad_norm": 0.5882704356939944, + "learning_rate": 2.482457505150997e-06, + "loss": 0.2695, + "step": 22080 + }, + { + "epoch": 1.034384222607392, + "grad_norm": 0.5912449193536395, + "learning_rate": 2.4822678597214103e-06, + "loss": 0.2849, + "step": 22081 + }, + { + "epoch": 1.0344310675973205, + "grad_norm": 0.5884767352288678, + "learning_rate": 2.482078214393867e-06, + "loss": 0.2736, + "step": 22082 + }, + { + "epoch": 1.0344779125872487, + "grad_norm": 0.5867856524404451, + "learning_rate": 2.4818885691694585e-06, + "loss": 0.2932, + "step": 22083 + }, + { + "epoch": 1.0345247575771772, + "grad_norm": 0.6173178382134364, + "learning_rate": 2.4816989240492776e-06, + "loss": 0.2817, + "step": 22084 + }, + { + "epoch": 1.0345716025671055, + "grad_norm": 0.5914346847691803, + "learning_rate": 2.481509279034416e-06, + "loss": 0.2724, + "step": 22085 + }, + { + "epoch": 1.0346184475570337, + "grad_norm": 0.6265187062233245, + "learning_rate": 2.481319634125962e-06, + "loss": 0.2676, + "step": 22086 + }, + { + "epoch": 1.0346652925469622, + "grad_norm": 0.6143025223353863, + "learning_rate": 2.481129989325009e-06, + "loss": 0.2697, + "step": 22087 + }, + { + "epoch": 1.0347121375368904, + "grad_norm": 0.6148034375760073, + "learning_rate": 2.480940344632649e-06, + "loss": 0.2764, + "step": 22088 + }, + { + "epoch": 1.0347589825268189, + "grad_norm": 0.5898861395890778, + "learning_rate": 2.4807507000499715e-06, + "loss": 0.2699, + "step": 22089 + }, + { + "epoch": 1.034805827516747, + "grad_norm": 0.5839902121435657, + "learning_rate": 2.480561055578069e-06, + "loss": 0.2947, + "step": 22090 + }, + { + "epoch": 1.0348526725066753, + "grad_norm": 0.6128811126027671, + "learning_rate": 2.4803714112180336e-06, + "loss": 0.2878, + "step": 22091 + }, + { + "epoch": 1.0348995174966038, + "grad_norm": 0.5942273628305846, + "learning_rate": 2.480181766970956e-06, + "loss": 0.279, + "step": 22092 + }, + { + "epoch": 1.034946362486532, + "grad_norm": 0.5882918295574192, + "learning_rate": 2.4799921228379265e-06, + "loss": 0.2998, + "step": 22093 + }, + { + "epoch": 1.0349932074764603, + "grad_norm": 0.6123278538890542, + "learning_rate": 2.4798024788200374e-06, + "loss": 0.2811, + "step": 22094 + }, + { + "epoch": 1.0350400524663887, + "grad_norm": 0.5928182775972539, + "learning_rate": 2.47961283491838e-06, + "loss": 0.2773, + "step": 22095 + }, + { + "epoch": 1.035086897456317, + "grad_norm": 0.5680006229483975, + "learning_rate": 2.4794231911340454e-06, + "loss": 0.2804, + "step": 22096 + }, + { + "epoch": 1.0351337424462455, + "grad_norm": 0.659585382787904, + "learning_rate": 2.479233547468125e-06, + "loss": 0.2819, + "step": 22097 + }, + { + "epoch": 1.0351805874361737, + "grad_norm": 0.58343321353882, + "learning_rate": 2.4790439039217116e-06, + "loss": 0.2814, + "step": 22098 + }, + { + "epoch": 1.035227432426102, + "grad_norm": 0.6130903186850535, + "learning_rate": 2.478854260495895e-06, + "loss": 0.2807, + "step": 22099 + }, + { + "epoch": 1.0352742774160304, + "grad_norm": 0.585671033170938, + "learning_rate": 2.4786646171917657e-06, + "loss": 0.2693, + "step": 22100 + }, + { + "epoch": 1.0353211224059586, + "grad_norm": 0.5600108443672401, + "learning_rate": 2.4784749740104165e-06, + "loss": 0.2555, + "step": 22101 + }, + { + "epoch": 1.035367967395887, + "grad_norm": 0.5571516645572006, + "learning_rate": 2.4782853309529387e-06, + "loss": 0.2702, + "step": 22102 + }, + { + "epoch": 1.0354148123858153, + "grad_norm": 0.6455808742583234, + "learning_rate": 2.4780956880204233e-06, + "loss": 0.2948, + "step": 22103 + }, + { + "epoch": 1.0354616573757436, + "grad_norm": 0.5856037230819846, + "learning_rate": 2.477906045213962e-06, + "loss": 0.2658, + "step": 22104 + }, + { + "epoch": 1.035508502365672, + "grad_norm": 0.6060208135553031, + "learning_rate": 2.4777164025346455e-06, + "loss": 0.2719, + "step": 22105 + }, + { + "epoch": 1.0355553473556003, + "grad_norm": 0.6168638834154999, + "learning_rate": 2.477526759983566e-06, + "loss": 0.2574, + "step": 22106 + }, + { + "epoch": 1.0356021923455287, + "grad_norm": 0.6483246770266302, + "learning_rate": 2.4773371175618135e-06, + "loss": 0.2891, + "step": 22107 + }, + { + "epoch": 1.035649037335457, + "grad_norm": 0.6270067086187697, + "learning_rate": 2.477147475270481e-06, + "loss": 0.2949, + "step": 22108 + }, + { + "epoch": 1.0356958823253852, + "grad_norm": 0.6160632304485629, + "learning_rate": 2.476957833110658e-06, + "loss": 0.2895, + "step": 22109 + }, + { + "epoch": 1.0357427273153137, + "grad_norm": 0.59427193747714, + "learning_rate": 2.4767681910834383e-06, + "loss": 0.2776, + "step": 22110 + }, + { + "epoch": 1.035789572305242, + "grad_norm": 0.5582773038409085, + "learning_rate": 2.476578549189911e-06, + "loss": 0.2837, + "step": 22111 + }, + { + "epoch": 1.0358364172951702, + "grad_norm": 0.5919835934874929, + "learning_rate": 2.4763889074311684e-06, + "loss": 0.2879, + "step": 22112 + }, + { + "epoch": 1.0358832622850986, + "grad_norm": 0.6522508298365082, + "learning_rate": 2.476199265808301e-06, + "loss": 0.2683, + "step": 22113 + }, + { + "epoch": 1.0359301072750269, + "grad_norm": 0.5812218233258005, + "learning_rate": 2.4760096243224013e-06, + "loss": 0.2844, + "step": 22114 + }, + { + "epoch": 1.0359769522649553, + "grad_norm": 0.5563318572047649, + "learning_rate": 2.4758199829745605e-06, + "loss": 0.264, + "step": 22115 + }, + { + "epoch": 1.0360237972548836, + "grad_norm": 0.5482897075595587, + "learning_rate": 2.47563034176587e-06, + "loss": 0.2715, + "step": 22116 + }, + { + "epoch": 1.0360706422448118, + "grad_norm": 0.5669730603174528, + "learning_rate": 2.47544070069742e-06, + "loss": 0.2888, + "step": 22117 + }, + { + "epoch": 1.0361174872347403, + "grad_norm": 0.6324627494891163, + "learning_rate": 2.4752510597703026e-06, + "loss": 0.2925, + "step": 22118 + }, + { + "epoch": 1.0361643322246685, + "grad_norm": 0.5995896395157867, + "learning_rate": 2.47506141898561e-06, + "loss": 0.2834, + "step": 22119 + }, + { + "epoch": 1.036211177214597, + "grad_norm": 0.6207312431478527, + "learning_rate": 2.4748717783444317e-06, + "loss": 0.296, + "step": 22120 + }, + { + "epoch": 1.0362580222045252, + "grad_norm": 0.6035224951167371, + "learning_rate": 2.4746821378478598e-06, + "loss": 0.2939, + "step": 22121 + }, + { + "epoch": 1.0363048671944535, + "grad_norm": 0.6054487963630432, + "learning_rate": 2.474492497496987e-06, + "loss": 0.2955, + "step": 22122 + }, + { + "epoch": 1.036351712184382, + "grad_norm": 0.5803176228236406, + "learning_rate": 2.474302857292903e-06, + "loss": 0.2571, + "step": 22123 + }, + { + "epoch": 1.0363985571743102, + "grad_norm": 0.6128447884790617, + "learning_rate": 2.4741132172366993e-06, + "loss": 0.2725, + "step": 22124 + }, + { + "epoch": 1.0364454021642386, + "grad_norm": 0.6435119464659022, + "learning_rate": 2.473923577329467e-06, + "loss": 0.2914, + "step": 22125 + }, + { + "epoch": 1.0364922471541669, + "grad_norm": 0.5892665532752348, + "learning_rate": 2.473733937572299e-06, + "loss": 0.2853, + "step": 22126 + }, + { + "epoch": 1.0365390921440951, + "grad_norm": 0.5591800493569603, + "learning_rate": 2.4735442979662848e-06, + "loss": 0.2687, + "step": 22127 + }, + { + "epoch": 1.0365859371340236, + "grad_norm": 0.5900689447107347, + "learning_rate": 2.4733546585125176e-06, + "loss": 0.2843, + "step": 22128 + }, + { + "epoch": 1.0366327821239518, + "grad_norm": 0.6017455921609595, + "learning_rate": 2.4731650192120866e-06, + "loss": 0.2925, + "step": 22129 + }, + { + "epoch": 1.03667962711388, + "grad_norm": 0.5320126890277352, + "learning_rate": 2.4729753800660846e-06, + "loss": 0.2521, + "step": 22130 + }, + { + "epoch": 1.0367264721038085, + "grad_norm": 0.5639602983350941, + "learning_rate": 2.4727857410756025e-06, + "loss": 0.286, + "step": 22131 + }, + { + "epoch": 1.0367733170937368, + "grad_norm": 0.5824488532252724, + "learning_rate": 2.4725961022417312e-06, + "loss": 0.2839, + "step": 22132 + }, + { + "epoch": 1.0368201620836652, + "grad_norm": 0.5828477678181648, + "learning_rate": 2.4724064635655627e-06, + "loss": 0.2781, + "step": 22133 + }, + { + "epoch": 1.0368670070735935, + "grad_norm": 0.5862987060298639, + "learning_rate": 2.472216825048189e-06, + "loss": 0.278, + "step": 22134 + }, + { + "epoch": 1.0369138520635217, + "grad_norm": 0.5804472837199771, + "learning_rate": 2.472027186690699e-06, + "loss": 0.2705, + "step": 22135 + }, + { + "epoch": 1.0369606970534502, + "grad_norm": 0.636812821688927, + "learning_rate": 2.471837548494186e-06, + "loss": 0.2885, + "step": 22136 + }, + { + "epoch": 1.0370075420433784, + "grad_norm": 0.5479817259450979, + "learning_rate": 2.4716479104597412e-06, + "loss": 0.2548, + "step": 22137 + }, + { + "epoch": 1.0370543870333069, + "grad_norm": 0.6099742447656669, + "learning_rate": 2.471458272588455e-06, + "loss": 0.2914, + "step": 22138 + }, + { + "epoch": 1.0371012320232351, + "grad_norm": 0.6026832935835815, + "learning_rate": 2.4712686348814194e-06, + "loss": 0.2991, + "step": 22139 + }, + { + "epoch": 1.0371480770131634, + "grad_norm": 0.5889728952722852, + "learning_rate": 2.4710789973397266e-06, + "loss": 0.2787, + "step": 22140 + }, + { + "epoch": 1.0371949220030918, + "grad_norm": 0.6028732074098054, + "learning_rate": 2.4708893599644664e-06, + "loss": 0.2727, + "step": 22141 + }, + { + "epoch": 1.03724176699302, + "grad_norm": 0.650589302084574, + "learning_rate": 2.47069972275673e-06, + "loss": 0.2769, + "step": 22142 + }, + { + "epoch": 1.0372886119829485, + "grad_norm": 0.5619072849034352, + "learning_rate": 2.470510085717609e-06, + "loss": 0.266, + "step": 22143 + }, + { + "epoch": 1.0373354569728768, + "grad_norm": 0.5703611175228783, + "learning_rate": 2.470320448848196e-06, + "loss": 0.2584, + "step": 22144 + }, + { + "epoch": 1.037382301962805, + "grad_norm": 0.6388367026361141, + "learning_rate": 2.4701308121495808e-06, + "loss": 0.2803, + "step": 22145 + }, + { + "epoch": 1.0374291469527335, + "grad_norm": 0.5956645353781996, + "learning_rate": 2.469941175622856e-06, + "loss": 0.2664, + "step": 22146 + }, + { + "epoch": 1.0374759919426617, + "grad_norm": 0.5653656204341396, + "learning_rate": 2.4697515392691114e-06, + "loss": 0.2794, + "step": 22147 + }, + { + "epoch": 1.03752283693259, + "grad_norm": 0.5941862635318668, + "learning_rate": 2.4695619030894395e-06, + "loss": 0.2722, + "step": 22148 + }, + { + "epoch": 1.0375696819225184, + "grad_norm": 0.6338837447018104, + "learning_rate": 2.4693722670849306e-06, + "loss": 0.2827, + "step": 22149 + }, + { + "epoch": 1.0376165269124467, + "grad_norm": 0.575747545515467, + "learning_rate": 2.4691826312566775e-06, + "loss": 0.2694, + "step": 22150 + }, + { + "epoch": 1.0376633719023751, + "grad_norm": 0.617503824251587, + "learning_rate": 2.4689929956057697e-06, + "loss": 0.2894, + "step": 22151 + }, + { + "epoch": 1.0377102168923034, + "grad_norm": 0.6752557003467689, + "learning_rate": 2.468803360133301e-06, + "loss": 0.2826, + "step": 22152 + }, + { + "epoch": 1.0377570618822316, + "grad_norm": 0.5845422822021904, + "learning_rate": 2.4686137248403594e-06, + "loss": 0.2737, + "step": 22153 + }, + { + "epoch": 1.03780390687216, + "grad_norm": 0.5795360859614509, + "learning_rate": 2.4684240897280388e-06, + "loss": 0.2703, + "step": 22154 + }, + { + "epoch": 1.0378507518620883, + "grad_norm": 0.5794633116487602, + "learning_rate": 2.468234454797429e-06, + "loss": 0.2815, + "step": 22155 + }, + { + "epoch": 1.0378975968520168, + "grad_norm": 0.6282144936980523, + "learning_rate": 2.468044820049622e-06, + "loss": 0.2786, + "step": 22156 + }, + { + "epoch": 1.037944441841945, + "grad_norm": 0.5742034638303867, + "learning_rate": 2.4678551854857095e-06, + "loss": 0.2719, + "step": 22157 + }, + { + "epoch": 1.0379912868318732, + "grad_norm": 0.5774276998385935, + "learning_rate": 2.467665551106783e-06, + "loss": 0.2772, + "step": 22158 + }, + { + "epoch": 1.0380381318218017, + "grad_norm": 0.5381997175632383, + "learning_rate": 2.4674759169139316e-06, + "loss": 0.2728, + "step": 22159 + }, + { + "epoch": 1.03808497681173, + "grad_norm": 0.5623596002764388, + "learning_rate": 2.4672862829082485e-06, + "loss": 0.2654, + "step": 22160 + }, + { + "epoch": 1.0381318218016584, + "grad_norm": 0.5912054164910927, + "learning_rate": 2.4670966490908253e-06, + "loss": 0.2598, + "step": 22161 + }, + { + "epoch": 1.0381786667915867, + "grad_norm": 0.5782624638839277, + "learning_rate": 2.466907015462752e-06, + "loss": 0.2775, + "step": 22162 + }, + { + "epoch": 1.038225511781515, + "grad_norm": 0.5804665613377031, + "learning_rate": 2.4667173820251202e-06, + "loss": 0.2583, + "step": 22163 + }, + { + "epoch": 1.0382723567714434, + "grad_norm": 0.6401150634272443, + "learning_rate": 2.4665277487790224e-06, + "loss": 0.2799, + "step": 22164 + }, + { + "epoch": 1.0383192017613716, + "grad_norm": 0.6242061246821157, + "learning_rate": 2.4663381157255497e-06, + "loss": 0.2932, + "step": 22165 + }, + { + "epoch": 1.0383660467512998, + "grad_norm": 0.5992726603900709, + "learning_rate": 2.4661484828657912e-06, + "loss": 0.2714, + "step": 22166 + }, + { + "epoch": 1.0384128917412283, + "grad_norm": 0.5568518405959492, + "learning_rate": 2.4659588502008398e-06, + "loss": 0.2539, + "step": 22167 + }, + { + "epoch": 1.0384597367311565, + "grad_norm": 0.6131897604046376, + "learning_rate": 2.4657692177317867e-06, + "loss": 0.2927, + "step": 22168 + }, + { + "epoch": 1.038506581721085, + "grad_norm": 0.6222086405081925, + "learning_rate": 2.4655795854597234e-06, + "loss": 0.2759, + "step": 22169 + }, + { + "epoch": 1.0385534267110133, + "grad_norm": 0.5913138812506191, + "learning_rate": 2.4653899533857405e-06, + "loss": 0.281, + "step": 22170 + }, + { + "epoch": 1.0386002717009415, + "grad_norm": 0.5720409767947715, + "learning_rate": 2.465200321510931e-06, + "loss": 0.2634, + "step": 22171 + }, + { + "epoch": 1.03864711669087, + "grad_norm": 0.5703672881150117, + "learning_rate": 2.4650106898363843e-06, + "loss": 0.2846, + "step": 22172 + }, + { + "epoch": 1.0386939616807982, + "grad_norm": 0.5926065111216968, + "learning_rate": 2.4648210583631917e-06, + "loss": 0.282, + "step": 22173 + }, + { + "epoch": 1.0387408066707267, + "grad_norm": 0.5471374346798407, + "learning_rate": 2.464631427092445e-06, + "loss": 0.2556, + "step": 22174 + }, + { + "epoch": 1.038787651660655, + "grad_norm": 0.6331924394773375, + "learning_rate": 2.464441796025236e-06, + "loss": 0.2923, + "step": 22175 + }, + { + "epoch": 1.0388344966505831, + "grad_norm": 0.6270609277812775, + "learning_rate": 2.464252165162655e-06, + "loss": 0.303, + "step": 22176 + }, + { + "epoch": 1.0388813416405116, + "grad_norm": 0.5933214900679435, + "learning_rate": 2.4640625345057952e-06, + "loss": 0.282, + "step": 22177 + }, + { + "epoch": 1.0389281866304398, + "grad_norm": 0.6064534212573368, + "learning_rate": 2.4638729040557456e-06, + "loss": 0.2976, + "step": 22178 + }, + { + "epoch": 1.0389750316203683, + "grad_norm": 0.598825364762325, + "learning_rate": 2.4636832738135984e-06, + "loss": 0.2979, + "step": 22179 + }, + { + "epoch": 1.0390218766102965, + "grad_norm": 0.5906120291616601, + "learning_rate": 2.4634936437804444e-06, + "loss": 0.2858, + "step": 22180 + }, + { + "epoch": 1.0390687216002248, + "grad_norm": 0.5800395196882936, + "learning_rate": 2.4633040139573753e-06, + "loss": 0.2684, + "step": 22181 + }, + { + "epoch": 1.0391155665901533, + "grad_norm": 0.553879593768937, + "learning_rate": 2.463114384345483e-06, + "loss": 0.2513, + "step": 22182 + }, + { + "epoch": 1.0391624115800815, + "grad_norm": 0.5855315772350074, + "learning_rate": 2.462924754945859e-06, + "loss": 0.281, + "step": 22183 + }, + { + "epoch": 1.0392092565700097, + "grad_norm": 0.6206748366127907, + "learning_rate": 2.462735125759592e-06, + "loss": 0.2881, + "step": 22184 + }, + { + "epoch": 1.0392561015599382, + "grad_norm": 0.6009841916246558, + "learning_rate": 2.462545496787775e-06, + "loss": 0.279, + "step": 22185 + }, + { + "epoch": 1.0393029465498664, + "grad_norm": 0.605163382097917, + "learning_rate": 2.4623558680315002e-06, + "loss": 0.2777, + "step": 22186 + }, + { + "epoch": 1.039349791539795, + "grad_norm": 0.5946985886923626, + "learning_rate": 2.462166239491857e-06, + "loss": 0.2731, + "step": 22187 + }, + { + "epoch": 1.0393966365297231, + "grad_norm": 0.5935207594956645, + "learning_rate": 2.461976611169938e-06, + "loss": 0.278, + "step": 22188 + }, + { + "epoch": 1.0394434815196514, + "grad_norm": 0.6326206382092777, + "learning_rate": 2.461786983066835e-06, + "loss": 0.2733, + "step": 22189 + }, + { + "epoch": 1.0394903265095798, + "grad_norm": 0.6001823933749032, + "learning_rate": 2.4615973551836377e-06, + "loss": 0.2844, + "step": 22190 + }, + { + "epoch": 1.039537171499508, + "grad_norm": 0.6311544746310611, + "learning_rate": 2.4614077275214377e-06, + "loss": 0.2914, + "step": 22191 + }, + { + "epoch": 1.0395840164894365, + "grad_norm": 0.6059480465698357, + "learning_rate": 2.461218100081326e-06, + "loss": 0.2677, + "step": 22192 + }, + { + "epoch": 1.0396308614793648, + "grad_norm": 0.5726589044800252, + "learning_rate": 2.461028472864395e-06, + "loss": 0.2672, + "step": 22193 + }, + { + "epoch": 1.039677706469293, + "grad_norm": 0.5902832161430289, + "learning_rate": 2.4608388458717354e-06, + "loss": 0.2793, + "step": 22194 + }, + { + "epoch": 1.0397245514592215, + "grad_norm": 0.589727808232042, + "learning_rate": 2.460649219104439e-06, + "loss": 0.2828, + "step": 22195 + }, + { + "epoch": 1.0397713964491497, + "grad_norm": 0.6020612008008538, + "learning_rate": 2.4604595925635953e-06, + "loss": 0.2846, + "step": 22196 + }, + { + "epoch": 1.0398182414390782, + "grad_norm": 0.6372518317861217, + "learning_rate": 2.4602699662502975e-06, + "loss": 0.2947, + "step": 22197 + }, + { + "epoch": 1.0398650864290064, + "grad_norm": 0.5948811013805896, + "learning_rate": 2.4600803401656354e-06, + "loss": 0.2827, + "step": 22198 + }, + { + "epoch": 1.0399119314189347, + "grad_norm": 0.6371447641557175, + "learning_rate": 2.4598907143107014e-06, + "loss": 0.2875, + "step": 22199 + }, + { + "epoch": 1.0399587764088631, + "grad_norm": 0.5706942580904761, + "learning_rate": 2.459701088686586e-06, + "loss": 0.277, + "step": 22200 + }, + { + "epoch": 1.0400056213987914, + "grad_norm": 0.6007327445471925, + "learning_rate": 2.4595114632943813e-06, + "loss": 0.2795, + "step": 22201 + }, + { + "epoch": 1.0400524663887196, + "grad_norm": 0.6168761252940358, + "learning_rate": 2.4593218381351773e-06, + "loss": 0.2931, + "step": 22202 + }, + { + "epoch": 1.040099311378648, + "grad_norm": 0.6337750377405126, + "learning_rate": 2.459132213210066e-06, + "loss": 0.2894, + "step": 22203 + }, + { + "epoch": 1.0401461563685763, + "grad_norm": 0.582390603783147, + "learning_rate": 2.4589425885201386e-06, + "loss": 0.2562, + "step": 22204 + }, + { + "epoch": 1.0401930013585048, + "grad_norm": 0.5921012097702394, + "learning_rate": 2.458752964066486e-06, + "loss": 0.2775, + "step": 22205 + }, + { + "epoch": 1.040239846348433, + "grad_norm": 0.6051759319915211, + "learning_rate": 2.4585633398502e-06, + "loss": 0.2797, + "step": 22206 + }, + { + "epoch": 1.0402866913383613, + "grad_norm": 0.5559829938523408, + "learning_rate": 2.4583737158723725e-06, + "loss": 0.2732, + "step": 22207 + }, + { + "epoch": 1.0403335363282897, + "grad_norm": 0.6010252013536658, + "learning_rate": 2.4581840921340924e-06, + "loss": 0.2771, + "step": 22208 + }, + { + "epoch": 1.040380381318218, + "grad_norm": 0.5486040585402532, + "learning_rate": 2.4579944686364528e-06, + "loss": 0.2611, + "step": 22209 + }, + { + "epoch": 1.0404272263081464, + "grad_norm": 0.624931323597741, + "learning_rate": 2.4578048453805445e-06, + "loss": 0.2864, + "step": 22210 + }, + { + "epoch": 1.0404740712980747, + "grad_norm": 0.5991135326043626, + "learning_rate": 2.4576152223674585e-06, + "loss": 0.2749, + "step": 22211 + }, + { + "epoch": 1.040520916288003, + "grad_norm": 0.5528362325104945, + "learning_rate": 2.457425599598286e-06, + "loss": 0.2578, + "step": 22212 + }, + { + "epoch": 1.0405677612779314, + "grad_norm": 0.6147343105973371, + "learning_rate": 2.4572359770741195e-06, + "loss": 0.2884, + "step": 22213 + }, + { + "epoch": 1.0406146062678596, + "grad_norm": 0.616973763270423, + "learning_rate": 2.457046354796049e-06, + "loss": 0.2893, + "step": 22214 + }, + { + "epoch": 1.040661451257788, + "grad_norm": 0.6061019014671252, + "learning_rate": 2.456856732765165e-06, + "loss": 0.2853, + "step": 22215 + }, + { + "epoch": 1.0407082962477163, + "grad_norm": 0.5884699983937648, + "learning_rate": 2.4566671109825604e-06, + "loss": 0.2756, + "step": 22216 + }, + { + "epoch": 1.0407551412376446, + "grad_norm": 0.5930791426656855, + "learning_rate": 2.4564774894493252e-06, + "loss": 0.2959, + "step": 22217 + }, + { + "epoch": 1.040801986227573, + "grad_norm": 0.6474826300121516, + "learning_rate": 2.4562878681665514e-06, + "loss": 0.2957, + "step": 22218 + }, + { + "epoch": 1.0408488312175013, + "grad_norm": 0.5782424814776941, + "learning_rate": 2.4560982471353306e-06, + "loss": 0.2657, + "step": 22219 + }, + { + "epoch": 1.0408956762074295, + "grad_norm": 0.5520920655856313, + "learning_rate": 2.4559086263567523e-06, + "loss": 0.2727, + "step": 22220 + }, + { + "epoch": 1.040942521197358, + "grad_norm": 0.5689947540624786, + "learning_rate": 2.4557190058319095e-06, + "loss": 0.277, + "step": 22221 + }, + { + "epoch": 1.0409893661872862, + "grad_norm": 0.5505960727304701, + "learning_rate": 2.455529385561892e-06, + "loss": 0.2591, + "step": 22222 + }, + { + "epoch": 1.0410362111772147, + "grad_norm": 0.5574681178905075, + "learning_rate": 2.4553397655477916e-06, + "loss": 0.2766, + "step": 22223 + }, + { + "epoch": 1.041083056167143, + "grad_norm": 0.5906827903823731, + "learning_rate": 2.4551501457907003e-06, + "loss": 0.275, + "step": 22224 + }, + { + "epoch": 1.0411299011570712, + "grad_norm": 0.5644340018734452, + "learning_rate": 2.4549605262917096e-06, + "loss": 0.2769, + "step": 22225 + }, + { + "epoch": 1.0411767461469996, + "grad_norm": 0.5769012802638543, + "learning_rate": 2.454770907051908e-06, + "loss": 0.2683, + "step": 22226 + }, + { + "epoch": 1.0412235911369279, + "grad_norm": 0.5308881551535699, + "learning_rate": 2.4545812880723888e-06, + "loss": 0.2481, + "step": 22227 + }, + { + "epoch": 1.0412704361268563, + "grad_norm": 0.5836590140723331, + "learning_rate": 2.4543916693542435e-06, + "loss": 0.2749, + "step": 22228 + }, + { + "epoch": 1.0413172811167846, + "grad_norm": 0.6033850822714071, + "learning_rate": 2.4542020508985615e-06, + "loss": 0.2614, + "step": 22229 + }, + { + "epoch": 1.0413641261067128, + "grad_norm": 0.5480946932159548, + "learning_rate": 2.454012432706436e-06, + "loss": 0.2821, + "step": 22230 + }, + { + "epoch": 1.0414109710966413, + "grad_norm": 0.5793383340604433, + "learning_rate": 2.4538228147789574e-06, + "loss": 0.2632, + "step": 22231 + }, + { + "epoch": 1.0414578160865695, + "grad_norm": 0.5342518251815472, + "learning_rate": 2.4536331971172176e-06, + "loss": 0.2639, + "step": 22232 + }, + { + "epoch": 1.041504661076498, + "grad_norm": 0.5936448085814473, + "learning_rate": 2.453443579722306e-06, + "loss": 0.2766, + "step": 22233 + }, + { + "epoch": 1.0415515060664262, + "grad_norm": 0.6058188008115123, + "learning_rate": 2.453253962595315e-06, + "loss": 0.278, + "step": 22234 + }, + { + "epoch": 1.0415983510563545, + "grad_norm": 0.5750909960270143, + "learning_rate": 2.4530643457373363e-06, + "loss": 0.2683, + "step": 22235 + }, + { + "epoch": 1.041645196046283, + "grad_norm": 0.6194473797913758, + "learning_rate": 2.4528747291494598e-06, + "loss": 0.2887, + "step": 22236 + }, + { + "epoch": 1.0416920410362112, + "grad_norm": 0.5910889143657079, + "learning_rate": 2.4526851128327774e-06, + "loss": 0.2896, + "step": 22237 + }, + { + "epoch": 1.0417388860261394, + "grad_norm": 0.5607441996379543, + "learning_rate": 2.4524954967883814e-06, + "loss": 0.2749, + "step": 22238 + }, + { + "epoch": 1.0417857310160679, + "grad_norm": 0.6087797049351392, + "learning_rate": 2.4523058810173615e-06, + "loss": 0.293, + "step": 22239 + }, + { + "epoch": 1.041832576005996, + "grad_norm": 0.5889624608279648, + "learning_rate": 2.452116265520808e-06, + "loss": 0.2722, + "step": 22240 + }, + { + "epoch": 1.0418794209959246, + "grad_norm": 0.5889755982987177, + "learning_rate": 2.451926650299814e-06, + "loss": 0.2811, + "step": 22241 + }, + { + "epoch": 1.0419262659858528, + "grad_norm": 0.5935123678016522, + "learning_rate": 2.4517370353554705e-06, + "loss": 0.2746, + "step": 22242 + }, + { + "epoch": 1.041973110975781, + "grad_norm": 0.5872245057595986, + "learning_rate": 2.451547420688868e-06, + "loss": 0.2906, + "step": 22243 + }, + { + "epoch": 1.0420199559657095, + "grad_norm": 0.5744035264983945, + "learning_rate": 2.4513578063010986e-06, + "loss": 0.2591, + "step": 22244 + }, + { + "epoch": 1.0420668009556378, + "grad_norm": 0.5960919233245738, + "learning_rate": 2.4511681921932516e-06, + "loss": 0.2697, + "step": 22245 + }, + { + "epoch": 1.0421136459455662, + "grad_norm": 0.5723255020616071, + "learning_rate": 2.4509785783664204e-06, + "loss": 0.2738, + "step": 22246 + }, + { + "epoch": 1.0421604909354945, + "grad_norm": 0.5494917845481454, + "learning_rate": 2.450788964821694e-06, + "loss": 0.2659, + "step": 22247 + }, + { + "epoch": 1.0422073359254227, + "grad_norm": 0.5572353231176324, + "learning_rate": 2.450599351560166e-06, + "loss": 0.2558, + "step": 22248 + }, + { + "epoch": 1.0422541809153512, + "grad_norm": 0.5686306815867692, + "learning_rate": 2.450409738582925e-06, + "loss": 0.2588, + "step": 22249 + }, + { + "epoch": 1.0423010259052794, + "grad_norm": 0.5531548778131691, + "learning_rate": 2.450220125891065e-06, + "loss": 0.2862, + "step": 22250 + }, + { + "epoch": 1.0423478708952079, + "grad_norm": 0.6193880644367185, + "learning_rate": 2.4500305134856746e-06, + "loss": 0.2856, + "step": 22251 + }, + { + "epoch": 1.042394715885136, + "grad_norm": 0.5812746854863885, + "learning_rate": 2.4498409013678468e-06, + "loss": 0.2728, + "step": 22252 + }, + { + "epoch": 1.0424415608750643, + "grad_norm": 0.5999611430459872, + "learning_rate": 2.449651289538671e-06, + "loss": 0.2793, + "step": 22253 + }, + { + "epoch": 1.0424884058649928, + "grad_norm": 0.5830146663549309, + "learning_rate": 2.4494616779992395e-06, + "loss": 0.2795, + "step": 22254 + }, + { + "epoch": 1.042535250854921, + "grad_norm": 0.5866461448790953, + "learning_rate": 2.449272066750644e-06, + "loss": 0.2809, + "step": 22255 + }, + { + "epoch": 1.0425820958448493, + "grad_norm": 0.5665504011327142, + "learning_rate": 2.4490824557939756e-06, + "loss": 0.2787, + "step": 22256 + }, + { + "epoch": 1.0426289408347778, + "grad_norm": 0.552740759796593, + "learning_rate": 2.4488928451303234e-06, + "loss": 0.2803, + "step": 22257 + }, + { + "epoch": 1.042675785824706, + "grad_norm": 0.5486300763715315, + "learning_rate": 2.4487032347607805e-06, + "loss": 0.263, + "step": 22258 + }, + { + "epoch": 1.0427226308146345, + "grad_norm": 0.5711027833162352, + "learning_rate": 2.448513624686438e-06, + "loss": 0.2722, + "step": 22259 + }, + { + "epoch": 1.0427694758045627, + "grad_norm": 0.5776553843975462, + "learning_rate": 2.448324014908386e-06, + "loss": 0.2968, + "step": 22260 + }, + { + "epoch": 1.042816320794491, + "grad_norm": 0.61297033778791, + "learning_rate": 2.448134405427716e-06, + "loss": 0.2718, + "step": 22261 + }, + { + "epoch": 1.0428631657844194, + "grad_norm": 0.5707580981954002, + "learning_rate": 2.447944796245521e-06, + "loss": 0.2824, + "step": 22262 + }, + { + "epoch": 1.0429100107743476, + "grad_norm": 0.6108107375499785, + "learning_rate": 2.4477551873628904e-06, + "loss": 0.2729, + "step": 22263 + }, + { + "epoch": 1.042956855764276, + "grad_norm": 0.5744978693009599, + "learning_rate": 2.4475655787809143e-06, + "loss": 0.2655, + "step": 22264 + }, + { + "epoch": 1.0430037007542043, + "grad_norm": 0.5681734521356445, + "learning_rate": 2.4473759705006853e-06, + "loss": 0.2793, + "step": 22265 + }, + { + "epoch": 1.0430505457441326, + "grad_norm": 0.5991315455647889, + "learning_rate": 2.447186362523295e-06, + "loss": 0.2844, + "step": 22266 + }, + { + "epoch": 1.043097390734061, + "grad_norm": 0.5694994691829532, + "learning_rate": 2.4469967548498336e-06, + "loss": 0.2636, + "step": 22267 + }, + { + "epoch": 1.0431442357239893, + "grad_norm": 0.5574505180617201, + "learning_rate": 2.4468071474813936e-06, + "loss": 0.272, + "step": 22268 + }, + { + "epoch": 1.0431910807139178, + "grad_norm": 0.6405405279330719, + "learning_rate": 2.4466175404190637e-06, + "loss": 0.2909, + "step": 22269 + }, + { + "epoch": 1.043237925703846, + "grad_norm": 0.5758734827151425, + "learning_rate": 2.446427933663937e-06, + "loss": 0.2662, + "step": 22270 + }, + { + "epoch": 1.0432847706937742, + "grad_norm": 0.5628975243217529, + "learning_rate": 2.4462383272171036e-06, + "loss": 0.2694, + "step": 22271 + }, + { + "epoch": 1.0433316156837027, + "grad_norm": 0.6364455301731952, + "learning_rate": 2.4460487210796554e-06, + "loss": 0.2768, + "step": 22272 + }, + { + "epoch": 1.043378460673631, + "grad_norm": 0.5776815626608558, + "learning_rate": 2.4458591152526832e-06, + "loss": 0.2797, + "step": 22273 + }, + { + "epoch": 1.0434253056635592, + "grad_norm": 0.5697550681179142, + "learning_rate": 2.4456695097372795e-06, + "loss": 0.28, + "step": 22274 + }, + { + "epoch": 1.0434721506534876, + "grad_norm": 0.6135833791126865, + "learning_rate": 2.4454799045345325e-06, + "loss": 0.2879, + "step": 22275 + }, + { + "epoch": 1.0435189956434159, + "grad_norm": 0.5931203780261508, + "learning_rate": 2.445290299645535e-06, + "loss": 0.2713, + "step": 22276 + }, + { + "epoch": 1.0435658406333443, + "grad_norm": 0.5877827816050988, + "learning_rate": 2.4451006950713784e-06, + "loss": 0.2809, + "step": 22277 + }, + { + "epoch": 1.0436126856232726, + "grad_norm": 0.6104088663919076, + "learning_rate": 2.444911090813153e-06, + "loss": 0.2834, + "step": 22278 + }, + { + "epoch": 1.0436595306132008, + "grad_norm": 0.6092853828999808, + "learning_rate": 2.444721486871951e-06, + "loss": 0.2837, + "step": 22279 + }, + { + "epoch": 1.0437063756031293, + "grad_norm": 0.6144609856060637, + "learning_rate": 2.4445318832488636e-06, + "loss": 0.2637, + "step": 22280 + }, + { + "epoch": 1.0437532205930575, + "grad_norm": 0.5918973956087472, + "learning_rate": 2.444342279944981e-06, + "loss": 0.261, + "step": 22281 + }, + { + "epoch": 1.043800065582986, + "grad_norm": 0.5661407082418414, + "learning_rate": 2.444152676961394e-06, + "loss": 0.2862, + "step": 22282 + }, + { + "epoch": 1.0438469105729142, + "grad_norm": 0.5902411420488363, + "learning_rate": 2.443963074299194e-06, + "loss": 0.2778, + "step": 22283 + }, + { + "epoch": 1.0438937555628425, + "grad_norm": 0.569609069904018, + "learning_rate": 2.443773471959473e-06, + "loss": 0.282, + "step": 22284 + }, + { + "epoch": 1.043940600552771, + "grad_norm": 0.5808054161315088, + "learning_rate": 2.443583869943321e-06, + "loss": 0.2628, + "step": 22285 + }, + { + "epoch": 1.0439874455426992, + "grad_norm": 0.5616201163971111, + "learning_rate": 2.443394268251831e-06, + "loss": 0.259, + "step": 22286 + }, + { + "epoch": 1.0440342905326276, + "grad_norm": 0.5802950641111291, + "learning_rate": 2.4432046668860913e-06, + "loss": 0.2697, + "step": 22287 + }, + { + "epoch": 1.0440811355225559, + "grad_norm": 0.6044185960350917, + "learning_rate": 2.4430150658471956e-06, + "loss": 0.2976, + "step": 22288 + }, + { + "epoch": 1.0441279805124841, + "grad_norm": 0.6312099348219601, + "learning_rate": 2.442825465136233e-06, + "loss": 0.2782, + "step": 22289 + }, + { + "epoch": 1.0441748255024126, + "grad_norm": 0.5366344679822127, + "learning_rate": 2.4426358647542957e-06, + "loss": 0.2623, + "step": 22290 + }, + { + "epoch": 1.0442216704923408, + "grad_norm": 0.5713511194021933, + "learning_rate": 2.4424462647024747e-06, + "loss": 0.2798, + "step": 22291 + }, + { + "epoch": 1.044268515482269, + "grad_norm": 0.5623365538524423, + "learning_rate": 2.4422566649818618e-06, + "loss": 0.269, + "step": 22292 + }, + { + "epoch": 1.0443153604721975, + "grad_norm": 0.6299928978075874, + "learning_rate": 2.4420670655935458e-06, + "loss": 0.2782, + "step": 22293 + }, + { + "epoch": 1.0443622054621258, + "grad_norm": 0.5949781135607475, + "learning_rate": 2.4418774665386203e-06, + "loss": 0.2797, + "step": 22294 + }, + { + "epoch": 1.0444090504520542, + "grad_norm": 0.5788975426209596, + "learning_rate": 2.4416878678181745e-06, + "loss": 0.2741, + "step": 22295 + }, + { + "epoch": 1.0444558954419825, + "grad_norm": 0.5570665154949828, + "learning_rate": 2.4414982694333008e-06, + "loss": 0.2568, + "step": 22296 + }, + { + "epoch": 1.0445027404319107, + "grad_norm": 0.5922202311019497, + "learning_rate": 2.4413086713850896e-06, + "loss": 0.2852, + "step": 22297 + }, + { + "epoch": 1.0445495854218392, + "grad_norm": 0.6103828778219118, + "learning_rate": 2.4411190736746324e-06, + "loss": 0.2709, + "step": 22298 + }, + { + "epoch": 1.0445964304117674, + "grad_norm": 0.5524875945194421, + "learning_rate": 2.440929476303021e-06, + "loss": 0.2737, + "step": 22299 + }, + { + "epoch": 1.0446432754016959, + "grad_norm": 0.5455070998938133, + "learning_rate": 2.4407398792713446e-06, + "loss": 0.2501, + "step": 22300 + }, + { + "epoch": 1.0446901203916241, + "grad_norm": 0.5499520858720063, + "learning_rate": 2.440550282580696e-06, + "loss": 0.2792, + "step": 22301 + }, + { + "epoch": 1.0447369653815524, + "grad_norm": 0.5801524662155791, + "learning_rate": 2.440360686232165e-06, + "loss": 0.2894, + "step": 22302 + }, + { + "epoch": 1.0447838103714808, + "grad_norm": 0.6023598422166876, + "learning_rate": 2.440171090226843e-06, + "loss": 0.2703, + "step": 22303 + }, + { + "epoch": 1.044830655361409, + "grad_norm": 0.5469967723450699, + "learning_rate": 2.4399814945658217e-06, + "loss": 0.269, + "step": 22304 + }, + { + "epoch": 1.0448775003513375, + "grad_norm": 0.5644751420715194, + "learning_rate": 2.4397918992501926e-06, + "loss": 0.2505, + "step": 22305 + }, + { + "epoch": 1.0449243453412658, + "grad_norm": 0.5817803511759395, + "learning_rate": 2.4396023042810453e-06, + "loss": 0.2903, + "step": 22306 + }, + { + "epoch": 1.044971190331194, + "grad_norm": 0.5572345371685882, + "learning_rate": 2.439412709659471e-06, + "loss": 0.2719, + "step": 22307 + }, + { + "epoch": 1.0450180353211225, + "grad_norm": 0.5826278431619882, + "learning_rate": 2.439223115386562e-06, + "loss": 0.2848, + "step": 22308 + }, + { + "epoch": 1.0450648803110507, + "grad_norm": 0.5219177724623222, + "learning_rate": 2.439033521463408e-06, + "loss": 0.2556, + "step": 22309 + }, + { + "epoch": 1.045111725300979, + "grad_norm": 0.5788517806920471, + "learning_rate": 2.438843927891101e-06, + "loss": 0.2698, + "step": 22310 + }, + { + "epoch": 1.0451585702909074, + "grad_norm": 0.5852270617583549, + "learning_rate": 2.438654334670733e-06, + "loss": 0.2646, + "step": 22311 + }, + { + "epoch": 1.0452054152808357, + "grad_norm": 0.6016641000564191, + "learning_rate": 2.4384647418033933e-06, + "loss": 0.2916, + "step": 22312 + }, + { + "epoch": 1.0452522602707641, + "grad_norm": 0.6047871738451335, + "learning_rate": 2.438275149290173e-06, + "loss": 0.2866, + "step": 22313 + }, + { + "epoch": 1.0452991052606924, + "grad_norm": 0.5559693450880389, + "learning_rate": 2.4380855571321634e-06, + "loss": 0.2696, + "step": 22314 + }, + { + "epoch": 1.0453459502506206, + "grad_norm": 0.5725007722786739, + "learning_rate": 2.437895965330457e-06, + "loss": 0.2647, + "step": 22315 + }, + { + "epoch": 1.045392795240549, + "grad_norm": 0.5643852478247491, + "learning_rate": 2.4377063738861427e-06, + "loss": 0.2657, + "step": 22316 + }, + { + "epoch": 1.0454396402304773, + "grad_norm": 0.6291940871439119, + "learning_rate": 2.4375167828003136e-06, + "loss": 0.2763, + "step": 22317 + }, + { + "epoch": 1.0454864852204058, + "grad_norm": 0.5780510667063049, + "learning_rate": 2.437327192074059e-06, + "loss": 0.26, + "step": 22318 + }, + { + "epoch": 1.045533330210334, + "grad_norm": 0.5924136958206546, + "learning_rate": 2.437137601708471e-06, + "loss": 0.2862, + "step": 22319 + }, + { + "epoch": 1.0455801752002623, + "grad_norm": 0.607505560109336, + "learning_rate": 2.4369480117046396e-06, + "loss": 0.2858, + "step": 22320 + }, + { + "epoch": 1.0456270201901907, + "grad_norm": 0.5730817464230091, + "learning_rate": 2.4367584220636565e-06, + "loss": 0.2756, + "step": 22321 + }, + { + "epoch": 1.045673865180119, + "grad_norm": 0.6149602971732681, + "learning_rate": 2.4365688327866138e-06, + "loss": 0.2926, + "step": 22322 + }, + { + "epoch": 1.0457207101700474, + "grad_norm": 0.5741026262212751, + "learning_rate": 2.4363792438746016e-06, + "loss": 0.2609, + "step": 22323 + }, + { + "epoch": 1.0457675551599757, + "grad_norm": 0.5524763144547652, + "learning_rate": 2.43618965532871e-06, + "loss": 0.2745, + "step": 22324 + }, + { + "epoch": 1.045814400149904, + "grad_norm": 0.5641856247223662, + "learning_rate": 2.4360000671500307e-06, + "loss": 0.2726, + "step": 22325 + }, + { + "epoch": 1.0458612451398324, + "grad_norm": 0.5488199111138107, + "learning_rate": 2.435810479339656e-06, + "loss": 0.2596, + "step": 22326 + }, + { + "epoch": 1.0459080901297606, + "grad_norm": 0.5985594394764973, + "learning_rate": 2.435620891898675e-06, + "loss": 0.2754, + "step": 22327 + }, + { + "epoch": 1.0459549351196888, + "grad_norm": 0.6105926746947016, + "learning_rate": 2.4354313048281793e-06, + "loss": 0.2799, + "step": 22328 + }, + { + "epoch": 1.0460017801096173, + "grad_norm": 0.6208071273880603, + "learning_rate": 2.4352417181292614e-06, + "loss": 0.2775, + "step": 22329 + }, + { + "epoch": 1.0460486250995455, + "grad_norm": 0.5512863440661939, + "learning_rate": 2.435052131803011e-06, + "loss": 0.2535, + "step": 22330 + }, + { + "epoch": 1.046095470089474, + "grad_norm": 0.604306820187085, + "learning_rate": 2.4348625458505182e-06, + "loss": 0.2737, + "step": 22331 + }, + { + "epoch": 1.0461423150794023, + "grad_norm": 0.6019882426473938, + "learning_rate": 2.4346729602728754e-06, + "loss": 0.277, + "step": 22332 + }, + { + "epoch": 1.0461891600693305, + "grad_norm": 0.6204279811366865, + "learning_rate": 2.4344833750711737e-06, + "loss": 0.2794, + "step": 22333 + }, + { + "epoch": 1.046236005059259, + "grad_norm": 0.5923439724734889, + "learning_rate": 2.4342937902465034e-06, + "loss": 0.2761, + "step": 22334 + }, + { + "epoch": 1.0462828500491872, + "grad_norm": 0.6188768563143566, + "learning_rate": 2.4341042057999566e-06, + "loss": 0.2999, + "step": 22335 + }, + { + "epoch": 1.0463296950391157, + "grad_norm": 0.5838386260231239, + "learning_rate": 2.4339146217326226e-06, + "loss": 0.2723, + "step": 22336 + }, + { + "epoch": 1.046376540029044, + "grad_norm": 0.5980118588945684, + "learning_rate": 2.4337250380455937e-06, + "loss": 0.2779, + "step": 22337 + }, + { + "epoch": 1.0464233850189721, + "grad_norm": 0.5781807292013329, + "learning_rate": 2.43353545473996e-06, + "loss": 0.2717, + "step": 22338 + }, + { + "epoch": 1.0464702300089006, + "grad_norm": 0.5608237978894834, + "learning_rate": 2.433345871816814e-06, + "loss": 0.2691, + "step": 22339 + }, + { + "epoch": 1.0465170749988288, + "grad_norm": 0.5699388452043436, + "learning_rate": 2.433156289277245e-06, + "loss": 0.2777, + "step": 22340 + }, + { + "epoch": 1.0465639199887573, + "grad_norm": 0.5865532900935219, + "learning_rate": 2.4329667071223457e-06, + "loss": 0.2782, + "step": 22341 + }, + { + "epoch": 1.0466107649786855, + "grad_norm": 0.5941188549993626, + "learning_rate": 2.4327771253532053e-06, + "loss": 0.2789, + "step": 22342 + }, + { + "epoch": 1.0466576099686138, + "grad_norm": 0.6062916755473737, + "learning_rate": 2.4325875439709158e-06, + "loss": 0.2732, + "step": 22343 + }, + { + "epoch": 1.0467044549585423, + "grad_norm": 0.5798594384017369, + "learning_rate": 2.432397962976568e-06, + "loss": 0.2744, + "step": 22344 + }, + { + "epoch": 1.0467512999484705, + "grad_norm": 0.6160330525140767, + "learning_rate": 2.4322083823712523e-06, + "loss": 0.2877, + "step": 22345 + }, + { + "epoch": 1.0467981449383987, + "grad_norm": 0.5789041580440822, + "learning_rate": 2.4320188021560616e-06, + "loss": 0.2679, + "step": 22346 + }, + { + "epoch": 1.0468449899283272, + "grad_norm": 0.562125147165797, + "learning_rate": 2.4318292223320857e-06, + "loss": 0.2649, + "step": 22347 + }, + { + "epoch": 1.0468918349182554, + "grad_norm": 0.568143174266472, + "learning_rate": 2.431639642900415e-06, + "loss": 0.2765, + "step": 22348 + }, + { + "epoch": 1.046938679908184, + "grad_norm": 0.5670349894104019, + "learning_rate": 2.43145006386214e-06, + "loss": 0.2723, + "step": 22349 + }, + { + "epoch": 1.0469855248981121, + "grad_norm": 0.5320932534342286, + "learning_rate": 2.4312604852183537e-06, + "loss": 0.2708, + "step": 22350 + }, + { + "epoch": 1.0470323698880404, + "grad_norm": 0.6382820568745592, + "learning_rate": 2.4310709069701454e-06, + "loss": 0.276, + "step": 22351 + }, + { + "epoch": 1.0470792148779688, + "grad_norm": 0.5464430342233655, + "learning_rate": 2.430881329118607e-06, + "loss": 0.2673, + "step": 22352 + }, + { + "epoch": 1.047126059867897, + "grad_norm": 0.6344334814031437, + "learning_rate": 2.43069175166483e-06, + "loss": 0.2729, + "step": 22353 + }, + { + "epoch": 1.0471729048578255, + "grad_norm": 0.5929632155053878, + "learning_rate": 2.430502174609904e-06, + "loss": 0.2714, + "step": 22354 + }, + { + "epoch": 1.0472197498477538, + "grad_norm": 0.6221476984070133, + "learning_rate": 2.43031259795492e-06, + "loss": 0.2779, + "step": 22355 + }, + { + "epoch": 1.047266594837682, + "grad_norm": 0.6206259653846662, + "learning_rate": 2.4301230217009695e-06, + "loss": 0.3019, + "step": 22356 + }, + { + "epoch": 1.0473134398276105, + "grad_norm": 0.5824265609395702, + "learning_rate": 2.429933445849144e-06, + "loss": 0.2836, + "step": 22357 + }, + { + "epoch": 1.0473602848175387, + "grad_norm": 0.6129572802288434, + "learning_rate": 2.4297438704005334e-06, + "loss": 0.268, + "step": 22358 + }, + { + "epoch": 1.0474071298074672, + "grad_norm": 0.5625335349056312, + "learning_rate": 2.42955429535623e-06, + "loss": 0.2784, + "step": 22359 + }, + { + "epoch": 1.0474539747973954, + "grad_norm": 0.5860154485511734, + "learning_rate": 2.4293647207173233e-06, + "loss": 0.2868, + "step": 22360 + }, + { + "epoch": 1.0475008197873237, + "grad_norm": 0.5835913540702912, + "learning_rate": 2.429175146484905e-06, + "loss": 0.2815, + "step": 22361 + }, + { + "epoch": 1.0475476647772521, + "grad_norm": 0.6099007787671114, + "learning_rate": 2.4289855726600654e-06, + "loss": 0.2712, + "step": 22362 + }, + { + "epoch": 1.0475945097671804, + "grad_norm": 0.5671893092513196, + "learning_rate": 2.4287959992438963e-06, + "loss": 0.2666, + "step": 22363 + }, + { + "epoch": 1.0476413547571086, + "grad_norm": 0.602844912721262, + "learning_rate": 2.4286064262374887e-06, + "loss": 0.286, + "step": 22364 + }, + { + "epoch": 1.047688199747037, + "grad_norm": 0.630318100894037, + "learning_rate": 2.4284168536419323e-06, + "loss": 0.2985, + "step": 22365 + }, + { + "epoch": 1.0477350447369653, + "grad_norm": 0.5881846761310072, + "learning_rate": 2.4282272814583207e-06, + "loss": 0.2742, + "step": 22366 + }, + { + "epoch": 1.0477818897268938, + "grad_norm": 0.5628187109869601, + "learning_rate": 2.428037709687742e-06, + "loss": 0.2632, + "step": 22367 + }, + { + "epoch": 1.047828734716822, + "grad_norm": 0.6712994543972376, + "learning_rate": 2.427848138331288e-06, + "loss": 0.2772, + "step": 22368 + }, + { + "epoch": 1.0478755797067503, + "grad_norm": 0.6092358302330698, + "learning_rate": 2.4276585673900496e-06, + "loss": 0.2804, + "step": 22369 + }, + { + "epoch": 1.0479224246966787, + "grad_norm": 0.5920876035459672, + "learning_rate": 2.427468996865118e-06, + "loss": 0.2865, + "step": 22370 + }, + { + "epoch": 1.047969269686607, + "grad_norm": 0.5952409144063338, + "learning_rate": 2.4272794267575847e-06, + "loss": 0.2704, + "step": 22371 + }, + { + "epoch": 1.0480161146765354, + "grad_norm": 0.5949934428044322, + "learning_rate": 2.42708985706854e-06, + "loss": 0.2835, + "step": 22372 + }, + { + "epoch": 1.0480629596664637, + "grad_norm": 0.5816723471012621, + "learning_rate": 2.4269002877990747e-06, + "loss": 0.2719, + "step": 22373 + }, + { + "epoch": 1.048109804656392, + "grad_norm": 0.5795097743354853, + "learning_rate": 2.4267107189502796e-06, + "loss": 0.2779, + "step": 22374 + }, + { + "epoch": 1.0481566496463204, + "grad_norm": 0.6567197742893779, + "learning_rate": 2.426521150523246e-06, + "loss": 0.2922, + "step": 22375 + }, + { + "epoch": 1.0482034946362486, + "grad_norm": 0.5783496296955539, + "learning_rate": 2.4263315825190643e-06, + "loss": 0.2748, + "step": 22376 + }, + { + "epoch": 1.048250339626177, + "grad_norm": 0.5656703524340884, + "learning_rate": 2.426142014938827e-06, + "loss": 0.2644, + "step": 22377 + }, + { + "epoch": 1.0482971846161053, + "grad_norm": 0.6397283188197348, + "learning_rate": 2.425952447783623e-06, + "loss": 0.2797, + "step": 22378 + }, + { + "epoch": 1.0483440296060336, + "grad_norm": 0.6522565018563337, + "learning_rate": 2.4257628810545443e-06, + "loss": 0.286, + "step": 22379 + }, + { + "epoch": 1.048390874595962, + "grad_norm": 0.5615141670872118, + "learning_rate": 2.425573314752681e-06, + "loss": 0.2506, + "step": 22380 + }, + { + "epoch": 1.0484377195858903, + "grad_norm": 0.5691754917151777, + "learning_rate": 2.425383748879125e-06, + "loss": 0.2674, + "step": 22381 + }, + { + "epoch": 1.0484845645758185, + "grad_norm": 0.6058403632219951, + "learning_rate": 2.425194183434967e-06, + "loss": 0.2852, + "step": 22382 + }, + { + "epoch": 1.048531409565747, + "grad_norm": 0.6092419921057363, + "learning_rate": 2.425004618421297e-06, + "loss": 0.2791, + "step": 22383 + }, + { + "epoch": 1.0485782545556752, + "grad_norm": 0.5737737570235756, + "learning_rate": 2.424815053839208e-06, + "loss": 0.2811, + "step": 22384 + }, + { + "epoch": 1.0486250995456037, + "grad_norm": 0.5841411244019412, + "learning_rate": 2.424625489689788e-06, + "loss": 0.2824, + "step": 22385 + }, + { + "epoch": 1.048671944535532, + "grad_norm": 0.6097988785608471, + "learning_rate": 2.42443592597413e-06, + "loss": 0.2915, + "step": 22386 + }, + { + "epoch": 1.0487187895254602, + "grad_norm": 0.6285156404450432, + "learning_rate": 2.424246362693324e-06, + "loss": 0.2825, + "step": 22387 + }, + { + "epoch": 1.0487656345153886, + "grad_norm": 0.6505442977456961, + "learning_rate": 2.4240567998484614e-06, + "loss": 0.3009, + "step": 22388 + }, + { + "epoch": 1.0488124795053169, + "grad_norm": 0.6680001039725205, + "learning_rate": 2.4238672374406323e-06, + "loss": 0.3083, + "step": 22389 + }, + { + "epoch": 1.0488593244952453, + "grad_norm": 0.5888652884613195, + "learning_rate": 2.423677675470929e-06, + "loss": 0.2755, + "step": 22390 + }, + { + "epoch": 1.0489061694851736, + "grad_norm": 0.5502837023254781, + "learning_rate": 2.423488113940441e-06, + "loss": 0.2725, + "step": 22391 + }, + { + "epoch": 1.0489530144751018, + "grad_norm": 0.5797249613241826, + "learning_rate": 2.4232985528502597e-06, + "loss": 0.2812, + "step": 22392 + }, + { + "epoch": 1.0489998594650303, + "grad_norm": 0.5830509074187578, + "learning_rate": 2.4231089922014754e-06, + "loss": 0.2816, + "step": 22393 + }, + { + "epoch": 1.0490467044549585, + "grad_norm": 0.6229833765753229, + "learning_rate": 2.42291943199518e-06, + "loss": 0.3043, + "step": 22394 + }, + { + "epoch": 1.049093549444887, + "grad_norm": 0.5725637732793672, + "learning_rate": 2.422729872232464e-06, + "loss": 0.2814, + "step": 22395 + }, + { + "epoch": 1.0491403944348152, + "grad_norm": 0.6049044478866875, + "learning_rate": 2.422540312914419e-06, + "loss": 0.2805, + "step": 22396 + }, + { + "epoch": 1.0491872394247435, + "grad_norm": 0.5872969904623365, + "learning_rate": 2.4223507540421337e-06, + "loss": 0.2796, + "step": 22397 + }, + { + "epoch": 1.049234084414672, + "grad_norm": 0.6017550055759734, + "learning_rate": 2.4221611956167004e-06, + "loss": 0.2785, + "step": 22398 + }, + { + "epoch": 1.0492809294046002, + "grad_norm": 0.590211595227411, + "learning_rate": 2.4219716376392102e-06, + "loss": 0.2803, + "step": 22399 + }, + { + "epoch": 1.0493277743945284, + "grad_norm": 0.5700924036949891, + "learning_rate": 2.4217820801107534e-06, + "loss": 0.2604, + "step": 22400 + }, + { + "epoch": 1.0493746193844569, + "grad_norm": 0.6220223874805073, + "learning_rate": 2.421592523032421e-06, + "loss": 0.2933, + "step": 22401 + }, + { + "epoch": 1.049421464374385, + "grad_norm": 0.6147365928697315, + "learning_rate": 2.421402966405305e-06, + "loss": 0.2904, + "step": 22402 + }, + { + "epoch": 1.0494683093643136, + "grad_norm": 0.6390326910997207, + "learning_rate": 2.421213410230494e-06, + "loss": 0.2834, + "step": 22403 + }, + { + "epoch": 1.0495151543542418, + "grad_norm": 0.5722750687870128, + "learning_rate": 2.4210238545090805e-06, + "loss": 0.2788, + "step": 22404 + }, + { + "epoch": 1.04956199934417, + "grad_norm": 0.6455221090362325, + "learning_rate": 2.420834299242154e-06, + "loss": 0.2947, + "step": 22405 + }, + { + "epoch": 1.0496088443340985, + "grad_norm": 0.5907898021639162, + "learning_rate": 2.4206447444308076e-06, + "loss": 0.2902, + "step": 22406 + }, + { + "epoch": 1.0496556893240268, + "grad_norm": 0.5494919732395576, + "learning_rate": 2.42045519007613e-06, + "loss": 0.2758, + "step": 22407 + }, + { + "epoch": 1.0497025343139552, + "grad_norm": 0.5783268981279392, + "learning_rate": 2.4202656361792133e-06, + "loss": 0.2796, + "step": 22408 + }, + { + "epoch": 1.0497493793038835, + "grad_norm": 0.5647718718680316, + "learning_rate": 2.4200760827411472e-06, + "loss": 0.2711, + "step": 22409 + }, + { + "epoch": 1.0497962242938117, + "grad_norm": 0.5710835977391034, + "learning_rate": 2.4198865297630234e-06, + "loss": 0.2652, + "step": 22410 + }, + { + "epoch": 1.0498430692837402, + "grad_norm": 0.5413306135996754, + "learning_rate": 2.419696977245932e-06, + "loss": 0.2836, + "step": 22411 + }, + { + "epoch": 1.0498899142736684, + "grad_norm": 0.633438720359569, + "learning_rate": 2.419507425190965e-06, + "loss": 0.2869, + "step": 22412 + }, + { + "epoch": 1.0499367592635969, + "grad_norm": 0.616499588956445, + "learning_rate": 2.4193178735992125e-06, + "loss": 0.2935, + "step": 22413 + }, + { + "epoch": 1.049983604253525, + "grad_norm": 0.6176032794120843, + "learning_rate": 2.419128322471766e-06, + "loss": 0.2677, + "step": 22414 + }, + { + "epoch": 1.0500304492434533, + "grad_norm": 0.6478799321319675, + "learning_rate": 2.418938771809714e-06, + "loss": 0.2854, + "step": 22415 + }, + { + "epoch": 1.0500772942333818, + "grad_norm": 0.5808200145302378, + "learning_rate": 2.4187492216141497e-06, + "loss": 0.2733, + "step": 22416 + }, + { + "epoch": 1.05012413922331, + "grad_norm": 0.5849167941818554, + "learning_rate": 2.4185596718861633e-06, + "loss": 0.2695, + "step": 22417 + }, + { + "epoch": 1.0501709842132383, + "grad_norm": 0.5999476474275202, + "learning_rate": 2.4183701226268457e-06, + "loss": 0.2945, + "step": 22418 + }, + { + "epoch": 1.0502178292031668, + "grad_norm": 0.5928709667871925, + "learning_rate": 2.4181805738372867e-06, + "loss": 0.284, + "step": 22419 + }, + { + "epoch": 1.050264674193095, + "grad_norm": 0.5821558375169029, + "learning_rate": 2.4179910255185797e-06, + "loss": 0.2781, + "step": 22420 + }, + { + "epoch": 1.0503115191830235, + "grad_norm": 0.5624894115701943, + "learning_rate": 2.4178014776718125e-06, + "loss": 0.2552, + "step": 22421 + }, + { + "epoch": 1.0503583641729517, + "grad_norm": 0.6067638291586279, + "learning_rate": 2.417611930298077e-06, + "loss": 0.2726, + "step": 22422 + }, + { + "epoch": 1.05040520916288, + "grad_norm": 0.5734819139492239, + "learning_rate": 2.417422383398464e-06, + "loss": 0.275, + "step": 22423 + }, + { + "epoch": 1.0504520541528084, + "grad_norm": 0.5775572417813203, + "learning_rate": 2.417232836974065e-06, + "loss": 0.2719, + "step": 22424 + }, + { + "epoch": 1.0504988991427366, + "grad_norm": 0.5834305277384075, + "learning_rate": 2.41704329102597e-06, + "loss": 0.2637, + "step": 22425 + }, + { + "epoch": 1.050545744132665, + "grad_norm": 0.5834143481039294, + "learning_rate": 2.416853745555271e-06, + "loss": 0.2911, + "step": 22426 + }, + { + "epoch": 1.0505925891225933, + "grad_norm": 0.5758057739671573, + "learning_rate": 2.4166642005630565e-06, + "loss": 0.2594, + "step": 22427 + }, + { + "epoch": 1.0506394341125216, + "grad_norm": 0.6044039361013904, + "learning_rate": 2.416474656050419e-06, + "loss": 0.2863, + "step": 22428 + }, + { + "epoch": 1.05068627910245, + "grad_norm": 0.5991994685012637, + "learning_rate": 2.4162851120184486e-06, + "loss": 0.2769, + "step": 22429 + }, + { + "epoch": 1.0507331240923783, + "grad_norm": 0.5920996938315383, + "learning_rate": 2.416095568468237e-06, + "loss": 0.2806, + "step": 22430 + }, + { + "epoch": 1.0507799690823068, + "grad_norm": 0.6327682108311621, + "learning_rate": 2.4159060254008733e-06, + "loss": 0.2953, + "step": 22431 + }, + { + "epoch": 1.050826814072235, + "grad_norm": 0.6244938662968375, + "learning_rate": 2.4157164828174493e-06, + "loss": 0.2746, + "step": 22432 + }, + { + "epoch": 1.0508736590621632, + "grad_norm": 0.6416428580384489, + "learning_rate": 2.415526940719057e-06, + "loss": 0.2913, + "step": 22433 + }, + { + "epoch": 1.0509205040520917, + "grad_norm": 0.5735009557629057, + "learning_rate": 2.4153373991067857e-06, + "loss": 0.29, + "step": 22434 + }, + { + "epoch": 1.05096734904202, + "grad_norm": 0.592345855827647, + "learning_rate": 2.415147857981726e-06, + "loss": 0.2794, + "step": 22435 + }, + { + "epoch": 1.0510141940319482, + "grad_norm": 0.5968986622722254, + "learning_rate": 2.414958317344968e-06, + "loss": 0.2809, + "step": 22436 + }, + { + "epoch": 1.0510610390218766, + "grad_norm": 0.5974851406607755, + "learning_rate": 2.414768777197605e-06, + "loss": 0.2953, + "step": 22437 + }, + { + "epoch": 1.0511078840118049, + "grad_norm": 0.5714831210468239, + "learning_rate": 2.4145792375407256e-06, + "loss": 0.2724, + "step": 22438 + }, + { + "epoch": 1.0511547290017333, + "grad_norm": 0.5841877066378723, + "learning_rate": 2.414389698375422e-06, + "loss": 0.2786, + "step": 22439 + }, + { + "epoch": 1.0512015739916616, + "grad_norm": 0.5858105170261552, + "learning_rate": 2.414200159702783e-06, + "loss": 0.2917, + "step": 22440 + }, + { + "epoch": 1.0512484189815898, + "grad_norm": 0.5912473845225705, + "learning_rate": 2.4140106215239013e-06, + "loss": 0.2926, + "step": 22441 + }, + { + "epoch": 1.0512952639715183, + "grad_norm": 0.5938759732381067, + "learning_rate": 2.4138210838398666e-06, + "loss": 0.2781, + "step": 22442 + }, + { + "epoch": 1.0513421089614465, + "grad_norm": 0.625404375146986, + "learning_rate": 2.4136315466517698e-06, + "loss": 0.2895, + "step": 22443 + }, + { + "epoch": 1.051388953951375, + "grad_norm": 0.5743242510195513, + "learning_rate": 2.413442009960702e-06, + "loss": 0.2633, + "step": 22444 + }, + { + "epoch": 1.0514357989413032, + "grad_norm": 0.5674412870358775, + "learning_rate": 2.413252473767754e-06, + "loss": 0.2565, + "step": 22445 + }, + { + "epoch": 1.0514826439312315, + "grad_norm": 0.5769801938885608, + "learning_rate": 2.4130629380740157e-06, + "loss": 0.2781, + "step": 22446 + }, + { + "epoch": 1.05152948892116, + "grad_norm": 0.5898065234881181, + "learning_rate": 2.412873402880578e-06, + "loss": 0.2876, + "step": 22447 + }, + { + "epoch": 1.0515763339110882, + "grad_norm": 0.5825332834307502, + "learning_rate": 2.4126838681885327e-06, + "loss": 0.2828, + "step": 22448 + }, + { + "epoch": 1.0516231789010166, + "grad_norm": 0.5937963049732428, + "learning_rate": 2.412494333998969e-06, + "loss": 0.2868, + "step": 22449 + }, + { + "epoch": 1.0516700238909449, + "grad_norm": 0.5789983448561639, + "learning_rate": 2.4123048003129785e-06, + "loss": 0.2733, + "step": 22450 + }, + { + "epoch": 1.0517168688808731, + "grad_norm": 0.5647010371632157, + "learning_rate": 2.4121152671316532e-06, + "loss": 0.2663, + "step": 22451 + }, + { + "epoch": 1.0517637138708016, + "grad_norm": 0.5782528182827321, + "learning_rate": 2.4119257344560816e-06, + "loss": 0.2772, + "step": 22452 + }, + { + "epoch": 1.0518105588607298, + "grad_norm": 0.5585694291800295, + "learning_rate": 2.4117362022873546e-06, + "loss": 0.26, + "step": 22453 + }, + { + "epoch": 1.051857403850658, + "grad_norm": 0.5727594970342728, + "learning_rate": 2.411546670626564e-06, + "loss": 0.2637, + "step": 22454 + }, + { + "epoch": 1.0519042488405865, + "grad_norm": 0.6327336879158305, + "learning_rate": 2.4113571394748005e-06, + "loss": 0.2806, + "step": 22455 + }, + { + "epoch": 1.0519510938305148, + "grad_norm": 0.6238904093242198, + "learning_rate": 2.4111676088331536e-06, + "loss": 0.2889, + "step": 22456 + }, + { + "epoch": 1.0519979388204432, + "grad_norm": 0.6289536340456643, + "learning_rate": 2.410978078702716e-06, + "loss": 0.2858, + "step": 22457 + }, + { + "epoch": 1.0520447838103715, + "grad_norm": 0.6100772105121874, + "learning_rate": 2.410788549084576e-06, + "loss": 0.2769, + "step": 22458 + }, + { + "epoch": 1.0520916288002997, + "grad_norm": 0.6059371030532443, + "learning_rate": 2.410599019979826e-06, + "loss": 0.2825, + "step": 22459 + }, + { + "epoch": 1.0521384737902282, + "grad_norm": 0.6176465415451697, + "learning_rate": 2.4104094913895556e-06, + "loss": 0.2931, + "step": 22460 + }, + { + "epoch": 1.0521853187801564, + "grad_norm": 0.6328094504980406, + "learning_rate": 2.4102199633148565e-06, + "loss": 0.292, + "step": 22461 + }, + { + "epoch": 1.0522321637700849, + "grad_norm": 0.5637310494246182, + "learning_rate": 2.410030435756819e-06, + "loss": 0.267, + "step": 22462 + }, + { + "epoch": 1.0522790087600131, + "grad_norm": 0.6331618859436968, + "learning_rate": 2.4098409087165343e-06, + "loss": 0.2846, + "step": 22463 + }, + { + "epoch": 1.0523258537499414, + "grad_norm": 0.6179149869321653, + "learning_rate": 2.4096513821950916e-06, + "loss": 0.2888, + "step": 22464 + }, + { + "epoch": 1.0523726987398698, + "grad_norm": 0.6378637838587535, + "learning_rate": 2.4094618561935827e-06, + "loss": 0.3015, + "step": 22465 + }, + { + "epoch": 1.052419543729798, + "grad_norm": 0.5887098092908541, + "learning_rate": 2.409272330713098e-06, + "loss": 0.2693, + "step": 22466 + }, + { + "epoch": 1.0524663887197265, + "grad_norm": 0.5987794716364534, + "learning_rate": 2.409082805754728e-06, + "loss": 0.264, + "step": 22467 + }, + { + "epoch": 1.0525132337096548, + "grad_norm": 0.5978229570117002, + "learning_rate": 2.408893281319564e-06, + "loss": 0.2905, + "step": 22468 + }, + { + "epoch": 1.052560078699583, + "grad_norm": 0.6053376600930388, + "learning_rate": 2.408703757408697e-06, + "loss": 0.2878, + "step": 22469 + }, + { + "epoch": 1.0526069236895115, + "grad_norm": 0.5594687831242506, + "learning_rate": 2.4085142340232164e-06, + "loss": 0.2662, + "step": 22470 + }, + { + "epoch": 1.0526537686794397, + "grad_norm": 0.6112640625580176, + "learning_rate": 2.408324711164213e-06, + "loss": 0.274, + "step": 22471 + }, + { + "epoch": 1.052700613669368, + "grad_norm": 0.6284970251207345, + "learning_rate": 2.4081351888327774e-06, + "loss": 0.2863, + "step": 22472 + }, + { + "epoch": 1.0527474586592964, + "grad_norm": 0.5961581470994527, + "learning_rate": 2.4079456670300015e-06, + "loss": 0.2791, + "step": 22473 + }, + { + "epoch": 1.0527943036492247, + "grad_norm": 0.5954179277661164, + "learning_rate": 2.407756145756975e-06, + "loss": 0.2708, + "step": 22474 + }, + { + "epoch": 1.0528411486391531, + "grad_norm": 0.5758831211466492, + "learning_rate": 2.407566625014789e-06, + "loss": 0.2852, + "step": 22475 + }, + { + "epoch": 1.0528879936290814, + "grad_norm": 0.612902034293673, + "learning_rate": 2.4073771048045335e-06, + "loss": 0.2959, + "step": 22476 + }, + { + "epoch": 1.0529348386190096, + "grad_norm": 0.5962541047911797, + "learning_rate": 2.4071875851272995e-06, + "loss": 0.2757, + "step": 22477 + }, + { + "epoch": 1.052981683608938, + "grad_norm": 0.6216080894614838, + "learning_rate": 2.4069980659841774e-06, + "loss": 0.2901, + "step": 22478 + }, + { + "epoch": 1.0530285285988663, + "grad_norm": 0.5861317059707647, + "learning_rate": 2.4068085473762586e-06, + "loss": 0.282, + "step": 22479 + }, + { + "epoch": 1.0530753735887948, + "grad_norm": 0.5630990433015601, + "learning_rate": 2.4066190293046323e-06, + "loss": 0.2684, + "step": 22480 + }, + { + "epoch": 1.053122218578723, + "grad_norm": 0.5682412186992313, + "learning_rate": 2.4064295117703917e-06, + "loss": 0.2676, + "step": 22481 + }, + { + "epoch": 1.0531690635686513, + "grad_norm": 0.628360963124616, + "learning_rate": 2.406239994774624e-06, + "loss": 0.2759, + "step": 22482 + }, + { + "epoch": 1.0532159085585797, + "grad_norm": 0.6024216814945036, + "learning_rate": 2.4060504783184224e-06, + "loss": 0.2737, + "step": 22483 + }, + { + "epoch": 1.053262753548508, + "grad_norm": 0.5531280195072991, + "learning_rate": 2.405860962402876e-06, + "loss": 0.2727, + "step": 22484 + }, + { + "epoch": 1.0533095985384364, + "grad_norm": 0.5989670786020234, + "learning_rate": 2.405671447029076e-06, + "loss": 0.2807, + "step": 22485 + }, + { + "epoch": 1.0533564435283647, + "grad_norm": 0.6025075353874703, + "learning_rate": 2.4054819321981144e-06, + "loss": 0.2824, + "step": 22486 + }, + { + "epoch": 1.053403288518293, + "grad_norm": 0.5916667550345748, + "learning_rate": 2.4052924179110805e-06, + "loss": 0.2826, + "step": 22487 + }, + { + "epoch": 1.0534501335082214, + "grad_norm": 0.617550880961253, + "learning_rate": 2.405102904169064e-06, + "loss": 0.2816, + "step": 22488 + }, + { + "epoch": 1.0534969784981496, + "grad_norm": 0.6086942736458177, + "learning_rate": 2.4049133909731563e-06, + "loss": 0.27, + "step": 22489 + }, + { + "epoch": 1.0535438234880778, + "grad_norm": 0.6006209576228985, + "learning_rate": 2.404723878324449e-06, + "loss": 0.2862, + "step": 22490 + }, + { + "epoch": 1.0535906684780063, + "grad_norm": 0.5615759170939177, + "learning_rate": 2.4045343662240304e-06, + "loss": 0.2544, + "step": 22491 + }, + { + "epoch": 1.0536375134679345, + "grad_norm": 0.581505793727037, + "learning_rate": 2.4043448546729934e-06, + "loss": 0.2759, + "step": 22492 + }, + { + "epoch": 1.053684358457863, + "grad_norm": 0.5810863369975535, + "learning_rate": 2.4041553436724286e-06, + "loss": 0.2729, + "step": 22493 + }, + { + "epoch": 1.0537312034477913, + "grad_norm": 0.5371558162424038, + "learning_rate": 2.403965833223425e-06, + "loss": 0.2644, + "step": 22494 + }, + { + "epoch": 1.0537780484377195, + "grad_norm": 0.5615794408260659, + "learning_rate": 2.4037763233270732e-06, + "loss": 0.2656, + "step": 22495 + }, + { + "epoch": 1.053824893427648, + "grad_norm": 0.5750755154184418, + "learning_rate": 2.403586813984465e-06, + "loss": 0.2784, + "step": 22496 + }, + { + "epoch": 1.0538717384175762, + "grad_norm": 0.6345307488078564, + "learning_rate": 2.4033973051966904e-06, + "loss": 0.2808, + "step": 22497 + }, + { + "epoch": 1.0539185834075047, + "grad_norm": 0.6073830772690382, + "learning_rate": 2.40320779696484e-06, + "loss": 0.3004, + "step": 22498 + }, + { + "epoch": 1.053965428397433, + "grad_norm": 0.5952219797627155, + "learning_rate": 2.4030182892900053e-06, + "loss": 0.2582, + "step": 22499 + }, + { + "epoch": 1.0540122733873611, + "grad_norm": 0.6000690865970912, + "learning_rate": 2.4028287821732747e-06, + "loss": 0.2719, + "step": 22500 + }, + { + "epoch": 1.0540591183772896, + "grad_norm": 0.585508041100718, + "learning_rate": 2.402639275615741e-06, + "loss": 0.2859, + "step": 22501 + }, + { + "epoch": 1.0541059633672178, + "grad_norm": 0.6274897700908173, + "learning_rate": 2.402449769618493e-06, + "loss": 0.2737, + "step": 22502 + }, + { + "epoch": 1.0541528083571463, + "grad_norm": 0.5588939366712038, + "learning_rate": 2.402260264182622e-06, + "loss": 0.2737, + "step": 22503 + }, + { + "epoch": 1.0541996533470746, + "grad_norm": 0.5467473506949558, + "learning_rate": 2.4020707593092193e-06, + "loss": 0.2679, + "step": 22504 + }, + { + "epoch": 1.0542464983370028, + "grad_norm": 0.612452229556374, + "learning_rate": 2.401881254999374e-06, + "loss": 0.2835, + "step": 22505 + }, + { + "epoch": 1.0542933433269313, + "grad_norm": 0.5997475740468097, + "learning_rate": 2.4016917512541786e-06, + "loss": 0.2821, + "step": 22506 + }, + { + "epoch": 1.0543401883168595, + "grad_norm": 0.5672656290020637, + "learning_rate": 2.4015022480747218e-06, + "loss": 0.2633, + "step": 22507 + }, + { + "epoch": 1.0543870333067877, + "grad_norm": 0.550265590955623, + "learning_rate": 2.4013127454620948e-06, + "loss": 0.2702, + "step": 22508 + }, + { + "epoch": 1.0544338782967162, + "grad_norm": 0.6031409837615344, + "learning_rate": 2.4011232434173877e-06, + "loss": 0.2846, + "step": 22509 + }, + { + "epoch": 1.0544807232866444, + "grad_norm": 0.5581807400212142, + "learning_rate": 2.4009337419416915e-06, + "loss": 0.2613, + "step": 22510 + }, + { + "epoch": 1.054527568276573, + "grad_norm": 0.579450941716153, + "learning_rate": 2.400744241036097e-06, + "loss": 0.2683, + "step": 22511 + }, + { + "epoch": 1.0545744132665011, + "grad_norm": 0.6233650616085781, + "learning_rate": 2.4005547407016957e-06, + "loss": 0.2778, + "step": 22512 + }, + { + "epoch": 1.0546212582564294, + "grad_norm": 0.6412218836551908, + "learning_rate": 2.400365240939575e-06, + "loss": 0.2615, + "step": 22513 + }, + { + "epoch": 1.0546681032463578, + "grad_norm": 0.6015231224566322, + "learning_rate": 2.400175741750828e-06, + "loss": 0.2914, + "step": 22514 + }, + { + "epoch": 1.054714948236286, + "grad_norm": 0.639246619526234, + "learning_rate": 2.3999862431365447e-06, + "loss": 0.2756, + "step": 22515 + }, + { + "epoch": 1.0547617932262146, + "grad_norm": 0.5464107764663697, + "learning_rate": 2.3997967450978148e-06, + "loss": 0.2653, + "step": 22516 + }, + { + "epoch": 1.0548086382161428, + "grad_norm": 0.6419651936609614, + "learning_rate": 2.3996072476357304e-06, + "loss": 0.2973, + "step": 22517 + }, + { + "epoch": 1.054855483206071, + "grad_norm": 0.6265309081077433, + "learning_rate": 2.399417750751381e-06, + "loss": 0.2899, + "step": 22518 + }, + { + "epoch": 1.0549023281959995, + "grad_norm": 0.5905619259446546, + "learning_rate": 2.3992282544458566e-06, + "loss": 0.2887, + "step": 22519 + }, + { + "epoch": 1.0549491731859277, + "grad_norm": 0.582509537033304, + "learning_rate": 2.3990387587202482e-06, + "loss": 0.2658, + "step": 22520 + }, + { + "epoch": 1.0549960181758562, + "grad_norm": 0.5493382160128014, + "learning_rate": 2.3988492635756464e-06, + "loss": 0.2747, + "step": 22521 + }, + { + "epoch": 1.0550428631657844, + "grad_norm": 0.6017795991178869, + "learning_rate": 2.398659769013142e-06, + "loss": 0.2644, + "step": 22522 + }, + { + "epoch": 1.0550897081557127, + "grad_norm": 0.5580294041103155, + "learning_rate": 2.3984702750338244e-06, + "loss": 0.2746, + "step": 22523 + }, + { + "epoch": 1.0551365531456411, + "grad_norm": 0.5949630315269453, + "learning_rate": 2.398280781638786e-06, + "loss": 0.275, + "step": 22524 + }, + { + "epoch": 1.0551833981355694, + "grad_norm": 0.6559996870580218, + "learning_rate": 2.3980912888291154e-06, + "loss": 0.2765, + "step": 22525 + }, + { + "epoch": 1.0552302431254976, + "grad_norm": 0.6650326397393621, + "learning_rate": 2.3979017966059044e-06, + "loss": 0.2881, + "step": 22526 + }, + { + "epoch": 1.055277088115426, + "grad_norm": 0.5828718146698371, + "learning_rate": 2.397712304970242e-06, + "loss": 0.2612, + "step": 22527 + }, + { + "epoch": 1.0553239331053543, + "grad_norm": 0.5470673746073965, + "learning_rate": 2.3975228139232206e-06, + "loss": 0.2688, + "step": 22528 + }, + { + "epoch": 1.0553707780952828, + "grad_norm": 0.6017202407796315, + "learning_rate": 2.397333323465929e-06, + "loss": 0.2909, + "step": 22529 + }, + { + "epoch": 1.055417623085211, + "grad_norm": 0.5308675803705346, + "learning_rate": 2.397143833599459e-06, + "loss": 0.2583, + "step": 22530 + }, + { + "epoch": 1.0554644680751393, + "grad_norm": 0.5711535776174408, + "learning_rate": 2.3969543443248995e-06, + "loss": 0.2755, + "step": 22531 + }, + { + "epoch": 1.0555113130650677, + "grad_norm": 0.5638378210819333, + "learning_rate": 2.3967648556433424e-06, + "loss": 0.2596, + "step": 22532 + }, + { + "epoch": 1.055558158054996, + "grad_norm": 0.5963095231791488, + "learning_rate": 2.396575367555877e-06, + "loss": 0.2739, + "step": 22533 + }, + { + "epoch": 1.0556050030449244, + "grad_norm": 0.558783047975154, + "learning_rate": 2.3963858800635945e-06, + "loss": 0.254, + "step": 22534 + }, + { + "epoch": 1.0556518480348527, + "grad_norm": 0.5435442076082592, + "learning_rate": 2.3961963931675854e-06, + "loss": 0.2608, + "step": 22535 + }, + { + "epoch": 1.055698693024781, + "grad_norm": 0.6138375194466404, + "learning_rate": 2.396006906868941e-06, + "loss": 0.2827, + "step": 22536 + }, + { + "epoch": 1.0557455380147094, + "grad_norm": 0.5841050841076524, + "learning_rate": 2.3958174211687493e-06, + "loss": 0.2721, + "step": 22537 + }, + { + "epoch": 1.0557923830046376, + "grad_norm": 0.6233635315583187, + "learning_rate": 2.3956279360681024e-06, + "loss": 0.2899, + "step": 22538 + }, + { + "epoch": 1.055839227994566, + "grad_norm": 0.5832756869274236, + "learning_rate": 2.395438451568091e-06, + "loss": 0.2858, + "step": 22539 + }, + { + "epoch": 1.0558860729844943, + "grad_norm": 0.6212753213600452, + "learning_rate": 2.3952489676698042e-06, + "loss": 0.2937, + "step": 22540 + }, + { + "epoch": 1.0559329179744226, + "grad_norm": 0.6213947186062252, + "learning_rate": 2.3950594843743337e-06, + "loss": 0.2799, + "step": 22541 + }, + { + "epoch": 1.055979762964351, + "grad_norm": 0.5821969755300802, + "learning_rate": 2.39487000168277e-06, + "loss": 0.2702, + "step": 22542 + }, + { + "epoch": 1.0560266079542793, + "grad_norm": 0.5697908148088856, + "learning_rate": 2.394680519596203e-06, + "loss": 0.2819, + "step": 22543 + }, + { + "epoch": 1.0560734529442075, + "grad_norm": 0.6024876390653745, + "learning_rate": 2.3944910381157225e-06, + "loss": 0.2825, + "step": 22544 + }, + { + "epoch": 1.056120297934136, + "grad_norm": 0.6129940541189965, + "learning_rate": 2.3943015572424193e-06, + "loss": 0.2597, + "step": 22545 + }, + { + "epoch": 1.0561671429240642, + "grad_norm": 0.5496428250736992, + "learning_rate": 2.394112076977385e-06, + "loss": 0.2744, + "step": 22546 + }, + { + "epoch": 1.0562139879139927, + "grad_norm": 0.5862910181037596, + "learning_rate": 2.393922597321708e-06, + "loss": 0.2855, + "step": 22547 + }, + { + "epoch": 1.056260832903921, + "grad_norm": 0.6131290324811555, + "learning_rate": 2.3937331182764814e-06, + "loss": 0.2899, + "step": 22548 + }, + { + "epoch": 1.0563076778938492, + "grad_norm": 0.6047486100001115, + "learning_rate": 2.3935436398427927e-06, + "loss": 0.2725, + "step": 22549 + }, + { + "epoch": 1.0563545228837776, + "grad_norm": 0.6500759053971981, + "learning_rate": 2.3933541620217342e-06, + "loss": 0.2832, + "step": 22550 + }, + { + "epoch": 1.0564013678737059, + "grad_norm": 0.5684185436610253, + "learning_rate": 2.393164684814395e-06, + "loss": 0.2592, + "step": 22551 + }, + { + "epoch": 1.0564482128636343, + "grad_norm": 0.5900732513830866, + "learning_rate": 2.3929752082218664e-06, + "loss": 0.2865, + "step": 22552 + }, + { + "epoch": 1.0564950578535626, + "grad_norm": 0.5737087083287603, + "learning_rate": 2.392785732245239e-06, + "loss": 0.2689, + "step": 22553 + }, + { + "epoch": 1.0565419028434908, + "grad_norm": 0.599229427437449, + "learning_rate": 2.392596256885603e-06, + "loss": 0.2929, + "step": 22554 + }, + { + "epoch": 1.0565887478334193, + "grad_norm": 0.6356412003775429, + "learning_rate": 2.392406782144048e-06, + "loss": 0.2774, + "step": 22555 + }, + { + "epoch": 1.0566355928233475, + "grad_norm": 0.592821935452919, + "learning_rate": 2.3922173080216644e-06, + "loss": 0.2758, + "step": 22556 + }, + { + "epoch": 1.056682437813276, + "grad_norm": 0.5787231315038677, + "learning_rate": 2.3920278345195443e-06, + "loss": 0.2749, + "step": 22557 + }, + { + "epoch": 1.0567292828032042, + "grad_norm": 0.5982081460279102, + "learning_rate": 2.3918383616387758e-06, + "loss": 0.2845, + "step": 22558 + }, + { + "epoch": 1.0567761277931325, + "grad_norm": 0.5719693378916052, + "learning_rate": 2.3916488893804507e-06, + "loss": 0.2856, + "step": 22559 + }, + { + "epoch": 1.056822972783061, + "grad_norm": 0.6053281326426865, + "learning_rate": 2.39145941774566e-06, + "loss": 0.302, + "step": 22560 + }, + { + "epoch": 1.0568698177729892, + "grad_norm": 0.6249414050171123, + "learning_rate": 2.391269946735492e-06, + "loss": 0.2824, + "step": 22561 + }, + { + "epoch": 1.0569166627629174, + "grad_norm": 0.6110884493305667, + "learning_rate": 2.3910804763510386e-06, + "loss": 0.2807, + "step": 22562 + }, + { + "epoch": 1.0569635077528459, + "grad_norm": 0.5979753182971894, + "learning_rate": 2.390891006593389e-06, + "loss": 0.2828, + "step": 22563 + }, + { + "epoch": 1.057010352742774, + "grad_norm": 0.6601112404520804, + "learning_rate": 2.3907015374636347e-06, + "loss": 0.2727, + "step": 22564 + }, + { + "epoch": 1.0570571977327026, + "grad_norm": 0.612036245794568, + "learning_rate": 2.390512068962865e-06, + "loss": 0.2894, + "step": 22565 + }, + { + "epoch": 1.0571040427226308, + "grad_norm": 0.5840971139547835, + "learning_rate": 2.3903226010921724e-06, + "loss": 0.282, + "step": 22566 + }, + { + "epoch": 1.057150887712559, + "grad_norm": 0.5688822841371657, + "learning_rate": 2.390133133852644e-06, + "loss": 0.2622, + "step": 22567 + }, + { + "epoch": 1.0571977327024875, + "grad_norm": 0.5962478543199095, + "learning_rate": 2.389943667245373e-06, + "loss": 0.2762, + "step": 22568 + }, + { + "epoch": 1.0572445776924158, + "grad_norm": 0.6023086378350919, + "learning_rate": 2.3897542012714476e-06, + "loss": 0.2854, + "step": 22569 + }, + { + "epoch": 1.0572914226823442, + "grad_norm": 0.6461596679198882, + "learning_rate": 2.3895647359319595e-06, + "loss": 0.2956, + "step": 22570 + }, + { + "epoch": 1.0573382676722725, + "grad_norm": 0.5842384705710223, + "learning_rate": 2.389375271227998e-06, + "loss": 0.2711, + "step": 22571 + }, + { + "epoch": 1.0573851126622007, + "grad_norm": 0.5475112334768388, + "learning_rate": 2.3891858071606544e-06, + "loss": 0.2636, + "step": 22572 + }, + { + "epoch": 1.0574319576521292, + "grad_norm": 0.5597213207837362, + "learning_rate": 2.3889963437310194e-06, + "loss": 0.2722, + "step": 22573 + }, + { + "epoch": 1.0574788026420574, + "grad_norm": 0.5849217551938408, + "learning_rate": 2.3888068809401823e-06, + "loss": 0.2529, + "step": 22574 + }, + { + "epoch": 1.0575256476319859, + "grad_norm": 0.5492786859033778, + "learning_rate": 2.388617418789233e-06, + "loss": 0.2645, + "step": 22575 + }, + { + "epoch": 1.057572492621914, + "grad_norm": 0.5517185313699003, + "learning_rate": 2.3884279572792625e-06, + "loss": 0.2627, + "step": 22576 + }, + { + "epoch": 1.0576193376118423, + "grad_norm": 0.5895503100861703, + "learning_rate": 2.3882384964113613e-06, + "loss": 0.2688, + "step": 22577 + }, + { + "epoch": 1.0576661826017708, + "grad_norm": 0.5550580246721455, + "learning_rate": 2.388049036186619e-06, + "loss": 0.2781, + "step": 22578 + }, + { + "epoch": 1.057713027591699, + "grad_norm": 0.6143752419179888, + "learning_rate": 2.387859576606128e-06, + "loss": 0.2772, + "step": 22579 + }, + { + "epoch": 1.0577598725816273, + "grad_norm": 0.605815879578864, + "learning_rate": 2.387670117670975e-06, + "loss": 0.2793, + "step": 22580 + }, + { + "epoch": 1.0578067175715558, + "grad_norm": 0.6157427992524696, + "learning_rate": 2.3874806593822535e-06, + "loss": 0.2739, + "step": 22581 + }, + { + "epoch": 1.057853562561484, + "grad_norm": 0.6106991679417312, + "learning_rate": 2.3872912017410515e-06, + "loss": 0.2943, + "step": 22582 + }, + { + "epoch": 1.0579004075514125, + "grad_norm": 0.6065419798429337, + "learning_rate": 2.3871017447484603e-06, + "loss": 0.2903, + "step": 22583 + }, + { + "epoch": 1.0579472525413407, + "grad_norm": 0.5807329206584682, + "learning_rate": 2.3869122884055713e-06, + "loss": 0.2776, + "step": 22584 + }, + { + "epoch": 1.057994097531269, + "grad_norm": 0.5726358353403496, + "learning_rate": 2.386722832713474e-06, + "loss": 0.3044, + "step": 22585 + }, + { + "epoch": 1.0580409425211974, + "grad_norm": 0.5686709894204502, + "learning_rate": 2.3865333776732567e-06, + "loss": 0.2685, + "step": 22586 + }, + { + "epoch": 1.0580877875111256, + "grad_norm": 0.5728938435923216, + "learning_rate": 2.386343923286012e-06, + "loss": 0.2728, + "step": 22587 + }, + { + "epoch": 1.058134632501054, + "grad_norm": 0.593904897798539, + "learning_rate": 2.386154469552829e-06, + "loss": 0.2707, + "step": 22588 + }, + { + "epoch": 1.0581814774909823, + "grad_norm": 0.5730602985604731, + "learning_rate": 2.3859650164747987e-06, + "loss": 0.2761, + "step": 22589 + }, + { + "epoch": 1.0582283224809106, + "grad_norm": 0.6326260354903992, + "learning_rate": 2.385775564053011e-06, + "loss": 0.2899, + "step": 22590 + }, + { + "epoch": 1.058275167470839, + "grad_norm": 0.5977226932894097, + "learning_rate": 2.3855861122885575e-06, + "loss": 0.2712, + "step": 22591 + }, + { + "epoch": 1.0583220124607673, + "grad_norm": 0.6066274431962583, + "learning_rate": 2.385396661182526e-06, + "loss": 0.2842, + "step": 22592 + }, + { + "epoch": 1.0583688574506958, + "grad_norm": 0.5877556684085709, + "learning_rate": 2.385207210736008e-06, + "loss": 0.2791, + "step": 22593 + }, + { + "epoch": 1.058415702440624, + "grad_norm": 0.5897181656225553, + "learning_rate": 2.385017760950093e-06, + "loss": 0.2835, + "step": 22594 + }, + { + "epoch": 1.0584625474305522, + "grad_norm": 0.6139387392288362, + "learning_rate": 2.3848283118258727e-06, + "loss": 0.2739, + "step": 22595 + }, + { + "epoch": 1.0585093924204807, + "grad_norm": 0.604370322935605, + "learning_rate": 2.3846388633644364e-06, + "loss": 0.29, + "step": 22596 + }, + { + "epoch": 1.058556237410409, + "grad_norm": 0.623595749856401, + "learning_rate": 2.384449415566875e-06, + "loss": 0.2895, + "step": 22597 + }, + { + "epoch": 1.0586030824003372, + "grad_norm": 0.5864225142728349, + "learning_rate": 2.384259968434277e-06, + "loss": 0.2596, + "step": 22598 + }, + { + "epoch": 1.0586499273902656, + "grad_norm": 0.5604169509305086, + "learning_rate": 2.384070521967735e-06, + "loss": 0.287, + "step": 22599 + }, + { + "epoch": 1.0586967723801939, + "grad_norm": 0.6177997316625056, + "learning_rate": 2.383881076168337e-06, + "loss": 0.2818, + "step": 22600 + }, + { + "epoch": 1.0587436173701223, + "grad_norm": 0.5652615323826006, + "learning_rate": 2.383691631037174e-06, + "loss": 0.2651, + "step": 22601 + }, + { + "epoch": 1.0587904623600506, + "grad_norm": 0.627727236787238, + "learning_rate": 2.3835021865753376e-06, + "loss": 0.2762, + "step": 22602 + }, + { + "epoch": 1.0588373073499788, + "grad_norm": 0.5725812494830761, + "learning_rate": 2.3833127427839167e-06, + "loss": 0.2635, + "step": 22603 + }, + { + "epoch": 1.0588841523399073, + "grad_norm": 0.6187229421513782, + "learning_rate": 2.383123299664001e-06, + "loss": 0.2968, + "step": 22604 + }, + { + "epoch": 1.0589309973298355, + "grad_norm": 0.5797421208433136, + "learning_rate": 2.382933857216681e-06, + "loss": 0.2689, + "step": 22605 + }, + { + "epoch": 1.058977842319764, + "grad_norm": 0.5810707124938606, + "learning_rate": 2.382744415443048e-06, + "loss": 0.2776, + "step": 22606 + }, + { + "epoch": 1.0590246873096922, + "grad_norm": 0.5343951646400011, + "learning_rate": 2.382554974344191e-06, + "loss": 0.2627, + "step": 22607 + }, + { + "epoch": 1.0590715322996205, + "grad_norm": 0.5722358026139001, + "learning_rate": 2.3823655339212005e-06, + "loss": 0.2801, + "step": 22608 + }, + { + "epoch": 1.059118377289549, + "grad_norm": 0.5845507626960996, + "learning_rate": 2.382176094175168e-06, + "loss": 0.2798, + "step": 22609 + }, + { + "epoch": 1.0591652222794772, + "grad_norm": 0.6165631846639787, + "learning_rate": 2.3819866551071816e-06, + "loss": 0.2843, + "step": 22610 + }, + { + "epoch": 1.0592120672694056, + "grad_norm": 0.5542810986468429, + "learning_rate": 2.381797216718332e-06, + "loss": 0.271, + "step": 22611 + }, + { + "epoch": 1.0592589122593339, + "grad_norm": 0.5388370888466724, + "learning_rate": 2.3816077790097097e-06, + "loss": 0.2709, + "step": 22612 + }, + { + "epoch": 1.0593057572492621, + "grad_norm": 0.5865653192253261, + "learning_rate": 2.381418341982405e-06, + "loss": 0.2755, + "step": 22613 + }, + { + "epoch": 1.0593526022391906, + "grad_norm": 0.644560319381035, + "learning_rate": 2.3812289056375082e-06, + "loss": 0.2879, + "step": 22614 + }, + { + "epoch": 1.0593994472291188, + "grad_norm": 0.5900938247320741, + "learning_rate": 2.38103946997611e-06, + "loss": 0.2634, + "step": 22615 + }, + { + "epoch": 1.059446292219047, + "grad_norm": 0.5586857756850062, + "learning_rate": 2.3808500349992983e-06, + "loss": 0.2778, + "step": 22616 + }, + { + "epoch": 1.0594931372089755, + "grad_norm": 0.5847501505311113, + "learning_rate": 2.3806606007081654e-06, + "loss": 0.2805, + "step": 22617 + }, + { + "epoch": 1.0595399821989038, + "grad_norm": 0.6137162841660475, + "learning_rate": 2.3804711671038e-06, + "loss": 0.2752, + "step": 22618 + }, + { + "epoch": 1.0595868271888322, + "grad_norm": 0.6472402511489529, + "learning_rate": 2.380281734187294e-06, + "loss": 0.3018, + "step": 22619 + }, + { + "epoch": 1.0596336721787605, + "grad_norm": 0.5637752319418293, + "learning_rate": 2.380092301959736e-06, + "loss": 0.2769, + "step": 22620 + }, + { + "epoch": 1.0596805171686887, + "grad_norm": 0.5663339702260901, + "learning_rate": 2.3799028704222174e-06, + "loss": 0.271, + "step": 22621 + }, + { + "epoch": 1.0597273621586172, + "grad_norm": 0.6314634071863878, + "learning_rate": 2.3797134395758266e-06, + "loss": 0.2892, + "step": 22622 + }, + { + "epoch": 1.0597742071485454, + "grad_norm": 0.563682346881688, + "learning_rate": 2.3795240094216555e-06, + "loss": 0.281, + "step": 22623 + }, + { + "epoch": 1.0598210521384739, + "grad_norm": 0.558168024355747, + "learning_rate": 2.379334579960793e-06, + "loss": 0.2732, + "step": 22624 + }, + { + "epoch": 1.0598678971284021, + "grad_norm": 0.566954520133046, + "learning_rate": 2.379145151194329e-06, + "loss": 0.2547, + "step": 22625 + }, + { + "epoch": 1.0599147421183304, + "grad_norm": 0.5539044116089513, + "learning_rate": 2.3789557231233555e-06, + "loss": 0.2572, + "step": 22626 + }, + { + "epoch": 1.0599615871082588, + "grad_norm": 0.5871935590758137, + "learning_rate": 2.3787662957489616e-06, + "loss": 0.269, + "step": 22627 + }, + { + "epoch": 1.060008432098187, + "grad_norm": 0.5600090733183042, + "learning_rate": 2.378576869072236e-06, + "loss": 0.2673, + "step": 22628 + }, + { + "epoch": 1.0600552770881155, + "grad_norm": 0.6004391736330787, + "learning_rate": 2.3783874430942704e-06, + "loss": 0.2909, + "step": 22629 + }, + { + "epoch": 1.0601021220780438, + "grad_norm": 0.5834523976515191, + "learning_rate": 2.3781980178161546e-06, + "loss": 0.2855, + "step": 22630 + }, + { + "epoch": 1.060148967067972, + "grad_norm": 0.5829570186024671, + "learning_rate": 2.378008593238978e-06, + "loss": 0.2794, + "step": 22631 + }, + { + "epoch": 1.0601958120579005, + "grad_norm": 0.5994558579266878, + "learning_rate": 2.3778191693638315e-06, + "loss": 0.2807, + "step": 22632 + }, + { + "epoch": 1.0602426570478287, + "grad_norm": 0.5552963392518602, + "learning_rate": 2.3776297461918064e-06, + "loss": 0.2571, + "step": 22633 + }, + { + "epoch": 1.060289502037757, + "grad_norm": 0.5583220980726846, + "learning_rate": 2.3774403237239906e-06, + "loss": 0.2758, + "step": 22634 + }, + { + "epoch": 1.0603363470276854, + "grad_norm": 0.5902957405095205, + "learning_rate": 2.377250901961474e-06, + "loss": 0.2697, + "step": 22635 + }, + { + "epoch": 1.0603831920176137, + "grad_norm": 0.5985178159988604, + "learning_rate": 2.377061480905348e-06, + "loss": 0.2704, + "step": 22636 + }, + { + "epoch": 1.0604300370075421, + "grad_norm": 0.5848658373758111, + "learning_rate": 2.3768720605567026e-06, + "loss": 0.2653, + "step": 22637 + }, + { + "epoch": 1.0604768819974704, + "grad_norm": 0.6228963207817674, + "learning_rate": 2.376682640916627e-06, + "loss": 0.281, + "step": 22638 + }, + { + "epoch": 1.0605237269873986, + "grad_norm": 0.5892979950393387, + "learning_rate": 2.3764932219862122e-06, + "loss": 0.2737, + "step": 22639 + }, + { + "epoch": 1.060570571977327, + "grad_norm": 0.5328115985243115, + "learning_rate": 2.3763038037665487e-06, + "loss": 0.2563, + "step": 22640 + }, + { + "epoch": 1.0606174169672553, + "grad_norm": 0.6394529237719697, + "learning_rate": 2.3761143862587252e-06, + "loss": 0.2982, + "step": 22641 + }, + { + "epoch": 1.0606642619571838, + "grad_norm": 0.5918146755353098, + "learning_rate": 2.3759249694638313e-06, + "loss": 0.2629, + "step": 22642 + }, + { + "epoch": 1.060711106947112, + "grad_norm": 0.6039384086587767, + "learning_rate": 2.375735553382959e-06, + "loss": 0.2652, + "step": 22643 + }, + { + "epoch": 1.0607579519370403, + "grad_norm": 0.5626240528472366, + "learning_rate": 2.3755461380171967e-06, + "loss": 0.256, + "step": 22644 + }, + { + "epoch": 1.0608047969269687, + "grad_norm": 0.5720147494558728, + "learning_rate": 2.375356723367635e-06, + "loss": 0.2798, + "step": 22645 + }, + { + "epoch": 1.060851641916897, + "grad_norm": 0.6116887252046355, + "learning_rate": 2.3751673094353657e-06, + "loss": 0.276, + "step": 22646 + }, + { + "epoch": 1.0608984869068254, + "grad_norm": 0.607221500843869, + "learning_rate": 2.374977896221476e-06, + "loss": 0.2891, + "step": 22647 + }, + { + "epoch": 1.0609453318967537, + "grad_norm": 0.5838770184214952, + "learning_rate": 2.374788483727057e-06, + "loss": 0.2774, + "step": 22648 + }, + { + "epoch": 1.060992176886682, + "grad_norm": 0.5900367400711588, + "learning_rate": 2.374599071953198e-06, + "loss": 0.2739, + "step": 22649 + }, + { + "epoch": 1.0610390218766104, + "grad_norm": 0.602007970377346, + "learning_rate": 2.3744096609009906e-06, + "loss": 0.2914, + "step": 22650 + }, + { + "epoch": 1.0610858668665386, + "grad_norm": 0.6024650189842552, + "learning_rate": 2.3742202505715245e-06, + "loss": 0.2849, + "step": 22651 + }, + { + "epoch": 1.0611327118564668, + "grad_norm": 0.5827346767643347, + "learning_rate": 2.3740308409658896e-06, + "loss": 0.2588, + "step": 22652 + }, + { + "epoch": 1.0611795568463953, + "grad_norm": 0.6004485751051188, + "learning_rate": 2.3738414320851742e-06, + "loss": 0.2864, + "step": 22653 + }, + { + "epoch": 1.0612264018363236, + "grad_norm": 0.5816973296051716, + "learning_rate": 2.37365202393047e-06, + "loss": 0.2715, + "step": 22654 + }, + { + "epoch": 1.061273246826252, + "grad_norm": 0.5643691311327356, + "learning_rate": 2.3734626165028675e-06, + "loss": 0.2724, + "step": 22655 + }, + { + "epoch": 1.0613200918161803, + "grad_norm": 0.6218558175870142, + "learning_rate": 2.3732732098034547e-06, + "loss": 0.2926, + "step": 22656 + }, + { + "epoch": 1.0613669368061085, + "grad_norm": 0.6674665153089724, + "learning_rate": 2.3730838038333233e-06, + "loss": 0.2929, + "step": 22657 + }, + { + "epoch": 1.061413781796037, + "grad_norm": 0.5585549481087909, + "learning_rate": 2.372894398593563e-06, + "loss": 0.2775, + "step": 22658 + }, + { + "epoch": 1.0614606267859652, + "grad_norm": 0.6166660792712705, + "learning_rate": 2.3727049940852635e-06, + "loss": 0.2867, + "step": 22659 + }, + { + "epoch": 1.0615074717758937, + "grad_norm": 0.5749584464340356, + "learning_rate": 2.372515590309514e-06, + "loss": 0.2729, + "step": 22660 + }, + { + "epoch": 1.061554316765822, + "grad_norm": 0.6113192092804045, + "learning_rate": 2.3723261872674057e-06, + "loss": 0.3021, + "step": 22661 + }, + { + "epoch": 1.0616011617557501, + "grad_norm": 0.57051133599295, + "learning_rate": 2.372136784960028e-06, + "loss": 0.2634, + "step": 22662 + }, + { + "epoch": 1.0616480067456786, + "grad_norm": 0.6056766012332689, + "learning_rate": 2.3719473833884706e-06, + "loss": 0.2691, + "step": 22663 + }, + { + "epoch": 1.0616948517356068, + "grad_norm": 0.607590490289058, + "learning_rate": 2.371757982553825e-06, + "loss": 0.2854, + "step": 22664 + }, + { + "epoch": 1.0617416967255353, + "grad_norm": 0.5794502143156655, + "learning_rate": 2.371568582457179e-06, + "loss": 0.2779, + "step": 22665 + }, + { + "epoch": 1.0617885417154636, + "grad_norm": 0.5980887345897982, + "learning_rate": 2.371379183099624e-06, + "loss": 0.2957, + "step": 22666 + }, + { + "epoch": 1.0618353867053918, + "grad_norm": 0.6382683034149507, + "learning_rate": 2.3711897844822485e-06, + "loss": 0.3013, + "step": 22667 + }, + { + "epoch": 1.0618822316953203, + "grad_norm": 0.5673418649053437, + "learning_rate": 2.371000386606145e-06, + "loss": 0.2576, + "step": 22668 + }, + { + "epoch": 1.0619290766852485, + "grad_norm": 0.6610845496260098, + "learning_rate": 2.3708109894724006e-06, + "loss": 0.2936, + "step": 22669 + }, + { + "epoch": 1.0619759216751767, + "grad_norm": 0.5934635428079233, + "learning_rate": 2.3706215930821077e-06, + "loss": 0.2808, + "step": 22670 + }, + { + "epoch": 1.0620227666651052, + "grad_norm": 0.5477619021086593, + "learning_rate": 2.3704321974363537e-06, + "loss": 0.2744, + "step": 22671 + }, + { + "epoch": 1.0620696116550334, + "grad_norm": 0.5849482064789505, + "learning_rate": 2.3702428025362305e-06, + "loss": 0.2769, + "step": 22672 + }, + { + "epoch": 1.062116456644962, + "grad_norm": 0.6115276245835032, + "learning_rate": 2.370053408382827e-06, + "loss": 0.277, + "step": 22673 + }, + { + "epoch": 1.0621633016348901, + "grad_norm": 0.5867623242498157, + "learning_rate": 2.369864014977233e-06, + "loss": 0.2962, + "step": 22674 + }, + { + "epoch": 1.0622101466248184, + "grad_norm": 0.6327699155800627, + "learning_rate": 2.36967462232054e-06, + "loss": 0.2986, + "step": 22675 + }, + { + "epoch": 1.0622569916147468, + "grad_norm": 0.6011272432049755, + "learning_rate": 2.3694852304138374e-06, + "loss": 0.2809, + "step": 22676 + }, + { + "epoch": 1.062303836604675, + "grad_norm": 0.5843639715928601, + "learning_rate": 2.3692958392582127e-06, + "loss": 0.2846, + "step": 22677 + }, + { + "epoch": 1.0623506815946036, + "grad_norm": 0.6179053135258914, + "learning_rate": 2.369106448854758e-06, + "loss": 0.2855, + "step": 22678 + }, + { + "epoch": 1.0623975265845318, + "grad_norm": 0.6323642718672997, + "learning_rate": 2.3689170592045636e-06, + "loss": 0.2946, + "step": 22679 + }, + { + "epoch": 1.06244437157446, + "grad_norm": 0.6168453953046839, + "learning_rate": 2.3687276703087173e-06, + "loss": 0.2831, + "step": 22680 + }, + { + "epoch": 1.0624912165643885, + "grad_norm": 0.5902540338008453, + "learning_rate": 2.368538282168311e-06, + "loss": 0.2699, + "step": 22681 + }, + { + "epoch": 1.0625380615543167, + "grad_norm": 0.5565316851976351, + "learning_rate": 2.3683488947844346e-06, + "loss": 0.263, + "step": 22682 + }, + { + "epoch": 1.0625849065442452, + "grad_norm": 0.5537631946565859, + "learning_rate": 2.3681595081581767e-06, + "loss": 0.2643, + "step": 22683 + }, + { + "epoch": 1.0626317515341734, + "grad_norm": 0.5607737441485616, + "learning_rate": 2.3679701222906274e-06, + "loss": 0.2736, + "step": 22684 + }, + { + "epoch": 1.0626785965241017, + "grad_norm": 0.5732922655406225, + "learning_rate": 2.3677807371828764e-06, + "loss": 0.2652, + "step": 22685 + }, + { + "epoch": 1.0627254415140301, + "grad_norm": 0.6330438125057295, + "learning_rate": 2.367591352836015e-06, + "loss": 0.2869, + "step": 22686 + }, + { + "epoch": 1.0627722865039584, + "grad_norm": 0.6099547263766197, + "learning_rate": 2.367401969251131e-06, + "loss": 0.2777, + "step": 22687 + }, + { + "epoch": 1.0628191314938866, + "grad_norm": 0.611691434041569, + "learning_rate": 2.367212586429317e-06, + "loss": 0.2794, + "step": 22688 + }, + { + "epoch": 1.062865976483815, + "grad_norm": 0.5555863656044077, + "learning_rate": 2.36702320437166e-06, + "loss": 0.2722, + "step": 22689 + }, + { + "epoch": 1.0629128214737433, + "grad_norm": 0.6403220555618956, + "learning_rate": 2.3668338230792513e-06, + "loss": 0.2677, + "step": 22690 + }, + { + "epoch": 1.0629596664636718, + "grad_norm": 0.5879002194185136, + "learning_rate": 2.3666444425531803e-06, + "loss": 0.2681, + "step": 22691 + }, + { + "epoch": 1.0630065114536, + "grad_norm": 0.5912446698298762, + "learning_rate": 2.3664550627945368e-06, + "loss": 0.2671, + "step": 22692 + }, + { + "epoch": 1.0630533564435283, + "grad_norm": 0.5817198017220493, + "learning_rate": 2.3662656838044113e-06, + "loss": 0.2684, + "step": 22693 + }, + { + "epoch": 1.0631002014334567, + "grad_norm": 0.6154173925582924, + "learning_rate": 2.366076305583894e-06, + "loss": 0.2801, + "step": 22694 + }, + { + "epoch": 1.063147046423385, + "grad_norm": 0.5804004419859887, + "learning_rate": 2.3658869281340727e-06, + "loss": 0.2665, + "step": 22695 + }, + { + "epoch": 1.0631938914133134, + "grad_norm": 0.5835985636834675, + "learning_rate": 2.3656975514560385e-06, + "loss": 0.2761, + "step": 22696 + }, + { + "epoch": 1.0632407364032417, + "grad_norm": 0.5946677079427304, + "learning_rate": 2.3655081755508816e-06, + "loss": 0.2883, + "step": 22697 + }, + { + "epoch": 1.06328758139317, + "grad_norm": 0.6687766333027861, + "learning_rate": 2.3653188004196905e-06, + "loss": 0.2916, + "step": 22698 + }, + { + "epoch": 1.0633344263830984, + "grad_norm": 0.6599339579218488, + "learning_rate": 2.365129426063556e-06, + "loss": 0.2974, + "step": 22699 + }, + { + "epoch": 1.0633812713730266, + "grad_norm": 0.636362202193036, + "learning_rate": 2.364940052483569e-06, + "loss": 0.2913, + "step": 22700 + }, + { + "epoch": 1.0634281163629549, + "grad_norm": 0.6115308526701035, + "learning_rate": 2.3647506796808177e-06, + "loss": 0.2817, + "step": 22701 + }, + { + "epoch": 1.0634749613528833, + "grad_norm": 0.6142950064811389, + "learning_rate": 2.3645613076563914e-06, + "loss": 0.2805, + "step": 22702 + }, + { + "epoch": 1.0635218063428116, + "grad_norm": 0.6011762597098907, + "learning_rate": 2.3643719364113806e-06, + "loss": 0.2852, + "step": 22703 + }, + { + "epoch": 1.06356865133274, + "grad_norm": 0.5618567301134123, + "learning_rate": 2.364182565946876e-06, + "loss": 0.2719, + "step": 22704 + }, + { + "epoch": 1.0636154963226683, + "grad_norm": 0.5866975811241578, + "learning_rate": 2.363993196263966e-06, + "loss": 0.2702, + "step": 22705 + }, + { + "epoch": 1.0636623413125965, + "grad_norm": 0.5610639990675569, + "learning_rate": 2.363803827363742e-06, + "loss": 0.2628, + "step": 22706 + }, + { + "epoch": 1.063709186302525, + "grad_norm": 0.6043453447313621, + "learning_rate": 2.363614459247292e-06, + "loss": 0.2711, + "step": 22707 + }, + { + "epoch": 1.0637560312924532, + "grad_norm": 0.5936308737738581, + "learning_rate": 2.3634250919157067e-06, + "loss": 0.2787, + "step": 22708 + }, + { + "epoch": 1.0638028762823817, + "grad_norm": 0.6202989257424821, + "learning_rate": 2.363235725370075e-06, + "loss": 0.2837, + "step": 22709 + }, + { + "epoch": 1.06384972127231, + "grad_norm": 0.6524432092582224, + "learning_rate": 2.363046359611488e-06, + "loss": 0.2997, + "step": 22710 + }, + { + "epoch": 1.0638965662622382, + "grad_norm": 0.6021678787497905, + "learning_rate": 2.3628569946410345e-06, + "loss": 0.2816, + "step": 22711 + }, + { + "epoch": 1.0639434112521666, + "grad_norm": 0.5925586346407375, + "learning_rate": 2.3626676304598046e-06, + "loss": 0.2743, + "step": 22712 + }, + { + "epoch": 1.0639902562420949, + "grad_norm": 0.5917989302493062, + "learning_rate": 2.3624782670688886e-06, + "loss": 0.2769, + "step": 22713 + }, + { + "epoch": 1.0640371012320233, + "grad_norm": 0.60384675507552, + "learning_rate": 2.3622889044693755e-06, + "loss": 0.2813, + "step": 22714 + }, + { + "epoch": 1.0640839462219516, + "grad_norm": 0.6161282095714883, + "learning_rate": 2.362099542662354e-06, + "loss": 0.2746, + "step": 22715 + }, + { + "epoch": 1.0641307912118798, + "grad_norm": 0.5449542599374921, + "learning_rate": 2.3619101816489155e-06, + "loss": 0.2592, + "step": 22716 + }, + { + "epoch": 1.0641776362018083, + "grad_norm": 0.5689052925561549, + "learning_rate": 2.3617208214301494e-06, + "loss": 0.2605, + "step": 22717 + }, + { + "epoch": 1.0642244811917365, + "grad_norm": 0.5942760167538277, + "learning_rate": 2.361531462007145e-06, + "loss": 0.2702, + "step": 22718 + }, + { + "epoch": 1.064271326181665, + "grad_norm": 0.5667245437148357, + "learning_rate": 2.361342103380993e-06, + "loss": 0.2477, + "step": 22719 + }, + { + "epoch": 1.0643181711715932, + "grad_norm": 0.6145115849559875, + "learning_rate": 2.3611527455527817e-06, + "loss": 0.2803, + "step": 22720 + }, + { + "epoch": 1.0643650161615215, + "grad_norm": 0.5673264174265379, + "learning_rate": 2.3609633885236016e-06, + "loss": 0.2873, + "step": 22721 + }, + { + "epoch": 1.06441186115145, + "grad_norm": 0.6305624204546116, + "learning_rate": 2.360774032294542e-06, + "loss": 0.2766, + "step": 22722 + }, + { + "epoch": 1.0644587061413782, + "grad_norm": 0.549474407086829, + "learning_rate": 2.3605846768666924e-06, + "loss": 0.2823, + "step": 22723 + }, + { + "epoch": 1.0645055511313064, + "grad_norm": 0.5761447081404428, + "learning_rate": 2.360395322241144e-06, + "loss": 0.2828, + "step": 22724 + }, + { + "epoch": 1.0645523961212349, + "grad_norm": 0.539107018100734, + "learning_rate": 2.3602059684189856e-06, + "loss": 0.2545, + "step": 22725 + }, + { + "epoch": 1.064599241111163, + "grad_norm": 0.5729277936746545, + "learning_rate": 2.3600166154013057e-06, + "loss": 0.273, + "step": 22726 + }, + { + "epoch": 1.0646460861010916, + "grad_norm": 0.5946913064468268, + "learning_rate": 2.359827263189195e-06, + "loss": 0.2747, + "step": 22727 + }, + { + "epoch": 1.0646929310910198, + "grad_norm": 0.5848796443287468, + "learning_rate": 2.3596379117837437e-06, + "loss": 0.2754, + "step": 22728 + }, + { + "epoch": 1.064739776080948, + "grad_norm": 0.5722971096771886, + "learning_rate": 2.3594485611860407e-06, + "loss": 0.2547, + "step": 22729 + }, + { + "epoch": 1.0647866210708765, + "grad_norm": 0.5440724722484581, + "learning_rate": 2.3592592113971754e-06, + "loss": 0.2542, + "step": 22730 + }, + { + "epoch": 1.0648334660608048, + "grad_norm": 0.6182921043731567, + "learning_rate": 2.3590698624182396e-06, + "loss": 0.2631, + "step": 22731 + }, + { + "epoch": 1.0648803110507332, + "grad_norm": 0.6016840838549019, + "learning_rate": 2.3588805142503205e-06, + "loss": 0.2683, + "step": 22732 + }, + { + "epoch": 1.0649271560406615, + "grad_norm": 0.6023266829869894, + "learning_rate": 2.3586911668945078e-06, + "loss": 0.2738, + "step": 22733 + }, + { + "epoch": 1.0649740010305897, + "grad_norm": 0.5890067939317777, + "learning_rate": 2.3585018203518924e-06, + "loss": 0.2682, + "step": 22734 + }, + { + "epoch": 1.0650208460205182, + "grad_norm": 0.6113245034047631, + "learning_rate": 2.3583124746235637e-06, + "loss": 0.2942, + "step": 22735 + }, + { + "epoch": 1.0650676910104464, + "grad_norm": 0.6051841266544041, + "learning_rate": 2.3581231297106105e-06, + "loss": 0.2819, + "step": 22736 + }, + { + "epoch": 1.0651145360003746, + "grad_norm": 0.6326122231948214, + "learning_rate": 2.3579337856141243e-06, + "loss": 0.2748, + "step": 22737 + }, + { + "epoch": 1.065161380990303, + "grad_norm": 0.6267193522625736, + "learning_rate": 2.357744442335192e-06, + "loss": 0.2685, + "step": 22738 + }, + { + "epoch": 1.0652082259802313, + "grad_norm": 0.5277926567880706, + "learning_rate": 2.357555099874906e-06, + "loss": 0.2558, + "step": 22739 + }, + { + "epoch": 1.0652550709701598, + "grad_norm": 0.5879454133405361, + "learning_rate": 2.3573657582343534e-06, + "loss": 0.2892, + "step": 22740 + }, + { + "epoch": 1.065301915960088, + "grad_norm": 0.5886386375187302, + "learning_rate": 2.357176417414625e-06, + "loss": 0.2652, + "step": 22741 + }, + { + "epoch": 1.0653487609500163, + "grad_norm": 0.5767075719541714, + "learning_rate": 2.3569870774168114e-06, + "loss": 0.2748, + "step": 22742 + }, + { + "epoch": 1.0653956059399448, + "grad_norm": 0.5557237414693479, + "learning_rate": 2.3567977382420012e-06, + "loss": 0.2615, + "step": 22743 + }, + { + "epoch": 1.065442450929873, + "grad_norm": 0.6262428544651821, + "learning_rate": 2.3566083998912833e-06, + "loss": 0.2542, + "step": 22744 + }, + { + "epoch": 1.0654892959198015, + "grad_norm": 0.5815365820923494, + "learning_rate": 2.356419062365748e-06, + "loss": 0.2725, + "step": 22745 + }, + { + "epoch": 1.0655361409097297, + "grad_norm": 0.5873859491566003, + "learning_rate": 2.3562297256664855e-06, + "loss": 0.2751, + "step": 22746 + }, + { + "epoch": 1.065582985899658, + "grad_norm": 0.6160951132056539, + "learning_rate": 2.356040389794584e-06, + "loss": 0.2742, + "step": 22747 + }, + { + "epoch": 1.0656298308895864, + "grad_norm": 0.6179069243212476, + "learning_rate": 2.355851054751134e-06, + "loss": 0.3013, + "step": 22748 + }, + { + "epoch": 1.0656766758795146, + "grad_norm": 0.5976538709482728, + "learning_rate": 2.355661720537226e-06, + "loss": 0.2893, + "step": 22749 + }, + { + "epoch": 1.065723520869443, + "grad_norm": 0.618817150949706, + "learning_rate": 2.3554723871539483e-06, + "loss": 0.2732, + "step": 22750 + }, + { + "epoch": 1.0657703658593713, + "grad_norm": 0.6421662479843225, + "learning_rate": 2.3552830546023898e-06, + "loss": 0.278, + "step": 22751 + }, + { + "epoch": 1.0658172108492996, + "grad_norm": 0.5446826999001639, + "learning_rate": 2.3550937228836413e-06, + "loss": 0.2484, + "step": 22752 + }, + { + "epoch": 1.065864055839228, + "grad_norm": 0.563595600279444, + "learning_rate": 2.354904391998792e-06, + "loss": 0.2661, + "step": 22753 + }, + { + "epoch": 1.0659109008291563, + "grad_norm": 0.6345472370296796, + "learning_rate": 2.354715061948931e-06, + "loss": 0.285, + "step": 22754 + }, + { + "epoch": 1.0659577458190848, + "grad_norm": 0.5723232027222024, + "learning_rate": 2.35452573273515e-06, + "loss": 0.2684, + "step": 22755 + }, + { + "epoch": 1.066004590809013, + "grad_norm": 0.5710882500183999, + "learning_rate": 2.3543364043585355e-06, + "loss": 0.2565, + "step": 22756 + }, + { + "epoch": 1.0660514357989412, + "grad_norm": 0.6418735509071131, + "learning_rate": 2.3541470768201783e-06, + "loss": 0.2867, + "step": 22757 + }, + { + "epoch": 1.0660982807888697, + "grad_norm": 0.6053169447268665, + "learning_rate": 2.353957750121168e-06, + "loss": 0.2787, + "step": 22758 + }, + { + "epoch": 1.066145125778798, + "grad_norm": 0.6134509631181568, + "learning_rate": 2.3537684242625947e-06, + "loss": 0.2707, + "step": 22759 + }, + { + "epoch": 1.0661919707687262, + "grad_norm": 0.5948844957157889, + "learning_rate": 2.3535790992455467e-06, + "loss": 0.2694, + "step": 22760 + }, + { + "epoch": 1.0662388157586546, + "grad_norm": 0.6675466887483595, + "learning_rate": 2.3533897750711153e-06, + "loss": 0.2826, + "step": 22761 + }, + { + "epoch": 1.0662856607485829, + "grad_norm": 0.5659457112671469, + "learning_rate": 2.353200451740388e-06, + "loss": 0.2722, + "step": 22762 + }, + { + "epoch": 1.0663325057385113, + "grad_norm": 0.6288012834917823, + "learning_rate": 2.3530111292544557e-06, + "loss": 0.2821, + "step": 22763 + }, + { + "epoch": 1.0663793507284396, + "grad_norm": 0.5868369071897727, + "learning_rate": 2.3528218076144065e-06, + "loss": 0.2816, + "step": 22764 + }, + { + "epoch": 1.0664261957183678, + "grad_norm": 0.5875769339281426, + "learning_rate": 2.3526324868213315e-06, + "loss": 0.2896, + "step": 22765 + }, + { + "epoch": 1.0664730407082963, + "grad_norm": 0.5832848057911852, + "learning_rate": 2.3524431668763193e-06, + "loss": 0.2718, + "step": 22766 + }, + { + "epoch": 1.0665198856982245, + "grad_norm": 0.579776775935683, + "learning_rate": 2.3522538477804603e-06, + "loss": 0.2866, + "step": 22767 + }, + { + "epoch": 1.066566730688153, + "grad_norm": 0.6178152290766089, + "learning_rate": 2.3520645295348423e-06, + "loss": 0.2659, + "step": 22768 + }, + { + "epoch": 1.0666135756780812, + "grad_norm": 0.60385747400925, + "learning_rate": 2.351875212140556e-06, + "loss": 0.2736, + "step": 22769 + }, + { + "epoch": 1.0666604206680095, + "grad_norm": 0.614642986808547, + "learning_rate": 2.3516858955986906e-06, + "loss": 0.2885, + "step": 22770 + }, + { + "epoch": 1.066707265657938, + "grad_norm": 0.6195146893776577, + "learning_rate": 2.3514965799103358e-06, + "loss": 0.3053, + "step": 22771 + }, + { + "epoch": 1.0667541106478662, + "grad_norm": 0.5320432915933431, + "learning_rate": 2.35130726507658e-06, + "loss": 0.2532, + "step": 22772 + }, + { + "epoch": 1.0668009556377944, + "grad_norm": 0.6294284293677633, + "learning_rate": 2.351117951098515e-06, + "loss": 0.277, + "step": 22773 + }, + { + "epoch": 1.0668478006277229, + "grad_norm": 0.6098193997802864, + "learning_rate": 2.3509286379772285e-06, + "loss": 0.2928, + "step": 22774 + }, + { + "epoch": 1.0668946456176511, + "grad_norm": 0.6215065338983131, + "learning_rate": 2.3507393257138095e-06, + "loss": 0.2864, + "step": 22775 + }, + { + "epoch": 1.0669414906075796, + "grad_norm": 0.543938964595071, + "learning_rate": 2.350550014309348e-06, + "loss": 0.2809, + "step": 22776 + }, + { + "epoch": 1.0669883355975078, + "grad_norm": 0.5988951688670496, + "learning_rate": 2.350360703764934e-06, + "loss": 0.2795, + "step": 22777 + }, + { + "epoch": 1.067035180587436, + "grad_norm": 0.559599775108525, + "learning_rate": 2.3501713940816563e-06, + "loss": 0.2691, + "step": 22778 + }, + { + "epoch": 1.0670820255773645, + "grad_norm": 0.6152337598186938, + "learning_rate": 2.3499820852606046e-06, + "loss": 0.2658, + "step": 22779 + }, + { + "epoch": 1.0671288705672928, + "grad_norm": 0.6261113206824886, + "learning_rate": 2.3497927773028696e-06, + "loss": 0.3056, + "step": 22780 + }, + { + "epoch": 1.0671757155572212, + "grad_norm": 0.6199113632686408, + "learning_rate": 2.349603470209538e-06, + "loss": 0.2751, + "step": 22781 + }, + { + "epoch": 1.0672225605471495, + "grad_norm": 0.5867790800257519, + "learning_rate": 2.349414163981701e-06, + "loss": 0.2775, + "step": 22782 + }, + { + "epoch": 1.0672694055370777, + "grad_norm": 0.6346630355578349, + "learning_rate": 2.349224858620447e-06, + "loss": 0.2926, + "step": 22783 + }, + { + "epoch": 1.0673162505270062, + "grad_norm": 0.5954025489367913, + "learning_rate": 2.349035554126867e-06, + "loss": 0.2718, + "step": 22784 + }, + { + "epoch": 1.0673630955169344, + "grad_norm": 0.6073673333677233, + "learning_rate": 2.348846250502049e-06, + "loss": 0.2875, + "step": 22785 + }, + { + "epoch": 1.0674099405068629, + "grad_norm": 0.6190113024631411, + "learning_rate": 2.3486569477470838e-06, + "loss": 0.2874, + "step": 22786 + }, + { + "epoch": 1.0674567854967911, + "grad_norm": 0.6119867169680457, + "learning_rate": 2.3484676458630583e-06, + "loss": 0.2777, + "step": 22787 + }, + { + "epoch": 1.0675036304867194, + "grad_norm": 0.5965883376917747, + "learning_rate": 2.3482783448510646e-06, + "loss": 0.2702, + "step": 22788 + }, + { + "epoch": 1.0675504754766478, + "grad_norm": 0.5989004203139441, + "learning_rate": 2.34808904471219e-06, + "loss": 0.279, + "step": 22789 + }, + { + "epoch": 1.067597320466576, + "grad_norm": 0.5739385068473061, + "learning_rate": 2.3478997454475246e-06, + "loss": 0.2621, + "step": 22790 + }, + { + "epoch": 1.0676441654565045, + "grad_norm": 0.6052322936694147, + "learning_rate": 2.347710447058159e-06, + "loss": 0.2808, + "step": 22791 + }, + { + "epoch": 1.0676910104464328, + "grad_norm": 0.5777351415615724, + "learning_rate": 2.3475211495451817e-06, + "loss": 0.272, + "step": 22792 + }, + { + "epoch": 1.067737855436361, + "grad_norm": 0.5730170700707553, + "learning_rate": 2.3473318529096807e-06, + "loss": 0.2666, + "step": 22793 + }, + { + "epoch": 1.0677847004262895, + "grad_norm": 0.562552946169931, + "learning_rate": 2.3471425571527463e-06, + "loss": 0.2754, + "step": 22794 + }, + { + "epoch": 1.0678315454162177, + "grad_norm": 0.5886507789155833, + "learning_rate": 2.3469532622754693e-06, + "loss": 0.2758, + "step": 22795 + }, + { + "epoch": 1.067878390406146, + "grad_norm": 0.6432148222564426, + "learning_rate": 2.3467639682789367e-06, + "loss": 0.2801, + "step": 22796 + }, + { + "epoch": 1.0679252353960744, + "grad_norm": 0.6114217668388914, + "learning_rate": 2.34657467516424e-06, + "loss": 0.2729, + "step": 22797 + }, + { + "epoch": 1.0679720803860027, + "grad_norm": 0.6090160072738479, + "learning_rate": 2.346385382932467e-06, + "loss": 0.2652, + "step": 22798 + }, + { + "epoch": 1.0680189253759311, + "grad_norm": 0.571673551972172, + "learning_rate": 2.3461960915847077e-06, + "loss": 0.2617, + "step": 22799 + }, + { + "epoch": 1.0680657703658594, + "grad_norm": 0.6279636903490677, + "learning_rate": 2.3460068011220503e-06, + "loss": 0.2865, + "step": 22800 + }, + { + "epoch": 1.0681126153557876, + "grad_norm": 0.611477646839407, + "learning_rate": 2.3458175115455864e-06, + "loss": 0.2666, + "step": 22801 + }, + { + "epoch": 1.068159460345716, + "grad_norm": 0.6305132832332875, + "learning_rate": 2.345628222856403e-06, + "loss": 0.2787, + "step": 22802 + }, + { + "epoch": 1.0682063053356443, + "grad_norm": 0.6573190141183423, + "learning_rate": 2.3454389350555903e-06, + "loss": 0.287, + "step": 22803 + }, + { + "epoch": 1.0682531503255728, + "grad_norm": 0.5701852674325777, + "learning_rate": 2.345249648144239e-06, + "loss": 0.2671, + "step": 22804 + }, + { + "epoch": 1.068299995315501, + "grad_norm": 0.6118311880539465, + "learning_rate": 2.3450603621234354e-06, + "loss": 0.2772, + "step": 22805 + }, + { + "epoch": 1.0683468403054293, + "grad_norm": 0.5886271189628165, + "learning_rate": 2.3448710769942717e-06, + "loss": 0.2763, + "step": 22806 + }, + { + "epoch": 1.0683936852953577, + "grad_norm": 0.5754532732506007, + "learning_rate": 2.344681792757835e-06, + "loss": 0.2719, + "step": 22807 + }, + { + "epoch": 1.068440530285286, + "grad_norm": 0.5569684183051274, + "learning_rate": 2.3444925094152164e-06, + "loss": 0.2741, + "step": 22808 + }, + { + "epoch": 1.0684873752752142, + "grad_norm": 0.5863682808446428, + "learning_rate": 2.3443032269675037e-06, + "loss": 0.2795, + "step": 22809 + }, + { + "epoch": 1.0685342202651427, + "grad_norm": 0.5813891474050737, + "learning_rate": 2.3441139454157875e-06, + "loss": 0.2652, + "step": 22810 + }, + { + "epoch": 1.068581065255071, + "grad_norm": 0.6074489133452617, + "learning_rate": 2.343924664761156e-06, + "loss": 0.2656, + "step": 22811 + }, + { + "epoch": 1.0686279102449994, + "grad_norm": 0.5526673796548959, + "learning_rate": 2.3437353850046985e-06, + "loss": 0.2582, + "step": 22812 + }, + { + "epoch": 1.0686747552349276, + "grad_norm": 0.5866960780550938, + "learning_rate": 2.3435461061475047e-06, + "loss": 0.2641, + "step": 22813 + }, + { + "epoch": 1.0687216002248558, + "grad_norm": 0.545157095133182, + "learning_rate": 2.3433568281906637e-06, + "loss": 0.267, + "step": 22814 + }, + { + "epoch": 1.0687684452147843, + "grad_norm": 0.529368310181016, + "learning_rate": 2.3431675511352652e-06, + "loss": 0.2437, + "step": 22815 + }, + { + "epoch": 1.0688152902047126, + "grad_norm": 0.6002979907687764, + "learning_rate": 2.3429782749823986e-06, + "loss": 0.2804, + "step": 22816 + }, + { + "epoch": 1.068862135194641, + "grad_norm": 0.7233330384526412, + "learning_rate": 2.3427889997331513e-06, + "loss": 0.3034, + "step": 22817 + }, + { + "epoch": 1.0689089801845693, + "grad_norm": 0.6120336536063538, + "learning_rate": 2.342599725388614e-06, + "loss": 0.2758, + "step": 22818 + }, + { + "epoch": 1.0689558251744975, + "grad_norm": 0.5884507249986014, + "learning_rate": 2.342410451949876e-06, + "loss": 0.2695, + "step": 22819 + }, + { + "epoch": 1.069002670164426, + "grad_norm": 0.63399116186943, + "learning_rate": 2.342221179418026e-06, + "loss": 0.2675, + "step": 22820 + }, + { + "epoch": 1.0690495151543542, + "grad_norm": 0.6393569491000852, + "learning_rate": 2.3420319077941535e-06, + "loss": 0.2805, + "step": 22821 + }, + { + "epoch": 1.0690963601442827, + "grad_norm": 0.6247499414640906, + "learning_rate": 2.3418426370793485e-06, + "loss": 0.2773, + "step": 22822 + }, + { + "epoch": 1.069143205134211, + "grad_norm": 0.6178601904694329, + "learning_rate": 2.341653367274699e-06, + "loss": 0.3, + "step": 22823 + }, + { + "epoch": 1.0691900501241391, + "grad_norm": 0.6114074565742119, + "learning_rate": 2.3414640983812935e-06, + "loss": 0.2768, + "step": 22824 + }, + { + "epoch": 1.0692368951140676, + "grad_norm": 0.6017719589033135, + "learning_rate": 2.341274830400223e-06, + "loss": 0.275, + "step": 22825 + }, + { + "epoch": 1.0692837401039959, + "grad_norm": 0.6041816464445097, + "learning_rate": 2.3410855633325762e-06, + "loss": 0.2924, + "step": 22826 + }, + { + "epoch": 1.0693305850939243, + "grad_norm": 0.5549038628248519, + "learning_rate": 2.3408962971794413e-06, + "loss": 0.2697, + "step": 22827 + }, + { + "epoch": 1.0693774300838526, + "grad_norm": 0.5813060363162598, + "learning_rate": 2.3407070319419096e-06, + "loss": 0.2601, + "step": 22828 + }, + { + "epoch": 1.0694242750737808, + "grad_norm": 0.5814908091437496, + "learning_rate": 2.340517767621068e-06, + "loss": 0.2762, + "step": 22829 + }, + { + "epoch": 1.0694711200637093, + "grad_norm": 0.6435257609727985, + "learning_rate": 2.3403285042180067e-06, + "loss": 0.3079, + "step": 22830 + }, + { + "epoch": 1.0695179650536375, + "grad_norm": 0.6057966664076617, + "learning_rate": 2.340139241733814e-06, + "loss": 0.2771, + "step": 22831 + }, + { + "epoch": 1.0695648100435657, + "grad_norm": 0.6175699956508193, + "learning_rate": 2.33994998016958e-06, + "loss": 0.2893, + "step": 22832 + }, + { + "epoch": 1.0696116550334942, + "grad_norm": 0.572027858770288, + "learning_rate": 2.3397607195263945e-06, + "loss": 0.2632, + "step": 22833 + }, + { + "epoch": 1.0696585000234224, + "grad_norm": 0.5983280029767601, + "learning_rate": 2.339571459805346e-06, + "loss": 0.2734, + "step": 22834 + }, + { + "epoch": 1.069705345013351, + "grad_norm": 0.6168394904390853, + "learning_rate": 2.339382201007522e-06, + "loss": 0.2901, + "step": 22835 + }, + { + "epoch": 1.0697521900032791, + "grad_norm": 0.581689528117623, + "learning_rate": 2.339192943134014e-06, + "loss": 0.2803, + "step": 22836 + }, + { + "epoch": 1.0697990349932074, + "grad_norm": 0.5951408724359661, + "learning_rate": 2.33900368618591e-06, + "loss": 0.2868, + "step": 22837 + }, + { + "epoch": 1.0698458799831359, + "grad_norm": 0.5717795846472016, + "learning_rate": 2.3388144301642994e-06, + "loss": 0.2592, + "step": 22838 + }, + { + "epoch": 1.069892724973064, + "grad_norm": 0.620994653811009, + "learning_rate": 2.3386251750702706e-06, + "loss": 0.2967, + "step": 22839 + }, + { + "epoch": 1.0699395699629926, + "grad_norm": 0.6307653644113138, + "learning_rate": 2.3384359209049147e-06, + "loss": 0.2775, + "step": 22840 + }, + { + "epoch": 1.0699864149529208, + "grad_norm": 0.5382427715661174, + "learning_rate": 2.338246667669319e-06, + "loss": 0.2625, + "step": 22841 + }, + { + "epoch": 1.070033259942849, + "grad_norm": 0.6131914593121954, + "learning_rate": 2.3380574153645726e-06, + "loss": 0.2835, + "step": 22842 + }, + { + "epoch": 1.0700801049327775, + "grad_norm": 0.5860920581883888, + "learning_rate": 2.3378681639917645e-06, + "loss": 0.2533, + "step": 22843 + }, + { + "epoch": 1.0701269499227057, + "grad_norm": 0.6117431105246476, + "learning_rate": 2.3376789135519857e-06, + "loss": 0.2791, + "step": 22844 + }, + { + "epoch": 1.070173794912634, + "grad_norm": 0.5493498408148416, + "learning_rate": 2.337489664046323e-06, + "loss": 0.283, + "step": 22845 + }, + { + "epoch": 1.0702206399025624, + "grad_norm": 0.5507443146616238, + "learning_rate": 2.337300415475867e-06, + "loss": 0.276, + "step": 22846 + }, + { + "epoch": 1.0702674848924907, + "grad_norm": 0.5916776789375849, + "learning_rate": 2.3371111678417064e-06, + "loss": 0.2748, + "step": 22847 + }, + { + "epoch": 1.0703143298824191, + "grad_norm": 0.5977490550738994, + "learning_rate": 2.3369219211449297e-06, + "loss": 0.28, + "step": 22848 + }, + { + "epoch": 1.0703611748723474, + "grad_norm": 0.5702853937320026, + "learning_rate": 2.336732675386626e-06, + "loss": 0.2739, + "step": 22849 + }, + { + "epoch": 1.0704080198622756, + "grad_norm": 0.5959877942129009, + "learning_rate": 2.3365434305678854e-06, + "loss": 0.27, + "step": 22850 + }, + { + "epoch": 1.070454864852204, + "grad_norm": 0.6324360248216431, + "learning_rate": 2.3363541866897955e-06, + "loss": 0.2832, + "step": 22851 + }, + { + "epoch": 1.0705017098421323, + "grad_norm": 0.6144727617830177, + "learning_rate": 2.3361649437534466e-06, + "loss": 0.2893, + "step": 22852 + }, + { + "epoch": 1.0705485548320608, + "grad_norm": 0.6224474508922514, + "learning_rate": 2.335975701759928e-06, + "loss": 0.2916, + "step": 22853 + }, + { + "epoch": 1.070595399821989, + "grad_norm": 0.5813622289014706, + "learning_rate": 2.335786460710327e-06, + "loss": 0.271, + "step": 22854 + }, + { + "epoch": 1.0706422448119173, + "grad_norm": 0.5542950621787394, + "learning_rate": 2.335597220605734e-06, + "loss": 0.2667, + "step": 22855 + }, + { + "epoch": 1.0706890898018457, + "grad_norm": 0.5913881404544742, + "learning_rate": 2.3354079814472374e-06, + "loss": 0.2856, + "step": 22856 + }, + { + "epoch": 1.070735934791774, + "grad_norm": 0.6237419397245914, + "learning_rate": 2.3352187432359266e-06, + "loss": 0.2942, + "step": 22857 + }, + { + "epoch": 1.0707827797817024, + "grad_norm": 0.5372229062544533, + "learning_rate": 2.3350295059728905e-06, + "loss": 0.2673, + "step": 22858 + }, + { + "epoch": 1.0708296247716307, + "grad_norm": 0.5886730722260651, + "learning_rate": 2.3348402696592186e-06, + "loss": 0.2847, + "step": 22859 + }, + { + "epoch": 1.070876469761559, + "grad_norm": 0.6300371773218103, + "learning_rate": 2.334651034295999e-06, + "loss": 0.2788, + "step": 22860 + }, + { + "epoch": 1.0709233147514874, + "grad_norm": 0.6024307561657628, + "learning_rate": 2.3344617998843213e-06, + "loss": 0.2838, + "step": 22861 + }, + { + "epoch": 1.0709701597414156, + "grad_norm": 0.5979994651902513, + "learning_rate": 2.334272566425274e-06, + "loss": 0.2774, + "step": 22862 + }, + { + "epoch": 1.071017004731344, + "grad_norm": 0.5847857470543956, + "learning_rate": 2.3340833339199466e-06, + "loss": 0.2661, + "step": 22863 + }, + { + "epoch": 1.0710638497212723, + "grad_norm": 0.5693029027602557, + "learning_rate": 2.3338941023694277e-06, + "loss": 0.2734, + "step": 22864 + }, + { + "epoch": 1.0711106947112006, + "grad_norm": 0.6026867178722314, + "learning_rate": 2.333704871774808e-06, + "loss": 0.273, + "step": 22865 + }, + { + "epoch": 1.071157539701129, + "grad_norm": 0.5702201936221263, + "learning_rate": 2.333515642137173e-06, + "loss": 0.2754, + "step": 22866 + }, + { + "epoch": 1.0712043846910573, + "grad_norm": 0.5970266432835899, + "learning_rate": 2.3333264134576144e-06, + "loss": 0.2765, + "step": 22867 + }, + { + "epoch": 1.0712512296809855, + "grad_norm": 0.5860557381016822, + "learning_rate": 2.3331371857372205e-06, + "loss": 0.2726, + "step": 22868 + }, + { + "epoch": 1.071298074670914, + "grad_norm": 0.5663313026401471, + "learning_rate": 2.3329479589770794e-06, + "loss": 0.2673, + "step": 22869 + }, + { + "epoch": 1.0713449196608422, + "grad_norm": 0.6142950315483527, + "learning_rate": 2.3327587331782814e-06, + "loss": 0.2715, + "step": 22870 + }, + { + "epoch": 1.0713917646507707, + "grad_norm": 0.5735245194199758, + "learning_rate": 2.3325695083419157e-06, + "loss": 0.2618, + "step": 22871 + }, + { + "epoch": 1.071438609640699, + "grad_norm": 0.5302982920539787, + "learning_rate": 2.3323802844690694e-06, + "loss": 0.2652, + "step": 22872 + }, + { + "epoch": 1.0714854546306272, + "grad_norm": 0.5924900415396857, + "learning_rate": 2.3321910615608328e-06, + "loss": 0.2947, + "step": 22873 + }, + { + "epoch": 1.0715322996205556, + "grad_norm": 0.67937980761744, + "learning_rate": 2.3320018396182937e-06, + "loss": 0.2923, + "step": 22874 + }, + { + "epoch": 1.0715791446104839, + "grad_norm": 0.6140771415234879, + "learning_rate": 2.3318126186425428e-06, + "loss": 0.2775, + "step": 22875 + }, + { + "epoch": 1.0716259896004123, + "grad_norm": 0.5921025057238949, + "learning_rate": 2.331623398634667e-06, + "loss": 0.2665, + "step": 22876 + }, + { + "epoch": 1.0716728345903406, + "grad_norm": 0.5720210767382717, + "learning_rate": 2.3314341795957575e-06, + "loss": 0.2679, + "step": 22877 + }, + { + "epoch": 1.0717196795802688, + "grad_norm": 0.6106153285695288, + "learning_rate": 2.3312449615269012e-06, + "loss": 0.2821, + "step": 22878 + }, + { + "epoch": 1.0717665245701973, + "grad_norm": 0.6379965191293414, + "learning_rate": 2.3310557444291878e-06, + "loss": 0.2765, + "step": 22879 + }, + { + "epoch": 1.0718133695601255, + "grad_norm": 0.560823628331192, + "learning_rate": 2.330866528303706e-06, + "loss": 0.2802, + "step": 22880 + }, + { + "epoch": 1.0718602145500538, + "grad_norm": 0.5703218150753917, + "learning_rate": 2.3306773131515445e-06, + "loss": 0.274, + "step": 22881 + }, + { + "epoch": 1.0719070595399822, + "grad_norm": 0.5620692200486407, + "learning_rate": 2.330488098973793e-06, + "loss": 0.2631, + "step": 22882 + }, + { + "epoch": 1.0719539045299105, + "grad_norm": 0.5978456938178949, + "learning_rate": 2.3302988857715407e-06, + "loss": 0.2668, + "step": 22883 + }, + { + "epoch": 1.072000749519839, + "grad_norm": 0.6246144001426905, + "learning_rate": 2.3301096735458744e-06, + "loss": 0.2688, + "step": 22884 + }, + { + "epoch": 1.0720475945097672, + "grad_norm": 0.6076917400171414, + "learning_rate": 2.329920462297884e-06, + "loss": 0.2861, + "step": 22885 + }, + { + "epoch": 1.0720944394996954, + "grad_norm": 0.5825014481507724, + "learning_rate": 2.3297312520286596e-06, + "loss": 0.2765, + "step": 22886 + }, + { + "epoch": 1.0721412844896239, + "grad_norm": 0.5699262028616964, + "learning_rate": 2.3295420427392884e-06, + "loss": 0.2657, + "step": 22887 + }, + { + "epoch": 1.072188129479552, + "grad_norm": 0.604202767352087, + "learning_rate": 2.3293528344308597e-06, + "loss": 0.2919, + "step": 22888 + }, + { + "epoch": 1.0722349744694806, + "grad_norm": 0.6079822361737623, + "learning_rate": 2.3291636271044637e-06, + "loss": 0.2824, + "step": 22889 + }, + { + "epoch": 1.0722818194594088, + "grad_norm": 0.5987627835669872, + "learning_rate": 2.3289744207611875e-06, + "loss": 0.2774, + "step": 22890 + }, + { + "epoch": 1.072328664449337, + "grad_norm": 0.564511560320584, + "learning_rate": 2.3287852154021203e-06, + "loss": 0.2637, + "step": 22891 + }, + { + "epoch": 1.0723755094392655, + "grad_norm": 0.574226382840303, + "learning_rate": 2.328596011028351e-06, + "loss": 0.2759, + "step": 22892 + }, + { + "epoch": 1.0724223544291938, + "grad_norm": 0.6018820838968966, + "learning_rate": 2.3284068076409687e-06, + "loss": 0.2736, + "step": 22893 + }, + { + "epoch": 1.0724691994191222, + "grad_norm": 0.6164152304808497, + "learning_rate": 2.328217605241062e-06, + "loss": 0.2903, + "step": 22894 + }, + { + "epoch": 1.0725160444090505, + "grad_norm": 0.5754406176116983, + "learning_rate": 2.3280284038297207e-06, + "loss": 0.2636, + "step": 22895 + }, + { + "epoch": 1.0725628893989787, + "grad_norm": 0.5636835127759627, + "learning_rate": 2.327839203408032e-06, + "loss": 0.259, + "step": 22896 + }, + { + "epoch": 1.0726097343889072, + "grad_norm": 0.5201634472228056, + "learning_rate": 2.3276500039770857e-06, + "loss": 0.2454, + "step": 22897 + }, + { + "epoch": 1.0726565793788354, + "grad_norm": 0.6297631144549622, + "learning_rate": 2.3274608055379694e-06, + "loss": 0.2829, + "step": 22898 + }, + { + "epoch": 1.0727034243687639, + "grad_norm": 0.6044532444155453, + "learning_rate": 2.3272716080917735e-06, + "loss": 0.2861, + "step": 22899 + }, + { + "epoch": 1.072750269358692, + "grad_norm": 0.5861588606551489, + "learning_rate": 2.327082411639586e-06, + "loss": 0.2746, + "step": 22900 + }, + { + "epoch": 1.0727971143486204, + "grad_norm": 0.5719973524776657, + "learning_rate": 2.3268932161824963e-06, + "loss": 0.28, + "step": 22901 + }, + { + "epoch": 1.0728439593385488, + "grad_norm": 0.6060624531171429, + "learning_rate": 2.3267040217215918e-06, + "loss": 0.2712, + "step": 22902 + }, + { + "epoch": 1.072890804328477, + "grad_norm": 0.5912328872834004, + "learning_rate": 2.3265148282579627e-06, + "loss": 0.2664, + "step": 22903 + }, + { + "epoch": 1.0729376493184053, + "grad_norm": 0.6120671346715149, + "learning_rate": 2.3263256357926966e-06, + "loss": 0.2821, + "step": 22904 + }, + { + "epoch": 1.0729844943083338, + "grad_norm": 0.5809645318967062, + "learning_rate": 2.326136444326883e-06, + "loss": 0.2744, + "step": 22905 + }, + { + "epoch": 1.073031339298262, + "grad_norm": 0.6050071502530496, + "learning_rate": 2.3259472538616106e-06, + "loss": 0.2609, + "step": 22906 + }, + { + "epoch": 1.0730781842881905, + "grad_norm": 0.5866302674490098, + "learning_rate": 2.325758064397969e-06, + "loss": 0.2852, + "step": 22907 + }, + { + "epoch": 1.0731250292781187, + "grad_norm": 0.5672903178662855, + "learning_rate": 2.3255688759370443e-06, + "loss": 0.2733, + "step": 22908 + }, + { + "epoch": 1.073171874268047, + "grad_norm": 0.6118326870220331, + "learning_rate": 2.3253796884799274e-06, + "loss": 0.2879, + "step": 22909 + }, + { + "epoch": 1.0732187192579754, + "grad_norm": 0.5680233354608142, + "learning_rate": 2.3251905020277067e-06, + "loss": 0.2635, + "step": 22910 + }, + { + "epoch": 1.0732655642479036, + "grad_norm": 0.5832616439224982, + "learning_rate": 2.32500131658147e-06, + "loss": 0.27, + "step": 22911 + }, + { + "epoch": 1.073312409237832, + "grad_norm": 0.6363447215090657, + "learning_rate": 2.324812132142307e-06, + "loss": 0.2872, + "step": 22912 + }, + { + "epoch": 1.0733592542277604, + "grad_norm": 0.6400375951958341, + "learning_rate": 2.324622948711307e-06, + "loss": 0.2865, + "step": 22913 + }, + { + "epoch": 1.0734060992176886, + "grad_norm": 0.5927245069538863, + "learning_rate": 2.3244337662895582e-06, + "loss": 0.2857, + "step": 22914 + }, + { + "epoch": 1.073452944207617, + "grad_norm": 0.6122181059831057, + "learning_rate": 2.324244584878148e-06, + "loss": 0.2831, + "step": 22915 + }, + { + "epoch": 1.0734997891975453, + "grad_norm": 0.5946962991595011, + "learning_rate": 2.3240554044781653e-06, + "loss": 0.2831, + "step": 22916 + }, + { + "epoch": 1.0735466341874735, + "grad_norm": 0.5451529563616815, + "learning_rate": 2.323866225090701e-06, + "loss": 0.2747, + "step": 22917 + }, + { + "epoch": 1.073593479177402, + "grad_norm": 0.6377413280328339, + "learning_rate": 2.3236770467168413e-06, + "loss": 0.2859, + "step": 22918 + }, + { + "epoch": 1.0736403241673302, + "grad_norm": 0.5725012158321151, + "learning_rate": 2.3234878693576756e-06, + "loss": 0.2844, + "step": 22919 + }, + { + "epoch": 1.0736871691572587, + "grad_norm": 0.5659265283237459, + "learning_rate": 2.3232986930142946e-06, + "loss": 0.2696, + "step": 22920 + }, + { + "epoch": 1.073734014147187, + "grad_norm": 0.5980747920596965, + "learning_rate": 2.323109517687784e-06, + "loss": 0.259, + "step": 22921 + }, + { + "epoch": 1.0737808591371152, + "grad_norm": 0.5853276296060086, + "learning_rate": 2.3229203433792336e-06, + "loss": 0.2736, + "step": 22922 + }, + { + "epoch": 1.0738277041270436, + "grad_norm": 0.6083640388052116, + "learning_rate": 2.322731170089732e-06, + "loss": 0.2755, + "step": 22923 + }, + { + "epoch": 1.0738745491169719, + "grad_norm": 0.5745037027162504, + "learning_rate": 2.3225419978203683e-06, + "loss": 0.2664, + "step": 22924 + }, + { + "epoch": 1.0739213941069004, + "grad_norm": 0.5682266534642225, + "learning_rate": 2.3223528265722307e-06, + "loss": 0.2653, + "step": 22925 + }, + { + "epoch": 1.0739682390968286, + "grad_norm": 0.5849891997733805, + "learning_rate": 2.322163656346409e-06, + "loss": 0.2786, + "step": 22926 + }, + { + "epoch": 1.0740150840867568, + "grad_norm": 0.5767445319642317, + "learning_rate": 2.321974487143989e-06, + "loss": 0.2671, + "step": 22927 + }, + { + "epoch": 1.0740619290766853, + "grad_norm": 0.5396044702792079, + "learning_rate": 2.321785318966062e-06, + "loss": 0.262, + "step": 22928 + }, + { + "epoch": 1.0741087740666135, + "grad_norm": 0.5919130440262808, + "learning_rate": 2.3215961518137154e-06, + "loss": 0.2718, + "step": 22929 + }, + { + "epoch": 1.074155619056542, + "grad_norm": 0.6301021533632974, + "learning_rate": 2.321406985688038e-06, + "loss": 0.2788, + "step": 22930 + }, + { + "epoch": 1.0742024640464702, + "grad_norm": 0.5901999989392995, + "learning_rate": 2.3212178205901187e-06, + "loss": 0.2702, + "step": 22931 + }, + { + "epoch": 1.0742493090363985, + "grad_norm": 0.5694616209788717, + "learning_rate": 2.321028656521047e-06, + "loss": 0.2712, + "step": 22932 + }, + { + "epoch": 1.074296154026327, + "grad_norm": 0.6254694499260873, + "learning_rate": 2.3208394934819087e-06, + "loss": 0.2929, + "step": 22933 + }, + { + "epoch": 1.0743429990162552, + "grad_norm": 0.5700377756038915, + "learning_rate": 2.3206503314737942e-06, + "loss": 0.2724, + "step": 22934 + }, + { + "epoch": 1.0743898440061836, + "grad_norm": 0.6329143041632158, + "learning_rate": 2.3204611704977932e-06, + "loss": 0.2889, + "step": 22935 + }, + { + "epoch": 1.0744366889961119, + "grad_norm": 0.5973250920370692, + "learning_rate": 2.320272010554992e-06, + "loss": 0.281, + "step": 22936 + }, + { + "epoch": 1.0744835339860401, + "grad_norm": 0.5641792693632899, + "learning_rate": 2.3200828516464807e-06, + "loss": 0.2626, + "step": 22937 + }, + { + "epoch": 1.0745303789759686, + "grad_norm": 0.584552200352826, + "learning_rate": 2.3198936937733473e-06, + "loss": 0.2852, + "step": 22938 + }, + { + "epoch": 1.0745772239658968, + "grad_norm": 0.6151313201147729, + "learning_rate": 2.3197045369366803e-06, + "loss": 0.2851, + "step": 22939 + }, + { + "epoch": 1.074624068955825, + "grad_norm": 0.5762291482470037, + "learning_rate": 2.3195153811375685e-06, + "loss": 0.283, + "step": 22940 + }, + { + "epoch": 1.0746709139457535, + "grad_norm": 0.5769772136405428, + "learning_rate": 2.3193262263771004e-06, + "loss": 0.27, + "step": 22941 + }, + { + "epoch": 1.0747177589356818, + "grad_norm": 0.5464817261637206, + "learning_rate": 2.319137072656364e-06, + "loss": 0.2613, + "step": 22942 + }, + { + "epoch": 1.0747646039256102, + "grad_norm": 0.615565437761555, + "learning_rate": 2.318947919976448e-06, + "loss": 0.2818, + "step": 22943 + }, + { + "epoch": 1.0748114489155385, + "grad_norm": 0.5966239514191897, + "learning_rate": 2.3187587683384427e-06, + "loss": 0.2957, + "step": 22944 + }, + { + "epoch": 1.0748582939054667, + "grad_norm": 0.555626309756878, + "learning_rate": 2.3185696177434344e-06, + "loss": 0.2625, + "step": 22945 + }, + { + "epoch": 1.0749051388953952, + "grad_norm": 0.5737308013433471, + "learning_rate": 2.318380468192512e-06, + "loss": 0.2698, + "step": 22946 + }, + { + "epoch": 1.0749519838853234, + "grad_norm": 0.6252587966586931, + "learning_rate": 2.3181913196867638e-06, + "loss": 0.2728, + "step": 22947 + }, + { + "epoch": 1.0749988288752519, + "grad_norm": 0.5948573567860543, + "learning_rate": 2.31800217222728e-06, + "loss": 0.2769, + "step": 22948 + }, + { + "epoch": 1.0750456738651801, + "grad_norm": 0.6195309786082074, + "learning_rate": 2.317813025815147e-06, + "loss": 0.2907, + "step": 22949 + }, + { + "epoch": 1.0750925188551084, + "grad_norm": 0.6328712172775154, + "learning_rate": 2.3176238804514557e-06, + "loss": 0.2911, + "step": 22950 + }, + { + "epoch": 1.0751393638450368, + "grad_norm": 0.5765776762031032, + "learning_rate": 2.317434736137292e-06, + "loss": 0.2745, + "step": 22951 + }, + { + "epoch": 1.075186208834965, + "grad_norm": 0.5983823549625334, + "learning_rate": 2.3172455928737456e-06, + "loss": 0.2871, + "step": 22952 + }, + { + "epoch": 1.0752330538248933, + "grad_norm": 0.5983810527130717, + "learning_rate": 2.317056450661905e-06, + "loss": 0.2884, + "step": 22953 + }, + { + "epoch": 1.0752798988148218, + "grad_norm": 0.5474330369288033, + "learning_rate": 2.316867309502858e-06, + "loss": 0.266, + "step": 22954 + }, + { + "epoch": 1.07532674380475, + "grad_norm": 0.5690845877919903, + "learning_rate": 2.3166781693976948e-06, + "loss": 0.261, + "step": 22955 + }, + { + "epoch": 1.0753735887946785, + "grad_norm": 0.587835833086636, + "learning_rate": 2.3164890303475025e-06, + "loss": 0.2815, + "step": 22956 + }, + { + "epoch": 1.0754204337846067, + "grad_norm": 0.5558326372835392, + "learning_rate": 2.316299892353369e-06, + "loss": 0.2772, + "step": 22957 + }, + { + "epoch": 1.075467278774535, + "grad_norm": 0.5958069585439116, + "learning_rate": 2.3161107554163835e-06, + "loss": 0.2933, + "step": 22958 + }, + { + "epoch": 1.0755141237644634, + "grad_norm": 0.5701843791563281, + "learning_rate": 2.3159216195376346e-06, + "loss": 0.2732, + "step": 22959 + }, + { + "epoch": 1.0755609687543917, + "grad_norm": 0.5835414930151455, + "learning_rate": 2.31573248471821e-06, + "loss": 0.2719, + "step": 22960 + }, + { + "epoch": 1.0756078137443201, + "grad_norm": 0.5709282574313419, + "learning_rate": 2.315543350959199e-06, + "loss": 0.2762, + "step": 22961 + }, + { + "epoch": 1.0756546587342484, + "grad_norm": 0.6027354291383674, + "learning_rate": 2.3153542182616905e-06, + "loss": 0.2947, + "step": 22962 + }, + { + "epoch": 1.0757015037241766, + "grad_norm": 0.6057063642528907, + "learning_rate": 2.3151650866267714e-06, + "loss": 0.2726, + "step": 22963 + }, + { + "epoch": 1.075748348714105, + "grad_norm": 0.611903677441704, + "learning_rate": 2.3149759560555305e-06, + "loss": 0.2828, + "step": 22964 + }, + { + "epoch": 1.0757951937040333, + "grad_norm": 0.5605309562560611, + "learning_rate": 2.314786826549056e-06, + "loss": 0.2601, + "step": 22965 + }, + { + "epoch": 1.0758420386939618, + "grad_norm": 0.6277333248062333, + "learning_rate": 2.3145976981084377e-06, + "loss": 0.2789, + "step": 22966 + }, + { + "epoch": 1.07588888368389, + "grad_norm": 0.6521597547244459, + "learning_rate": 2.3144085707347627e-06, + "loss": 0.2937, + "step": 22967 + }, + { + "epoch": 1.0759357286738183, + "grad_norm": 0.6084272847341825, + "learning_rate": 2.3142194444291205e-06, + "loss": 0.3011, + "step": 22968 + }, + { + "epoch": 1.0759825736637467, + "grad_norm": 0.586423420695293, + "learning_rate": 2.3140303191925977e-06, + "loss": 0.2795, + "step": 22969 + }, + { + "epoch": 1.076029418653675, + "grad_norm": 0.6061845624467578, + "learning_rate": 2.313841195026284e-06, + "loss": 0.2684, + "step": 22970 + }, + { + "epoch": 1.0760762636436034, + "grad_norm": 0.606286952349337, + "learning_rate": 2.3136520719312673e-06, + "loss": 0.2833, + "step": 22971 + }, + { + "epoch": 1.0761231086335317, + "grad_norm": 0.577282656024982, + "learning_rate": 2.3134629499086357e-06, + "loss": 0.2783, + "step": 22972 + }, + { + "epoch": 1.07616995362346, + "grad_norm": 0.6196341433754627, + "learning_rate": 2.3132738289594786e-06, + "loss": 0.2856, + "step": 22973 + }, + { + "epoch": 1.0762167986133884, + "grad_norm": 0.5860491532382582, + "learning_rate": 2.3130847090848844e-06, + "loss": 0.2805, + "step": 22974 + }, + { + "epoch": 1.0762636436033166, + "grad_norm": 0.5755818041952826, + "learning_rate": 2.3128955902859395e-06, + "loss": 0.2805, + "step": 22975 + }, + { + "epoch": 1.0763104885932449, + "grad_norm": 0.6269145048829469, + "learning_rate": 2.3127064725637335e-06, + "loss": 0.2783, + "step": 22976 + }, + { + "epoch": 1.0763573335831733, + "grad_norm": 0.5734109172478953, + "learning_rate": 2.3125173559193554e-06, + "loss": 0.2732, + "step": 22977 + }, + { + "epoch": 1.0764041785731016, + "grad_norm": 0.5806290652525029, + "learning_rate": 2.312328240353892e-06, + "loss": 0.264, + "step": 22978 + }, + { + "epoch": 1.07645102356303, + "grad_norm": 0.5779874100998554, + "learning_rate": 2.312139125868432e-06, + "loss": 0.2872, + "step": 22979 + }, + { + "epoch": 1.0764978685529583, + "grad_norm": 0.5964846784553706, + "learning_rate": 2.311950012464065e-06, + "loss": 0.2753, + "step": 22980 + }, + { + "epoch": 1.0765447135428865, + "grad_norm": 0.6356690831793099, + "learning_rate": 2.3117609001418794e-06, + "loss": 0.2854, + "step": 22981 + }, + { + "epoch": 1.076591558532815, + "grad_norm": 0.5295017962104976, + "learning_rate": 2.311571788902961e-06, + "loss": 0.2635, + "step": 22982 + }, + { + "epoch": 1.0766384035227432, + "grad_norm": 0.5845377437907182, + "learning_rate": 2.3113826787484e-06, + "loss": 0.2677, + "step": 22983 + }, + { + "epoch": 1.0766852485126717, + "grad_norm": 0.5972275136711833, + "learning_rate": 2.3111935696792843e-06, + "loss": 0.2864, + "step": 22984 + }, + { + "epoch": 1.0767320935026, + "grad_norm": 0.6108760497865954, + "learning_rate": 2.311004461696702e-06, + "loss": 0.2869, + "step": 22985 + }, + { + "epoch": 1.0767789384925281, + "grad_norm": 0.5837909633650924, + "learning_rate": 2.3108153548017416e-06, + "loss": 0.2742, + "step": 22986 + }, + { + "epoch": 1.0768257834824566, + "grad_norm": 0.6081626676919316, + "learning_rate": 2.3106262489954914e-06, + "loss": 0.2652, + "step": 22987 + }, + { + "epoch": 1.0768726284723849, + "grad_norm": 0.6107377126168066, + "learning_rate": 2.3104371442790398e-06, + "loss": 0.2906, + "step": 22988 + }, + { + "epoch": 1.076919473462313, + "grad_norm": 0.5601185079872912, + "learning_rate": 2.310248040653474e-06, + "loss": 0.2744, + "step": 22989 + }, + { + "epoch": 1.0769663184522416, + "grad_norm": 0.5638715619910646, + "learning_rate": 2.310058938119884e-06, + "loss": 0.2793, + "step": 22990 + }, + { + "epoch": 1.0770131634421698, + "grad_norm": 0.6138193447397787, + "learning_rate": 2.3098698366793564e-06, + "loss": 0.2769, + "step": 22991 + }, + { + "epoch": 1.0770600084320983, + "grad_norm": 0.5733670601899139, + "learning_rate": 2.3096807363329798e-06, + "loss": 0.2746, + "step": 22992 + }, + { + "epoch": 1.0771068534220265, + "grad_norm": 0.6235652625312067, + "learning_rate": 2.309491637081844e-06, + "loss": 0.2915, + "step": 22993 + }, + { + "epoch": 1.0771536984119547, + "grad_norm": 0.5843832780497754, + "learning_rate": 2.309302538927035e-06, + "loss": 0.2549, + "step": 22994 + }, + { + "epoch": 1.0772005434018832, + "grad_norm": 0.5469649112694994, + "learning_rate": 2.309113441869642e-06, + "loss": 0.2637, + "step": 22995 + }, + { + "epoch": 1.0772473883918114, + "grad_norm": 0.5484527232827435, + "learning_rate": 2.308924345910753e-06, + "loss": 0.2583, + "step": 22996 + }, + { + "epoch": 1.07729423338174, + "grad_norm": 0.5796021740885438, + "learning_rate": 2.3087352510514565e-06, + "loss": 0.2774, + "step": 22997 + }, + { + "epoch": 1.0773410783716681, + "grad_norm": 0.575320617343916, + "learning_rate": 2.3085461572928405e-06, + "loss": 0.2783, + "step": 22998 + }, + { + "epoch": 1.0773879233615964, + "grad_norm": 0.5830011029764577, + "learning_rate": 2.308357064635994e-06, + "loss": 0.2682, + "step": 22999 + }, + { + "epoch": 1.0774347683515249, + "grad_norm": 0.6264790529278385, + "learning_rate": 2.3081679730820038e-06, + "loss": 0.2736, + "step": 23000 + }, + { + "epoch": 1.077481613341453, + "grad_norm": 0.6053444592064838, + "learning_rate": 2.3079788826319586e-06, + "loss": 0.271, + "step": 23001 + }, + { + "epoch": 1.0775284583313816, + "grad_norm": 0.5734889039879801, + "learning_rate": 2.307789793286947e-06, + "loss": 0.2739, + "step": 23002 + }, + { + "epoch": 1.0775753033213098, + "grad_norm": 0.6384871137053998, + "learning_rate": 2.307600705048056e-06, + "loss": 0.3003, + "step": 23003 + }, + { + "epoch": 1.077622148311238, + "grad_norm": 0.5698026636825559, + "learning_rate": 2.3074116179163755e-06, + "loss": 0.2642, + "step": 23004 + }, + { + "epoch": 1.0776689933011665, + "grad_norm": 0.5875784649652855, + "learning_rate": 2.307222531892993e-06, + "loss": 0.2704, + "step": 23005 + }, + { + "epoch": 1.0777158382910947, + "grad_norm": 0.5735661659037082, + "learning_rate": 2.307033446978996e-06, + "loss": 0.2831, + "step": 23006 + }, + { + "epoch": 1.0777626832810232, + "grad_norm": 0.5713122201207532, + "learning_rate": 2.3068443631754724e-06, + "loss": 0.2652, + "step": 23007 + }, + { + "epoch": 1.0778095282709514, + "grad_norm": 0.6311093990328717, + "learning_rate": 2.3066552804835117e-06, + "loss": 0.2794, + "step": 23008 + }, + { + "epoch": 1.0778563732608797, + "grad_norm": 0.5748713670471255, + "learning_rate": 2.3064661989042007e-06, + "loss": 0.2792, + "step": 23009 + }, + { + "epoch": 1.0779032182508081, + "grad_norm": 0.5755339265309455, + "learning_rate": 2.306277118438628e-06, + "loss": 0.2758, + "step": 23010 + }, + { + "epoch": 1.0779500632407364, + "grad_norm": 0.5980574724649625, + "learning_rate": 2.306088039087883e-06, + "loss": 0.2836, + "step": 23011 + }, + { + "epoch": 1.0779969082306646, + "grad_norm": 0.6010259718951806, + "learning_rate": 2.305898960853052e-06, + "loss": 0.2878, + "step": 23012 + }, + { + "epoch": 1.078043753220593, + "grad_norm": 0.5522203408918583, + "learning_rate": 2.305709883735223e-06, + "loss": 0.2643, + "step": 23013 + }, + { + "epoch": 1.0780905982105213, + "grad_norm": 0.6423531709758272, + "learning_rate": 2.3055208077354846e-06, + "loss": 0.2883, + "step": 23014 + }, + { + "epoch": 1.0781374432004498, + "grad_norm": 0.6041010639198272, + "learning_rate": 2.305331732854926e-06, + "loss": 0.278, + "step": 23015 + }, + { + "epoch": 1.078184288190378, + "grad_norm": 0.5343522358004842, + "learning_rate": 2.305142659094634e-06, + "loss": 0.2581, + "step": 23016 + }, + { + "epoch": 1.0782311331803063, + "grad_norm": 0.6240634144870024, + "learning_rate": 2.3049535864556977e-06, + "loss": 0.276, + "step": 23017 + }, + { + "epoch": 1.0782779781702347, + "grad_norm": 0.5808646324214497, + "learning_rate": 2.3047645149392034e-06, + "loss": 0.2647, + "step": 23018 + }, + { + "epoch": 1.078324823160163, + "grad_norm": 0.6190862486499321, + "learning_rate": 2.3045754445462407e-06, + "loss": 0.2757, + "step": 23019 + }, + { + "epoch": 1.0783716681500914, + "grad_norm": 0.5853035273168633, + "learning_rate": 2.304386375277897e-06, + "loss": 0.2694, + "step": 23020 + }, + { + "epoch": 1.0784185131400197, + "grad_norm": 0.6326232017952179, + "learning_rate": 2.3041973071352605e-06, + "loss": 0.2796, + "step": 23021 + }, + { + "epoch": 1.078465358129948, + "grad_norm": 0.5856221738332664, + "learning_rate": 2.30400824011942e-06, + "loss": 0.2712, + "step": 23022 + }, + { + "epoch": 1.0785122031198764, + "grad_norm": 0.6455303098031104, + "learning_rate": 2.303819174231463e-06, + "loss": 0.2895, + "step": 23023 + }, + { + "epoch": 1.0785590481098046, + "grad_norm": 0.586343931026166, + "learning_rate": 2.303630109472476e-06, + "loss": 0.2782, + "step": 23024 + }, + { + "epoch": 1.0786058930997329, + "grad_norm": 0.5882970026311667, + "learning_rate": 2.3034410458435486e-06, + "loss": 0.2849, + "step": 23025 + }, + { + "epoch": 1.0786527380896613, + "grad_norm": 0.6323028793790133, + "learning_rate": 2.3032519833457695e-06, + "loss": 0.2751, + "step": 23026 + }, + { + "epoch": 1.0786995830795896, + "grad_norm": 0.5998697144439341, + "learning_rate": 2.303062921980225e-06, + "loss": 0.282, + "step": 23027 + }, + { + "epoch": 1.078746428069518, + "grad_norm": 0.5596487918739182, + "learning_rate": 2.302873861748004e-06, + "loss": 0.2573, + "step": 23028 + }, + { + "epoch": 1.0787932730594463, + "grad_norm": 0.5932340337448239, + "learning_rate": 2.3026848026501953e-06, + "loss": 0.2646, + "step": 23029 + }, + { + "epoch": 1.0788401180493745, + "grad_norm": 0.628496609546451, + "learning_rate": 2.3024957446878854e-06, + "loss": 0.2863, + "step": 23030 + }, + { + "epoch": 1.078886963039303, + "grad_norm": 0.6112337885769541, + "learning_rate": 2.3023066878621624e-06, + "loss": 0.2771, + "step": 23031 + }, + { + "epoch": 1.0789338080292312, + "grad_norm": 0.5912993820372971, + "learning_rate": 2.3021176321741144e-06, + "loss": 0.2826, + "step": 23032 + }, + { + "epoch": 1.0789806530191597, + "grad_norm": 0.6072680316816931, + "learning_rate": 2.3019285776248305e-06, + "loss": 0.2811, + "step": 23033 + }, + { + "epoch": 1.079027498009088, + "grad_norm": 0.5833137126515201, + "learning_rate": 2.3017395242153977e-06, + "loss": 0.2779, + "step": 23034 + }, + { + "epoch": 1.0790743429990162, + "grad_norm": 0.6003327935848816, + "learning_rate": 2.3015504719469046e-06, + "loss": 0.2855, + "step": 23035 + }, + { + "epoch": 1.0791211879889446, + "grad_norm": 0.5621082344018921, + "learning_rate": 2.301361420820438e-06, + "loss": 0.277, + "step": 23036 + }, + { + "epoch": 1.0791680329788729, + "grad_norm": 0.6099690661706445, + "learning_rate": 2.3011723708370867e-06, + "loss": 0.2816, + "step": 23037 + }, + { + "epoch": 1.0792148779688013, + "grad_norm": 0.6040845870337384, + "learning_rate": 2.300983321997938e-06, + "loss": 0.2697, + "step": 23038 + }, + { + "epoch": 1.0792617229587296, + "grad_norm": 0.6319132204179848, + "learning_rate": 2.300794274304081e-06, + "loss": 0.2878, + "step": 23039 + }, + { + "epoch": 1.0793085679486578, + "grad_norm": 0.6115271051033687, + "learning_rate": 2.300605227756602e-06, + "loss": 0.2767, + "step": 23040 + }, + { + "epoch": 1.0793554129385863, + "grad_norm": 0.5437612696135034, + "learning_rate": 2.3004161823565908e-06, + "loss": 0.2692, + "step": 23041 + }, + { + "epoch": 1.0794022579285145, + "grad_norm": 0.6072504932268186, + "learning_rate": 2.3002271381051337e-06, + "loss": 0.2929, + "step": 23042 + }, + { + "epoch": 1.079449102918443, + "grad_norm": 0.5599285443822524, + "learning_rate": 2.3000380950033198e-06, + "loss": 0.2623, + "step": 23043 + }, + { + "epoch": 1.0794959479083712, + "grad_norm": 0.5893092450696567, + "learning_rate": 2.2998490530522356e-06, + "loss": 0.2789, + "step": 23044 + }, + { + "epoch": 1.0795427928982995, + "grad_norm": 0.5527553155347517, + "learning_rate": 2.2996600122529693e-06, + "loss": 0.2634, + "step": 23045 + }, + { + "epoch": 1.079589637888228, + "grad_norm": 0.5818344809717207, + "learning_rate": 2.2994709726066106e-06, + "loss": 0.2554, + "step": 23046 + }, + { + "epoch": 1.0796364828781562, + "grad_norm": 0.5793962213255307, + "learning_rate": 2.2992819341142453e-06, + "loss": 0.2624, + "step": 23047 + }, + { + "epoch": 1.0796833278680844, + "grad_norm": 0.5839538930196395, + "learning_rate": 2.2990928967769628e-06, + "loss": 0.2742, + "step": 23048 + }, + { + "epoch": 1.0797301728580129, + "grad_norm": 0.569756880863868, + "learning_rate": 2.2989038605958493e-06, + "loss": 0.2706, + "step": 23049 + }, + { + "epoch": 1.0797770178479411, + "grad_norm": 0.5758362708002898, + "learning_rate": 2.298714825571994e-06, + "loss": 0.2722, + "step": 23050 + }, + { + "epoch": 1.0798238628378696, + "grad_norm": 0.5752261784000604, + "learning_rate": 2.298525791706484e-06, + "loss": 0.277, + "step": 23051 + }, + { + "epoch": 1.0798707078277978, + "grad_norm": 0.5654128179015455, + "learning_rate": 2.298336759000407e-06, + "loss": 0.2819, + "step": 23052 + }, + { + "epoch": 1.079917552817726, + "grad_norm": 0.5536598596754732, + "learning_rate": 2.2981477274548525e-06, + "loss": 0.2678, + "step": 23053 + }, + { + "epoch": 1.0799643978076545, + "grad_norm": 0.6099461370742651, + "learning_rate": 2.297958697070907e-06, + "loss": 0.2719, + "step": 23054 + }, + { + "epoch": 1.0800112427975828, + "grad_norm": 0.6133846318164022, + "learning_rate": 2.2977696678496575e-06, + "loss": 0.2855, + "step": 23055 + }, + { + "epoch": 1.0800580877875112, + "grad_norm": 0.5901276326918451, + "learning_rate": 2.297580639792193e-06, + "loss": 0.2646, + "step": 23056 + }, + { + "epoch": 1.0801049327774395, + "grad_norm": 0.5669723838959455, + "learning_rate": 2.2973916128996014e-06, + "loss": 0.2723, + "step": 23057 + }, + { + "epoch": 1.0801517777673677, + "grad_norm": 0.5977105362309919, + "learning_rate": 2.2972025871729696e-06, + "loss": 0.2953, + "step": 23058 + }, + { + "epoch": 1.0801986227572962, + "grad_norm": 0.5455499078882746, + "learning_rate": 2.2970135626133857e-06, + "loss": 0.2709, + "step": 23059 + }, + { + "epoch": 1.0802454677472244, + "grad_norm": 0.5750637356356161, + "learning_rate": 2.296824539221939e-06, + "loss": 0.2678, + "step": 23060 + }, + { + "epoch": 1.0802923127371526, + "grad_norm": 0.5755562440871438, + "learning_rate": 2.2966355169997155e-06, + "loss": 0.2788, + "step": 23061 + }, + { + "epoch": 1.0803391577270811, + "grad_norm": 0.6369396579038902, + "learning_rate": 2.296446495947803e-06, + "loss": 0.2812, + "step": 23062 + }, + { + "epoch": 1.0803860027170094, + "grad_norm": 0.5749410349213078, + "learning_rate": 2.2962574760672894e-06, + "loss": 0.2698, + "step": 23063 + }, + { + "epoch": 1.0804328477069378, + "grad_norm": 0.5843937280925053, + "learning_rate": 2.296068457359264e-06, + "loss": 0.2704, + "step": 23064 + }, + { + "epoch": 1.080479692696866, + "grad_norm": 0.5653546481756792, + "learning_rate": 2.2958794398248123e-06, + "loss": 0.2474, + "step": 23065 + }, + { + "epoch": 1.0805265376867943, + "grad_norm": 0.6250139079743177, + "learning_rate": 2.2956904234650245e-06, + "loss": 0.2691, + "step": 23066 + }, + { + "epoch": 1.0805733826767228, + "grad_norm": 0.6069092160752795, + "learning_rate": 2.2955014082809857e-06, + "loss": 0.2932, + "step": 23067 + }, + { + "epoch": 1.080620227666651, + "grad_norm": 0.6283896547544704, + "learning_rate": 2.2953123942737853e-06, + "loss": 0.2748, + "step": 23068 + }, + { + "epoch": 1.0806670726565795, + "grad_norm": 0.6095438145790651, + "learning_rate": 2.2951233814445105e-06, + "loss": 0.2752, + "step": 23069 + }, + { + "epoch": 1.0807139176465077, + "grad_norm": 0.6552480368399042, + "learning_rate": 2.2949343697942487e-06, + "loss": 0.2962, + "step": 23070 + }, + { + "epoch": 1.080760762636436, + "grad_norm": 0.5603817381373508, + "learning_rate": 2.294745359324089e-06, + "loss": 0.2751, + "step": 23071 + }, + { + "epoch": 1.0808076076263644, + "grad_norm": 0.5797402922356419, + "learning_rate": 2.2945563500351187e-06, + "loss": 0.2642, + "step": 23072 + }, + { + "epoch": 1.0808544526162926, + "grad_norm": 0.6202632379022199, + "learning_rate": 2.2943673419284235e-06, + "loss": 0.2831, + "step": 23073 + }, + { + "epoch": 1.0809012976062211, + "grad_norm": 0.6144747707554865, + "learning_rate": 2.2941783350050926e-06, + "loss": 0.2875, + "step": 23074 + }, + { + "epoch": 1.0809481425961494, + "grad_norm": 0.6186677805865786, + "learning_rate": 2.2939893292662145e-06, + "loss": 0.2717, + "step": 23075 + }, + { + "epoch": 1.0809949875860776, + "grad_norm": 0.6467582543806303, + "learning_rate": 2.2938003247128755e-06, + "loss": 0.2887, + "step": 23076 + }, + { + "epoch": 1.081041832576006, + "grad_norm": 0.6418119237966409, + "learning_rate": 2.293611321346164e-06, + "loss": 0.3136, + "step": 23077 + }, + { + "epoch": 1.0810886775659343, + "grad_norm": 0.5679808626595355, + "learning_rate": 2.293422319167168e-06, + "loss": 0.2771, + "step": 23078 + }, + { + "epoch": 1.0811355225558628, + "grad_norm": 0.5555672436024809, + "learning_rate": 2.2932333181769743e-06, + "loss": 0.2634, + "step": 23079 + }, + { + "epoch": 1.081182367545791, + "grad_norm": 0.6643772171984162, + "learning_rate": 2.29304431837667e-06, + "loss": 0.289, + "step": 23080 + }, + { + "epoch": 1.0812292125357192, + "grad_norm": 0.6021418936999388, + "learning_rate": 2.292855319767345e-06, + "loss": 0.2826, + "step": 23081 + }, + { + "epoch": 1.0812760575256477, + "grad_norm": 0.6055097232805, + "learning_rate": 2.2926663223500843e-06, + "loss": 0.2872, + "step": 23082 + }, + { + "epoch": 1.081322902515576, + "grad_norm": 0.6135470279846608, + "learning_rate": 2.292477326125977e-06, + "loss": 0.2805, + "step": 23083 + }, + { + "epoch": 1.0813697475055042, + "grad_norm": 0.6258622289189963, + "learning_rate": 2.2922883310961117e-06, + "loss": 0.2951, + "step": 23084 + }, + { + "epoch": 1.0814165924954326, + "grad_norm": 0.588074091252959, + "learning_rate": 2.2920993372615744e-06, + "loss": 0.2749, + "step": 23085 + }, + { + "epoch": 1.081463437485361, + "grad_norm": 0.6211518134161206, + "learning_rate": 2.2919103446234525e-06, + "loss": 0.2959, + "step": 23086 + }, + { + "epoch": 1.0815102824752894, + "grad_norm": 0.6064259604895381, + "learning_rate": 2.291721353182834e-06, + "loss": 0.2884, + "step": 23087 + }, + { + "epoch": 1.0815571274652176, + "grad_norm": 0.5501229487732854, + "learning_rate": 2.291532362940808e-06, + "loss": 0.2795, + "step": 23088 + }, + { + "epoch": 1.0816039724551458, + "grad_norm": 0.5722005934529286, + "learning_rate": 2.29134337389846e-06, + "loss": 0.2663, + "step": 23089 + }, + { + "epoch": 1.0816508174450743, + "grad_norm": 0.6363238953350202, + "learning_rate": 2.2911543860568792e-06, + "loss": 0.2817, + "step": 23090 + }, + { + "epoch": 1.0816976624350025, + "grad_norm": 0.5767279451018856, + "learning_rate": 2.290965399417151e-06, + "loss": 0.2745, + "step": 23091 + }, + { + "epoch": 1.081744507424931, + "grad_norm": 0.5802656902652296, + "learning_rate": 2.290776413980366e-06, + "loss": 0.2737, + "step": 23092 + }, + { + "epoch": 1.0817913524148592, + "grad_norm": 0.5892169171260568, + "learning_rate": 2.290587429747609e-06, + "loss": 0.2808, + "step": 23093 + }, + { + "epoch": 1.0818381974047875, + "grad_norm": 0.5568780778902678, + "learning_rate": 2.2903984467199685e-06, + "loss": 0.2811, + "step": 23094 + }, + { + "epoch": 1.081885042394716, + "grad_norm": 0.5692693846760543, + "learning_rate": 2.2902094648985335e-06, + "loss": 0.2561, + "step": 23095 + }, + { + "epoch": 1.0819318873846442, + "grad_norm": 0.5478411613595724, + "learning_rate": 2.2900204842843903e-06, + "loss": 0.2672, + "step": 23096 + }, + { + "epoch": 1.0819787323745724, + "grad_norm": 0.6088898414170827, + "learning_rate": 2.2898315048786256e-06, + "loss": 0.2796, + "step": 23097 + }, + { + "epoch": 1.082025577364501, + "grad_norm": 0.6064477274117157, + "learning_rate": 2.2896425266823273e-06, + "loss": 0.2719, + "step": 23098 + }, + { + "epoch": 1.0820724223544291, + "grad_norm": 0.5689652192780031, + "learning_rate": 2.2894535496965845e-06, + "loss": 0.2665, + "step": 23099 + }, + { + "epoch": 1.0821192673443576, + "grad_norm": 0.5931480063278384, + "learning_rate": 2.289264573922483e-06, + "loss": 0.2805, + "step": 23100 + }, + { + "epoch": 1.0821661123342858, + "grad_norm": 0.5109450641205634, + "learning_rate": 2.2890755993611104e-06, + "loss": 0.254, + "step": 23101 + }, + { + "epoch": 1.082212957324214, + "grad_norm": 0.591781667898645, + "learning_rate": 2.288886626013556e-06, + "loss": 0.2937, + "step": 23102 + }, + { + "epoch": 1.0822598023141425, + "grad_norm": 0.6004169969508284, + "learning_rate": 2.288697653880906e-06, + "loss": 0.2843, + "step": 23103 + }, + { + "epoch": 1.0823066473040708, + "grad_norm": 0.5724296664634227, + "learning_rate": 2.2885086829642468e-06, + "loss": 0.2812, + "step": 23104 + }, + { + "epoch": 1.0823534922939992, + "grad_norm": 0.599439867064222, + "learning_rate": 2.2883197132646673e-06, + "loss": 0.2729, + "step": 23105 + }, + { + "epoch": 1.0824003372839275, + "grad_norm": 0.6250811845313164, + "learning_rate": 2.288130744783255e-06, + "loss": 0.2924, + "step": 23106 + }, + { + "epoch": 1.0824471822738557, + "grad_norm": 0.6198474486453691, + "learning_rate": 2.2879417775210963e-06, + "loss": 0.2932, + "step": 23107 + }, + { + "epoch": 1.0824940272637842, + "grad_norm": 0.5580521676197051, + "learning_rate": 2.287752811479281e-06, + "loss": 0.2728, + "step": 23108 + }, + { + "epoch": 1.0825408722537124, + "grad_norm": 0.5527612109490327, + "learning_rate": 2.2875638466588932e-06, + "loss": 0.2663, + "step": 23109 + }, + { + "epoch": 1.082587717243641, + "grad_norm": 0.5569639258391865, + "learning_rate": 2.2873748830610227e-06, + "loss": 0.2695, + "step": 23110 + }, + { + "epoch": 1.0826345622335691, + "grad_norm": 0.5770665461152288, + "learning_rate": 2.287185920686756e-06, + "loss": 0.2794, + "step": 23111 + }, + { + "epoch": 1.0826814072234974, + "grad_norm": 0.5915334504961819, + "learning_rate": 2.286996959537181e-06, + "loss": 0.2752, + "step": 23112 + }, + { + "epoch": 1.0827282522134258, + "grad_norm": 0.6100028540472417, + "learning_rate": 2.286807999613385e-06, + "loss": 0.2882, + "step": 23113 + }, + { + "epoch": 1.082775097203354, + "grad_norm": 0.5975356699985724, + "learning_rate": 2.2866190409164552e-06, + "loss": 0.2983, + "step": 23114 + }, + { + "epoch": 1.0828219421932825, + "grad_norm": 0.5540514149484407, + "learning_rate": 2.2864300834474805e-06, + "loss": 0.2728, + "step": 23115 + }, + { + "epoch": 1.0828687871832108, + "grad_norm": 0.5979178288810433, + "learning_rate": 2.2862411272075455e-06, + "loss": 0.2746, + "step": 23116 + }, + { + "epoch": 1.082915632173139, + "grad_norm": 0.5235636440530445, + "learning_rate": 2.2860521721977396e-06, + "loss": 0.2554, + "step": 23117 + }, + { + "epoch": 1.0829624771630675, + "grad_norm": 0.5894433806933594, + "learning_rate": 2.2858632184191494e-06, + "loss": 0.2819, + "step": 23118 + }, + { + "epoch": 1.0830093221529957, + "grad_norm": 0.5588914397784019, + "learning_rate": 2.2856742658728625e-06, + "loss": 0.2689, + "step": 23119 + }, + { + "epoch": 1.083056167142924, + "grad_norm": 0.5478471475924467, + "learning_rate": 2.2854853145599666e-06, + "loss": 0.2636, + "step": 23120 + }, + { + "epoch": 1.0831030121328524, + "grad_norm": 0.5877251267133016, + "learning_rate": 2.28529636448155e-06, + "loss": 0.2774, + "step": 23121 + }, + { + "epoch": 1.0831498571227807, + "grad_norm": 0.6203987926015502, + "learning_rate": 2.285107415638697e-06, + "loss": 0.2914, + "step": 23122 + }, + { + "epoch": 1.0831967021127091, + "grad_norm": 0.563805051792731, + "learning_rate": 2.2849184680324973e-06, + "loss": 0.2656, + "step": 23123 + }, + { + "epoch": 1.0832435471026374, + "grad_norm": 0.6045615675933667, + "learning_rate": 2.284729521664038e-06, + "loss": 0.2712, + "step": 23124 + }, + { + "epoch": 1.0832903920925656, + "grad_norm": 0.5879966351164496, + "learning_rate": 2.284540576534406e-06, + "loss": 0.2809, + "step": 23125 + }, + { + "epoch": 1.083337237082494, + "grad_norm": 0.5876541941579252, + "learning_rate": 2.284351632644689e-06, + "loss": 0.2739, + "step": 23126 + }, + { + "epoch": 1.0833840820724223, + "grad_norm": 0.6287749412029476, + "learning_rate": 2.284162689995974e-06, + "loss": 0.2781, + "step": 23127 + }, + { + "epoch": 1.0834309270623508, + "grad_norm": 0.6245148029394609, + "learning_rate": 2.283973748589349e-06, + "loss": 0.2869, + "step": 23128 + }, + { + "epoch": 1.083477772052279, + "grad_norm": 0.532935624965875, + "learning_rate": 2.2837848084259e-06, + "loss": 0.2639, + "step": 23129 + }, + { + "epoch": 1.0835246170422073, + "grad_norm": 0.6490299019056877, + "learning_rate": 2.283595869506716e-06, + "loss": 0.2956, + "step": 23130 + }, + { + "epoch": 1.0835714620321357, + "grad_norm": 0.5685137246451504, + "learning_rate": 2.2834069318328823e-06, + "loss": 0.2642, + "step": 23131 + }, + { + "epoch": 1.083618307022064, + "grad_norm": 0.5954768635385845, + "learning_rate": 2.2832179954054875e-06, + "loss": 0.2809, + "step": 23132 + }, + { + "epoch": 1.0836651520119922, + "grad_norm": 0.5997683979660599, + "learning_rate": 2.28302906022562e-06, + "loss": 0.2681, + "step": 23133 + }, + { + "epoch": 1.0837119970019207, + "grad_norm": 0.6057951953708876, + "learning_rate": 2.2828401262943643e-06, + "loss": 0.2989, + "step": 23134 + }, + { + "epoch": 1.083758841991849, + "grad_norm": 0.593471100970243, + "learning_rate": 2.2826511936128093e-06, + "loss": 0.2833, + "step": 23135 + }, + { + "epoch": 1.0838056869817774, + "grad_norm": 0.5811349540837462, + "learning_rate": 2.282462262182042e-06, + "loss": 0.2676, + "step": 23136 + }, + { + "epoch": 1.0838525319717056, + "grad_norm": 0.5604755602684992, + "learning_rate": 2.28227333200315e-06, + "loss": 0.2737, + "step": 23137 + }, + { + "epoch": 1.0838993769616339, + "grad_norm": 0.5658062476739111, + "learning_rate": 2.2820844030772198e-06, + "loss": 0.2696, + "step": 23138 + }, + { + "epoch": 1.0839462219515623, + "grad_norm": 0.5590768054519897, + "learning_rate": 2.2818954754053403e-06, + "loss": 0.267, + "step": 23139 + }, + { + "epoch": 1.0839930669414906, + "grad_norm": 0.5566603292902624, + "learning_rate": 2.2817065489885964e-06, + "loss": 0.2721, + "step": 23140 + }, + { + "epoch": 1.084039911931419, + "grad_norm": 0.5850745414644991, + "learning_rate": 2.2815176238280765e-06, + "loss": 0.279, + "step": 23141 + }, + { + "epoch": 1.0840867569213473, + "grad_norm": 0.5609606061477715, + "learning_rate": 2.2813286999248678e-06, + "loss": 0.2644, + "step": 23142 + }, + { + "epoch": 1.0841336019112755, + "grad_norm": 0.5743720959877953, + "learning_rate": 2.2811397772800577e-06, + "loss": 0.2909, + "step": 23143 + }, + { + "epoch": 1.084180446901204, + "grad_norm": 0.6193516284628126, + "learning_rate": 2.280950855894733e-06, + "loss": 0.2927, + "step": 23144 + }, + { + "epoch": 1.0842272918911322, + "grad_norm": 0.5731508136796122, + "learning_rate": 2.280761935769982e-06, + "loss": 0.2638, + "step": 23145 + }, + { + "epoch": 1.0842741368810607, + "grad_norm": 0.6416664123194844, + "learning_rate": 2.28057301690689e-06, + "loss": 0.2982, + "step": 23146 + }, + { + "epoch": 1.084320981870989, + "grad_norm": 0.5382504068415491, + "learning_rate": 2.280384099306545e-06, + "loss": 0.2723, + "step": 23147 + }, + { + "epoch": 1.0843678268609172, + "grad_norm": 0.5794315921171492, + "learning_rate": 2.2801951829700347e-06, + "loss": 0.2888, + "step": 23148 + }, + { + "epoch": 1.0844146718508456, + "grad_norm": 0.6516319612391036, + "learning_rate": 2.2800062678984455e-06, + "loss": 0.2874, + "step": 23149 + }, + { + "epoch": 1.0844615168407739, + "grad_norm": 0.5446099704503835, + "learning_rate": 2.2798173540928647e-06, + "loss": 0.2717, + "step": 23150 + }, + { + "epoch": 1.0845083618307023, + "grad_norm": 0.5539070680889819, + "learning_rate": 2.279628441554381e-06, + "loss": 0.2618, + "step": 23151 + }, + { + "epoch": 1.0845552068206306, + "grad_norm": 0.6150768510444559, + "learning_rate": 2.2794395302840798e-06, + "loss": 0.2759, + "step": 23152 + }, + { + "epoch": 1.0846020518105588, + "grad_norm": 0.5757635490809487, + "learning_rate": 2.279250620283048e-06, + "loss": 0.2757, + "step": 23153 + }, + { + "epoch": 1.0846488968004873, + "grad_norm": 0.5799537709686263, + "learning_rate": 2.2790617115523735e-06, + "loss": 0.2679, + "step": 23154 + }, + { + "epoch": 1.0846957417904155, + "grad_norm": 0.5508010485603423, + "learning_rate": 2.2788728040931435e-06, + "loss": 0.2782, + "step": 23155 + }, + { + "epoch": 1.0847425867803437, + "grad_norm": 0.6102860436582376, + "learning_rate": 2.2786838979064445e-06, + "loss": 0.2729, + "step": 23156 + }, + { + "epoch": 1.0847894317702722, + "grad_norm": 0.5816361757744204, + "learning_rate": 2.2784949929933654e-06, + "loss": 0.2612, + "step": 23157 + }, + { + "epoch": 1.0848362767602004, + "grad_norm": 0.5622111097250423, + "learning_rate": 2.2783060893549905e-06, + "loss": 0.2763, + "step": 23158 + }, + { + "epoch": 1.084883121750129, + "grad_norm": 0.5857838492072583, + "learning_rate": 2.278117186992409e-06, + "loss": 0.2787, + "step": 23159 + }, + { + "epoch": 1.0849299667400572, + "grad_norm": 0.6083314726171708, + "learning_rate": 2.277928285906707e-06, + "loss": 0.2843, + "step": 23160 + }, + { + "epoch": 1.0849768117299854, + "grad_norm": 0.5578585052256881, + "learning_rate": 2.2777393860989717e-06, + "loss": 0.2481, + "step": 23161 + }, + { + "epoch": 1.0850236567199139, + "grad_norm": 0.529570710459911, + "learning_rate": 2.277550487570291e-06, + "loss": 0.2655, + "step": 23162 + }, + { + "epoch": 1.085070501709842, + "grad_norm": 0.5623744936856884, + "learning_rate": 2.277361590321752e-06, + "loss": 0.2596, + "step": 23163 + }, + { + "epoch": 1.0851173466997706, + "grad_norm": 0.5852216678737571, + "learning_rate": 2.27717269435444e-06, + "loss": 0.2578, + "step": 23164 + }, + { + "epoch": 1.0851641916896988, + "grad_norm": 0.5624448579873863, + "learning_rate": 2.276983799669443e-06, + "loss": 0.2684, + "step": 23165 + }, + { + "epoch": 1.085211036679627, + "grad_norm": 0.6046546581725335, + "learning_rate": 2.2767949062678487e-06, + "loss": 0.2787, + "step": 23166 + }, + { + "epoch": 1.0852578816695555, + "grad_norm": 0.5590780436913952, + "learning_rate": 2.2766060141507426e-06, + "loss": 0.2653, + "step": 23167 + }, + { + "epoch": 1.0853047266594837, + "grad_norm": 0.577425038825283, + "learning_rate": 2.2764171233192136e-06, + "loss": 0.2727, + "step": 23168 + }, + { + "epoch": 1.085351571649412, + "grad_norm": 0.6526341239503467, + "learning_rate": 2.2762282337743484e-06, + "loss": 0.2865, + "step": 23169 + }, + { + "epoch": 1.0853984166393404, + "grad_norm": 0.5980887039416471, + "learning_rate": 2.2760393455172333e-06, + "loss": 0.2751, + "step": 23170 + }, + { + "epoch": 1.0854452616292687, + "grad_norm": 0.5952007858371864, + "learning_rate": 2.275850458548954e-06, + "loss": 0.2705, + "step": 23171 + }, + { + "epoch": 1.0854921066191972, + "grad_norm": 0.5573811182400291, + "learning_rate": 2.2756615728706e-06, + "loss": 0.2718, + "step": 23172 + }, + { + "epoch": 1.0855389516091254, + "grad_norm": 0.5849071699712741, + "learning_rate": 2.2754726884832574e-06, + "loss": 0.2966, + "step": 23173 + }, + { + "epoch": 1.0855857965990536, + "grad_norm": 0.5813903506417284, + "learning_rate": 2.2752838053880127e-06, + "loss": 0.2679, + "step": 23174 + }, + { + "epoch": 1.085632641588982, + "grad_norm": 0.5522333010546775, + "learning_rate": 2.2750949235859545e-06, + "loss": 0.2693, + "step": 23175 + }, + { + "epoch": 1.0856794865789103, + "grad_norm": 0.5848294748419166, + "learning_rate": 2.2749060430781668e-06, + "loss": 0.2732, + "step": 23176 + }, + { + "epoch": 1.0857263315688388, + "grad_norm": 0.6101966023200284, + "learning_rate": 2.2747171638657394e-06, + "loss": 0.274, + "step": 23177 + }, + { + "epoch": 1.085773176558767, + "grad_norm": 0.5711858270325161, + "learning_rate": 2.274528285949757e-06, + "loss": 0.2827, + "step": 23178 + }, + { + "epoch": 1.0858200215486953, + "grad_norm": 0.6011392634090371, + "learning_rate": 2.2743394093313086e-06, + "loss": 0.279, + "step": 23179 + }, + { + "epoch": 1.0858668665386237, + "grad_norm": 0.5683904401175828, + "learning_rate": 2.2741505340114795e-06, + "loss": 0.2661, + "step": 23180 + }, + { + "epoch": 1.085913711528552, + "grad_norm": 0.558640859490319, + "learning_rate": 2.273961659991358e-06, + "loss": 0.2773, + "step": 23181 + }, + { + "epoch": 1.0859605565184804, + "grad_norm": 0.6396363737524082, + "learning_rate": 2.27377278727203e-06, + "loss": 0.2788, + "step": 23182 + }, + { + "epoch": 1.0860074015084087, + "grad_norm": 0.5791537437469099, + "learning_rate": 2.273583915854583e-06, + "loss": 0.2677, + "step": 23183 + }, + { + "epoch": 1.086054246498337, + "grad_norm": 0.5667615807515035, + "learning_rate": 2.273395045740103e-06, + "loss": 0.2672, + "step": 23184 + }, + { + "epoch": 1.0861010914882654, + "grad_norm": 0.587854389130895, + "learning_rate": 2.2732061769296776e-06, + "loss": 0.2802, + "step": 23185 + }, + { + "epoch": 1.0861479364781936, + "grad_norm": 0.5758585959721519, + "learning_rate": 2.273017309424394e-06, + "loss": 0.2712, + "step": 23186 + }, + { + "epoch": 1.086194781468122, + "grad_norm": 0.6183190610527228, + "learning_rate": 2.2728284432253383e-06, + "loss": 0.2838, + "step": 23187 + }, + { + "epoch": 1.0862416264580503, + "grad_norm": 0.5808104875544984, + "learning_rate": 2.272639578333599e-06, + "loss": 0.2696, + "step": 23188 + }, + { + "epoch": 1.0862884714479786, + "grad_norm": 0.6166269217318243, + "learning_rate": 2.2724507147502607e-06, + "loss": 0.27, + "step": 23189 + }, + { + "epoch": 1.086335316437907, + "grad_norm": 0.5777718067456941, + "learning_rate": 2.272261852476412e-06, + "loss": 0.276, + "step": 23190 + }, + { + "epoch": 1.0863821614278353, + "grad_norm": 0.6161314020501145, + "learning_rate": 2.2720729915131383e-06, + "loss": 0.2851, + "step": 23191 + }, + { + "epoch": 1.0864290064177635, + "grad_norm": 0.6219121387270374, + "learning_rate": 2.2718841318615273e-06, + "loss": 0.2633, + "step": 23192 + }, + { + "epoch": 1.086475851407692, + "grad_norm": 0.6149243047692391, + "learning_rate": 2.2716952735226662e-06, + "loss": 0.2747, + "step": 23193 + }, + { + "epoch": 1.0865226963976202, + "grad_norm": 0.6179291875742479, + "learning_rate": 2.2715064164976426e-06, + "loss": 0.2752, + "step": 23194 + }, + { + "epoch": 1.0865695413875487, + "grad_norm": 0.576117425142799, + "learning_rate": 2.2713175607875402e-06, + "loss": 0.259, + "step": 23195 + }, + { + "epoch": 1.086616386377477, + "grad_norm": 0.5575680474706147, + "learning_rate": 2.2711287063934484e-06, + "loss": 0.2724, + "step": 23196 + }, + { + "epoch": 1.0866632313674052, + "grad_norm": 0.6383286844917377, + "learning_rate": 2.270939853316454e-06, + "loss": 0.2725, + "step": 23197 + }, + { + "epoch": 1.0867100763573336, + "grad_norm": 0.5983837968855406, + "learning_rate": 2.270751001557642e-06, + "loss": 0.2573, + "step": 23198 + }, + { + "epoch": 1.0867569213472619, + "grad_norm": 0.5472997729220979, + "learning_rate": 2.2705621511181005e-06, + "loss": 0.2659, + "step": 23199 + }, + { + "epoch": 1.0868037663371903, + "grad_norm": 0.5884933661448127, + "learning_rate": 2.2703733019989173e-06, + "loss": 0.2806, + "step": 23200 + }, + { + "epoch": 1.0868506113271186, + "grad_norm": 0.5665058904749777, + "learning_rate": 2.270184454201178e-06, + "loss": 0.2783, + "step": 23201 + }, + { + "epoch": 1.0868974563170468, + "grad_norm": 0.6186996182625647, + "learning_rate": 2.2699956077259684e-06, + "loss": 0.286, + "step": 23202 + }, + { + "epoch": 1.0869443013069753, + "grad_norm": 0.5749518265684967, + "learning_rate": 2.2698067625743765e-06, + "loss": 0.2787, + "step": 23203 + }, + { + "epoch": 1.0869911462969035, + "grad_norm": 0.5755507699428417, + "learning_rate": 2.269617918747489e-06, + "loss": 0.2766, + "step": 23204 + }, + { + "epoch": 1.0870379912868318, + "grad_norm": 0.6024212898220094, + "learning_rate": 2.2694290762463924e-06, + "loss": 0.2899, + "step": 23205 + }, + { + "epoch": 1.0870848362767602, + "grad_norm": 0.5736382592364133, + "learning_rate": 2.269240235072174e-06, + "loss": 0.2616, + "step": 23206 + }, + { + "epoch": 1.0871316812666885, + "grad_norm": 0.5795304639670681, + "learning_rate": 2.2690513952259193e-06, + "loss": 0.2756, + "step": 23207 + }, + { + "epoch": 1.087178526256617, + "grad_norm": 0.5849871886428712, + "learning_rate": 2.2688625567087164e-06, + "loss": 0.2869, + "step": 23208 + }, + { + "epoch": 1.0872253712465452, + "grad_norm": 0.59832241153938, + "learning_rate": 2.268673719521651e-06, + "loss": 0.2771, + "step": 23209 + }, + { + "epoch": 1.0872722162364734, + "grad_norm": 0.5525109998153039, + "learning_rate": 2.2684848836658097e-06, + "loss": 0.2875, + "step": 23210 + }, + { + "epoch": 1.0873190612264019, + "grad_norm": 0.6051254656158006, + "learning_rate": 2.2682960491422803e-06, + "loss": 0.2808, + "step": 23211 + }, + { + "epoch": 1.0873659062163301, + "grad_norm": 0.5972016030155382, + "learning_rate": 2.2681072159521497e-06, + "loss": 0.2738, + "step": 23212 + }, + { + "epoch": 1.0874127512062586, + "grad_norm": 0.6256392385178764, + "learning_rate": 2.2679183840965026e-06, + "loss": 0.2807, + "step": 23213 + }, + { + "epoch": 1.0874595961961868, + "grad_norm": 0.5964274192379878, + "learning_rate": 2.267729553576427e-06, + "loss": 0.2764, + "step": 23214 + }, + { + "epoch": 1.087506441186115, + "grad_norm": 0.6135697162540231, + "learning_rate": 2.26754072439301e-06, + "loss": 0.284, + "step": 23215 + }, + { + "epoch": 1.0875532861760435, + "grad_norm": 0.5496192208502402, + "learning_rate": 2.267351896547337e-06, + "loss": 0.2545, + "step": 23216 + }, + { + "epoch": 1.0876001311659718, + "grad_norm": 0.559210648549149, + "learning_rate": 2.267163070040496e-06, + "loss": 0.2741, + "step": 23217 + }, + { + "epoch": 1.0876469761559002, + "grad_norm": 0.5895307596271426, + "learning_rate": 2.266974244873573e-06, + "loss": 0.2845, + "step": 23218 + }, + { + "epoch": 1.0876938211458285, + "grad_norm": 0.6182923932055252, + "learning_rate": 2.266785421047655e-06, + "loss": 0.2923, + "step": 23219 + }, + { + "epoch": 1.0877406661357567, + "grad_norm": 0.5969669977584918, + "learning_rate": 2.266596598563827e-06, + "loss": 0.2875, + "step": 23220 + }, + { + "epoch": 1.0877875111256852, + "grad_norm": 0.5405268923454943, + "learning_rate": 2.266407777423178e-06, + "loss": 0.2501, + "step": 23221 + }, + { + "epoch": 1.0878343561156134, + "grad_norm": 0.6110288150776204, + "learning_rate": 2.2662189576267926e-06, + "loss": 0.2664, + "step": 23222 + }, + { + "epoch": 1.0878812011055419, + "grad_norm": 0.5780216639489837, + "learning_rate": 2.2660301391757586e-06, + "loss": 0.2842, + "step": 23223 + }, + { + "epoch": 1.0879280460954701, + "grad_norm": 0.6324965268556814, + "learning_rate": 2.265841322071163e-06, + "loss": 0.2807, + "step": 23224 + }, + { + "epoch": 1.0879748910853984, + "grad_norm": 0.584583579731069, + "learning_rate": 2.2656525063140917e-06, + "loss": 0.2699, + "step": 23225 + }, + { + "epoch": 1.0880217360753268, + "grad_norm": 0.585955251963359, + "learning_rate": 2.265463691905631e-06, + "loss": 0.2777, + "step": 23226 + }, + { + "epoch": 1.088068581065255, + "grad_norm": 0.6376701966087709, + "learning_rate": 2.2652748788468672e-06, + "loss": 0.2834, + "step": 23227 + }, + { + "epoch": 1.0881154260551833, + "grad_norm": 0.6095383341798826, + "learning_rate": 2.2650860671388885e-06, + "loss": 0.2793, + "step": 23228 + }, + { + "epoch": 1.0881622710451118, + "grad_norm": 0.5966148457263677, + "learning_rate": 2.2648972567827795e-06, + "loss": 0.3064, + "step": 23229 + }, + { + "epoch": 1.08820911603504, + "grad_norm": 0.6217843199925553, + "learning_rate": 2.264708447779629e-06, + "loss": 0.301, + "step": 23230 + }, + { + "epoch": 1.0882559610249685, + "grad_norm": 0.5947726455820517, + "learning_rate": 2.264519640130521e-06, + "loss": 0.2772, + "step": 23231 + }, + { + "epoch": 1.0883028060148967, + "grad_norm": 0.6020369937984877, + "learning_rate": 2.264330833836544e-06, + "loss": 0.2781, + "step": 23232 + }, + { + "epoch": 1.088349651004825, + "grad_norm": 0.5928144347046344, + "learning_rate": 2.2641420288987833e-06, + "loss": 0.2945, + "step": 23233 + }, + { + "epoch": 1.0883964959947534, + "grad_norm": 0.5939593556739627, + "learning_rate": 2.263953225318326e-06, + "loss": 0.2641, + "step": 23234 + }, + { + "epoch": 1.0884433409846817, + "grad_norm": 0.5963146998226493, + "learning_rate": 2.263764423096259e-06, + "loss": 0.2831, + "step": 23235 + }, + { + "epoch": 1.0884901859746101, + "grad_norm": 0.5509005251989003, + "learning_rate": 2.263575622233669e-06, + "loss": 0.2645, + "step": 23236 + }, + { + "epoch": 1.0885370309645384, + "grad_norm": 0.6437159658266277, + "learning_rate": 2.2633868227316404e-06, + "loss": 0.2795, + "step": 23237 + }, + { + "epoch": 1.0885838759544666, + "grad_norm": 0.5731787898538596, + "learning_rate": 2.2631980245912614e-06, + "loss": 0.2646, + "step": 23238 + }, + { + "epoch": 1.088630720944395, + "grad_norm": 0.5963894949431711, + "learning_rate": 2.263009227813619e-06, + "loss": 0.2779, + "step": 23239 + }, + { + "epoch": 1.0886775659343233, + "grad_norm": 0.6290043775715995, + "learning_rate": 2.262820432399798e-06, + "loss": 0.2953, + "step": 23240 + }, + { + "epoch": 1.0887244109242515, + "grad_norm": 0.5952455750230489, + "learning_rate": 2.2626316383508863e-06, + "loss": 0.2751, + "step": 23241 + }, + { + "epoch": 1.08877125591418, + "grad_norm": 0.5609827869574102, + "learning_rate": 2.2624428456679706e-06, + "loss": 0.2636, + "step": 23242 + }, + { + "epoch": 1.0888181009041082, + "grad_norm": 0.594510676978294, + "learning_rate": 2.262254054352136e-06, + "loss": 0.2663, + "step": 23243 + }, + { + "epoch": 1.0888649458940367, + "grad_norm": 0.5811546066504282, + "learning_rate": 2.2620652644044694e-06, + "loss": 0.2782, + "step": 23244 + }, + { + "epoch": 1.088911790883965, + "grad_norm": 0.5773576795770543, + "learning_rate": 2.261876475826057e-06, + "loss": 0.2516, + "step": 23245 + }, + { + "epoch": 1.0889586358738932, + "grad_norm": 0.5644392989940585, + "learning_rate": 2.2616876886179865e-06, + "loss": 0.2746, + "step": 23246 + }, + { + "epoch": 1.0890054808638217, + "grad_norm": 0.5653409803431176, + "learning_rate": 2.261498902781343e-06, + "loss": 0.2784, + "step": 23247 + }, + { + "epoch": 1.08905232585375, + "grad_norm": 0.6112448258439878, + "learning_rate": 2.2613101183172138e-06, + "loss": 0.2842, + "step": 23248 + }, + { + "epoch": 1.0890991708436784, + "grad_norm": 0.5701568163043079, + "learning_rate": 2.261121335226684e-06, + "loss": 0.2658, + "step": 23249 + }, + { + "epoch": 1.0891460158336066, + "grad_norm": 0.5410591733728632, + "learning_rate": 2.260932553510842e-06, + "loss": 0.2739, + "step": 23250 + }, + { + "epoch": 1.0891928608235348, + "grad_norm": 0.5967123239345866, + "learning_rate": 2.260743773170772e-06, + "loss": 0.2898, + "step": 23251 + }, + { + "epoch": 1.0892397058134633, + "grad_norm": 0.5818012791855501, + "learning_rate": 2.2605549942075616e-06, + "loss": 0.2778, + "step": 23252 + }, + { + "epoch": 1.0892865508033915, + "grad_norm": 0.5727693603018154, + "learning_rate": 2.2603662166222976e-06, + "loss": 0.2834, + "step": 23253 + }, + { + "epoch": 1.08933339579332, + "grad_norm": 0.6231190092686392, + "learning_rate": 2.260177440416065e-06, + "loss": 0.2817, + "step": 23254 + }, + { + "epoch": 1.0893802407832482, + "grad_norm": 0.6122424084179495, + "learning_rate": 2.2599886655899523e-06, + "loss": 0.2917, + "step": 23255 + }, + { + "epoch": 1.0894270857731765, + "grad_norm": 0.5826280769751409, + "learning_rate": 2.2597998921450434e-06, + "loss": 0.2723, + "step": 23256 + }, + { + "epoch": 1.089473930763105, + "grad_norm": 0.5739662107016821, + "learning_rate": 2.259611120082426e-06, + "loss": 0.2688, + "step": 23257 + }, + { + "epoch": 1.0895207757530332, + "grad_norm": 0.6573400087835186, + "learning_rate": 2.259422349403186e-06, + "loss": 0.3041, + "step": 23258 + }, + { + "epoch": 1.0895676207429617, + "grad_norm": 0.531085006263983, + "learning_rate": 2.25923358010841e-06, + "loss": 0.2524, + "step": 23259 + }, + { + "epoch": 1.08961446573289, + "grad_norm": 0.6038868777618225, + "learning_rate": 2.2590448121991847e-06, + "loss": 0.29, + "step": 23260 + }, + { + "epoch": 1.0896613107228181, + "grad_norm": 0.5698703997544798, + "learning_rate": 2.258856045676596e-06, + "loss": 0.2661, + "step": 23261 + }, + { + "epoch": 1.0897081557127466, + "grad_norm": 0.5919698285358465, + "learning_rate": 2.2586672805417296e-06, + "loss": 0.2893, + "step": 23262 + }, + { + "epoch": 1.0897550007026748, + "grad_norm": 0.5911798260138641, + "learning_rate": 2.2584785167956723e-06, + "loss": 0.2785, + "step": 23263 + }, + { + "epoch": 1.089801845692603, + "grad_norm": 0.6012645218826685, + "learning_rate": 2.258289754439511e-06, + "loss": 0.2978, + "step": 23264 + }, + { + "epoch": 1.0898486906825315, + "grad_norm": 0.543668574544501, + "learning_rate": 2.25810099347433e-06, + "loss": 0.249, + "step": 23265 + }, + { + "epoch": 1.0898955356724598, + "grad_norm": 0.5968529563657838, + "learning_rate": 2.2579122339012187e-06, + "loss": 0.2628, + "step": 23266 + }, + { + "epoch": 1.0899423806623882, + "grad_norm": 0.5376596121139327, + "learning_rate": 2.2577234757212614e-06, + "loss": 0.2536, + "step": 23267 + }, + { + "epoch": 1.0899892256523165, + "grad_norm": 0.6202898379850159, + "learning_rate": 2.2575347189355444e-06, + "loss": 0.2752, + "step": 23268 + }, + { + "epoch": 1.0900360706422447, + "grad_norm": 0.5983909020665111, + "learning_rate": 2.2573459635451533e-06, + "loss": 0.2862, + "step": 23269 + }, + { + "epoch": 1.0900829156321732, + "grad_norm": 0.571014922366865, + "learning_rate": 2.257157209551176e-06, + "loss": 0.2712, + "step": 23270 + }, + { + "epoch": 1.0901297606221014, + "grad_norm": 0.5641917450109107, + "learning_rate": 2.2569684569546973e-06, + "loss": 0.2714, + "step": 23271 + }, + { + "epoch": 1.09017660561203, + "grad_norm": 0.5829605891086327, + "learning_rate": 2.2567797057568046e-06, + "loss": 0.2859, + "step": 23272 + }, + { + "epoch": 1.0902234506019581, + "grad_norm": 0.5824810711582898, + "learning_rate": 2.2565909559585843e-06, + "loss": 0.2843, + "step": 23273 + }, + { + "epoch": 1.0902702955918864, + "grad_norm": 0.6287922834328624, + "learning_rate": 2.256402207561121e-06, + "loss": 0.2898, + "step": 23274 + }, + { + "epoch": 1.0903171405818148, + "grad_norm": 0.5693449660361256, + "learning_rate": 2.2562134605655014e-06, + "loss": 0.2674, + "step": 23275 + }, + { + "epoch": 1.090363985571743, + "grad_norm": 0.5801239689562596, + "learning_rate": 2.256024714972812e-06, + "loss": 0.2803, + "step": 23276 + }, + { + "epoch": 1.0904108305616713, + "grad_norm": 0.5692549106437046, + "learning_rate": 2.25583597078414e-06, + "loss": 0.2825, + "step": 23277 + }, + { + "epoch": 1.0904576755515998, + "grad_norm": 0.5709176745743394, + "learning_rate": 2.2556472280005695e-06, + "loss": 0.2787, + "step": 23278 + }, + { + "epoch": 1.090504520541528, + "grad_norm": 0.612619438827832, + "learning_rate": 2.2554584866231895e-06, + "loss": 0.2722, + "step": 23279 + }, + { + "epoch": 1.0905513655314565, + "grad_norm": 0.5556289900696939, + "learning_rate": 2.2552697466530827e-06, + "loss": 0.2733, + "step": 23280 + }, + { + "epoch": 1.0905982105213847, + "grad_norm": 0.6076058089446565, + "learning_rate": 2.255081008091338e-06, + "loss": 0.2806, + "step": 23281 + }, + { + "epoch": 1.090645055511313, + "grad_norm": 0.5723407214156851, + "learning_rate": 2.2548922709390397e-06, + "loss": 0.2828, + "step": 23282 + }, + { + "epoch": 1.0906919005012414, + "grad_norm": 0.5552434543704765, + "learning_rate": 2.2547035351972752e-06, + "loss": 0.2676, + "step": 23283 + }, + { + "epoch": 1.0907387454911697, + "grad_norm": 0.556144932035873, + "learning_rate": 2.25451480086713e-06, + "loss": 0.2645, + "step": 23284 + }, + { + "epoch": 1.0907855904810981, + "grad_norm": 0.587191470407383, + "learning_rate": 2.2543260679496916e-06, + "loss": 0.2779, + "step": 23285 + }, + { + "epoch": 1.0908324354710264, + "grad_norm": 0.5579964147586837, + "learning_rate": 2.254137336446044e-06, + "loss": 0.273, + "step": 23286 + }, + { + "epoch": 1.0908792804609546, + "grad_norm": 0.5652948602922195, + "learning_rate": 2.253948606357274e-06, + "loss": 0.2791, + "step": 23287 + }, + { + "epoch": 1.090926125450883, + "grad_norm": 0.6400091229814359, + "learning_rate": 2.2537598776844682e-06, + "loss": 0.2999, + "step": 23288 + }, + { + "epoch": 1.0909729704408113, + "grad_norm": 0.6116600009220226, + "learning_rate": 2.253571150428712e-06, + "loss": 0.2738, + "step": 23289 + }, + { + "epoch": 1.0910198154307398, + "grad_norm": 0.5925975360430306, + "learning_rate": 2.2533824245910923e-06, + "loss": 0.2898, + "step": 23290 + }, + { + "epoch": 1.091066660420668, + "grad_norm": 0.5768071141322408, + "learning_rate": 2.253193700172695e-06, + "loss": 0.285, + "step": 23291 + }, + { + "epoch": 1.0911135054105963, + "grad_norm": 0.6215142637807263, + "learning_rate": 2.253004977174606e-06, + "loss": 0.2901, + "step": 23292 + }, + { + "epoch": 1.0911603504005247, + "grad_norm": 0.5944422229271923, + "learning_rate": 2.252816255597911e-06, + "loss": 0.2757, + "step": 23293 + }, + { + "epoch": 1.091207195390453, + "grad_norm": 0.5985661911258224, + "learning_rate": 2.2526275354436956e-06, + "loss": 0.285, + "step": 23294 + }, + { + "epoch": 1.0912540403803814, + "grad_norm": 0.5833292859885979, + "learning_rate": 2.252438816713047e-06, + "loss": 0.2733, + "step": 23295 + }, + { + "epoch": 1.0913008853703097, + "grad_norm": 0.636809276553994, + "learning_rate": 2.2522500994070514e-06, + "loss": 0.2652, + "step": 23296 + }, + { + "epoch": 1.091347730360238, + "grad_norm": 0.595525188402677, + "learning_rate": 2.252061383526794e-06, + "loss": 0.2837, + "step": 23297 + }, + { + "epoch": 1.0913945753501664, + "grad_norm": 0.560594557044064, + "learning_rate": 2.251872669073361e-06, + "loss": 0.2706, + "step": 23298 + }, + { + "epoch": 1.0914414203400946, + "grad_norm": 0.6125236645097077, + "learning_rate": 2.251683956047838e-06, + "loss": 0.2939, + "step": 23299 + }, + { + "epoch": 1.0914882653300229, + "grad_norm": 0.5834086349500346, + "learning_rate": 2.2514952444513115e-06, + "loss": 0.2699, + "step": 23300 + }, + { + "epoch": 1.0915351103199513, + "grad_norm": 0.6100906395737162, + "learning_rate": 2.2513065342848675e-06, + "loss": 0.2926, + "step": 23301 + }, + { + "epoch": 1.0915819553098796, + "grad_norm": 0.6122211033645646, + "learning_rate": 2.2511178255495924e-06, + "loss": 0.2873, + "step": 23302 + }, + { + "epoch": 1.091628800299808, + "grad_norm": 0.6108698701767535, + "learning_rate": 2.2509291182465716e-06, + "loss": 0.2865, + "step": 23303 + }, + { + "epoch": 1.0916756452897363, + "grad_norm": 0.5908130049387634, + "learning_rate": 2.2507404123768907e-06, + "loss": 0.2775, + "step": 23304 + }, + { + "epoch": 1.0917224902796645, + "grad_norm": 0.5665904968130117, + "learning_rate": 2.2505517079416355e-06, + "loss": 0.2677, + "step": 23305 + }, + { + "epoch": 1.091769335269593, + "grad_norm": 0.5976023850251372, + "learning_rate": 2.2503630049418936e-06, + "loss": 0.2849, + "step": 23306 + }, + { + "epoch": 1.0918161802595212, + "grad_norm": 0.6184484527084833, + "learning_rate": 2.2501743033787492e-06, + "loss": 0.2858, + "step": 23307 + }, + { + "epoch": 1.0918630252494497, + "grad_norm": 0.5622252182643414, + "learning_rate": 2.2499856032532887e-06, + "loss": 0.2719, + "step": 23308 + }, + { + "epoch": 1.091909870239378, + "grad_norm": 0.5913861542186314, + "learning_rate": 2.2497969045665992e-06, + "loss": 0.271, + "step": 23309 + }, + { + "epoch": 1.0919567152293062, + "grad_norm": 0.5662969663115792, + "learning_rate": 2.2496082073197658e-06, + "loss": 0.264, + "step": 23310 + }, + { + "epoch": 1.0920035602192346, + "grad_norm": 0.5990390865658816, + "learning_rate": 2.249419511513873e-06, + "loss": 0.2753, + "step": 23311 + }, + { + "epoch": 1.0920504052091629, + "grad_norm": 0.5679782719223763, + "learning_rate": 2.249230817150008e-06, + "loss": 0.2608, + "step": 23312 + }, + { + "epoch": 1.092097250199091, + "grad_norm": 0.6330009911211547, + "learning_rate": 2.249042124229257e-06, + "loss": 0.2833, + "step": 23313 + }, + { + "epoch": 1.0921440951890196, + "grad_norm": 0.5993225039477821, + "learning_rate": 2.2488534327527055e-06, + "loss": 0.2733, + "step": 23314 + }, + { + "epoch": 1.0921909401789478, + "grad_norm": 0.5911140695541284, + "learning_rate": 2.24866474272144e-06, + "loss": 0.2694, + "step": 23315 + }, + { + "epoch": 1.0922377851688763, + "grad_norm": 0.6108100308368593, + "learning_rate": 2.248476054136545e-06, + "loss": 0.2838, + "step": 23316 + }, + { + "epoch": 1.0922846301588045, + "grad_norm": 0.5537552988366115, + "learning_rate": 2.2482873669991074e-06, + "loss": 0.2627, + "step": 23317 + }, + { + "epoch": 1.0923314751487327, + "grad_norm": 0.597964065431003, + "learning_rate": 2.248098681310212e-06, + "loss": 0.2722, + "step": 23318 + }, + { + "epoch": 1.0923783201386612, + "grad_norm": 0.596385996789428, + "learning_rate": 2.2479099970709465e-06, + "loss": 0.2681, + "step": 23319 + }, + { + "epoch": 1.0924251651285894, + "grad_norm": 0.6113090285980785, + "learning_rate": 2.2477213142823946e-06, + "loss": 0.2891, + "step": 23320 + }, + { + "epoch": 1.092472010118518, + "grad_norm": 0.5719633630944039, + "learning_rate": 2.247532632945643e-06, + "loss": 0.2558, + "step": 23321 + }, + { + "epoch": 1.0925188551084462, + "grad_norm": 0.646819284914732, + "learning_rate": 2.2473439530617794e-06, + "loss": 0.2789, + "step": 23322 + }, + { + "epoch": 1.0925657000983744, + "grad_norm": 0.5818668485904563, + "learning_rate": 2.2471552746318866e-06, + "loss": 0.2702, + "step": 23323 + }, + { + "epoch": 1.0926125450883029, + "grad_norm": 0.5741166323315208, + "learning_rate": 2.2469665976570516e-06, + "loss": 0.2733, + "step": 23324 + }, + { + "epoch": 1.092659390078231, + "grad_norm": 0.5852277095890734, + "learning_rate": 2.2467779221383603e-06, + "loss": 0.2669, + "step": 23325 + }, + { + "epoch": 1.0927062350681596, + "grad_norm": 0.5648007171572084, + "learning_rate": 2.2465892480768985e-06, + "loss": 0.2824, + "step": 23326 + }, + { + "epoch": 1.0927530800580878, + "grad_norm": 0.5680678055290185, + "learning_rate": 2.2464005754737513e-06, + "loss": 0.2684, + "step": 23327 + }, + { + "epoch": 1.092799925048016, + "grad_norm": 0.5422484665885388, + "learning_rate": 2.2462119043300063e-06, + "loss": 0.26, + "step": 23328 + }, + { + "epoch": 1.0928467700379445, + "grad_norm": 0.5351695077395979, + "learning_rate": 2.2460232346467473e-06, + "loss": 0.2583, + "step": 23329 + }, + { + "epoch": 1.0928936150278727, + "grad_norm": 0.5806422034052698, + "learning_rate": 2.245834566425061e-06, + "loss": 0.2772, + "step": 23330 + }, + { + "epoch": 1.0929404600178012, + "grad_norm": 0.564548943852026, + "learning_rate": 2.245645899666032e-06, + "loss": 0.2679, + "step": 23331 + }, + { + "epoch": 1.0929873050077294, + "grad_norm": 0.6045467836686695, + "learning_rate": 2.245457234370747e-06, + "loss": 0.2883, + "step": 23332 + }, + { + "epoch": 1.0930341499976577, + "grad_norm": 0.6777209471330194, + "learning_rate": 2.2452685705402927e-06, + "loss": 0.2877, + "step": 23333 + }, + { + "epoch": 1.0930809949875862, + "grad_norm": 0.6230374863733544, + "learning_rate": 2.245079908175754e-06, + "loss": 0.2719, + "step": 23334 + }, + { + "epoch": 1.0931278399775144, + "grad_norm": 0.5948943639370593, + "learning_rate": 2.2448912472782156e-06, + "loss": 0.2715, + "step": 23335 + }, + { + "epoch": 1.0931746849674426, + "grad_norm": 0.6721619089337338, + "learning_rate": 2.2447025878487635e-06, + "loss": 0.2973, + "step": 23336 + }, + { + "epoch": 1.093221529957371, + "grad_norm": 0.593962339763352, + "learning_rate": 2.2445139298884845e-06, + "loss": 0.2717, + "step": 23337 + }, + { + "epoch": 1.0932683749472993, + "grad_norm": 0.6296630029967342, + "learning_rate": 2.244325273398463e-06, + "loss": 0.2949, + "step": 23338 + }, + { + "epoch": 1.0933152199372278, + "grad_norm": 0.608243134266549, + "learning_rate": 2.2441366183797853e-06, + "loss": 0.2703, + "step": 23339 + }, + { + "epoch": 1.093362064927156, + "grad_norm": 0.6248896118091664, + "learning_rate": 2.243947964833538e-06, + "loss": 0.2761, + "step": 23340 + }, + { + "epoch": 1.0934089099170843, + "grad_norm": 0.5511302453870903, + "learning_rate": 2.243759312760806e-06, + "loss": 0.2633, + "step": 23341 + }, + { + "epoch": 1.0934557549070127, + "grad_norm": 0.6106617634581181, + "learning_rate": 2.2435706621626736e-06, + "loss": 0.2866, + "step": 23342 + }, + { + "epoch": 1.093502599896941, + "grad_norm": 0.536591061089686, + "learning_rate": 2.2433820130402276e-06, + "loss": 0.2787, + "step": 23343 + }, + { + "epoch": 1.0935494448868694, + "grad_norm": 0.637424770523656, + "learning_rate": 2.2431933653945542e-06, + "loss": 0.269, + "step": 23344 + }, + { + "epoch": 1.0935962898767977, + "grad_norm": 0.6699229550683986, + "learning_rate": 2.243004719226738e-06, + "loss": 0.2867, + "step": 23345 + }, + { + "epoch": 1.093643134866726, + "grad_norm": 0.6429632614376668, + "learning_rate": 2.242816074537866e-06, + "loss": 0.285, + "step": 23346 + }, + { + "epoch": 1.0936899798566544, + "grad_norm": 0.6415343888465145, + "learning_rate": 2.242627431329022e-06, + "loss": 0.2866, + "step": 23347 + }, + { + "epoch": 1.0937368248465826, + "grad_norm": 0.6206510504852488, + "learning_rate": 2.242438789601293e-06, + "loss": 0.2732, + "step": 23348 + }, + { + "epoch": 1.0937836698365109, + "grad_norm": 0.5714084615878386, + "learning_rate": 2.242250149355763e-06, + "loss": 0.2825, + "step": 23349 + }, + { + "epoch": 1.0938305148264393, + "grad_norm": 0.6015949237473541, + "learning_rate": 2.242061510593519e-06, + "loss": 0.2764, + "step": 23350 + }, + { + "epoch": 1.0938773598163676, + "grad_norm": 0.5951733771974599, + "learning_rate": 2.2418728733156467e-06, + "loss": 0.2796, + "step": 23351 + }, + { + "epoch": 1.093924204806296, + "grad_norm": 0.6094916962596709, + "learning_rate": 2.2416842375232317e-06, + "loss": 0.286, + "step": 23352 + }, + { + "epoch": 1.0939710497962243, + "grad_norm": 0.5785535858398062, + "learning_rate": 2.241495603217358e-06, + "loss": 0.276, + "step": 23353 + }, + { + "epoch": 1.0940178947861525, + "grad_norm": 0.6128423034624041, + "learning_rate": 2.2413069703991115e-06, + "loss": 0.2843, + "step": 23354 + }, + { + "epoch": 1.094064739776081, + "grad_norm": 0.6037852308534262, + "learning_rate": 2.2411183390695797e-06, + "loss": 0.2872, + "step": 23355 + }, + { + "epoch": 1.0941115847660092, + "grad_norm": 0.5893143633821994, + "learning_rate": 2.240929709229846e-06, + "loss": 0.2628, + "step": 23356 + }, + { + "epoch": 1.0941584297559377, + "grad_norm": 0.5914984105964101, + "learning_rate": 2.2407410808809968e-06, + "loss": 0.2765, + "step": 23357 + }, + { + "epoch": 1.094205274745866, + "grad_norm": 0.6204004688254493, + "learning_rate": 2.240552454024118e-06, + "loss": 0.2715, + "step": 23358 + }, + { + "epoch": 1.0942521197357942, + "grad_norm": 0.6073944138287845, + "learning_rate": 2.240363828660294e-06, + "loss": 0.3141, + "step": 23359 + }, + { + "epoch": 1.0942989647257226, + "grad_norm": 0.5795015499544665, + "learning_rate": 2.2401752047906112e-06, + "loss": 0.2746, + "step": 23360 + }, + { + "epoch": 1.0943458097156509, + "grad_norm": 0.6060466948103775, + "learning_rate": 2.2399865824161553e-06, + "loss": 0.2818, + "step": 23361 + }, + { + "epoch": 1.0943926547055793, + "grad_norm": 0.5701359944446384, + "learning_rate": 2.23979796153801e-06, + "loss": 0.2722, + "step": 23362 + }, + { + "epoch": 1.0944394996955076, + "grad_norm": 0.5937925084042711, + "learning_rate": 2.2396093421572626e-06, + "loss": 0.2715, + "step": 23363 + }, + { + "epoch": 1.0944863446854358, + "grad_norm": 0.5508637388062445, + "learning_rate": 2.239420724274999e-06, + "loss": 0.2628, + "step": 23364 + }, + { + "epoch": 1.0945331896753643, + "grad_norm": 0.5367501223076899, + "learning_rate": 2.239232107892303e-06, + "loss": 0.2581, + "step": 23365 + }, + { + "epoch": 1.0945800346652925, + "grad_norm": 0.6080688192505314, + "learning_rate": 2.23904349301026e-06, + "loss": 0.2875, + "step": 23366 + }, + { + "epoch": 1.094626879655221, + "grad_norm": 0.6242982774858478, + "learning_rate": 2.2388548796299564e-06, + "loss": 0.2848, + "step": 23367 + }, + { + "epoch": 1.0946737246451492, + "grad_norm": 0.6257591993284649, + "learning_rate": 2.238666267752478e-06, + "loss": 0.2875, + "step": 23368 + }, + { + "epoch": 1.0947205696350775, + "grad_norm": 0.6026110255879751, + "learning_rate": 2.2384776573789087e-06, + "loss": 0.282, + "step": 23369 + }, + { + "epoch": 1.094767414625006, + "grad_norm": 0.5486825733424154, + "learning_rate": 2.2382890485103356e-06, + "loss": 0.259, + "step": 23370 + }, + { + "epoch": 1.0948142596149342, + "grad_norm": 0.6249947916700328, + "learning_rate": 2.2381004411478424e-06, + "loss": 0.2933, + "step": 23371 + }, + { + "epoch": 1.0948611046048624, + "grad_norm": 0.6275704480109551, + "learning_rate": 2.237911835292516e-06, + "loss": 0.2802, + "step": 23372 + }, + { + "epoch": 1.0949079495947909, + "grad_norm": 0.6097337095894572, + "learning_rate": 2.2377232309454404e-06, + "loss": 0.2841, + "step": 23373 + }, + { + "epoch": 1.0949547945847191, + "grad_norm": 0.5795214147941877, + "learning_rate": 2.237534628107702e-06, + "loss": 0.2625, + "step": 23374 + }, + { + "epoch": 1.0950016395746476, + "grad_norm": 0.5788275693892809, + "learning_rate": 2.237346026780386e-06, + "loss": 0.2655, + "step": 23375 + }, + { + "epoch": 1.0950484845645758, + "grad_norm": 0.5620517702328047, + "learning_rate": 2.2371574269645786e-06, + "loss": 0.2662, + "step": 23376 + }, + { + "epoch": 1.095095329554504, + "grad_norm": 0.596781034769136, + "learning_rate": 2.2369688286613624e-06, + "loss": 0.2783, + "step": 23377 + }, + { + "epoch": 1.0951421745444325, + "grad_norm": 0.5116413138299225, + "learning_rate": 2.236780231871825e-06, + "loss": 0.26, + "step": 23378 + }, + { + "epoch": 1.0951890195343608, + "grad_norm": 0.6044411655606302, + "learning_rate": 2.2365916365970516e-06, + "loss": 0.2912, + "step": 23379 + }, + { + "epoch": 1.0952358645242892, + "grad_norm": 0.5662646054131406, + "learning_rate": 2.2364030428381265e-06, + "loss": 0.2713, + "step": 23380 + }, + { + "epoch": 1.0952827095142175, + "grad_norm": 0.573888657372331, + "learning_rate": 2.236214450596136e-06, + "loss": 0.2698, + "step": 23381 + }, + { + "epoch": 1.0953295545041457, + "grad_norm": 0.5589829711797426, + "learning_rate": 2.236025859872166e-06, + "loss": 0.2586, + "step": 23382 + }, + { + "epoch": 1.0953763994940742, + "grad_norm": 0.6437676768217242, + "learning_rate": 2.2358372706673e-06, + "loss": 0.2796, + "step": 23383 + }, + { + "epoch": 1.0954232444840024, + "grad_norm": 0.5662586928409309, + "learning_rate": 2.2356486829826235e-06, + "loss": 0.2679, + "step": 23384 + }, + { + "epoch": 1.0954700894739307, + "grad_norm": 0.6115179664976512, + "learning_rate": 2.2354600968192224e-06, + "loss": 0.2881, + "step": 23385 + }, + { + "epoch": 1.0955169344638591, + "grad_norm": 0.6005131875438223, + "learning_rate": 2.2352715121781827e-06, + "loss": 0.2824, + "step": 23386 + }, + { + "epoch": 1.0955637794537874, + "grad_norm": 0.5743545630479286, + "learning_rate": 2.2350829290605885e-06, + "loss": 0.265, + "step": 23387 + }, + { + "epoch": 1.0956106244437158, + "grad_norm": 0.6130498537633153, + "learning_rate": 2.2348943474675253e-06, + "loss": 0.2876, + "step": 23388 + }, + { + "epoch": 1.095657469433644, + "grad_norm": 0.5635826533440671, + "learning_rate": 2.2347057674000793e-06, + "loss": 0.287, + "step": 23389 + }, + { + "epoch": 1.0957043144235723, + "grad_norm": 0.5723779435907074, + "learning_rate": 2.234517188859335e-06, + "loss": 0.2643, + "step": 23390 + }, + { + "epoch": 1.0957511594135008, + "grad_norm": 0.5812793492692275, + "learning_rate": 2.2343286118463766e-06, + "loss": 0.2552, + "step": 23391 + }, + { + "epoch": 1.095798004403429, + "grad_norm": 0.572299917076993, + "learning_rate": 2.2341400363622903e-06, + "loss": 0.2663, + "step": 23392 + }, + { + "epoch": 1.0958448493933575, + "grad_norm": 0.5632282170040576, + "learning_rate": 2.233951462408162e-06, + "loss": 0.2693, + "step": 23393 + }, + { + "epoch": 1.0958916943832857, + "grad_norm": 0.6277633836332397, + "learning_rate": 2.2337628899850752e-06, + "loss": 0.2727, + "step": 23394 + }, + { + "epoch": 1.095938539373214, + "grad_norm": 0.6140808731416875, + "learning_rate": 2.2335743190941174e-06, + "loss": 0.2718, + "step": 23395 + }, + { + "epoch": 1.0959853843631424, + "grad_norm": 0.6290965695279382, + "learning_rate": 2.2333857497363714e-06, + "loss": 0.2925, + "step": 23396 + }, + { + "epoch": 1.0960322293530707, + "grad_norm": 0.6207993817059869, + "learning_rate": 2.233197181912924e-06, + "loss": 0.2698, + "step": 23397 + }, + { + "epoch": 1.0960790743429991, + "grad_norm": 0.572094337397942, + "learning_rate": 2.2330086156248592e-06, + "loss": 0.2579, + "step": 23398 + }, + { + "epoch": 1.0961259193329274, + "grad_norm": 0.5481193409399355, + "learning_rate": 2.2328200508732627e-06, + "loss": 0.2715, + "step": 23399 + }, + { + "epoch": 1.0961727643228556, + "grad_norm": 0.5926659387757477, + "learning_rate": 2.23263148765922e-06, + "loss": 0.2631, + "step": 23400 + }, + { + "epoch": 1.096219609312784, + "grad_norm": 0.5888537010745734, + "learning_rate": 2.2324429259838167e-06, + "loss": 0.2593, + "step": 23401 + }, + { + "epoch": 1.0962664543027123, + "grad_norm": 0.5825186358517143, + "learning_rate": 2.232254365848136e-06, + "loss": 0.268, + "step": 23402 + }, + { + "epoch": 1.0963132992926408, + "grad_norm": 0.5796628784420053, + "learning_rate": 2.232065807253264e-06, + "loss": 0.2785, + "step": 23403 + }, + { + "epoch": 1.096360144282569, + "grad_norm": 0.5863590700548408, + "learning_rate": 2.231877250200286e-06, + "loss": 0.2759, + "step": 23404 + }, + { + "epoch": 1.0964069892724972, + "grad_norm": 0.5623501997708118, + "learning_rate": 2.231688694690287e-06, + "loss": 0.2553, + "step": 23405 + }, + { + "epoch": 1.0964538342624257, + "grad_norm": 0.5580124816667494, + "learning_rate": 2.231500140724353e-06, + "loss": 0.2563, + "step": 23406 + }, + { + "epoch": 1.096500679252354, + "grad_norm": 0.5658182779453127, + "learning_rate": 2.2313115883035675e-06, + "loss": 0.2708, + "step": 23407 + }, + { + "epoch": 1.0965475242422822, + "grad_norm": 0.5870121492909459, + "learning_rate": 2.231123037429017e-06, + "loss": 0.2652, + "step": 23408 + }, + { + "epoch": 1.0965943692322107, + "grad_norm": 0.6125710445061388, + "learning_rate": 2.230934488101785e-06, + "loss": 0.2815, + "step": 23409 + }, + { + "epoch": 1.096641214222139, + "grad_norm": 0.6218317344228292, + "learning_rate": 2.230745940322958e-06, + "loss": 0.2936, + "step": 23410 + }, + { + "epoch": 1.0966880592120674, + "grad_norm": 0.5888740586214471, + "learning_rate": 2.2305573940936197e-06, + "loss": 0.2744, + "step": 23411 + }, + { + "epoch": 1.0967349042019956, + "grad_norm": 0.6197520399185231, + "learning_rate": 2.230368849414856e-06, + "loss": 0.2935, + "step": 23412 + }, + { + "epoch": 1.0967817491919238, + "grad_norm": 0.5456733657718588, + "learning_rate": 2.2301803062877525e-06, + "loss": 0.2682, + "step": 23413 + }, + { + "epoch": 1.0968285941818523, + "grad_norm": 0.5934303118541078, + "learning_rate": 2.2299917647133936e-06, + "loss": 0.2809, + "step": 23414 + }, + { + "epoch": 1.0968754391717805, + "grad_norm": 0.6051218741507297, + "learning_rate": 2.229803224692863e-06, + "loss": 0.2836, + "step": 23415 + }, + { + "epoch": 1.096922284161709, + "grad_norm": 0.5652852241812854, + "learning_rate": 2.2296146862272474e-06, + "loss": 0.2633, + "step": 23416 + }, + { + "epoch": 1.0969691291516372, + "grad_norm": 0.5385596768863542, + "learning_rate": 2.229426149317632e-06, + "loss": 0.2533, + "step": 23417 + }, + { + "epoch": 1.0970159741415655, + "grad_norm": 0.5587725041841631, + "learning_rate": 2.2292376139651e-06, + "loss": 0.2739, + "step": 23418 + }, + { + "epoch": 1.097062819131494, + "grad_norm": 0.5836546184519115, + "learning_rate": 2.2290490801707393e-06, + "loss": 0.2737, + "step": 23419 + }, + { + "epoch": 1.0971096641214222, + "grad_norm": 0.5437756589566882, + "learning_rate": 2.2288605479356314e-06, + "loss": 0.2708, + "step": 23420 + }, + { + "epoch": 1.0971565091113504, + "grad_norm": 0.5783092670946852, + "learning_rate": 2.2286720172608637e-06, + "loss": 0.265, + "step": 23421 + }, + { + "epoch": 1.097203354101279, + "grad_norm": 0.5490199656314165, + "learning_rate": 2.22848348814752e-06, + "loss": 0.2808, + "step": 23422 + }, + { + "epoch": 1.0972501990912071, + "grad_norm": 0.5718555367978909, + "learning_rate": 2.228294960596685e-06, + "loss": 0.2808, + "step": 23423 + }, + { + "epoch": 1.0972970440811356, + "grad_norm": 0.5538453573918481, + "learning_rate": 2.228106434609445e-06, + "loss": 0.252, + "step": 23424 + }, + { + "epoch": 1.0973438890710638, + "grad_norm": 0.5525514032130364, + "learning_rate": 2.227917910186885e-06, + "loss": 0.2632, + "step": 23425 + }, + { + "epoch": 1.097390734060992, + "grad_norm": 0.5526641513786175, + "learning_rate": 2.2277293873300877e-06, + "loss": 0.2785, + "step": 23426 + }, + { + "epoch": 1.0974375790509205, + "grad_norm": 0.6217366772287731, + "learning_rate": 2.227540866040139e-06, + "loss": 0.2638, + "step": 23427 + }, + { + "epoch": 1.0974844240408488, + "grad_norm": 0.5655886839632057, + "learning_rate": 2.227352346318125e-06, + "loss": 0.2685, + "step": 23428 + }, + { + "epoch": 1.0975312690307772, + "grad_norm": 0.5931416685592072, + "learning_rate": 2.227163828165129e-06, + "loss": 0.2918, + "step": 23429 + }, + { + "epoch": 1.0975781140207055, + "grad_norm": 0.6291098105319162, + "learning_rate": 2.2269753115822373e-06, + "loss": 0.2782, + "step": 23430 + }, + { + "epoch": 1.0976249590106337, + "grad_norm": 0.5676405778268794, + "learning_rate": 2.2267867965705343e-06, + "loss": 0.2792, + "step": 23431 + }, + { + "epoch": 1.0976718040005622, + "grad_norm": 0.5542034433510545, + "learning_rate": 2.2265982831311044e-06, + "loss": 0.2688, + "step": 23432 + }, + { + "epoch": 1.0977186489904904, + "grad_norm": 0.6312595904555173, + "learning_rate": 2.226409771265032e-06, + "loss": 0.2981, + "step": 23433 + }, + { + "epoch": 1.097765493980419, + "grad_norm": 0.6015947449715058, + "learning_rate": 2.226221260973403e-06, + "loss": 0.2879, + "step": 23434 + }, + { + "epoch": 1.0978123389703471, + "grad_norm": 0.5876674477612623, + "learning_rate": 2.226032752257302e-06, + "loss": 0.2881, + "step": 23435 + }, + { + "epoch": 1.0978591839602754, + "grad_norm": 0.5754871925662587, + "learning_rate": 2.225844245117813e-06, + "loss": 0.2768, + "step": 23436 + }, + { + "epoch": 1.0979060289502038, + "grad_norm": 0.5962133008516396, + "learning_rate": 2.225655739556023e-06, + "loss": 0.2747, + "step": 23437 + }, + { + "epoch": 1.097952873940132, + "grad_norm": 0.6192524792392972, + "learning_rate": 2.225467235573014e-06, + "loss": 0.2909, + "step": 23438 + }, + { + "epoch": 1.0979997189300605, + "grad_norm": 0.5554108697904779, + "learning_rate": 2.2252787331698724e-06, + "loss": 0.2759, + "step": 23439 + }, + { + "epoch": 1.0980465639199888, + "grad_norm": 0.6300454832573096, + "learning_rate": 2.2250902323476823e-06, + "loss": 0.289, + "step": 23440 + }, + { + "epoch": 1.098093408909917, + "grad_norm": 0.5958898925289007, + "learning_rate": 2.2249017331075286e-06, + "loss": 0.2634, + "step": 23441 + }, + { + "epoch": 1.0981402538998455, + "grad_norm": 0.555322705484872, + "learning_rate": 2.224713235450497e-06, + "loss": 0.266, + "step": 23442 + }, + { + "epoch": 1.0981870988897737, + "grad_norm": 0.5571129422509883, + "learning_rate": 2.224524739377672e-06, + "loss": 0.2779, + "step": 23443 + }, + { + "epoch": 1.098233943879702, + "grad_norm": 0.5723050635972097, + "learning_rate": 2.224336244890137e-06, + "loss": 0.2572, + "step": 23444 + }, + { + "epoch": 1.0982807888696304, + "grad_norm": 0.6005531618949355, + "learning_rate": 2.224147751988977e-06, + "loss": 0.2793, + "step": 23445 + }, + { + "epoch": 1.0983276338595587, + "grad_norm": 0.5442088133241345, + "learning_rate": 2.223959260675279e-06, + "loss": 0.2551, + "step": 23446 + }, + { + "epoch": 1.0983744788494871, + "grad_norm": 0.5705368321069637, + "learning_rate": 2.2237707709501247e-06, + "loss": 0.2686, + "step": 23447 + }, + { + "epoch": 1.0984213238394154, + "grad_norm": 0.5572803464374874, + "learning_rate": 2.2235822828146e-06, + "loss": 0.2753, + "step": 23448 + }, + { + "epoch": 1.0984681688293436, + "grad_norm": 0.5891014233598709, + "learning_rate": 2.2233937962697915e-06, + "loss": 0.284, + "step": 23449 + }, + { + "epoch": 1.098515013819272, + "grad_norm": 0.5664947726259898, + "learning_rate": 2.223205311316782e-06, + "loss": 0.2689, + "step": 23450 + }, + { + "epoch": 1.0985618588092003, + "grad_norm": 0.5431012043567637, + "learning_rate": 2.223016827956655e-06, + "loss": 0.265, + "step": 23451 + }, + { + "epoch": 1.0986087037991288, + "grad_norm": 0.5921350250228609, + "learning_rate": 2.2228283461904963e-06, + "loss": 0.292, + "step": 23452 + }, + { + "epoch": 1.098655548789057, + "grad_norm": 0.5530562258569385, + "learning_rate": 2.222639866019392e-06, + "loss": 0.2697, + "step": 23453 + }, + { + "epoch": 1.0987023937789853, + "grad_norm": 0.5987602633489486, + "learning_rate": 2.222451387444425e-06, + "loss": 0.2788, + "step": 23454 + }, + { + "epoch": 1.0987492387689137, + "grad_norm": 0.5149958955693099, + "learning_rate": 2.2222629104666807e-06, + "loss": 0.2531, + "step": 23455 + }, + { + "epoch": 1.098796083758842, + "grad_norm": 0.6032285831388803, + "learning_rate": 2.222074435087244e-06, + "loss": 0.2822, + "step": 23456 + }, + { + "epoch": 1.0988429287487702, + "grad_norm": 0.616882898640865, + "learning_rate": 2.2218859613071986e-06, + "loss": 0.2875, + "step": 23457 + }, + { + "epoch": 1.0988897737386987, + "grad_norm": 0.6006102772018442, + "learning_rate": 2.221697489127629e-06, + "loss": 0.2768, + "step": 23458 + }, + { + "epoch": 1.098936618728627, + "grad_norm": 0.5737011087723616, + "learning_rate": 2.221509018549621e-06, + "loss": 0.2716, + "step": 23459 + }, + { + "epoch": 1.0989834637185554, + "grad_norm": 0.6042120338688872, + "learning_rate": 2.221320549574259e-06, + "loss": 0.287, + "step": 23460 + }, + { + "epoch": 1.0990303087084836, + "grad_norm": 0.591454781947168, + "learning_rate": 2.2211320822026263e-06, + "loss": 0.2764, + "step": 23461 + }, + { + "epoch": 1.0990771536984119, + "grad_norm": 0.610737807685174, + "learning_rate": 2.2209436164358095e-06, + "loss": 0.2764, + "step": 23462 + }, + { + "epoch": 1.0991239986883403, + "grad_norm": 0.5943433697645355, + "learning_rate": 2.2207551522748918e-06, + "loss": 0.2658, + "step": 23463 + }, + { + "epoch": 1.0991708436782686, + "grad_norm": 0.575139438962364, + "learning_rate": 2.2205666897209573e-06, + "loss": 0.2734, + "step": 23464 + }, + { + "epoch": 1.099217688668197, + "grad_norm": 0.5423840649870939, + "learning_rate": 2.220378228775091e-06, + "loss": 0.261, + "step": 23465 + }, + { + "epoch": 1.0992645336581253, + "grad_norm": 0.5808486945282431, + "learning_rate": 2.2201897694383785e-06, + "loss": 0.2872, + "step": 23466 + }, + { + "epoch": 1.0993113786480535, + "grad_norm": 0.5811694713436867, + "learning_rate": 2.220001311711903e-06, + "loss": 0.2764, + "step": 23467 + }, + { + "epoch": 1.099358223637982, + "grad_norm": 0.6133415982186115, + "learning_rate": 2.219812855596751e-06, + "loss": 0.2823, + "step": 23468 + }, + { + "epoch": 1.0994050686279102, + "grad_norm": 0.6133747543632401, + "learning_rate": 2.219624401094004e-06, + "loss": 0.2862, + "step": 23469 + }, + { + "epoch": 1.0994519136178387, + "grad_norm": 0.5872783064407662, + "learning_rate": 2.219435948204749e-06, + "loss": 0.2789, + "step": 23470 + }, + { + "epoch": 1.099498758607767, + "grad_norm": 0.5412067803142298, + "learning_rate": 2.2192474969300682e-06, + "loss": 0.2588, + "step": 23471 + }, + { + "epoch": 1.0995456035976952, + "grad_norm": 0.5820574994897503, + "learning_rate": 2.2190590472710487e-06, + "loss": 0.2747, + "step": 23472 + }, + { + "epoch": 1.0995924485876236, + "grad_norm": 0.5613081007080313, + "learning_rate": 2.2188705992287737e-06, + "loss": 0.278, + "step": 23473 + }, + { + "epoch": 1.0996392935775519, + "grad_norm": 0.5666283635883619, + "learning_rate": 2.218682152804328e-06, + "loss": 0.2682, + "step": 23474 + }, + { + "epoch": 1.0996861385674803, + "grad_norm": 0.6667133120780898, + "learning_rate": 2.218493707998795e-06, + "loss": 0.2983, + "step": 23475 + }, + { + "epoch": 1.0997329835574086, + "grad_norm": 0.5595732946379394, + "learning_rate": 2.21830526481326e-06, + "loss": 0.2828, + "step": 23476 + }, + { + "epoch": 1.0997798285473368, + "grad_norm": 0.5776446965511275, + "learning_rate": 2.218116823248808e-06, + "loss": 0.2742, + "step": 23477 + }, + { + "epoch": 1.0998266735372653, + "grad_norm": 0.5894068384497636, + "learning_rate": 2.2179283833065227e-06, + "loss": 0.2813, + "step": 23478 + }, + { + "epoch": 1.0998735185271935, + "grad_norm": 0.5998174701547511, + "learning_rate": 2.217739944987488e-06, + "loss": 0.2822, + "step": 23479 + }, + { + "epoch": 1.0999203635171217, + "grad_norm": 0.6380691930457422, + "learning_rate": 2.2175515082927905e-06, + "loss": 0.2865, + "step": 23480 + }, + { + "epoch": 1.0999672085070502, + "grad_norm": 0.5887607539594798, + "learning_rate": 2.217363073223512e-06, + "loss": 0.2779, + "step": 23481 + }, + { + "epoch": 1.1000140534969785, + "grad_norm": 0.5484434488931028, + "learning_rate": 2.217174639780738e-06, + "loss": 0.272, + "step": 23482 + }, + { + "epoch": 1.100060898486907, + "grad_norm": 0.5965739979393118, + "learning_rate": 2.2169862079655525e-06, + "loss": 0.2762, + "step": 23483 + }, + { + "epoch": 1.1001077434768352, + "grad_norm": 0.6175162016029895, + "learning_rate": 2.216797777779041e-06, + "loss": 0.2866, + "step": 23484 + }, + { + "epoch": 1.1001545884667634, + "grad_norm": 0.5669254791522766, + "learning_rate": 2.2166093492222864e-06, + "loss": 0.2642, + "step": 23485 + }, + { + "epoch": 1.1002014334566919, + "grad_norm": 0.6283646706079555, + "learning_rate": 2.216420922296375e-06, + "loss": 0.2829, + "step": 23486 + }, + { + "epoch": 1.10024827844662, + "grad_norm": 0.603423218924244, + "learning_rate": 2.2162324970023887e-06, + "loss": 0.2781, + "step": 23487 + }, + { + "epoch": 1.1002951234365486, + "grad_norm": 0.6342518562464177, + "learning_rate": 2.2160440733414137e-06, + "loss": 0.2812, + "step": 23488 + }, + { + "epoch": 1.1003419684264768, + "grad_norm": 0.5885593907509029, + "learning_rate": 2.2158556513145334e-06, + "loss": 0.2767, + "step": 23489 + }, + { + "epoch": 1.100388813416405, + "grad_norm": 0.572838504313927, + "learning_rate": 2.215667230922832e-06, + "loss": 0.2646, + "step": 23490 + }, + { + "epoch": 1.1004356584063335, + "grad_norm": 0.6008259502971413, + "learning_rate": 2.215478812167395e-06, + "loss": 0.2638, + "step": 23491 + }, + { + "epoch": 1.1004825033962617, + "grad_norm": 0.6438489311435875, + "learning_rate": 2.215290395049306e-06, + "loss": 0.2871, + "step": 23492 + }, + { + "epoch": 1.10052934838619, + "grad_norm": 0.580072830728198, + "learning_rate": 2.2151019795696486e-06, + "loss": 0.2723, + "step": 23493 + }, + { + "epoch": 1.1005761933761185, + "grad_norm": 0.570544316016848, + "learning_rate": 2.2149135657295077e-06, + "loss": 0.2629, + "step": 23494 + }, + { + "epoch": 1.1006230383660467, + "grad_norm": 0.5795011220240522, + "learning_rate": 2.214725153529968e-06, + "loss": 0.2782, + "step": 23495 + }, + { + "epoch": 1.1006698833559752, + "grad_norm": 0.5854714573927722, + "learning_rate": 2.214536742972113e-06, + "loss": 0.2765, + "step": 23496 + }, + { + "epoch": 1.1007167283459034, + "grad_norm": 0.5883964435123333, + "learning_rate": 2.2143483340570273e-06, + "loss": 0.303, + "step": 23497 + }, + { + "epoch": 1.1007635733358316, + "grad_norm": 0.5990735372299519, + "learning_rate": 2.2141599267857954e-06, + "loss": 0.2709, + "step": 23498 + }, + { + "epoch": 1.10081041832576, + "grad_norm": 0.6079470061208707, + "learning_rate": 2.2139715211595016e-06, + "loss": 0.2736, + "step": 23499 + }, + { + "epoch": 1.1008572633156883, + "grad_norm": 0.6486680674578352, + "learning_rate": 2.2137831171792287e-06, + "loss": 0.309, + "step": 23500 + }, + { + "epoch": 1.1009041083056168, + "grad_norm": 0.5874338104031741, + "learning_rate": 2.213594714846063e-06, + "loss": 0.2803, + "step": 23501 + }, + { + "epoch": 1.100950953295545, + "grad_norm": 0.6271866304081793, + "learning_rate": 2.213406314161087e-06, + "loss": 0.278, + "step": 23502 + }, + { + "epoch": 1.1009977982854733, + "grad_norm": 0.6057317685567416, + "learning_rate": 2.2132179151253856e-06, + "loss": 0.2765, + "step": 23503 + }, + { + "epoch": 1.1010446432754017, + "grad_norm": 0.5605071607717393, + "learning_rate": 2.2130295177400443e-06, + "loss": 0.2727, + "step": 23504 + }, + { + "epoch": 1.10109148826533, + "grad_norm": 0.5824877459890557, + "learning_rate": 2.2128411220061453e-06, + "loss": 0.2656, + "step": 23505 + }, + { + "epoch": 1.1011383332552585, + "grad_norm": 0.5993842667859891, + "learning_rate": 2.212652727924773e-06, + "loss": 0.2792, + "step": 23506 + }, + { + "epoch": 1.1011851782451867, + "grad_norm": 0.6033534025946289, + "learning_rate": 2.212464335497012e-06, + "loss": 0.2728, + "step": 23507 + }, + { + "epoch": 1.101232023235115, + "grad_norm": 0.6164487745390765, + "learning_rate": 2.212275944723947e-06, + "loss": 0.2987, + "step": 23508 + }, + { + "epoch": 1.1012788682250434, + "grad_norm": 0.5487621050278847, + "learning_rate": 2.2120875556066613e-06, + "loss": 0.2707, + "step": 23509 + }, + { + "epoch": 1.1013257132149716, + "grad_norm": 0.5922184953517203, + "learning_rate": 2.21189916814624e-06, + "loss": 0.2863, + "step": 23510 + }, + { + "epoch": 1.1013725582049, + "grad_norm": 0.5761362907190499, + "learning_rate": 2.2117107823437656e-06, + "loss": 0.2703, + "step": 23511 + }, + { + "epoch": 1.1014194031948283, + "grad_norm": 0.6098968352637831, + "learning_rate": 2.211522398200324e-06, + "loss": 0.2729, + "step": 23512 + }, + { + "epoch": 1.1014662481847566, + "grad_norm": 0.6368263875319724, + "learning_rate": 2.211334015716998e-06, + "loss": 0.2894, + "step": 23513 + }, + { + "epoch": 1.101513093174685, + "grad_norm": 0.6146426353739589, + "learning_rate": 2.211145634894872e-06, + "loss": 0.2821, + "step": 23514 + }, + { + "epoch": 1.1015599381646133, + "grad_norm": 0.563694191490243, + "learning_rate": 2.2109572557350308e-06, + "loss": 0.2668, + "step": 23515 + }, + { + "epoch": 1.1016067831545415, + "grad_norm": 0.6185149629176138, + "learning_rate": 2.210768878238559e-06, + "loss": 0.2866, + "step": 23516 + }, + { + "epoch": 1.10165362814447, + "grad_norm": 0.5567406335076142, + "learning_rate": 2.2105805024065375e-06, + "loss": 0.28, + "step": 23517 + }, + { + "epoch": 1.1017004731343982, + "grad_norm": 0.5866460234260675, + "learning_rate": 2.2103921282400532e-06, + "loss": 0.2681, + "step": 23518 + }, + { + "epoch": 1.1017473181243267, + "grad_norm": 0.639697752369047, + "learning_rate": 2.2102037557401897e-06, + "loss": 0.2815, + "step": 23519 + }, + { + "epoch": 1.101794163114255, + "grad_norm": 0.6246613840833649, + "learning_rate": 2.2100153849080302e-06, + "loss": 0.2963, + "step": 23520 + }, + { + "epoch": 1.1018410081041832, + "grad_norm": 0.5382379287554371, + "learning_rate": 2.2098270157446595e-06, + "loss": 0.2517, + "step": 23521 + }, + { + "epoch": 1.1018878530941116, + "grad_norm": 0.5804060879063432, + "learning_rate": 2.209638648251162e-06, + "loss": 0.2755, + "step": 23522 + }, + { + "epoch": 1.1019346980840399, + "grad_norm": 0.5937178342909482, + "learning_rate": 2.209450282428621e-06, + "loss": 0.2805, + "step": 23523 + }, + { + "epoch": 1.1019815430739683, + "grad_norm": 0.5800252482018221, + "learning_rate": 2.2092619182781198e-06, + "loss": 0.267, + "step": 23524 + }, + { + "epoch": 1.1020283880638966, + "grad_norm": 0.5775186140369326, + "learning_rate": 2.2090735558007435e-06, + "loss": 0.2726, + "step": 23525 + }, + { + "epoch": 1.1020752330538248, + "grad_norm": 0.6316416012756212, + "learning_rate": 2.208885194997576e-06, + "loss": 0.2857, + "step": 23526 + }, + { + "epoch": 1.1021220780437533, + "grad_norm": 0.6061831938734068, + "learning_rate": 2.208696835869701e-06, + "loss": 0.2618, + "step": 23527 + }, + { + "epoch": 1.1021689230336815, + "grad_norm": 0.6170467892878254, + "learning_rate": 2.208508478418202e-06, + "loss": 0.2758, + "step": 23528 + }, + { + "epoch": 1.1022157680236098, + "grad_norm": 0.594511286637139, + "learning_rate": 2.2083201226441644e-06, + "loss": 0.2737, + "step": 23529 + }, + { + "epoch": 1.1022626130135382, + "grad_norm": 0.6038377994962073, + "learning_rate": 2.208131768548671e-06, + "loss": 0.2652, + "step": 23530 + }, + { + "epoch": 1.1023094580034665, + "grad_norm": 0.6367835681303686, + "learning_rate": 2.207943416132805e-06, + "loss": 0.2878, + "step": 23531 + }, + { + "epoch": 1.102356302993395, + "grad_norm": 0.5558149317138527, + "learning_rate": 2.2077550653976517e-06, + "loss": 0.2742, + "step": 23532 + }, + { + "epoch": 1.1024031479833232, + "grad_norm": 0.5754821231273898, + "learning_rate": 2.207566716344295e-06, + "loss": 0.2674, + "step": 23533 + }, + { + "epoch": 1.1024499929732514, + "grad_norm": 0.6228295406819622, + "learning_rate": 2.2073783689738174e-06, + "loss": 0.298, + "step": 23534 + }, + { + "epoch": 1.1024968379631799, + "grad_norm": 0.5835457497699075, + "learning_rate": 2.207190023287305e-06, + "loss": 0.2917, + "step": 23535 + }, + { + "epoch": 1.1025436829531081, + "grad_norm": 0.578794476274086, + "learning_rate": 2.2070016792858394e-06, + "loss": 0.2949, + "step": 23536 + }, + { + "epoch": 1.1025905279430366, + "grad_norm": 0.594337318859328, + "learning_rate": 2.2068133369705065e-06, + "loss": 0.2673, + "step": 23537 + }, + { + "epoch": 1.1026373729329648, + "grad_norm": 0.6034740130882791, + "learning_rate": 2.206624996342388e-06, + "loss": 0.2777, + "step": 23538 + }, + { + "epoch": 1.102684217922893, + "grad_norm": 0.5851895901069283, + "learning_rate": 2.206436657402569e-06, + "loss": 0.2802, + "step": 23539 + }, + { + "epoch": 1.1027310629128215, + "grad_norm": 0.5846831131818425, + "learning_rate": 2.206248320152134e-06, + "loss": 0.2464, + "step": 23540 + }, + { + "epoch": 1.1027779079027498, + "grad_norm": 0.6137803924759662, + "learning_rate": 2.2060599845921667e-06, + "loss": 0.2683, + "step": 23541 + }, + { + "epoch": 1.1028247528926782, + "grad_norm": 0.5936589827565946, + "learning_rate": 2.2058716507237488e-06, + "loss": 0.2866, + "step": 23542 + }, + { + "epoch": 1.1028715978826065, + "grad_norm": 0.5759878454808051, + "learning_rate": 2.205683318547966e-06, + "loss": 0.2623, + "step": 23543 + }, + { + "epoch": 1.1029184428725347, + "grad_norm": 0.5586267466057371, + "learning_rate": 2.205494988065902e-06, + "loss": 0.2544, + "step": 23544 + }, + { + "epoch": 1.1029652878624632, + "grad_norm": 0.6058879926748093, + "learning_rate": 2.20530665927864e-06, + "loss": 0.2707, + "step": 23545 + }, + { + "epoch": 1.1030121328523914, + "grad_norm": 0.5986474526859165, + "learning_rate": 2.2051183321872645e-06, + "loss": 0.2745, + "step": 23546 + }, + { + "epoch": 1.1030589778423199, + "grad_norm": 0.5830782288453857, + "learning_rate": 2.204930006792859e-06, + "loss": 0.2818, + "step": 23547 + }, + { + "epoch": 1.1031058228322481, + "grad_norm": 0.5949513336649862, + "learning_rate": 2.2047416830965066e-06, + "loss": 0.2807, + "step": 23548 + }, + { + "epoch": 1.1031526678221764, + "grad_norm": 0.5858745989774559, + "learning_rate": 2.204553361099292e-06, + "loss": 0.2846, + "step": 23549 + }, + { + "epoch": 1.1031995128121048, + "grad_norm": 0.5715270693775197, + "learning_rate": 2.2043650408022984e-06, + "loss": 0.2684, + "step": 23550 + }, + { + "epoch": 1.103246357802033, + "grad_norm": 0.5202258243912928, + "learning_rate": 2.2041767222066096e-06, + "loss": 0.259, + "step": 23551 + }, + { + "epoch": 1.1032932027919613, + "grad_norm": 0.5974718242925062, + "learning_rate": 2.203988405313309e-06, + "loss": 0.2911, + "step": 23552 + }, + { + "epoch": 1.1033400477818898, + "grad_norm": 0.5612294253160508, + "learning_rate": 2.2038000901234822e-06, + "loss": 0.2713, + "step": 23553 + }, + { + "epoch": 1.103386892771818, + "grad_norm": 0.5945886257908802, + "learning_rate": 2.2036117766382104e-06, + "loss": 0.2777, + "step": 23554 + }, + { + "epoch": 1.1034337377617465, + "grad_norm": 0.5689541193974925, + "learning_rate": 2.2034234648585784e-06, + "loss": 0.2745, + "step": 23555 + }, + { + "epoch": 1.1034805827516747, + "grad_norm": 0.6453941137323707, + "learning_rate": 2.2032351547856694e-06, + "loss": 0.2704, + "step": 23556 + }, + { + "epoch": 1.103527427741603, + "grad_norm": 0.5591624159237225, + "learning_rate": 2.203046846420568e-06, + "loss": 0.2569, + "step": 23557 + }, + { + "epoch": 1.1035742727315314, + "grad_norm": 0.5960128700254943, + "learning_rate": 2.202858539764357e-06, + "loss": 0.281, + "step": 23558 + }, + { + "epoch": 1.1036211177214597, + "grad_norm": 0.6105242349297174, + "learning_rate": 2.2026702348181215e-06, + "loss": 0.2818, + "step": 23559 + }, + { + "epoch": 1.1036679627113881, + "grad_norm": 0.5546202883138408, + "learning_rate": 2.202481931582943e-06, + "loss": 0.2552, + "step": 23560 + }, + { + "epoch": 1.1037148077013164, + "grad_norm": 0.5788018400835849, + "learning_rate": 2.202293630059907e-06, + "loss": 0.2838, + "step": 23561 + }, + { + "epoch": 1.1037616526912446, + "grad_norm": 0.5389138780804562, + "learning_rate": 2.202105330250095e-06, + "loss": 0.2677, + "step": 23562 + }, + { + "epoch": 1.103808497681173, + "grad_norm": 0.5707783324092415, + "learning_rate": 2.201917032154593e-06, + "loss": 0.27, + "step": 23563 + }, + { + "epoch": 1.1038553426711013, + "grad_norm": 0.6006478563484788, + "learning_rate": 2.201728735774483e-06, + "loss": 0.2824, + "step": 23564 + }, + { + "epoch": 1.1039021876610295, + "grad_norm": 0.5602176888366367, + "learning_rate": 2.2015404411108504e-06, + "loss": 0.2725, + "step": 23565 + }, + { + "epoch": 1.103949032650958, + "grad_norm": 0.5948276418031512, + "learning_rate": 2.2013521481647766e-06, + "loss": 0.2729, + "step": 23566 + }, + { + "epoch": 1.1039958776408862, + "grad_norm": 0.6080673345084618, + "learning_rate": 2.2011638569373457e-06, + "loss": 0.2832, + "step": 23567 + }, + { + "epoch": 1.1040427226308147, + "grad_norm": 0.6148055546838168, + "learning_rate": 2.2009755674296424e-06, + "loss": 0.2924, + "step": 23568 + }, + { + "epoch": 1.104089567620743, + "grad_norm": 0.5924430778836749, + "learning_rate": 2.200787279642749e-06, + "loss": 0.2878, + "step": 23569 + }, + { + "epoch": 1.1041364126106712, + "grad_norm": 0.5661059935556885, + "learning_rate": 2.2005989935777495e-06, + "loss": 0.2669, + "step": 23570 + }, + { + "epoch": 1.1041832576005997, + "grad_norm": 0.5594697318480835, + "learning_rate": 2.2004107092357283e-06, + "loss": 0.2681, + "step": 23571 + }, + { + "epoch": 1.104230102590528, + "grad_norm": 0.5223689220942664, + "learning_rate": 2.2002224266177686e-06, + "loss": 0.2569, + "step": 23572 + }, + { + "epoch": 1.1042769475804564, + "grad_norm": 0.5948504959550769, + "learning_rate": 2.200034145724952e-06, + "loss": 0.2894, + "step": 23573 + }, + { + "epoch": 1.1043237925703846, + "grad_norm": 0.6008553990431397, + "learning_rate": 2.199845866558364e-06, + "loss": 0.2663, + "step": 23574 + }, + { + "epoch": 1.1043706375603128, + "grad_norm": 0.5799352606234888, + "learning_rate": 2.199657589119088e-06, + "loss": 0.2769, + "step": 23575 + }, + { + "epoch": 1.1044174825502413, + "grad_norm": 0.5768380506289482, + "learning_rate": 2.1994693134082066e-06, + "loss": 0.2677, + "step": 23576 + }, + { + "epoch": 1.1044643275401695, + "grad_norm": 0.5765155153936734, + "learning_rate": 2.199281039426805e-06, + "loss": 0.2702, + "step": 23577 + }, + { + "epoch": 1.104511172530098, + "grad_norm": 0.5572898598447562, + "learning_rate": 2.199092767175964e-06, + "loss": 0.2554, + "step": 23578 + }, + { + "epoch": 1.1045580175200262, + "grad_norm": 0.564268805562858, + "learning_rate": 2.198904496656769e-06, + "loss": 0.289, + "step": 23579 + }, + { + "epoch": 1.1046048625099545, + "grad_norm": 0.5518852820736935, + "learning_rate": 2.198716227870303e-06, + "loss": 0.2476, + "step": 23580 + }, + { + "epoch": 1.104651707499883, + "grad_norm": 0.6022353603154793, + "learning_rate": 2.198527960817649e-06, + "loss": 0.2792, + "step": 23581 + }, + { + "epoch": 1.1046985524898112, + "grad_norm": 0.5857803013196974, + "learning_rate": 2.198339695499891e-06, + "loss": 0.2789, + "step": 23582 + }, + { + "epoch": 1.1047453974797397, + "grad_norm": 0.5858472314699945, + "learning_rate": 2.198151431918113e-06, + "loss": 0.2637, + "step": 23583 + }, + { + "epoch": 1.104792242469668, + "grad_norm": 0.5569983834902863, + "learning_rate": 2.197963170073397e-06, + "loss": 0.2736, + "step": 23584 + }, + { + "epoch": 1.1048390874595961, + "grad_norm": 0.5416990203280752, + "learning_rate": 2.1977749099668266e-06, + "loss": 0.262, + "step": 23585 + }, + { + "epoch": 1.1048859324495246, + "grad_norm": 0.6131704991868617, + "learning_rate": 2.197586651599486e-06, + "loss": 0.2756, + "step": 23586 + }, + { + "epoch": 1.1049327774394528, + "grad_norm": 0.5914343452530723, + "learning_rate": 2.197398394972458e-06, + "loss": 0.279, + "step": 23587 + }, + { + "epoch": 1.104979622429381, + "grad_norm": 0.6197343916706818, + "learning_rate": 2.1972101400868266e-06, + "loss": 0.2834, + "step": 23588 + }, + { + "epoch": 1.1050264674193095, + "grad_norm": 0.6203263733755311, + "learning_rate": 2.197021886943675e-06, + "loss": 0.2956, + "step": 23589 + }, + { + "epoch": 1.1050733124092378, + "grad_norm": 0.5936178545387883, + "learning_rate": 2.196833635544086e-06, + "loss": 0.2854, + "step": 23590 + }, + { + "epoch": 1.1051201573991662, + "grad_norm": 0.6100273419443484, + "learning_rate": 2.1966453858891433e-06, + "loss": 0.2634, + "step": 23591 + }, + { + "epoch": 1.1051670023890945, + "grad_norm": 0.6246413016172754, + "learning_rate": 2.1964571379799294e-06, + "loss": 0.2709, + "step": 23592 + }, + { + "epoch": 1.1052138473790227, + "grad_norm": 0.5843905504909098, + "learning_rate": 2.1962688918175295e-06, + "loss": 0.2649, + "step": 23593 + }, + { + "epoch": 1.1052606923689512, + "grad_norm": 0.5797791480814689, + "learning_rate": 2.196080647403025e-06, + "loss": 0.2694, + "step": 23594 + }, + { + "epoch": 1.1053075373588794, + "grad_norm": 0.5701455906017521, + "learning_rate": 2.1958924047375e-06, + "loss": 0.2658, + "step": 23595 + }, + { + "epoch": 1.105354382348808, + "grad_norm": 0.5905404755261215, + "learning_rate": 2.1957041638220382e-06, + "loss": 0.2742, + "step": 23596 + }, + { + "epoch": 1.1054012273387361, + "grad_norm": 0.5694085537722822, + "learning_rate": 2.1955159246577225e-06, + "loss": 0.2653, + "step": 23597 + }, + { + "epoch": 1.1054480723286644, + "grad_norm": 0.6213908828317144, + "learning_rate": 2.1953276872456355e-06, + "loss": 0.2865, + "step": 23598 + }, + { + "epoch": 1.1054949173185928, + "grad_norm": 0.5995838680625675, + "learning_rate": 2.1951394515868617e-06, + "loss": 0.2702, + "step": 23599 + }, + { + "epoch": 1.105541762308521, + "grad_norm": 0.5970125907252668, + "learning_rate": 2.1949512176824834e-06, + "loss": 0.2886, + "step": 23600 + }, + { + "epoch": 1.1055886072984493, + "grad_norm": 0.6140599591866435, + "learning_rate": 2.1947629855335838e-06, + "loss": 0.2882, + "step": 23601 + }, + { + "epoch": 1.1056354522883778, + "grad_norm": 0.5378701913926576, + "learning_rate": 2.1945747551412475e-06, + "loss": 0.2599, + "step": 23602 + }, + { + "epoch": 1.105682297278306, + "grad_norm": 0.6298624744093647, + "learning_rate": 2.1943865265065568e-06, + "loss": 0.3036, + "step": 23603 + }, + { + "epoch": 1.1057291422682345, + "grad_norm": 0.5891094781286704, + "learning_rate": 2.194198299630594e-06, + "loss": 0.2901, + "step": 23604 + }, + { + "epoch": 1.1057759872581627, + "grad_norm": 0.6203529119441551, + "learning_rate": 2.1940100745144425e-06, + "loss": 0.2832, + "step": 23605 + }, + { + "epoch": 1.105822832248091, + "grad_norm": 0.6070483011652626, + "learning_rate": 2.1938218511591874e-06, + "loss": 0.2681, + "step": 23606 + }, + { + "epoch": 1.1058696772380194, + "grad_norm": 0.589037258030725, + "learning_rate": 2.19363362956591e-06, + "loss": 0.2748, + "step": 23607 + }, + { + "epoch": 1.1059165222279477, + "grad_norm": 0.6030220753382745, + "learning_rate": 2.193445409735695e-06, + "loss": 0.3039, + "step": 23608 + }, + { + "epoch": 1.1059633672178761, + "grad_norm": 0.5632644350418248, + "learning_rate": 2.1932571916696237e-06, + "loss": 0.289, + "step": 23609 + }, + { + "epoch": 1.1060102122078044, + "grad_norm": 0.6013121611227352, + "learning_rate": 2.1930689753687804e-06, + "loss": 0.2908, + "step": 23610 + }, + { + "epoch": 1.1060570571977326, + "grad_norm": 0.6062037254809772, + "learning_rate": 2.1928807608342473e-06, + "loss": 0.2913, + "step": 23611 + }, + { + "epoch": 1.106103902187661, + "grad_norm": 0.5534684253305844, + "learning_rate": 2.1926925480671086e-06, + "loss": 0.266, + "step": 23612 + }, + { + "epoch": 1.1061507471775893, + "grad_norm": 0.6378315676823876, + "learning_rate": 2.1925043370684476e-06, + "loss": 0.2782, + "step": 23613 + }, + { + "epoch": 1.1061975921675178, + "grad_norm": 0.5943537136942283, + "learning_rate": 2.1923161278393473e-06, + "loss": 0.2712, + "step": 23614 + }, + { + "epoch": 1.106244437157446, + "grad_norm": 0.5973371571723828, + "learning_rate": 2.1921279203808893e-06, + "loss": 0.291, + "step": 23615 + }, + { + "epoch": 1.1062912821473743, + "grad_norm": 0.6285274792442079, + "learning_rate": 2.1919397146941575e-06, + "loss": 0.2839, + "step": 23616 + }, + { + "epoch": 1.1063381271373027, + "grad_norm": 0.5960701883647598, + "learning_rate": 2.191751510780236e-06, + "loss": 0.2798, + "step": 23617 + }, + { + "epoch": 1.106384972127231, + "grad_norm": 0.6183038355649823, + "learning_rate": 2.1915633086402068e-06, + "loss": 0.2635, + "step": 23618 + }, + { + "epoch": 1.1064318171171594, + "grad_norm": 0.5657071818678234, + "learning_rate": 2.1913751082751525e-06, + "loss": 0.2754, + "step": 23619 + }, + { + "epoch": 1.1064786621070877, + "grad_norm": 0.5897815587652344, + "learning_rate": 2.1911869096861587e-06, + "loss": 0.3, + "step": 23620 + }, + { + "epoch": 1.106525507097016, + "grad_norm": 0.5689442884825514, + "learning_rate": 2.1909987128743056e-06, + "loss": 0.2823, + "step": 23621 + }, + { + "epoch": 1.1065723520869444, + "grad_norm": 0.6129504259919322, + "learning_rate": 2.190810517840677e-06, + "loss": 0.2995, + "step": 23622 + }, + { + "epoch": 1.1066191970768726, + "grad_norm": 0.5666428211178571, + "learning_rate": 2.190622324586356e-06, + "loss": 0.261, + "step": 23623 + }, + { + "epoch": 1.1066660420668009, + "grad_norm": 0.6147286645474757, + "learning_rate": 2.190434133112426e-06, + "loss": 0.2655, + "step": 23624 + }, + { + "epoch": 1.1067128870567293, + "grad_norm": 0.5802900827370138, + "learning_rate": 2.1902459434199696e-06, + "loss": 0.2727, + "step": 23625 + }, + { + "epoch": 1.1067597320466576, + "grad_norm": 0.6657666190348609, + "learning_rate": 2.190057755510071e-06, + "loss": 0.2915, + "step": 23626 + }, + { + "epoch": 1.106806577036586, + "grad_norm": 0.5999906257750185, + "learning_rate": 2.1898695693838114e-06, + "loss": 0.2827, + "step": 23627 + }, + { + "epoch": 1.1068534220265143, + "grad_norm": 0.5483180007684629, + "learning_rate": 2.1896813850422742e-06, + "loss": 0.2721, + "step": 23628 + }, + { + "epoch": 1.1069002670164425, + "grad_norm": 0.5786463999958741, + "learning_rate": 2.1894932024865426e-06, + "loss": 0.2771, + "step": 23629 + }, + { + "epoch": 1.106947112006371, + "grad_norm": 0.5569097940812893, + "learning_rate": 2.1893050217176993e-06, + "loss": 0.2761, + "step": 23630 + }, + { + "epoch": 1.1069939569962992, + "grad_norm": 0.5845595979652445, + "learning_rate": 2.1891168427368283e-06, + "loss": 0.2815, + "step": 23631 + }, + { + "epoch": 1.1070408019862277, + "grad_norm": 0.5789583183218253, + "learning_rate": 2.1889286655450124e-06, + "loss": 0.2769, + "step": 23632 + }, + { + "epoch": 1.107087646976156, + "grad_norm": 0.5604105540086726, + "learning_rate": 2.188740490143333e-06, + "loss": 0.2648, + "step": 23633 + }, + { + "epoch": 1.1071344919660842, + "grad_norm": 0.6125254518362305, + "learning_rate": 2.188552316532873e-06, + "loss": 0.2845, + "step": 23634 + }, + { + "epoch": 1.1071813369560126, + "grad_norm": 0.5454514315859798, + "learning_rate": 2.188364144714717e-06, + "loss": 0.2912, + "step": 23635 + }, + { + "epoch": 1.1072281819459409, + "grad_norm": 0.5714731901615013, + "learning_rate": 2.188175974689947e-06, + "loss": 0.2646, + "step": 23636 + }, + { + "epoch": 1.107275026935869, + "grad_norm": 0.607962476474445, + "learning_rate": 2.187987806459646e-06, + "loss": 0.277, + "step": 23637 + }, + { + "epoch": 1.1073218719257976, + "grad_norm": 0.611275682319895, + "learning_rate": 2.1877996400248964e-06, + "loss": 0.2795, + "step": 23638 + }, + { + "epoch": 1.1073687169157258, + "grad_norm": 0.6164041136693673, + "learning_rate": 2.1876114753867818e-06, + "loss": 0.2752, + "step": 23639 + }, + { + "epoch": 1.1074155619056543, + "grad_norm": 0.6116620059487824, + "learning_rate": 2.187423312546384e-06, + "loss": 0.275, + "step": 23640 + }, + { + "epoch": 1.1074624068955825, + "grad_norm": 0.6305578913086843, + "learning_rate": 2.1872351515047873e-06, + "loss": 0.2724, + "step": 23641 + }, + { + "epoch": 1.1075092518855107, + "grad_norm": 0.5662644355887011, + "learning_rate": 2.1870469922630725e-06, + "loss": 0.2809, + "step": 23642 + }, + { + "epoch": 1.1075560968754392, + "grad_norm": 0.5813194293111688, + "learning_rate": 2.1868588348223243e-06, + "loss": 0.286, + "step": 23643 + }, + { + "epoch": 1.1076029418653675, + "grad_norm": 0.5932340959084784, + "learning_rate": 2.1866706791836255e-06, + "loss": 0.2803, + "step": 23644 + }, + { + "epoch": 1.107649786855296, + "grad_norm": 0.5864391179421987, + "learning_rate": 2.186482525348058e-06, + "loss": 0.268, + "step": 23645 + }, + { + "epoch": 1.1076966318452242, + "grad_norm": 0.677062999717921, + "learning_rate": 2.1862943733167043e-06, + "loss": 0.3068, + "step": 23646 + }, + { + "epoch": 1.1077434768351524, + "grad_norm": 0.5660052506521229, + "learning_rate": 2.1861062230906473e-06, + "loss": 0.2539, + "step": 23647 + }, + { + "epoch": 1.1077903218250809, + "grad_norm": 0.6275369061501519, + "learning_rate": 2.1859180746709706e-06, + "loss": 0.2872, + "step": 23648 + }, + { + "epoch": 1.107837166815009, + "grad_norm": 0.6873141545873142, + "learning_rate": 2.1857299280587555e-06, + "loss": 0.3062, + "step": 23649 + }, + { + "epoch": 1.1078840118049376, + "grad_norm": 0.5399025814251176, + "learning_rate": 2.1855417832550875e-06, + "loss": 0.2695, + "step": 23650 + }, + { + "epoch": 1.1079308567948658, + "grad_norm": 0.6312558962010875, + "learning_rate": 2.185353640261046e-06, + "loss": 0.28, + "step": 23651 + }, + { + "epoch": 1.107977701784794, + "grad_norm": 0.5602064176528223, + "learning_rate": 2.185165499077716e-06, + "loss": 0.2658, + "step": 23652 + }, + { + "epoch": 1.1080245467747225, + "grad_norm": 0.5798999904750287, + "learning_rate": 2.184977359706179e-06, + "loss": 0.2737, + "step": 23653 + }, + { + "epoch": 1.1080713917646507, + "grad_norm": 0.5932995879913107, + "learning_rate": 2.1847892221475175e-06, + "loss": 0.2694, + "step": 23654 + }, + { + "epoch": 1.1081182367545792, + "grad_norm": 0.6029152280130732, + "learning_rate": 2.184601086402816e-06, + "loss": 0.2678, + "step": 23655 + }, + { + "epoch": 1.1081650817445075, + "grad_norm": 0.5687707744888039, + "learning_rate": 2.184412952473156e-06, + "loss": 0.281, + "step": 23656 + }, + { + "epoch": 1.1082119267344357, + "grad_norm": 0.5833499571934825, + "learning_rate": 2.1842248203596193e-06, + "loss": 0.2729, + "step": 23657 + }, + { + "epoch": 1.1082587717243642, + "grad_norm": 0.5668333354513659, + "learning_rate": 2.1840366900632895e-06, + "loss": 0.2587, + "step": 23658 + }, + { + "epoch": 1.1083056167142924, + "grad_norm": 0.5581122525907517, + "learning_rate": 2.1838485615852494e-06, + "loss": 0.2739, + "step": 23659 + }, + { + "epoch": 1.1083524617042206, + "grad_norm": 0.5774840266237106, + "learning_rate": 2.183660434926581e-06, + "loss": 0.2666, + "step": 23660 + }, + { + "epoch": 1.108399306694149, + "grad_norm": 0.6451683771704851, + "learning_rate": 2.1834723100883676e-06, + "loss": 0.2938, + "step": 23661 + }, + { + "epoch": 1.1084461516840773, + "grad_norm": 0.6088106484488034, + "learning_rate": 2.183284187071692e-06, + "loss": 0.2669, + "step": 23662 + }, + { + "epoch": 1.1084929966740058, + "grad_norm": 0.5552181100017781, + "learning_rate": 2.183096065877636e-06, + "loss": 0.2582, + "step": 23663 + }, + { + "epoch": 1.108539841663934, + "grad_norm": 0.5955937581714911, + "learning_rate": 2.1829079465072822e-06, + "loss": 0.2762, + "step": 23664 + }, + { + "epoch": 1.1085866866538623, + "grad_norm": 0.5553951928034478, + "learning_rate": 2.1827198289617134e-06, + "loss": 0.266, + "step": 23665 + }, + { + "epoch": 1.1086335316437907, + "grad_norm": 0.6071679586691795, + "learning_rate": 2.1825317132420126e-06, + "loss": 0.2962, + "step": 23666 + }, + { + "epoch": 1.108680376633719, + "grad_norm": 0.6381207275460317, + "learning_rate": 2.1823435993492615e-06, + "loss": 0.2825, + "step": 23667 + }, + { + "epoch": 1.1087272216236475, + "grad_norm": 0.5306592947685497, + "learning_rate": 2.182155487284543e-06, + "loss": 0.2531, + "step": 23668 + }, + { + "epoch": 1.1087740666135757, + "grad_norm": 0.6210003682360611, + "learning_rate": 2.181967377048941e-06, + "loss": 0.2733, + "step": 23669 + }, + { + "epoch": 1.108820911603504, + "grad_norm": 0.6139153965762277, + "learning_rate": 2.1817792686435364e-06, + "loss": 0.2931, + "step": 23670 + }, + { + "epoch": 1.1088677565934324, + "grad_norm": 0.5439856356020981, + "learning_rate": 2.1815911620694113e-06, + "loss": 0.2638, + "step": 23671 + }, + { + "epoch": 1.1089146015833606, + "grad_norm": 0.5259060124218946, + "learning_rate": 2.1814030573276492e-06, + "loss": 0.2511, + "step": 23672 + }, + { + "epoch": 1.1089614465732889, + "grad_norm": 0.5634388685872392, + "learning_rate": 2.1812149544193328e-06, + "loss": 0.2867, + "step": 23673 + }, + { + "epoch": 1.1090082915632173, + "grad_norm": 0.6823347738687885, + "learning_rate": 2.1810268533455444e-06, + "loss": 0.2924, + "step": 23674 + }, + { + "epoch": 1.1090551365531456, + "grad_norm": 0.5559516730757208, + "learning_rate": 2.180838754107366e-06, + "loss": 0.2534, + "step": 23675 + }, + { + "epoch": 1.109101981543074, + "grad_norm": 0.60751401748568, + "learning_rate": 2.1806506567058807e-06, + "loss": 0.2671, + "step": 23676 + }, + { + "epoch": 1.1091488265330023, + "grad_norm": 0.5800833345149957, + "learning_rate": 2.1804625611421704e-06, + "loss": 0.2754, + "step": 23677 + }, + { + "epoch": 1.1091956715229305, + "grad_norm": 0.5962913777089289, + "learning_rate": 2.1802744674173174e-06, + "loss": 0.2843, + "step": 23678 + }, + { + "epoch": 1.109242516512859, + "grad_norm": 0.6120997392669847, + "learning_rate": 2.180086375532404e-06, + "loss": 0.2937, + "step": 23679 + }, + { + "epoch": 1.1092893615027872, + "grad_norm": 0.5687503461933481, + "learning_rate": 2.179898285488514e-06, + "loss": 0.2603, + "step": 23680 + }, + { + "epoch": 1.1093362064927157, + "grad_norm": 0.5895388550919572, + "learning_rate": 2.17971019728673e-06, + "loss": 0.281, + "step": 23681 + }, + { + "epoch": 1.109383051482644, + "grad_norm": 0.5756278330441058, + "learning_rate": 2.1795221109281317e-06, + "loss": 0.2779, + "step": 23682 + }, + { + "epoch": 1.1094298964725722, + "grad_norm": 0.5732755381058328, + "learning_rate": 2.179334026413803e-06, + "loss": 0.2769, + "step": 23683 + }, + { + "epoch": 1.1094767414625006, + "grad_norm": 0.5829256313626886, + "learning_rate": 2.179145943744827e-06, + "loss": 0.2871, + "step": 23684 + }, + { + "epoch": 1.1095235864524289, + "grad_norm": 0.5568240938165947, + "learning_rate": 2.178957862922285e-06, + "loss": 0.2709, + "step": 23685 + }, + { + "epoch": 1.1095704314423573, + "grad_norm": 0.6182533407789098, + "learning_rate": 2.1787697839472606e-06, + "loss": 0.294, + "step": 23686 + }, + { + "epoch": 1.1096172764322856, + "grad_norm": 0.545250077509048, + "learning_rate": 2.1785817068208348e-06, + "loss": 0.2585, + "step": 23687 + }, + { + "epoch": 1.1096641214222138, + "grad_norm": 0.5688817859411597, + "learning_rate": 2.178393631544091e-06, + "loss": 0.2739, + "step": 23688 + }, + { + "epoch": 1.1097109664121423, + "grad_norm": 0.5978189939649036, + "learning_rate": 2.17820555811811e-06, + "loss": 0.2845, + "step": 23689 + }, + { + "epoch": 1.1097578114020705, + "grad_norm": 0.5931063980083868, + "learning_rate": 2.178017486543976e-06, + "loss": 0.2685, + "step": 23690 + }, + { + "epoch": 1.109804656391999, + "grad_norm": 0.5691968706143082, + "learning_rate": 2.17782941682277e-06, + "loss": 0.2919, + "step": 23691 + }, + { + "epoch": 1.1098515013819272, + "grad_norm": 0.5754823017867388, + "learning_rate": 2.177641348955574e-06, + "loss": 0.2563, + "step": 23692 + }, + { + "epoch": 1.1098983463718555, + "grad_norm": 0.5709839583316608, + "learning_rate": 2.177453282943473e-06, + "loss": 0.2847, + "step": 23693 + }, + { + "epoch": 1.109945191361784, + "grad_norm": 0.6295593268636401, + "learning_rate": 2.1772652187875464e-06, + "loss": 0.29, + "step": 23694 + }, + { + "epoch": 1.1099920363517122, + "grad_norm": 0.6183749427666978, + "learning_rate": 2.1770771564888765e-06, + "loss": 0.2768, + "step": 23695 + }, + { + "epoch": 1.1100388813416404, + "grad_norm": 0.5902226347452606, + "learning_rate": 2.1768890960485465e-06, + "loss": 0.2862, + "step": 23696 + }, + { + "epoch": 1.1100857263315689, + "grad_norm": 0.6559509978072644, + "learning_rate": 2.1767010374676394e-06, + "loss": 0.2748, + "step": 23697 + }, + { + "epoch": 1.1101325713214971, + "grad_norm": 0.5725160039758448, + "learning_rate": 2.1765129807472363e-06, + "loss": 0.2864, + "step": 23698 + }, + { + "epoch": 1.1101794163114256, + "grad_norm": 0.6059809063025836, + "learning_rate": 2.17632492588842e-06, + "loss": 0.2853, + "step": 23699 + }, + { + "epoch": 1.1102262613013538, + "grad_norm": 0.5488971681298522, + "learning_rate": 2.176136872892272e-06, + "loss": 0.2697, + "step": 23700 + }, + { + "epoch": 1.110273106291282, + "grad_norm": 0.6004408370728881, + "learning_rate": 2.175948821759875e-06, + "loss": 0.2838, + "step": 23701 + }, + { + "epoch": 1.1103199512812105, + "grad_norm": 0.6105214194885563, + "learning_rate": 2.1757607724923108e-06, + "loss": 0.2746, + "step": 23702 + }, + { + "epoch": 1.1103667962711388, + "grad_norm": 0.5846307595319872, + "learning_rate": 2.1755727250906618e-06, + "loss": 0.2742, + "step": 23703 + }, + { + "epoch": 1.1104136412610672, + "grad_norm": 0.5595447941245473, + "learning_rate": 2.1753846795560106e-06, + "loss": 0.2906, + "step": 23704 + }, + { + "epoch": 1.1104604862509955, + "grad_norm": 0.619028287168202, + "learning_rate": 2.1751966358894394e-06, + "loss": 0.2708, + "step": 23705 + }, + { + "epoch": 1.1105073312409237, + "grad_norm": 0.5459150898010251, + "learning_rate": 2.1750085940920293e-06, + "loss": 0.2595, + "step": 23706 + }, + { + "epoch": 1.1105541762308522, + "grad_norm": 0.5840893261293327, + "learning_rate": 2.1748205541648624e-06, + "loss": 0.2814, + "step": 23707 + }, + { + "epoch": 1.1106010212207804, + "grad_norm": 0.6011005781762104, + "learning_rate": 2.174632516109023e-06, + "loss": 0.2923, + "step": 23708 + }, + { + "epoch": 1.1106478662107087, + "grad_norm": 0.600323891514902, + "learning_rate": 2.1744444799255906e-06, + "loss": 0.2799, + "step": 23709 + }, + { + "epoch": 1.1106947112006371, + "grad_norm": 0.5828225932983757, + "learning_rate": 2.174256445615648e-06, + "loss": 0.2654, + "step": 23710 + }, + { + "epoch": 1.1107415561905654, + "grad_norm": 0.5979637863838133, + "learning_rate": 2.1740684131802793e-06, + "loss": 0.2733, + "step": 23711 + }, + { + "epoch": 1.1107884011804938, + "grad_norm": 0.6071856906996498, + "learning_rate": 2.173880382620564e-06, + "loss": 0.2787, + "step": 23712 + }, + { + "epoch": 1.110835246170422, + "grad_norm": 0.5759263786500379, + "learning_rate": 2.173692353937585e-06, + "loss": 0.2689, + "step": 23713 + }, + { + "epoch": 1.1108820911603503, + "grad_norm": 0.5504439386264723, + "learning_rate": 2.1735043271324244e-06, + "loss": 0.2667, + "step": 23714 + }, + { + "epoch": 1.1109289361502788, + "grad_norm": 0.5964753805947878, + "learning_rate": 2.173316302206165e-06, + "loss": 0.2924, + "step": 23715 + }, + { + "epoch": 1.110975781140207, + "grad_norm": 0.5841916049654287, + "learning_rate": 2.1731282791598874e-06, + "loss": 0.2966, + "step": 23716 + }, + { + "epoch": 1.1110226261301355, + "grad_norm": 0.5887726735154503, + "learning_rate": 2.1729402579946757e-06, + "loss": 0.2799, + "step": 23717 + }, + { + "epoch": 1.1110694711200637, + "grad_norm": 0.5855280988831121, + "learning_rate": 2.1727522387116093e-06, + "loss": 0.259, + "step": 23718 + }, + { + "epoch": 1.111116316109992, + "grad_norm": 0.6128999888611859, + "learning_rate": 2.172564221311772e-06, + "loss": 0.2883, + "step": 23719 + }, + { + "epoch": 1.1111631610999204, + "grad_norm": 0.6596018493336226, + "learning_rate": 2.172376205796245e-06, + "loss": 0.2802, + "step": 23720 + }, + { + "epoch": 1.1112100060898487, + "grad_norm": 0.5886138625074022, + "learning_rate": 2.1721881921661104e-06, + "loss": 0.2705, + "step": 23721 + }, + { + "epoch": 1.1112568510797771, + "grad_norm": 0.5953698200032708, + "learning_rate": 2.1720001804224513e-06, + "loss": 0.2654, + "step": 23722 + }, + { + "epoch": 1.1113036960697054, + "grad_norm": 0.5956755152177765, + "learning_rate": 2.1718121705663487e-06, + "loss": 0.2688, + "step": 23723 + }, + { + "epoch": 1.1113505410596336, + "grad_norm": 0.642574425823472, + "learning_rate": 2.171624162598884e-06, + "loss": 0.301, + "step": 23724 + }, + { + "epoch": 1.111397386049562, + "grad_norm": 0.6128680992439403, + "learning_rate": 2.171436156521139e-06, + "loss": 0.2834, + "step": 23725 + }, + { + "epoch": 1.1114442310394903, + "grad_norm": 0.5983623299535312, + "learning_rate": 2.1712481523341975e-06, + "loss": 0.2654, + "step": 23726 + }, + { + "epoch": 1.1114910760294188, + "grad_norm": 0.5984701901158769, + "learning_rate": 2.1710601500391396e-06, + "loss": 0.2635, + "step": 23727 + }, + { + "epoch": 1.111537921019347, + "grad_norm": 0.5695117233850937, + "learning_rate": 2.1708721496370483e-06, + "loss": 0.2742, + "step": 23728 + }, + { + "epoch": 1.1115847660092752, + "grad_norm": 0.5956243700832661, + "learning_rate": 2.1706841511290044e-06, + "loss": 0.2813, + "step": 23729 + }, + { + "epoch": 1.1116316109992037, + "grad_norm": 0.6033583166680097, + "learning_rate": 2.1704961545160917e-06, + "loss": 0.2742, + "step": 23730 + }, + { + "epoch": 1.111678455989132, + "grad_norm": 0.633453288105841, + "learning_rate": 2.1703081597993897e-06, + "loss": 0.2975, + "step": 23731 + }, + { + "epoch": 1.1117253009790602, + "grad_norm": 0.5936566909122574, + "learning_rate": 2.1701201669799814e-06, + "loss": 0.2779, + "step": 23732 + }, + { + "epoch": 1.1117721459689887, + "grad_norm": 0.5715143706829854, + "learning_rate": 2.1699321760589493e-06, + "loss": 0.2677, + "step": 23733 + }, + { + "epoch": 1.111818990958917, + "grad_norm": 0.6409658644258999, + "learning_rate": 2.1697441870373737e-06, + "loss": 0.2744, + "step": 23734 + }, + { + "epoch": 1.1118658359488454, + "grad_norm": 0.6188964479915053, + "learning_rate": 2.169556199916338e-06, + "loss": 0.2836, + "step": 23735 + }, + { + "epoch": 1.1119126809387736, + "grad_norm": 0.5650586999458906, + "learning_rate": 2.1693682146969235e-06, + "loss": 0.2663, + "step": 23736 + }, + { + "epoch": 1.1119595259287018, + "grad_norm": 0.5942163901159087, + "learning_rate": 2.1691802313802114e-06, + "loss": 0.2641, + "step": 23737 + }, + { + "epoch": 1.1120063709186303, + "grad_norm": 0.6348140489835432, + "learning_rate": 2.1689922499672837e-06, + "loss": 0.2918, + "step": 23738 + }, + { + "epoch": 1.1120532159085585, + "grad_norm": 0.5620389138621369, + "learning_rate": 2.168804270459223e-06, + "loss": 0.2596, + "step": 23739 + }, + { + "epoch": 1.112100060898487, + "grad_norm": 0.6162507439630822, + "learning_rate": 2.1686162928571098e-06, + "loss": 0.2737, + "step": 23740 + }, + { + "epoch": 1.1121469058884152, + "grad_norm": 0.571748653557979, + "learning_rate": 2.1684283171620264e-06, + "loss": 0.2735, + "step": 23741 + }, + { + "epoch": 1.1121937508783435, + "grad_norm": 0.583365664549642, + "learning_rate": 2.168240343375056e-06, + "loss": 0.2565, + "step": 23742 + }, + { + "epoch": 1.112240595868272, + "grad_norm": 0.5787824309506495, + "learning_rate": 2.1680523714972784e-06, + "loss": 0.2734, + "step": 23743 + }, + { + "epoch": 1.1122874408582002, + "grad_norm": 0.6123604385662847, + "learning_rate": 2.1678644015297754e-06, + "loss": 0.2757, + "step": 23744 + }, + { + "epoch": 1.1123342858481284, + "grad_norm": 0.6004008559985006, + "learning_rate": 2.1676764334736295e-06, + "loss": 0.2627, + "step": 23745 + }, + { + "epoch": 1.112381130838057, + "grad_norm": 0.5758870832017193, + "learning_rate": 2.1674884673299225e-06, + "loss": 0.2665, + "step": 23746 + }, + { + "epoch": 1.1124279758279851, + "grad_norm": 0.5650647108301335, + "learning_rate": 2.167300503099736e-06, + "loss": 0.2861, + "step": 23747 + }, + { + "epoch": 1.1124748208179136, + "grad_norm": 0.5778743608447627, + "learning_rate": 2.1671125407841515e-06, + "loss": 0.2637, + "step": 23748 + }, + { + "epoch": 1.1125216658078418, + "grad_norm": 0.618000305244528, + "learning_rate": 2.16692458038425e-06, + "loss": 0.2889, + "step": 23749 + }, + { + "epoch": 1.11256851079777, + "grad_norm": 0.6171639894642031, + "learning_rate": 2.1667366219011144e-06, + "loss": 0.2977, + "step": 23750 + }, + { + "epoch": 1.1126153557876985, + "grad_norm": 0.5576388283060271, + "learning_rate": 2.1665486653358254e-06, + "loss": 0.2752, + "step": 23751 + }, + { + "epoch": 1.1126622007776268, + "grad_norm": 0.5748958318674203, + "learning_rate": 2.1663607106894646e-06, + "loss": 0.2709, + "step": 23752 + }, + { + "epoch": 1.1127090457675552, + "grad_norm": 0.5728792633595152, + "learning_rate": 2.166172757963115e-06, + "loss": 0.2649, + "step": 23753 + }, + { + "epoch": 1.1127558907574835, + "grad_norm": 0.5555837533220148, + "learning_rate": 2.1659848071578574e-06, + "loss": 0.2515, + "step": 23754 + }, + { + "epoch": 1.1128027357474117, + "grad_norm": 0.5815282494054976, + "learning_rate": 2.1657968582747723e-06, + "loss": 0.2627, + "step": 23755 + }, + { + "epoch": 1.1128495807373402, + "grad_norm": 0.5634535733737418, + "learning_rate": 2.1656089113149424e-06, + "loss": 0.256, + "step": 23756 + }, + { + "epoch": 1.1128964257272684, + "grad_norm": 0.5769352322440804, + "learning_rate": 2.1654209662794502e-06, + "loss": 0.2667, + "step": 23757 + }, + { + "epoch": 1.112943270717197, + "grad_norm": 0.6414272048645961, + "learning_rate": 2.165233023169375e-06, + "loss": 0.2884, + "step": 23758 + }, + { + "epoch": 1.1129901157071251, + "grad_norm": 0.6145450281570927, + "learning_rate": 2.1650450819857998e-06, + "loss": 0.2723, + "step": 23759 + }, + { + "epoch": 1.1130369606970534, + "grad_norm": 0.5977446617697335, + "learning_rate": 2.164857142729807e-06, + "loss": 0.2678, + "step": 23760 + }, + { + "epoch": 1.1130838056869818, + "grad_norm": 0.6595877460440873, + "learning_rate": 2.164669205402477e-06, + "loss": 0.2847, + "step": 23761 + }, + { + "epoch": 1.11313065067691, + "grad_norm": 0.6241838194059668, + "learning_rate": 2.16448127000489e-06, + "loss": 0.2816, + "step": 23762 + }, + { + "epoch": 1.1131774956668385, + "grad_norm": 0.5652842915397277, + "learning_rate": 2.1642933365381295e-06, + "loss": 0.2739, + "step": 23763 + }, + { + "epoch": 1.1132243406567668, + "grad_norm": 0.6349229674241862, + "learning_rate": 2.1641054050032774e-06, + "loss": 0.2935, + "step": 23764 + }, + { + "epoch": 1.113271185646695, + "grad_norm": 0.5895256264071186, + "learning_rate": 2.1639174754014133e-06, + "loss": 0.2672, + "step": 23765 + }, + { + "epoch": 1.1133180306366235, + "grad_norm": 0.6300549304284317, + "learning_rate": 2.163729547733621e-06, + "loss": 0.2923, + "step": 23766 + }, + { + "epoch": 1.1133648756265517, + "grad_norm": 0.6065034879645808, + "learning_rate": 2.1635416220009793e-06, + "loss": 0.2861, + "step": 23767 + }, + { + "epoch": 1.11341172061648, + "grad_norm": 0.5924374056226653, + "learning_rate": 2.1633536982045715e-06, + "loss": 0.2969, + "step": 23768 + }, + { + "epoch": 1.1134585656064084, + "grad_norm": 0.5956189274995003, + "learning_rate": 2.163165776345478e-06, + "loss": 0.2724, + "step": 23769 + }, + { + "epoch": 1.1135054105963367, + "grad_norm": 0.6058195123088405, + "learning_rate": 2.162977856424781e-06, + "loss": 0.2781, + "step": 23770 + }, + { + "epoch": 1.1135522555862651, + "grad_norm": 0.6088691869168235, + "learning_rate": 2.1627899384435623e-06, + "loss": 0.2799, + "step": 23771 + }, + { + "epoch": 1.1135991005761934, + "grad_norm": 0.5853539539517018, + "learning_rate": 2.1626020224029033e-06, + "loss": 0.2664, + "step": 23772 + }, + { + "epoch": 1.1136459455661216, + "grad_norm": 0.6635028186892507, + "learning_rate": 2.1624141083038834e-06, + "loss": 0.2908, + "step": 23773 + }, + { + "epoch": 1.11369279055605, + "grad_norm": 0.6950910332985105, + "learning_rate": 2.1622261961475857e-06, + "loss": 0.2923, + "step": 23774 + }, + { + "epoch": 1.1137396355459783, + "grad_norm": 0.6000880276144435, + "learning_rate": 2.162038285935092e-06, + "loss": 0.2768, + "step": 23775 + }, + { + "epoch": 1.1137864805359068, + "grad_norm": 0.5565082824227117, + "learning_rate": 2.161850377667482e-06, + "loss": 0.2684, + "step": 23776 + }, + { + "epoch": 1.113833325525835, + "grad_norm": 0.6807121267264666, + "learning_rate": 2.1616624713458392e-06, + "loss": 0.2929, + "step": 23777 + }, + { + "epoch": 1.1138801705157633, + "grad_norm": 0.6159930751321968, + "learning_rate": 2.161474566971244e-06, + "loss": 0.2914, + "step": 23778 + }, + { + "epoch": 1.1139270155056917, + "grad_norm": 0.6571874253039646, + "learning_rate": 2.1612866645447767e-06, + "loss": 0.2808, + "step": 23779 + }, + { + "epoch": 1.11397386049562, + "grad_norm": 0.6466149786510947, + "learning_rate": 2.1610987640675197e-06, + "loss": 0.2908, + "step": 23780 + }, + { + "epoch": 1.1140207054855482, + "grad_norm": 0.6615469144701167, + "learning_rate": 2.1609108655405543e-06, + "loss": 0.3098, + "step": 23781 + }, + { + "epoch": 1.1140675504754767, + "grad_norm": 0.6043530240977509, + "learning_rate": 2.1607229689649613e-06, + "loss": 0.268, + "step": 23782 + }, + { + "epoch": 1.114114395465405, + "grad_norm": 0.617112124675198, + "learning_rate": 2.1605350743418225e-06, + "loss": 0.277, + "step": 23783 + }, + { + "epoch": 1.1141612404553334, + "grad_norm": 0.647972435839366, + "learning_rate": 2.16034718167222e-06, + "loss": 0.2898, + "step": 23784 + }, + { + "epoch": 1.1142080854452616, + "grad_norm": 0.5947466765257886, + "learning_rate": 2.160159290957233e-06, + "loss": 0.2935, + "step": 23785 + }, + { + "epoch": 1.1142549304351899, + "grad_norm": 0.6093690571826877, + "learning_rate": 2.159971402197944e-06, + "loss": 0.2777, + "step": 23786 + }, + { + "epoch": 1.1143017754251183, + "grad_norm": 0.6072557162376871, + "learning_rate": 2.159783515395434e-06, + "loss": 0.2834, + "step": 23787 + }, + { + "epoch": 1.1143486204150466, + "grad_norm": 0.5759875716644454, + "learning_rate": 2.159595630550785e-06, + "loss": 0.2714, + "step": 23788 + }, + { + "epoch": 1.114395465404975, + "grad_norm": 0.5931196156377727, + "learning_rate": 2.1594077476650766e-06, + "loss": 0.2871, + "step": 23789 + }, + { + "epoch": 1.1144423103949033, + "grad_norm": 0.5647029815981427, + "learning_rate": 2.1592198667393923e-06, + "loss": 0.2649, + "step": 23790 + }, + { + "epoch": 1.1144891553848315, + "grad_norm": 0.5901134809057835, + "learning_rate": 2.159031987774811e-06, + "loss": 0.2926, + "step": 23791 + }, + { + "epoch": 1.11453600037476, + "grad_norm": 0.6054343118286786, + "learning_rate": 2.158844110772416e-06, + "loss": 0.2664, + "step": 23792 + }, + { + "epoch": 1.1145828453646882, + "grad_norm": 0.5707601251581081, + "learning_rate": 2.1586562357332857e-06, + "loss": 0.2587, + "step": 23793 + }, + { + "epoch": 1.1146296903546167, + "grad_norm": 0.5658032541681008, + "learning_rate": 2.1584683626585033e-06, + "loss": 0.2629, + "step": 23794 + }, + { + "epoch": 1.114676535344545, + "grad_norm": 0.6019319574300335, + "learning_rate": 2.1582804915491507e-06, + "loss": 0.2886, + "step": 23795 + }, + { + "epoch": 1.1147233803344732, + "grad_norm": 0.5698159006964983, + "learning_rate": 2.158092622406307e-06, + "loss": 0.2762, + "step": 23796 + }, + { + "epoch": 1.1147702253244016, + "grad_norm": 0.6100893827286017, + "learning_rate": 2.157904755231056e-06, + "loss": 0.2847, + "step": 23797 + }, + { + "epoch": 1.1148170703143299, + "grad_norm": 0.5774002090018621, + "learning_rate": 2.1577168900244756e-06, + "loss": 0.2837, + "step": 23798 + }, + { + "epoch": 1.1148639153042583, + "grad_norm": 0.5843183746957714, + "learning_rate": 2.157529026787649e-06, + "loss": 0.2744, + "step": 23799 + }, + { + "epoch": 1.1149107602941866, + "grad_norm": 0.5977310924933086, + "learning_rate": 2.157341165521656e-06, + "loss": 0.2742, + "step": 23800 + }, + { + "epoch": 1.1149576052841148, + "grad_norm": 0.6401111769692921, + "learning_rate": 2.1571533062275792e-06, + "loss": 0.2984, + "step": 23801 + }, + { + "epoch": 1.1150044502740433, + "grad_norm": 0.6367942349828961, + "learning_rate": 2.1569654489064988e-06, + "loss": 0.2965, + "step": 23802 + }, + { + "epoch": 1.1150512952639715, + "grad_norm": 0.5798005763382732, + "learning_rate": 2.156777593559497e-06, + "loss": 0.2719, + "step": 23803 + }, + { + "epoch": 1.1150981402538998, + "grad_norm": 0.5920213086099997, + "learning_rate": 2.1565897401876524e-06, + "loss": 0.2582, + "step": 23804 + }, + { + "epoch": 1.1151449852438282, + "grad_norm": 0.6454893645607694, + "learning_rate": 2.156401888792048e-06, + "loss": 0.3114, + "step": 23805 + }, + { + "epoch": 1.1151918302337565, + "grad_norm": 0.6014134761583892, + "learning_rate": 2.1562140393737647e-06, + "loss": 0.2958, + "step": 23806 + }, + { + "epoch": 1.115238675223685, + "grad_norm": 0.5972622051468293, + "learning_rate": 2.1560261919338822e-06, + "loss": 0.2765, + "step": 23807 + }, + { + "epoch": 1.1152855202136132, + "grad_norm": 0.6441118530014416, + "learning_rate": 2.155838346473483e-06, + "loss": 0.2847, + "step": 23808 + }, + { + "epoch": 1.1153323652035414, + "grad_norm": 0.6050293627036045, + "learning_rate": 2.1556505029936486e-06, + "loss": 0.2827, + "step": 23809 + }, + { + "epoch": 1.1153792101934699, + "grad_norm": 0.5867521738244365, + "learning_rate": 2.1554626614954587e-06, + "loss": 0.2811, + "step": 23810 + }, + { + "epoch": 1.115426055183398, + "grad_norm": 0.6164483428729116, + "learning_rate": 2.155274821979994e-06, + "loss": 0.2815, + "step": 23811 + }, + { + "epoch": 1.1154729001733266, + "grad_norm": 0.5673628177049518, + "learning_rate": 2.155086984448336e-06, + "loss": 0.2841, + "step": 23812 + }, + { + "epoch": 1.1155197451632548, + "grad_norm": 0.580438693764094, + "learning_rate": 2.154899148901566e-06, + "loss": 0.2745, + "step": 23813 + }, + { + "epoch": 1.115566590153183, + "grad_norm": 0.5652773423083338, + "learning_rate": 2.154711315340764e-06, + "loss": 0.2728, + "step": 23814 + }, + { + "epoch": 1.1156134351431115, + "grad_norm": 0.5593282125466587, + "learning_rate": 2.154523483767013e-06, + "loss": 0.278, + "step": 23815 + }, + { + "epoch": 1.1156602801330398, + "grad_norm": 0.5738878407048396, + "learning_rate": 2.1543356541813915e-06, + "loss": 0.2854, + "step": 23816 + }, + { + "epoch": 1.115707125122968, + "grad_norm": 0.687504509864402, + "learning_rate": 2.1541478265849816e-06, + "loss": 0.3023, + "step": 23817 + }, + { + "epoch": 1.1157539701128965, + "grad_norm": 0.5917924014556926, + "learning_rate": 2.1539600009788637e-06, + "loss": 0.2717, + "step": 23818 + }, + { + "epoch": 1.1158008151028247, + "grad_norm": 0.6169135379477088, + "learning_rate": 2.153772177364119e-06, + "loss": 0.2919, + "step": 23819 + }, + { + "epoch": 1.1158476600927532, + "grad_norm": 0.6472144077829811, + "learning_rate": 2.153584355741829e-06, + "loss": 0.2965, + "step": 23820 + }, + { + "epoch": 1.1158945050826814, + "grad_norm": 0.5918132174183148, + "learning_rate": 2.1533965361130744e-06, + "loss": 0.2668, + "step": 23821 + }, + { + "epoch": 1.1159413500726096, + "grad_norm": 0.600416711770205, + "learning_rate": 2.153208718478935e-06, + "loss": 0.2685, + "step": 23822 + }, + { + "epoch": 1.115988195062538, + "grad_norm": 0.59711116638092, + "learning_rate": 2.153020902840492e-06, + "loss": 0.2697, + "step": 23823 + }, + { + "epoch": 1.1160350400524663, + "grad_norm": 0.5435178516931479, + "learning_rate": 2.1528330891988266e-06, + "loss": 0.2577, + "step": 23824 + }, + { + "epoch": 1.1160818850423948, + "grad_norm": 0.6023388774674739, + "learning_rate": 2.1526452775550194e-06, + "loss": 0.2809, + "step": 23825 + }, + { + "epoch": 1.116128730032323, + "grad_norm": 0.6367679676452731, + "learning_rate": 2.152457467910152e-06, + "loss": 0.2815, + "step": 23826 + }, + { + "epoch": 1.1161755750222513, + "grad_norm": 0.6248176871743605, + "learning_rate": 2.1522696602653045e-06, + "loss": 0.2801, + "step": 23827 + }, + { + "epoch": 1.1162224200121798, + "grad_norm": 0.5817129271959994, + "learning_rate": 2.152081854621557e-06, + "loss": 0.2822, + "step": 23828 + }, + { + "epoch": 1.116269265002108, + "grad_norm": 0.5512850181030482, + "learning_rate": 2.151894050979991e-06, + "loss": 0.2643, + "step": 23829 + }, + { + "epoch": 1.1163161099920365, + "grad_norm": 0.6263130654085856, + "learning_rate": 2.1517062493416878e-06, + "loss": 0.2853, + "step": 23830 + }, + { + "epoch": 1.1163629549819647, + "grad_norm": 0.6069925163791146, + "learning_rate": 2.151518449707727e-06, + "loss": 0.2759, + "step": 23831 + }, + { + "epoch": 1.116409799971893, + "grad_norm": 0.5791554588754826, + "learning_rate": 2.15133065207919e-06, + "loss": 0.2602, + "step": 23832 + }, + { + "epoch": 1.1164566449618214, + "grad_norm": 0.5511982124681253, + "learning_rate": 2.1511428564571586e-06, + "loss": 0.2627, + "step": 23833 + }, + { + "epoch": 1.1165034899517496, + "grad_norm": 0.6379652845520063, + "learning_rate": 2.1509550628427115e-06, + "loss": 0.2914, + "step": 23834 + }, + { + "epoch": 1.116550334941678, + "grad_norm": 0.5975797462947909, + "learning_rate": 2.1507672712369297e-06, + "loss": 0.2765, + "step": 23835 + }, + { + "epoch": 1.1165971799316063, + "grad_norm": 0.539903913701441, + "learning_rate": 2.150579481640895e-06, + "loss": 0.2532, + "step": 23836 + }, + { + "epoch": 1.1166440249215346, + "grad_norm": 0.6291821154415352, + "learning_rate": 2.1503916940556877e-06, + "loss": 0.3036, + "step": 23837 + }, + { + "epoch": 1.116690869911463, + "grad_norm": 0.5760094399068689, + "learning_rate": 2.1502039084823878e-06, + "loss": 0.2681, + "step": 23838 + }, + { + "epoch": 1.1167377149013913, + "grad_norm": 0.5937147321582357, + "learning_rate": 2.150016124922078e-06, + "loss": 0.2795, + "step": 23839 + }, + { + "epoch": 1.1167845598913195, + "grad_norm": 0.6102631489339864, + "learning_rate": 2.149828343375836e-06, + "loss": 0.2776, + "step": 23840 + }, + { + "epoch": 1.116831404881248, + "grad_norm": 0.5883477206441007, + "learning_rate": 2.1496405638447443e-06, + "loss": 0.2803, + "step": 23841 + }, + { + "epoch": 1.1168782498711762, + "grad_norm": 0.5857753378136042, + "learning_rate": 2.149452786329883e-06, + "loss": 0.2845, + "step": 23842 + }, + { + "epoch": 1.1169250948611047, + "grad_norm": 0.62344296740885, + "learning_rate": 2.149265010832332e-06, + "loss": 0.278, + "step": 23843 + }, + { + "epoch": 1.116971939851033, + "grad_norm": 0.5972099632962907, + "learning_rate": 2.149077237353174e-06, + "loss": 0.2853, + "step": 23844 + }, + { + "epoch": 1.1170187848409612, + "grad_norm": 0.6014799603422485, + "learning_rate": 2.1488894658934886e-06, + "loss": 0.2835, + "step": 23845 + }, + { + "epoch": 1.1170656298308896, + "grad_norm": 0.5505844975278428, + "learning_rate": 2.148701696454355e-06, + "loss": 0.2624, + "step": 23846 + }, + { + "epoch": 1.1171124748208179, + "grad_norm": 0.676770003653777, + "learning_rate": 2.1485139290368547e-06, + "loss": 0.2931, + "step": 23847 + }, + { + "epoch": 1.1171593198107463, + "grad_norm": 0.6077051023900701, + "learning_rate": 2.148326163642069e-06, + "loss": 0.2895, + "step": 23848 + }, + { + "epoch": 1.1172061648006746, + "grad_norm": 0.5966159397471149, + "learning_rate": 2.1481384002710774e-06, + "loss": 0.2819, + "step": 23849 + }, + { + "epoch": 1.1172530097906028, + "grad_norm": 0.6119118282466266, + "learning_rate": 2.147950638924961e-06, + "loss": 0.2829, + "step": 23850 + }, + { + "epoch": 1.1172998547805313, + "grad_norm": 0.5998905687743917, + "learning_rate": 2.147762879604801e-06, + "loss": 0.2821, + "step": 23851 + }, + { + "epoch": 1.1173466997704595, + "grad_norm": 0.6411300072355222, + "learning_rate": 2.1475751223116765e-06, + "loss": 0.2933, + "step": 23852 + }, + { + "epoch": 1.1173935447603878, + "grad_norm": 0.5806183524532349, + "learning_rate": 2.147387367046668e-06, + "loss": 0.2755, + "step": 23853 + }, + { + "epoch": 1.1174403897503162, + "grad_norm": 0.5653850329615776, + "learning_rate": 2.147199613810857e-06, + "loss": 0.2579, + "step": 23854 + }, + { + "epoch": 1.1174872347402445, + "grad_norm": 0.5784389003242667, + "learning_rate": 2.147011862605324e-06, + "loss": 0.2797, + "step": 23855 + }, + { + "epoch": 1.117534079730173, + "grad_norm": 0.5721306297012473, + "learning_rate": 2.1468241134311483e-06, + "loss": 0.2682, + "step": 23856 + }, + { + "epoch": 1.1175809247201012, + "grad_norm": 0.5519063003547613, + "learning_rate": 2.1466363662894124e-06, + "loss": 0.2787, + "step": 23857 + }, + { + "epoch": 1.1176277697100294, + "grad_norm": 0.5792025118751957, + "learning_rate": 2.1464486211811943e-06, + "loss": 0.2647, + "step": 23858 + }, + { + "epoch": 1.1176746146999579, + "grad_norm": 0.5673648599689981, + "learning_rate": 2.1462608781075756e-06, + "loss": 0.2627, + "step": 23859 + }, + { + "epoch": 1.1177214596898861, + "grad_norm": 0.5706767588795028, + "learning_rate": 2.1460731370696362e-06, + "loss": 0.2564, + "step": 23860 + }, + { + "epoch": 1.1177683046798146, + "grad_norm": 0.5851265875921781, + "learning_rate": 2.1458853980684575e-06, + "loss": 0.2803, + "step": 23861 + }, + { + "epoch": 1.1178151496697428, + "grad_norm": 0.5642149748420503, + "learning_rate": 2.1456976611051193e-06, + "loss": 0.2682, + "step": 23862 + }, + { + "epoch": 1.117861994659671, + "grad_norm": 0.5679823664851907, + "learning_rate": 2.145509926180703e-06, + "loss": 0.2534, + "step": 23863 + }, + { + "epoch": 1.1179088396495995, + "grad_norm": 0.6067779422117647, + "learning_rate": 2.1453221932962866e-06, + "loss": 0.2778, + "step": 23864 + }, + { + "epoch": 1.1179556846395278, + "grad_norm": 0.5457445156468158, + "learning_rate": 2.1451344624529516e-06, + "loss": 0.2664, + "step": 23865 + }, + { + "epoch": 1.1180025296294562, + "grad_norm": 0.6104493770168943, + "learning_rate": 2.144946733651779e-06, + "loss": 0.2819, + "step": 23866 + }, + { + "epoch": 1.1180493746193845, + "grad_norm": 0.601636744986572, + "learning_rate": 2.144759006893849e-06, + "loss": 0.2766, + "step": 23867 + }, + { + "epoch": 1.1180962196093127, + "grad_norm": 0.5831189658159913, + "learning_rate": 2.144571282180242e-06, + "loss": 0.2848, + "step": 23868 + }, + { + "epoch": 1.1181430645992412, + "grad_norm": 0.6050980334111251, + "learning_rate": 2.1443835595120364e-06, + "loss": 0.2828, + "step": 23869 + }, + { + "epoch": 1.1181899095891694, + "grad_norm": 0.5603226175927631, + "learning_rate": 2.144195838890316e-06, + "loss": 0.2691, + "step": 23870 + }, + { + "epoch": 1.1182367545790979, + "grad_norm": 0.5802722665091942, + "learning_rate": 2.144008120316158e-06, + "loss": 0.2673, + "step": 23871 + }, + { + "epoch": 1.1182835995690261, + "grad_norm": 0.6178545428356499, + "learning_rate": 2.143820403790644e-06, + "loss": 0.2773, + "step": 23872 + }, + { + "epoch": 1.1183304445589544, + "grad_norm": 0.5741987548988264, + "learning_rate": 2.143632689314854e-06, + "loss": 0.2713, + "step": 23873 + }, + { + "epoch": 1.1183772895488828, + "grad_norm": 0.5977508588693551, + "learning_rate": 2.143444976889868e-06, + "loss": 0.2848, + "step": 23874 + }, + { + "epoch": 1.118424134538811, + "grad_norm": 0.5873872651383943, + "learning_rate": 2.143257266516767e-06, + "loss": 0.2584, + "step": 23875 + }, + { + "epoch": 1.1184709795287393, + "grad_norm": 0.5799842713758534, + "learning_rate": 2.143069558196631e-06, + "loss": 0.2682, + "step": 23876 + }, + { + "epoch": 1.1185178245186678, + "grad_norm": 0.6031103382516005, + "learning_rate": 2.1428818519305396e-06, + "loss": 0.2956, + "step": 23877 + }, + { + "epoch": 1.118564669508596, + "grad_norm": 0.586297985116536, + "learning_rate": 2.142694147719573e-06, + "loss": 0.2826, + "step": 23878 + }, + { + "epoch": 1.1186115144985245, + "grad_norm": 0.5496349698358561, + "learning_rate": 2.1425064455648124e-06, + "loss": 0.2663, + "step": 23879 + }, + { + "epoch": 1.1186583594884527, + "grad_norm": 0.5570536430814056, + "learning_rate": 2.1423187454673372e-06, + "loss": 0.2524, + "step": 23880 + }, + { + "epoch": 1.118705204478381, + "grad_norm": 0.6547860433522152, + "learning_rate": 2.142131047428227e-06, + "loss": 0.2736, + "step": 23881 + }, + { + "epoch": 1.1187520494683094, + "grad_norm": 0.6225881550460874, + "learning_rate": 2.1419433514485645e-06, + "loss": 0.2875, + "step": 23882 + }, + { + "epoch": 1.1187988944582377, + "grad_norm": 0.567341826562716, + "learning_rate": 2.141755657529427e-06, + "loss": 0.2771, + "step": 23883 + }, + { + "epoch": 1.1188457394481661, + "grad_norm": 0.5743750969116024, + "learning_rate": 2.141567965671895e-06, + "loss": 0.2662, + "step": 23884 + }, + { + "epoch": 1.1188925844380944, + "grad_norm": 0.5761519820822973, + "learning_rate": 2.1413802758770493e-06, + "loss": 0.2654, + "step": 23885 + }, + { + "epoch": 1.1189394294280226, + "grad_norm": 0.5600557621803158, + "learning_rate": 2.1411925881459706e-06, + "loss": 0.2591, + "step": 23886 + }, + { + "epoch": 1.118986274417951, + "grad_norm": 0.5785378842342512, + "learning_rate": 2.141004902479738e-06, + "loss": 0.2732, + "step": 23887 + }, + { + "epoch": 1.1190331194078793, + "grad_norm": 0.5839809127589392, + "learning_rate": 2.140817218879433e-06, + "loss": 0.2799, + "step": 23888 + }, + { + "epoch": 1.1190799643978075, + "grad_norm": 0.6341106206198606, + "learning_rate": 2.1406295373461334e-06, + "loss": 0.2935, + "step": 23889 + }, + { + "epoch": 1.119126809387736, + "grad_norm": 0.652744621578206, + "learning_rate": 2.140441857880921e-06, + "loss": 0.2997, + "step": 23890 + }, + { + "epoch": 1.1191736543776643, + "grad_norm": 0.5988514464588606, + "learning_rate": 2.140254180484875e-06, + "loss": 0.2904, + "step": 23891 + }, + { + "epoch": 1.1192204993675927, + "grad_norm": 0.5866627894115589, + "learning_rate": 2.1400665051590758e-06, + "loss": 0.2862, + "step": 23892 + }, + { + "epoch": 1.119267344357521, + "grad_norm": 0.5604411207866133, + "learning_rate": 2.1398788319046037e-06, + "loss": 0.272, + "step": 23893 + }, + { + "epoch": 1.1193141893474492, + "grad_norm": 0.600967620608498, + "learning_rate": 2.139691160722539e-06, + "loss": 0.2861, + "step": 23894 + }, + { + "epoch": 1.1193610343373777, + "grad_norm": 0.6327653817987827, + "learning_rate": 2.1395034916139605e-06, + "loss": 0.2769, + "step": 23895 + }, + { + "epoch": 1.119407879327306, + "grad_norm": 0.5769538563693387, + "learning_rate": 2.139315824579948e-06, + "loss": 0.2725, + "step": 23896 + }, + { + "epoch": 1.1194547243172344, + "grad_norm": 0.5746930646157368, + "learning_rate": 2.1391281596215834e-06, + "loss": 0.2719, + "step": 23897 + }, + { + "epoch": 1.1195015693071626, + "grad_norm": 0.558034214119064, + "learning_rate": 2.1389404967399446e-06, + "loss": 0.251, + "step": 23898 + }, + { + "epoch": 1.1195484142970908, + "grad_norm": 0.6072856096132369, + "learning_rate": 2.1387528359361127e-06, + "loss": 0.2777, + "step": 23899 + }, + { + "epoch": 1.1195952592870193, + "grad_norm": 0.5957782503600002, + "learning_rate": 2.1385651772111686e-06, + "loss": 0.2832, + "step": 23900 + }, + { + "epoch": 1.1196421042769475, + "grad_norm": 0.613619845211929, + "learning_rate": 2.1383775205661906e-06, + "loss": 0.2727, + "step": 23901 + }, + { + "epoch": 1.119688949266876, + "grad_norm": 0.5558182453128215, + "learning_rate": 2.138189866002258e-06, + "loss": 0.2602, + "step": 23902 + }, + { + "epoch": 1.1197357942568043, + "grad_norm": 0.6090536629622495, + "learning_rate": 2.1380022135204526e-06, + "loss": 0.2716, + "step": 23903 + }, + { + "epoch": 1.1197826392467325, + "grad_norm": 0.6038517360872984, + "learning_rate": 2.1378145631218534e-06, + "loss": 0.2861, + "step": 23904 + }, + { + "epoch": 1.119829484236661, + "grad_norm": 0.5881711132114715, + "learning_rate": 2.13762691480754e-06, + "loss": 0.2843, + "step": 23905 + }, + { + "epoch": 1.1198763292265892, + "grad_norm": 0.6149966378593777, + "learning_rate": 2.137439268578594e-06, + "loss": 0.2797, + "step": 23906 + }, + { + "epoch": 1.1199231742165177, + "grad_norm": 0.6015857131083457, + "learning_rate": 2.137251624436092e-06, + "loss": 0.2946, + "step": 23907 + }, + { + "epoch": 1.119970019206446, + "grad_norm": 0.5482763310047551, + "learning_rate": 2.137063982381117e-06, + "loss": 0.2628, + "step": 23908 + }, + { + "epoch": 1.1200168641963741, + "grad_norm": 0.5658840715430605, + "learning_rate": 2.136876342414747e-06, + "loss": 0.2845, + "step": 23909 + }, + { + "epoch": 1.1200637091863026, + "grad_norm": 0.5763535069100895, + "learning_rate": 2.136688704538062e-06, + "loss": 0.2811, + "step": 23910 + }, + { + "epoch": 1.1201105541762308, + "grad_norm": 0.6040259163444329, + "learning_rate": 2.1365010687521427e-06, + "loss": 0.2818, + "step": 23911 + }, + { + "epoch": 1.120157399166159, + "grad_norm": 0.5746338192772003, + "learning_rate": 2.136313435058069e-06, + "loss": 0.2774, + "step": 23912 + }, + { + "epoch": 1.1202042441560875, + "grad_norm": 0.5580032183120142, + "learning_rate": 2.1361258034569195e-06, + "loss": 0.2783, + "step": 23913 + }, + { + "epoch": 1.1202510891460158, + "grad_norm": 0.5779868458476832, + "learning_rate": 2.135938173949774e-06, + "loss": 0.2757, + "step": 23914 + }, + { + "epoch": 1.1202979341359443, + "grad_norm": 0.5578537332684065, + "learning_rate": 2.1357505465377134e-06, + "loss": 0.2751, + "step": 23915 + }, + { + "epoch": 1.1203447791258725, + "grad_norm": 0.5460453038647345, + "learning_rate": 2.1355629212218163e-06, + "loss": 0.2766, + "step": 23916 + }, + { + "epoch": 1.1203916241158007, + "grad_norm": 0.5705980516472879, + "learning_rate": 2.135375298003164e-06, + "loss": 0.2717, + "step": 23917 + }, + { + "epoch": 1.1204384691057292, + "grad_norm": 0.5969440313011545, + "learning_rate": 2.1351876768828346e-06, + "loss": 0.2859, + "step": 23918 + }, + { + "epoch": 1.1204853140956574, + "grad_norm": 0.5781467504751578, + "learning_rate": 2.135000057861909e-06, + "loss": 0.2767, + "step": 23919 + }, + { + "epoch": 1.120532159085586, + "grad_norm": 0.579921809010491, + "learning_rate": 2.1348124409414654e-06, + "loss": 0.2769, + "step": 23920 + }, + { + "epoch": 1.1205790040755141, + "grad_norm": 0.6264796132900821, + "learning_rate": 2.134624826122585e-06, + "loss": 0.274, + "step": 23921 + }, + { + "epoch": 1.1206258490654424, + "grad_norm": 0.6337299315620266, + "learning_rate": 2.1344372134063463e-06, + "loss": 0.2888, + "step": 23922 + }, + { + "epoch": 1.1206726940553708, + "grad_norm": 0.5844963826070547, + "learning_rate": 2.13424960279383e-06, + "loss": 0.273, + "step": 23923 + }, + { + "epoch": 1.120719539045299, + "grad_norm": 0.5886725665527106, + "learning_rate": 2.134061994286116e-06, + "loss": 0.2812, + "step": 23924 + }, + { + "epoch": 1.1207663840352273, + "grad_norm": 0.5504002255504156, + "learning_rate": 2.1338743878842825e-06, + "loss": 0.2591, + "step": 23925 + }, + { + "epoch": 1.1208132290251558, + "grad_norm": 0.6236537836065758, + "learning_rate": 2.1336867835894096e-06, + "loss": 0.2929, + "step": 23926 + }, + { + "epoch": 1.120860074015084, + "grad_norm": 0.5566403090710842, + "learning_rate": 2.1334991814025773e-06, + "loss": 0.2503, + "step": 23927 + }, + { + "epoch": 1.1209069190050125, + "grad_norm": 0.6345377658669171, + "learning_rate": 2.1333115813248655e-06, + "loss": 0.3004, + "step": 23928 + }, + { + "epoch": 1.1209537639949407, + "grad_norm": 0.6245751974039797, + "learning_rate": 2.133123983357353e-06, + "loss": 0.2889, + "step": 23929 + }, + { + "epoch": 1.121000608984869, + "grad_norm": 0.5818243144012495, + "learning_rate": 2.1329363875011207e-06, + "loss": 0.2691, + "step": 23930 + }, + { + "epoch": 1.1210474539747974, + "grad_norm": 0.5662759829878252, + "learning_rate": 2.132748793757246e-06, + "loss": 0.273, + "step": 23931 + }, + { + "epoch": 1.1210942989647257, + "grad_norm": 0.5561308806452214, + "learning_rate": 2.1325612021268104e-06, + "loss": 0.2619, + "step": 23932 + }, + { + "epoch": 1.1211411439546541, + "grad_norm": 0.5893895791482794, + "learning_rate": 2.132373612610892e-06, + "loss": 0.2641, + "step": 23933 + }, + { + "epoch": 1.1211879889445824, + "grad_norm": 0.5734042307416825, + "learning_rate": 2.1321860252105712e-06, + "loss": 0.2599, + "step": 23934 + }, + { + "epoch": 1.1212348339345106, + "grad_norm": 0.5825437219905175, + "learning_rate": 2.1319984399269277e-06, + "loss": 0.2534, + "step": 23935 + }, + { + "epoch": 1.121281678924439, + "grad_norm": 0.5964272291270656, + "learning_rate": 2.1318108567610405e-06, + "loss": 0.2856, + "step": 23936 + }, + { + "epoch": 1.1213285239143673, + "grad_norm": 0.5971821902545167, + "learning_rate": 2.1316232757139902e-06, + "loss": 0.2801, + "step": 23937 + }, + { + "epoch": 1.1213753689042958, + "grad_norm": 0.653673888539433, + "learning_rate": 2.131435696786854e-06, + "loss": 0.2981, + "step": 23938 + }, + { + "epoch": 1.121422213894224, + "grad_norm": 0.6045801854720506, + "learning_rate": 2.131248119980714e-06, + "loss": 0.2788, + "step": 23939 + }, + { + "epoch": 1.1214690588841523, + "grad_norm": 0.5803019864056717, + "learning_rate": 2.1310605452966476e-06, + "loss": 0.29, + "step": 23940 + }, + { + "epoch": 1.1215159038740807, + "grad_norm": 0.6373250398893534, + "learning_rate": 2.1308729727357345e-06, + "loss": 0.2765, + "step": 23941 + }, + { + "epoch": 1.121562748864009, + "grad_norm": 0.5748681942550449, + "learning_rate": 2.130685402299056e-06, + "loss": 0.2837, + "step": 23942 + }, + { + "epoch": 1.1216095938539374, + "grad_norm": 0.5976988712921842, + "learning_rate": 2.1304978339876902e-06, + "loss": 0.2872, + "step": 23943 + }, + { + "epoch": 1.1216564388438657, + "grad_norm": 0.562023116019198, + "learning_rate": 2.1303102678027156e-06, + "loss": 0.2768, + "step": 23944 + }, + { + "epoch": 1.121703283833794, + "grad_norm": 0.5920303147857707, + "learning_rate": 2.1301227037452124e-06, + "loss": 0.2703, + "step": 23945 + }, + { + "epoch": 1.1217501288237224, + "grad_norm": 0.6206402125549904, + "learning_rate": 2.1299351418162607e-06, + "loss": 0.2837, + "step": 23946 + }, + { + "epoch": 1.1217969738136506, + "grad_norm": 0.6305626187878501, + "learning_rate": 2.1297475820169387e-06, + "loss": 0.286, + "step": 23947 + }, + { + "epoch": 1.1218438188035789, + "grad_norm": 0.593625227103594, + "learning_rate": 2.129560024348326e-06, + "loss": 0.2773, + "step": 23948 + }, + { + "epoch": 1.1218906637935073, + "grad_norm": 0.5350457076467551, + "learning_rate": 2.1293724688115037e-06, + "loss": 0.2616, + "step": 23949 + }, + { + "epoch": 1.1219375087834356, + "grad_norm": 0.5553594452945178, + "learning_rate": 2.129184915407549e-06, + "loss": 0.2638, + "step": 23950 + }, + { + "epoch": 1.121984353773364, + "grad_norm": 0.6105102404030163, + "learning_rate": 2.1289973641375412e-06, + "loss": 0.2647, + "step": 23951 + }, + { + "epoch": 1.1220311987632923, + "grad_norm": 0.5963527363158512, + "learning_rate": 2.1288098150025606e-06, + "loss": 0.2718, + "step": 23952 + }, + { + "epoch": 1.1220780437532205, + "grad_norm": 0.5851301195392904, + "learning_rate": 2.128622268003687e-06, + "loss": 0.273, + "step": 23953 + }, + { + "epoch": 1.122124888743149, + "grad_norm": 0.6093926569409764, + "learning_rate": 2.1284347231419975e-06, + "loss": 0.2965, + "step": 23954 + }, + { + "epoch": 1.1221717337330772, + "grad_norm": 0.6054895250109366, + "learning_rate": 2.1282471804185747e-06, + "loss": 0.2719, + "step": 23955 + }, + { + "epoch": 1.1222185787230057, + "grad_norm": 0.6267878673630604, + "learning_rate": 2.1280596398344946e-06, + "loss": 0.2873, + "step": 23956 + }, + { + "epoch": 1.122265423712934, + "grad_norm": 0.5573544752452084, + "learning_rate": 2.1278721013908383e-06, + "loss": 0.272, + "step": 23957 + }, + { + "epoch": 1.1223122687028622, + "grad_norm": 0.5653531835424574, + "learning_rate": 2.1276845650886837e-06, + "loss": 0.2769, + "step": 23958 + }, + { + "epoch": 1.1223591136927906, + "grad_norm": 0.5902997257517987, + "learning_rate": 2.1274970309291113e-06, + "loss": 0.28, + "step": 23959 + }, + { + "epoch": 1.1224059586827189, + "grad_norm": 0.6219711867542878, + "learning_rate": 2.1273094989132e-06, + "loss": 0.2813, + "step": 23960 + }, + { + "epoch": 1.122452803672647, + "grad_norm": 0.598234420420752, + "learning_rate": 2.1271219690420297e-06, + "loss": 0.281, + "step": 23961 + }, + { + "epoch": 1.1224996486625756, + "grad_norm": 0.6122907897417647, + "learning_rate": 2.1269344413166775e-06, + "loss": 0.2988, + "step": 23962 + }, + { + "epoch": 1.1225464936525038, + "grad_norm": 0.5857413896071332, + "learning_rate": 2.1267469157382243e-06, + "loss": 0.255, + "step": 23963 + }, + { + "epoch": 1.1225933386424323, + "grad_norm": 0.598720567901613, + "learning_rate": 2.126559392307749e-06, + "loss": 0.2849, + "step": 23964 + }, + { + "epoch": 1.1226401836323605, + "grad_norm": 0.5872513868164163, + "learning_rate": 2.12637187102633e-06, + "loss": 0.2662, + "step": 23965 + }, + { + "epoch": 1.1226870286222888, + "grad_norm": 0.5973662137941081, + "learning_rate": 2.126184351895047e-06, + "loss": 0.2788, + "step": 23966 + }, + { + "epoch": 1.1227338736122172, + "grad_norm": 0.6019008446279699, + "learning_rate": 2.12599683491498e-06, + "loss": 0.2665, + "step": 23967 + }, + { + "epoch": 1.1227807186021455, + "grad_norm": 0.5821808942125055, + "learning_rate": 2.1258093200872067e-06, + "loss": 0.2812, + "step": 23968 + }, + { + "epoch": 1.122827563592074, + "grad_norm": 0.584453555212196, + "learning_rate": 2.1256218074128058e-06, + "loss": 0.277, + "step": 23969 + }, + { + "epoch": 1.1228744085820022, + "grad_norm": 0.5929635694373822, + "learning_rate": 2.1254342968928586e-06, + "loss": 0.2767, + "step": 23970 + }, + { + "epoch": 1.1229212535719304, + "grad_norm": 0.5826268461380817, + "learning_rate": 2.125246788528442e-06, + "loss": 0.2718, + "step": 23971 + }, + { + "epoch": 1.1229680985618589, + "grad_norm": 0.607363835808848, + "learning_rate": 2.125059282320636e-06, + "loss": 0.2835, + "step": 23972 + }, + { + "epoch": 1.123014943551787, + "grad_norm": 0.6001179805107774, + "learning_rate": 2.1248717782705204e-06, + "loss": 0.2607, + "step": 23973 + }, + { + "epoch": 1.1230617885417156, + "grad_norm": 0.6088769380235953, + "learning_rate": 2.1246842763791733e-06, + "loss": 0.2749, + "step": 23974 + }, + { + "epoch": 1.1231086335316438, + "grad_norm": 0.5871154806045497, + "learning_rate": 2.124496776647673e-06, + "loss": 0.2817, + "step": 23975 + }, + { + "epoch": 1.123155478521572, + "grad_norm": 0.5530298713640704, + "learning_rate": 2.1243092790770995e-06, + "loss": 0.255, + "step": 23976 + }, + { + "epoch": 1.1232023235115005, + "grad_norm": 0.6698814161930643, + "learning_rate": 2.1241217836685322e-06, + "loss": 0.2863, + "step": 23977 + }, + { + "epoch": 1.1232491685014288, + "grad_norm": 0.54709694996177, + "learning_rate": 2.123934290423049e-06, + "loss": 0.2622, + "step": 23978 + }, + { + "epoch": 1.1232960134913572, + "grad_norm": 0.5944314532362023, + "learning_rate": 2.123746799341731e-06, + "loss": 0.281, + "step": 23979 + }, + { + "epoch": 1.1233428584812855, + "grad_norm": 0.6647317893773665, + "learning_rate": 2.123559310425654e-06, + "loss": 0.2783, + "step": 23980 + }, + { + "epoch": 1.1233897034712137, + "grad_norm": 0.5318410050981525, + "learning_rate": 2.123371823675899e-06, + "loss": 0.2492, + "step": 23981 + }, + { + "epoch": 1.1234365484611422, + "grad_norm": 0.5965502536791047, + "learning_rate": 2.123184339093544e-06, + "loss": 0.2784, + "step": 23982 + }, + { + "epoch": 1.1234833934510704, + "grad_norm": 0.5958053844121561, + "learning_rate": 2.122996856679669e-06, + "loss": 0.2699, + "step": 23983 + }, + { + "epoch": 1.1235302384409986, + "grad_norm": 0.5737709287416719, + "learning_rate": 2.122809376435352e-06, + "loss": 0.2675, + "step": 23984 + }, + { + "epoch": 1.123577083430927, + "grad_norm": 0.5970465518532375, + "learning_rate": 2.1226218983616736e-06, + "loss": 0.286, + "step": 23985 + }, + { + "epoch": 1.1236239284208553, + "grad_norm": 0.617808461319744, + "learning_rate": 2.1224344224597097e-06, + "loss": 0.2881, + "step": 23986 + }, + { + "epoch": 1.1236707734107838, + "grad_norm": 0.6055417108555058, + "learning_rate": 2.1222469487305408e-06, + "loss": 0.2754, + "step": 23987 + }, + { + "epoch": 1.123717618400712, + "grad_norm": 0.6505361745630525, + "learning_rate": 2.1220594771752463e-06, + "loss": 0.2916, + "step": 23988 + }, + { + "epoch": 1.1237644633906403, + "grad_norm": 0.5876447792214721, + "learning_rate": 2.121872007794904e-06, + "loss": 0.2713, + "step": 23989 + }, + { + "epoch": 1.1238113083805688, + "grad_norm": 0.5468091786399704, + "learning_rate": 2.1216845405905935e-06, + "loss": 0.255, + "step": 23990 + }, + { + "epoch": 1.123858153370497, + "grad_norm": 0.6009439810019984, + "learning_rate": 2.1214970755633947e-06, + "loss": 0.2867, + "step": 23991 + }, + { + "epoch": 1.1239049983604255, + "grad_norm": 0.5786497223597843, + "learning_rate": 2.121309612714384e-06, + "loss": 0.2834, + "step": 23992 + }, + { + "epoch": 1.1239518433503537, + "grad_norm": 0.5713356781933729, + "learning_rate": 2.121122152044641e-06, + "loss": 0.2798, + "step": 23993 + }, + { + "epoch": 1.123998688340282, + "grad_norm": 0.6139632300622766, + "learning_rate": 2.1209346935552448e-06, + "loss": 0.2968, + "step": 23994 + }, + { + "epoch": 1.1240455333302104, + "grad_norm": 0.6294461890730251, + "learning_rate": 2.1207472372472743e-06, + "loss": 0.2796, + "step": 23995 + }, + { + "epoch": 1.1240923783201386, + "grad_norm": 0.5997864910408105, + "learning_rate": 2.1205597831218084e-06, + "loss": 0.2772, + "step": 23996 + }, + { + "epoch": 1.1241392233100669, + "grad_norm": 0.6060277207921345, + "learning_rate": 2.120372331179926e-06, + "loss": 0.2811, + "step": 23997 + }, + { + "epoch": 1.1241860682999953, + "grad_norm": 0.5862954908894794, + "learning_rate": 2.1201848814227047e-06, + "loss": 0.2671, + "step": 23998 + }, + { + "epoch": 1.1242329132899236, + "grad_norm": 0.6059549081910376, + "learning_rate": 2.119997433851224e-06, + "loss": 0.2864, + "step": 23999 + }, + { + "epoch": 1.124279758279852, + "grad_norm": 0.5817064261842957, + "learning_rate": 2.1198099884665625e-06, + "loss": 0.2745, + "step": 24000 + }, + { + "epoch": 1.1243266032697803, + "grad_norm": 0.5598942228406119, + "learning_rate": 2.119622545269799e-06, + "loss": 0.2568, + "step": 24001 + }, + { + "epoch": 1.1243734482597085, + "grad_norm": 0.5702877413030005, + "learning_rate": 2.119435104262012e-06, + "loss": 0.2673, + "step": 24002 + }, + { + "epoch": 1.124420293249637, + "grad_norm": 0.5858491537040489, + "learning_rate": 2.1192476654442806e-06, + "loss": 0.2739, + "step": 24003 + }, + { + "epoch": 1.1244671382395652, + "grad_norm": 0.6288791408729155, + "learning_rate": 2.119060228817684e-06, + "loss": 0.2852, + "step": 24004 + }, + { + "epoch": 1.1245139832294937, + "grad_norm": 0.597316432375419, + "learning_rate": 2.118872794383299e-06, + "loss": 0.2783, + "step": 24005 + }, + { + "epoch": 1.124560828219422, + "grad_norm": 0.5385705817325696, + "learning_rate": 2.1186853621422056e-06, + "loss": 0.2688, + "step": 24006 + }, + { + "epoch": 1.1246076732093502, + "grad_norm": 0.625429376417483, + "learning_rate": 2.1184979320954816e-06, + "loss": 0.289, + "step": 24007 + }, + { + "epoch": 1.1246545181992786, + "grad_norm": 0.5632531892055629, + "learning_rate": 2.118310504244207e-06, + "loss": 0.2713, + "step": 24008 + }, + { + "epoch": 1.1247013631892069, + "grad_norm": 0.6354760142357722, + "learning_rate": 2.1181230785894584e-06, + "loss": 0.2972, + "step": 24009 + }, + { + "epoch": 1.1247482081791353, + "grad_norm": 0.5821504250862968, + "learning_rate": 2.117935655132317e-06, + "loss": 0.2816, + "step": 24010 + }, + { + "epoch": 1.1247950531690636, + "grad_norm": 0.6115229704733935, + "learning_rate": 2.1177482338738585e-06, + "loss": 0.2895, + "step": 24011 + }, + { + "epoch": 1.1248418981589918, + "grad_norm": 0.5670624217451654, + "learning_rate": 2.117560814815163e-06, + "loss": 0.2748, + "step": 24012 + }, + { + "epoch": 1.1248887431489203, + "grad_norm": 0.5261844650476352, + "learning_rate": 2.1173733979573097e-06, + "loss": 0.2537, + "step": 24013 + }, + { + "epoch": 1.1249355881388485, + "grad_norm": 0.5921866382655916, + "learning_rate": 2.1171859833013755e-06, + "loss": 0.285, + "step": 24014 + }, + { + "epoch": 1.124982433128777, + "grad_norm": 0.6118496845838988, + "learning_rate": 2.11699857084844e-06, + "loss": 0.2851, + "step": 24015 + }, + { + "epoch": 1.1250292781187052, + "grad_norm": 0.6165586220748237, + "learning_rate": 2.116811160599582e-06, + "loss": 0.2807, + "step": 24016 + }, + { + "epoch": 1.1250761231086335, + "grad_norm": 0.6332864935488398, + "learning_rate": 2.1166237525558793e-06, + "loss": 0.2886, + "step": 24017 + }, + { + "epoch": 1.125122968098562, + "grad_norm": 0.532800699809236, + "learning_rate": 2.1164363467184103e-06, + "loss": 0.2656, + "step": 24018 + }, + { + "epoch": 1.1251698130884902, + "grad_norm": 0.5979436573384866, + "learning_rate": 2.116248943088254e-06, + "loss": 0.2763, + "step": 24019 + }, + { + "epoch": 1.1252166580784184, + "grad_norm": 0.5485398769328848, + "learning_rate": 2.1160615416664875e-06, + "loss": 0.2665, + "step": 24020 + }, + { + "epoch": 1.1252635030683469, + "grad_norm": 0.5514747977832594, + "learning_rate": 2.115874142454191e-06, + "loss": 0.2638, + "step": 24021 + }, + { + "epoch": 1.1253103480582751, + "grad_norm": 0.5657935973413097, + "learning_rate": 2.115686745452443e-06, + "loss": 0.2687, + "step": 24022 + }, + { + "epoch": 1.1253571930482036, + "grad_norm": 0.5981664867475084, + "learning_rate": 2.1154993506623208e-06, + "loss": 0.2796, + "step": 24023 + }, + { + "epoch": 1.1254040380381318, + "grad_norm": 0.5737197864167484, + "learning_rate": 2.1153119580849026e-06, + "loss": 0.2757, + "step": 24024 + }, + { + "epoch": 1.12545088302806, + "grad_norm": 0.5992192349100635, + "learning_rate": 2.1151245677212673e-06, + "loss": 0.2675, + "step": 24025 + }, + { + "epoch": 1.1254977280179885, + "grad_norm": 0.5998134439840792, + "learning_rate": 2.1149371795724945e-06, + "loss": 0.2717, + "step": 24026 + }, + { + "epoch": 1.1255445730079168, + "grad_norm": 0.6571581703318992, + "learning_rate": 2.11474979363966e-06, + "loss": 0.2815, + "step": 24027 + }, + { + "epoch": 1.1255914179978452, + "grad_norm": 0.5700727146419083, + "learning_rate": 2.1145624099238452e-06, + "loss": 0.2714, + "step": 24028 + }, + { + "epoch": 1.1256382629877735, + "grad_norm": 0.5760080305861819, + "learning_rate": 2.114375028426126e-06, + "loss": 0.2742, + "step": 24029 + }, + { + "epoch": 1.1256851079777017, + "grad_norm": 0.5806999670403822, + "learning_rate": 2.1141876491475815e-06, + "loss": 0.2701, + "step": 24030 + }, + { + "epoch": 1.1257319529676302, + "grad_norm": 0.5402516290944142, + "learning_rate": 2.11400027208929e-06, + "loss": 0.2565, + "step": 24031 + }, + { + "epoch": 1.1257787979575584, + "grad_norm": 0.558501367049757, + "learning_rate": 2.11381289725233e-06, + "loss": 0.2695, + "step": 24032 + }, + { + "epoch": 1.1258256429474867, + "grad_norm": 0.5464237654896719, + "learning_rate": 2.11362552463778e-06, + "loss": 0.2584, + "step": 24033 + }, + { + "epoch": 1.1258724879374151, + "grad_norm": 0.6846381089006508, + "learning_rate": 2.1134381542467188e-06, + "loss": 0.3149, + "step": 24034 + }, + { + "epoch": 1.1259193329273434, + "grad_norm": 0.5726805967331434, + "learning_rate": 2.1132507860802222e-06, + "loss": 0.2757, + "step": 24035 + }, + { + "epoch": 1.1259661779172718, + "grad_norm": 0.5889507666266826, + "learning_rate": 2.113063420139371e-06, + "loss": 0.2734, + "step": 24036 + }, + { + "epoch": 1.1260130229072, + "grad_norm": 0.5413379339074231, + "learning_rate": 2.1128760564252424e-06, + "loss": 0.256, + "step": 24037 + }, + { + "epoch": 1.1260598678971283, + "grad_norm": 0.5679865406986275, + "learning_rate": 2.112688694938914e-06, + "loss": 0.2765, + "step": 24038 + }, + { + "epoch": 1.1261067128870568, + "grad_norm": 0.571939699688052, + "learning_rate": 2.1125013356814655e-06, + "loss": 0.2746, + "step": 24039 + }, + { + "epoch": 1.126153557876985, + "grad_norm": 0.5747039068352839, + "learning_rate": 2.1123139786539753e-06, + "loss": 0.2592, + "step": 24040 + }, + { + "epoch": 1.1262004028669135, + "grad_norm": 0.5841783021298427, + "learning_rate": 2.1121266238575203e-06, + "loss": 0.2751, + "step": 24041 + }, + { + "epoch": 1.1262472478568417, + "grad_norm": 0.5832074603748995, + "learning_rate": 2.1119392712931783e-06, + "loss": 0.2744, + "step": 24042 + }, + { + "epoch": 1.12629409284677, + "grad_norm": 0.5941236611519622, + "learning_rate": 2.1117519209620286e-06, + "loss": 0.2723, + "step": 24043 + }, + { + "epoch": 1.1263409378366984, + "grad_norm": 0.5515388718317786, + "learning_rate": 2.111564572865149e-06, + "loss": 0.2549, + "step": 24044 + }, + { + "epoch": 1.1263877828266267, + "grad_norm": 0.5900459776004721, + "learning_rate": 2.1113772270036176e-06, + "loss": 0.2786, + "step": 24045 + }, + { + "epoch": 1.126434627816555, + "grad_norm": 0.5781413078481219, + "learning_rate": 2.1111898833785137e-06, + "loss": 0.2689, + "step": 24046 + }, + { + "epoch": 1.1264814728064834, + "grad_norm": 0.5830760460522254, + "learning_rate": 2.111002541990913e-06, + "loss": 0.2674, + "step": 24047 + }, + { + "epoch": 1.1265283177964116, + "grad_norm": 0.5886386219050241, + "learning_rate": 2.1108152028418953e-06, + "loss": 0.2851, + "step": 24048 + }, + { + "epoch": 1.12657516278634, + "grad_norm": 0.5765005959843558, + "learning_rate": 2.110627865932538e-06, + "loss": 0.2724, + "step": 24049 + }, + { + "epoch": 1.1266220077762683, + "grad_norm": 0.5818689730350222, + "learning_rate": 2.1104405312639193e-06, + "loss": 0.2832, + "step": 24050 + }, + { + "epoch": 1.1266688527661968, + "grad_norm": 0.5883061400183646, + "learning_rate": 2.110253198837118e-06, + "loss": 0.272, + "step": 24051 + }, + { + "epoch": 1.126715697756125, + "grad_norm": 0.5690082943325918, + "learning_rate": 2.110065868653212e-06, + "loss": 0.2634, + "step": 24052 + }, + { + "epoch": 1.1267625427460533, + "grad_norm": 0.5914747526506484, + "learning_rate": 2.1098785407132784e-06, + "loss": 0.2764, + "step": 24053 + }, + { + "epoch": 1.1268093877359817, + "grad_norm": 0.601423853195136, + "learning_rate": 2.1096912150183953e-06, + "loss": 0.2721, + "step": 24054 + }, + { + "epoch": 1.12685623272591, + "grad_norm": 0.6376665290998002, + "learning_rate": 2.1095038915696412e-06, + "loss": 0.2892, + "step": 24055 + }, + { + "epoch": 1.1269030777158382, + "grad_norm": 0.5404457514230476, + "learning_rate": 2.1093165703680943e-06, + "loss": 0.2607, + "step": 24056 + }, + { + "epoch": 1.1269499227057667, + "grad_norm": 0.5606067364665694, + "learning_rate": 2.1091292514148325e-06, + "loss": 0.2601, + "step": 24057 + }, + { + "epoch": 1.126996767695695, + "grad_norm": 0.5989721670727854, + "learning_rate": 2.108941934710934e-06, + "loss": 0.2701, + "step": 24058 + }, + { + "epoch": 1.1270436126856234, + "grad_norm": 0.5568782504255487, + "learning_rate": 2.108754620257476e-06, + "loss": 0.2742, + "step": 24059 + }, + { + "epoch": 1.1270904576755516, + "grad_norm": 0.565143885715257, + "learning_rate": 2.1085673080555366e-06, + "loss": 0.2551, + "step": 24060 + }, + { + "epoch": 1.1271373026654798, + "grad_norm": 0.5707833988676363, + "learning_rate": 2.1083799981061943e-06, + "loss": 0.2758, + "step": 24061 + }, + { + "epoch": 1.1271841476554083, + "grad_norm": 0.6330990084282698, + "learning_rate": 2.108192690410526e-06, + "loss": 0.3015, + "step": 24062 + }, + { + "epoch": 1.1272309926453365, + "grad_norm": 0.5640222162699814, + "learning_rate": 2.1080053849696105e-06, + "loss": 0.2666, + "step": 24063 + }, + { + "epoch": 1.127277837635265, + "grad_norm": 0.5978807360801842, + "learning_rate": 2.107818081784527e-06, + "loss": 0.2808, + "step": 24064 + }, + { + "epoch": 1.1273246826251933, + "grad_norm": 0.5615101606950087, + "learning_rate": 2.1076307808563505e-06, + "loss": 0.2668, + "step": 24065 + }, + { + "epoch": 1.1273715276151215, + "grad_norm": 0.5690990801949795, + "learning_rate": 2.10744348218616e-06, + "loss": 0.2644, + "step": 24066 + }, + { + "epoch": 1.12741837260505, + "grad_norm": 0.5786760305942167, + "learning_rate": 2.1072561857750334e-06, + "loss": 0.2785, + "step": 24067 + }, + { + "epoch": 1.1274652175949782, + "grad_norm": 0.6221263788187339, + "learning_rate": 2.10706889162405e-06, + "loss": 0.2708, + "step": 24068 + }, + { + "epoch": 1.1275120625849064, + "grad_norm": 0.6293389521811916, + "learning_rate": 2.106881599734285e-06, + "loss": 0.2846, + "step": 24069 + }, + { + "epoch": 1.127558907574835, + "grad_norm": 0.5943486640642724, + "learning_rate": 2.106694310106818e-06, + "loss": 0.2827, + "step": 24070 + }, + { + "epoch": 1.1276057525647631, + "grad_norm": 0.6072757864820654, + "learning_rate": 2.1065070227427275e-06, + "loss": 0.2785, + "step": 24071 + }, + { + "epoch": 1.1276525975546916, + "grad_norm": 0.5648833419868291, + "learning_rate": 2.1063197376430894e-06, + "loss": 0.284, + "step": 24072 + }, + { + "epoch": 1.1276994425446198, + "grad_norm": 0.6010153549149495, + "learning_rate": 2.106132454808982e-06, + "loss": 0.2705, + "step": 24073 + }, + { + "epoch": 1.127746287534548, + "grad_norm": 0.5558981167717758, + "learning_rate": 2.1059451742414834e-06, + "loss": 0.2721, + "step": 24074 + }, + { + "epoch": 1.1277931325244765, + "grad_norm": 0.6010901652541312, + "learning_rate": 2.105757895941671e-06, + "loss": 0.2871, + "step": 24075 + }, + { + "epoch": 1.1278399775144048, + "grad_norm": 0.5528585628659591, + "learning_rate": 2.105570619910623e-06, + "loss": 0.2639, + "step": 24076 + }, + { + "epoch": 1.1278868225043333, + "grad_norm": 0.5929174787386966, + "learning_rate": 2.1053833461494184e-06, + "loss": 0.2757, + "step": 24077 + }, + { + "epoch": 1.1279336674942615, + "grad_norm": 0.5627405260040763, + "learning_rate": 2.1051960746591315e-06, + "loss": 0.2686, + "step": 24078 + }, + { + "epoch": 1.1279805124841897, + "grad_norm": 0.5474938936343587, + "learning_rate": 2.1050088054408428e-06, + "loss": 0.2598, + "step": 24079 + }, + { + "epoch": 1.1280273574741182, + "grad_norm": 0.5524754492637638, + "learning_rate": 2.1048215384956288e-06, + "loss": 0.2625, + "step": 24080 + }, + { + "epoch": 1.1280742024640464, + "grad_norm": 0.6083239728263478, + "learning_rate": 2.104634273824568e-06, + "loss": 0.2846, + "step": 24081 + }, + { + "epoch": 1.1281210474539747, + "grad_norm": 0.6218182687723364, + "learning_rate": 2.1044470114287373e-06, + "loss": 0.2748, + "step": 24082 + }, + { + "epoch": 1.1281678924439031, + "grad_norm": 0.6252040515302976, + "learning_rate": 2.1042597513092154e-06, + "loss": 0.2994, + "step": 24083 + }, + { + "epoch": 1.1282147374338314, + "grad_norm": 0.552478310162137, + "learning_rate": 2.104072493467078e-06, + "loss": 0.2705, + "step": 24084 + }, + { + "epoch": 1.1282615824237598, + "grad_norm": 0.5857099364856313, + "learning_rate": 2.1038852379034043e-06, + "loss": 0.2726, + "step": 24085 + }, + { + "epoch": 1.128308427413688, + "grad_norm": 0.5708675980237959, + "learning_rate": 2.1036979846192715e-06, + "loss": 0.2835, + "step": 24086 + }, + { + "epoch": 1.1283552724036165, + "grad_norm": 0.6210156589029352, + "learning_rate": 2.1035107336157567e-06, + "loss": 0.2823, + "step": 24087 + }, + { + "epoch": 1.1284021173935448, + "grad_norm": 0.5905752300513116, + "learning_rate": 2.103323484893938e-06, + "loss": 0.2773, + "step": 24088 + }, + { + "epoch": 1.128448962383473, + "grad_norm": 0.5914439259234958, + "learning_rate": 2.1031362384548946e-06, + "loss": 0.2836, + "step": 24089 + }, + { + "epoch": 1.1284958073734015, + "grad_norm": 0.638796042693367, + "learning_rate": 2.1029489942997013e-06, + "loss": 0.2689, + "step": 24090 + }, + { + "epoch": 1.1285426523633297, + "grad_norm": 0.5874598291046567, + "learning_rate": 2.1027617524294366e-06, + "loss": 0.2723, + "step": 24091 + }, + { + "epoch": 1.128589497353258, + "grad_norm": 0.583144535969907, + "learning_rate": 2.102574512845178e-06, + "loss": 0.2549, + "step": 24092 + }, + { + "epoch": 1.1286363423431864, + "grad_norm": 0.6029784308998879, + "learning_rate": 2.1023872755480033e-06, + "loss": 0.2805, + "step": 24093 + }, + { + "epoch": 1.1286831873331147, + "grad_norm": 0.5542653855740365, + "learning_rate": 2.1022000405389896e-06, + "loss": 0.2637, + "step": 24094 + }, + { + "epoch": 1.1287300323230431, + "grad_norm": 0.5899200636465237, + "learning_rate": 2.102012807819216e-06, + "loss": 0.2629, + "step": 24095 + }, + { + "epoch": 1.1287768773129714, + "grad_norm": 0.5962813954170645, + "learning_rate": 2.1018255773897574e-06, + "loss": 0.2818, + "step": 24096 + }, + { + "epoch": 1.1288237223028996, + "grad_norm": 0.6110109893478264, + "learning_rate": 2.1016383492516933e-06, + "loss": 0.2615, + "step": 24097 + }, + { + "epoch": 1.128870567292828, + "grad_norm": 0.6439444355947423, + "learning_rate": 2.1014511234060993e-06, + "loss": 0.2671, + "step": 24098 + }, + { + "epoch": 1.1289174122827563, + "grad_norm": 0.6236672583950349, + "learning_rate": 2.1012638998540546e-06, + "loss": 0.2635, + "step": 24099 + }, + { + "epoch": 1.1289642572726848, + "grad_norm": 0.6003718594469113, + "learning_rate": 2.101076678596636e-06, + "loss": 0.2798, + "step": 24100 + }, + { + "epoch": 1.129011102262613, + "grad_norm": 0.6155142585097995, + "learning_rate": 2.1008894596349216e-06, + "loss": 0.2837, + "step": 24101 + }, + { + "epoch": 1.1290579472525413, + "grad_norm": 0.5937187149132948, + "learning_rate": 2.100702242969987e-06, + "loss": 0.2766, + "step": 24102 + }, + { + "epoch": 1.1291047922424697, + "grad_norm": 0.5273972486300466, + "learning_rate": 2.1005150286029104e-06, + "loss": 0.251, + "step": 24103 + }, + { + "epoch": 1.129151637232398, + "grad_norm": 0.6195445342871742, + "learning_rate": 2.10032781653477e-06, + "loss": 0.2559, + "step": 24104 + }, + { + "epoch": 1.1291984822223262, + "grad_norm": 0.5774139258195493, + "learning_rate": 2.100140606766642e-06, + "loss": 0.2727, + "step": 24105 + }, + { + "epoch": 1.1292453272122547, + "grad_norm": 0.5875269050303508, + "learning_rate": 2.0999533992996047e-06, + "loss": 0.264, + "step": 24106 + }, + { + "epoch": 1.129292172202183, + "grad_norm": 0.5594215631384042, + "learning_rate": 2.0997661941347356e-06, + "loss": 0.2664, + "step": 24107 + }, + { + "epoch": 1.1293390171921114, + "grad_norm": 0.6019205977968359, + "learning_rate": 2.0995789912731113e-06, + "loss": 0.2852, + "step": 24108 + }, + { + "epoch": 1.1293858621820396, + "grad_norm": 0.6126387923973728, + "learning_rate": 2.0993917907158083e-06, + "loss": 0.2747, + "step": 24109 + }, + { + "epoch": 1.1294327071719679, + "grad_norm": 0.6187528854835821, + "learning_rate": 2.099204592463906e-06, + "loss": 0.2676, + "step": 24110 + }, + { + "epoch": 1.1294795521618963, + "grad_norm": 0.6115048135153445, + "learning_rate": 2.099017396518479e-06, + "loss": 0.2787, + "step": 24111 + }, + { + "epoch": 1.1295263971518246, + "grad_norm": 0.5487278478473813, + "learning_rate": 2.098830202880607e-06, + "loss": 0.258, + "step": 24112 + }, + { + "epoch": 1.129573242141753, + "grad_norm": 0.6020179643210976, + "learning_rate": 2.098643011551367e-06, + "loss": 0.2639, + "step": 24113 + }, + { + "epoch": 1.1296200871316813, + "grad_norm": 0.6114734822204483, + "learning_rate": 2.0984558225318354e-06, + "loss": 0.2797, + "step": 24114 + }, + { + "epoch": 1.1296669321216095, + "grad_norm": 0.594991037493843, + "learning_rate": 2.098268635823089e-06, + "loss": 0.272, + "step": 24115 + }, + { + "epoch": 1.129713777111538, + "grad_norm": 0.5738930978380579, + "learning_rate": 2.0980814514262054e-06, + "loss": 0.2721, + "step": 24116 + }, + { + "epoch": 1.1297606221014662, + "grad_norm": 0.5699385566132739, + "learning_rate": 2.097894269342263e-06, + "loss": 0.2723, + "step": 24117 + }, + { + "epoch": 1.1298074670913945, + "grad_norm": 0.5983147668478331, + "learning_rate": 2.097707089572337e-06, + "loss": 0.2746, + "step": 24118 + }, + { + "epoch": 1.129854312081323, + "grad_norm": 0.599184389471038, + "learning_rate": 2.097519912117507e-06, + "loss": 0.2722, + "step": 24119 + }, + { + "epoch": 1.1299011570712512, + "grad_norm": 0.5849822546400467, + "learning_rate": 2.0973327369788473e-06, + "loss": 0.2671, + "step": 24120 + }, + { + "epoch": 1.1299480020611796, + "grad_norm": 0.6357189192865431, + "learning_rate": 2.0971455641574375e-06, + "loss": 0.2851, + "step": 24121 + }, + { + "epoch": 1.1299948470511079, + "grad_norm": 0.618303095089122, + "learning_rate": 2.0969583936543526e-06, + "loss": 0.278, + "step": 24122 + }, + { + "epoch": 1.1300416920410363, + "grad_norm": 0.6190182655201714, + "learning_rate": 2.0967712254706713e-06, + "loss": 0.2829, + "step": 24123 + }, + { + "epoch": 1.1300885370309646, + "grad_norm": 0.5598302015297878, + "learning_rate": 2.0965840596074707e-06, + "loss": 0.273, + "step": 24124 + }, + { + "epoch": 1.1301353820208928, + "grad_norm": 0.6140391097021177, + "learning_rate": 2.0963968960658277e-06, + "loss": 0.2958, + "step": 24125 + }, + { + "epoch": 1.1301822270108213, + "grad_norm": 0.5969542304606539, + "learning_rate": 2.0962097348468185e-06, + "loss": 0.2694, + "step": 24126 + }, + { + "epoch": 1.1302290720007495, + "grad_norm": 0.5745471733013702, + "learning_rate": 2.0960225759515206e-06, + "loss": 0.2696, + "step": 24127 + }, + { + "epoch": 1.1302759169906778, + "grad_norm": 0.590119014983571, + "learning_rate": 2.0958354193810117e-06, + "loss": 0.2744, + "step": 24128 + }, + { + "epoch": 1.1303227619806062, + "grad_norm": 0.6155145721138724, + "learning_rate": 2.095648265136368e-06, + "loss": 0.2885, + "step": 24129 + }, + { + "epoch": 1.1303696069705345, + "grad_norm": 0.5692900564019061, + "learning_rate": 2.0954611132186666e-06, + "loss": 0.2806, + "step": 24130 + }, + { + "epoch": 1.130416451960463, + "grad_norm": 0.6018024555534538, + "learning_rate": 2.095273963628986e-06, + "loss": 0.2882, + "step": 24131 + }, + { + "epoch": 1.1304632969503912, + "grad_norm": 0.6606230800834557, + "learning_rate": 2.095086816368402e-06, + "loss": 0.3148, + "step": 24132 + }, + { + "epoch": 1.1305101419403194, + "grad_norm": 0.5942352029833686, + "learning_rate": 2.0948996714379904e-06, + "loss": 0.2894, + "step": 24133 + }, + { + "epoch": 1.1305569869302479, + "grad_norm": 0.5557494467641433, + "learning_rate": 2.0947125288388296e-06, + "loss": 0.2574, + "step": 24134 + }, + { + "epoch": 1.130603831920176, + "grad_norm": 0.5561587930038482, + "learning_rate": 2.094525388571997e-06, + "loss": 0.2749, + "step": 24135 + }, + { + "epoch": 1.1306506769101046, + "grad_norm": 0.6959532650515339, + "learning_rate": 2.094338250638568e-06, + "loss": 0.2866, + "step": 24136 + }, + { + "epoch": 1.1306975219000328, + "grad_norm": 0.5686120159681111, + "learning_rate": 2.094151115039621e-06, + "loss": 0.2744, + "step": 24137 + }, + { + "epoch": 1.130744366889961, + "grad_norm": 0.6020373538990373, + "learning_rate": 2.0939639817762327e-06, + "loss": 0.2822, + "step": 24138 + }, + { + "epoch": 1.1307912118798895, + "grad_norm": 0.652236226482415, + "learning_rate": 2.0937768508494795e-06, + "loss": 0.2761, + "step": 24139 + }, + { + "epoch": 1.1308380568698178, + "grad_norm": 0.5831460247216101, + "learning_rate": 2.093589722260438e-06, + "loss": 0.2765, + "step": 24140 + }, + { + "epoch": 1.130884901859746, + "grad_norm": 0.5694181430920955, + "learning_rate": 2.093402596010185e-06, + "loss": 0.2552, + "step": 24141 + }, + { + "epoch": 1.1309317468496745, + "grad_norm": 0.6057226362673526, + "learning_rate": 2.0932154720997986e-06, + "loss": 0.2837, + "step": 24142 + }, + { + "epoch": 1.1309785918396027, + "grad_norm": 0.6047844576420267, + "learning_rate": 2.0930283505303547e-06, + "loss": 0.2678, + "step": 24143 + }, + { + "epoch": 1.1310254368295312, + "grad_norm": 0.6101214032991648, + "learning_rate": 2.092841231302931e-06, + "loss": 0.2702, + "step": 24144 + }, + { + "epoch": 1.1310722818194594, + "grad_norm": 0.588489485678189, + "learning_rate": 2.092654114418603e-06, + "loss": 0.2793, + "step": 24145 + }, + { + "epoch": 1.1311191268093876, + "grad_norm": 0.5872371215835631, + "learning_rate": 2.0924669998784487e-06, + "loss": 0.2748, + "step": 24146 + }, + { + "epoch": 1.131165971799316, + "grad_norm": 0.5848358527353184, + "learning_rate": 2.0922798876835436e-06, + "loss": 0.2714, + "step": 24147 + }, + { + "epoch": 1.1312128167892443, + "grad_norm": 0.5422911096481483, + "learning_rate": 2.092092777834966e-06, + "loss": 0.2552, + "step": 24148 + }, + { + "epoch": 1.1312596617791728, + "grad_norm": 0.5605702779465968, + "learning_rate": 2.091905670333791e-06, + "loss": 0.2551, + "step": 24149 + }, + { + "epoch": 1.131306506769101, + "grad_norm": 0.6179815620162455, + "learning_rate": 2.091718565181098e-06, + "loss": 0.2999, + "step": 24150 + }, + { + "epoch": 1.1313533517590293, + "grad_norm": 0.5912612826099374, + "learning_rate": 2.0915314623779603e-06, + "loss": 0.2701, + "step": 24151 + }, + { + "epoch": 1.1314001967489578, + "grad_norm": 0.5582139582562015, + "learning_rate": 2.0913443619254577e-06, + "loss": 0.2716, + "step": 24152 + }, + { + "epoch": 1.131447041738886, + "grad_norm": 0.5606903282159028, + "learning_rate": 2.0911572638246646e-06, + "loss": 0.2571, + "step": 24153 + }, + { + "epoch": 1.1314938867288142, + "grad_norm": 0.5794682976288221, + "learning_rate": 2.0909701680766584e-06, + "loss": 0.29, + "step": 24154 + }, + { + "epoch": 1.1315407317187427, + "grad_norm": 0.5490934903715026, + "learning_rate": 2.090783074682517e-06, + "loss": 0.2595, + "step": 24155 + }, + { + "epoch": 1.131587576708671, + "grad_norm": 0.5979402228191931, + "learning_rate": 2.090595983643316e-06, + "loss": 0.2872, + "step": 24156 + }, + { + "epoch": 1.1316344216985994, + "grad_norm": 0.5799955685715005, + "learning_rate": 2.0904088949601324e-06, + "loss": 0.2787, + "step": 24157 + }, + { + "epoch": 1.1316812666885276, + "grad_norm": 0.5760002116411427, + "learning_rate": 2.0902218086340417e-06, + "loss": 0.2842, + "step": 24158 + }, + { + "epoch": 1.131728111678456, + "grad_norm": 0.5754804654033997, + "learning_rate": 2.0900347246661225e-06, + "loss": 0.2607, + "step": 24159 + }, + { + "epoch": 1.1317749566683843, + "grad_norm": 0.5857765785968275, + "learning_rate": 2.0898476430574496e-06, + "loss": 0.2544, + "step": 24160 + }, + { + "epoch": 1.1318218016583126, + "grad_norm": 0.577009964570567, + "learning_rate": 2.0896605638091006e-06, + "loss": 0.2541, + "step": 24161 + }, + { + "epoch": 1.131868646648241, + "grad_norm": 0.5834910365213334, + "learning_rate": 2.0894734869221527e-06, + "loss": 0.2747, + "step": 24162 + }, + { + "epoch": 1.1319154916381693, + "grad_norm": 0.5666301275149628, + "learning_rate": 2.0892864123976814e-06, + "loss": 0.2666, + "step": 24163 + }, + { + "epoch": 1.1319623366280975, + "grad_norm": 0.5682934396985924, + "learning_rate": 2.0890993402367633e-06, + "loss": 0.2704, + "step": 24164 + }, + { + "epoch": 1.132009181618026, + "grad_norm": 0.5822177015069233, + "learning_rate": 2.0889122704404748e-06, + "loss": 0.2732, + "step": 24165 + }, + { + "epoch": 1.1320560266079542, + "grad_norm": 0.5682561301556662, + "learning_rate": 2.088725203009894e-06, + "loss": 0.2677, + "step": 24166 + }, + { + "epoch": 1.1321028715978827, + "grad_norm": 0.60347063181186, + "learning_rate": 2.088538137946095e-06, + "loss": 0.2541, + "step": 24167 + }, + { + "epoch": 1.132149716587811, + "grad_norm": 0.5593305817841564, + "learning_rate": 2.0883510752501566e-06, + "loss": 0.261, + "step": 24168 + }, + { + "epoch": 1.1321965615777392, + "grad_norm": 0.5310344718296356, + "learning_rate": 2.088164014923154e-06, + "loss": 0.2496, + "step": 24169 + }, + { + "epoch": 1.1322434065676676, + "grad_norm": 0.622958965995221, + "learning_rate": 2.087976956966164e-06, + "loss": 0.2886, + "step": 24170 + }, + { + "epoch": 1.1322902515575959, + "grad_norm": 0.5767497901390205, + "learning_rate": 2.0877899013802626e-06, + "loss": 0.2685, + "step": 24171 + }, + { + "epoch": 1.1323370965475243, + "grad_norm": 0.569835668525536, + "learning_rate": 2.0876028481665266e-06, + "loss": 0.2601, + "step": 24172 + }, + { + "epoch": 1.1323839415374526, + "grad_norm": 0.6033862240488624, + "learning_rate": 2.0874157973260333e-06, + "loss": 0.2788, + "step": 24173 + }, + { + "epoch": 1.1324307865273808, + "grad_norm": 0.5581356973539452, + "learning_rate": 2.087228748859859e-06, + "loss": 0.2579, + "step": 24174 + }, + { + "epoch": 1.1324776315173093, + "grad_norm": 0.6094749087714894, + "learning_rate": 2.0870417027690783e-06, + "loss": 0.2847, + "step": 24175 + }, + { + "epoch": 1.1325244765072375, + "grad_norm": 0.6083539282260357, + "learning_rate": 2.0868546590547686e-06, + "loss": 0.2815, + "step": 24176 + }, + { + "epoch": 1.1325713214971658, + "grad_norm": 0.5571218839795992, + "learning_rate": 2.0866676177180074e-06, + "loss": 0.264, + "step": 24177 + }, + { + "epoch": 1.1326181664870942, + "grad_norm": 0.5538280667946848, + "learning_rate": 2.086480578759869e-06, + "loss": 0.2751, + "step": 24178 + }, + { + "epoch": 1.1326650114770225, + "grad_norm": 0.6194533002128082, + "learning_rate": 2.086293542181432e-06, + "loss": 0.2846, + "step": 24179 + }, + { + "epoch": 1.132711856466951, + "grad_norm": 0.6237265580987308, + "learning_rate": 2.086106507983772e-06, + "loss": 0.2829, + "step": 24180 + }, + { + "epoch": 1.1327587014568792, + "grad_norm": 0.5548964553280398, + "learning_rate": 2.0859194761679642e-06, + "loss": 0.2676, + "step": 24181 + }, + { + "epoch": 1.1328055464468074, + "grad_norm": 0.5923492755732027, + "learning_rate": 2.085732446735086e-06, + "loss": 0.2717, + "step": 24182 + }, + { + "epoch": 1.1328523914367359, + "grad_norm": 0.5647464608212952, + "learning_rate": 2.085545419686213e-06, + "loss": 0.2791, + "step": 24183 + }, + { + "epoch": 1.1328992364266641, + "grad_norm": 0.6966987947613429, + "learning_rate": 2.0853583950224226e-06, + "loss": 0.3006, + "step": 24184 + }, + { + "epoch": 1.1329460814165926, + "grad_norm": 0.6408819892738283, + "learning_rate": 2.0851713727447896e-06, + "loss": 0.2933, + "step": 24185 + }, + { + "epoch": 1.1329929264065208, + "grad_norm": 0.5761339798966849, + "learning_rate": 2.0849843528543927e-06, + "loss": 0.2672, + "step": 24186 + }, + { + "epoch": 1.133039771396449, + "grad_norm": 0.5540835771241304, + "learning_rate": 2.084797335352305e-06, + "loss": 0.2584, + "step": 24187 + }, + { + "epoch": 1.1330866163863775, + "grad_norm": 0.5782525608506232, + "learning_rate": 2.084610320239605e-06, + "loss": 0.2676, + "step": 24188 + }, + { + "epoch": 1.1331334613763058, + "grad_norm": 0.6574291970575621, + "learning_rate": 2.0844233075173676e-06, + "loss": 0.2921, + "step": 24189 + }, + { + "epoch": 1.133180306366234, + "grad_norm": 0.5962674376544438, + "learning_rate": 2.0842362971866697e-06, + "loss": 0.2872, + "step": 24190 + }, + { + "epoch": 1.1332271513561625, + "grad_norm": 0.5933258901538977, + "learning_rate": 2.084049289248588e-06, + "loss": 0.2916, + "step": 24191 + }, + { + "epoch": 1.1332739963460907, + "grad_norm": 0.6860856954033466, + "learning_rate": 2.0838622837041984e-06, + "loss": 0.3063, + "step": 24192 + }, + { + "epoch": 1.1333208413360192, + "grad_norm": 0.6383697548149073, + "learning_rate": 2.083675280554576e-06, + "loss": 0.301, + "step": 24193 + }, + { + "epoch": 1.1333676863259474, + "grad_norm": 0.6145795670746852, + "learning_rate": 2.0834882798007976e-06, + "loss": 0.278, + "step": 24194 + }, + { + "epoch": 1.1334145313158759, + "grad_norm": 0.5848455304791802, + "learning_rate": 2.0833012814439396e-06, + "loss": 0.2818, + "step": 24195 + }, + { + "epoch": 1.1334613763058041, + "grad_norm": 0.6181722495781287, + "learning_rate": 2.083114285485078e-06, + "loss": 0.295, + "step": 24196 + }, + { + "epoch": 1.1335082212957324, + "grad_norm": 0.5762637726357425, + "learning_rate": 2.082927291925289e-06, + "loss": 0.2784, + "step": 24197 + }, + { + "epoch": 1.1335550662856608, + "grad_norm": 0.5819123861112153, + "learning_rate": 2.0827403007656492e-06, + "loss": 0.2724, + "step": 24198 + }, + { + "epoch": 1.133601911275589, + "grad_norm": 0.5402073146754376, + "learning_rate": 2.0825533120072335e-06, + "loss": 0.2584, + "step": 24199 + }, + { + "epoch": 1.1336487562655173, + "grad_norm": 0.5861215241306911, + "learning_rate": 2.0823663256511183e-06, + "loss": 0.2829, + "step": 24200 + }, + { + "epoch": 1.1336956012554458, + "grad_norm": 0.6905088946462138, + "learning_rate": 2.082179341698381e-06, + "loss": 0.2754, + "step": 24201 + }, + { + "epoch": 1.133742446245374, + "grad_norm": 0.5808189194139911, + "learning_rate": 2.081992360150095e-06, + "loss": 0.2719, + "step": 24202 + }, + { + "epoch": 1.1337892912353025, + "grad_norm": 0.5531785226637956, + "learning_rate": 2.0818053810073383e-06, + "loss": 0.2667, + "step": 24203 + }, + { + "epoch": 1.1338361362252307, + "grad_norm": 0.5919418746786649, + "learning_rate": 2.0816184042711877e-06, + "loss": 0.2874, + "step": 24204 + }, + { + "epoch": 1.133882981215159, + "grad_norm": 0.5994117507189393, + "learning_rate": 2.0814314299427177e-06, + "loss": 0.2709, + "step": 24205 + }, + { + "epoch": 1.1339298262050874, + "grad_norm": 0.5804581860970379, + "learning_rate": 2.081244458023004e-06, + "loss": 0.2776, + "step": 24206 + }, + { + "epoch": 1.1339766711950157, + "grad_norm": 0.6020472398980113, + "learning_rate": 2.081057488513123e-06, + "loss": 0.2908, + "step": 24207 + }, + { + "epoch": 1.1340235161849441, + "grad_norm": 0.5840210686988793, + "learning_rate": 2.080870521414151e-06, + "loss": 0.261, + "step": 24208 + }, + { + "epoch": 1.1340703611748724, + "grad_norm": 0.5359281878138622, + "learning_rate": 2.080683556727164e-06, + "loss": 0.2491, + "step": 24209 + }, + { + "epoch": 1.1341172061648006, + "grad_norm": 0.5861191197837367, + "learning_rate": 2.0804965944532375e-06, + "loss": 0.2696, + "step": 24210 + }, + { + "epoch": 1.134164051154729, + "grad_norm": 0.5362999440449004, + "learning_rate": 2.080309634593449e-06, + "loss": 0.2483, + "step": 24211 + }, + { + "epoch": 1.1342108961446573, + "grad_norm": 0.6049004378215359, + "learning_rate": 2.0801226771488717e-06, + "loss": 0.2687, + "step": 24212 + }, + { + "epoch": 1.1342577411345856, + "grad_norm": 0.578834774602949, + "learning_rate": 2.0799357221205827e-06, + "loss": 0.2737, + "step": 24213 + }, + { + "epoch": 1.134304586124514, + "grad_norm": 0.5810903330743927, + "learning_rate": 2.0797487695096584e-06, + "loss": 0.2691, + "step": 24214 + }, + { + "epoch": 1.1343514311144423, + "grad_norm": 0.5752339463752226, + "learning_rate": 2.0795618193171745e-06, + "loss": 0.2628, + "step": 24215 + }, + { + "epoch": 1.1343982761043707, + "grad_norm": 0.6461740963721925, + "learning_rate": 2.0793748715442063e-06, + "loss": 0.287, + "step": 24216 + }, + { + "epoch": 1.134445121094299, + "grad_norm": 0.6204203098271808, + "learning_rate": 2.0791879261918313e-06, + "loss": 0.2973, + "step": 24217 + }, + { + "epoch": 1.1344919660842272, + "grad_norm": 0.578473792831068, + "learning_rate": 2.0790009832611222e-06, + "loss": 0.2789, + "step": 24218 + }, + { + "epoch": 1.1345388110741557, + "grad_norm": 0.5644517701366796, + "learning_rate": 2.078814042753158e-06, + "loss": 0.2809, + "step": 24219 + }, + { + "epoch": 1.134585656064084, + "grad_norm": 0.5911614812245981, + "learning_rate": 2.078627104669012e-06, + "loss": 0.2792, + "step": 24220 + }, + { + "epoch": 1.1346325010540124, + "grad_norm": 0.6083204087354658, + "learning_rate": 2.078440169009762e-06, + "loss": 0.2819, + "step": 24221 + }, + { + "epoch": 1.1346793460439406, + "grad_norm": 0.5498915563492316, + "learning_rate": 2.0782532357764822e-06, + "loss": 0.2531, + "step": 24222 + }, + { + "epoch": 1.1347261910338688, + "grad_norm": 0.5590539548153017, + "learning_rate": 2.07806630497025e-06, + "loss": 0.2646, + "step": 24223 + }, + { + "epoch": 1.1347730360237973, + "grad_norm": 0.563350983765125, + "learning_rate": 2.0778793765921393e-06, + "loss": 0.2742, + "step": 24224 + }, + { + "epoch": 1.1348198810137256, + "grad_norm": 0.5645590454147797, + "learning_rate": 2.0776924506432266e-06, + "loss": 0.2765, + "step": 24225 + }, + { + "epoch": 1.1348667260036538, + "grad_norm": 0.5540511822804077, + "learning_rate": 2.0775055271245882e-06, + "loss": 0.2635, + "step": 24226 + }, + { + "epoch": 1.1349135709935823, + "grad_norm": 0.6058524348159302, + "learning_rate": 2.0773186060372986e-06, + "loss": 0.2667, + "step": 24227 + }, + { + "epoch": 1.1349604159835105, + "grad_norm": 0.603616854528683, + "learning_rate": 2.077131687382435e-06, + "loss": 0.2758, + "step": 24228 + }, + { + "epoch": 1.135007260973439, + "grad_norm": 0.5774199938278518, + "learning_rate": 2.076944771161072e-06, + "loss": 0.2739, + "step": 24229 + }, + { + "epoch": 1.1350541059633672, + "grad_norm": 0.5830287051787665, + "learning_rate": 2.0767578573742857e-06, + "loss": 0.2815, + "step": 24230 + }, + { + "epoch": 1.1351009509532957, + "grad_norm": 0.6021836326545476, + "learning_rate": 2.076570946023151e-06, + "loss": 0.2927, + "step": 24231 + }, + { + "epoch": 1.135147795943224, + "grad_norm": 0.5966468399617018, + "learning_rate": 2.0763840371087444e-06, + "loss": 0.2781, + "step": 24232 + }, + { + "epoch": 1.1351946409331521, + "grad_norm": 0.5820177036966566, + "learning_rate": 2.076197130632141e-06, + "loss": 0.2626, + "step": 24233 + }, + { + "epoch": 1.1352414859230806, + "grad_norm": 0.5509343960117681, + "learning_rate": 2.0760102265944165e-06, + "loss": 0.2708, + "step": 24234 + }, + { + "epoch": 1.1352883309130088, + "grad_norm": 0.5638540760575793, + "learning_rate": 2.0758233249966478e-06, + "loss": 0.268, + "step": 24235 + }, + { + "epoch": 1.135335175902937, + "grad_norm": 0.5885403014561035, + "learning_rate": 2.075636425839908e-06, + "loss": 0.2572, + "step": 24236 + }, + { + "epoch": 1.1353820208928656, + "grad_norm": 0.5923398061958027, + "learning_rate": 2.0754495291252745e-06, + "loss": 0.2794, + "step": 24237 + }, + { + "epoch": 1.1354288658827938, + "grad_norm": 0.5512986550137751, + "learning_rate": 2.0752626348538215e-06, + "loss": 0.2605, + "step": 24238 + }, + { + "epoch": 1.1354757108727223, + "grad_norm": 0.6240837737829569, + "learning_rate": 2.0750757430266257e-06, + "loss": 0.2912, + "step": 24239 + }, + { + "epoch": 1.1355225558626505, + "grad_norm": 0.5669617710724286, + "learning_rate": 2.0748888536447623e-06, + "loss": 0.2866, + "step": 24240 + }, + { + "epoch": 1.1355694008525787, + "grad_norm": 0.5818242634179752, + "learning_rate": 2.0747019667093075e-06, + "loss": 0.2678, + "step": 24241 + }, + { + "epoch": 1.1356162458425072, + "grad_norm": 0.6201695081486139, + "learning_rate": 2.0745150822213344e-06, + "loss": 0.2769, + "step": 24242 + }, + { + "epoch": 1.1356630908324354, + "grad_norm": 0.5510872850553019, + "learning_rate": 2.0743282001819207e-06, + "loss": 0.2726, + "step": 24243 + }, + { + "epoch": 1.135709935822364, + "grad_norm": 0.5799578046079991, + "learning_rate": 2.0741413205921414e-06, + "loss": 0.2694, + "step": 24244 + }, + { + "epoch": 1.1357567808122921, + "grad_norm": 0.5716924130413933, + "learning_rate": 2.0739544434530716e-06, + "loss": 0.2641, + "step": 24245 + }, + { + "epoch": 1.1358036258022204, + "grad_norm": 0.6230323137418079, + "learning_rate": 2.0737675687657873e-06, + "loss": 0.2822, + "step": 24246 + }, + { + "epoch": 1.1358504707921488, + "grad_norm": 0.5978703298206686, + "learning_rate": 2.073580696531363e-06, + "loss": 0.2705, + "step": 24247 + }, + { + "epoch": 1.135897315782077, + "grad_norm": 0.6012877947983484, + "learning_rate": 2.073393826750875e-06, + "loss": 0.2935, + "step": 24248 + }, + { + "epoch": 1.1359441607720053, + "grad_norm": 0.5540021893218458, + "learning_rate": 2.073206959425398e-06, + "loss": 0.2587, + "step": 24249 + }, + { + "epoch": 1.1359910057619338, + "grad_norm": 0.5319688434193469, + "learning_rate": 2.0730200945560082e-06, + "loss": 0.2675, + "step": 24250 + }, + { + "epoch": 1.136037850751862, + "grad_norm": 0.5854036984186199, + "learning_rate": 2.0728332321437793e-06, + "loss": 0.2824, + "step": 24251 + }, + { + "epoch": 1.1360846957417905, + "grad_norm": 0.6145669467546383, + "learning_rate": 2.0726463721897883e-06, + "loss": 0.2872, + "step": 24252 + }, + { + "epoch": 1.1361315407317187, + "grad_norm": 0.5957207487925779, + "learning_rate": 2.072459514695111e-06, + "loss": 0.266, + "step": 24253 + }, + { + "epoch": 1.136178385721647, + "grad_norm": 0.6325129370904778, + "learning_rate": 2.0722726596608213e-06, + "loss": 0.3014, + "step": 24254 + }, + { + "epoch": 1.1362252307115754, + "grad_norm": 0.6195132352604461, + "learning_rate": 2.072085807087994e-06, + "loss": 0.2747, + "step": 24255 + }, + { + "epoch": 1.1362720757015037, + "grad_norm": 0.6469960801554658, + "learning_rate": 2.0718989569777056e-06, + "loss": 0.2901, + "step": 24256 + }, + { + "epoch": 1.1363189206914321, + "grad_norm": 0.6284876828179738, + "learning_rate": 2.0717121093310317e-06, + "loss": 0.3045, + "step": 24257 + }, + { + "epoch": 1.1363657656813604, + "grad_norm": 0.5649556423641205, + "learning_rate": 2.071525264149046e-06, + "loss": 0.2527, + "step": 24258 + }, + { + "epoch": 1.1364126106712886, + "grad_norm": 0.6191565342690154, + "learning_rate": 2.0713384214328265e-06, + "loss": 0.2969, + "step": 24259 + }, + { + "epoch": 1.136459455661217, + "grad_norm": 0.5701900407632844, + "learning_rate": 2.0711515811834455e-06, + "loss": 0.2836, + "step": 24260 + }, + { + "epoch": 1.1365063006511453, + "grad_norm": 0.6166837397245425, + "learning_rate": 2.0709647434019792e-06, + "loss": 0.2874, + "step": 24261 + }, + { + "epoch": 1.1365531456410736, + "grad_norm": 0.6412491211328369, + "learning_rate": 2.070777908089503e-06, + "loss": 0.2856, + "step": 24262 + }, + { + "epoch": 1.136599990631002, + "grad_norm": 0.5952271515647907, + "learning_rate": 2.070591075247092e-06, + "loss": 0.2764, + "step": 24263 + }, + { + "epoch": 1.1366468356209303, + "grad_norm": 0.5883699040088711, + "learning_rate": 2.0704042448758217e-06, + "loss": 0.2719, + "step": 24264 + }, + { + "epoch": 1.1366936806108587, + "grad_norm": 0.6199228522658838, + "learning_rate": 2.070217416976768e-06, + "loss": 0.2853, + "step": 24265 + }, + { + "epoch": 1.136740525600787, + "grad_norm": 0.6118321772933921, + "learning_rate": 2.0700305915510036e-06, + "loss": 0.2895, + "step": 24266 + }, + { + "epoch": 1.1367873705907154, + "grad_norm": 0.595266401174217, + "learning_rate": 2.069843768599605e-06, + "loss": 0.2722, + "step": 24267 + }, + { + "epoch": 1.1368342155806437, + "grad_norm": 0.5690004130144083, + "learning_rate": 2.0696569481236486e-06, + "loss": 0.2851, + "step": 24268 + }, + { + "epoch": 1.136881060570572, + "grad_norm": 0.5583644854601952, + "learning_rate": 2.069470130124207e-06, + "loss": 0.2523, + "step": 24269 + }, + { + "epoch": 1.1369279055605004, + "grad_norm": 0.5363833589846589, + "learning_rate": 2.0692833146023565e-06, + "loss": 0.2541, + "step": 24270 + }, + { + "epoch": 1.1369747505504286, + "grad_norm": 0.5964064483769892, + "learning_rate": 2.069096501559174e-06, + "loss": 0.2707, + "step": 24271 + }, + { + "epoch": 1.1370215955403569, + "grad_norm": 0.6447953216079552, + "learning_rate": 2.0689096909957317e-06, + "loss": 0.286, + "step": 24272 + }, + { + "epoch": 1.1370684405302853, + "grad_norm": 0.6250188957217919, + "learning_rate": 2.0687228829131057e-06, + "loss": 0.2956, + "step": 24273 + }, + { + "epoch": 1.1371152855202136, + "grad_norm": 0.601588932446766, + "learning_rate": 2.068536077312371e-06, + "loss": 0.2756, + "step": 24274 + }, + { + "epoch": 1.137162130510142, + "grad_norm": 0.6307176970576965, + "learning_rate": 2.0683492741946033e-06, + "loss": 0.2756, + "step": 24275 + }, + { + "epoch": 1.1372089755000703, + "grad_norm": 0.5782155756310123, + "learning_rate": 2.068162473560876e-06, + "loss": 0.2756, + "step": 24276 + }, + { + "epoch": 1.1372558204899985, + "grad_norm": 0.5925737586360826, + "learning_rate": 2.067975675412266e-06, + "loss": 0.273, + "step": 24277 + }, + { + "epoch": 1.137302665479927, + "grad_norm": 0.580979640138191, + "learning_rate": 2.067788879749848e-06, + "loss": 0.2597, + "step": 24278 + }, + { + "epoch": 1.1373495104698552, + "grad_norm": 0.5881098339504152, + "learning_rate": 2.067602086574696e-06, + "loss": 0.278, + "step": 24279 + }, + { + "epoch": 1.1373963554597837, + "grad_norm": 0.5942093753276748, + "learning_rate": 2.0674152958878845e-06, + "loss": 0.2867, + "step": 24280 + }, + { + "epoch": 1.137443200449712, + "grad_norm": 0.5602874596148923, + "learning_rate": 2.067228507690489e-06, + "loss": 0.269, + "step": 24281 + }, + { + "epoch": 1.1374900454396402, + "grad_norm": 0.610071164741641, + "learning_rate": 2.067041721983586e-06, + "loss": 0.2794, + "step": 24282 + }, + { + "epoch": 1.1375368904295686, + "grad_norm": 0.5899843682093536, + "learning_rate": 2.0668549387682477e-06, + "loss": 0.2719, + "step": 24283 + }, + { + "epoch": 1.1375837354194969, + "grad_norm": 0.6108041050127472, + "learning_rate": 2.066668158045552e-06, + "loss": 0.2837, + "step": 24284 + }, + { + "epoch": 1.137630580409425, + "grad_norm": 0.6528170194039665, + "learning_rate": 2.066481379816571e-06, + "loss": 0.3018, + "step": 24285 + }, + { + "epoch": 1.1376774253993536, + "grad_norm": 0.585528577793981, + "learning_rate": 2.066294604082381e-06, + "loss": 0.2744, + "step": 24286 + }, + { + "epoch": 1.1377242703892818, + "grad_norm": 0.5678762944056647, + "learning_rate": 2.066107830844056e-06, + "loss": 0.2785, + "step": 24287 + }, + { + "epoch": 1.1377711153792103, + "grad_norm": 0.6179333305638579, + "learning_rate": 2.065921060102672e-06, + "loss": 0.2775, + "step": 24288 + }, + { + "epoch": 1.1378179603691385, + "grad_norm": 0.588900646207984, + "learning_rate": 2.065734291859303e-06, + "loss": 0.2782, + "step": 24289 + }, + { + "epoch": 1.1378648053590668, + "grad_norm": 0.6472076191166295, + "learning_rate": 2.0655475261150247e-06, + "loss": 0.2785, + "step": 24290 + }, + { + "epoch": 1.1379116503489952, + "grad_norm": 0.5880094117169348, + "learning_rate": 2.0653607628709103e-06, + "loss": 0.2816, + "step": 24291 + }, + { + "epoch": 1.1379584953389235, + "grad_norm": 0.5909191070071376, + "learning_rate": 2.065174002128036e-06, + "loss": 0.2808, + "step": 24292 + }, + { + "epoch": 1.138005340328852, + "grad_norm": 0.5927297168559346, + "learning_rate": 2.064987243887475e-06, + "loss": 0.2704, + "step": 24293 + }, + { + "epoch": 1.1380521853187802, + "grad_norm": 0.5909541087333706, + "learning_rate": 2.0648004881503037e-06, + "loss": 0.2863, + "step": 24294 + }, + { + "epoch": 1.1380990303087084, + "grad_norm": 0.5660425271745261, + "learning_rate": 2.0646137349175965e-06, + "loss": 0.2716, + "step": 24295 + }, + { + "epoch": 1.1381458752986369, + "grad_norm": 0.5902628629819788, + "learning_rate": 2.064426984190428e-06, + "loss": 0.2665, + "step": 24296 + }, + { + "epoch": 1.138192720288565, + "grad_norm": 0.5991266997700592, + "learning_rate": 2.0642402359698722e-06, + "loss": 0.2864, + "step": 24297 + }, + { + "epoch": 1.1382395652784933, + "grad_norm": 0.639600061612193, + "learning_rate": 2.064053490257004e-06, + "loss": 0.2797, + "step": 24298 + }, + { + "epoch": 1.1382864102684218, + "grad_norm": 0.6074632567918897, + "learning_rate": 2.0638667470528993e-06, + "loss": 0.283, + "step": 24299 + }, + { + "epoch": 1.13833325525835, + "grad_norm": 0.6850836609942246, + "learning_rate": 2.063680006358631e-06, + "loss": 0.2955, + "step": 24300 + }, + { + "epoch": 1.1383801002482785, + "grad_norm": 0.5992092359513448, + "learning_rate": 2.063493268175275e-06, + "loss": 0.2856, + "step": 24301 + }, + { + "epoch": 1.1384269452382068, + "grad_norm": 0.6013122235162126, + "learning_rate": 2.063306532503906e-06, + "loss": 0.2798, + "step": 24302 + }, + { + "epoch": 1.1384737902281352, + "grad_norm": 0.572715553387042, + "learning_rate": 2.063119799345598e-06, + "loss": 0.2675, + "step": 24303 + }, + { + "epoch": 1.1385206352180635, + "grad_norm": 0.6443198550009563, + "learning_rate": 2.0629330687014253e-06, + "loss": 0.3007, + "step": 24304 + }, + { + "epoch": 1.1385674802079917, + "grad_norm": 0.5849813684321349, + "learning_rate": 2.0627463405724627e-06, + "loss": 0.2728, + "step": 24305 + }, + { + "epoch": 1.1386143251979202, + "grad_norm": 0.6288739255518976, + "learning_rate": 2.0625596149597855e-06, + "loss": 0.2905, + "step": 24306 + }, + { + "epoch": 1.1386611701878484, + "grad_norm": 0.6088723768103655, + "learning_rate": 2.0623728918644674e-06, + "loss": 0.2502, + "step": 24307 + }, + { + "epoch": 1.1387080151777766, + "grad_norm": 0.6136680881590427, + "learning_rate": 2.0621861712875845e-06, + "loss": 0.2799, + "step": 24308 + }, + { + "epoch": 1.138754860167705, + "grad_norm": 0.606088066641531, + "learning_rate": 2.0619994532302087e-06, + "loss": 0.2706, + "step": 24309 + }, + { + "epoch": 1.1388017051576333, + "grad_norm": 0.5890765464141988, + "learning_rate": 2.061812737693417e-06, + "loss": 0.283, + "step": 24310 + }, + { + "epoch": 1.1388485501475618, + "grad_norm": 0.6366709752465018, + "learning_rate": 2.061626024678282e-06, + "loss": 0.2901, + "step": 24311 + }, + { + "epoch": 1.13889539513749, + "grad_norm": 0.5927989693679825, + "learning_rate": 2.061439314185879e-06, + "loss": 0.2886, + "step": 24312 + }, + { + "epoch": 1.1389422401274183, + "grad_norm": 0.5927662770953901, + "learning_rate": 2.061252606217283e-06, + "loss": 0.287, + "step": 24313 + }, + { + "epoch": 1.1389890851173468, + "grad_norm": 0.6250089429817745, + "learning_rate": 2.0610659007735685e-06, + "loss": 0.2836, + "step": 24314 + }, + { + "epoch": 1.139035930107275, + "grad_norm": 0.5981319708859426, + "learning_rate": 2.0608791978558085e-06, + "loss": 0.2753, + "step": 24315 + }, + { + "epoch": 1.1390827750972035, + "grad_norm": 0.5747386789788369, + "learning_rate": 2.060692497465078e-06, + "loss": 0.2634, + "step": 24316 + }, + { + "epoch": 1.1391296200871317, + "grad_norm": 0.5804054161856468, + "learning_rate": 2.060505799602453e-06, + "loss": 0.2749, + "step": 24317 + }, + { + "epoch": 1.13917646507706, + "grad_norm": 0.5871696524040981, + "learning_rate": 2.0603191042690056e-06, + "loss": 0.2931, + "step": 24318 + }, + { + "epoch": 1.1392233100669884, + "grad_norm": 0.5775438975234964, + "learning_rate": 2.0601324114658117e-06, + "loss": 0.278, + "step": 24319 + }, + { + "epoch": 1.1392701550569166, + "grad_norm": 0.6145301307767376, + "learning_rate": 2.0599457211939457e-06, + "loss": 0.2757, + "step": 24320 + }, + { + "epoch": 1.1393170000468449, + "grad_norm": 0.5790351643712435, + "learning_rate": 2.0597590334544816e-06, + "loss": 0.2665, + "step": 24321 + }, + { + "epoch": 1.1393638450367733, + "grad_norm": 0.5526847438466546, + "learning_rate": 2.0595723482484926e-06, + "loss": 0.2709, + "step": 24322 + }, + { + "epoch": 1.1394106900267016, + "grad_norm": 0.5540417608871376, + "learning_rate": 2.059385665577054e-06, + "loss": 0.2743, + "step": 24323 + }, + { + "epoch": 1.13945753501663, + "grad_norm": 0.6071148101855702, + "learning_rate": 2.0591989854412408e-06, + "loss": 0.2765, + "step": 24324 + }, + { + "epoch": 1.1395043800065583, + "grad_norm": 0.6098797527605496, + "learning_rate": 2.059012307842126e-06, + "loss": 0.2943, + "step": 24325 + }, + { + "epoch": 1.1395512249964865, + "grad_norm": 0.5956663661003522, + "learning_rate": 2.058825632780786e-06, + "loss": 0.2787, + "step": 24326 + }, + { + "epoch": 1.139598069986415, + "grad_norm": 0.6308921047447594, + "learning_rate": 2.058638960258292e-06, + "loss": 0.2827, + "step": 24327 + }, + { + "epoch": 1.1396449149763432, + "grad_norm": 0.5216096173660258, + "learning_rate": 2.058452290275721e-06, + "loss": 0.2604, + "step": 24328 + }, + { + "epoch": 1.1396917599662717, + "grad_norm": 0.6095598249710694, + "learning_rate": 2.0582656228341448e-06, + "loss": 0.2985, + "step": 24329 + }, + { + "epoch": 1.1397386049562, + "grad_norm": 0.6078075006287271, + "learning_rate": 2.0580789579346395e-06, + "loss": 0.2822, + "step": 24330 + }, + { + "epoch": 1.1397854499461282, + "grad_norm": 0.5895266081338595, + "learning_rate": 2.057892295578279e-06, + "loss": 0.2852, + "step": 24331 + }, + { + "epoch": 1.1398322949360566, + "grad_norm": 0.5857182140687527, + "learning_rate": 2.057705635766138e-06, + "loss": 0.274, + "step": 24332 + }, + { + "epoch": 1.1398791399259849, + "grad_norm": 0.658430524601869, + "learning_rate": 2.0575189784992885e-06, + "loss": 0.2869, + "step": 24333 + }, + { + "epoch": 1.1399259849159131, + "grad_norm": 0.555187312326552, + "learning_rate": 2.0573323237788064e-06, + "loss": 0.2753, + "step": 24334 + }, + { + "epoch": 1.1399728299058416, + "grad_norm": 0.5996620397507549, + "learning_rate": 2.0571456716057657e-06, + "loss": 0.2702, + "step": 24335 + }, + { + "epoch": 1.1400196748957698, + "grad_norm": 0.6405668498377411, + "learning_rate": 2.05695902198124e-06, + "loss": 0.2856, + "step": 24336 + }, + { + "epoch": 1.1400665198856983, + "grad_norm": 0.6013364070566005, + "learning_rate": 2.0567723749063044e-06, + "loss": 0.2726, + "step": 24337 + }, + { + "epoch": 1.1401133648756265, + "grad_norm": 0.5840641822348497, + "learning_rate": 2.0565857303820322e-06, + "loss": 0.2642, + "step": 24338 + }, + { + "epoch": 1.140160209865555, + "grad_norm": 0.604053572826416, + "learning_rate": 2.0563990884094976e-06, + "loss": 0.2717, + "step": 24339 + }, + { + "epoch": 1.1402070548554832, + "grad_norm": 0.6451200493982953, + "learning_rate": 2.0562124489897743e-06, + "loss": 0.2993, + "step": 24340 + }, + { + "epoch": 1.1402538998454115, + "grad_norm": 0.5931375416395501, + "learning_rate": 2.0560258121239378e-06, + "loss": 0.2628, + "step": 24341 + }, + { + "epoch": 1.14030074483534, + "grad_norm": 0.6628315356906325, + "learning_rate": 2.05583917781306e-06, + "loss": 0.2788, + "step": 24342 + }, + { + "epoch": 1.1403475898252682, + "grad_norm": 0.5899506222624258, + "learning_rate": 2.0556525460582167e-06, + "loss": 0.2763, + "step": 24343 + }, + { + "epoch": 1.1403944348151964, + "grad_norm": 0.6187646502200286, + "learning_rate": 2.0554659168604814e-06, + "loss": 0.2917, + "step": 24344 + }, + { + "epoch": 1.1404412798051249, + "grad_norm": 0.6365628779267932, + "learning_rate": 2.0552792902209283e-06, + "loss": 0.288, + "step": 24345 + }, + { + "epoch": 1.1404881247950531, + "grad_norm": 0.5662682562251351, + "learning_rate": 2.0550926661406307e-06, + "loss": 0.2607, + "step": 24346 + }, + { + "epoch": 1.1405349697849816, + "grad_norm": 0.5748373627312325, + "learning_rate": 2.0549060446206625e-06, + "loss": 0.2788, + "step": 24347 + }, + { + "epoch": 1.1405818147749098, + "grad_norm": 0.6077543247529132, + "learning_rate": 2.054719425662099e-06, + "loss": 0.2811, + "step": 24348 + }, + { + "epoch": 1.140628659764838, + "grad_norm": 0.5373561616526984, + "learning_rate": 2.0545328092660127e-06, + "loss": 0.243, + "step": 24349 + }, + { + "epoch": 1.1406755047547665, + "grad_norm": 0.5909604714251111, + "learning_rate": 2.054346195433478e-06, + "loss": 0.2615, + "step": 24350 + }, + { + "epoch": 1.1407223497446948, + "grad_norm": 0.5588694261299006, + "learning_rate": 2.0541595841655696e-06, + "loss": 0.2671, + "step": 24351 + }, + { + "epoch": 1.1407691947346232, + "grad_norm": 0.6126255745475188, + "learning_rate": 2.053972975463361e-06, + "loss": 0.2769, + "step": 24352 + }, + { + "epoch": 1.1408160397245515, + "grad_norm": 0.6210878413241949, + "learning_rate": 2.0537863693279247e-06, + "loss": 0.2703, + "step": 24353 + }, + { + "epoch": 1.1408628847144797, + "grad_norm": 0.6172991310302877, + "learning_rate": 2.053599765760336e-06, + "loss": 0.2841, + "step": 24354 + }, + { + "epoch": 1.1409097297044082, + "grad_norm": 0.5839672083410408, + "learning_rate": 2.053413164761669e-06, + "loss": 0.2602, + "step": 24355 + }, + { + "epoch": 1.1409565746943364, + "grad_norm": 0.5754202622283506, + "learning_rate": 2.0532265663329963e-06, + "loss": 0.2629, + "step": 24356 + }, + { + "epoch": 1.1410034196842647, + "grad_norm": 0.5989629518390343, + "learning_rate": 2.053039970475394e-06, + "loss": 0.2704, + "step": 24357 + }, + { + "epoch": 1.1410502646741931, + "grad_norm": 0.5761350045364457, + "learning_rate": 2.0528533771899324e-06, + "loss": 0.2708, + "step": 24358 + }, + { + "epoch": 1.1410971096641214, + "grad_norm": 0.5964388203935321, + "learning_rate": 2.052666786477688e-06, + "loss": 0.2675, + "step": 24359 + }, + { + "epoch": 1.1411439546540498, + "grad_norm": 0.5944780541790173, + "learning_rate": 2.0524801983397337e-06, + "loss": 0.2799, + "step": 24360 + }, + { + "epoch": 1.141190799643978, + "grad_norm": 0.6016555386931781, + "learning_rate": 2.052293612777143e-06, + "loss": 0.2806, + "step": 24361 + }, + { + "epoch": 1.1412376446339063, + "grad_norm": 0.5685687238682638, + "learning_rate": 2.0521070297909905e-06, + "loss": 0.2711, + "step": 24362 + }, + { + "epoch": 1.1412844896238348, + "grad_norm": 0.5786199986368148, + "learning_rate": 2.0519204493823504e-06, + "loss": 0.2892, + "step": 24363 + }, + { + "epoch": 1.141331334613763, + "grad_norm": 0.5778293724869846, + "learning_rate": 2.051733871552294e-06, + "loss": 0.263, + "step": 24364 + }, + { + "epoch": 1.1413781796036915, + "grad_norm": 0.5723947490876099, + "learning_rate": 2.0515472963018967e-06, + "loss": 0.2622, + "step": 24365 + }, + { + "epoch": 1.1414250245936197, + "grad_norm": 0.5344745855484268, + "learning_rate": 2.051360723632233e-06, + "loss": 0.263, + "step": 24366 + }, + { + "epoch": 1.141471869583548, + "grad_norm": 0.6039099646269619, + "learning_rate": 2.051174153544374e-06, + "loss": 0.2912, + "step": 24367 + }, + { + "epoch": 1.1415187145734764, + "grad_norm": 0.5492749317306691, + "learning_rate": 2.050987586039396e-06, + "loss": 0.2762, + "step": 24368 + }, + { + "epoch": 1.1415655595634047, + "grad_norm": 0.6323691141654992, + "learning_rate": 2.050801021118372e-06, + "loss": 0.2776, + "step": 24369 + }, + { + "epoch": 1.141612404553333, + "grad_norm": 0.5564550601875538, + "learning_rate": 2.0506144587823747e-06, + "loss": 0.2635, + "step": 24370 + }, + { + "epoch": 1.1416592495432614, + "grad_norm": 0.6293128992508089, + "learning_rate": 2.050427899032478e-06, + "loss": 0.2826, + "step": 24371 + }, + { + "epoch": 1.1417060945331896, + "grad_norm": 0.5776911423015455, + "learning_rate": 2.0502413418697557e-06, + "loss": 0.2734, + "step": 24372 + }, + { + "epoch": 1.141752939523118, + "grad_norm": 0.6427961187144893, + "learning_rate": 2.050054787295282e-06, + "loss": 0.2814, + "step": 24373 + }, + { + "epoch": 1.1417997845130463, + "grad_norm": 0.5648195896309203, + "learning_rate": 2.049868235310129e-06, + "loss": 0.2714, + "step": 24374 + }, + { + "epoch": 1.1418466295029748, + "grad_norm": 0.5838763614809919, + "learning_rate": 2.0496816859153724e-06, + "loss": 0.2805, + "step": 24375 + }, + { + "epoch": 1.141893474492903, + "grad_norm": 0.6235927010919684, + "learning_rate": 2.049495139112084e-06, + "loss": 0.288, + "step": 24376 + }, + { + "epoch": 1.1419403194828313, + "grad_norm": 0.6037251290964201, + "learning_rate": 2.0493085949013376e-06, + "loss": 0.2857, + "step": 24377 + }, + { + "epoch": 1.1419871644727597, + "grad_norm": 0.5907124762715301, + "learning_rate": 2.049122053284207e-06, + "loss": 0.2875, + "step": 24378 + }, + { + "epoch": 1.142034009462688, + "grad_norm": 0.6076634966129102, + "learning_rate": 2.0489355142617652e-06, + "loss": 0.2876, + "step": 24379 + }, + { + "epoch": 1.1420808544526162, + "grad_norm": 0.5733009878775727, + "learning_rate": 2.048748977835087e-06, + "loss": 0.2632, + "step": 24380 + }, + { + "epoch": 1.1421276994425447, + "grad_norm": 0.5739875675981078, + "learning_rate": 2.0485624440052456e-06, + "loss": 0.2688, + "step": 24381 + }, + { + "epoch": 1.142174544432473, + "grad_norm": 0.560536735894885, + "learning_rate": 2.0483759127733123e-06, + "loss": 0.2716, + "step": 24382 + }, + { + "epoch": 1.1422213894224014, + "grad_norm": 0.5609803633271696, + "learning_rate": 2.0481893841403624e-06, + "loss": 0.271, + "step": 24383 + }, + { + "epoch": 1.1422682344123296, + "grad_norm": 0.6067341663640556, + "learning_rate": 2.0480028581074695e-06, + "loss": 0.2688, + "step": 24384 + }, + { + "epoch": 1.1423150794022578, + "grad_norm": 0.5966221948681355, + "learning_rate": 2.0478163346757064e-06, + "loss": 0.2811, + "step": 24385 + }, + { + "epoch": 1.1423619243921863, + "grad_norm": 0.524765671316622, + "learning_rate": 2.0476298138461467e-06, + "loss": 0.2506, + "step": 24386 + }, + { + "epoch": 1.1424087693821146, + "grad_norm": 0.6146628505671036, + "learning_rate": 2.0474432956198643e-06, + "loss": 0.2729, + "step": 24387 + }, + { + "epoch": 1.142455614372043, + "grad_norm": 0.6014060325461484, + "learning_rate": 2.0472567799979313e-06, + "loss": 0.2761, + "step": 24388 + }, + { + "epoch": 1.1425024593619713, + "grad_norm": 0.5739418151088266, + "learning_rate": 2.0470702669814217e-06, + "loss": 0.2744, + "step": 24389 + }, + { + "epoch": 1.1425493043518995, + "grad_norm": 0.5882399103918318, + "learning_rate": 2.046883756571409e-06, + "loss": 0.2773, + "step": 24390 + }, + { + "epoch": 1.142596149341828, + "grad_norm": 0.5723848058313317, + "learning_rate": 2.046697248768966e-06, + "loss": 0.273, + "step": 24391 + }, + { + "epoch": 1.1426429943317562, + "grad_norm": 0.5770640218591794, + "learning_rate": 2.0465107435751666e-06, + "loss": 0.2626, + "step": 24392 + }, + { + "epoch": 1.1426898393216844, + "grad_norm": 0.5978550544789799, + "learning_rate": 2.046324240991085e-06, + "loss": 0.2866, + "step": 24393 + }, + { + "epoch": 1.142736684311613, + "grad_norm": 0.5878727179767149, + "learning_rate": 2.0461377410177926e-06, + "loss": 0.2859, + "step": 24394 + }, + { + "epoch": 1.1427835293015411, + "grad_norm": 0.5770415262169916, + "learning_rate": 2.045951243656363e-06, + "loss": 0.2836, + "step": 24395 + }, + { + "epoch": 1.1428303742914696, + "grad_norm": 0.5867258134667435, + "learning_rate": 2.04576474890787e-06, + "loss": 0.2767, + "step": 24396 + }, + { + "epoch": 1.1428772192813978, + "grad_norm": 0.5764325869301549, + "learning_rate": 2.045578256773387e-06, + "loss": 0.2793, + "step": 24397 + }, + { + "epoch": 1.142924064271326, + "grad_norm": 0.5841803660440382, + "learning_rate": 2.0453917672539865e-06, + "loss": 0.2789, + "step": 24398 + }, + { + "epoch": 1.1429709092612546, + "grad_norm": 0.5855035479211059, + "learning_rate": 2.045205280350743e-06, + "loss": 0.2606, + "step": 24399 + }, + { + "epoch": 1.1430177542511828, + "grad_norm": 0.6072609094606795, + "learning_rate": 2.0450187960647276e-06, + "loss": 0.2816, + "step": 24400 + }, + { + "epoch": 1.1430645992411113, + "grad_norm": 0.5861773726793561, + "learning_rate": 2.0448323143970158e-06, + "loss": 0.2816, + "step": 24401 + }, + { + "epoch": 1.1431114442310395, + "grad_norm": 0.5870800426825408, + "learning_rate": 2.044645835348679e-06, + "loss": 0.2818, + "step": 24402 + }, + { + "epoch": 1.1431582892209677, + "grad_norm": 0.5976849752913224, + "learning_rate": 2.0444593589207907e-06, + "loss": 0.2723, + "step": 24403 + }, + { + "epoch": 1.1432051342108962, + "grad_norm": 0.5650537413897487, + "learning_rate": 2.0442728851144247e-06, + "loss": 0.2646, + "step": 24404 + }, + { + "epoch": 1.1432519792008244, + "grad_norm": 0.6005779669335927, + "learning_rate": 2.0440864139306544e-06, + "loss": 0.3024, + "step": 24405 + }, + { + "epoch": 1.1432988241907527, + "grad_norm": 0.5623213325241949, + "learning_rate": 2.0438999453705512e-06, + "loss": 0.2816, + "step": 24406 + }, + { + "epoch": 1.1433456691806811, + "grad_norm": 0.5853694942882164, + "learning_rate": 2.0437134794351893e-06, + "loss": 0.2694, + "step": 24407 + }, + { + "epoch": 1.1433925141706094, + "grad_norm": 0.6046092736339932, + "learning_rate": 2.043527016125642e-06, + "loss": 0.293, + "step": 24408 + }, + { + "epoch": 1.1434393591605378, + "grad_norm": 0.6125888242748042, + "learning_rate": 2.0433405554429815e-06, + "loss": 0.2647, + "step": 24409 + }, + { + "epoch": 1.143486204150466, + "grad_norm": 0.5856958962384015, + "learning_rate": 2.043154097388281e-06, + "loss": 0.2725, + "step": 24410 + }, + { + "epoch": 1.1435330491403946, + "grad_norm": 0.5852043313665092, + "learning_rate": 2.042967641962615e-06, + "loss": 0.2727, + "step": 24411 + }, + { + "epoch": 1.1435798941303228, + "grad_norm": 0.5960706716250359, + "learning_rate": 2.0427811891670552e-06, + "loss": 0.2808, + "step": 24412 + }, + { + "epoch": 1.143626739120251, + "grad_norm": 0.6208056996164373, + "learning_rate": 2.042594739002674e-06, + "loss": 0.2821, + "step": 24413 + }, + { + "epoch": 1.1436735841101795, + "grad_norm": 0.6070653683950934, + "learning_rate": 2.0424082914705453e-06, + "loss": 0.2736, + "step": 24414 + }, + { + "epoch": 1.1437204291001077, + "grad_norm": 0.6257180864488541, + "learning_rate": 2.042221846571742e-06, + "loss": 0.2712, + "step": 24415 + }, + { + "epoch": 1.143767274090036, + "grad_norm": 0.5846118176782114, + "learning_rate": 2.042035404307337e-06, + "loss": 0.2749, + "step": 24416 + }, + { + "epoch": 1.1438141190799644, + "grad_norm": 0.5757525324802334, + "learning_rate": 2.0418489646784025e-06, + "loss": 0.2811, + "step": 24417 + }, + { + "epoch": 1.1438609640698927, + "grad_norm": 0.6321977711296078, + "learning_rate": 2.0416625276860137e-06, + "loss": 0.2889, + "step": 24418 + }, + { + "epoch": 1.1439078090598211, + "grad_norm": 0.5418430707898322, + "learning_rate": 2.041476093331241e-06, + "loss": 0.2623, + "step": 24419 + }, + { + "epoch": 1.1439546540497494, + "grad_norm": 0.5911509481344231, + "learning_rate": 2.041289661615157e-06, + "loss": 0.282, + "step": 24420 + }, + { + "epoch": 1.1440014990396776, + "grad_norm": 0.6471379320306367, + "learning_rate": 2.0411032325388367e-06, + "loss": 0.2754, + "step": 24421 + }, + { + "epoch": 1.144048344029606, + "grad_norm": 0.5781312856320938, + "learning_rate": 2.040916806103352e-06, + "loss": 0.2712, + "step": 24422 + }, + { + "epoch": 1.1440951890195343, + "grad_norm": 0.5611733407750437, + "learning_rate": 2.0407303823097753e-06, + "loss": 0.2722, + "step": 24423 + }, + { + "epoch": 1.1441420340094628, + "grad_norm": 0.5963442431680458, + "learning_rate": 2.040543961159181e-06, + "loss": 0.2859, + "step": 24424 + }, + { + "epoch": 1.144188878999391, + "grad_norm": 0.611998006573064, + "learning_rate": 2.0403575426526396e-06, + "loss": 0.2815, + "step": 24425 + }, + { + "epoch": 1.1442357239893193, + "grad_norm": 0.5879928602575352, + "learning_rate": 2.0401711267912254e-06, + "loss": 0.2739, + "step": 24426 + }, + { + "epoch": 1.1442825689792477, + "grad_norm": 0.55286611371254, + "learning_rate": 2.0399847135760102e-06, + "loss": 0.271, + "step": 24427 + }, + { + "epoch": 1.144329413969176, + "grad_norm": 0.5829498960295246, + "learning_rate": 2.0397983030080682e-06, + "loss": 0.2775, + "step": 24428 + }, + { + "epoch": 1.1443762589591042, + "grad_norm": 0.5370744338776856, + "learning_rate": 2.0396118950884704e-06, + "loss": 0.2731, + "step": 24429 + }, + { + "epoch": 1.1444231039490327, + "grad_norm": 0.6263826641081671, + "learning_rate": 2.0394254898182918e-06, + "loss": 0.2958, + "step": 24430 + }, + { + "epoch": 1.144469948938961, + "grad_norm": 0.5969900436507861, + "learning_rate": 2.0392390871986025e-06, + "loss": 0.2894, + "step": 24431 + }, + { + "epoch": 1.1445167939288894, + "grad_norm": 0.5800060596702582, + "learning_rate": 2.039052687230477e-06, + "loss": 0.2707, + "step": 24432 + }, + { + "epoch": 1.1445636389188176, + "grad_norm": 0.5786944469787072, + "learning_rate": 2.038866289914987e-06, + "loss": 0.265, + "step": 24433 + }, + { + "epoch": 1.1446104839087459, + "grad_norm": 0.5674572728042367, + "learning_rate": 2.0386798952532053e-06, + "loss": 0.2665, + "step": 24434 + }, + { + "epoch": 1.1446573288986743, + "grad_norm": 0.5695008462224452, + "learning_rate": 2.0384935032462057e-06, + "loss": 0.2589, + "step": 24435 + }, + { + "epoch": 1.1447041738886026, + "grad_norm": 0.5879672232724765, + "learning_rate": 2.0383071138950605e-06, + "loss": 0.2719, + "step": 24436 + }, + { + "epoch": 1.144751018878531, + "grad_norm": 0.6199825507446468, + "learning_rate": 2.0381207272008403e-06, + "loss": 0.277, + "step": 24437 + }, + { + "epoch": 1.1447978638684593, + "grad_norm": 0.5570918045092426, + "learning_rate": 2.0379343431646198e-06, + "loss": 0.2724, + "step": 24438 + }, + { + "epoch": 1.1448447088583875, + "grad_norm": 0.5684956059457248, + "learning_rate": 2.0377479617874715e-06, + "loss": 0.2811, + "step": 24439 + }, + { + "epoch": 1.144891553848316, + "grad_norm": 0.6268271997980307, + "learning_rate": 2.0375615830704666e-06, + "loss": 0.2849, + "step": 24440 + }, + { + "epoch": 1.1449383988382442, + "grad_norm": 0.5975306054476618, + "learning_rate": 2.0373752070146792e-06, + "loss": 0.2799, + "step": 24441 + }, + { + "epoch": 1.1449852438281725, + "grad_norm": 0.6158828046679116, + "learning_rate": 2.0371888336211816e-06, + "loss": 0.2808, + "step": 24442 + }, + { + "epoch": 1.145032088818101, + "grad_norm": 0.6339390321574737, + "learning_rate": 2.0370024628910456e-06, + "loss": 0.2803, + "step": 24443 + }, + { + "epoch": 1.1450789338080292, + "grad_norm": 0.647172961733691, + "learning_rate": 2.0368160948253437e-06, + "loss": 0.2717, + "step": 24444 + }, + { + "epoch": 1.1451257787979576, + "grad_norm": 0.6506094415565425, + "learning_rate": 2.0366297294251486e-06, + "loss": 0.2812, + "step": 24445 + }, + { + "epoch": 1.1451726237878859, + "grad_norm": 0.5768919622795593, + "learning_rate": 2.0364433666915334e-06, + "loss": 0.2727, + "step": 24446 + }, + { + "epoch": 1.1452194687778143, + "grad_norm": 0.5760222421120029, + "learning_rate": 2.03625700662557e-06, + "loss": 0.2768, + "step": 24447 + }, + { + "epoch": 1.1452663137677426, + "grad_norm": 0.5830917675376763, + "learning_rate": 2.0360706492283317e-06, + "loss": 0.2759, + "step": 24448 + }, + { + "epoch": 1.1453131587576708, + "grad_norm": 0.5931898129963014, + "learning_rate": 2.035884294500889e-06, + "loss": 0.282, + "step": 24449 + }, + { + "epoch": 1.1453600037475993, + "grad_norm": 0.5517552793671704, + "learning_rate": 2.0356979424443164e-06, + "loss": 0.2572, + "step": 24450 + }, + { + "epoch": 1.1454068487375275, + "grad_norm": 0.6103607381717888, + "learning_rate": 2.0355115930596847e-06, + "loss": 0.2776, + "step": 24451 + }, + { + "epoch": 1.1454536937274558, + "grad_norm": 0.5697554203434037, + "learning_rate": 2.035325246348067e-06, + "loss": 0.2616, + "step": 24452 + }, + { + "epoch": 1.1455005387173842, + "grad_norm": 0.6226697959207999, + "learning_rate": 2.0351389023105363e-06, + "loss": 0.2818, + "step": 24453 + }, + { + "epoch": 1.1455473837073125, + "grad_norm": 0.5760877660953078, + "learning_rate": 2.034952560948165e-06, + "loss": 0.2851, + "step": 24454 + }, + { + "epoch": 1.145594228697241, + "grad_norm": 0.5958403793227384, + "learning_rate": 2.0347662222620238e-06, + "loss": 0.2815, + "step": 24455 + }, + { + "epoch": 1.1456410736871692, + "grad_norm": 0.5449083901905023, + "learning_rate": 2.034579886253186e-06, + "loss": 0.2809, + "step": 24456 + }, + { + "epoch": 1.1456879186770974, + "grad_norm": 0.6595978215531126, + "learning_rate": 2.0343935529227245e-06, + "loss": 0.2785, + "step": 24457 + }, + { + "epoch": 1.1457347636670259, + "grad_norm": 0.5749335447035353, + "learning_rate": 2.0342072222717105e-06, + "loss": 0.2608, + "step": 24458 + }, + { + "epoch": 1.145781608656954, + "grad_norm": 0.5890874730520642, + "learning_rate": 2.034020894301217e-06, + "loss": 0.2769, + "step": 24459 + }, + { + "epoch": 1.1458284536468826, + "grad_norm": 0.5792802321386596, + "learning_rate": 2.033834569012317e-06, + "loss": 0.2565, + "step": 24460 + }, + { + "epoch": 1.1458752986368108, + "grad_norm": 0.6087769143014985, + "learning_rate": 2.0336482464060814e-06, + "loss": 0.2755, + "step": 24461 + }, + { + "epoch": 1.145922143626739, + "grad_norm": 0.5810498916716045, + "learning_rate": 2.0334619264835824e-06, + "loss": 0.2896, + "step": 24462 + }, + { + "epoch": 1.1459689886166675, + "grad_norm": 0.5476242214482102, + "learning_rate": 2.033275609245893e-06, + "loss": 0.2612, + "step": 24463 + }, + { + "epoch": 1.1460158336065958, + "grad_norm": 0.6173835147070373, + "learning_rate": 2.0330892946940855e-06, + "loss": 0.2856, + "step": 24464 + }, + { + "epoch": 1.146062678596524, + "grad_norm": 0.6167462217136503, + "learning_rate": 2.032902982829231e-06, + "loss": 0.2737, + "step": 24465 + }, + { + "epoch": 1.1461095235864525, + "grad_norm": 0.5985381368783221, + "learning_rate": 2.0327166736524037e-06, + "loss": 0.2908, + "step": 24466 + }, + { + "epoch": 1.1461563685763807, + "grad_norm": 0.5983754561147887, + "learning_rate": 2.0325303671646735e-06, + "loss": 0.2741, + "step": 24467 + }, + { + "epoch": 1.1462032135663092, + "grad_norm": 0.6000805145971341, + "learning_rate": 2.032344063367114e-06, + "loss": 0.2716, + "step": 24468 + }, + { + "epoch": 1.1462500585562374, + "grad_norm": 0.6364765501485617, + "learning_rate": 2.0321577622607965e-06, + "loss": 0.286, + "step": 24469 + }, + { + "epoch": 1.1462969035461656, + "grad_norm": 0.5785077013133236, + "learning_rate": 2.0319714638467934e-06, + "loss": 0.2709, + "step": 24470 + }, + { + "epoch": 1.146343748536094, + "grad_norm": 0.615669790218706, + "learning_rate": 2.031785168126177e-06, + "loss": 0.2851, + "step": 24471 + }, + { + "epoch": 1.1463905935260224, + "grad_norm": 0.6248667338209296, + "learning_rate": 2.0315988751000206e-06, + "loss": 0.2839, + "step": 24472 + }, + { + "epoch": 1.1464374385159508, + "grad_norm": 0.5442294494541948, + "learning_rate": 2.031412584769393e-06, + "loss": 0.259, + "step": 24473 + }, + { + "epoch": 1.146484283505879, + "grad_norm": 0.5835783648157945, + "learning_rate": 2.0312262971353687e-06, + "loss": 0.2635, + "step": 24474 + }, + { + "epoch": 1.1465311284958073, + "grad_norm": 0.5669428121384924, + "learning_rate": 2.0310400121990197e-06, + "loss": 0.2569, + "step": 24475 + }, + { + "epoch": 1.1465779734857358, + "grad_norm": 0.588665930497518, + "learning_rate": 2.030853729961417e-06, + "loss": 0.2808, + "step": 24476 + }, + { + "epoch": 1.146624818475664, + "grad_norm": 0.6003128776982428, + "learning_rate": 2.030667450423634e-06, + "loss": 0.2872, + "step": 24477 + }, + { + "epoch": 1.1466716634655922, + "grad_norm": 0.5820087234739683, + "learning_rate": 2.0304811735867415e-06, + "loss": 0.2612, + "step": 24478 + }, + { + "epoch": 1.1467185084555207, + "grad_norm": 0.603151727319188, + "learning_rate": 2.0302948994518125e-06, + "loss": 0.2774, + "step": 24479 + }, + { + "epoch": 1.146765353445449, + "grad_norm": 0.5921864851806501, + "learning_rate": 2.030108628019917e-06, + "loss": 0.2786, + "step": 24480 + }, + { + "epoch": 1.1468121984353774, + "grad_norm": 0.5308582959938165, + "learning_rate": 2.0299223592921287e-06, + "loss": 0.2602, + "step": 24481 + }, + { + "epoch": 1.1468590434253056, + "grad_norm": 0.587175858072337, + "learning_rate": 2.029736093269519e-06, + "loss": 0.2779, + "step": 24482 + }, + { + "epoch": 1.146905888415234, + "grad_norm": 0.5683442662048648, + "learning_rate": 2.0295498299531594e-06, + "loss": 0.262, + "step": 24483 + }, + { + "epoch": 1.1469527334051624, + "grad_norm": 0.5686862812632413, + "learning_rate": 2.0293635693441235e-06, + "loss": 0.2713, + "step": 24484 + }, + { + "epoch": 1.1469995783950906, + "grad_norm": 0.5850121164252968, + "learning_rate": 2.0291773114434823e-06, + "loss": 0.2761, + "step": 24485 + }, + { + "epoch": 1.147046423385019, + "grad_norm": 0.5824632548418107, + "learning_rate": 2.028991056252306e-06, + "loss": 0.2784, + "step": 24486 + }, + { + "epoch": 1.1470932683749473, + "grad_norm": 0.5832713040236118, + "learning_rate": 2.028804803771668e-06, + "loss": 0.2883, + "step": 24487 + }, + { + "epoch": 1.1471401133648755, + "grad_norm": 0.5644936637460696, + "learning_rate": 2.0286185540026403e-06, + "loss": 0.2668, + "step": 24488 + }, + { + "epoch": 1.147186958354804, + "grad_norm": 0.592588660341425, + "learning_rate": 2.0284323069462938e-06, + "loss": 0.2884, + "step": 24489 + }, + { + "epoch": 1.1472338033447322, + "grad_norm": 0.5955558776043507, + "learning_rate": 2.028246062603701e-06, + "loss": 0.2864, + "step": 24490 + }, + { + "epoch": 1.1472806483346607, + "grad_norm": 0.6028737406374217, + "learning_rate": 2.0280598209759345e-06, + "loss": 0.2835, + "step": 24491 + }, + { + "epoch": 1.147327493324589, + "grad_norm": 0.6124288476456874, + "learning_rate": 2.0278735820640647e-06, + "loss": 0.29, + "step": 24492 + }, + { + "epoch": 1.1473743383145172, + "grad_norm": 0.5848100345286065, + "learning_rate": 2.027687345869163e-06, + "loss": 0.2616, + "step": 24493 + }, + { + "epoch": 1.1474211833044456, + "grad_norm": 0.5784982983097379, + "learning_rate": 2.0275011123923023e-06, + "loss": 0.2725, + "step": 24494 + }, + { + "epoch": 1.1474680282943739, + "grad_norm": 0.6263554409302586, + "learning_rate": 2.027314881634554e-06, + "loss": 0.2846, + "step": 24495 + }, + { + "epoch": 1.1475148732843024, + "grad_norm": 0.5710086900930645, + "learning_rate": 2.0271286535969894e-06, + "loss": 0.249, + "step": 24496 + }, + { + "epoch": 1.1475617182742306, + "grad_norm": 0.5776823292195786, + "learning_rate": 2.026942428280682e-06, + "loss": 0.2748, + "step": 24497 + }, + { + "epoch": 1.1476085632641588, + "grad_norm": 0.5893508992716567, + "learning_rate": 2.0267562056867003e-06, + "loss": 0.2781, + "step": 24498 + }, + { + "epoch": 1.1476554082540873, + "grad_norm": 0.6115483175448089, + "learning_rate": 2.0265699858161184e-06, + "loss": 0.2955, + "step": 24499 + }, + { + "epoch": 1.1477022532440155, + "grad_norm": 0.5677442481779705, + "learning_rate": 2.026383768670007e-06, + "loss": 0.2765, + "step": 24500 + }, + { + "epoch": 1.1477490982339438, + "grad_norm": 0.6260025856816728, + "learning_rate": 2.0261975542494376e-06, + "loss": 0.2646, + "step": 24501 + }, + { + "epoch": 1.1477959432238722, + "grad_norm": 0.5662884973226817, + "learning_rate": 2.0260113425554834e-06, + "loss": 0.2686, + "step": 24502 + }, + { + "epoch": 1.1478427882138005, + "grad_norm": 0.6074452727280119, + "learning_rate": 2.025825133589215e-06, + "loss": 0.2685, + "step": 24503 + }, + { + "epoch": 1.147889633203729, + "grad_norm": 0.5979689691245262, + "learning_rate": 2.0256389273517025e-06, + "loss": 0.2564, + "step": 24504 + }, + { + "epoch": 1.1479364781936572, + "grad_norm": 0.6224259224486758, + "learning_rate": 2.0254527238440184e-06, + "loss": 0.2793, + "step": 24505 + }, + { + "epoch": 1.1479833231835854, + "grad_norm": 0.652113394151647, + "learning_rate": 2.025266523067236e-06, + "loss": 0.2914, + "step": 24506 + }, + { + "epoch": 1.1480301681735139, + "grad_norm": 0.5640102400230935, + "learning_rate": 2.0250803250224243e-06, + "loss": 0.2519, + "step": 24507 + }, + { + "epoch": 1.1480770131634421, + "grad_norm": 0.6292643077484039, + "learning_rate": 2.024894129710656e-06, + "loss": 0.2738, + "step": 24508 + }, + { + "epoch": 1.1481238581533706, + "grad_norm": 0.5625545681575126, + "learning_rate": 2.024707937133004e-06, + "loss": 0.2631, + "step": 24509 + }, + { + "epoch": 1.1481707031432988, + "grad_norm": 0.5908226427879885, + "learning_rate": 2.0245217472905374e-06, + "loss": 0.2804, + "step": 24510 + }, + { + "epoch": 1.148217548133227, + "grad_norm": 0.5747080822061855, + "learning_rate": 2.0243355601843284e-06, + "loss": 0.2821, + "step": 24511 + }, + { + "epoch": 1.1482643931231555, + "grad_norm": 0.5746371864552401, + "learning_rate": 2.0241493758154487e-06, + "loss": 0.2825, + "step": 24512 + }, + { + "epoch": 1.1483112381130838, + "grad_norm": 0.6055457699159326, + "learning_rate": 2.02396319418497e-06, + "loss": 0.2672, + "step": 24513 + }, + { + "epoch": 1.148358083103012, + "grad_norm": 0.5795928292503374, + "learning_rate": 2.023777015293963e-06, + "loss": 0.2589, + "step": 24514 + }, + { + "epoch": 1.1484049280929405, + "grad_norm": 0.6109968963820455, + "learning_rate": 2.0235908391435013e-06, + "loss": 0.2896, + "step": 24515 + }, + { + "epoch": 1.1484517730828687, + "grad_norm": 0.5510085245598629, + "learning_rate": 2.0234046657346527e-06, + "loss": 0.2696, + "step": 24516 + }, + { + "epoch": 1.1484986180727972, + "grad_norm": 0.5885662074744398, + "learning_rate": 2.0232184950684913e-06, + "loss": 0.2758, + "step": 24517 + }, + { + "epoch": 1.1485454630627254, + "grad_norm": 0.6205374605652051, + "learning_rate": 2.023032327146087e-06, + "loss": 0.2697, + "step": 24518 + }, + { + "epoch": 1.1485923080526539, + "grad_norm": 0.5930269130851892, + "learning_rate": 2.022846161968512e-06, + "loss": 0.2833, + "step": 24519 + }, + { + "epoch": 1.1486391530425821, + "grad_norm": 0.5961811766678383, + "learning_rate": 2.0226599995368376e-06, + "loss": 0.2646, + "step": 24520 + }, + { + "epoch": 1.1486859980325104, + "grad_norm": 0.5794888069121121, + "learning_rate": 2.0224738398521357e-06, + "loss": 0.2671, + "step": 24521 + }, + { + "epoch": 1.1487328430224388, + "grad_norm": 0.6112298414725306, + "learning_rate": 2.022287682915476e-06, + "loss": 0.2945, + "step": 24522 + }, + { + "epoch": 1.148779688012367, + "grad_norm": 0.608131450032016, + "learning_rate": 2.02210152872793e-06, + "loss": 0.2779, + "step": 24523 + }, + { + "epoch": 1.1488265330022953, + "grad_norm": 0.5934032768654149, + "learning_rate": 2.0219153772905703e-06, + "loss": 0.2798, + "step": 24524 + }, + { + "epoch": 1.1488733779922238, + "grad_norm": 0.6087523539840022, + "learning_rate": 2.021729228604467e-06, + "loss": 0.2696, + "step": 24525 + }, + { + "epoch": 1.148920222982152, + "grad_norm": 0.5720524779434292, + "learning_rate": 2.0215430826706924e-06, + "loss": 0.2874, + "step": 24526 + }, + { + "epoch": 1.1489670679720805, + "grad_norm": 0.6138385295641565, + "learning_rate": 2.021356939490317e-06, + "loss": 0.2678, + "step": 24527 + }, + { + "epoch": 1.1490139129620087, + "grad_norm": 0.6207695840380709, + "learning_rate": 2.0211707990644125e-06, + "loss": 0.2876, + "step": 24528 + }, + { + "epoch": 1.149060757951937, + "grad_norm": 0.6091077607514985, + "learning_rate": 2.0209846613940486e-06, + "loss": 0.2927, + "step": 24529 + }, + { + "epoch": 1.1491076029418654, + "grad_norm": 0.5414513087629199, + "learning_rate": 2.0207985264802983e-06, + "loss": 0.2672, + "step": 24530 + }, + { + "epoch": 1.1491544479317937, + "grad_norm": 0.5986548866701973, + "learning_rate": 2.0206123943242315e-06, + "loss": 0.2729, + "step": 24531 + }, + { + "epoch": 1.1492012929217221, + "grad_norm": 0.6181010038805488, + "learning_rate": 2.02042626492692e-06, + "loss": 0.2769, + "step": 24532 + }, + { + "epoch": 1.1492481379116504, + "grad_norm": 0.599946887911511, + "learning_rate": 2.020240138289436e-06, + "loss": 0.2873, + "step": 24533 + }, + { + "epoch": 1.1492949829015786, + "grad_norm": 0.6372252939374937, + "learning_rate": 2.0200540144128487e-06, + "loss": 0.3044, + "step": 24534 + }, + { + "epoch": 1.149341827891507, + "grad_norm": 0.601631726260714, + "learning_rate": 2.0198678932982295e-06, + "loss": 0.267, + "step": 24535 + }, + { + "epoch": 1.1493886728814353, + "grad_norm": 0.5942593114108446, + "learning_rate": 2.0196817749466496e-06, + "loss": 0.2632, + "step": 24536 + }, + { + "epoch": 1.1494355178713636, + "grad_norm": 0.5788616510144681, + "learning_rate": 2.0194956593591813e-06, + "loss": 0.2644, + "step": 24537 + }, + { + "epoch": 1.149482362861292, + "grad_norm": 0.6157685628448943, + "learning_rate": 2.019309546536894e-06, + "loss": 0.3027, + "step": 24538 + }, + { + "epoch": 1.1495292078512203, + "grad_norm": 0.6201959626538872, + "learning_rate": 2.0191234364808604e-06, + "loss": 0.285, + "step": 24539 + }, + { + "epoch": 1.1495760528411487, + "grad_norm": 0.6267171620553753, + "learning_rate": 2.0189373291921495e-06, + "loss": 0.2987, + "step": 24540 + }, + { + "epoch": 1.149622897831077, + "grad_norm": 0.5567262281824098, + "learning_rate": 2.0187512246718336e-06, + "loss": 0.2667, + "step": 24541 + }, + { + "epoch": 1.1496697428210052, + "grad_norm": 0.6151565046370238, + "learning_rate": 2.0185651229209835e-06, + "loss": 0.285, + "step": 24542 + }, + { + "epoch": 1.1497165878109337, + "grad_norm": 0.6390805363610007, + "learning_rate": 2.01837902394067e-06, + "loss": 0.2811, + "step": 24543 + }, + { + "epoch": 1.149763432800862, + "grad_norm": 0.615281910607002, + "learning_rate": 2.0181929277319647e-06, + "loss": 0.2665, + "step": 24544 + }, + { + "epoch": 1.1498102777907904, + "grad_norm": 0.5547900203743986, + "learning_rate": 2.0180068342959385e-06, + "loss": 0.2645, + "step": 24545 + }, + { + "epoch": 1.1498571227807186, + "grad_norm": 0.5850408895922882, + "learning_rate": 2.0178207436336606e-06, + "loss": 0.2808, + "step": 24546 + }, + { + "epoch": 1.1499039677706469, + "grad_norm": 0.6456038603583942, + "learning_rate": 2.017634655746203e-06, + "loss": 0.2908, + "step": 24547 + }, + { + "epoch": 1.1499508127605753, + "grad_norm": 0.6057864408714044, + "learning_rate": 2.0174485706346375e-06, + "loss": 0.289, + "step": 24548 + }, + { + "epoch": 1.1499976577505036, + "grad_norm": 0.591654139155432, + "learning_rate": 2.0172624883000335e-06, + "loss": 0.2783, + "step": 24549 + }, + { + "epoch": 1.1500445027404318, + "grad_norm": 0.6535211091002818, + "learning_rate": 2.017076408743463e-06, + "loss": 0.2992, + "step": 24550 + }, + { + "epoch": 1.1500913477303603, + "grad_norm": 0.5996658252615484, + "learning_rate": 2.0168903319659965e-06, + "loss": 0.2679, + "step": 24551 + }, + { + "epoch": 1.1501381927202885, + "grad_norm": 0.5734896173638445, + "learning_rate": 2.016704257968705e-06, + "loss": 0.2619, + "step": 24552 + }, + { + "epoch": 1.150185037710217, + "grad_norm": 0.6549812904348725, + "learning_rate": 2.0165181867526584e-06, + "loss": 0.2801, + "step": 24553 + }, + { + "epoch": 1.1502318827001452, + "grad_norm": 0.5651289727013464, + "learning_rate": 2.016332118318928e-06, + "loss": 0.2646, + "step": 24554 + }, + { + "epoch": 1.1502787276900737, + "grad_norm": 0.5847668047786853, + "learning_rate": 2.0161460526685854e-06, + "loss": 0.2639, + "step": 24555 + }, + { + "epoch": 1.150325572680002, + "grad_norm": 0.5899499598555752, + "learning_rate": 2.0159599898027e-06, + "loss": 0.289, + "step": 24556 + }, + { + "epoch": 1.1503724176699301, + "grad_norm": 0.5915401435510121, + "learning_rate": 2.015773929722343e-06, + "loss": 0.2664, + "step": 24557 + }, + { + "epoch": 1.1504192626598586, + "grad_norm": 0.606474491147016, + "learning_rate": 2.0155878724285867e-06, + "loss": 0.2923, + "step": 24558 + }, + { + "epoch": 1.1504661076497869, + "grad_norm": 0.5924651018938014, + "learning_rate": 2.0154018179224997e-06, + "loss": 0.2876, + "step": 24559 + }, + { + "epoch": 1.150512952639715, + "grad_norm": 0.5663161756796947, + "learning_rate": 2.015215766205153e-06, + "loss": 0.2727, + "step": 24560 + }, + { + "epoch": 1.1505597976296436, + "grad_norm": 0.6307533673900158, + "learning_rate": 2.0150297172776175e-06, + "loss": 0.2971, + "step": 24561 + }, + { + "epoch": 1.1506066426195718, + "grad_norm": 0.5659755473842272, + "learning_rate": 2.014843671140965e-06, + "loss": 0.2803, + "step": 24562 + }, + { + "epoch": 1.1506534876095003, + "grad_norm": 0.6211257895839308, + "learning_rate": 2.0146576277962644e-06, + "loss": 0.2831, + "step": 24563 + }, + { + "epoch": 1.1507003325994285, + "grad_norm": 0.5715959580811503, + "learning_rate": 2.0144715872445887e-06, + "loss": 0.2793, + "step": 24564 + }, + { + "epoch": 1.1507471775893567, + "grad_norm": 0.5674476614216857, + "learning_rate": 2.0142855494870053e-06, + "loss": 0.2763, + "step": 24565 + }, + { + "epoch": 1.1507940225792852, + "grad_norm": 0.6036887917496896, + "learning_rate": 2.0140995145245875e-06, + "loss": 0.2809, + "step": 24566 + }, + { + "epoch": 1.1508408675692134, + "grad_norm": 0.6031650367216127, + "learning_rate": 2.013913482358404e-06, + "loss": 0.2797, + "step": 24567 + }, + { + "epoch": 1.150887712559142, + "grad_norm": 0.5632059928226232, + "learning_rate": 2.0137274529895267e-06, + "loss": 0.2695, + "step": 24568 + }, + { + "epoch": 1.1509345575490701, + "grad_norm": 0.5915240118368608, + "learning_rate": 2.0135414264190255e-06, + "loss": 0.2797, + "step": 24569 + }, + { + "epoch": 1.1509814025389984, + "grad_norm": 0.5839470342748798, + "learning_rate": 2.0133554026479716e-06, + "loss": 0.2686, + "step": 24570 + }, + { + "epoch": 1.1510282475289269, + "grad_norm": 0.5491751001502063, + "learning_rate": 2.0131693816774343e-06, + "loss": 0.2542, + "step": 24571 + }, + { + "epoch": 1.151075092518855, + "grad_norm": 0.6362272481952426, + "learning_rate": 2.0129833635084857e-06, + "loss": 0.3016, + "step": 24572 + }, + { + "epoch": 1.1511219375087833, + "grad_norm": 0.631224799557046, + "learning_rate": 2.0127973481421945e-06, + "loss": 0.2737, + "step": 24573 + }, + { + "epoch": 1.1511687824987118, + "grad_norm": 0.5980991875766323, + "learning_rate": 2.0126113355796324e-06, + "loss": 0.2846, + "step": 24574 + }, + { + "epoch": 1.15121562748864, + "grad_norm": 0.5830185660642546, + "learning_rate": 2.01242532582187e-06, + "loss": 0.2714, + "step": 24575 + }, + { + "epoch": 1.1512624724785685, + "grad_norm": 0.6262675042144377, + "learning_rate": 2.012239318869978e-06, + "loss": 0.2684, + "step": 24576 + }, + { + "epoch": 1.1513093174684967, + "grad_norm": 0.5912485679579907, + "learning_rate": 2.0120533147250247e-06, + "loss": 0.2701, + "step": 24577 + }, + { + "epoch": 1.151356162458425, + "grad_norm": 0.5790404271372019, + "learning_rate": 2.011867313388082e-06, + "loss": 0.2858, + "step": 24578 + }, + { + "epoch": 1.1514030074483534, + "grad_norm": 0.6416287839659003, + "learning_rate": 2.011681314860221e-06, + "loss": 0.2885, + "step": 24579 + }, + { + "epoch": 1.1514498524382817, + "grad_norm": 0.6195345989221519, + "learning_rate": 2.0114953191425105e-06, + "loss": 0.2955, + "step": 24580 + }, + { + "epoch": 1.1514966974282101, + "grad_norm": 0.646465774102617, + "learning_rate": 2.0113093262360218e-06, + "loss": 0.2947, + "step": 24581 + }, + { + "epoch": 1.1515435424181384, + "grad_norm": 0.5820003631567163, + "learning_rate": 2.0111233361418264e-06, + "loss": 0.2556, + "step": 24582 + }, + { + "epoch": 1.1515903874080666, + "grad_norm": 0.5616244882150315, + "learning_rate": 2.0109373488609925e-06, + "loss": 0.2495, + "step": 24583 + }, + { + "epoch": 1.151637232397995, + "grad_norm": 0.5865006067415395, + "learning_rate": 2.0107513643945908e-06, + "loss": 0.285, + "step": 24584 + }, + { + "epoch": 1.1516840773879233, + "grad_norm": 0.5512541969283548, + "learning_rate": 2.010565382743692e-06, + "loss": 0.2812, + "step": 24585 + }, + { + "epoch": 1.1517309223778516, + "grad_norm": 0.5539029470003959, + "learning_rate": 2.0103794039093667e-06, + "loss": 0.2594, + "step": 24586 + }, + { + "epoch": 1.15177776736778, + "grad_norm": 0.6053360442172638, + "learning_rate": 2.010193427892685e-06, + "loss": 0.2839, + "step": 24587 + }, + { + "epoch": 1.1518246123577083, + "grad_norm": 0.5971934941040284, + "learning_rate": 2.0100074546947173e-06, + "loss": 0.2725, + "step": 24588 + }, + { + "epoch": 1.1518714573476367, + "grad_norm": 0.5917272265594814, + "learning_rate": 2.009821484316533e-06, + "loss": 0.279, + "step": 24589 + }, + { + "epoch": 1.151918302337565, + "grad_norm": 0.6258624064536223, + "learning_rate": 2.009635516759203e-06, + "loss": 0.3133, + "step": 24590 + }, + { + "epoch": 1.1519651473274934, + "grad_norm": 0.592507467717065, + "learning_rate": 2.0094495520237973e-06, + "loss": 0.2776, + "step": 24591 + }, + { + "epoch": 1.1520119923174217, + "grad_norm": 0.5517026318187495, + "learning_rate": 2.009263590111386e-06, + "loss": 0.2753, + "step": 24592 + }, + { + "epoch": 1.15205883730735, + "grad_norm": 0.6221471939790248, + "learning_rate": 2.0090776310230395e-06, + "loss": 0.2852, + "step": 24593 + }, + { + "epoch": 1.1521056822972784, + "grad_norm": 0.5962047179315705, + "learning_rate": 2.008891674759829e-06, + "loss": 0.2745, + "step": 24594 + }, + { + "epoch": 1.1521525272872066, + "grad_norm": 0.58058177700278, + "learning_rate": 2.008705721322822e-06, + "loss": 0.2716, + "step": 24595 + }, + { + "epoch": 1.1521993722771349, + "grad_norm": 0.5980505246165057, + "learning_rate": 2.0085197707130898e-06, + "loss": 0.2787, + "step": 24596 + }, + { + "epoch": 1.1522462172670633, + "grad_norm": 0.598261701445197, + "learning_rate": 2.0083338229317036e-06, + "loss": 0.2937, + "step": 24597 + }, + { + "epoch": 1.1522930622569916, + "grad_norm": 0.6096376430451811, + "learning_rate": 2.0081478779797327e-06, + "loss": 0.2772, + "step": 24598 + }, + { + "epoch": 1.15233990724692, + "grad_norm": 0.6325835058093625, + "learning_rate": 2.0079619358582466e-06, + "loss": 0.2912, + "step": 24599 + }, + { + "epoch": 1.1523867522368483, + "grad_norm": 0.595597877358628, + "learning_rate": 2.007775996568317e-06, + "loss": 0.2691, + "step": 24600 + }, + { + "epoch": 1.1524335972267765, + "grad_norm": 0.5795706887441269, + "learning_rate": 2.007590060111012e-06, + "loss": 0.2693, + "step": 24601 + }, + { + "epoch": 1.152480442216705, + "grad_norm": 0.6245503615351734, + "learning_rate": 2.0074041264874022e-06, + "loss": 0.2819, + "step": 24602 + }, + { + "epoch": 1.1525272872066332, + "grad_norm": 0.609246410076591, + "learning_rate": 2.0072181956985583e-06, + "loss": 0.2862, + "step": 24603 + }, + { + "epoch": 1.1525741321965617, + "grad_norm": 0.6259624812888944, + "learning_rate": 2.0070322677455494e-06, + "loss": 0.282, + "step": 24604 + }, + { + "epoch": 1.15262097718649, + "grad_norm": 0.5851580585589645, + "learning_rate": 2.006846342629446e-06, + "loss": 0.2741, + "step": 24605 + }, + { + "epoch": 1.1526678221764182, + "grad_norm": 0.5901799547813917, + "learning_rate": 2.006660420351319e-06, + "loss": 0.2858, + "step": 24606 + }, + { + "epoch": 1.1527146671663466, + "grad_norm": 0.6210524080929359, + "learning_rate": 2.006474500912236e-06, + "loss": 0.2657, + "step": 24607 + }, + { + "epoch": 1.1527615121562749, + "grad_norm": 0.603802701361813, + "learning_rate": 2.006288584313269e-06, + "loss": 0.2743, + "step": 24608 + }, + { + "epoch": 1.152808357146203, + "grad_norm": 0.5760750203160281, + "learning_rate": 2.0061026705554866e-06, + "loss": 0.2712, + "step": 24609 + }, + { + "epoch": 1.1528552021361316, + "grad_norm": 0.5357601245550453, + "learning_rate": 2.005916759639959e-06, + "loss": 0.2614, + "step": 24610 + }, + { + "epoch": 1.1529020471260598, + "grad_norm": 0.6154976827855837, + "learning_rate": 2.0057308515677568e-06, + "loss": 0.2864, + "step": 24611 + }, + { + "epoch": 1.1529488921159883, + "grad_norm": 0.5949698586440028, + "learning_rate": 2.0055449463399498e-06, + "loss": 0.2775, + "step": 24612 + }, + { + "epoch": 1.1529957371059165, + "grad_norm": 0.571653702508543, + "learning_rate": 2.0053590439576064e-06, + "loss": 0.2647, + "step": 24613 + }, + { + "epoch": 1.1530425820958448, + "grad_norm": 0.6320133929313912, + "learning_rate": 2.0051731444217973e-06, + "loss": 0.2783, + "step": 24614 + }, + { + "epoch": 1.1530894270857732, + "grad_norm": 0.596835654161206, + "learning_rate": 2.004987247733593e-06, + "loss": 0.2917, + "step": 24615 + }, + { + "epoch": 1.1531362720757015, + "grad_norm": 0.6034875626779167, + "learning_rate": 2.004801353894062e-06, + "loss": 0.2839, + "step": 24616 + }, + { + "epoch": 1.15318311706563, + "grad_norm": 0.5725986385182014, + "learning_rate": 2.0046154629042757e-06, + "loss": 0.2618, + "step": 24617 + }, + { + "epoch": 1.1532299620555582, + "grad_norm": 0.6264499052348615, + "learning_rate": 2.004429574765302e-06, + "loss": 0.293, + "step": 24618 + }, + { + "epoch": 1.1532768070454864, + "grad_norm": 0.626356135235441, + "learning_rate": 2.0042436894782126e-06, + "loss": 0.2748, + "step": 24619 + }, + { + "epoch": 1.1533236520354149, + "grad_norm": 0.6056309488523328, + "learning_rate": 2.004057807044075e-06, + "loss": 0.2798, + "step": 24620 + }, + { + "epoch": 1.153370497025343, + "grad_norm": 0.5931637636501317, + "learning_rate": 2.003871927463961e-06, + "loss": 0.2995, + "step": 24621 + }, + { + "epoch": 1.1534173420152714, + "grad_norm": 0.6246866386736415, + "learning_rate": 2.0036860507389384e-06, + "loss": 0.2712, + "step": 24622 + }, + { + "epoch": 1.1534641870051998, + "grad_norm": 0.6482680242475162, + "learning_rate": 2.003500176870078e-06, + "loss": 0.296, + "step": 24623 + }, + { + "epoch": 1.153511031995128, + "grad_norm": 0.6017849038287854, + "learning_rate": 2.0033143058584497e-06, + "loss": 0.2877, + "step": 24624 + }, + { + "epoch": 1.1535578769850565, + "grad_norm": 0.6093685639880788, + "learning_rate": 2.0031284377051237e-06, + "loss": 0.2909, + "step": 24625 + }, + { + "epoch": 1.1536047219749848, + "grad_norm": 0.5608436500247711, + "learning_rate": 2.0029425724111673e-06, + "loss": 0.269, + "step": 24626 + }, + { + "epoch": 1.1536515669649132, + "grad_norm": 0.5952992607387875, + "learning_rate": 2.0027567099776515e-06, + "loss": 0.2697, + "step": 24627 + }, + { + "epoch": 1.1536984119548415, + "grad_norm": 0.5807726953479274, + "learning_rate": 2.0025708504056462e-06, + "loss": 0.2702, + "step": 24628 + }, + { + "epoch": 1.1537452569447697, + "grad_norm": 0.5687287456438427, + "learning_rate": 2.00238499369622e-06, + "loss": 0.2489, + "step": 24629 + }, + { + "epoch": 1.1537921019346982, + "grad_norm": 0.5705483909094289, + "learning_rate": 2.0021991398504435e-06, + "loss": 0.2715, + "step": 24630 + }, + { + "epoch": 1.1538389469246264, + "grad_norm": 0.6205175446170172, + "learning_rate": 2.002013288869387e-06, + "loss": 0.266, + "step": 24631 + }, + { + "epoch": 1.1538857919145546, + "grad_norm": 0.5692673476763354, + "learning_rate": 2.001827440754118e-06, + "loss": 0.2699, + "step": 24632 + }, + { + "epoch": 1.1539326369044831, + "grad_norm": 0.5932384142859425, + "learning_rate": 2.0016415955057064e-06, + "loss": 0.2634, + "step": 24633 + }, + { + "epoch": 1.1539794818944114, + "grad_norm": 0.5737617603384614, + "learning_rate": 2.001455753125222e-06, + "loss": 0.277, + "step": 24634 + }, + { + "epoch": 1.1540263268843398, + "grad_norm": 0.6312188509681917, + "learning_rate": 2.0012699136137353e-06, + "loss": 0.2756, + "step": 24635 + }, + { + "epoch": 1.154073171874268, + "grad_norm": 0.5954227404642025, + "learning_rate": 2.0010840769723142e-06, + "loss": 0.2828, + "step": 24636 + }, + { + "epoch": 1.1541200168641963, + "grad_norm": 0.6065023969949296, + "learning_rate": 2.00089824320203e-06, + "loss": 0.2855, + "step": 24637 + }, + { + "epoch": 1.1541668618541248, + "grad_norm": 0.6092131177973902, + "learning_rate": 2.0007124123039496e-06, + "loss": 0.2794, + "step": 24638 + }, + { + "epoch": 1.154213706844053, + "grad_norm": 0.5975951031910971, + "learning_rate": 2.000526584279145e-06, + "loss": 0.2951, + "step": 24639 + }, + { + "epoch": 1.1542605518339815, + "grad_norm": 0.5648095935379439, + "learning_rate": 2.000340759128683e-06, + "loss": 0.2794, + "step": 24640 + }, + { + "epoch": 1.1543073968239097, + "grad_norm": 0.5880241056626107, + "learning_rate": 2.0001549368536347e-06, + "loss": 0.294, + "step": 24641 + }, + { + "epoch": 1.154354241813838, + "grad_norm": 0.6144952920753152, + "learning_rate": 1.9999691174550693e-06, + "loss": 0.2911, + "step": 24642 + }, + { + "epoch": 1.1544010868037664, + "grad_norm": 0.5750600489352176, + "learning_rate": 1.999783300934057e-06, + "loss": 0.272, + "step": 24643 + }, + { + "epoch": 1.1544479317936946, + "grad_norm": 0.5476251207465176, + "learning_rate": 1.999597487291665e-06, + "loss": 0.2714, + "step": 24644 + }, + { + "epoch": 1.154494776783623, + "grad_norm": 0.6081642479236492, + "learning_rate": 1.9994116765289635e-06, + "loss": 0.2695, + "step": 24645 + }, + { + "epoch": 1.1545416217735514, + "grad_norm": 0.5812003749777676, + "learning_rate": 1.9992258686470223e-06, + "loss": 0.2628, + "step": 24646 + }, + { + "epoch": 1.1545884667634796, + "grad_norm": 0.5666008895957936, + "learning_rate": 1.99904006364691e-06, + "loss": 0.2584, + "step": 24647 + }, + { + "epoch": 1.154635311753408, + "grad_norm": 0.6144792159152678, + "learning_rate": 1.9988542615296967e-06, + "loss": 0.2903, + "step": 24648 + }, + { + "epoch": 1.1546821567433363, + "grad_norm": 0.569133543205735, + "learning_rate": 1.9986684622964515e-06, + "loss": 0.2602, + "step": 24649 + }, + { + "epoch": 1.1547290017332645, + "grad_norm": 0.5363544826111515, + "learning_rate": 1.9984826659482433e-06, + "loss": 0.2604, + "step": 24650 + }, + { + "epoch": 1.154775846723193, + "grad_norm": 0.5582427226393664, + "learning_rate": 1.9982968724861402e-06, + "loss": 0.2526, + "step": 24651 + }, + { + "epoch": 1.1548226917131212, + "grad_norm": 0.534141465814043, + "learning_rate": 1.9981110819112133e-06, + "loss": 0.2656, + "step": 24652 + }, + { + "epoch": 1.1548695367030497, + "grad_norm": 0.5590369487776174, + "learning_rate": 1.9979252942245307e-06, + "loss": 0.2724, + "step": 24653 + }, + { + "epoch": 1.154916381692978, + "grad_norm": 0.5956678053116743, + "learning_rate": 1.9977395094271617e-06, + "loss": 0.2656, + "step": 24654 + }, + { + "epoch": 1.1549632266829062, + "grad_norm": 0.5497780582591623, + "learning_rate": 1.9975537275201766e-06, + "loss": 0.2804, + "step": 24655 + }, + { + "epoch": 1.1550100716728346, + "grad_norm": 0.6033215810292701, + "learning_rate": 1.9973679485046427e-06, + "loss": 0.2856, + "step": 24656 + }, + { + "epoch": 1.155056916662763, + "grad_norm": 0.5913834222496508, + "learning_rate": 1.99718217238163e-06, + "loss": 0.2978, + "step": 24657 + }, + { + "epoch": 1.1551037616526911, + "grad_norm": 0.5962983415469209, + "learning_rate": 1.9969963991522074e-06, + "loss": 0.2611, + "step": 24658 + }, + { + "epoch": 1.1551506066426196, + "grad_norm": 0.5386272681928336, + "learning_rate": 1.9968106288174437e-06, + "loss": 0.2435, + "step": 24659 + }, + { + "epoch": 1.1551974516325478, + "grad_norm": 0.5525504490133196, + "learning_rate": 1.9966248613784094e-06, + "loss": 0.2794, + "step": 24660 + }, + { + "epoch": 1.1552442966224763, + "grad_norm": 0.5653606506922056, + "learning_rate": 1.9964390968361727e-06, + "loss": 0.2575, + "step": 24661 + }, + { + "epoch": 1.1552911416124045, + "grad_norm": 0.5588337168528269, + "learning_rate": 1.9962533351918014e-06, + "loss": 0.2635, + "step": 24662 + }, + { + "epoch": 1.155337986602333, + "grad_norm": 0.6706242819658353, + "learning_rate": 1.9960675764463656e-06, + "loss": 0.299, + "step": 24663 + }, + { + "epoch": 1.1553848315922612, + "grad_norm": 0.5954899531698533, + "learning_rate": 1.9958818206009347e-06, + "loss": 0.2917, + "step": 24664 + }, + { + "epoch": 1.1554316765821895, + "grad_norm": 0.5739392105482048, + "learning_rate": 1.9956960676565768e-06, + "loss": 0.2781, + "step": 24665 + }, + { + "epoch": 1.155478521572118, + "grad_norm": 0.603137524280658, + "learning_rate": 1.9955103176143613e-06, + "loss": 0.2744, + "step": 24666 + }, + { + "epoch": 1.1555253665620462, + "grad_norm": 0.5909503376697248, + "learning_rate": 1.9953245704753576e-06, + "loss": 0.2871, + "step": 24667 + }, + { + "epoch": 1.1555722115519744, + "grad_norm": 0.5838804002066175, + "learning_rate": 1.9951388262406342e-06, + "loss": 0.272, + "step": 24668 + }, + { + "epoch": 1.155619056541903, + "grad_norm": 0.606199650349646, + "learning_rate": 1.9949530849112592e-06, + "loss": 0.2933, + "step": 24669 + }, + { + "epoch": 1.1556659015318311, + "grad_norm": 0.6025509942877201, + "learning_rate": 1.9947673464883033e-06, + "loss": 0.2839, + "step": 24670 + }, + { + "epoch": 1.1557127465217596, + "grad_norm": 0.5999010163411386, + "learning_rate": 1.9945816109728334e-06, + "loss": 0.2679, + "step": 24671 + }, + { + "epoch": 1.1557595915116878, + "grad_norm": 0.5629620528234824, + "learning_rate": 1.994395878365919e-06, + "loss": 0.2841, + "step": 24672 + }, + { + "epoch": 1.155806436501616, + "grad_norm": 0.5713205473321097, + "learning_rate": 1.9942101486686307e-06, + "loss": 0.2765, + "step": 24673 + }, + { + "epoch": 1.1558532814915445, + "grad_norm": 0.5647786840023001, + "learning_rate": 1.9940244218820356e-06, + "loss": 0.2566, + "step": 24674 + }, + { + "epoch": 1.1559001264814728, + "grad_norm": 0.6495596678232752, + "learning_rate": 1.9938386980072017e-06, + "loss": 0.3003, + "step": 24675 + }, + { + "epoch": 1.1559469714714012, + "grad_norm": 0.5883491464073122, + "learning_rate": 1.993652977045199e-06, + "loss": 0.2779, + "step": 24676 + }, + { + "epoch": 1.1559938164613295, + "grad_norm": 0.5980769962827112, + "learning_rate": 1.9934672589970968e-06, + "loss": 0.2828, + "step": 24677 + }, + { + "epoch": 1.1560406614512577, + "grad_norm": 0.6157669470366752, + "learning_rate": 1.9932815438639627e-06, + "loss": 0.2938, + "step": 24678 + }, + { + "epoch": 1.1560875064411862, + "grad_norm": 0.5380005657146828, + "learning_rate": 1.9930958316468666e-06, + "loss": 0.2585, + "step": 24679 + }, + { + "epoch": 1.1561343514311144, + "grad_norm": 0.5811917299968017, + "learning_rate": 1.992910122346876e-06, + "loss": 0.2757, + "step": 24680 + }, + { + "epoch": 1.1561811964210427, + "grad_norm": 0.5549659704622896, + "learning_rate": 1.9927244159650608e-06, + "loss": 0.2708, + "step": 24681 + }, + { + "epoch": 1.1562280414109711, + "grad_norm": 0.6065112456173627, + "learning_rate": 1.9925387125024885e-06, + "loss": 0.2797, + "step": 24682 + }, + { + "epoch": 1.1562748864008994, + "grad_norm": 0.5729733335736129, + "learning_rate": 1.992353011960228e-06, + "loss": 0.2698, + "step": 24683 + }, + { + "epoch": 1.1563217313908278, + "grad_norm": 0.635804324355777, + "learning_rate": 1.9921673143393493e-06, + "loss": 0.2954, + "step": 24684 + }, + { + "epoch": 1.156368576380756, + "grad_norm": 0.5603507267289615, + "learning_rate": 1.991981619640919e-06, + "loss": 0.2607, + "step": 24685 + }, + { + "epoch": 1.1564154213706843, + "grad_norm": 0.606999334639243, + "learning_rate": 1.9917959278660085e-06, + "loss": 0.2706, + "step": 24686 + }, + { + "epoch": 1.1564622663606128, + "grad_norm": 0.575530055057591, + "learning_rate": 1.9916102390156833e-06, + "loss": 0.2768, + "step": 24687 + }, + { + "epoch": 1.156509111350541, + "grad_norm": 0.536232102538902, + "learning_rate": 1.991424553091014e-06, + "loss": 0.2661, + "step": 24688 + }, + { + "epoch": 1.1565559563404695, + "grad_norm": 0.6084161041531487, + "learning_rate": 1.991238870093068e-06, + "loss": 0.2674, + "step": 24689 + }, + { + "epoch": 1.1566028013303977, + "grad_norm": 0.6104293122311034, + "learning_rate": 1.991053190022914e-06, + "loss": 0.2721, + "step": 24690 + }, + { + "epoch": 1.156649646320326, + "grad_norm": 0.5445593122366393, + "learning_rate": 1.990867512881622e-06, + "loss": 0.2735, + "step": 24691 + }, + { + "epoch": 1.1566964913102544, + "grad_norm": 0.5595259650575508, + "learning_rate": 1.99068183867026e-06, + "loss": 0.2484, + "step": 24692 + }, + { + "epoch": 1.1567433363001827, + "grad_norm": 0.5757032613341637, + "learning_rate": 1.990496167389895e-06, + "loss": 0.2788, + "step": 24693 + }, + { + "epoch": 1.156790181290111, + "grad_norm": 0.6299039814918828, + "learning_rate": 1.9903104990415964e-06, + "loss": 0.2793, + "step": 24694 + }, + { + "epoch": 1.1568370262800394, + "grad_norm": 0.6227062400904502, + "learning_rate": 1.9901248336264334e-06, + "loss": 0.2515, + "step": 24695 + }, + { + "epoch": 1.1568838712699676, + "grad_norm": 0.6037150422265986, + "learning_rate": 1.989939171145473e-06, + "loss": 0.275, + "step": 24696 + }, + { + "epoch": 1.156930716259896, + "grad_norm": 0.6101012211199818, + "learning_rate": 1.9897535115997845e-06, + "loss": 0.2665, + "step": 24697 + }, + { + "epoch": 1.1569775612498243, + "grad_norm": 0.6158807455209774, + "learning_rate": 1.9895678549904378e-06, + "loss": 0.2915, + "step": 24698 + }, + { + "epoch": 1.1570244062397528, + "grad_norm": 0.5822468110452582, + "learning_rate": 1.989382201318499e-06, + "loss": 0.2823, + "step": 24699 + }, + { + "epoch": 1.157071251229681, + "grad_norm": 0.6193988714608034, + "learning_rate": 1.9891965505850367e-06, + "loss": 0.2715, + "step": 24700 + }, + { + "epoch": 1.1571180962196093, + "grad_norm": 0.6399734689837979, + "learning_rate": 1.9890109027911198e-06, + "loss": 0.3098, + "step": 24701 + }, + { + "epoch": 1.1571649412095377, + "grad_norm": 0.6502065106056688, + "learning_rate": 1.9888252579378175e-06, + "loss": 0.2763, + "step": 24702 + }, + { + "epoch": 1.157211786199466, + "grad_norm": 0.5789099350282688, + "learning_rate": 1.9886396160261966e-06, + "loss": 0.2624, + "step": 24703 + }, + { + "epoch": 1.1572586311893942, + "grad_norm": 0.6316374343342848, + "learning_rate": 1.9884539770573275e-06, + "loss": 0.2815, + "step": 24704 + }, + { + "epoch": 1.1573054761793227, + "grad_norm": 0.5956579442799075, + "learning_rate": 1.988268341032276e-06, + "loss": 0.2632, + "step": 24705 + }, + { + "epoch": 1.157352321169251, + "grad_norm": 0.5536262934616253, + "learning_rate": 1.988082707952112e-06, + "loss": 0.2678, + "step": 24706 + }, + { + "epoch": 1.1573991661591794, + "grad_norm": 0.5965140586871941, + "learning_rate": 1.987897077817903e-06, + "loss": 0.2738, + "step": 24707 + }, + { + "epoch": 1.1574460111491076, + "grad_norm": 0.6461317219456876, + "learning_rate": 1.987711450630718e-06, + "loss": 0.2944, + "step": 24708 + }, + { + "epoch": 1.1574928561390359, + "grad_norm": 0.6026447376171814, + "learning_rate": 1.9875258263916243e-06, + "loss": 0.2709, + "step": 24709 + }, + { + "epoch": 1.1575397011289643, + "grad_norm": 0.5556200553532162, + "learning_rate": 1.987340205101692e-06, + "loss": 0.2616, + "step": 24710 + }, + { + "epoch": 1.1575865461188926, + "grad_norm": 0.600362982413706, + "learning_rate": 1.9871545867619866e-06, + "loss": 0.264, + "step": 24711 + }, + { + "epoch": 1.157633391108821, + "grad_norm": 0.5487822766435217, + "learning_rate": 1.9869689713735784e-06, + "loss": 0.2532, + "step": 24712 + }, + { + "epoch": 1.1576802360987493, + "grad_norm": 0.5591910774726907, + "learning_rate": 1.986783358937534e-06, + "loss": 0.2581, + "step": 24713 + }, + { + "epoch": 1.1577270810886775, + "grad_norm": 0.5521625373646415, + "learning_rate": 1.9865977494549223e-06, + "loss": 0.2889, + "step": 24714 + }, + { + "epoch": 1.157773926078606, + "grad_norm": 0.5961467914592673, + "learning_rate": 1.9864121429268124e-06, + "loss": 0.2743, + "step": 24715 + }, + { + "epoch": 1.1578207710685342, + "grad_norm": 0.6193407664229569, + "learning_rate": 1.986226539354272e-06, + "loss": 0.2853, + "step": 24716 + }, + { + "epoch": 1.1578676160584624, + "grad_norm": 0.570980178966102, + "learning_rate": 1.9860409387383677e-06, + "loss": 0.2623, + "step": 24717 + }, + { + "epoch": 1.157914461048391, + "grad_norm": 0.5794867119152672, + "learning_rate": 1.9858553410801683e-06, + "loss": 0.2707, + "step": 24718 + }, + { + "epoch": 1.1579613060383191, + "grad_norm": 0.5926090971390712, + "learning_rate": 1.985669746380743e-06, + "loss": 0.2753, + "step": 24719 + }, + { + "epoch": 1.1580081510282476, + "grad_norm": 0.6260956512305035, + "learning_rate": 1.9854841546411584e-06, + "loss": 0.2694, + "step": 24720 + }, + { + "epoch": 1.1580549960181759, + "grad_norm": 0.6265460343926802, + "learning_rate": 1.9852985658624833e-06, + "loss": 0.2836, + "step": 24721 + }, + { + "epoch": 1.158101841008104, + "grad_norm": 0.5941211298019868, + "learning_rate": 1.9851129800457867e-06, + "loss": 0.2693, + "step": 24722 + }, + { + "epoch": 1.1581486859980326, + "grad_norm": 0.5964265131119056, + "learning_rate": 1.9849273971921346e-06, + "loss": 0.2838, + "step": 24723 + }, + { + "epoch": 1.1581955309879608, + "grad_norm": 0.6734479216146417, + "learning_rate": 1.9847418173025955e-06, + "loss": 0.2775, + "step": 24724 + }, + { + "epoch": 1.1582423759778893, + "grad_norm": 0.5652746032480411, + "learning_rate": 1.984556240378238e-06, + "loss": 0.2698, + "step": 24725 + }, + { + "epoch": 1.1582892209678175, + "grad_norm": 0.6598412838272354, + "learning_rate": 1.98437066642013e-06, + "loss": 0.2651, + "step": 24726 + }, + { + "epoch": 1.1583360659577457, + "grad_norm": 0.5740702399958044, + "learning_rate": 1.9841850954293392e-06, + "loss": 0.2763, + "step": 24727 + }, + { + "epoch": 1.1583829109476742, + "grad_norm": 0.5839960059608249, + "learning_rate": 1.983999527406934e-06, + "loss": 0.2736, + "step": 24728 + }, + { + "epoch": 1.1584297559376024, + "grad_norm": 0.5855305199102004, + "learning_rate": 1.983813962353981e-06, + "loss": 0.2722, + "step": 24729 + }, + { + "epoch": 1.1584766009275307, + "grad_norm": 0.6116535771565469, + "learning_rate": 1.9836284002715495e-06, + "loss": 0.2818, + "step": 24730 + }, + { + "epoch": 1.1585234459174591, + "grad_norm": 0.6060066841496929, + "learning_rate": 1.983442841160706e-06, + "loss": 0.2816, + "step": 24731 + }, + { + "epoch": 1.1585702909073874, + "grad_norm": 0.6355436583391554, + "learning_rate": 1.9832572850225193e-06, + "loss": 0.2845, + "step": 24732 + }, + { + "epoch": 1.1586171358973159, + "grad_norm": 0.6379928414434007, + "learning_rate": 1.9830717318580577e-06, + "loss": 0.3056, + "step": 24733 + }, + { + "epoch": 1.158663980887244, + "grad_norm": 0.5787262602429936, + "learning_rate": 1.9828861816683885e-06, + "loss": 0.2765, + "step": 24734 + }, + { + "epoch": 1.1587108258771726, + "grad_norm": 0.5953310897397655, + "learning_rate": 1.9827006344545785e-06, + "loss": 0.2582, + "step": 24735 + }, + { + "epoch": 1.1587576708671008, + "grad_norm": 0.595571408943761, + "learning_rate": 1.9825150902176963e-06, + "loss": 0.2711, + "step": 24736 + }, + { + "epoch": 1.158804515857029, + "grad_norm": 0.6148900964972958, + "learning_rate": 1.9823295489588105e-06, + "loss": 0.2979, + "step": 24737 + }, + { + "epoch": 1.1588513608469575, + "grad_norm": 0.6431694118818937, + "learning_rate": 1.9821440106789874e-06, + "loss": 0.2869, + "step": 24738 + }, + { + "epoch": 1.1588982058368857, + "grad_norm": 0.6073181937379771, + "learning_rate": 1.981958475379295e-06, + "loss": 0.2741, + "step": 24739 + }, + { + "epoch": 1.158945050826814, + "grad_norm": 0.6027724446800229, + "learning_rate": 1.9817729430608026e-06, + "loss": 0.2752, + "step": 24740 + }, + { + "epoch": 1.1589918958167424, + "grad_norm": 0.6085856838648382, + "learning_rate": 1.9815874137245763e-06, + "loss": 0.2689, + "step": 24741 + }, + { + "epoch": 1.1590387408066707, + "grad_norm": 0.5589278828133368, + "learning_rate": 1.9814018873716835e-06, + "loss": 0.27, + "step": 24742 + }, + { + "epoch": 1.1590855857965991, + "grad_norm": 0.5864934934728607, + "learning_rate": 1.9812163640031927e-06, + "loss": 0.2775, + "step": 24743 + }, + { + "epoch": 1.1591324307865274, + "grad_norm": 0.5780942551966698, + "learning_rate": 1.981030843620171e-06, + "loss": 0.2813, + "step": 24744 + }, + { + "epoch": 1.1591792757764556, + "grad_norm": 0.5967065996068008, + "learning_rate": 1.980845326223687e-06, + "loss": 0.2763, + "step": 24745 + }, + { + "epoch": 1.159226120766384, + "grad_norm": 0.5844135733788505, + "learning_rate": 1.9806598118148084e-06, + "loss": 0.2877, + "step": 24746 + }, + { + "epoch": 1.1592729657563123, + "grad_norm": 0.5935066980195071, + "learning_rate": 1.9804743003946008e-06, + "loss": 0.2843, + "step": 24747 + }, + { + "epoch": 1.1593198107462408, + "grad_norm": 0.6196983260727511, + "learning_rate": 1.9802887919641336e-06, + "loss": 0.2787, + "step": 24748 + }, + { + "epoch": 1.159366655736169, + "grad_norm": 0.6061500105921361, + "learning_rate": 1.980103286524473e-06, + "loss": 0.2801, + "step": 24749 + }, + { + "epoch": 1.1594135007260973, + "grad_norm": 0.6126009705121211, + "learning_rate": 1.9799177840766874e-06, + "loss": 0.27, + "step": 24750 + }, + { + "epoch": 1.1594603457160257, + "grad_norm": 0.5674414569918644, + "learning_rate": 1.979732284621845e-06, + "loss": 0.2692, + "step": 24751 + }, + { + "epoch": 1.159507190705954, + "grad_norm": 0.5916409269193379, + "learning_rate": 1.979546788161012e-06, + "loss": 0.2879, + "step": 24752 + }, + { + "epoch": 1.1595540356958822, + "grad_norm": 0.6197832361337015, + "learning_rate": 1.9793612946952574e-06, + "loss": 0.2896, + "step": 24753 + }, + { + "epoch": 1.1596008806858107, + "grad_norm": 0.6227963641990027, + "learning_rate": 1.9791758042256466e-06, + "loss": 0.2915, + "step": 24754 + }, + { + "epoch": 1.159647725675739, + "grad_norm": 0.5648044886086567, + "learning_rate": 1.9789903167532487e-06, + "loss": 0.2649, + "step": 24755 + }, + { + "epoch": 1.1596945706656674, + "grad_norm": 0.6412073055847307, + "learning_rate": 1.9788048322791297e-06, + "loss": 0.2775, + "step": 24756 + }, + { + "epoch": 1.1597414156555956, + "grad_norm": 0.6297307279380014, + "learning_rate": 1.9786193508043587e-06, + "loss": 0.28, + "step": 24757 + }, + { + "epoch": 1.1597882606455239, + "grad_norm": 0.5830808574672409, + "learning_rate": 1.978433872330002e-06, + "loss": 0.2654, + "step": 24758 + }, + { + "epoch": 1.1598351056354523, + "grad_norm": 0.6078670907175024, + "learning_rate": 1.978248396857128e-06, + "loss": 0.2733, + "step": 24759 + }, + { + "epoch": 1.1598819506253806, + "grad_norm": 0.6143061528004032, + "learning_rate": 1.9780629243868026e-06, + "loss": 0.2926, + "step": 24760 + }, + { + "epoch": 1.159928795615309, + "grad_norm": 0.5449347223626181, + "learning_rate": 1.9778774549200945e-06, + "loss": 0.2626, + "step": 24761 + }, + { + "epoch": 1.1599756406052373, + "grad_norm": 0.5872686645354632, + "learning_rate": 1.9776919884580694e-06, + "loss": 0.2691, + "step": 24762 + }, + { + "epoch": 1.1600224855951655, + "grad_norm": 0.6055575806085908, + "learning_rate": 1.9775065250017957e-06, + "loss": 0.2735, + "step": 24763 + }, + { + "epoch": 1.160069330585094, + "grad_norm": 0.5819391344281379, + "learning_rate": 1.9773210645523416e-06, + "loss": 0.2676, + "step": 24764 + }, + { + "epoch": 1.1601161755750222, + "grad_norm": 0.6113333194496728, + "learning_rate": 1.9771356071107735e-06, + "loss": 0.2666, + "step": 24765 + }, + { + "epoch": 1.1601630205649505, + "grad_norm": 0.6401158593958818, + "learning_rate": 1.9769501526781578e-06, + "loss": 0.2919, + "step": 24766 + }, + { + "epoch": 1.160209865554879, + "grad_norm": 0.608595730939218, + "learning_rate": 1.9767647012555626e-06, + "loss": 0.282, + "step": 24767 + }, + { + "epoch": 1.1602567105448072, + "grad_norm": 0.6233981128802235, + "learning_rate": 1.976579252844055e-06, + "loss": 0.265, + "step": 24768 + }, + { + "epoch": 1.1603035555347356, + "grad_norm": 0.618722861388303, + "learning_rate": 1.976393807444702e-06, + "loss": 0.2818, + "step": 24769 + }, + { + "epoch": 1.1603504005246639, + "grad_norm": 0.6278494477938329, + "learning_rate": 1.976208365058571e-06, + "loss": 0.2841, + "step": 24770 + }, + { + "epoch": 1.1603972455145923, + "grad_norm": 0.6114210453452326, + "learning_rate": 1.97602292568673e-06, + "loss": 0.2777, + "step": 24771 + }, + { + "epoch": 1.1604440905045206, + "grad_norm": 0.6198391379184542, + "learning_rate": 1.9758374893302456e-06, + "loss": 0.2804, + "step": 24772 + }, + { + "epoch": 1.1604909354944488, + "grad_norm": 0.6236445672452033, + "learning_rate": 1.975652055990184e-06, + "loss": 0.2973, + "step": 24773 + }, + { + "epoch": 1.1605377804843773, + "grad_norm": 0.5493724003444626, + "learning_rate": 1.9754666256676127e-06, + "loss": 0.2571, + "step": 24774 + }, + { + "epoch": 1.1605846254743055, + "grad_norm": 0.5707700046255315, + "learning_rate": 1.9752811983635996e-06, + "loss": 0.2631, + "step": 24775 + }, + { + "epoch": 1.1606314704642338, + "grad_norm": 0.6182989239216169, + "learning_rate": 1.9750957740792108e-06, + "loss": 0.2882, + "step": 24776 + }, + { + "epoch": 1.1606783154541622, + "grad_norm": 0.5848263504580149, + "learning_rate": 1.974910352815515e-06, + "loss": 0.2583, + "step": 24777 + }, + { + "epoch": 1.1607251604440905, + "grad_norm": 0.6012897866101155, + "learning_rate": 1.9747249345735776e-06, + "loss": 0.2638, + "step": 24778 + }, + { + "epoch": 1.160772005434019, + "grad_norm": 0.599175864516398, + "learning_rate": 1.9745395193544657e-06, + "loss": 0.2644, + "step": 24779 + }, + { + "epoch": 1.1608188504239472, + "grad_norm": 0.591207975890178, + "learning_rate": 1.974354107159247e-06, + "loss": 0.2684, + "step": 24780 + }, + { + "epoch": 1.1608656954138754, + "grad_norm": 0.6078636151839759, + "learning_rate": 1.9741686979889884e-06, + "loss": 0.2742, + "step": 24781 + }, + { + "epoch": 1.1609125404038039, + "grad_norm": 0.5507930007854122, + "learning_rate": 1.973983291844757e-06, + "loss": 0.2623, + "step": 24782 + }, + { + "epoch": 1.1609593853937321, + "grad_norm": 0.5856393004535929, + "learning_rate": 1.9737978887276204e-06, + "loss": 0.2776, + "step": 24783 + }, + { + "epoch": 1.1610062303836606, + "grad_norm": 0.560248718518979, + "learning_rate": 1.9736124886386434e-06, + "loss": 0.2725, + "step": 24784 + }, + { + "epoch": 1.1610530753735888, + "grad_norm": 0.5881686391415795, + "learning_rate": 1.973427091578894e-06, + "loss": 0.2723, + "step": 24785 + }, + { + "epoch": 1.161099920363517, + "grad_norm": 0.5802685307079564, + "learning_rate": 1.9732416975494397e-06, + "loss": 0.2865, + "step": 24786 + }, + { + "epoch": 1.1611467653534455, + "grad_norm": 0.6255670602892731, + "learning_rate": 1.9730563065513468e-06, + "loss": 0.2801, + "step": 24787 + }, + { + "epoch": 1.1611936103433738, + "grad_norm": 0.6196436651985954, + "learning_rate": 1.9728709185856826e-06, + "loss": 0.2813, + "step": 24788 + }, + { + "epoch": 1.161240455333302, + "grad_norm": 0.6315829199669413, + "learning_rate": 1.972685533653515e-06, + "loss": 0.284, + "step": 24789 + }, + { + "epoch": 1.1612873003232305, + "grad_norm": 0.554362660080094, + "learning_rate": 1.9725001517559087e-06, + "loss": 0.264, + "step": 24790 + }, + { + "epoch": 1.1613341453131587, + "grad_norm": 0.5399160738032209, + "learning_rate": 1.972314772893931e-06, + "loss": 0.2678, + "step": 24791 + }, + { + "epoch": 1.1613809903030872, + "grad_norm": 0.5532673729417162, + "learning_rate": 1.972129397068649e-06, + "loss": 0.2738, + "step": 24792 + }, + { + "epoch": 1.1614278352930154, + "grad_norm": 0.5708796788379124, + "learning_rate": 1.9719440242811304e-06, + "loss": 0.2848, + "step": 24793 + }, + { + "epoch": 1.1614746802829437, + "grad_norm": 0.5928324929445783, + "learning_rate": 1.9717586545324407e-06, + "loss": 0.2862, + "step": 24794 + }, + { + "epoch": 1.1615215252728721, + "grad_norm": 0.6175257543965513, + "learning_rate": 1.971573287823648e-06, + "loss": 0.2806, + "step": 24795 + }, + { + "epoch": 1.1615683702628004, + "grad_norm": 0.578575425851179, + "learning_rate": 1.9713879241558173e-06, + "loss": 0.2675, + "step": 24796 + }, + { + "epoch": 1.1616152152527288, + "grad_norm": 0.5530705982244764, + "learning_rate": 1.971202563530017e-06, + "loss": 0.2559, + "step": 24797 + }, + { + "epoch": 1.161662060242657, + "grad_norm": 0.592752022750702, + "learning_rate": 1.9710172059473122e-06, + "loss": 0.2705, + "step": 24798 + }, + { + "epoch": 1.1617089052325853, + "grad_norm": 0.5707357184535431, + "learning_rate": 1.9708318514087703e-06, + "loss": 0.2644, + "step": 24799 + }, + { + "epoch": 1.1617557502225138, + "grad_norm": 0.5853563204664697, + "learning_rate": 1.970646499915459e-06, + "loss": 0.2836, + "step": 24800 + }, + { + "epoch": 1.161802595212442, + "grad_norm": 0.5808172511676267, + "learning_rate": 1.970461151468444e-06, + "loss": 0.2742, + "step": 24801 + }, + { + "epoch": 1.1618494402023702, + "grad_norm": 0.5871483995757588, + "learning_rate": 1.970275806068792e-06, + "loss": 0.2831, + "step": 24802 + }, + { + "epoch": 1.1618962851922987, + "grad_norm": 0.6294300302950551, + "learning_rate": 1.970090463717569e-06, + "loss": 0.268, + "step": 24803 + }, + { + "epoch": 1.161943130182227, + "grad_norm": 0.6312709106451756, + "learning_rate": 1.9699051244158423e-06, + "loss": 0.2844, + "step": 24804 + }, + { + "epoch": 1.1619899751721554, + "grad_norm": 0.571575340687694, + "learning_rate": 1.969719788164678e-06, + "loss": 0.277, + "step": 24805 + }, + { + "epoch": 1.1620368201620837, + "grad_norm": 0.5741532628644489, + "learning_rate": 1.9695344549651443e-06, + "loss": 0.2728, + "step": 24806 + }, + { + "epoch": 1.1620836651520121, + "grad_norm": 0.5931749258163184, + "learning_rate": 1.9693491248183057e-06, + "loss": 0.2746, + "step": 24807 + }, + { + "epoch": 1.1621305101419404, + "grad_norm": 0.5951611572384492, + "learning_rate": 1.9691637977252296e-06, + "loss": 0.2742, + "step": 24808 + }, + { + "epoch": 1.1621773551318686, + "grad_norm": 0.5884185513470406, + "learning_rate": 1.968978473686982e-06, + "loss": 0.2719, + "step": 24809 + }, + { + "epoch": 1.162224200121797, + "grad_norm": 0.5895271029377205, + "learning_rate": 1.9687931527046304e-06, + "loss": 0.2741, + "step": 24810 + }, + { + "epoch": 1.1622710451117253, + "grad_norm": 0.592434605540092, + "learning_rate": 1.96860783477924e-06, + "loss": 0.2945, + "step": 24811 + }, + { + "epoch": 1.1623178901016535, + "grad_norm": 0.5720919505815825, + "learning_rate": 1.9684225199118785e-06, + "loss": 0.2628, + "step": 24812 + }, + { + "epoch": 1.162364735091582, + "grad_norm": 0.583385482830347, + "learning_rate": 1.9682372081036124e-06, + "loss": 0.2733, + "step": 24813 + }, + { + "epoch": 1.1624115800815102, + "grad_norm": 0.5771864087211371, + "learning_rate": 1.968051899355507e-06, + "loss": 0.2643, + "step": 24814 + }, + { + "epoch": 1.1624584250714387, + "grad_norm": 0.559776977639651, + "learning_rate": 1.967866593668629e-06, + "loss": 0.2621, + "step": 24815 + }, + { + "epoch": 1.162505270061367, + "grad_norm": 0.5864261767773147, + "learning_rate": 1.9676812910440447e-06, + "loss": 0.2709, + "step": 24816 + }, + { + "epoch": 1.1625521150512952, + "grad_norm": 0.5864275354550005, + "learning_rate": 1.9674959914828212e-06, + "loss": 0.2783, + "step": 24817 + }, + { + "epoch": 1.1625989600412237, + "grad_norm": 0.576672068085572, + "learning_rate": 1.967310694986024e-06, + "loss": 0.2835, + "step": 24818 + }, + { + "epoch": 1.162645805031152, + "grad_norm": 0.6089921768689984, + "learning_rate": 1.9671254015547197e-06, + "loss": 0.2755, + "step": 24819 + }, + { + "epoch": 1.1626926500210804, + "grad_norm": 0.5728614571487811, + "learning_rate": 1.9669401111899765e-06, + "loss": 0.2916, + "step": 24820 + }, + { + "epoch": 1.1627394950110086, + "grad_norm": 0.5642617337979019, + "learning_rate": 1.9667548238928575e-06, + "loss": 0.2753, + "step": 24821 + }, + { + "epoch": 1.1627863400009368, + "grad_norm": 0.6014876186956063, + "learning_rate": 1.966569539664431e-06, + "loss": 0.2795, + "step": 24822 + }, + { + "epoch": 1.1628331849908653, + "grad_norm": 0.5876263624004461, + "learning_rate": 1.966384258505762e-06, + "loss": 0.2665, + "step": 24823 + }, + { + "epoch": 1.1628800299807935, + "grad_norm": 0.5597629951336333, + "learning_rate": 1.966198980417918e-06, + "loss": 0.2565, + "step": 24824 + }, + { + "epoch": 1.1629268749707218, + "grad_norm": 0.5336751404287089, + "learning_rate": 1.966013705401964e-06, + "loss": 0.2567, + "step": 24825 + }, + { + "epoch": 1.1629737199606502, + "grad_norm": 0.5532174668580876, + "learning_rate": 1.9658284334589686e-06, + "loss": 0.2595, + "step": 24826 + }, + { + "epoch": 1.1630205649505785, + "grad_norm": 0.5980849635172389, + "learning_rate": 1.965643164589995e-06, + "loss": 0.2643, + "step": 24827 + }, + { + "epoch": 1.163067409940507, + "grad_norm": 0.6107653910306745, + "learning_rate": 1.965457898796111e-06, + "loss": 0.2945, + "step": 24828 + }, + { + "epoch": 1.1631142549304352, + "grad_norm": 0.6099259202500068, + "learning_rate": 1.965272636078382e-06, + "loss": 0.2909, + "step": 24829 + }, + { + "epoch": 1.1631610999203634, + "grad_norm": 0.5575858774840331, + "learning_rate": 1.965087376437875e-06, + "loss": 0.2693, + "step": 24830 + }, + { + "epoch": 1.163207944910292, + "grad_norm": 0.6028557598664006, + "learning_rate": 1.9649021198756557e-06, + "loss": 0.2699, + "step": 24831 + }, + { + "epoch": 1.1632547899002201, + "grad_norm": 0.5665306344553599, + "learning_rate": 1.9647168663927908e-06, + "loss": 0.2708, + "step": 24832 + }, + { + "epoch": 1.1633016348901486, + "grad_norm": 0.5748417502213594, + "learning_rate": 1.964531615990345e-06, + "loss": 0.2571, + "step": 24833 + }, + { + "epoch": 1.1633484798800768, + "grad_norm": 0.6212820800254211, + "learning_rate": 1.964346368669385e-06, + "loss": 0.2914, + "step": 24834 + }, + { + "epoch": 1.163395324870005, + "grad_norm": 0.5818614169913107, + "learning_rate": 1.9641611244309778e-06, + "loss": 0.2754, + "step": 24835 + }, + { + "epoch": 1.1634421698599335, + "grad_norm": 0.5846220923736083, + "learning_rate": 1.963975883276188e-06, + "loss": 0.2808, + "step": 24836 + }, + { + "epoch": 1.1634890148498618, + "grad_norm": 0.629355933719191, + "learning_rate": 1.963790645206082e-06, + "loss": 0.2809, + "step": 24837 + }, + { + "epoch": 1.16353585983979, + "grad_norm": 0.5973894458813906, + "learning_rate": 1.963605410221727e-06, + "loss": 0.2713, + "step": 24838 + }, + { + "epoch": 1.1635827048297185, + "grad_norm": 0.6194392488929347, + "learning_rate": 1.963420178324188e-06, + "loss": 0.2751, + "step": 24839 + }, + { + "epoch": 1.1636295498196467, + "grad_norm": 0.5544266842198681, + "learning_rate": 1.9632349495145303e-06, + "loss": 0.2742, + "step": 24840 + }, + { + "epoch": 1.1636763948095752, + "grad_norm": 0.6294180956903171, + "learning_rate": 1.963049723793821e-06, + "loss": 0.2898, + "step": 24841 + }, + { + "epoch": 1.1637232397995034, + "grad_norm": 0.5696210082025983, + "learning_rate": 1.962864501163125e-06, + "loss": 0.292, + "step": 24842 + }, + { + "epoch": 1.163770084789432, + "grad_norm": 0.5692928366870348, + "learning_rate": 1.962679281623509e-06, + "loss": 0.2625, + "step": 24843 + }, + { + "epoch": 1.1638169297793601, + "grad_norm": 0.5896762490738255, + "learning_rate": 1.96249406517604e-06, + "loss": 0.271, + "step": 24844 + }, + { + "epoch": 1.1638637747692884, + "grad_norm": 0.5828703086895237, + "learning_rate": 1.962308851821781e-06, + "loss": 0.2636, + "step": 24845 + }, + { + "epoch": 1.1639106197592168, + "grad_norm": 0.6048542262365812, + "learning_rate": 1.9621236415618e-06, + "loss": 0.2651, + "step": 24846 + }, + { + "epoch": 1.163957464749145, + "grad_norm": 0.54929409264949, + "learning_rate": 1.961938434397162e-06, + "loss": 0.2633, + "step": 24847 + }, + { + "epoch": 1.1640043097390733, + "grad_norm": 0.5878477063293965, + "learning_rate": 1.9617532303289334e-06, + "loss": 0.2825, + "step": 24848 + }, + { + "epoch": 1.1640511547290018, + "grad_norm": 0.6260704814793577, + "learning_rate": 1.961568029358179e-06, + "loss": 0.2814, + "step": 24849 + }, + { + "epoch": 1.16409799971893, + "grad_norm": 0.5709389068990162, + "learning_rate": 1.9613828314859666e-06, + "loss": 0.2731, + "step": 24850 + }, + { + "epoch": 1.1641448447088585, + "grad_norm": 0.5921416111460087, + "learning_rate": 1.9611976367133596e-06, + "loss": 0.2719, + "step": 24851 + }, + { + "epoch": 1.1641916896987867, + "grad_norm": 0.5434576922492919, + "learning_rate": 1.961012445041425e-06, + "loss": 0.2787, + "step": 24852 + }, + { + "epoch": 1.164238534688715, + "grad_norm": 0.6179011745196693, + "learning_rate": 1.960827256471228e-06, + "loss": 0.273, + "step": 24853 + }, + { + "epoch": 1.1642853796786434, + "grad_norm": 0.5827431551672757, + "learning_rate": 1.9606420710038347e-06, + "loss": 0.2702, + "step": 24854 + }, + { + "epoch": 1.1643322246685717, + "grad_norm": 0.5891011664698568, + "learning_rate": 1.960456888640311e-06, + "loss": 0.2783, + "step": 24855 + }, + { + "epoch": 1.1643790696585001, + "grad_norm": 0.5584025244522773, + "learning_rate": 1.960271709381723e-06, + "loss": 0.2636, + "step": 24856 + }, + { + "epoch": 1.1644259146484284, + "grad_norm": 0.5891930098247117, + "learning_rate": 1.9600865332291345e-06, + "loss": 0.2801, + "step": 24857 + }, + { + "epoch": 1.1644727596383566, + "grad_norm": 0.5978359847193419, + "learning_rate": 1.9599013601836125e-06, + "loss": 0.2768, + "step": 24858 + }, + { + "epoch": 1.164519604628285, + "grad_norm": 0.5714378684045449, + "learning_rate": 1.9597161902462224e-06, + "loss": 0.2586, + "step": 24859 + }, + { + "epoch": 1.1645664496182133, + "grad_norm": 0.6202275702868623, + "learning_rate": 1.95953102341803e-06, + "loss": 0.2751, + "step": 24860 + }, + { + "epoch": 1.1646132946081416, + "grad_norm": 0.6237592878340146, + "learning_rate": 1.9593458597001003e-06, + "loss": 0.2922, + "step": 24861 + }, + { + "epoch": 1.16466013959807, + "grad_norm": 0.5948822573552769, + "learning_rate": 1.9591606990935007e-06, + "loss": 0.2786, + "step": 24862 + }, + { + "epoch": 1.1647069845879983, + "grad_norm": 0.568448896311276, + "learning_rate": 1.9589755415992943e-06, + "loss": 0.2852, + "step": 24863 + }, + { + "epoch": 1.1647538295779267, + "grad_norm": 0.6116264800189813, + "learning_rate": 1.958790387218548e-06, + "loss": 0.2729, + "step": 24864 + }, + { + "epoch": 1.164800674567855, + "grad_norm": 0.587467902620424, + "learning_rate": 1.958605235952326e-06, + "loss": 0.2732, + "step": 24865 + }, + { + "epoch": 1.1648475195577832, + "grad_norm": 0.5853196187516683, + "learning_rate": 1.958420087801696e-06, + "loss": 0.273, + "step": 24866 + }, + { + "epoch": 1.1648943645477117, + "grad_norm": 0.5896400988932344, + "learning_rate": 1.958234942767721e-06, + "loss": 0.2794, + "step": 24867 + }, + { + "epoch": 1.16494120953764, + "grad_norm": 0.5672844529442556, + "learning_rate": 1.9580498008514697e-06, + "loss": 0.267, + "step": 24868 + }, + { + "epoch": 1.1649880545275684, + "grad_norm": 0.5924697075553443, + "learning_rate": 1.957864662054004e-06, + "loss": 0.29, + "step": 24869 + }, + { + "epoch": 1.1650348995174966, + "grad_norm": 0.6730758906245252, + "learning_rate": 1.9576795263763913e-06, + "loss": 0.304, + "step": 24870 + }, + { + "epoch": 1.1650817445074249, + "grad_norm": 0.5962501498136291, + "learning_rate": 1.9574943938196964e-06, + "loss": 0.2663, + "step": 24871 + }, + { + "epoch": 1.1651285894973533, + "grad_norm": 0.5593638893525043, + "learning_rate": 1.957309264384985e-06, + "loss": 0.2614, + "step": 24872 + }, + { + "epoch": 1.1651754344872816, + "grad_norm": 0.6135921515749108, + "learning_rate": 1.957124138073323e-06, + "loss": 0.273, + "step": 24873 + }, + { + "epoch": 1.1652222794772098, + "grad_norm": 0.5886484465719728, + "learning_rate": 1.956939014885775e-06, + "loss": 0.2738, + "step": 24874 + }, + { + "epoch": 1.1652691244671383, + "grad_norm": 0.5913175584361693, + "learning_rate": 1.956753894823406e-06, + "loss": 0.2747, + "step": 24875 + }, + { + "epoch": 1.1653159694570665, + "grad_norm": 0.6157685962630901, + "learning_rate": 1.9565687778872816e-06, + "loss": 0.276, + "step": 24876 + }, + { + "epoch": 1.165362814446995, + "grad_norm": 0.5864917154036386, + "learning_rate": 1.956383664078468e-06, + "loss": 0.2747, + "step": 24877 + }, + { + "epoch": 1.1654096594369232, + "grad_norm": 0.5951443147417073, + "learning_rate": 1.956198553398029e-06, + "loss": 0.273, + "step": 24878 + }, + { + "epoch": 1.1654565044268517, + "grad_norm": 0.6042311819912416, + "learning_rate": 1.9560134458470308e-06, + "loss": 0.287, + "step": 24879 + }, + { + "epoch": 1.16550334941678, + "grad_norm": 0.5836042419655174, + "learning_rate": 1.9558283414265393e-06, + "loss": 0.2784, + "step": 24880 + }, + { + "epoch": 1.1655501944067082, + "grad_norm": 0.6112480129961795, + "learning_rate": 1.955643240137619e-06, + "loss": 0.2746, + "step": 24881 + }, + { + "epoch": 1.1655970393966366, + "grad_norm": 0.553913980369766, + "learning_rate": 1.955458141981334e-06, + "loss": 0.2774, + "step": 24882 + }, + { + "epoch": 1.1656438843865649, + "grad_norm": 0.6126455983442486, + "learning_rate": 1.9552730469587507e-06, + "loss": 0.2802, + "step": 24883 + }, + { + "epoch": 1.165690729376493, + "grad_norm": 0.5621811120041147, + "learning_rate": 1.955087955070934e-06, + "loss": 0.272, + "step": 24884 + }, + { + "epoch": 1.1657375743664216, + "grad_norm": 0.6039060416342172, + "learning_rate": 1.9549028663189496e-06, + "loss": 0.2817, + "step": 24885 + }, + { + "epoch": 1.1657844193563498, + "grad_norm": 0.5909588192522915, + "learning_rate": 1.954717780703863e-06, + "loss": 0.2827, + "step": 24886 + }, + { + "epoch": 1.1658312643462783, + "grad_norm": 0.6101059031335453, + "learning_rate": 1.954532698226737e-06, + "loss": 0.2761, + "step": 24887 + }, + { + "epoch": 1.1658781093362065, + "grad_norm": 0.6204230459905946, + "learning_rate": 1.9543476188886394e-06, + "loss": 0.2756, + "step": 24888 + }, + { + "epoch": 1.1659249543261347, + "grad_norm": 0.6089862806331378, + "learning_rate": 1.954162542690633e-06, + "loss": 0.2782, + "step": 24889 + }, + { + "epoch": 1.1659717993160632, + "grad_norm": 0.624159464358987, + "learning_rate": 1.9539774696337845e-06, + "loss": 0.2873, + "step": 24890 + }, + { + "epoch": 1.1660186443059914, + "grad_norm": 0.538694049009251, + "learning_rate": 1.9537923997191584e-06, + "loss": 0.2705, + "step": 24891 + }, + { + "epoch": 1.16606548929592, + "grad_norm": 0.6020490628386372, + "learning_rate": 1.9536073329478194e-06, + "loss": 0.2891, + "step": 24892 + }, + { + "epoch": 1.1661123342858482, + "grad_norm": 0.6004017194389056, + "learning_rate": 1.9534222693208345e-06, + "loss": 0.2726, + "step": 24893 + }, + { + "epoch": 1.1661591792757764, + "grad_norm": 0.5797543061258185, + "learning_rate": 1.9532372088392653e-06, + "loss": 0.2803, + "step": 24894 + }, + { + "epoch": 1.1662060242657049, + "grad_norm": 0.5598965653042017, + "learning_rate": 1.953052151504179e-06, + "loss": 0.2737, + "step": 24895 + }, + { + "epoch": 1.166252869255633, + "grad_norm": 0.6243699844780436, + "learning_rate": 1.9528670973166403e-06, + "loss": 0.2916, + "step": 24896 + }, + { + "epoch": 1.1662997142455613, + "grad_norm": 0.6214055812613002, + "learning_rate": 1.952682046277714e-06, + "loss": 0.2718, + "step": 24897 + }, + { + "epoch": 1.1663465592354898, + "grad_norm": 0.5784010704655057, + "learning_rate": 1.9524969983884644e-06, + "loss": 0.273, + "step": 24898 + }, + { + "epoch": 1.166393404225418, + "grad_norm": 0.6178527500190832, + "learning_rate": 1.952311953649958e-06, + "loss": 0.2846, + "step": 24899 + }, + { + "epoch": 1.1664402492153465, + "grad_norm": 0.5452977707612875, + "learning_rate": 1.952126912063258e-06, + "loss": 0.2728, + "step": 24900 + }, + { + "epoch": 1.1664870942052747, + "grad_norm": 0.5702355343443375, + "learning_rate": 1.95194187362943e-06, + "loss": 0.2465, + "step": 24901 + }, + { + "epoch": 1.166533939195203, + "grad_norm": 0.5667961284126504, + "learning_rate": 1.9517568383495383e-06, + "loss": 0.2722, + "step": 24902 + }, + { + "epoch": 1.1665807841851314, + "grad_norm": 0.6494242098625982, + "learning_rate": 1.9515718062246485e-06, + "loss": 0.2844, + "step": 24903 + }, + { + "epoch": 1.1666276291750597, + "grad_norm": 0.564518628787006, + "learning_rate": 1.951386777255825e-06, + "loss": 0.2586, + "step": 24904 + }, + { + "epoch": 1.1666744741649882, + "grad_norm": 0.6981431705113091, + "learning_rate": 1.951201751444134e-06, + "loss": 0.2887, + "step": 24905 + }, + { + "epoch": 1.1667213191549164, + "grad_norm": 0.6026075368906078, + "learning_rate": 1.9510167287906372e-06, + "loss": 0.2811, + "step": 24906 + }, + { + "epoch": 1.1667681641448446, + "grad_norm": 0.6138417172599074, + "learning_rate": 1.950831709296402e-06, + "loss": 0.2831, + "step": 24907 + }, + { + "epoch": 1.166815009134773, + "grad_norm": 0.5491973820106653, + "learning_rate": 1.950646692962492e-06, + "loss": 0.2768, + "step": 24908 + }, + { + "epoch": 1.1668618541247013, + "grad_norm": 0.6125212448528071, + "learning_rate": 1.950461679789972e-06, + "loss": 0.2825, + "step": 24909 + }, + { + "epoch": 1.1669086991146296, + "grad_norm": 0.5581701418403464, + "learning_rate": 1.950276669779907e-06, + "loss": 0.2496, + "step": 24910 + }, + { + "epoch": 1.166955544104558, + "grad_norm": 0.5990412863703476, + "learning_rate": 1.9500916629333628e-06, + "loss": 0.2735, + "step": 24911 + }, + { + "epoch": 1.1670023890944863, + "grad_norm": 0.5975976963498217, + "learning_rate": 1.9499066592514018e-06, + "loss": 0.2799, + "step": 24912 + }, + { + "epoch": 1.1670492340844147, + "grad_norm": 0.5724985259994361, + "learning_rate": 1.9497216587350897e-06, + "loss": 0.2798, + "step": 24913 + }, + { + "epoch": 1.167096079074343, + "grad_norm": 0.5616761599853465, + "learning_rate": 1.949536661385491e-06, + "loss": 0.2677, + "step": 24914 + }, + { + "epoch": 1.1671429240642714, + "grad_norm": 0.5656451177145286, + "learning_rate": 1.949351667203671e-06, + "loss": 0.2732, + "step": 24915 + }, + { + "epoch": 1.1671897690541997, + "grad_norm": 0.5720477604677363, + "learning_rate": 1.949166676190693e-06, + "loss": 0.2851, + "step": 24916 + }, + { + "epoch": 1.167236614044128, + "grad_norm": 0.5617960529610586, + "learning_rate": 1.9489816883476235e-06, + "loss": 0.2824, + "step": 24917 + }, + { + "epoch": 1.1672834590340564, + "grad_norm": 0.5672259319800086, + "learning_rate": 1.948796703675525e-06, + "loss": 0.2722, + "step": 24918 + }, + { + "epoch": 1.1673303040239846, + "grad_norm": 0.575166336394109, + "learning_rate": 1.948611722175463e-06, + "loss": 0.2834, + "step": 24919 + }, + { + "epoch": 1.1673771490139129, + "grad_norm": 0.6161201640187479, + "learning_rate": 1.948426743848502e-06, + "loss": 0.2763, + "step": 24920 + }, + { + "epoch": 1.1674239940038413, + "grad_norm": 0.5801269871364884, + "learning_rate": 1.948241768695706e-06, + "loss": 0.2859, + "step": 24921 + }, + { + "epoch": 1.1674708389937696, + "grad_norm": 0.6449182153169797, + "learning_rate": 1.9480567967181412e-06, + "loss": 0.2889, + "step": 24922 + }, + { + "epoch": 1.167517683983698, + "grad_norm": 0.5729565431816298, + "learning_rate": 1.947871827916871e-06, + "loss": 0.2877, + "step": 24923 + }, + { + "epoch": 1.1675645289736263, + "grad_norm": 0.594180825306521, + "learning_rate": 1.947686862292958e-06, + "loss": 0.2759, + "step": 24924 + }, + { + "epoch": 1.1676113739635545, + "grad_norm": 0.6183190595075831, + "learning_rate": 1.9475018998474685e-06, + "loss": 0.2721, + "step": 24925 + }, + { + "epoch": 1.167658218953483, + "grad_norm": 0.6058642697668148, + "learning_rate": 1.947316940581468e-06, + "loss": 0.2967, + "step": 24926 + }, + { + "epoch": 1.1677050639434112, + "grad_norm": 0.5529604772180962, + "learning_rate": 1.947131984496018e-06, + "loss": 0.2523, + "step": 24927 + }, + { + "epoch": 1.1677519089333397, + "grad_norm": 0.5934346960360717, + "learning_rate": 1.9469470315921853e-06, + "loss": 0.2899, + "step": 24928 + }, + { + "epoch": 1.167798753923268, + "grad_norm": 0.5371056808996533, + "learning_rate": 1.946762081871034e-06, + "loss": 0.257, + "step": 24929 + }, + { + "epoch": 1.1678455989131962, + "grad_norm": 0.5740947229383687, + "learning_rate": 1.9465771353336275e-06, + "loss": 0.2651, + "step": 24930 + }, + { + "epoch": 1.1678924439031246, + "grad_norm": 0.6049647604200482, + "learning_rate": 1.9463921919810304e-06, + "loss": 0.2856, + "step": 24931 + }, + { + "epoch": 1.1679392888930529, + "grad_norm": 0.5927744772114427, + "learning_rate": 1.9462072518143064e-06, + "loss": 0.272, + "step": 24932 + }, + { + "epoch": 1.1679861338829811, + "grad_norm": 0.5798289989105291, + "learning_rate": 1.9460223148345214e-06, + "loss": 0.2692, + "step": 24933 + }, + { + "epoch": 1.1680329788729096, + "grad_norm": 0.5798297332903256, + "learning_rate": 1.945837381042738e-06, + "loss": 0.2667, + "step": 24934 + }, + { + "epoch": 1.1680798238628378, + "grad_norm": 0.5525973805551697, + "learning_rate": 1.945652450440022e-06, + "loss": 0.2783, + "step": 24935 + }, + { + "epoch": 1.1681266688527663, + "grad_norm": 0.5595684043496939, + "learning_rate": 1.9454675230274365e-06, + "loss": 0.2671, + "step": 24936 + }, + { + "epoch": 1.1681735138426945, + "grad_norm": 0.636633302679819, + "learning_rate": 1.9452825988060463e-06, + "loss": 0.2858, + "step": 24937 + }, + { + "epoch": 1.1682203588326228, + "grad_norm": 0.6066443872145588, + "learning_rate": 1.9450976777769145e-06, + "loss": 0.2699, + "step": 24938 + }, + { + "epoch": 1.1682672038225512, + "grad_norm": 0.6720780704145806, + "learning_rate": 1.944912759941106e-06, + "loss": 0.2886, + "step": 24939 + }, + { + "epoch": 1.1683140488124795, + "grad_norm": 0.5947895539503602, + "learning_rate": 1.944727845299686e-06, + "loss": 0.2808, + "step": 24940 + }, + { + "epoch": 1.168360893802408, + "grad_norm": 0.6433327135516089, + "learning_rate": 1.944542933853718e-06, + "loss": 0.3022, + "step": 24941 + }, + { + "epoch": 1.1684077387923362, + "grad_norm": 0.5743252880064986, + "learning_rate": 1.944358025604265e-06, + "loss": 0.2734, + "step": 24942 + }, + { + "epoch": 1.1684545837822644, + "grad_norm": 0.5597895605351846, + "learning_rate": 1.9441731205523915e-06, + "loss": 0.2604, + "step": 24943 + }, + { + "epoch": 1.1685014287721929, + "grad_norm": 0.5732917940968915, + "learning_rate": 1.9439882186991628e-06, + "loss": 0.2716, + "step": 24944 + }, + { + "epoch": 1.1685482737621211, + "grad_norm": 0.59843745034851, + "learning_rate": 1.943803320045641e-06, + "loss": 0.2841, + "step": 24945 + }, + { + "epoch": 1.1685951187520494, + "grad_norm": 0.611297127443274, + "learning_rate": 1.943618424592893e-06, + "loss": 0.2791, + "step": 24946 + }, + { + "epoch": 1.1686419637419778, + "grad_norm": 0.5817516019511956, + "learning_rate": 1.9434335323419802e-06, + "loss": 0.2779, + "step": 24947 + }, + { + "epoch": 1.168688808731906, + "grad_norm": 0.5551799602643162, + "learning_rate": 1.9432486432939677e-06, + "loss": 0.2715, + "step": 24948 + }, + { + "epoch": 1.1687356537218345, + "grad_norm": 0.6381482383513993, + "learning_rate": 1.943063757449919e-06, + "loss": 0.2818, + "step": 24949 + }, + { + "epoch": 1.1687824987117628, + "grad_norm": 0.6200631420082013, + "learning_rate": 1.942878874810899e-06, + "loss": 0.2884, + "step": 24950 + }, + { + "epoch": 1.1688293437016912, + "grad_norm": 0.6241953326580263, + "learning_rate": 1.9426939953779706e-06, + "loss": 0.2679, + "step": 24951 + }, + { + "epoch": 1.1688761886916195, + "grad_norm": 0.5782011636426538, + "learning_rate": 1.9425091191521976e-06, + "loss": 0.2806, + "step": 24952 + }, + { + "epoch": 1.1689230336815477, + "grad_norm": 0.6180673787277832, + "learning_rate": 1.9423242461346457e-06, + "loss": 0.2758, + "step": 24953 + }, + { + "epoch": 1.1689698786714762, + "grad_norm": 0.5747469683194883, + "learning_rate": 1.9421393763263776e-06, + "loss": 0.2538, + "step": 24954 + }, + { + "epoch": 1.1690167236614044, + "grad_norm": 0.6038152404507753, + "learning_rate": 1.9419545097284566e-06, + "loss": 0.2729, + "step": 24955 + }, + { + "epoch": 1.1690635686513327, + "grad_norm": 0.6280608829912356, + "learning_rate": 1.9417696463419468e-06, + "loss": 0.279, + "step": 24956 + }, + { + "epoch": 1.1691104136412611, + "grad_norm": 0.551253531397598, + "learning_rate": 1.9415847861679127e-06, + "loss": 0.2506, + "step": 24957 + }, + { + "epoch": 1.1691572586311894, + "grad_norm": 0.5902349641424037, + "learning_rate": 1.9413999292074174e-06, + "loss": 0.2665, + "step": 24958 + }, + { + "epoch": 1.1692041036211178, + "grad_norm": 0.5927853253777439, + "learning_rate": 1.9412150754615257e-06, + "loss": 0.2766, + "step": 24959 + }, + { + "epoch": 1.169250948611046, + "grad_norm": 0.6046158492639151, + "learning_rate": 1.9410302249313014e-06, + "loss": 0.2808, + "step": 24960 + }, + { + "epoch": 1.1692977936009743, + "grad_norm": 0.5462121726477099, + "learning_rate": 1.9408453776178066e-06, + "loss": 0.2613, + "step": 24961 + }, + { + "epoch": 1.1693446385909028, + "grad_norm": 0.6207092596257034, + "learning_rate": 1.9406605335221062e-06, + "loss": 0.2695, + "step": 24962 + }, + { + "epoch": 1.169391483580831, + "grad_norm": 0.6095135471486226, + "learning_rate": 1.940475692645264e-06, + "loss": 0.2614, + "step": 24963 + }, + { + "epoch": 1.1694383285707595, + "grad_norm": 0.580400313321972, + "learning_rate": 1.940290854988344e-06, + "loss": 0.2617, + "step": 24964 + }, + { + "epoch": 1.1694851735606877, + "grad_norm": 0.5418646472207782, + "learning_rate": 1.9401060205524087e-06, + "loss": 0.2567, + "step": 24965 + }, + { + "epoch": 1.169532018550616, + "grad_norm": 0.6273487939899676, + "learning_rate": 1.9399211893385234e-06, + "loss": 0.2743, + "step": 24966 + }, + { + "epoch": 1.1695788635405444, + "grad_norm": 0.6358510237273887, + "learning_rate": 1.93973636134775e-06, + "loss": 0.275, + "step": 24967 + }, + { + "epoch": 1.1696257085304727, + "grad_norm": 0.5565305003968345, + "learning_rate": 1.939551536581154e-06, + "loss": 0.2633, + "step": 24968 + }, + { + "epoch": 1.169672553520401, + "grad_norm": 0.6376450940013015, + "learning_rate": 1.9393667150397967e-06, + "loss": 0.2789, + "step": 24969 + }, + { + "epoch": 1.1697193985103294, + "grad_norm": 0.6327290025071292, + "learning_rate": 1.939181896724744e-06, + "loss": 0.2875, + "step": 24970 + }, + { + "epoch": 1.1697662435002576, + "grad_norm": 0.5745704840391277, + "learning_rate": 1.938997081637058e-06, + "loss": 0.2774, + "step": 24971 + }, + { + "epoch": 1.169813088490186, + "grad_norm": 0.5687354748348086, + "learning_rate": 1.938812269777804e-06, + "loss": 0.2642, + "step": 24972 + }, + { + "epoch": 1.1698599334801143, + "grad_norm": 0.630312510993054, + "learning_rate": 1.938627461148043e-06, + "loss": 0.2855, + "step": 24973 + }, + { + "epoch": 1.1699067784700425, + "grad_norm": 0.5663737771618902, + "learning_rate": 1.9384426557488404e-06, + "loss": 0.2754, + "step": 24974 + }, + { + "epoch": 1.169953623459971, + "grad_norm": 0.5849865045060371, + "learning_rate": 1.9382578535812595e-06, + "loss": 0.2865, + "step": 24975 + }, + { + "epoch": 1.1700004684498992, + "grad_norm": 0.5829339847661497, + "learning_rate": 1.9380730546463624e-06, + "loss": 0.2794, + "step": 24976 + }, + { + "epoch": 1.1700473134398277, + "grad_norm": 0.603295547397437, + "learning_rate": 1.9378882589452143e-06, + "loss": 0.285, + "step": 24977 + }, + { + "epoch": 1.170094158429756, + "grad_norm": 0.6028428580238612, + "learning_rate": 1.9377034664788787e-06, + "loss": 0.2615, + "step": 24978 + }, + { + "epoch": 1.1701410034196842, + "grad_norm": 0.5840834829744737, + "learning_rate": 1.937518677248418e-06, + "loss": 0.281, + "step": 24979 + }, + { + "epoch": 1.1701878484096127, + "grad_norm": 0.6013623368473643, + "learning_rate": 1.937333891254895e-06, + "loss": 0.2865, + "step": 24980 + }, + { + "epoch": 1.170234693399541, + "grad_norm": 0.6029138918891366, + "learning_rate": 1.9371491084993745e-06, + "loss": 0.2765, + "step": 24981 + }, + { + "epoch": 1.1702815383894691, + "grad_norm": 0.5746034131700242, + "learning_rate": 1.9369643289829195e-06, + "loss": 0.2682, + "step": 24982 + }, + { + "epoch": 1.1703283833793976, + "grad_norm": 0.6212127881585494, + "learning_rate": 1.936779552706593e-06, + "loss": 0.2691, + "step": 24983 + }, + { + "epoch": 1.1703752283693258, + "grad_norm": 0.5990038292999327, + "learning_rate": 1.9365947796714596e-06, + "loss": 0.2733, + "step": 24984 + }, + { + "epoch": 1.1704220733592543, + "grad_norm": 0.5922556089897494, + "learning_rate": 1.9364100098785803e-06, + "loss": 0.2649, + "step": 24985 + }, + { + "epoch": 1.1704689183491825, + "grad_norm": 0.622733333552413, + "learning_rate": 1.9362252433290206e-06, + "loss": 0.2667, + "step": 24986 + }, + { + "epoch": 1.170515763339111, + "grad_norm": 0.612170674254253, + "learning_rate": 1.9360404800238423e-06, + "loss": 0.2969, + "step": 24987 + }, + { + "epoch": 1.1705626083290392, + "grad_norm": 0.6159331272423759, + "learning_rate": 1.9358557199641102e-06, + "loss": 0.2707, + "step": 24988 + }, + { + "epoch": 1.1706094533189675, + "grad_norm": 0.5351903517027478, + "learning_rate": 1.935670963150886e-06, + "loss": 0.266, + "step": 24989 + }, + { + "epoch": 1.170656298308896, + "grad_norm": 0.5787307879846364, + "learning_rate": 1.9354862095852343e-06, + "loss": 0.2733, + "step": 24990 + }, + { + "epoch": 1.1707031432988242, + "grad_norm": 0.5887616362447939, + "learning_rate": 1.9353014592682166e-06, + "loss": 0.2678, + "step": 24991 + }, + { + "epoch": 1.1707499882887524, + "grad_norm": 0.5354915277356348, + "learning_rate": 1.9351167122008975e-06, + "loss": 0.2574, + "step": 24992 + }, + { + "epoch": 1.170796833278681, + "grad_norm": 0.6281304450441593, + "learning_rate": 1.9349319683843397e-06, + "loss": 0.2966, + "step": 24993 + }, + { + "epoch": 1.1708436782686091, + "grad_norm": 0.5992862807925275, + "learning_rate": 1.934747227819606e-06, + "loss": 0.2937, + "step": 24994 + }, + { + "epoch": 1.1708905232585374, + "grad_norm": 0.5787416404817255, + "learning_rate": 1.9345624905077604e-06, + "loss": 0.2851, + "step": 24995 + }, + { + "epoch": 1.1709373682484658, + "grad_norm": 0.6121186199782548, + "learning_rate": 1.9343777564498662e-06, + "loss": 0.285, + "step": 24996 + }, + { + "epoch": 1.170984213238394, + "grad_norm": 0.6204768803160108, + "learning_rate": 1.934193025646985e-06, + "loss": 0.2756, + "step": 24997 + }, + { + "epoch": 1.1710310582283225, + "grad_norm": 0.6099369410383793, + "learning_rate": 1.9340082981001806e-06, + "loss": 0.2772, + "step": 24998 + }, + { + "epoch": 1.1710779032182508, + "grad_norm": 0.6125270309593247, + "learning_rate": 1.9338235738105164e-06, + "loss": 0.2668, + "step": 24999 + }, + { + "epoch": 1.1711247482081792, + "grad_norm": 0.58337455050841, + "learning_rate": 1.933638852779055e-06, + "loss": 0.2741, + "step": 25000 + }, + { + "epoch": 1.1711715931981075, + "grad_norm": 0.5932144809893668, + "learning_rate": 1.9334541350068597e-06, + "loss": 0.2691, + "step": 25001 + }, + { + "epoch": 1.1712184381880357, + "grad_norm": 0.6039868090620155, + "learning_rate": 1.9332694204949944e-06, + "loss": 0.2784, + "step": 25002 + }, + { + "epoch": 1.1712652831779642, + "grad_norm": 0.5359888797031285, + "learning_rate": 1.9330847092445203e-06, + "loss": 0.2465, + "step": 25003 + }, + { + "epoch": 1.1713121281678924, + "grad_norm": 0.5525922894460592, + "learning_rate": 1.9329000012565015e-06, + "loss": 0.2641, + "step": 25004 + }, + { + "epoch": 1.1713589731578207, + "grad_norm": 0.6231200643449353, + "learning_rate": 1.932715296532e-06, + "loss": 0.2795, + "step": 25005 + }, + { + "epoch": 1.1714058181477491, + "grad_norm": 0.6322718193342116, + "learning_rate": 1.93253059507208e-06, + "loss": 0.3039, + "step": 25006 + }, + { + "epoch": 1.1714526631376774, + "grad_norm": 0.6292196056813358, + "learning_rate": 1.9323458968778033e-06, + "loss": 0.2991, + "step": 25007 + }, + { + "epoch": 1.1714995081276058, + "grad_norm": 0.5775147835318251, + "learning_rate": 1.9321612019502347e-06, + "loss": 0.2673, + "step": 25008 + }, + { + "epoch": 1.171546353117534, + "grad_norm": 0.599212979899633, + "learning_rate": 1.931976510290434e-06, + "loss": 0.2967, + "step": 25009 + }, + { + "epoch": 1.1715931981074623, + "grad_norm": 0.557283261786815, + "learning_rate": 1.931791821899466e-06, + "loss": 0.2685, + "step": 25010 + }, + { + "epoch": 1.1716400430973908, + "grad_norm": 0.5861264701700238, + "learning_rate": 1.9316071367783935e-06, + "loss": 0.2767, + "step": 25011 + }, + { + "epoch": 1.171686888087319, + "grad_norm": 0.5593225385867485, + "learning_rate": 1.931422454928279e-06, + "loss": 0.2658, + "step": 25012 + }, + { + "epoch": 1.1717337330772475, + "grad_norm": 0.6102427392489772, + "learning_rate": 1.931237776350186e-06, + "loss": 0.2754, + "step": 25013 + }, + { + "epoch": 1.1717805780671757, + "grad_norm": 0.5842874665938403, + "learning_rate": 1.9310531010451766e-06, + "loss": 0.2751, + "step": 25014 + }, + { + "epoch": 1.171827423057104, + "grad_norm": 0.5787145567879493, + "learning_rate": 1.930868429014313e-06, + "loss": 0.2701, + "step": 25015 + }, + { + "epoch": 1.1718742680470324, + "grad_norm": 0.6413127578416062, + "learning_rate": 1.9306837602586584e-06, + "loss": 0.2964, + "step": 25016 + }, + { + "epoch": 1.1719211130369607, + "grad_norm": 0.6112795727411782, + "learning_rate": 1.930499094779276e-06, + "loss": 0.2773, + "step": 25017 + }, + { + "epoch": 1.171967958026889, + "grad_norm": 0.5633915859574071, + "learning_rate": 1.930314432577228e-06, + "loss": 0.2683, + "step": 25018 + }, + { + "epoch": 1.1720148030168174, + "grad_norm": 0.6390984664579239, + "learning_rate": 1.9301297736535772e-06, + "loss": 0.2995, + "step": 25019 + }, + { + "epoch": 1.1720616480067456, + "grad_norm": 0.6334385569831471, + "learning_rate": 1.929945118009387e-06, + "loss": 0.2958, + "step": 25020 + }, + { + "epoch": 1.172108492996674, + "grad_norm": 0.5913471037749745, + "learning_rate": 1.9297604656457192e-06, + "loss": 0.2775, + "step": 25021 + }, + { + "epoch": 1.1721553379866023, + "grad_norm": 0.6374060332884135, + "learning_rate": 1.929575816563636e-06, + "loss": 0.2742, + "step": 25022 + }, + { + "epoch": 1.1722021829765308, + "grad_norm": 0.5976961254005755, + "learning_rate": 1.9293911707642004e-06, + "loss": 0.2776, + "step": 25023 + }, + { + "epoch": 1.172249027966459, + "grad_norm": 0.5799572709777643, + "learning_rate": 1.9292065282484763e-06, + "loss": 0.2563, + "step": 25024 + }, + { + "epoch": 1.1722958729563873, + "grad_norm": 0.6199010996262093, + "learning_rate": 1.929021889017524e-06, + "loss": 0.2866, + "step": 25025 + }, + { + "epoch": 1.1723427179463157, + "grad_norm": 0.6474354543481837, + "learning_rate": 1.9288372530724073e-06, + "loss": 0.2881, + "step": 25026 + }, + { + "epoch": 1.172389562936244, + "grad_norm": 0.5933365802660845, + "learning_rate": 1.92865262041419e-06, + "loss": 0.2686, + "step": 25027 + }, + { + "epoch": 1.1724364079261722, + "grad_norm": 0.5815542959474718, + "learning_rate": 1.928467991043933e-06, + "loss": 0.2673, + "step": 25028 + }, + { + "epoch": 1.1724832529161007, + "grad_norm": 0.5851374214604678, + "learning_rate": 1.928283364962698e-06, + "loss": 0.2721, + "step": 25029 + }, + { + "epoch": 1.172530097906029, + "grad_norm": 0.6099997331863396, + "learning_rate": 1.9280987421715484e-06, + "loss": 0.2751, + "step": 25030 + }, + { + "epoch": 1.1725769428959572, + "grad_norm": 0.6163950623353222, + "learning_rate": 1.9279141226715476e-06, + "loss": 0.2834, + "step": 25031 + }, + { + "epoch": 1.1726237878858856, + "grad_norm": 0.6056314712492562, + "learning_rate": 1.9277295064637568e-06, + "loss": 0.2737, + "step": 25032 + }, + { + "epoch": 1.1726706328758139, + "grad_norm": 0.5480626754642974, + "learning_rate": 1.9275448935492397e-06, + "loss": 0.2588, + "step": 25033 + }, + { + "epoch": 1.1727174778657423, + "grad_norm": 0.5582026937355131, + "learning_rate": 1.927360283929057e-06, + "loss": 0.2621, + "step": 25034 + }, + { + "epoch": 1.1727643228556706, + "grad_norm": 0.6400768044771071, + "learning_rate": 1.9271756776042726e-06, + "loss": 0.2813, + "step": 25035 + }, + { + "epoch": 1.172811167845599, + "grad_norm": 0.575614854862104, + "learning_rate": 1.926991074575947e-06, + "loss": 0.2668, + "step": 25036 + }, + { + "epoch": 1.1728580128355273, + "grad_norm": 0.559632131408127, + "learning_rate": 1.926806474845145e-06, + "loss": 0.2633, + "step": 25037 + }, + { + "epoch": 1.1729048578254555, + "grad_norm": 0.526611931014152, + "learning_rate": 1.9266218784129267e-06, + "loss": 0.2502, + "step": 25038 + }, + { + "epoch": 1.172951702815384, + "grad_norm": 0.6015236599456361, + "learning_rate": 1.926437285280357e-06, + "loss": 0.2801, + "step": 25039 + }, + { + "epoch": 1.1729985478053122, + "grad_norm": 0.5857046629038979, + "learning_rate": 1.926252695448495e-06, + "loss": 0.2853, + "step": 25040 + }, + { + "epoch": 1.1730453927952404, + "grad_norm": 0.5824425692891475, + "learning_rate": 1.926068108918405e-06, + "loss": 0.2772, + "step": 25041 + }, + { + "epoch": 1.173092237785169, + "grad_norm": 0.5979227230862346, + "learning_rate": 1.9258835256911483e-06, + "loss": 0.2718, + "step": 25042 + }, + { + "epoch": 1.1731390827750972, + "grad_norm": 0.6296741134724061, + "learning_rate": 1.925698945767788e-06, + "loss": 0.2999, + "step": 25043 + }, + { + "epoch": 1.1731859277650256, + "grad_norm": 0.5797494524610162, + "learning_rate": 1.925514369149386e-06, + "loss": 0.2648, + "step": 25044 + }, + { + "epoch": 1.1732327727549539, + "grad_norm": 0.6448032423801167, + "learning_rate": 1.9253297958370056e-06, + "loss": 0.2791, + "step": 25045 + }, + { + "epoch": 1.173279617744882, + "grad_norm": 0.6130565409800982, + "learning_rate": 1.925145225831706e-06, + "loss": 0.2543, + "step": 25046 + }, + { + "epoch": 1.1733264627348106, + "grad_norm": 0.6036442568253169, + "learning_rate": 1.924960659134551e-06, + "loss": 0.2793, + "step": 25047 + }, + { + "epoch": 1.1733733077247388, + "grad_norm": 0.6376145595395947, + "learning_rate": 1.924776095746604e-06, + "loss": 0.2822, + "step": 25048 + }, + { + "epoch": 1.1734201527146673, + "grad_norm": 0.5926918002396228, + "learning_rate": 1.9245915356689253e-06, + "loss": 0.279, + "step": 25049 + }, + { + "epoch": 1.1734669977045955, + "grad_norm": 0.6112614819400339, + "learning_rate": 1.924406978902578e-06, + "loss": 0.2866, + "step": 25050 + }, + { + "epoch": 1.1735138426945237, + "grad_norm": 0.6141906470227508, + "learning_rate": 1.9242224254486243e-06, + "loss": 0.2879, + "step": 25051 + }, + { + "epoch": 1.1735606876844522, + "grad_norm": 0.5522953199057973, + "learning_rate": 1.9240378753081256e-06, + "loss": 0.2706, + "step": 25052 + }, + { + "epoch": 1.1736075326743804, + "grad_norm": 0.5613199915803309, + "learning_rate": 1.923853328482144e-06, + "loss": 0.2638, + "step": 25053 + }, + { + "epoch": 1.1736543776643087, + "grad_norm": 0.588921770545065, + "learning_rate": 1.923668784971741e-06, + "loss": 0.2744, + "step": 25054 + }, + { + "epoch": 1.1737012226542372, + "grad_norm": 0.5716949843612436, + "learning_rate": 1.92348424477798e-06, + "loss": 0.2589, + "step": 25055 + }, + { + "epoch": 1.1737480676441654, + "grad_norm": 0.6263418304831794, + "learning_rate": 1.923299707901922e-06, + "loss": 0.2737, + "step": 25056 + }, + { + "epoch": 1.1737949126340939, + "grad_norm": 0.5929043334197739, + "learning_rate": 1.92311517434463e-06, + "loss": 0.2824, + "step": 25057 + }, + { + "epoch": 1.173841757624022, + "grad_norm": 0.57354150770007, + "learning_rate": 1.9229306441071645e-06, + "loss": 0.2674, + "step": 25058 + }, + { + "epoch": 1.1738886026139506, + "grad_norm": 0.5966352699367269, + "learning_rate": 1.922746117190588e-06, + "loss": 0.2795, + "step": 25059 + }, + { + "epoch": 1.1739354476038788, + "grad_norm": 0.6007023675627907, + "learning_rate": 1.9225615935959626e-06, + "loss": 0.2773, + "step": 25060 + }, + { + "epoch": 1.173982292593807, + "grad_norm": 0.5538231514386869, + "learning_rate": 1.92237707332435e-06, + "loss": 0.2716, + "step": 25061 + }, + { + "epoch": 1.1740291375837355, + "grad_norm": 0.6349155888569106, + "learning_rate": 1.9221925563768124e-06, + "loss": 0.2776, + "step": 25062 + }, + { + "epoch": 1.1740759825736637, + "grad_norm": 0.612331775957101, + "learning_rate": 1.922008042754412e-06, + "loss": 0.2773, + "step": 25063 + }, + { + "epoch": 1.174122827563592, + "grad_norm": 0.6073274356665692, + "learning_rate": 1.921823532458209e-06, + "loss": 0.2803, + "step": 25064 + }, + { + "epoch": 1.1741696725535204, + "grad_norm": 0.6088830748214082, + "learning_rate": 1.9216390254892665e-06, + "loss": 0.28, + "step": 25065 + }, + { + "epoch": 1.1742165175434487, + "grad_norm": 0.5936305538927558, + "learning_rate": 1.9214545218486464e-06, + "loss": 0.2734, + "step": 25066 + }, + { + "epoch": 1.174263362533377, + "grad_norm": 0.6006529641702539, + "learning_rate": 1.92127002153741e-06, + "loss": 0.2863, + "step": 25067 + }, + { + "epoch": 1.1743102075233054, + "grad_norm": 0.6175006216249438, + "learning_rate": 1.9210855245566185e-06, + "loss": 0.2688, + "step": 25068 + }, + { + "epoch": 1.1743570525132336, + "grad_norm": 0.6087744539044644, + "learning_rate": 1.9209010309073356e-06, + "loss": 0.278, + "step": 25069 + }, + { + "epoch": 1.174403897503162, + "grad_norm": 0.5471706874762761, + "learning_rate": 1.920716540590621e-06, + "loss": 0.2564, + "step": 25070 + }, + { + "epoch": 1.1744507424930903, + "grad_norm": 0.5705565600015452, + "learning_rate": 1.920532053607537e-06, + "loss": 0.2786, + "step": 25071 + }, + { + "epoch": 1.1744975874830188, + "grad_norm": 0.5435077957541073, + "learning_rate": 1.920347569959145e-06, + "loss": 0.2531, + "step": 25072 + }, + { + "epoch": 1.174544432472947, + "grad_norm": 0.5703503380146734, + "learning_rate": 1.9201630896465077e-06, + "loss": 0.2782, + "step": 25073 + }, + { + "epoch": 1.1745912774628753, + "grad_norm": 0.5854609616742027, + "learning_rate": 1.919978612670686e-06, + "loss": 0.269, + "step": 25074 + }, + { + "epoch": 1.1746381224528037, + "grad_norm": 0.564804956179797, + "learning_rate": 1.919794139032742e-06, + "loss": 0.2687, + "step": 25075 + }, + { + "epoch": 1.174684967442732, + "grad_norm": 0.5993462536428957, + "learning_rate": 1.919609668733736e-06, + "loss": 0.2759, + "step": 25076 + }, + { + "epoch": 1.1747318124326602, + "grad_norm": 0.5874601236841642, + "learning_rate": 1.9194252017747315e-06, + "loss": 0.2724, + "step": 25077 + }, + { + "epoch": 1.1747786574225887, + "grad_norm": 0.551611831615442, + "learning_rate": 1.919240738156788e-06, + "loss": 0.2645, + "step": 25078 + }, + { + "epoch": 1.174825502412517, + "grad_norm": 0.5875881402778754, + "learning_rate": 1.919056277880968e-06, + "loss": 0.2776, + "step": 25079 + }, + { + "epoch": 1.1748723474024454, + "grad_norm": 0.6406381228302791, + "learning_rate": 1.918871820948334e-06, + "loss": 0.2756, + "step": 25080 + }, + { + "epoch": 1.1749191923923736, + "grad_norm": 0.6080366970039549, + "learning_rate": 1.918687367359947e-06, + "loss": 0.2659, + "step": 25081 + }, + { + "epoch": 1.1749660373823019, + "grad_norm": 0.6218772963266271, + "learning_rate": 1.918502917116867e-06, + "loss": 0.2771, + "step": 25082 + }, + { + "epoch": 1.1750128823722303, + "grad_norm": 0.601120179387674, + "learning_rate": 1.9183184702201564e-06, + "loss": 0.2646, + "step": 25083 + }, + { + "epoch": 1.1750597273621586, + "grad_norm": 0.596280154590884, + "learning_rate": 1.918134026670878e-06, + "loss": 0.2832, + "step": 25084 + }, + { + "epoch": 1.175106572352087, + "grad_norm": 0.5870823875448016, + "learning_rate": 1.917949586470091e-06, + "loss": 0.2791, + "step": 25085 + }, + { + "epoch": 1.1751534173420153, + "grad_norm": 0.5876532792524265, + "learning_rate": 1.917765149618858e-06, + "loss": 0.2691, + "step": 25086 + }, + { + "epoch": 1.1752002623319435, + "grad_norm": 0.5900183199453134, + "learning_rate": 1.9175807161182407e-06, + "loss": 0.2731, + "step": 25087 + }, + { + "epoch": 1.175247107321872, + "grad_norm": 0.5753973509550008, + "learning_rate": 1.9173962859693e-06, + "loss": 0.262, + "step": 25088 + }, + { + "epoch": 1.1752939523118002, + "grad_norm": 0.6274275436294386, + "learning_rate": 1.9172118591730967e-06, + "loss": 0.2882, + "step": 25089 + }, + { + "epoch": 1.1753407973017285, + "grad_norm": 0.6151250051807582, + "learning_rate": 1.917027435730693e-06, + "loss": 0.2753, + "step": 25090 + }, + { + "epoch": 1.175387642291657, + "grad_norm": 0.6015106415106207, + "learning_rate": 1.9168430156431495e-06, + "loss": 0.2971, + "step": 25091 + }, + { + "epoch": 1.1754344872815852, + "grad_norm": 0.638741548876127, + "learning_rate": 1.916658598911528e-06, + "loss": 0.2862, + "step": 25092 + }, + { + "epoch": 1.1754813322715136, + "grad_norm": 0.572930497234295, + "learning_rate": 1.9164741855368898e-06, + "loss": 0.2782, + "step": 25093 + }, + { + "epoch": 1.1755281772614419, + "grad_norm": 0.6049412426253221, + "learning_rate": 1.916289775520297e-06, + "loss": 0.2845, + "step": 25094 + }, + { + "epoch": 1.1755750222513703, + "grad_norm": 0.5942787721555972, + "learning_rate": 1.9161053688628083e-06, + "loss": 0.2675, + "step": 25095 + }, + { + "epoch": 1.1756218672412986, + "grad_norm": 0.577260812956703, + "learning_rate": 1.9159209655654867e-06, + "loss": 0.2753, + "step": 25096 + }, + { + "epoch": 1.1756687122312268, + "grad_norm": 0.6182124803056929, + "learning_rate": 1.9157365656293935e-06, + "loss": 0.2817, + "step": 25097 + }, + { + "epoch": 1.1757155572211553, + "grad_norm": 0.6547544262620154, + "learning_rate": 1.915552169055589e-06, + "loss": 0.2911, + "step": 25098 + }, + { + "epoch": 1.1757624022110835, + "grad_norm": 0.5694043755246758, + "learning_rate": 1.915367775845135e-06, + "loss": 0.2736, + "step": 25099 + }, + { + "epoch": 1.1758092472010118, + "grad_norm": 0.5837187454183318, + "learning_rate": 1.9151833859990936e-06, + "loss": 0.2706, + "step": 25100 + }, + { + "epoch": 1.1758560921909402, + "grad_norm": 0.5922416988481786, + "learning_rate": 1.9149989995185245e-06, + "loss": 0.276, + "step": 25101 + }, + { + "epoch": 1.1759029371808685, + "grad_norm": 0.6567255025097306, + "learning_rate": 1.9148146164044882e-06, + "loss": 0.2795, + "step": 25102 + }, + { + "epoch": 1.1759497821707967, + "grad_norm": 0.5975503055149056, + "learning_rate": 1.914630236658047e-06, + "loss": 0.2662, + "step": 25103 + }, + { + "epoch": 1.1759966271607252, + "grad_norm": 0.5677127982739831, + "learning_rate": 1.914445860280262e-06, + "loss": 0.2685, + "step": 25104 + }, + { + "epoch": 1.1760434721506534, + "grad_norm": 0.5890373148433644, + "learning_rate": 1.9142614872721934e-06, + "loss": 0.2747, + "step": 25105 + }, + { + "epoch": 1.1760903171405819, + "grad_norm": 0.5872406917626228, + "learning_rate": 1.9140771176349036e-06, + "loss": 0.2746, + "step": 25106 + }, + { + "epoch": 1.1761371621305101, + "grad_norm": 0.6371317494098766, + "learning_rate": 1.913892751369452e-06, + "loss": 0.2745, + "step": 25107 + }, + { + "epoch": 1.1761840071204386, + "grad_norm": 0.6579990335099453, + "learning_rate": 1.913708388476901e-06, + "loss": 0.2956, + "step": 25108 + }, + { + "epoch": 1.1762308521103668, + "grad_norm": 0.6200696642378885, + "learning_rate": 1.9135240289583097e-06, + "loss": 0.2939, + "step": 25109 + }, + { + "epoch": 1.176277697100295, + "grad_norm": 0.6220373845468333, + "learning_rate": 1.913339672814741e-06, + "loss": 0.2853, + "step": 25110 + }, + { + "epoch": 1.1763245420902235, + "grad_norm": 0.5448383012893414, + "learning_rate": 1.9131553200472546e-06, + "loss": 0.2564, + "step": 25111 + }, + { + "epoch": 1.1763713870801518, + "grad_norm": 0.6165693431262627, + "learning_rate": 1.912970970656913e-06, + "loss": 0.2748, + "step": 25112 + }, + { + "epoch": 1.17641823207008, + "grad_norm": 0.5791500008395689, + "learning_rate": 1.9127866246447745e-06, + "loss": 0.2529, + "step": 25113 + }, + { + "epoch": 1.1764650770600085, + "grad_norm": 0.6239298416418302, + "learning_rate": 1.912602282011902e-06, + "loss": 0.281, + "step": 25114 + }, + { + "epoch": 1.1765119220499367, + "grad_norm": 0.6409491075628194, + "learning_rate": 1.9124179427593555e-06, + "loss": 0.2836, + "step": 25115 + }, + { + "epoch": 1.1765587670398652, + "grad_norm": 0.6086307158967428, + "learning_rate": 1.912233606888196e-06, + "loss": 0.2852, + "step": 25116 + }, + { + "epoch": 1.1766056120297934, + "grad_norm": 0.607615404836057, + "learning_rate": 1.9120492743994843e-06, + "loss": 0.28, + "step": 25117 + }, + { + "epoch": 1.1766524570197217, + "grad_norm": 0.6076448727501539, + "learning_rate": 1.911864945294282e-06, + "loss": 0.2693, + "step": 25118 + }, + { + "epoch": 1.1766993020096501, + "grad_norm": 0.5678846315020663, + "learning_rate": 1.911680619573649e-06, + "loss": 0.2761, + "step": 25119 + }, + { + "epoch": 1.1767461469995784, + "grad_norm": 0.551541274889888, + "learning_rate": 1.9114962972386454e-06, + "loss": 0.2614, + "step": 25120 + }, + { + "epoch": 1.1767929919895068, + "grad_norm": 0.6164240986687163, + "learning_rate": 1.911311978290333e-06, + "loss": 0.2572, + "step": 25121 + }, + { + "epoch": 1.176839836979435, + "grad_norm": 0.5624415964915191, + "learning_rate": 1.9111276627297726e-06, + "loss": 0.2778, + "step": 25122 + }, + { + "epoch": 1.1768866819693633, + "grad_norm": 0.5734854971868903, + "learning_rate": 1.9109433505580237e-06, + "loss": 0.2784, + "step": 25123 + }, + { + "epoch": 1.1769335269592918, + "grad_norm": 0.5970790440480793, + "learning_rate": 1.910759041776149e-06, + "loss": 0.2784, + "step": 25124 + }, + { + "epoch": 1.17698037194922, + "grad_norm": 0.5784001243028307, + "learning_rate": 1.910574736385207e-06, + "loss": 0.2927, + "step": 25125 + }, + { + "epoch": 1.1770272169391482, + "grad_norm": 0.600265526857702, + "learning_rate": 1.9103904343862595e-06, + "loss": 0.2829, + "step": 25126 + }, + { + "epoch": 1.1770740619290767, + "grad_norm": 0.5842275013521795, + "learning_rate": 1.9102061357803662e-06, + "loss": 0.2732, + "step": 25127 + }, + { + "epoch": 1.177120906919005, + "grad_norm": 0.6151657345330416, + "learning_rate": 1.9100218405685895e-06, + "loss": 0.2847, + "step": 25128 + }, + { + "epoch": 1.1771677519089334, + "grad_norm": 0.5839770392090489, + "learning_rate": 1.909837548751988e-06, + "loss": 0.2723, + "step": 25129 + }, + { + "epoch": 1.1772145968988617, + "grad_norm": 0.5924257390909966, + "learning_rate": 1.909653260331624e-06, + "loss": 0.2838, + "step": 25130 + }, + { + "epoch": 1.1772614418887901, + "grad_norm": 0.5467390658208816, + "learning_rate": 1.909468975308556e-06, + "loss": 0.2549, + "step": 25131 + }, + { + "epoch": 1.1773082868787184, + "grad_norm": 0.6349367962961621, + "learning_rate": 1.9092846936838465e-06, + "loss": 0.2855, + "step": 25132 + }, + { + "epoch": 1.1773551318686466, + "grad_norm": 0.5813205507701636, + "learning_rate": 1.9091004154585544e-06, + "loss": 0.2654, + "step": 25133 + }, + { + "epoch": 1.177401976858575, + "grad_norm": 0.5342551005728752, + "learning_rate": 1.9089161406337405e-06, + "loss": 0.2506, + "step": 25134 + }, + { + "epoch": 1.1774488218485033, + "grad_norm": 0.52271552880427, + "learning_rate": 1.908731869210467e-06, + "loss": 0.2702, + "step": 25135 + }, + { + "epoch": 1.1774956668384315, + "grad_norm": 0.5915276429227334, + "learning_rate": 1.9085476011897928e-06, + "loss": 0.268, + "step": 25136 + }, + { + "epoch": 1.17754251182836, + "grad_norm": 0.5822445239244806, + "learning_rate": 1.908363336572778e-06, + "loss": 0.2589, + "step": 25137 + }, + { + "epoch": 1.1775893568182882, + "grad_norm": 0.5811473112388026, + "learning_rate": 1.908179075360483e-06, + "loss": 0.2626, + "step": 25138 + }, + { + "epoch": 1.1776362018082165, + "grad_norm": 0.6069729926263944, + "learning_rate": 1.9079948175539692e-06, + "loss": 0.2685, + "step": 25139 + }, + { + "epoch": 1.177683046798145, + "grad_norm": 0.6033345861378883, + "learning_rate": 1.907810563154296e-06, + "loss": 0.2772, + "step": 25140 + }, + { + "epoch": 1.1777298917880732, + "grad_norm": 0.6039289937273611, + "learning_rate": 1.9076263121625243e-06, + "loss": 0.2696, + "step": 25141 + }, + { + "epoch": 1.1777767367780017, + "grad_norm": 0.5779230945815377, + "learning_rate": 1.9074420645797156e-06, + "loss": 0.2809, + "step": 25142 + }, + { + "epoch": 1.17782358176793, + "grad_norm": 0.5690255815582951, + "learning_rate": 1.9072578204069278e-06, + "loss": 0.2773, + "step": 25143 + }, + { + "epoch": 1.1778704267578584, + "grad_norm": 0.6074941876570457, + "learning_rate": 1.907073579645222e-06, + "loss": 0.2807, + "step": 25144 + }, + { + "epoch": 1.1779172717477866, + "grad_norm": 0.6179036881880603, + "learning_rate": 1.9068893422956585e-06, + "loss": 0.2858, + "step": 25145 + }, + { + "epoch": 1.1779641167377148, + "grad_norm": 0.568127587324014, + "learning_rate": 1.9067051083592987e-06, + "loss": 0.2631, + "step": 25146 + }, + { + "epoch": 1.1780109617276433, + "grad_norm": 0.5990535235660217, + "learning_rate": 1.9065208778372011e-06, + "loss": 0.2791, + "step": 25147 + }, + { + "epoch": 1.1780578067175715, + "grad_norm": 0.5666699162308979, + "learning_rate": 1.9063366507304277e-06, + "loss": 0.271, + "step": 25148 + }, + { + "epoch": 1.1781046517074998, + "grad_norm": 0.587968429312011, + "learning_rate": 1.9061524270400367e-06, + "loss": 0.2801, + "step": 25149 + }, + { + "epoch": 1.1781514966974282, + "grad_norm": 0.5824169760342462, + "learning_rate": 1.9059682067670899e-06, + "loss": 0.2772, + "step": 25150 + }, + { + "epoch": 1.1781983416873565, + "grad_norm": 0.598358317503002, + "learning_rate": 1.9057839899126459e-06, + "loss": 0.267, + "step": 25151 + }, + { + "epoch": 1.178245186677285, + "grad_norm": 0.5368807238605205, + "learning_rate": 1.9055997764777658e-06, + "loss": 0.2421, + "step": 25152 + }, + { + "epoch": 1.1782920316672132, + "grad_norm": 0.5712259384985228, + "learning_rate": 1.9054155664635105e-06, + "loss": 0.2773, + "step": 25153 + }, + { + "epoch": 1.1783388766571414, + "grad_norm": 0.5683016348728426, + "learning_rate": 1.9052313598709393e-06, + "loss": 0.2685, + "step": 25154 + }, + { + "epoch": 1.17838572164707, + "grad_norm": 0.5826959085383838, + "learning_rate": 1.9050471567011111e-06, + "loss": 0.2669, + "step": 25155 + }, + { + "epoch": 1.1784325666369981, + "grad_norm": 0.5873581579338498, + "learning_rate": 1.9048629569550869e-06, + "loss": 0.2647, + "step": 25156 + }, + { + "epoch": 1.1784794116269266, + "grad_norm": 0.6583526604301562, + "learning_rate": 1.9046787606339277e-06, + "loss": 0.2875, + "step": 25157 + }, + { + "epoch": 1.1785262566168548, + "grad_norm": 0.6084738599916779, + "learning_rate": 1.9044945677386917e-06, + "loss": 0.2756, + "step": 25158 + }, + { + "epoch": 1.178573101606783, + "grad_norm": 0.5989049828197486, + "learning_rate": 1.90431037827044e-06, + "loss": 0.2688, + "step": 25159 + }, + { + "epoch": 1.1786199465967115, + "grad_norm": 0.5646903129385392, + "learning_rate": 1.904126192230233e-06, + "loss": 0.2583, + "step": 25160 + }, + { + "epoch": 1.1786667915866398, + "grad_norm": 0.5940612908879761, + "learning_rate": 1.9039420096191302e-06, + "loss": 0.2647, + "step": 25161 + }, + { + "epoch": 1.178713636576568, + "grad_norm": 0.6116037620331564, + "learning_rate": 1.9037578304381905e-06, + "loss": 0.2744, + "step": 25162 + }, + { + "epoch": 1.1787604815664965, + "grad_norm": 0.5750521598170638, + "learning_rate": 1.9035736546884743e-06, + "loss": 0.2728, + "step": 25163 + }, + { + "epoch": 1.1788073265564247, + "grad_norm": 0.5724064466354366, + "learning_rate": 1.9033894823710424e-06, + "loss": 0.2741, + "step": 25164 + }, + { + "epoch": 1.1788541715463532, + "grad_norm": 0.5756722875161261, + "learning_rate": 1.9032053134869539e-06, + "loss": 0.2626, + "step": 25165 + }, + { + "epoch": 1.1789010165362814, + "grad_norm": 0.573523479211204, + "learning_rate": 1.9030211480372687e-06, + "loss": 0.2885, + "step": 25166 + }, + { + "epoch": 1.17894786152621, + "grad_norm": 0.5703654150055136, + "learning_rate": 1.9028369860230477e-06, + "loss": 0.2627, + "step": 25167 + }, + { + "epoch": 1.1789947065161381, + "grad_norm": 0.598451855599688, + "learning_rate": 1.9026528274453493e-06, + "loss": 0.2611, + "step": 25168 + }, + { + "epoch": 1.1790415515060664, + "grad_norm": 0.6426472660209593, + "learning_rate": 1.9024686723052333e-06, + "loss": 0.27, + "step": 25169 + }, + { + "epoch": 1.1790883964959948, + "grad_norm": 0.5984509057394339, + "learning_rate": 1.9022845206037595e-06, + "loss": 0.2833, + "step": 25170 + }, + { + "epoch": 1.179135241485923, + "grad_norm": 0.6218902855553153, + "learning_rate": 1.902100372341989e-06, + "loss": 0.2698, + "step": 25171 + }, + { + "epoch": 1.1791820864758513, + "grad_norm": 0.5807772557451746, + "learning_rate": 1.9019162275209802e-06, + "loss": 0.2781, + "step": 25172 + }, + { + "epoch": 1.1792289314657798, + "grad_norm": 0.6133518124237395, + "learning_rate": 1.9017320861417938e-06, + "loss": 0.2741, + "step": 25173 + }, + { + "epoch": 1.179275776455708, + "grad_norm": 0.589704671345441, + "learning_rate": 1.901547948205488e-06, + "loss": 0.2646, + "step": 25174 + }, + { + "epoch": 1.1793226214456363, + "grad_norm": 0.5597465901751167, + "learning_rate": 1.9013638137131239e-06, + "loss": 0.2546, + "step": 25175 + }, + { + "epoch": 1.1793694664355647, + "grad_norm": 0.5471933256773344, + "learning_rate": 1.90117968266576e-06, + "loss": 0.2755, + "step": 25176 + }, + { + "epoch": 1.179416311425493, + "grad_norm": 0.6245869226040789, + "learning_rate": 1.900995555064457e-06, + "loss": 0.2858, + "step": 25177 + }, + { + "epoch": 1.1794631564154214, + "grad_norm": 0.6311772480175891, + "learning_rate": 1.9008114309102735e-06, + "loss": 0.2844, + "step": 25178 + }, + { + "epoch": 1.1795100014053497, + "grad_norm": 0.6217876751952423, + "learning_rate": 1.9006273102042707e-06, + "loss": 0.2926, + "step": 25179 + }, + { + "epoch": 1.1795568463952781, + "grad_norm": 0.5838799368655828, + "learning_rate": 1.9004431929475061e-06, + "loss": 0.2638, + "step": 25180 + }, + { + "epoch": 1.1796036913852064, + "grad_norm": 0.5527816881729031, + "learning_rate": 1.9002590791410409e-06, + "loss": 0.2701, + "step": 25181 + }, + { + "epoch": 1.1796505363751346, + "grad_norm": 0.5691126597609232, + "learning_rate": 1.9000749687859331e-06, + "loss": 0.2758, + "step": 25182 + }, + { + "epoch": 1.179697381365063, + "grad_norm": 0.5828679717772224, + "learning_rate": 1.8998908618832434e-06, + "loss": 0.2792, + "step": 25183 + }, + { + "epoch": 1.1797442263549913, + "grad_norm": 0.5858640030350173, + "learning_rate": 1.8997067584340313e-06, + "loss": 0.2861, + "step": 25184 + }, + { + "epoch": 1.1797910713449196, + "grad_norm": 0.610753570883968, + "learning_rate": 1.8995226584393564e-06, + "loss": 0.289, + "step": 25185 + }, + { + "epoch": 1.179837916334848, + "grad_norm": 0.5707778445855414, + "learning_rate": 1.8993385619002766e-06, + "loss": 0.2781, + "step": 25186 + }, + { + "epoch": 1.1798847613247763, + "grad_norm": 0.6192663430518038, + "learning_rate": 1.8991544688178526e-06, + "loss": 0.2791, + "step": 25187 + }, + { + "epoch": 1.1799316063147047, + "grad_norm": 0.5733740563262437, + "learning_rate": 1.898970379193144e-06, + "loss": 0.268, + "step": 25188 + }, + { + "epoch": 1.179978451304633, + "grad_norm": 0.5868532073389369, + "learning_rate": 1.8987862930272093e-06, + "loss": 0.2704, + "step": 25189 + }, + { + "epoch": 1.1800252962945612, + "grad_norm": 0.6323372901865103, + "learning_rate": 1.8986022103211082e-06, + "loss": 0.2839, + "step": 25190 + }, + { + "epoch": 1.1800721412844897, + "grad_norm": 0.5981087359683124, + "learning_rate": 1.8984181310759017e-06, + "loss": 0.27, + "step": 25191 + }, + { + "epoch": 1.180118986274418, + "grad_norm": 0.5963946842262425, + "learning_rate": 1.8982340552926468e-06, + "loss": 0.28, + "step": 25192 + }, + { + "epoch": 1.1801658312643464, + "grad_norm": 0.6089312695631849, + "learning_rate": 1.8980499829724033e-06, + "loss": 0.2665, + "step": 25193 + }, + { + "epoch": 1.1802126762542746, + "grad_norm": 0.5957130979414035, + "learning_rate": 1.8978659141162308e-06, + "loss": 0.2896, + "step": 25194 + }, + { + "epoch": 1.1802595212442029, + "grad_norm": 0.6161194147082794, + "learning_rate": 1.897681848725189e-06, + "loss": 0.28, + "step": 25195 + }, + { + "epoch": 1.1803063662341313, + "grad_norm": 0.5814244538932066, + "learning_rate": 1.8974977868003363e-06, + "loss": 0.2768, + "step": 25196 + }, + { + "epoch": 1.1803532112240596, + "grad_norm": 0.6164665391261311, + "learning_rate": 1.8973137283427337e-06, + "loss": 0.2708, + "step": 25197 + }, + { + "epoch": 1.1804000562139878, + "grad_norm": 0.5348366538780231, + "learning_rate": 1.8971296733534378e-06, + "loss": 0.2625, + "step": 25198 + }, + { + "epoch": 1.1804469012039163, + "grad_norm": 0.6420307657617721, + "learning_rate": 1.8969456218335096e-06, + "loss": 0.2722, + "step": 25199 + }, + { + "epoch": 1.1804937461938445, + "grad_norm": 0.5766041868647341, + "learning_rate": 1.8967615737840076e-06, + "loss": 0.2722, + "step": 25200 + }, + { + "epoch": 1.180540591183773, + "grad_norm": 0.5894123371853089, + "learning_rate": 1.8965775292059909e-06, + "loss": 0.2855, + "step": 25201 + }, + { + "epoch": 1.1805874361737012, + "grad_norm": 0.6321602427293763, + "learning_rate": 1.8963934881005194e-06, + "loss": 0.2835, + "step": 25202 + }, + { + "epoch": 1.1806342811636297, + "grad_norm": 0.5929642668275064, + "learning_rate": 1.8962094504686525e-06, + "loss": 0.2635, + "step": 25203 + }, + { + "epoch": 1.180681126153558, + "grad_norm": 0.5824142904728736, + "learning_rate": 1.8960254163114466e-06, + "loss": 0.2822, + "step": 25204 + }, + { + "epoch": 1.1807279711434862, + "grad_norm": 0.6073139576098198, + "learning_rate": 1.8958413856299632e-06, + "loss": 0.2838, + "step": 25205 + }, + { + "epoch": 1.1807748161334146, + "grad_norm": 0.5910908504280308, + "learning_rate": 1.8956573584252614e-06, + "loss": 0.2783, + "step": 25206 + }, + { + "epoch": 1.1808216611233429, + "grad_norm": 0.5941232692567193, + "learning_rate": 1.895473334698399e-06, + "loss": 0.2856, + "step": 25207 + }, + { + "epoch": 1.180868506113271, + "grad_norm": 0.5466218563064115, + "learning_rate": 1.8952893144504353e-06, + "loss": 0.2665, + "step": 25208 + }, + { + "epoch": 1.1809153511031996, + "grad_norm": 0.5779808043129241, + "learning_rate": 1.8951052976824309e-06, + "loss": 0.2841, + "step": 25209 + }, + { + "epoch": 1.1809621960931278, + "grad_norm": 0.6359145075582259, + "learning_rate": 1.8949212843954428e-06, + "loss": 0.2703, + "step": 25210 + }, + { + "epoch": 1.181009041083056, + "grad_norm": 0.5840987560137177, + "learning_rate": 1.8947372745905301e-06, + "loss": 0.2668, + "step": 25211 + }, + { + "epoch": 1.1810558860729845, + "grad_norm": 0.6066879147940482, + "learning_rate": 1.8945532682687527e-06, + "loss": 0.2792, + "step": 25212 + }, + { + "epoch": 1.1811027310629127, + "grad_norm": 0.622081271809417, + "learning_rate": 1.894369265431169e-06, + "loss": 0.2964, + "step": 25213 + }, + { + "epoch": 1.1811495760528412, + "grad_norm": 0.5401736916683205, + "learning_rate": 1.894185266078838e-06, + "loss": 0.257, + "step": 25214 + }, + { + "epoch": 1.1811964210427695, + "grad_norm": 0.667261587639434, + "learning_rate": 1.894001270212819e-06, + "loss": 0.2985, + "step": 25215 + }, + { + "epoch": 1.181243266032698, + "grad_norm": 0.5592099783471108, + "learning_rate": 1.8938172778341696e-06, + "loss": 0.2577, + "step": 25216 + }, + { + "epoch": 1.1812901110226262, + "grad_norm": 0.5962476839308644, + "learning_rate": 1.8936332889439503e-06, + "loss": 0.2881, + "step": 25217 + }, + { + "epoch": 1.1813369560125544, + "grad_norm": 0.5623273697351786, + "learning_rate": 1.8934493035432179e-06, + "loss": 0.269, + "step": 25218 + }, + { + "epoch": 1.1813838010024829, + "grad_norm": 0.6056043318444441, + "learning_rate": 1.8932653216330329e-06, + "loss": 0.2847, + "step": 25219 + }, + { + "epoch": 1.181430645992411, + "grad_norm": 0.6015480789331754, + "learning_rate": 1.8930813432144532e-06, + "loss": 0.2825, + "step": 25220 + }, + { + "epoch": 1.1814774909823393, + "grad_norm": 0.5586403707872764, + "learning_rate": 1.8928973682885387e-06, + "loss": 0.2452, + "step": 25221 + }, + { + "epoch": 1.1815243359722678, + "grad_norm": 0.575903832418167, + "learning_rate": 1.8927133968563463e-06, + "loss": 0.2716, + "step": 25222 + }, + { + "epoch": 1.181571180962196, + "grad_norm": 0.5822952647574968, + "learning_rate": 1.8925294289189358e-06, + "loss": 0.2927, + "step": 25223 + }, + { + "epoch": 1.1816180259521245, + "grad_norm": 0.562219505753854, + "learning_rate": 1.8923454644773662e-06, + "loss": 0.2612, + "step": 25224 + }, + { + "epoch": 1.1816648709420527, + "grad_norm": 0.5583568819785613, + "learning_rate": 1.892161503532695e-06, + "loss": 0.2564, + "step": 25225 + }, + { + "epoch": 1.181711715931981, + "grad_norm": 0.6066059827802821, + "learning_rate": 1.8919775460859824e-06, + "loss": 0.2836, + "step": 25226 + }, + { + "epoch": 1.1817585609219095, + "grad_norm": 0.5756230211568606, + "learning_rate": 1.891793592138286e-06, + "loss": 0.2669, + "step": 25227 + }, + { + "epoch": 1.1818054059118377, + "grad_norm": 0.5921830838824289, + "learning_rate": 1.8916096416906645e-06, + "loss": 0.27, + "step": 25228 + }, + { + "epoch": 1.1818522509017662, + "grad_norm": 0.6154012776640382, + "learning_rate": 1.891425694744176e-06, + "loss": 0.2762, + "step": 25229 + }, + { + "epoch": 1.1818990958916944, + "grad_norm": 0.5961595635291037, + "learning_rate": 1.8912417512998807e-06, + "loss": 0.2778, + "step": 25230 + }, + { + "epoch": 1.1819459408816226, + "grad_norm": 0.6217409264655884, + "learning_rate": 1.891057811358835e-06, + "loss": 0.2774, + "step": 25231 + }, + { + "epoch": 1.181992785871551, + "grad_norm": 0.5605699018657474, + "learning_rate": 1.8908738749220992e-06, + "loss": 0.2618, + "step": 25232 + }, + { + "epoch": 1.1820396308614793, + "grad_norm": 0.5883996491960322, + "learning_rate": 1.8906899419907312e-06, + "loss": 0.2683, + "step": 25233 + }, + { + "epoch": 1.1820864758514076, + "grad_norm": 0.5673061932387995, + "learning_rate": 1.89050601256579e-06, + "loss": 0.2712, + "step": 25234 + }, + { + "epoch": 1.182133320841336, + "grad_norm": 0.5559752206920868, + "learning_rate": 1.8903220866483326e-06, + "loss": 0.2534, + "step": 25235 + }, + { + "epoch": 1.1821801658312643, + "grad_norm": 0.6136413333645901, + "learning_rate": 1.8901381642394184e-06, + "loss": 0.2676, + "step": 25236 + }, + { + "epoch": 1.1822270108211927, + "grad_norm": 0.6807750247696343, + "learning_rate": 1.8899542453401062e-06, + "loss": 0.2862, + "step": 25237 + }, + { + "epoch": 1.182273855811121, + "grad_norm": 0.5932156413569071, + "learning_rate": 1.8897703299514536e-06, + "loss": 0.2602, + "step": 25238 + }, + { + "epoch": 1.1823207008010495, + "grad_norm": 0.5540334742583316, + "learning_rate": 1.8895864180745192e-06, + "loss": 0.2546, + "step": 25239 + }, + { + "epoch": 1.1823675457909777, + "grad_norm": 0.6227251271271056, + "learning_rate": 1.8894025097103624e-06, + "loss": 0.2668, + "step": 25240 + }, + { + "epoch": 1.182414390780906, + "grad_norm": 0.5928375615116677, + "learning_rate": 1.8892186048600406e-06, + "loss": 0.2678, + "step": 25241 + }, + { + "epoch": 1.1824612357708344, + "grad_norm": 0.5852261993796155, + "learning_rate": 1.8890347035246116e-06, + "loss": 0.2633, + "step": 25242 + }, + { + "epoch": 1.1825080807607626, + "grad_norm": 0.5765841473311336, + "learning_rate": 1.8888508057051342e-06, + "loss": 0.2821, + "step": 25243 + }, + { + "epoch": 1.1825549257506909, + "grad_norm": 0.5861411722943045, + "learning_rate": 1.8886669114026673e-06, + "loss": 0.268, + "step": 25244 + }, + { + "epoch": 1.1826017707406193, + "grad_norm": 0.6123915499671349, + "learning_rate": 1.888483020618268e-06, + "loss": 0.2728, + "step": 25245 + }, + { + "epoch": 1.1826486157305476, + "grad_norm": 0.6147383588063097, + "learning_rate": 1.8882991333529964e-06, + "loss": 0.2719, + "step": 25246 + }, + { + "epoch": 1.1826954607204758, + "grad_norm": 0.5891246644286687, + "learning_rate": 1.8881152496079087e-06, + "loss": 0.2713, + "step": 25247 + }, + { + "epoch": 1.1827423057104043, + "grad_norm": 0.5935472416077097, + "learning_rate": 1.8879313693840646e-06, + "loss": 0.2763, + "step": 25248 + }, + { + "epoch": 1.1827891507003325, + "grad_norm": 0.5993005420735382, + "learning_rate": 1.8877474926825206e-06, + "loss": 0.2888, + "step": 25249 + }, + { + "epoch": 1.182835995690261, + "grad_norm": 0.636139313435702, + "learning_rate": 1.8875636195043361e-06, + "loss": 0.2887, + "step": 25250 + }, + { + "epoch": 1.1828828406801892, + "grad_norm": 0.5574279328313637, + "learning_rate": 1.8873797498505698e-06, + "loss": 0.2591, + "step": 25251 + }, + { + "epoch": 1.1829296856701177, + "grad_norm": 0.5758283613818049, + "learning_rate": 1.8871958837222793e-06, + "loss": 0.2775, + "step": 25252 + }, + { + "epoch": 1.182976530660046, + "grad_norm": 0.6149223901331093, + "learning_rate": 1.8870120211205215e-06, + "loss": 0.2687, + "step": 25253 + }, + { + "epoch": 1.1830233756499742, + "grad_norm": 0.5718221662254565, + "learning_rate": 1.8868281620463551e-06, + "loss": 0.2638, + "step": 25254 + }, + { + "epoch": 1.1830702206399026, + "grad_norm": 0.5859935779787817, + "learning_rate": 1.8866443065008396e-06, + "loss": 0.2741, + "step": 25255 + }, + { + "epoch": 1.1831170656298309, + "grad_norm": 0.6257026282999808, + "learning_rate": 1.8864604544850312e-06, + "loss": 0.2695, + "step": 25256 + }, + { + "epoch": 1.1831639106197591, + "grad_norm": 0.5904851339002404, + "learning_rate": 1.8862766059999887e-06, + "loss": 0.2775, + "step": 25257 + }, + { + "epoch": 1.1832107556096876, + "grad_norm": 0.6024305309316478, + "learning_rate": 1.8860927610467712e-06, + "loss": 0.2849, + "step": 25258 + }, + { + "epoch": 1.1832576005996158, + "grad_norm": 0.5961796865296791, + "learning_rate": 1.885908919626435e-06, + "loss": 0.2762, + "step": 25259 + }, + { + "epoch": 1.1833044455895443, + "grad_norm": 0.6596385874344055, + "learning_rate": 1.885725081740038e-06, + "loss": 0.3025, + "step": 25260 + }, + { + "epoch": 1.1833512905794725, + "grad_norm": 0.5920739206287742, + "learning_rate": 1.8855412473886386e-06, + "loss": 0.2831, + "step": 25261 + }, + { + "epoch": 1.1833981355694008, + "grad_norm": 0.6076725045757578, + "learning_rate": 1.8853574165732957e-06, + "loss": 0.2691, + "step": 25262 + }, + { + "epoch": 1.1834449805593292, + "grad_norm": 0.5951695970818559, + "learning_rate": 1.885173589295066e-06, + "loss": 0.2668, + "step": 25263 + }, + { + "epoch": 1.1834918255492575, + "grad_norm": 0.5786410907338352, + "learning_rate": 1.8849897655550086e-06, + "loss": 0.2812, + "step": 25264 + }, + { + "epoch": 1.183538670539186, + "grad_norm": 0.6446493779501834, + "learning_rate": 1.8848059453541794e-06, + "loss": 0.2817, + "step": 25265 + }, + { + "epoch": 1.1835855155291142, + "grad_norm": 0.5668567044975269, + "learning_rate": 1.8846221286936378e-06, + "loss": 0.287, + "step": 25266 + }, + { + "epoch": 1.1836323605190424, + "grad_norm": 0.5664671234225178, + "learning_rate": 1.8844383155744409e-06, + "loss": 0.2702, + "step": 25267 + }, + { + "epoch": 1.1836792055089709, + "grad_norm": 0.5755575695979969, + "learning_rate": 1.8842545059976472e-06, + "loss": 0.2692, + "step": 25268 + }, + { + "epoch": 1.1837260504988991, + "grad_norm": 0.6265231973215291, + "learning_rate": 1.8840706999643136e-06, + "loss": 0.2823, + "step": 25269 + }, + { + "epoch": 1.1837728954888274, + "grad_norm": 0.6152379787092951, + "learning_rate": 1.8838868974754992e-06, + "loss": 0.2828, + "step": 25270 + }, + { + "epoch": 1.1838197404787558, + "grad_norm": 0.6136621493935217, + "learning_rate": 1.88370309853226e-06, + "loss": 0.2904, + "step": 25271 + }, + { + "epoch": 1.183866585468684, + "grad_norm": 0.5728437659594816, + "learning_rate": 1.883519303135655e-06, + "loss": 0.2824, + "step": 25272 + }, + { + "epoch": 1.1839134304586125, + "grad_norm": 0.5603159207290501, + "learning_rate": 1.883335511286741e-06, + "loss": 0.2573, + "step": 25273 + }, + { + "epoch": 1.1839602754485408, + "grad_norm": 0.5952211447638147, + "learning_rate": 1.883151722986576e-06, + "loss": 0.2771, + "step": 25274 + }, + { + "epoch": 1.1840071204384692, + "grad_norm": 0.5602473032386678, + "learning_rate": 1.8829679382362184e-06, + "loss": 0.2648, + "step": 25275 + }, + { + "epoch": 1.1840539654283975, + "grad_norm": 0.5874678429833421, + "learning_rate": 1.8827841570367259e-06, + "loss": 0.2794, + "step": 25276 + }, + { + "epoch": 1.1841008104183257, + "grad_norm": 0.5895278953995285, + "learning_rate": 1.882600379389154e-06, + "loss": 0.2696, + "step": 25277 + }, + { + "epoch": 1.1841476554082542, + "grad_norm": 0.5763153692224317, + "learning_rate": 1.882416605294562e-06, + "loss": 0.2828, + "step": 25278 + }, + { + "epoch": 1.1841945003981824, + "grad_norm": 0.6155855181043154, + "learning_rate": 1.8822328347540078e-06, + "loss": 0.2874, + "step": 25279 + }, + { + "epoch": 1.1842413453881107, + "grad_norm": 0.6082595033035616, + "learning_rate": 1.8820490677685477e-06, + "loss": 0.285, + "step": 25280 + }, + { + "epoch": 1.1842881903780391, + "grad_norm": 0.5599738933139972, + "learning_rate": 1.8818653043392402e-06, + "loss": 0.2686, + "step": 25281 + }, + { + "epoch": 1.1843350353679674, + "grad_norm": 0.6155616586674728, + "learning_rate": 1.8816815444671432e-06, + "loss": 0.2822, + "step": 25282 + }, + { + "epoch": 1.1843818803578956, + "grad_norm": 0.5827590730933523, + "learning_rate": 1.881497788153313e-06, + "loss": 0.279, + "step": 25283 + }, + { + "epoch": 1.184428725347824, + "grad_norm": 0.5711697036692684, + "learning_rate": 1.8813140353988073e-06, + "loss": 0.276, + "step": 25284 + }, + { + "epoch": 1.1844755703377523, + "grad_norm": 0.5804684322519948, + "learning_rate": 1.8811302862046836e-06, + "loss": 0.2688, + "step": 25285 + }, + { + "epoch": 1.1845224153276808, + "grad_norm": 0.5926085584163454, + "learning_rate": 1.8809465405720002e-06, + "loss": 0.2726, + "step": 25286 + }, + { + "epoch": 1.184569260317609, + "grad_norm": 0.5692784936641959, + "learning_rate": 1.8807627985018134e-06, + "loss": 0.2532, + "step": 25287 + }, + { + "epoch": 1.1846161053075375, + "grad_norm": 0.5824540739712136, + "learning_rate": 1.8805790599951818e-06, + "loss": 0.2543, + "step": 25288 + }, + { + "epoch": 1.1846629502974657, + "grad_norm": 0.5876154731893664, + "learning_rate": 1.8803953250531612e-06, + "loss": 0.2622, + "step": 25289 + }, + { + "epoch": 1.184709795287394, + "grad_norm": 0.5622622604288057, + "learning_rate": 1.8802115936768103e-06, + "loss": 0.2719, + "step": 25290 + }, + { + "epoch": 1.1847566402773224, + "grad_norm": 0.5638828245564674, + "learning_rate": 1.8800278658671855e-06, + "loss": 0.2684, + "step": 25291 + }, + { + "epoch": 1.1848034852672507, + "grad_norm": 0.6293071036169154, + "learning_rate": 1.8798441416253444e-06, + "loss": 0.2846, + "step": 25292 + }, + { + "epoch": 1.184850330257179, + "grad_norm": 0.6542616908571574, + "learning_rate": 1.8796604209523447e-06, + "loss": 0.2961, + "step": 25293 + }, + { + "epoch": 1.1848971752471074, + "grad_norm": 0.610084590517013, + "learning_rate": 1.879476703849244e-06, + "loss": 0.2742, + "step": 25294 + }, + { + "epoch": 1.1849440202370356, + "grad_norm": 0.5859546977483325, + "learning_rate": 1.8792929903170981e-06, + "loss": 0.2703, + "step": 25295 + }, + { + "epoch": 1.184990865226964, + "grad_norm": 0.5690522903276932, + "learning_rate": 1.879109280356965e-06, + "loss": 0.2673, + "step": 25296 + }, + { + "epoch": 1.1850377102168923, + "grad_norm": 0.5772849385222623, + "learning_rate": 1.8789255739699019e-06, + "loss": 0.2833, + "step": 25297 + }, + { + "epoch": 1.1850845552068205, + "grad_norm": 0.59149489941837, + "learning_rate": 1.878741871156966e-06, + "loss": 0.2787, + "step": 25298 + }, + { + "epoch": 1.185131400196749, + "grad_norm": 0.6284942952076461, + "learning_rate": 1.8785581719192144e-06, + "loss": 0.2926, + "step": 25299 + }, + { + "epoch": 1.1851782451866772, + "grad_norm": 0.5546840488691154, + "learning_rate": 1.878374476257705e-06, + "loss": 0.2729, + "step": 25300 + }, + { + "epoch": 1.1852250901766057, + "grad_norm": 0.6267848507439526, + "learning_rate": 1.8781907841734945e-06, + "loss": 0.285, + "step": 25301 + }, + { + "epoch": 1.185271935166534, + "grad_norm": 0.5995751049901157, + "learning_rate": 1.8780070956676389e-06, + "loss": 0.2763, + "step": 25302 + }, + { + "epoch": 1.1853187801564622, + "grad_norm": 0.6073407966668856, + "learning_rate": 1.877823410741196e-06, + "loss": 0.2732, + "step": 25303 + }, + { + "epoch": 1.1853656251463907, + "grad_norm": 0.6118130671424546, + "learning_rate": 1.8776397293952237e-06, + "loss": 0.2834, + "step": 25304 + }, + { + "epoch": 1.185412470136319, + "grad_norm": 0.5619190960924566, + "learning_rate": 1.8774560516307778e-06, + "loss": 0.2733, + "step": 25305 + }, + { + "epoch": 1.1854593151262471, + "grad_norm": 0.5399023506553045, + "learning_rate": 1.8772723774489155e-06, + "loss": 0.2555, + "step": 25306 + }, + { + "epoch": 1.1855061601161756, + "grad_norm": 0.658077851187525, + "learning_rate": 1.8770887068506954e-06, + "loss": 0.2804, + "step": 25307 + }, + { + "epoch": 1.1855530051061038, + "grad_norm": 0.6252801842399404, + "learning_rate": 1.876905039837173e-06, + "loss": 0.2881, + "step": 25308 + }, + { + "epoch": 1.1855998500960323, + "grad_norm": 0.5613301937473121, + "learning_rate": 1.8767213764094045e-06, + "loss": 0.2611, + "step": 25309 + }, + { + "epoch": 1.1856466950859605, + "grad_norm": 0.5246406340961982, + "learning_rate": 1.8765377165684482e-06, + "loss": 0.2534, + "step": 25310 + }, + { + "epoch": 1.185693540075889, + "grad_norm": 0.6185857484991278, + "learning_rate": 1.8763540603153607e-06, + "loss": 0.2846, + "step": 25311 + }, + { + "epoch": 1.1857403850658172, + "grad_norm": 0.5746386391729037, + "learning_rate": 1.8761704076511988e-06, + "loss": 0.2579, + "step": 25312 + }, + { + "epoch": 1.1857872300557455, + "grad_norm": 0.5747523233449786, + "learning_rate": 1.8759867585770204e-06, + "loss": 0.269, + "step": 25313 + }, + { + "epoch": 1.185834075045674, + "grad_norm": 0.5897199749674601, + "learning_rate": 1.8758031130938801e-06, + "loss": 0.2825, + "step": 25314 + }, + { + "epoch": 1.1858809200356022, + "grad_norm": 0.6225434929292994, + "learning_rate": 1.8756194712028363e-06, + "loss": 0.2826, + "step": 25315 + }, + { + "epoch": 1.1859277650255304, + "grad_norm": 0.6249539608201157, + "learning_rate": 1.8754358329049455e-06, + "loss": 0.2837, + "step": 25316 + }, + { + "epoch": 1.185974610015459, + "grad_norm": 0.6422480569100879, + "learning_rate": 1.8752521982012647e-06, + "loss": 0.2723, + "step": 25317 + }, + { + "epoch": 1.1860214550053871, + "grad_norm": 0.6227061738276312, + "learning_rate": 1.8750685670928501e-06, + "loss": 0.2938, + "step": 25318 + }, + { + "epoch": 1.1860682999953154, + "grad_norm": 0.6165000214741811, + "learning_rate": 1.87488493958076e-06, + "loss": 0.2868, + "step": 25319 + }, + { + "epoch": 1.1861151449852438, + "grad_norm": 0.6188363633014534, + "learning_rate": 1.8747013156660487e-06, + "loss": 0.2876, + "step": 25320 + }, + { + "epoch": 1.186161989975172, + "grad_norm": 0.5644114438720395, + "learning_rate": 1.8745176953497746e-06, + "loss": 0.2524, + "step": 25321 + }, + { + "epoch": 1.1862088349651005, + "grad_norm": 0.6395944218177202, + "learning_rate": 1.8743340786329936e-06, + "loss": 0.2812, + "step": 25322 + }, + { + "epoch": 1.1862556799550288, + "grad_norm": 0.6310177764943931, + "learning_rate": 1.8741504655167625e-06, + "loss": 0.2754, + "step": 25323 + }, + { + "epoch": 1.1863025249449572, + "grad_norm": 0.5727387529232151, + "learning_rate": 1.873966856002139e-06, + "loss": 0.2704, + "step": 25324 + }, + { + "epoch": 1.1863493699348855, + "grad_norm": 0.5441781937184667, + "learning_rate": 1.8737832500901793e-06, + "loss": 0.26, + "step": 25325 + }, + { + "epoch": 1.1863962149248137, + "grad_norm": 0.6061124776950314, + "learning_rate": 1.8735996477819385e-06, + "loss": 0.2809, + "step": 25326 + }, + { + "epoch": 1.1864430599147422, + "grad_norm": 0.6010980195920002, + "learning_rate": 1.8734160490784744e-06, + "loss": 0.2707, + "step": 25327 + }, + { + "epoch": 1.1864899049046704, + "grad_norm": 0.5927869898301937, + "learning_rate": 1.8732324539808438e-06, + "loss": 0.2794, + "step": 25328 + }, + { + "epoch": 1.1865367498945987, + "grad_norm": 0.6062287064860806, + "learning_rate": 1.8730488624901027e-06, + "loss": 0.2791, + "step": 25329 + }, + { + "epoch": 1.1865835948845271, + "grad_norm": 0.569367966198624, + "learning_rate": 1.8728652746073075e-06, + "loss": 0.272, + "step": 25330 + }, + { + "epoch": 1.1866304398744554, + "grad_norm": 0.5656897750949884, + "learning_rate": 1.8726816903335162e-06, + "loss": 0.2714, + "step": 25331 + }, + { + "epoch": 1.1866772848643838, + "grad_norm": 0.6388815398075356, + "learning_rate": 1.8724981096697836e-06, + "loss": 0.2816, + "step": 25332 + }, + { + "epoch": 1.186724129854312, + "grad_norm": 0.5801738249073599, + "learning_rate": 1.872314532617166e-06, + "loss": 0.2738, + "step": 25333 + }, + { + "epoch": 1.1867709748442403, + "grad_norm": 0.534509173314475, + "learning_rate": 1.8721309591767205e-06, + "loss": 0.2534, + "step": 25334 + }, + { + "epoch": 1.1868178198341688, + "grad_norm": 0.6007915299678532, + "learning_rate": 1.8719473893495043e-06, + "loss": 0.2765, + "step": 25335 + }, + { + "epoch": 1.186864664824097, + "grad_norm": 0.638693409170665, + "learning_rate": 1.8717638231365726e-06, + "loss": 0.2767, + "step": 25336 + }, + { + "epoch": 1.1869115098140255, + "grad_norm": 0.57797580735569, + "learning_rate": 1.8715802605389827e-06, + "loss": 0.2512, + "step": 25337 + }, + { + "epoch": 1.1869583548039537, + "grad_norm": 0.5844506544266105, + "learning_rate": 1.8713967015577896e-06, + "loss": 0.2826, + "step": 25338 + }, + { + "epoch": 1.187005199793882, + "grad_norm": 0.6145882759398403, + "learning_rate": 1.8712131461940515e-06, + "loss": 0.2901, + "step": 25339 + }, + { + "epoch": 1.1870520447838104, + "grad_norm": 0.5867049646435929, + "learning_rate": 1.8710295944488228e-06, + "loss": 0.2853, + "step": 25340 + }, + { + "epoch": 1.1870988897737387, + "grad_norm": 0.6201996921508973, + "learning_rate": 1.8708460463231603e-06, + "loss": 0.273, + "step": 25341 + }, + { + "epoch": 1.187145734763667, + "grad_norm": 0.5691175894119112, + "learning_rate": 1.8706625018181218e-06, + "loss": 0.2632, + "step": 25342 + }, + { + "epoch": 1.1871925797535954, + "grad_norm": 0.5932065505284906, + "learning_rate": 1.8704789609347625e-06, + "loss": 0.2719, + "step": 25343 + }, + { + "epoch": 1.1872394247435236, + "grad_norm": 0.5845074623589183, + "learning_rate": 1.8702954236741378e-06, + "loss": 0.2613, + "step": 25344 + }, + { + "epoch": 1.187286269733452, + "grad_norm": 0.6176261048610733, + "learning_rate": 1.8701118900373044e-06, + "loss": 0.2818, + "step": 25345 + }, + { + "epoch": 1.1873331147233803, + "grad_norm": 0.5934961490384753, + "learning_rate": 1.8699283600253198e-06, + "loss": 0.2789, + "step": 25346 + }, + { + "epoch": 1.1873799597133088, + "grad_norm": 0.5646260841761703, + "learning_rate": 1.8697448336392382e-06, + "loss": 0.2595, + "step": 25347 + }, + { + "epoch": 1.187426804703237, + "grad_norm": 0.5857044588898389, + "learning_rate": 1.8695613108801167e-06, + "loss": 0.273, + "step": 25348 + }, + { + "epoch": 1.1874736496931653, + "grad_norm": 0.623549258306682, + "learning_rate": 1.8693777917490125e-06, + "loss": 0.2858, + "step": 25349 + }, + { + "epoch": 1.1875204946830937, + "grad_norm": 0.599774668857607, + "learning_rate": 1.86919427624698e-06, + "loss": 0.2704, + "step": 25350 + }, + { + "epoch": 1.187567339673022, + "grad_norm": 0.5719249621835665, + "learning_rate": 1.8690107643750754e-06, + "loss": 0.2742, + "step": 25351 + }, + { + "epoch": 1.1876141846629502, + "grad_norm": 0.5661806738934911, + "learning_rate": 1.8688272561343553e-06, + "loss": 0.2676, + "step": 25352 + }, + { + "epoch": 1.1876610296528787, + "grad_norm": 0.5710235930996164, + "learning_rate": 1.8686437515258766e-06, + "loss": 0.269, + "step": 25353 + }, + { + "epoch": 1.187707874642807, + "grad_norm": 0.5923877834865225, + "learning_rate": 1.8684602505506932e-06, + "loss": 0.2808, + "step": 25354 + }, + { + "epoch": 1.1877547196327352, + "grad_norm": 0.6228821409474026, + "learning_rate": 1.8682767532098639e-06, + "loss": 0.2639, + "step": 25355 + }, + { + "epoch": 1.1878015646226636, + "grad_norm": 0.5767156118035569, + "learning_rate": 1.8680932595044417e-06, + "loss": 0.2755, + "step": 25356 + }, + { + "epoch": 1.1878484096125919, + "grad_norm": 0.5799456513517878, + "learning_rate": 1.8679097694354847e-06, + "loss": 0.2746, + "step": 25357 + }, + { + "epoch": 1.1878952546025203, + "grad_norm": 0.6277073055786933, + "learning_rate": 1.8677262830040476e-06, + "loss": 0.2811, + "step": 25358 + }, + { + "epoch": 1.1879420995924486, + "grad_norm": 0.5816334499233039, + "learning_rate": 1.8675428002111873e-06, + "loss": 0.274, + "step": 25359 + }, + { + "epoch": 1.187988944582377, + "grad_norm": 0.5844403939802049, + "learning_rate": 1.867359321057959e-06, + "loss": 0.251, + "step": 25360 + }, + { + "epoch": 1.1880357895723053, + "grad_norm": 0.6190384939446323, + "learning_rate": 1.8671758455454192e-06, + "loss": 0.2827, + "step": 25361 + }, + { + "epoch": 1.1880826345622335, + "grad_norm": 0.6504712051743679, + "learning_rate": 1.8669923736746226e-06, + "loss": 0.2771, + "step": 25362 + }, + { + "epoch": 1.188129479552162, + "grad_norm": 0.5713498796379676, + "learning_rate": 1.866808905446626e-06, + "loss": 0.2778, + "step": 25363 + }, + { + "epoch": 1.1881763245420902, + "grad_norm": 0.60438629097798, + "learning_rate": 1.8666254408624852e-06, + "loss": 0.2755, + "step": 25364 + }, + { + "epoch": 1.1882231695320185, + "grad_norm": 0.58798652278013, + "learning_rate": 1.8664419799232553e-06, + "loss": 0.2734, + "step": 25365 + }, + { + "epoch": 1.188270014521947, + "grad_norm": 0.5823560854066028, + "learning_rate": 1.8662585226299934e-06, + "loss": 0.2799, + "step": 25366 + }, + { + "epoch": 1.1883168595118752, + "grad_norm": 0.6616116770446323, + "learning_rate": 1.8660750689837536e-06, + "loss": 0.2717, + "step": 25367 + }, + { + "epoch": 1.1883637045018036, + "grad_norm": 0.5752224326414341, + "learning_rate": 1.8658916189855936e-06, + "loss": 0.2805, + "step": 25368 + }, + { + "epoch": 1.1884105494917319, + "grad_norm": 0.5916287310048532, + "learning_rate": 1.865708172636567e-06, + "loss": 0.2765, + "step": 25369 + }, + { + "epoch": 1.18845739448166, + "grad_norm": 0.5917449228102644, + "learning_rate": 1.8655247299377311e-06, + "loss": 0.2649, + "step": 25370 + }, + { + "epoch": 1.1885042394715886, + "grad_norm": 0.5803602551964129, + "learning_rate": 1.86534129089014e-06, + "loss": 0.2624, + "step": 25371 + }, + { + "epoch": 1.1885510844615168, + "grad_norm": 0.625810135779301, + "learning_rate": 1.8651578554948503e-06, + "loss": 0.2885, + "step": 25372 + }, + { + "epoch": 1.1885979294514453, + "grad_norm": 0.610543358046971, + "learning_rate": 1.8649744237529183e-06, + "loss": 0.2795, + "step": 25373 + }, + { + "epoch": 1.1886447744413735, + "grad_norm": 0.5844555105461615, + "learning_rate": 1.8647909956653992e-06, + "loss": 0.2728, + "step": 25374 + }, + { + "epoch": 1.1886916194313017, + "grad_norm": 0.5927233138290461, + "learning_rate": 1.8646075712333476e-06, + "loss": 0.2818, + "step": 25375 + }, + { + "epoch": 1.1887384644212302, + "grad_norm": 0.6198098778938698, + "learning_rate": 1.8644241504578192e-06, + "loss": 0.2766, + "step": 25376 + }, + { + "epoch": 1.1887853094111585, + "grad_norm": 0.5806460220612815, + "learning_rate": 1.864240733339871e-06, + "loss": 0.2719, + "step": 25377 + }, + { + "epoch": 1.1888321544010867, + "grad_norm": 0.6126043213549697, + "learning_rate": 1.864057319880557e-06, + "loss": 0.2676, + "step": 25378 + }, + { + "epoch": 1.1888789993910152, + "grad_norm": 0.5752170480769081, + "learning_rate": 1.8638739100809332e-06, + "loss": 0.2682, + "step": 25379 + }, + { + "epoch": 1.1889258443809434, + "grad_norm": 0.6238366079972432, + "learning_rate": 1.863690503942056e-06, + "loss": 0.2828, + "step": 25380 + }, + { + "epoch": 1.1889726893708719, + "grad_norm": 0.5795102547233719, + "learning_rate": 1.86350710146498e-06, + "loss": 0.2738, + "step": 25381 + }, + { + "epoch": 1.1890195343608, + "grad_norm": 0.5556875190420828, + "learning_rate": 1.86332370265076e-06, + "loss": 0.2742, + "step": 25382 + }, + { + "epoch": 1.1890663793507286, + "grad_norm": 0.57416377871928, + "learning_rate": 1.8631403075004516e-06, + "loss": 0.2857, + "step": 25383 + }, + { + "epoch": 1.1891132243406568, + "grad_norm": 0.6184580342659738, + "learning_rate": 1.8629569160151117e-06, + "loss": 0.285, + "step": 25384 + }, + { + "epoch": 1.189160069330585, + "grad_norm": 0.6315285547183632, + "learning_rate": 1.8627735281957937e-06, + "loss": 0.2876, + "step": 25385 + }, + { + "epoch": 1.1892069143205135, + "grad_norm": 0.5959103047833223, + "learning_rate": 1.8625901440435553e-06, + "loss": 0.2644, + "step": 25386 + }, + { + "epoch": 1.1892537593104417, + "grad_norm": 0.535428535970807, + "learning_rate": 1.8624067635594494e-06, + "loss": 0.2487, + "step": 25387 + }, + { + "epoch": 1.18930060430037, + "grad_norm": 0.6157228294128527, + "learning_rate": 1.8622233867445327e-06, + "loss": 0.284, + "step": 25388 + }, + { + "epoch": 1.1893474492902985, + "grad_norm": 0.6382112838004257, + "learning_rate": 1.8620400135998595e-06, + "loss": 0.2645, + "step": 25389 + }, + { + "epoch": 1.1893942942802267, + "grad_norm": 0.6169118964630009, + "learning_rate": 1.8618566441264857e-06, + "loss": 0.297, + "step": 25390 + }, + { + "epoch": 1.189441139270155, + "grad_norm": 0.5800159676940382, + "learning_rate": 1.861673278325467e-06, + "loss": 0.2785, + "step": 25391 + }, + { + "epoch": 1.1894879842600834, + "grad_norm": 0.5914084861602404, + "learning_rate": 1.861489916197859e-06, + "loss": 0.2758, + "step": 25392 + }, + { + "epoch": 1.1895348292500116, + "grad_norm": 0.6101319505739413, + "learning_rate": 1.8613065577447146e-06, + "loss": 0.2779, + "step": 25393 + }, + { + "epoch": 1.18958167423994, + "grad_norm": 0.5570339163932002, + "learning_rate": 1.8611232029670905e-06, + "loss": 0.2721, + "step": 25394 + }, + { + "epoch": 1.1896285192298683, + "grad_norm": 0.5872040464651507, + "learning_rate": 1.8609398518660424e-06, + "loss": 0.2797, + "step": 25395 + }, + { + "epoch": 1.1896753642197968, + "grad_norm": 0.6034115553655898, + "learning_rate": 1.8607565044426243e-06, + "loss": 0.2736, + "step": 25396 + }, + { + "epoch": 1.189722209209725, + "grad_norm": 0.5632657884558768, + "learning_rate": 1.860573160697892e-06, + "loss": 0.2713, + "step": 25397 + }, + { + "epoch": 1.1897690541996533, + "grad_norm": 0.627958596402919, + "learning_rate": 1.8603898206329013e-06, + "loss": 0.3083, + "step": 25398 + }, + { + "epoch": 1.1898158991895817, + "grad_norm": 0.5785179355974792, + "learning_rate": 1.8602064842487058e-06, + "loss": 0.2691, + "step": 25399 + }, + { + "epoch": 1.18986274417951, + "grad_norm": 0.5854066047034565, + "learning_rate": 1.8600231515463606e-06, + "loss": 0.2581, + "step": 25400 + }, + { + "epoch": 1.1899095891694382, + "grad_norm": 0.6549292205194739, + "learning_rate": 1.8598398225269218e-06, + "loss": 0.2965, + "step": 25401 + }, + { + "epoch": 1.1899564341593667, + "grad_norm": 0.5348477042885076, + "learning_rate": 1.859656497191444e-06, + "loss": 0.2665, + "step": 25402 + }, + { + "epoch": 1.190003279149295, + "grad_norm": 0.6112222248045743, + "learning_rate": 1.8594731755409817e-06, + "loss": 0.2862, + "step": 25403 + }, + { + "epoch": 1.1900501241392234, + "grad_norm": 0.5393721659925366, + "learning_rate": 1.8592898575765917e-06, + "loss": 0.2659, + "step": 25404 + }, + { + "epoch": 1.1900969691291516, + "grad_norm": 0.5932932156899251, + "learning_rate": 1.859106543299326e-06, + "loss": 0.2622, + "step": 25405 + }, + { + "epoch": 1.1901438141190799, + "grad_norm": 0.5589368402402305, + "learning_rate": 1.8589232327102419e-06, + "loss": 0.2685, + "step": 25406 + }, + { + "epoch": 1.1901906591090083, + "grad_norm": 0.5854890394412178, + "learning_rate": 1.8587399258103928e-06, + "loss": 0.2736, + "step": 25407 + }, + { + "epoch": 1.1902375040989366, + "grad_norm": 0.574996388824612, + "learning_rate": 1.8585566226008344e-06, + "loss": 0.2702, + "step": 25408 + }, + { + "epoch": 1.190284349088865, + "grad_norm": 0.5439737459853601, + "learning_rate": 1.8583733230826215e-06, + "loss": 0.2509, + "step": 25409 + }, + { + "epoch": 1.1903311940787933, + "grad_norm": 0.546760149173142, + "learning_rate": 1.8581900272568096e-06, + "loss": 0.2681, + "step": 25410 + }, + { + "epoch": 1.1903780390687215, + "grad_norm": 0.611607844976269, + "learning_rate": 1.8580067351244518e-06, + "loss": 0.2726, + "step": 25411 + }, + { + "epoch": 1.19042488405865, + "grad_norm": 0.610401991573615, + "learning_rate": 1.8578234466866046e-06, + "loss": 0.266, + "step": 25412 + }, + { + "epoch": 1.1904717290485782, + "grad_norm": 0.5674615280555468, + "learning_rate": 1.8576401619443214e-06, + "loss": 0.2682, + "step": 25413 + }, + { + "epoch": 1.1905185740385065, + "grad_norm": 0.5784629861755249, + "learning_rate": 1.8574568808986574e-06, + "loss": 0.2564, + "step": 25414 + }, + { + "epoch": 1.190565419028435, + "grad_norm": 0.6222299423696774, + "learning_rate": 1.8572736035506682e-06, + "loss": 0.2938, + "step": 25415 + }, + { + "epoch": 1.1906122640183632, + "grad_norm": 0.5895752116987607, + "learning_rate": 1.8570903299014084e-06, + "loss": 0.2782, + "step": 25416 + }, + { + "epoch": 1.1906591090082916, + "grad_norm": 0.5458230580015921, + "learning_rate": 1.8569070599519312e-06, + "loss": 0.267, + "step": 25417 + }, + { + "epoch": 1.1907059539982199, + "grad_norm": 0.5944326649764852, + "learning_rate": 1.8567237937032923e-06, + "loss": 0.2676, + "step": 25418 + }, + { + "epoch": 1.1907527989881483, + "grad_norm": 0.5846848074914145, + "learning_rate": 1.8565405311565466e-06, + "loss": 0.2766, + "step": 25419 + }, + { + "epoch": 1.1907996439780766, + "grad_norm": 0.6264938053150203, + "learning_rate": 1.8563572723127476e-06, + "loss": 0.2793, + "step": 25420 + }, + { + "epoch": 1.1908464889680048, + "grad_norm": 0.606631667595456, + "learning_rate": 1.8561740171729514e-06, + "loss": 0.285, + "step": 25421 + }, + { + "epoch": 1.1908933339579333, + "grad_norm": 0.6126005578007022, + "learning_rate": 1.8559907657382127e-06, + "loss": 0.296, + "step": 25422 + }, + { + "epoch": 1.1909401789478615, + "grad_norm": 0.5925682442613589, + "learning_rate": 1.8558075180095846e-06, + "loss": 0.2794, + "step": 25423 + }, + { + "epoch": 1.1909870239377898, + "grad_norm": 0.592020270085716, + "learning_rate": 1.8556242739881219e-06, + "loss": 0.2832, + "step": 25424 + }, + { + "epoch": 1.1910338689277182, + "grad_norm": 0.6368301008031712, + "learning_rate": 1.8554410336748796e-06, + "loss": 0.2756, + "step": 25425 + }, + { + "epoch": 1.1910807139176465, + "grad_norm": 0.5982226068435014, + "learning_rate": 1.8552577970709127e-06, + "loss": 0.2774, + "step": 25426 + }, + { + "epoch": 1.1911275589075747, + "grad_norm": 0.6251091812943181, + "learning_rate": 1.8550745641772743e-06, + "loss": 0.2727, + "step": 25427 + }, + { + "epoch": 1.1911744038975032, + "grad_norm": 0.6259500586617703, + "learning_rate": 1.854891334995021e-06, + "loss": 0.2945, + "step": 25428 + }, + { + "epoch": 1.1912212488874314, + "grad_norm": 0.6233432953622003, + "learning_rate": 1.854708109525205e-06, + "loss": 0.2831, + "step": 25429 + }, + { + "epoch": 1.1912680938773599, + "grad_norm": 0.6082000320382409, + "learning_rate": 1.854524887768882e-06, + "loss": 0.2597, + "step": 25430 + }, + { + "epoch": 1.1913149388672881, + "grad_norm": 0.581081310599786, + "learning_rate": 1.8543416697271054e-06, + "loss": 0.2724, + "step": 25431 + }, + { + "epoch": 1.1913617838572166, + "grad_norm": 0.5971532438754995, + "learning_rate": 1.8541584554009303e-06, + "loss": 0.2899, + "step": 25432 + }, + { + "epoch": 1.1914086288471448, + "grad_norm": 0.6803229364035508, + "learning_rate": 1.8539752447914116e-06, + "loss": 0.2931, + "step": 25433 + }, + { + "epoch": 1.191455473837073, + "grad_norm": 0.6080928715921111, + "learning_rate": 1.8537920378996025e-06, + "loss": 0.2784, + "step": 25434 + }, + { + "epoch": 1.1915023188270015, + "grad_norm": 0.5346532877181446, + "learning_rate": 1.8536088347265586e-06, + "loss": 0.2527, + "step": 25435 + }, + { + "epoch": 1.1915491638169298, + "grad_norm": 0.632918173659925, + "learning_rate": 1.8534256352733327e-06, + "loss": 0.2928, + "step": 25436 + }, + { + "epoch": 1.191596008806858, + "grad_norm": 0.6035370638602298, + "learning_rate": 1.8532424395409798e-06, + "loss": 0.286, + "step": 25437 + }, + { + "epoch": 1.1916428537967865, + "grad_norm": 0.5693094990967404, + "learning_rate": 1.8530592475305539e-06, + "loss": 0.261, + "step": 25438 + }, + { + "epoch": 1.1916896987867147, + "grad_norm": 0.5311236017471849, + "learning_rate": 1.8528760592431094e-06, + "loss": 0.2655, + "step": 25439 + }, + { + "epoch": 1.1917365437766432, + "grad_norm": 0.5754327161613062, + "learning_rate": 1.8526928746797012e-06, + "loss": 0.2754, + "step": 25440 + }, + { + "epoch": 1.1917833887665714, + "grad_norm": 0.6136120387293453, + "learning_rate": 1.852509693841383e-06, + "loss": 0.2884, + "step": 25441 + }, + { + "epoch": 1.1918302337564997, + "grad_norm": 0.6116095307587276, + "learning_rate": 1.8523265167292082e-06, + "loss": 0.2705, + "step": 25442 + }, + { + "epoch": 1.1918770787464281, + "grad_norm": 0.579404037843793, + "learning_rate": 1.852143343344231e-06, + "loss": 0.2775, + "step": 25443 + }, + { + "epoch": 1.1919239237363564, + "grad_norm": 0.6121780030421938, + "learning_rate": 1.8519601736875069e-06, + "loss": 0.2927, + "step": 25444 + }, + { + "epoch": 1.1919707687262848, + "grad_norm": 0.5817916025111873, + "learning_rate": 1.8517770077600883e-06, + "loss": 0.2749, + "step": 25445 + }, + { + "epoch": 1.192017613716213, + "grad_norm": 0.6186649973611923, + "learning_rate": 1.85159384556303e-06, + "loss": 0.2711, + "step": 25446 + }, + { + "epoch": 1.1920644587061413, + "grad_norm": 0.572057200045901, + "learning_rate": 1.8514106870973875e-06, + "loss": 0.267, + "step": 25447 + }, + { + "epoch": 1.1921113036960698, + "grad_norm": 0.6487493673026903, + "learning_rate": 1.8512275323642132e-06, + "loss": 0.2904, + "step": 25448 + }, + { + "epoch": 1.192158148685998, + "grad_norm": 0.5843255988858329, + "learning_rate": 1.8510443813645603e-06, + "loss": 0.2786, + "step": 25449 + }, + { + "epoch": 1.1922049936759263, + "grad_norm": 0.5682650214134527, + "learning_rate": 1.8508612340994841e-06, + "loss": 0.2686, + "step": 25450 + }, + { + "epoch": 1.1922518386658547, + "grad_norm": 0.5790765099341334, + "learning_rate": 1.8506780905700388e-06, + "loss": 0.2611, + "step": 25451 + }, + { + "epoch": 1.192298683655783, + "grad_norm": 0.599003467712413, + "learning_rate": 1.8504949507772777e-06, + "loss": 0.281, + "step": 25452 + }, + { + "epoch": 1.1923455286457114, + "grad_norm": 0.6172381248216205, + "learning_rate": 1.8503118147222556e-06, + "loss": 0.283, + "step": 25453 + }, + { + "epoch": 1.1923923736356397, + "grad_norm": 0.5790367474042294, + "learning_rate": 1.850128682406025e-06, + "loss": 0.2823, + "step": 25454 + }, + { + "epoch": 1.1924392186255681, + "grad_norm": 0.6313913467414606, + "learning_rate": 1.8499455538296406e-06, + "loss": 0.3028, + "step": 25455 + }, + { + "epoch": 1.1924860636154964, + "grad_norm": 0.5899905012864815, + "learning_rate": 1.849762428994156e-06, + "loss": 0.292, + "step": 25456 + }, + { + "epoch": 1.1925329086054246, + "grad_norm": 0.605906032373125, + "learning_rate": 1.8495793079006254e-06, + "loss": 0.2726, + "step": 25457 + }, + { + "epoch": 1.192579753595353, + "grad_norm": 0.5696650276189844, + "learning_rate": 1.8493961905501021e-06, + "loss": 0.2684, + "step": 25458 + }, + { + "epoch": 1.1926265985852813, + "grad_norm": 0.6263323764380307, + "learning_rate": 1.849213076943641e-06, + "loss": 0.2778, + "step": 25459 + }, + { + "epoch": 1.1926734435752095, + "grad_norm": 0.6187382023552817, + "learning_rate": 1.8490299670822945e-06, + "loss": 0.2848, + "step": 25460 + }, + { + "epoch": 1.192720288565138, + "grad_norm": 0.5602364491223432, + "learning_rate": 1.848846860967117e-06, + "loss": 0.2586, + "step": 25461 + }, + { + "epoch": 1.1927671335550663, + "grad_norm": 0.6728120880054662, + "learning_rate": 1.848663758599162e-06, + "loss": 0.2938, + "step": 25462 + }, + { + "epoch": 1.1928139785449945, + "grad_norm": 0.5432121139323961, + "learning_rate": 1.848480659979483e-06, + "loss": 0.2591, + "step": 25463 + }, + { + "epoch": 1.192860823534923, + "grad_norm": 0.6006437236820168, + "learning_rate": 1.8482975651091348e-06, + "loss": 0.2802, + "step": 25464 + }, + { + "epoch": 1.1929076685248512, + "grad_norm": 0.5957309383333526, + "learning_rate": 1.8481144739891706e-06, + "loss": 0.2696, + "step": 25465 + }, + { + "epoch": 1.1929545135147797, + "grad_norm": 0.6314870056535624, + "learning_rate": 1.8479313866206427e-06, + "loss": 0.2827, + "step": 25466 + }, + { + "epoch": 1.193001358504708, + "grad_norm": 0.5700496878681173, + "learning_rate": 1.8477483030046063e-06, + "loss": 0.2635, + "step": 25467 + }, + { + "epoch": 1.1930482034946364, + "grad_norm": 0.5981488739352452, + "learning_rate": 1.8475652231421146e-06, + "loss": 0.2752, + "step": 25468 + }, + { + "epoch": 1.1930950484845646, + "grad_norm": 0.5795239315337612, + "learning_rate": 1.8473821470342208e-06, + "loss": 0.2554, + "step": 25469 + }, + { + "epoch": 1.1931418934744928, + "grad_norm": 0.5794007490079058, + "learning_rate": 1.847199074681978e-06, + "loss": 0.2747, + "step": 25470 + }, + { + "epoch": 1.1931887384644213, + "grad_norm": 0.6313053103430919, + "learning_rate": 1.8470160060864424e-06, + "loss": 0.2785, + "step": 25471 + }, + { + "epoch": 1.1932355834543495, + "grad_norm": 0.5998278899587044, + "learning_rate": 1.8468329412486647e-06, + "loss": 0.2759, + "step": 25472 + }, + { + "epoch": 1.1932824284442778, + "grad_norm": 0.6073975041127359, + "learning_rate": 1.8466498801696985e-06, + "loss": 0.2693, + "step": 25473 + }, + { + "epoch": 1.1933292734342063, + "grad_norm": 0.6353035081581055, + "learning_rate": 1.846466822850598e-06, + "loss": 0.2806, + "step": 25474 + }, + { + "epoch": 1.1933761184241345, + "grad_norm": 0.6306322223923949, + "learning_rate": 1.8462837692924173e-06, + "loss": 0.2787, + "step": 25475 + }, + { + "epoch": 1.193422963414063, + "grad_norm": 0.6279559391085642, + "learning_rate": 1.8461007194962088e-06, + "loss": 0.2778, + "step": 25476 + }, + { + "epoch": 1.1934698084039912, + "grad_norm": 0.5676633215389023, + "learning_rate": 1.8459176734630274e-06, + "loss": 0.2727, + "step": 25477 + }, + { + "epoch": 1.1935166533939194, + "grad_norm": 0.6036972434701653, + "learning_rate": 1.8457346311939245e-06, + "loss": 0.2705, + "step": 25478 + }, + { + "epoch": 1.193563498383848, + "grad_norm": 0.5971659700394247, + "learning_rate": 1.8455515926899544e-06, + "loss": 0.2907, + "step": 25479 + }, + { + "epoch": 1.1936103433737761, + "grad_norm": 0.5610662320292571, + "learning_rate": 1.8453685579521702e-06, + "loss": 0.2749, + "step": 25480 + }, + { + "epoch": 1.1936571883637046, + "grad_norm": 0.5974510995695623, + "learning_rate": 1.8451855269816254e-06, + "loss": 0.2705, + "step": 25481 + }, + { + "epoch": 1.1937040333536328, + "grad_norm": 0.5137374655925464, + "learning_rate": 1.8450024997793736e-06, + "loss": 0.2609, + "step": 25482 + }, + { + "epoch": 1.193750878343561, + "grad_norm": 0.5747720536393657, + "learning_rate": 1.8448194763464685e-06, + "loss": 0.2658, + "step": 25483 + }, + { + "epoch": 1.1937977233334895, + "grad_norm": 0.5883998127124466, + "learning_rate": 1.8446364566839617e-06, + "loss": 0.2754, + "step": 25484 + }, + { + "epoch": 1.1938445683234178, + "grad_norm": 0.5949005424348526, + "learning_rate": 1.844453440792907e-06, + "loss": 0.2784, + "step": 25485 + }, + { + "epoch": 1.193891413313346, + "grad_norm": 0.5696647479625347, + "learning_rate": 1.844270428674359e-06, + "loss": 0.2657, + "step": 25486 + }, + { + "epoch": 1.1939382583032745, + "grad_norm": 0.6215840052514681, + "learning_rate": 1.8440874203293691e-06, + "loss": 0.2826, + "step": 25487 + }, + { + "epoch": 1.1939851032932027, + "grad_norm": 0.5549148858259627, + "learning_rate": 1.8439044157589911e-06, + "loss": 0.2568, + "step": 25488 + }, + { + "epoch": 1.1940319482831312, + "grad_norm": 0.5987242001118209, + "learning_rate": 1.8437214149642797e-06, + "loss": 0.2935, + "step": 25489 + }, + { + "epoch": 1.1940787932730594, + "grad_norm": 0.5426949283301276, + "learning_rate": 1.8435384179462857e-06, + "loss": 0.2606, + "step": 25490 + }, + { + "epoch": 1.1941256382629877, + "grad_norm": 0.5567721239331466, + "learning_rate": 1.8433554247060626e-06, + "loss": 0.2682, + "step": 25491 + }, + { + "epoch": 1.1941724832529161, + "grad_norm": 0.5724167110687545, + "learning_rate": 1.8431724352446642e-06, + "loss": 0.2751, + "step": 25492 + }, + { + "epoch": 1.1942193282428444, + "grad_norm": 0.5713805834960266, + "learning_rate": 1.842989449563144e-06, + "loss": 0.2738, + "step": 25493 + }, + { + "epoch": 1.1942661732327728, + "grad_norm": 0.5419771175172189, + "learning_rate": 1.8428064676625534e-06, + "loss": 0.2578, + "step": 25494 + }, + { + "epoch": 1.194313018222701, + "grad_norm": 0.6033017804631059, + "learning_rate": 1.8426234895439477e-06, + "loss": 0.265, + "step": 25495 + }, + { + "epoch": 1.1943598632126293, + "grad_norm": 0.5435663146470796, + "learning_rate": 1.8424405152083773e-06, + "loss": 0.2576, + "step": 25496 + }, + { + "epoch": 1.1944067082025578, + "grad_norm": 0.5911447225171637, + "learning_rate": 1.8422575446568974e-06, + "loss": 0.2695, + "step": 25497 + }, + { + "epoch": 1.194453553192486, + "grad_norm": 0.5636985234598881, + "learning_rate": 1.8420745778905598e-06, + "loss": 0.2695, + "step": 25498 + }, + { + "epoch": 1.1945003981824143, + "grad_norm": 0.605203431584135, + "learning_rate": 1.8418916149104174e-06, + "loss": 0.2671, + "step": 25499 + }, + { + "epoch": 1.1945472431723427, + "grad_norm": 0.5905314188879953, + "learning_rate": 1.841708655717523e-06, + "loss": 0.2803, + "step": 25500 + }, + { + "epoch": 1.194594088162271, + "grad_norm": 0.6157597368726907, + "learning_rate": 1.8415257003129312e-06, + "loss": 0.2922, + "step": 25501 + }, + { + "epoch": 1.1946409331521994, + "grad_norm": 0.5661610036054566, + "learning_rate": 1.841342748697692e-06, + "loss": 0.2689, + "step": 25502 + }, + { + "epoch": 1.1946877781421277, + "grad_norm": 0.5869349584778428, + "learning_rate": 1.8411598008728604e-06, + "loss": 0.2934, + "step": 25503 + }, + { + "epoch": 1.1947346231320561, + "grad_norm": 0.5867593038746892, + "learning_rate": 1.8409768568394881e-06, + "loss": 0.286, + "step": 25504 + }, + { + "epoch": 1.1947814681219844, + "grad_norm": 0.5829039897485484, + "learning_rate": 1.8407939165986286e-06, + "loss": 0.2668, + "step": 25505 + }, + { + "epoch": 1.1948283131119126, + "grad_norm": 0.5794330664806732, + "learning_rate": 1.8406109801513347e-06, + "loss": 0.2771, + "step": 25506 + }, + { + "epoch": 1.194875158101841, + "grad_norm": 0.6274083523564518, + "learning_rate": 1.8404280474986587e-06, + "loss": 0.2817, + "step": 25507 + }, + { + "epoch": 1.1949220030917693, + "grad_norm": 0.5918402484352013, + "learning_rate": 1.8402451186416539e-06, + "loss": 0.2721, + "step": 25508 + }, + { + "epoch": 1.1949688480816976, + "grad_norm": 0.5983636133850224, + "learning_rate": 1.8400621935813718e-06, + "loss": 0.2859, + "step": 25509 + }, + { + "epoch": 1.195015693071626, + "grad_norm": 0.5863987988754137, + "learning_rate": 1.8398792723188665e-06, + "loss": 0.2658, + "step": 25510 + }, + { + "epoch": 1.1950625380615543, + "grad_norm": 0.5977080610791129, + "learning_rate": 1.8396963548551897e-06, + "loss": 0.2933, + "step": 25511 + }, + { + "epoch": 1.1951093830514827, + "grad_norm": 0.546186437305947, + "learning_rate": 1.8395134411913946e-06, + "loss": 0.2514, + "step": 25512 + }, + { + "epoch": 1.195156228041411, + "grad_norm": 0.6104047510702968, + "learning_rate": 1.8393305313285336e-06, + "loss": 0.3057, + "step": 25513 + }, + { + "epoch": 1.1952030730313392, + "grad_norm": 0.5554130726742676, + "learning_rate": 1.8391476252676603e-06, + "loss": 0.2557, + "step": 25514 + }, + { + "epoch": 1.1952499180212677, + "grad_norm": 0.6034803742075024, + "learning_rate": 1.838964723009825e-06, + "loss": 0.2687, + "step": 25515 + }, + { + "epoch": 1.195296763011196, + "grad_norm": 0.5717843239238443, + "learning_rate": 1.8387818245560816e-06, + "loss": 0.26, + "step": 25516 + }, + { + "epoch": 1.1953436080011244, + "grad_norm": 0.6260191395453357, + "learning_rate": 1.838598929907483e-06, + "loss": 0.2935, + "step": 25517 + }, + { + "epoch": 1.1953904529910526, + "grad_norm": 0.5755009182208878, + "learning_rate": 1.8384160390650812e-06, + "loss": 0.2725, + "step": 25518 + }, + { + "epoch": 1.1954372979809809, + "grad_norm": 0.5690795771250806, + "learning_rate": 1.8382331520299284e-06, + "loss": 0.2662, + "step": 25519 + }, + { + "epoch": 1.1954841429709093, + "grad_norm": 0.6343250697602265, + "learning_rate": 1.8380502688030788e-06, + "loss": 0.2991, + "step": 25520 + }, + { + "epoch": 1.1955309879608376, + "grad_norm": 0.6431174048145055, + "learning_rate": 1.8378673893855831e-06, + "loss": 0.2939, + "step": 25521 + }, + { + "epoch": 1.1955778329507658, + "grad_norm": 0.5921799921974504, + "learning_rate": 1.8376845137784934e-06, + "loss": 0.2844, + "step": 25522 + }, + { + "epoch": 1.1956246779406943, + "grad_norm": 0.5228258594734824, + "learning_rate": 1.8375016419828628e-06, + "loss": 0.2628, + "step": 25523 + }, + { + "epoch": 1.1956715229306225, + "grad_norm": 0.599445674158046, + "learning_rate": 1.8373187739997445e-06, + "loss": 0.2706, + "step": 25524 + }, + { + "epoch": 1.195718367920551, + "grad_norm": 0.5724808506899148, + "learning_rate": 1.8371359098301894e-06, + "loss": 0.2736, + "step": 25525 + }, + { + "epoch": 1.1957652129104792, + "grad_norm": 0.5809551797763508, + "learning_rate": 1.836953049475252e-06, + "loss": 0.2625, + "step": 25526 + }, + { + "epoch": 1.1958120579004075, + "grad_norm": 0.6296771863545085, + "learning_rate": 1.8367701929359816e-06, + "loss": 0.2896, + "step": 25527 + }, + { + "epoch": 1.195858902890336, + "grad_norm": 0.627683489160719, + "learning_rate": 1.8365873402134326e-06, + "loss": 0.2767, + "step": 25528 + }, + { + "epoch": 1.1959057478802642, + "grad_norm": 0.5869107126263285, + "learning_rate": 1.8364044913086565e-06, + "loss": 0.2767, + "step": 25529 + }, + { + "epoch": 1.1959525928701926, + "grad_norm": 0.6094233034505617, + "learning_rate": 1.8362216462227056e-06, + "loss": 0.2715, + "step": 25530 + }, + { + "epoch": 1.1959994378601209, + "grad_norm": 0.5977081991517247, + "learning_rate": 1.8360388049566325e-06, + "loss": 0.2944, + "step": 25531 + }, + { + "epoch": 1.196046282850049, + "grad_norm": 0.5991716616388887, + "learning_rate": 1.83585596751149e-06, + "loss": 0.2888, + "step": 25532 + }, + { + "epoch": 1.1960931278399776, + "grad_norm": 0.5858434784801421, + "learning_rate": 1.8356731338883285e-06, + "loss": 0.2722, + "step": 25533 + }, + { + "epoch": 1.1961399728299058, + "grad_norm": 0.588187630896487, + "learning_rate": 1.8354903040882011e-06, + "loss": 0.284, + "step": 25534 + }, + { + "epoch": 1.196186817819834, + "grad_norm": 0.6198472790181392, + "learning_rate": 1.8353074781121606e-06, + "loss": 0.2918, + "step": 25535 + }, + { + "epoch": 1.1962336628097625, + "grad_norm": 0.5752085127606694, + "learning_rate": 1.8351246559612579e-06, + "loss": 0.2682, + "step": 25536 + }, + { + "epoch": 1.1962805077996908, + "grad_norm": 0.5770471142927006, + "learning_rate": 1.8349418376365455e-06, + "loss": 0.2734, + "step": 25537 + }, + { + "epoch": 1.1963273527896192, + "grad_norm": 0.6037063732339676, + "learning_rate": 1.834759023139077e-06, + "loss": 0.2799, + "step": 25538 + }, + { + "epoch": 1.1963741977795475, + "grad_norm": 0.550803143234568, + "learning_rate": 1.8345762124699024e-06, + "loss": 0.2683, + "step": 25539 + }, + { + "epoch": 1.196421042769476, + "grad_norm": 0.5442724725783847, + "learning_rate": 1.8343934056300738e-06, + "loss": 0.2653, + "step": 25540 + }, + { + "epoch": 1.1964678877594042, + "grad_norm": 0.5866986772126385, + "learning_rate": 1.8342106026206442e-06, + "loss": 0.2678, + "step": 25541 + }, + { + "epoch": 1.1965147327493324, + "grad_norm": 0.5660110833009074, + "learning_rate": 1.8340278034426657e-06, + "loss": 0.2649, + "step": 25542 + }, + { + "epoch": 1.1965615777392609, + "grad_norm": 0.5998471042443426, + "learning_rate": 1.8338450080971893e-06, + "loss": 0.2661, + "step": 25543 + }, + { + "epoch": 1.196608422729189, + "grad_norm": 0.5798427312667497, + "learning_rate": 1.8336622165852683e-06, + "loss": 0.2648, + "step": 25544 + }, + { + "epoch": 1.1966552677191173, + "grad_norm": 0.6576828190499137, + "learning_rate": 1.8334794289079526e-06, + "loss": 0.2897, + "step": 25545 + }, + { + "epoch": 1.1967021127090458, + "grad_norm": 0.6364396694296345, + "learning_rate": 1.833296645066296e-06, + "loss": 0.2952, + "step": 25546 + }, + { + "epoch": 1.196748957698974, + "grad_norm": 0.5922659383941297, + "learning_rate": 1.8331138650613494e-06, + "loss": 0.2649, + "step": 25547 + }, + { + "epoch": 1.1967958026889025, + "grad_norm": 0.5889565936541286, + "learning_rate": 1.8329310888941648e-06, + "loss": 0.2633, + "step": 25548 + }, + { + "epoch": 1.1968426476788308, + "grad_norm": 0.585242532463909, + "learning_rate": 1.832748316565794e-06, + "loss": 0.2622, + "step": 25549 + }, + { + "epoch": 1.196889492668759, + "grad_norm": 0.5427164967297073, + "learning_rate": 1.83256554807729e-06, + "loss": 0.2581, + "step": 25550 + }, + { + "epoch": 1.1969363376586875, + "grad_norm": 0.6055971738391542, + "learning_rate": 1.8323827834297025e-06, + "loss": 0.2967, + "step": 25551 + }, + { + "epoch": 1.1969831826486157, + "grad_norm": 0.590185959625524, + "learning_rate": 1.8322000226240846e-06, + "loss": 0.2684, + "step": 25552 + }, + { + "epoch": 1.1970300276385442, + "grad_norm": 0.6134876290628207, + "learning_rate": 1.832017265661487e-06, + "loss": 0.2832, + "step": 25553 + }, + { + "epoch": 1.1970768726284724, + "grad_norm": 0.630833801763123, + "learning_rate": 1.8318345125429625e-06, + "loss": 0.2805, + "step": 25554 + }, + { + "epoch": 1.1971237176184006, + "grad_norm": 0.6037744182420101, + "learning_rate": 1.831651763269563e-06, + "loss": 0.2749, + "step": 25555 + }, + { + "epoch": 1.197170562608329, + "grad_norm": 0.6363256528124224, + "learning_rate": 1.83146901784234e-06, + "loss": 0.2981, + "step": 25556 + }, + { + "epoch": 1.1972174075982573, + "grad_norm": 0.59702859482333, + "learning_rate": 1.831286276262344e-06, + "loss": 0.2924, + "step": 25557 + }, + { + "epoch": 1.1972642525881856, + "grad_norm": 0.597298274245846, + "learning_rate": 1.831103538530627e-06, + "loss": 0.2976, + "step": 25558 + }, + { + "epoch": 1.197311097578114, + "grad_norm": 0.613946621008271, + "learning_rate": 1.830920804648242e-06, + "loss": 0.2626, + "step": 25559 + }, + { + "epoch": 1.1973579425680423, + "grad_norm": 0.5919533848273718, + "learning_rate": 1.8307380746162384e-06, + "loss": 0.2688, + "step": 25560 + }, + { + "epoch": 1.1974047875579708, + "grad_norm": 0.5522967158472606, + "learning_rate": 1.8305553484356694e-06, + "loss": 0.2648, + "step": 25561 + }, + { + "epoch": 1.197451632547899, + "grad_norm": 0.6027694441971448, + "learning_rate": 1.830372626107587e-06, + "loss": 0.2774, + "step": 25562 + }, + { + "epoch": 1.1974984775378272, + "grad_norm": 0.6024391547519046, + "learning_rate": 1.8301899076330415e-06, + "loss": 0.2656, + "step": 25563 + }, + { + "epoch": 1.1975453225277557, + "grad_norm": 0.6431316462783094, + "learning_rate": 1.8300071930130841e-06, + "loss": 0.2742, + "step": 25564 + }, + { + "epoch": 1.197592167517684, + "grad_norm": 0.6126550779619301, + "learning_rate": 1.8298244822487671e-06, + "loss": 0.2847, + "step": 25565 + }, + { + "epoch": 1.1976390125076124, + "grad_norm": 0.6041848720950462, + "learning_rate": 1.8296417753411421e-06, + "loss": 0.2842, + "step": 25566 + }, + { + "epoch": 1.1976858574975406, + "grad_norm": 0.6087497508074478, + "learning_rate": 1.8294590722912597e-06, + "loss": 0.2696, + "step": 25567 + }, + { + "epoch": 1.1977327024874689, + "grad_norm": 0.5780229866325217, + "learning_rate": 1.8292763731001728e-06, + "loss": 0.2719, + "step": 25568 + }, + { + "epoch": 1.1977795474773973, + "grad_norm": 0.5638143010623241, + "learning_rate": 1.8290936777689308e-06, + "loss": 0.2612, + "step": 25569 + }, + { + "epoch": 1.1978263924673256, + "grad_norm": 0.6049703456131434, + "learning_rate": 1.8289109862985865e-06, + "loss": 0.2946, + "step": 25570 + }, + { + "epoch": 1.1978732374572538, + "grad_norm": 0.5762088465831445, + "learning_rate": 1.8287282986901905e-06, + "loss": 0.2721, + "step": 25571 + }, + { + "epoch": 1.1979200824471823, + "grad_norm": 0.5678923151283325, + "learning_rate": 1.8285456149447943e-06, + "loss": 0.275, + "step": 25572 + }, + { + "epoch": 1.1979669274371105, + "grad_norm": 0.5925553570580993, + "learning_rate": 1.8283629350634496e-06, + "loss": 0.2716, + "step": 25573 + }, + { + "epoch": 1.198013772427039, + "grad_norm": 0.5831128816640291, + "learning_rate": 1.8281802590472073e-06, + "loss": 0.2668, + "step": 25574 + }, + { + "epoch": 1.1980606174169672, + "grad_norm": 0.6001059106179082, + "learning_rate": 1.8279975868971195e-06, + "loss": 0.2928, + "step": 25575 + }, + { + "epoch": 1.1981074624068957, + "grad_norm": 0.5699914506372917, + "learning_rate": 1.8278149186142357e-06, + "loss": 0.2684, + "step": 25576 + }, + { + "epoch": 1.198154307396824, + "grad_norm": 0.5564177789302394, + "learning_rate": 1.8276322541996088e-06, + "loss": 0.2624, + "step": 25577 + }, + { + "epoch": 1.1982011523867522, + "grad_norm": 0.589344014916764, + "learning_rate": 1.8274495936542886e-06, + "loss": 0.2756, + "step": 25578 + }, + { + "epoch": 1.1982479973766806, + "grad_norm": 0.6194786448726726, + "learning_rate": 1.827266936979327e-06, + "loss": 0.2939, + "step": 25579 + }, + { + "epoch": 1.1982948423666089, + "grad_norm": 0.5952820387575063, + "learning_rate": 1.8270842841757754e-06, + "loss": 0.2787, + "step": 25580 + }, + { + "epoch": 1.1983416873565371, + "grad_norm": 0.6196802313499801, + "learning_rate": 1.8269016352446855e-06, + "loss": 0.2901, + "step": 25581 + }, + { + "epoch": 1.1983885323464656, + "grad_norm": 0.580848502892328, + "learning_rate": 1.8267189901871063e-06, + "loss": 0.2803, + "step": 25582 + }, + { + "epoch": 1.1984353773363938, + "grad_norm": 0.6017526311053337, + "learning_rate": 1.82653634900409e-06, + "loss": 0.2851, + "step": 25583 + }, + { + "epoch": 1.1984822223263223, + "grad_norm": 0.5742638835647451, + "learning_rate": 1.8263537116966883e-06, + "loss": 0.2667, + "step": 25584 + }, + { + "epoch": 1.1985290673162505, + "grad_norm": 0.6084888202974509, + "learning_rate": 1.826171078265951e-06, + "loss": 0.2818, + "step": 25585 + }, + { + "epoch": 1.1985759123061788, + "grad_norm": 0.5630397016615638, + "learning_rate": 1.8259884487129303e-06, + "loss": 0.2814, + "step": 25586 + }, + { + "epoch": 1.1986227572961072, + "grad_norm": 0.6034415314745094, + "learning_rate": 1.8258058230386772e-06, + "loss": 0.2673, + "step": 25587 + }, + { + "epoch": 1.1986696022860355, + "grad_norm": 0.6057502555390805, + "learning_rate": 1.8256232012442419e-06, + "loss": 0.2787, + "step": 25588 + }, + { + "epoch": 1.198716447275964, + "grad_norm": 0.5748972819603473, + "learning_rate": 1.825440583330675e-06, + "loss": 0.2568, + "step": 25589 + }, + { + "epoch": 1.1987632922658922, + "grad_norm": 0.6204422195402679, + "learning_rate": 1.825257969299028e-06, + "loss": 0.285, + "step": 25590 + }, + { + "epoch": 1.1988101372558204, + "grad_norm": 0.6387779103079021, + "learning_rate": 1.8250753591503524e-06, + "loss": 0.2797, + "step": 25591 + }, + { + "epoch": 1.1988569822457489, + "grad_norm": 0.5818109209697513, + "learning_rate": 1.8248927528856975e-06, + "loss": 0.259, + "step": 25592 + }, + { + "epoch": 1.1989038272356771, + "grad_norm": 0.6354645164960023, + "learning_rate": 1.8247101505061166e-06, + "loss": 0.2791, + "step": 25593 + }, + { + "epoch": 1.1989506722256054, + "grad_norm": 0.5761312509220298, + "learning_rate": 1.824527552012658e-06, + "loss": 0.269, + "step": 25594 + }, + { + "epoch": 1.1989975172155338, + "grad_norm": 0.5954111310280482, + "learning_rate": 1.8243449574063738e-06, + "loss": 0.2748, + "step": 25595 + }, + { + "epoch": 1.199044362205462, + "grad_norm": 0.6111194901088853, + "learning_rate": 1.8241623666883143e-06, + "loss": 0.2668, + "step": 25596 + }, + { + "epoch": 1.1990912071953905, + "grad_norm": 0.608284249738902, + "learning_rate": 1.823979779859531e-06, + "loss": 0.271, + "step": 25597 + }, + { + "epoch": 1.1991380521853188, + "grad_norm": 0.6037946691883536, + "learning_rate": 1.8237971969210736e-06, + "loss": 0.2817, + "step": 25598 + }, + { + "epoch": 1.199184897175247, + "grad_norm": 0.6125089551795538, + "learning_rate": 1.8236146178739944e-06, + "loss": 0.2739, + "step": 25599 + }, + { + "epoch": 1.1992317421651755, + "grad_norm": 0.6127845266424611, + "learning_rate": 1.8234320427193418e-06, + "loss": 0.2717, + "step": 25600 + }, + { + "epoch": 1.1992785871551037, + "grad_norm": 0.5986775889124591, + "learning_rate": 1.823249471458169e-06, + "loss": 0.2695, + "step": 25601 + }, + { + "epoch": 1.1993254321450322, + "grad_norm": 0.5937869253330268, + "learning_rate": 1.8230669040915242e-06, + "loss": 0.2865, + "step": 25602 + }, + { + "epoch": 1.1993722771349604, + "grad_norm": 0.57909108458385, + "learning_rate": 1.8228843406204594e-06, + "loss": 0.2623, + "step": 25603 + }, + { + "epoch": 1.1994191221248887, + "grad_norm": 0.6443340227876581, + "learning_rate": 1.8227017810460254e-06, + "loss": 0.2613, + "step": 25604 + }, + { + "epoch": 1.1994659671148171, + "grad_norm": 0.5768436633175281, + "learning_rate": 1.822519225369273e-06, + "loss": 0.277, + "step": 25605 + }, + { + "epoch": 1.1995128121047454, + "grad_norm": 0.585933181398003, + "learning_rate": 1.8223366735912513e-06, + "loss": 0.2783, + "step": 25606 + }, + { + "epoch": 1.1995596570946736, + "grad_norm": 0.5783200044073312, + "learning_rate": 1.8221541257130118e-06, + "loss": 0.2744, + "step": 25607 + }, + { + "epoch": 1.199606502084602, + "grad_norm": 0.5394564513816038, + "learning_rate": 1.8219715817356054e-06, + "loss": 0.2602, + "step": 25608 + }, + { + "epoch": 1.1996533470745303, + "grad_norm": 0.5983608793688057, + "learning_rate": 1.8217890416600814e-06, + "loss": 0.2604, + "step": 25609 + }, + { + "epoch": 1.1997001920644588, + "grad_norm": 0.5923573026033266, + "learning_rate": 1.821606505487491e-06, + "loss": 0.2668, + "step": 25610 + }, + { + "epoch": 1.199747037054387, + "grad_norm": 0.6355761472700794, + "learning_rate": 1.8214239732188859e-06, + "loss": 0.283, + "step": 25611 + }, + { + "epoch": 1.1997938820443155, + "grad_norm": 0.5855672899467506, + "learning_rate": 1.8212414448553148e-06, + "loss": 0.285, + "step": 25612 + }, + { + "epoch": 1.1998407270342437, + "grad_norm": 0.5369069174008785, + "learning_rate": 1.821058920397828e-06, + "loss": 0.262, + "step": 25613 + }, + { + "epoch": 1.199887572024172, + "grad_norm": 0.6073852281264203, + "learning_rate": 1.8208763998474762e-06, + "loss": 0.3008, + "step": 25614 + }, + { + "epoch": 1.1999344170141004, + "grad_norm": 0.5478515553597112, + "learning_rate": 1.820693883205311e-06, + "loss": 0.2566, + "step": 25615 + }, + { + "epoch": 1.1999812620040287, + "grad_norm": 0.6105144521569145, + "learning_rate": 1.8205113704723809e-06, + "loss": 0.2852, + "step": 25616 + }, + { + "epoch": 1.200028106993957, + "grad_norm": 0.6074201040891756, + "learning_rate": 1.8203288616497384e-06, + "loss": 0.2876, + "step": 25617 + }, + { + "epoch": 1.2000749519838854, + "grad_norm": 0.5760245416169406, + "learning_rate": 1.8201463567384308e-06, + "loss": 0.2662, + "step": 25618 + }, + { + "epoch": 1.2001217969738136, + "grad_norm": 0.5561641217697788, + "learning_rate": 1.8199638557395114e-06, + "loss": 0.2733, + "step": 25619 + }, + { + "epoch": 1.200168641963742, + "grad_norm": 0.5828328124550203, + "learning_rate": 1.819781358654028e-06, + "loss": 0.2685, + "step": 25620 + }, + { + "epoch": 1.2002154869536703, + "grad_norm": 0.5987313781353131, + "learning_rate": 1.819598865483032e-06, + "loss": 0.2782, + "step": 25621 + }, + { + "epoch": 1.2002623319435985, + "grad_norm": 0.5464549968266418, + "learning_rate": 1.8194163762275743e-06, + "loss": 0.2545, + "step": 25622 + }, + { + "epoch": 1.200309176933527, + "grad_norm": 0.6703540135322512, + "learning_rate": 1.8192338908887046e-06, + "loss": 0.2775, + "step": 25623 + }, + { + "epoch": 1.2003560219234553, + "grad_norm": 0.6259050679306182, + "learning_rate": 1.8190514094674719e-06, + "loss": 0.2809, + "step": 25624 + }, + { + "epoch": 1.2004028669133837, + "grad_norm": 0.6035188658377739, + "learning_rate": 1.8188689319649271e-06, + "loss": 0.2757, + "step": 25625 + }, + { + "epoch": 1.200449711903312, + "grad_norm": 0.6121611098860735, + "learning_rate": 1.8186864583821208e-06, + "loss": 0.2784, + "step": 25626 + }, + { + "epoch": 1.2004965568932402, + "grad_norm": 0.5611109201310737, + "learning_rate": 1.8185039887201022e-06, + "loss": 0.2729, + "step": 25627 + }, + { + "epoch": 1.2005434018831687, + "grad_norm": 0.5943432477456827, + "learning_rate": 1.818321522979922e-06, + "loss": 0.284, + "step": 25628 + }, + { + "epoch": 1.200590246873097, + "grad_norm": 0.5284191686800085, + "learning_rate": 1.8181390611626309e-06, + "loss": 0.2453, + "step": 25629 + }, + { + "epoch": 1.2006370918630251, + "grad_norm": 0.6333349485547929, + "learning_rate": 1.817956603269278e-06, + "loss": 0.3039, + "step": 25630 + }, + { + "epoch": 1.2006839368529536, + "grad_norm": 0.5874274489139091, + "learning_rate": 1.8177741493009126e-06, + "loss": 0.2824, + "step": 25631 + }, + { + "epoch": 1.2007307818428818, + "grad_norm": 0.5965726368814048, + "learning_rate": 1.8175916992585854e-06, + "loss": 0.2702, + "step": 25632 + }, + { + "epoch": 1.2007776268328103, + "grad_norm": 0.6019616077600152, + "learning_rate": 1.8174092531433474e-06, + "loss": 0.2903, + "step": 25633 + }, + { + "epoch": 1.2008244718227385, + "grad_norm": 0.6561979676198957, + "learning_rate": 1.8172268109562466e-06, + "loss": 0.2948, + "step": 25634 + }, + { + "epoch": 1.2008713168126668, + "grad_norm": 0.5541372278515176, + "learning_rate": 1.817044372698335e-06, + "loss": 0.2628, + "step": 25635 + }, + { + "epoch": 1.2009181618025953, + "grad_norm": 0.6000946185601199, + "learning_rate": 1.8168619383706607e-06, + "loss": 0.2777, + "step": 25636 + }, + { + "epoch": 1.2009650067925235, + "grad_norm": 0.56899066912367, + "learning_rate": 1.8166795079742744e-06, + "loss": 0.2609, + "step": 25637 + }, + { + "epoch": 1.201011851782452, + "grad_norm": 0.585550344681444, + "learning_rate": 1.8164970815102255e-06, + "loss": 0.2631, + "step": 25638 + }, + { + "epoch": 1.2010586967723802, + "grad_norm": 0.5727249302118349, + "learning_rate": 1.8163146589795645e-06, + "loss": 0.2749, + "step": 25639 + }, + { + "epoch": 1.2011055417623084, + "grad_norm": 0.6035644239743501, + "learning_rate": 1.8161322403833403e-06, + "loss": 0.2772, + "step": 25640 + }, + { + "epoch": 1.201152386752237, + "grad_norm": 0.6294087822755283, + "learning_rate": 1.8159498257226032e-06, + "loss": 0.2827, + "step": 25641 + }, + { + "epoch": 1.2011992317421651, + "grad_norm": 0.5403212843485168, + "learning_rate": 1.8157674149984039e-06, + "loss": 0.2707, + "step": 25642 + }, + { + "epoch": 1.2012460767320934, + "grad_norm": 0.6177864135535569, + "learning_rate": 1.8155850082117907e-06, + "loss": 0.2739, + "step": 25643 + }, + { + "epoch": 1.2012929217220218, + "grad_norm": 0.5661192491754343, + "learning_rate": 1.8154026053638132e-06, + "loss": 0.266, + "step": 25644 + }, + { + "epoch": 1.20133976671195, + "grad_norm": 0.5968950402377875, + "learning_rate": 1.8152202064555216e-06, + "loss": 0.2875, + "step": 25645 + }, + { + "epoch": 1.2013866117018785, + "grad_norm": 0.5627295790640957, + "learning_rate": 1.8150378114879664e-06, + "loss": 0.2598, + "step": 25646 + }, + { + "epoch": 1.2014334566918068, + "grad_norm": 0.5747319381580468, + "learning_rate": 1.8148554204621954e-06, + "loss": 0.2743, + "step": 25647 + }, + { + "epoch": 1.2014803016817353, + "grad_norm": 0.5604241779297059, + "learning_rate": 1.8146730333792606e-06, + "loss": 0.2786, + "step": 25648 + }, + { + "epoch": 1.2015271466716635, + "grad_norm": 0.610875631110188, + "learning_rate": 1.8144906502402094e-06, + "loss": 0.2955, + "step": 25649 + }, + { + "epoch": 1.2015739916615917, + "grad_norm": 0.5722555305763495, + "learning_rate": 1.8143082710460923e-06, + "loss": 0.2667, + "step": 25650 + }, + { + "epoch": 1.2016208366515202, + "grad_norm": 0.6622885532582332, + "learning_rate": 1.8141258957979585e-06, + "loss": 0.3036, + "step": 25651 + }, + { + "epoch": 1.2016676816414484, + "grad_norm": 0.6393501351237293, + "learning_rate": 1.813943524496858e-06, + "loss": 0.3082, + "step": 25652 + }, + { + "epoch": 1.2017145266313767, + "grad_norm": 0.5487427578688585, + "learning_rate": 1.81376115714384e-06, + "loss": 0.2691, + "step": 25653 + }, + { + "epoch": 1.2017613716213051, + "grad_norm": 0.6052876716218267, + "learning_rate": 1.8135787937399552e-06, + "loss": 0.2845, + "step": 25654 + }, + { + "epoch": 1.2018082166112334, + "grad_norm": 0.5850493105423534, + "learning_rate": 1.8133964342862504e-06, + "loss": 0.2629, + "step": 25655 + }, + { + "epoch": 1.2018550616011618, + "grad_norm": 0.563931527075333, + "learning_rate": 1.8132140787837766e-06, + "loss": 0.2663, + "step": 25656 + }, + { + "epoch": 1.20190190659109, + "grad_norm": 0.6002133882006038, + "learning_rate": 1.813031727233584e-06, + "loss": 0.2762, + "step": 25657 + }, + { + "epoch": 1.2019487515810183, + "grad_norm": 0.5745767534713228, + "learning_rate": 1.8128493796367203e-06, + "loss": 0.2757, + "step": 25658 + }, + { + "epoch": 1.2019955965709468, + "grad_norm": 0.5914266536659822, + "learning_rate": 1.8126670359942358e-06, + "loss": 0.2809, + "step": 25659 + }, + { + "epoch": 1.202042441560875, + "grad_norm": 0.5864903662300679, + "learning_rate": 1.812484696307181e-06, + "loss": 0.2635, + "step": 25660 + }, + { + "epoch": 1.2020892865508035, + "grad_norm": 0.580510842576406, + "learning_rate": 1.8123023605766032e-06, + "loss": 0.2825, + "step": 25661 + }, + { + "epoch": 1.2021361315407317, + "grad_norm": 0.6076565069715241, + "learning_rate": 1.8121200288035524e-06, + "loss": 0.2588, + "step": 25662 + }, + { + "epoch": 1.20218297653066, + "grad_norm": 0.6769453260471932, + "learning_rate": 1.8119377009890773e-06, + "loss": 0.3031, + "step": 25663 + }, + { + "epoch": 1.2022298215205884, + "grad_norm": 0.596612576493927, + "learning_rate": 1.8117553771342287e-06, + "loss": 0.2936, + "step": 25664 + }, + { + "epoch": 1.2022766665105167, + "grad_norm": 0.6141003316078238, + "learning_rate": 1.8115730572400542e-06, + "loss": 0.2832, + "step": 25665 + }, + { + "epoch": 1.202323511500445, + "grad_norm": 0.6092054606966528, + "learning_rate": 1.8113907413076048e-06, + "loss": 0.283, + "step": 25666 + }, + { + "epoch": 1.2023703564903734, + "grad_norm": 0.6246374693163881, + "learning_rate": 1.811208429337928e-06, + "loss": 0.2745, + "step": 25667 + }, + { + "epoch": 1.2024172014803016, + "grad_norm": 0.5693429624525783, + "learning_rate": 1.8110261213320734e-06, + "loss": 0.2624, + "step": 25668 + }, + { + "epoch": 1.20246404647023, + "grad_norm": 0.6007766416739381, + "learning_rate": 1.8108438172910902e-06, + "loss": 0.2722, + "step": 25669 + }, + { + "epoch": 1.2025108914601583, + "grad_norm": 0.5800559692096556, + "learning_rate": 1.8106615172160274e-06, + "loss": 0.264, + "step": 25670 + }, + { + "epoch": 1.2025577364500866, + "grad_norm": 0.5756899875700172, + "learning_rate": 1.810479221107935e-06, + "loss": 0.274, + "step": 25671 + }, + { + "epoch": 1.202604581440015, + "grad_norm": 0.6045161068568421, + "learning_rate": 1.8102969289678617e-06, + "loss": 0.2776, + "step": 25672 + }, + { + "epoch": 1.2026514264299433, + "grad_norm": 0.586874062330171, + "learning_rate": 1.8101146407968556e-06, + "loss": 0.272, + "step": 25673 + }, + { + "epoch": 1.2026982714198717, + "grad_norm": 0.5915948906171465, + "learning_rate": 1.809932356595966e-06, + "loss": 0.2765, + "step": 25674 + }, + { + "epoch": 1.2027451164098, + "grad_norm": 0.6012840585009123, + "learning_rate": 1.8097500763662428e-06, + "loss": 0.2873, + "step": 25675 + }, + { + "epoch": 1.2027919613997282, + "grad_norm": 0.5828451324106646, + "learning_rate": 1.8095678001087341e-06, + "loss": 0.276, + "step": 25676 + }, + { + "epoch": 1.2028388063896567, + "grad_norm": 0.6623276419578654, + "learning_rate": 1.809385527824489e-06, + "loss": 0.2936, + "step": 25677 + }, + { + "epoch": 1.202885651379585, + "grad_norm": 0.6045915765218298, + "learning_rate": 1.8092032595145575e-06, + "loss": 0.2815, + "step": 25678 + }, + { + "epoch": 1.2029324963695132, + "grad_norm": 0.6431996117109958, + "learning_rate": 1.8090209951799875e-06, + "loss": 0.3027, + "step": 25679 + }, + { + "epoch": 1.2029793413594416, + "grad_norm": 0.5421743641122442, + "learning_rate": 1.8088387348218272e-06, + "loss": 0.2521, + "step": 25680 + }, + { + "epoch": 1.2030261863493699, + "grad_norm": 0.6090378886788351, + "learning_rate": 1.8086564784411265e-06, + "loss": 0.2668, + "step": 25681 + }, + { + "epoch": 1.2030730313392983, + "grad_norm": 0.5610017095992661, + "learning_rate": 1.8084742260389343e-06, + "loss": 0.2715, + "step": 25682 + }, + { + "epoch": 1.2031198763292266, + "grad_norm": 0.6261091842913302, + "learning_rate": 1.8082919776162986e-06, + "loss": 0.2909, + "step": 25683 + }, + { + "epoch": 1.203166721319155, + "grad_norm": 0.6010456739389162, + "learning_rate": 1.8081097331742698e-06, + "loss": 0.274, + "step": 25684 + }, + { + "epoch": 1.2032135663090833, + "grad_norm": 0.5674459269804326, + "learning_rate": 1.8079274927138945e-06, + "loss": 0.2689, + "step": 25685 + }, + { + "epoch": 1.2032604112990115, + "grad_norm": 0.5818317537887647, + "learning_rate": 1.807745256236223e-06, + "loss": 0.273, + "step": 25686 + }, + { + "epoch": 1.20330725628894, + "grad_norm": 0.5746217848817217, + "learning_rate": 1.8075630237423031e-06, + "loss": 0.2728, + "step": 25687 + }, + { + "epoch": 1.2033541012788682, + "grad_norm": 0.5717535620023771, + "learning_rate": 1.8073807952331845e-06, + "loss": 0.264, + "step": 25688 + }, + { + "epoch": 1.2034009462687965, + "grad_norm": 0.5631466232213063, + "learning_rate": 1.807198570709915e-06, + "loss": 0.2635, + "step": 25689 + }, + { + "epoch": 1.203447791258725, + "grad_norm": 0.6375966919227603, + "learning_rate": 1.8070163501735444e-06, + "loss": 0.3009, + "step": 25690 + }, + { + "epoch": 1.2034946362486532, + "grad_norm": 0.6216088846362784, + "learning_rate": 1.8068341336251197e-06, + "loss": 0.2921, + "step": 25691 + }, + { + "epoch": 1.2035414812385816, + "grad_norm": 0.604006798267647, + "learning_rate": 1.8066519210656908e-06, + "loss": 0.2614, + "step": 25692 + }, + { + "epoch": 1.2035883262285099, + "grad_norm": 0.6307725434818451, + "learning_rate": 1.8064697124963052e-06, + "loss": 0.2768, + "step": 25693 + }, + { + "epoch": 1.203635171218438, + "grad_norm": 0.6173540249447492, + "learning_rate": 1.806287507918012e-06, + "loss": 0.2689, + "step": 25694 + }, + { + "epoch": 1.2036820162083666, + "grad_norm": 0.5708522912701491, + "learning_rate": 1.8061053073318606e-06, + "loss": 0.2588, + "step": 25695 + }, + { + "epoch": 1.2037288611982948, + "grad_norm": 0.597299497690232, + "learning_rate": 1.8059231107388992e-06, + "loss": 0.278, + "step": 25696 + }, + { + "epoch": 1.2037757061882233, + "grad_norm": 0.510951053360424, + "learning_rate": 1.8057409181401748e-06, + "loss": 0.2615, + "step": 25697 + }, + { + "epoch": 1.2038225511781515, + "grad_norm": 0.5663453584998538, + "learning_rate": 1.8055587295367366e-06, + "loss": 0.251, + "step": 25698 + }, + { + "epoch": 1.2038693961680798, + "grad_norm": 0.6340494086383347, + "learning_rate": 1.8053765449296343e-06, + "loss": 0.28, + "step": 25699 + }, + { + "epoch": 1.2039162411580082, + "grad_norm": 0.5916496787963184, + "learning_rate": 1.805194364319915e-06, + "loss": 0.2862, + "step": 25700 + }, + { + "epoch": 1.2039630861479365, + "grad_norm": 0.5938247600252397, + "learning_rate": 1.8050121877086268e-06, + "loss": 0.2757, + "step": 25701 + }, + { + "epoch": 1.2040099311378647, + "grad_norm": 0.6082172378816806, + "learning_rate": 1.8048300150968207e-06, + "loss": 0.2793, + "step": 25702 + }, + { + "epoch": 1.2040567761277932, + "grad_norm": 0.6012402406634745, + "learning_rate": 1.8046478464855421e-06, + "loss": 0.2825, + "step": 25703 + }, + { + "epoch": 1.2041036211177214, + "grad_norm": 0.6491451588513634, + "learning_rate": 1.8044656818758399e-06, + "loss": 0.2798, + "step": 25704 + }, + { + "epoch": 1.2041504661076499, + "grad_norm": 0.6241454640829518, + "learning_rate": 1.8042835212687626e-06, + "loss": 0.2785, + "step": 25705 + }, + { + "epoch": 1.204197311097578, + "grad_norm": 0.548598161243461, + "learning_rate": 1.80410136466536e-06, + "loss": 0.2596, + "step": 25706 + }, + { + "epoch": 1.2042441560875063, + "grad_norm": 0.6356457473122057, + "learning_rate": 1.8039192120666782e-06, + "loss": 0.2938, + "step": 25707 + }, + { + "epoch": 1.2042910010774348, + "grad_norm": 0.5657697006010003, + "learning_rate": 1.8037370634737661e-06, + "loss": 0.281, + "step": 25708 + }, + { + "epoch": 1.204337846067363, + "grad_norm": 0.6052412451347492, + "learning_rate": 1.803554918887674e-06, + "loss": 0.2865, + "step": 25709 + }, + { + "epoch": 1.2043846910572915, + "grad_norm": 0.5875750560658483, + "learning_rate": 1.803372778309447e-06, + "loss": 0.2858, + "step": 25710 + }, + { + "epoch": 1.2044315360472198, + "grad_norm": 0.6292496928192102, + "learning_rate": 1.8031906417401346e-06, + "loss": 0.2869, + "step": 25711 + }, + { + "epoch": 1.204478381037148, + "grad_norm": 0.619410127387714, + "learning_rate": 1.8030085091807848e-06, + "loss": 0.2776, + "step": 25712 + }, + { + "epoch": 1.2045252260270765, + "grad_norm": 0.572113762490747, + "learning_rate": 1.802826380632446e-06, + "loss": 0.2675, + "step": 25713 + }, + { + "epoch": 1.2045720710170047, + "grad_norm": 0.5602081899085635, + "learning_rate": 1.802644256096166e-06, + "loss": 0.2681, + "step": 25714 + }, + { + "epoch": 1.204618916006933, + "grad_norm": 0.5730729991830404, + "learning_rate": 1.8024621355729937e-06, + "loss": 0.2846, + "step": 25715 + }, + { + "epoch": 1.2046657609968614, + "grad_norm": 0.5652398704312565, + "learning_rate": 1.8022800190639761e-06, + "loss": 0.2647, + "step": 25716 + }, + { + "epoch": 1.2047126059867896, + "grad_norm": 0.5513106250070798, + "learning_rate": 1.8020979065701617e-06, + "loss": 0.2709, + "step": 25717 + }, + { + "epoch": 1.204759450976718, + "grad_norm": 0.5900513583455216, + "learning_rate": 1.8019157980925978e-06, + "loss": 0.2649, + "step": 25718 + }, + { + "epoch": 1.2048062959666463, + "grad_norm": 0.5445074008452774, + "learning_rate": 1.8017336936323332e-06, + "loss": 0.27, + "step": 25719 + }, + { + "epoch": 1.2048531409565748, + "grad_norm": 0.5470600847593736, + "learning_rate": 1.8015515931904165e-06, + "loss": 0.2593, + "step": 25720 + }, + { + "epoch": 1.204899985946503, + "grad_norm": 0.5659207786528313, + "learning_rate": 1.8013694967678952e-06, + "loss": 0.2708, + "step": 25721 + }, + { + "epoch": 1.2049468309364313, + "grad_norm": 0.6349384513942017, + "learning_rate": 1.8011874043658156e-06, + "loss": 0.2979, + "step": 25722 + }, + { + "epoch": 1.2049936759263598, + "grad_norm": 0.590495555733982, + "learning_rate": 1.801005315985227e-06, + "loss": 0.297, + "step": 25723 + }, + { + "epoch": 1.205040520916288, + "grad_norm": 0.5852452510295938, + "learning_rate": 1.8008232316271774e-06, + "loss": 0.2695, + "step": 25724 + }, + { + "epoch": 1.2050873659062162, + "grad_norm": 0.5613647436763877, + "learning_rate": 1.800641151292714e-06, + "loss": 0.2687, + "step": 25725 + }, + { + "epoch": 1.2051342108961447, + "grad_norm": 0.6312486738002815, + "learning_rate": 1.800459074982885e-06, + "loss": 0.2907, + "step": 25726 + }, + { + "epoch": 1.205181055886073, + "grad_norm": 0.5935143437075312, + "learning_rate": 1.8002770026987392e-06, + "loss": 0.2915, + "step": 25727 + }, + { + "epoch": 1.2052279008760014, + "grad_norm": 0.5994073251004659, + "learning_rate": 1.8000949344413232e-06, + "loss": 0.2832, + "step": 25728 + }, + { + "epoch": 1.2052747458659296, + "grad_norm": 0.581274823722642, + "learning_rate": 1.7999128702116841e-06, + "loss": 0.259, + "step": 25729 + }, + { + "epoch": 1.2053215908558579, + "grad_norm": 0.5659677663239285, + "learning_rate": 1.7997308100108705e-06, + "loss": 0.2519, + "step": 25730 + }, + { + "epoch": 1.2053684358457863, + "grad_norm": 0.591064210135153, + "learning_rate": 1.7995487538399306e-06, + "loss": 0.2796, + "step": 25731 + }, + { + "epoch": 1.2054152808357146, + "grad_norm": 0.5868941403102211, + "learning_rate": 1.799366701699911e-06, + "loss": 0.2576, + "step": 25732 + }, + { + "epoch": 1.205462125825643, + "grad_norm": 0.5575434004409884, + "learning_rate": 1.799184653591861e-06, + "loss": 0.2548, + "step": 25733 + }, + { + "epoch": 1.2055089708155713, + "grad_norm": 0.5404265149029956, + "learning_rate": 1.7990026095168261e-06, + "loss": 0.264, + "step": 25734 + }, + { + "epoch": 1.2055558158054995, + "grad_norm": 0.603459708207144, + "learning_rate": 1.798820569475856e-06, + "loss": 0.2766, + "step": 25735 + }, + { + "epoch": 1.205602660795428, + "grad_norm": 0.6162469883128082, + "learning_rate": 1.7986385334699963e-06, + "loss": 0.2738, + "step": 25736 + }, + { + "epoch": 1.2056495057853562, + "grad_norm": 0.534410854120158, + "learning_rate": 1.7984565015002959e-06, + "loss": 0.246, + "step": 25737 + }, + { + "epoch": 1.2056963507752845, + "grad_norm": 0.6219691032689301, + "learning_rate": 1.7982744735678018e-06, + "loss": 0.3062, + "step": 25738 + }, + { + "epoch": 1.205743195765213, + "grad_norm": 0.61261173510962, + "learning_rate": 1.7980924496735627e-06, + "loss": 0.2829, + "step": 25739 + }, + { + "epoch": 1.2057900407551412, + "grad_norm": 0.5839333250038762, + "learning_rate": 1.7979104298186239e-06, + "loss": 0.2724, + "step": 25740 + }, + { + "epoch": 1.2058368857450696, + "grad_norm": 0.6130844625645725, + "learning_rate": 1.7977284140040352e-06, + "loss": 0.2832, + "step": 25741 + }, + { + "epoch": 1.2058837307349979, + "grad_norm": 0.5329703548002138, + "learning_rate": 1.797546402230842e-06, + "loss": 0.2447, + "step": 25742 + }, + { + "epoch": 1.2059305757249261, + "grad_norm": 0.6240942105371035, + "learning_rate": 1.7973643945000926e-06, + "loss": 0.2779, + "step": 25743 + }, + { + "epoch": 1.2059774207148546, + "grad_norm": 0.6136825858945336, + "learning_rate": 1.7971823908128353e-06, + "loss": 0.2959, + "step": 25744 + }, + { + "epoch": 1.2060242657047828, + "grad_norm": 0.5887161147325782, + "learning_rate": 1.7970003911701172e-06, + "loss": 0.2659, + "step": 25745 + }, + { + "epoch": 1.2060711106947113, + "grad_norm": 0.5864322748444932, + "learning_rate": 1.796818395572984e-06, + "loss": 0.2731, + "step": 25746 + }, + { + "epoch": 1.2061179556846395, + "grad_norm": 0.620053515163103, + "learning_rate": 1.7966364040224843e-06, + "loss": 0.2602, + "step": 25747 + }, + { + "epoch": 1.2061648006745678, + "grad_norm": 0.5973683515535637, + "learning_rate": 1.7964544165196656e-06, + "loss": 0.2903, + "step": 25748 + }, + { + "epoch": 1.2062116456644962, + "grad_norm": 0.5825432625116395, + "learning_rate": 1.7962724330655744e-06, + "loss": 0.2945, + "step": 25749 + }, + { + "epoch": 1.2062584906544245, + "grad_norm": 0.5369532989637404, + "learning_rate": 1.796090453661259e-06, + "loss": 0.2545, + "step": 25750 + }, + { + "epoch": 1.2063053356443527, + "grad_norm": 0.594884444275706, + "learning_rate": 1.7959084783077665e-06, + "loss": 0.2684, + "step": 25751 + }, + { + "epoch": 1.2063521806342812, + "grad_norm": 0.5604933185195303, + "learning_rate": 1.7957265070061437e-06, + "loss": 0.256, + "step": 25752 + }, + { + "epoch": 1.2063990256242094, + "grad_norm": 0.6307205939164006, + "learning_rate": 1.795544539757437e-06, + "loss": 0.2926, + "step": 25753 + }, + { + "epoch": 1.2064458706141379, + "grad_norm": 0.6171303405783335, + "learning_rate": 1.7953625765626946e-06, + "loss": 0.2766, + "step": 25754 + }, + { + "epoch": 1.2064927156040661, + "grad_norm": 0.6020070803545768, + "learning_rate": 1.7951806174229639e-06, + "loss": 0.2759, + "step": 25755 + }, + { + "epoch": 1.2065395605939946, + "grad_norm": 0.5732287937068083, + "learning_rate": 1.7949986623392913e-06, + "loss": 0.2669, + "step": 25756 + }, + { + "epoch": 1.2065864055839228, + "grad_norm": 0.6068705096492646, + "learning_rate": 1.7948167113127247e-06, + "loss": 0.2818, + "step": 25757 + }, + { + "epoch": 1.206633250573851, + "grad_norm": 0.5707231886194896, + "learning_rate": 1.7946347643443103e-06, + "loss": 0.274, + "step": 25758 + }, + { + "epoch": 1.2066800955637795, + "grad_norm": 0.5669006699511511, + "learning_rate": 1.7944528214350954e-06, + "loss": 0.2756, + "step": 25759 + }, + { + "epoch": 1.2067269405537078, + "grad_norm": 0.6436934586412983, + "learning_rate": 1.794270882586127e-06, + "loss": 0.274, + "step": 25760 + }, + { + "epoch": 1.206773785543636, + "grad_norm": 0.6059545050629038, + "learning_rate": 1.7940889477984524e-06, + "loss": 0.2821, + "step": 25761 + }, + { + "epoch": 1.2068206305335645, + "grad_norm": 0.5865757012835734, + "learning_rate": 1.7939070170731187e-06, + "loss": 0.2839, + "step": 25762 + }, + { + "epoch": 1.2068674755234927, + "grad_norm": 0.5803258357903274, + "learning_rate": 1.7937250904111735e-06, + "loss": 0.2738, + "step": 25763 + }, + { + "epoch": 1.2069143205134212, + "grad_norm": 0.5785434492649012, + "learning_rate": 1.7935431678136616e-06, + "loss": 0.2718, + "step": 25764 + }, + { + "epoch": 1.2069611655033494, + "grad_norm": 0.6117673650701557, + "learning_rate": 1.7933612492816317e-06, + "loss": 0.2807, + "step": 25765 + }, + { + "epoch": 1.2070080104932777, + "grad_norm": 0.597969095589059, + "learning_rate": 1.7931793348161303e-06, + "loss": 0.2776, + "step": 25766 + }, + { + "epoch": 1.2070548554832061, + "grad_norm": 0.6175759123661769, + "learning_rate": 1.7929974244182037e-06, + "loss": 0.2743, + "step": 25767 + }, + { + "epoch": 1.2071017004731344, + "grad_norm": 0.6153412863485799, + "learning_rate": 1.792815518088899e-06, + "loss": 0.2859, + "step": 25768 + }, + { + "epoch": 1.2071485454630628, + "grad_norm": 0.5614400144297411, + "learning_rate": 1.7926336158292646e-06, + "loss": 0.265, + "step": 25769 + }, + { + "epoch": 1.207195390452991, + "grad_norm": 0.6087649116963377, + "learning_rate": 1.7924517176403455e-06, + "loss": 0.3014, + "step": 25770 + }, + { + "epoch": 1.2072422354429193, + "grad_norm": 0.5942267508965304, + "learning_rate": 1.7922698235231884e-06, + "loss": 0.2693, + "step": 25771 + }, + { + "epoch": 1.2072890804328478, + "grad_norm": 0.5567178632792564, + "learning_rate": 1.7920879334788401e-06, + "loss": 0.2722, + "step": 25772 + }, + { + "epoch": 1.207335925422776, + "grad_norm": 0.6261287616954856, + "learning_rate": 1.791906047508349e-06, + "loss": 0.2758, + "step": 25773 + }, + { + "epoch": 1.2073827704127043, + "grad_norm": 0.5540907588131561, + "learning_rate": 1.7917241656127595e-06, + "loss": 0.2639, + "step": 25774 + }, + { + "epoch": 1.2074296154026327, + "grad_norm": 0.5912183441760877, + "learning_rate": 1.7915422877931205e-06, + "loss": 0.2605, + "step": 25775 + }, + { + "epoch": 1.207476460392561, + "grad_norm": 0.5648026589438379, + "learning_rate": 1.791360414050477e-06, + "loss": 0.271, + "step": 25776 + }, + { + "epoch": 1.2075233053824894, + "grad_norm": 0.6317577176199457, + "learning_rate": 1.7911785443858764e-06, + "loss": 0.2861, + "step": 25777 + }, + { + "epoch": 1.2075701503724177, + "grad_norm": 0.6234203762345194, + "learning_rate": 1.790996678800365e-06, + "loss": 0.2828, + "step": 25778 + }, + { + "epoch": 1.207616995362346, + "grad_norm": 0.6080335557713731, + "learning_rate": 1.7908148172949895e-06, + "loss": 0.2865, + "step": 25779 + }, + { + "epoch": 1.2076638403522744, + "grad_norm": 0.5919144853035847, + "learning_rate": 1.7906329598707961e-06, + "loss": 0.2653, + "step": 25780 + }, + { + "epoch": 1.2077106853422026, + "grad_norm": 0.619511124199797, + "learning_rate": 1.790451106528832e-06, + "loss": 0.2702, + "step": 25781 + }, + { + "epoch": 1.207757530332131, + "grad_norm": 0.5984933471246747, + "learning_rate": 1.790269257270144e-06, + "loss": 0.2762, + "step": 25782 + }, + { + "epoch": 1.2078043753220593, + "grad_norm": 0.5887459256364338, + "learning_rate": 1.7900874120957781e-06, + "loss": 0.2695, + "step": 25783 + }, + { + "epoch": 1.2078512203119876, + "grad_norm": 0.5737665405862142, + "learning_rate": 1.7899055710067799e-06, + "loss": 0.2507, + "step": 25784 + }, + { + "epoch": 1.207898065301916, + "grad_norm": 0.5885330031355873, + "learning_rate": 1.7897237340041967e-06, + "loss": 0.2795, + "step": 25785 + }, + { + "epoch": 1.2079449102918443, + "grad_norm": 0.5391768591809005, + "learning_rate": 1.7895419010890753e-06, + "loss": 0.2551, + "step": 25786 + }, + { + "epoch": 1.2079917552817725, + "grad_norm": 0.5835530270504031, + "learning_rate": 1.7893600722624612e-06, + "loss": 0.2719, + "step": 25787 + }, + { + "epoch": 1.208038600271701, + "grad_norm": 0.5820408677461775, + "learning_rate": 1.7891782475254024e-06, + "loss": 0.2915, + "step": 25788 + }, + { + "epoch": 1.2080854452616292, + "grad_norm": 0.6226044348228177, + "learning_rate": 1.7889964268789431e-06, + "loss": 0.2694, + "step": 25789 + }, + { + "epoch": 1.2081322902515577, + "grad_norm": 0.621193826522941, + "learning_rate": 1.7888146103241311e-06, + "loss": 0.2639, + "step": 25790 + }, + { + "epoch": 1.208179135241486, + "grad_norm": 0.611371264468365, + "learning_rate": 1.7886327978620116e-06, + "loss": 0.2717, + "step": 25791 + }, + { + "epoch": 1.2082259802314144, + "grad_norm": 0.6182353506469218, + "learning_rate": 1.7884509894936318e-06, + "loss": 0.2828, + "step": 25792 + }, + { + "epoch": 1.2082728252213426, + "grad_norm": 0.6079894138993384, + "learning_rate": 1.7882691852200384e-06, + "loss": 0.2744, + "step": 25793 + }, + { + "epoch": 1.2083196702112708, + "grad_norm": 0.5376892166848183, + "learning_rate": 1.788087385042277e-06, + "loss": 0.2513, + "step": 25794 + }, + { + "epoch": 1.2083665152011993, + "grad_norm": 0.5761701748266732, + "learning_rate": 1.787905588961393e-06, + "loss": 0.2749, + "step": 25795 + }, + { + "epoch": 1.2084133601911276, + "grad_norm": 0.5843670380577735, + "learning_rate": 1.7877237969784334e-06, + "loss": 0.2834, + "step": 25796 + }, + { + "epoch": 1.2084602051810558, + "grad_norm": 0.6553328450067375, + "learning_rate": 1.7875420090944449e-06, + "loss": 0.2815, + "step": 25797 + }, + { + "epoch": 1.2085070501709843, + "grad_norm": 0.6027461097667849, + "learning_rate": 1.7873602253104722e-06, + "loss": 0.2779, + "step": 25798 + }, + { + "epoch": 1.2085538951609125, + "grad_norm": 0.5993488800984211, + "learning_rate": 1.7871784456275625e-06, + "loss": 0.271, + "step": 25799 + }, + { + "epoch": 1.208600740150841, + "grad_norm": 0.563662836338163, + "learning_rate": 1.7869966700467628e-06, + "loss": 0.2662, + "step": 25800 + }, + { + "epoch": 1.2086475851407692, + "grad_norm": 0.6276113874791002, + "learning_rate": 1.7868148985691174e-06, + "loss": 0.2721, + "step": 25801 + }, + { + "epoch": 1.2086944301306974, + "grad_norm": 0.6181574473564809, + "learning_rate": 1.7866331311956724e-06, + "loss": 0.2774, + "step": 25802 + }, + { + "epoch": 1.208741275120626, + "grad_norm": 0.6099291128948758, + "learning_rate": 1.7864513679274747e-06, + "loss": 0.2874, + "step": 25803 + }, + { + "epoch": 1.2087881201105541, + "grad_norm": 0.5792767173149438, + "learning_rate": 1.7862696087655706e-06, + "loss": 0.2508, + "step": 25804 + }, + { + "epoch": 1.2088349651004826, + "grad_norm": 0.5788856276204608, + "learning_rate": 1.786087853711005e-06, + "loss": 0.2933, + "step": 25805 + }, + { + "epoch": 1.2088818100904108, + "grad_norm": 0.6209625403447653, + "learning_rate": 1.785906102764825e-06, + "loss": 0.2866, + "step": 25806 + }, + { + "epoch": 1.208928655080339, + "grad_norm": 0.6134664175859279, + "learning_rate": 1.7857243559280752e-06, + "loss": 0.2824, + "step": 25807 + }, + { + "epoch": 1.2089755000702676, + "grad_norm": 0.5895096955561124, + "learning_rate": 1.7855426132018024e-06, + "loss": 0.2879, + "step": 25808 + }, + { + "epoch": 1.2090223450601958, + "grad_norm": 0.5548765767097038, + "learning_rate": 1.7853608745870521e-06, + "loss": 0.2638, + "step": 25809 + }, + { + "epoch": 1.209069190050124, + "grad_norm": 0.6022068027467742, + "learning_rate": 1.78517914008487e-06, + "loss": 0.2624, + "step": 25810 + }, + { + "epoch": 1.2091160350400525, + "grad_norm": 0.5528924589043439, + "learning_rate": 1.7849974096963033e-06, + "loss": 0.2674, + "step": 25811 + }, + { + "epoch": 1.2091628800299807, + "grad_norm": 0.5713133296927067, + "learning_rate": 1.784815683422397e-06, + "loss": 0.2674, + "step": 25812 + }, + { + "epoch": 1.2092097250199092, + "grad_norm": 0.6055124438175868, + "learning_rate": 1.7846339612641956e-06, + "loss": 0.2833, + "step": 25813 + }, + { + "epoch": 1.2092565700098374, + "grad_norm": 0.5639774696530173, + "learning_rate": 1.7844522432227463e-06, + "loss": 0.2602, + "step": 25814 + }, + { + "epoch": 1.2093034149997657, + "grad_norm": 0.552521850212075, + "learning_rate": 1.7842705292990947e-06, + "loss": 0.2491, + "step": 25815 + }, + { + "epoch": 1.2093502599896941, + "grad_norm": 0.6083879992189982, + "learning_rate": 1.7840888194942856e-06, + "loss": 0.2824, + "step": 25816 + }, + { + "epoch": 1.2093971049796224, + "grad_norm": 0.6073475451523991, + "learning_rate": 1.7839071138093659e-06, + "loss": 0.2732, + "step": 25817 + }, + { + "epoch": 1.2094439499695508, + "grad_norm": 0.5947633010234872, + "learning_rate": 1.7837254122453817e-06, + "loss": 0.2733, + "step": 25818 + }, + { + "epoch": 1.209490794959479, + "grad_norm": 0.6200297603880797, + "learning_rate": 1.7835437148033768e-06, + "loss": 0.2766, + "step": 25819 + }, + { + "epoch": 1.2095376399494073, + "grad_norm": 0.6166839467349566, + "learning_rate": 1.7833620214843977e-06, + "loss": 0.2856, + "step": 25820 + }, + { + "epoch": 1.2095844849393358, + "grad_norm": 0.6279986836001482, + "learning_rate": 1.7831803322894898e-06, + "loss": 0.2818, + "step": 25821 + }, + { + "epoch": 1.209631329929264, + "grad_norm": 0.6219668143226532, + "learning_rate": 1.7829986472196994e-06, + "loss": 0.3009, + "step": 25822 + }, + { + "epoch": 1.2096781749191923, + "grad_norm": 0.6438665967568549, + "learning_rate": 1.7828169662760714e-06, + "loss": 0.2983, + "step": 25823 + }, + { + "epoch": 1.2097250199091207, + "grad_norm": 0.6560546262540878, + "learning_rate": 1.782635289459652e-06, + "loss": 0.2887, + "step": 25824 + }, + { + "epoch": 1.209771864899049, + "grad_norm": 0.5717656683191495, + "learning_rate": 1.7824536167714856e-06, + "loss": 0.2712, + "step": 25825 + }, + { + "epoch": 1.2098187098889774, + "grad_norm": 0.6038701430827542, + "learning_rate": 1.7822719482126188e-06, + "loss": 0.288, + "step": 25826 + }, + { + "epoch": 1.2098655548789057, + "grad_norm": 0.5998776778385421, + "learning_rate": 1.782090283784096e-06, + "loss": 0.2651, + "step": 25827 + }, + { + "epoch": 1.2099123998688341, + "grad_norm": 0.6128809305994799, + "learning_rate": 1.7819086234869634e-06, + "loss": 0.2696, + "step": 25828 + }, + { + "epoch": 1.2099592448587624, + "grad_norm": 0.6122791149400314, + "learning_rate": 1.7817269673222659e-06, + "loss": 0.2662, + "step": 25829 + }, + { + "epoch": 1.2100060898486906, + "grad_norm": 0.6088267084145951, + "learning_rate": 1.78154531529105e-06, + "loss": 0.2777, + "step": 25830 + }, + { + "epoch": 1.210052934838619, + "grad_norm": 0.5699219904058717, + "learning_rate": 1.7813636673943597e-06, + "loss": 0.2688, + "step": 25831 + }, + { + "epoch": 1.2100997798285473, + "grad_norm": 0.5905872074410109, + "learning_rate": 1.7811820236332409e-06, + "loss": 0.2714, + "step": 25832 + }, + { + "epoch": 1.2101466248184756, + "grad_norm": 0.567533963039917, + "learning_rate": 1.7810003840087387e-06, + "loss": 0.268, + "step": 25833 + }, + { + "epoch": 1.210193469808404, + "grad_norm": 0.5905017560730078, + "learning_rate": 1.780818748521898e-06, + "loss": 0.2793, + "step": 25834 + }, + { + "epoch": 1.2102403147983323, + "grad_norm": 0.6201772223280628, + "learning_rate": 1.7806371171737656e-06, + "loss": 0.2925, + "step": 25835 + }, + { + "epoch": 1.2102871597882607, + "grad_norm": 0.6264778078787534, + "learning_rate": 1.7804554899653865e-06, + "loss": 0.2912, + "step": 25836 + }, + { + "epoch": 1.210334004778189, + "grad_norm": 0.5590090309507277, + "learning_rate": 1.7802738668978037e-06, + "loss": 0.2602, + "step": 25837 + }, + { + "epoch": 1.2103808497681172, + "grad_norm": 0.73602677393323, + "learning_rate": 1.7800922479720644e-06, + "loss": 0.2782, + "step": 25838 + }, + { + "epoch": 1.2104276947580457, + "grad_norm": 0.5755285718689144, + "learning_rate": 1.7799106331892136e-06, + "loss": 0.2789, + "step": 25839 + }, + { + "epoch": 1.210474539747974, + "grad_norm": 0.5827403319375152, + "learning_rate": 1.7797290225502952e-06, + "loss": 0.2705, + "step": 25840 + }, + { + "epoch": 1.2105213847379024, + "grad_norm": 0.5691603681287207, + "learning_rate": 1.7795474160563558e-06, + "loss": 0.2804, + "step": 25841 + }, + { + "epoch": 1.2105682297278306, + "grad_norm": 0.5988562958297818, + "learning_rate": 1.7793658137084408e-06, + "loss": 0.2848, + "step": 25842 + }, + { + "epoch": 1.2106150747177589, + "grad_norm": 0.5804739620189249, + "learning_rate": 1.7791842155075937e-06, + "loss": 0.276, + "step": 25843 + }, + { + "epoch": 1.2106619197076873, + "grad_norm": 0.6038929613280208, + "learning_rate": 1.77900262145486e-06, + "loss": 0.2726, + "step": 25844 + }, + { + "epoch": 1.2107087646976156, + "grad_norm": 0.5641933393498906, + "learning_rate": 1.778821031551285e-06, + "loss": 0.2544, + "step": 25845 + }, + { + "epoch": 1.2107556096875438, + "grad_norm": 0.5584270811401845, + "learning_rate": 1.778639445797914e-06, + "loss": 0.2753, + "step": 25846 + }, + { + "epoch": 1.2108024546774723, + "grad_norm": 0.6378312281684124, + "learning_rate": 1.778457864195791e-06, + "loss": 0.2919, + "step": 25847 + }, + { + "epoch": 1.2108492996674005, + "grad_norm": 0.6187723131910289, + "learning_rate": 1.778276286745962e-06, + "loss": 0.2738, + "step": 25848 + }, + { + "epoch": 1.210896144657329, + "grad_norm": 0.6082897745016821, + "learning_rate": 1.7780947134494725e-06, + "loss": 0.2617, + "step": 25849 + }, + { + "epoch": 1.2109429896472572, + "grad_norm": 0.6175471837763356, + "learning_rate": 1.777913144307366e-06, + "loss": 0.2641, + "step": 25850 + }, + { + "epoch": 1.2109898346371855, + "grad_norm": 0.6006573445131629, + "learning_rate": 1.7777315793206873e-06, + "loss": 0.2686, + "step": 25851 + }, + { + "epoch": 1.211036679627114, + "grad_norm": 0.6049917651844695, + "learning_rate": 1.777550018490482e-06, + "loss": 0.2846, + "step": 25852 + }, + { + "epoch": 1.2110835246170422, + "grad_norm": 0.5965278773055184, + "learning_rate": 1.777368461817795e-06, + "loss": 0.2743, + "step": 25853 + }, + { + "epoch": 1.2111303696069706, + "grad_norm": 0.5497274116074271, + "learning_rate": 1.7771869093036706e-06, + "loss": 0.2572, + "step": 25854 + }, + { + "epoch": 1.2111772145968989, + "grad_norm": 0.5879970444034377, + "learning_rate": 1.7770053609491547e-06, + "loss": 0.2594, + "step": 25855 + }, + { + "epoch": 1.211224059586827, + "grad_norm": 0.6187169220385802, + "learning_rate": 1.7768238167552905e-06, + "loss": 0.2699, + "step": 25856 + }, + { + "epoch": 1.2112709045767556, + "grad_norm": 0.590566327138651, + "learning_rate": 1.7766422767231236e-06, + "loss": 0.2609, + "step": 25857 + }, + { + "epoch": 1.2113177495666838, + "grad_norm": 0.5294743521326073, + "learning_rate": 1.7764607408536982e-06, + "loss": 0.253, + "step": 25858 + }, + { + "epoch": 1.211364594556612, + "grad_norm": 0.5543296211940621, + "learning_rate": 1.7762792091480596e-06, + "loss": 0.271, + "step": 25859 + }, + { + "epoch": 1.2114114395465405, + "grad_norm": 0.5883238401201765, + "learning_rate": 1.7760976816072525e-06, + "loss": 0.2733, + "step": 25860 + }, + { + "epoch": 1.2114582845364688, + "grad_norm": 0.6260953418891841, + "learning_rate": 1.775916158232322e-06, + "loss": 0.2922, + "step": 25861 + }, + { + "epoch": 1.2115051295263972, + "grad_norm": 0.5315615352948557, + "learning_rate": 1.7757346390243107e-06, + "loss": 0.2439, + "step": 25862 + }, + { + "epoch": 1.2115519745163255, + "grad_norm": 0.6031982164590243, + "learning_rate": 1.7755531239842649e-06, + "loss": 0.2843, + "step": 25863 + }, + { + "epoch": 1.211598819506254, + "grad_norm": 0.574882082420888, + "learning_rate": 1.775371613113229e-06, + "loss": 0.274, + "step": 25864 + }, + { + "epoch": 1.2116456644961822, + "grad_norm": 0.6158619670410024, + "learning_rate": 1.7751901064122468e-06, + "loss": 0.2844, + "step": 25865 + }, + { + "epoch": 1.2116925094861104, + "grad_norm": 0.5679842128277472, + "learning_rate": 1.7750086038823639e-06, + "loss": 0.2777, + "step": 25866 + }, + { + "epoch": 1.2117393544760389, + "grad_norm": 0.5974015117579645, + "learning_rate": 1.7748271055246246e-06, + "loss": 0.28, + "step": 25867 + }, + { + "epoch": 1.211786199465967, + "grad_norm": 0.5541592368983371, + "learning_rate": 1.7746456113400728e-06, + "loss": 0.2696, + "step": 25868 + }, + { + "epoch": 1.2118330444558953, + "grad_norm": 0.6340265327165705, + "learning_rate": 1.7744641213297528e-06, + "loss": 0.2725, + "step": 25869 + }, + { + "epoch": 1.2118798894458238, + "grad_norm": 0.5766705581576913, + "learning_rate": 1.7742826354947093e-06, + "loss": 0.2649, + "step": 25870 + }, + { + "epoch": 1.211926734435752, + "grad_norm": 0.6207075497742535, + "learning_rate": 1.7741011538359876e-06, + "loss": 0.2815, + "step": 25871 + }, + { + "epoch": 1.2119735794256805, + "grad_norm": 0.624583643020588, + "learning_rate": 1.7739196763546303e-06, + "loss": 0.282, + "step": 25872 + }, + { + "epoch": 1.2120204244156088, + "grad_norm": 0.6699218681052849, + "learning_rate": 1.7737382030516843e-06, + "loss": 0.2884, + "step": 25873 + }, + { + "epoch": 1.212067269405537, + "grad_norm": 0.6197374190697817, + "learning_rate": 1.7735567339281911e-06, + "loss": 0.2832, + "step": 25874 + }, + { + "epoch": 1.2121141143954655, + "grad_norm": 0.5671873708116759, + "learning_rate": 1.7733752689851968e-06, + "loss": 0.266, + "step": 25875 + }, + { + "epoch": 1.2121609593853937, + "grad_norm": 0.5521375538960656, + "learning_rate": 1.7731938082237448e-06, + "loss": 0.2685, + "step": 25876 + }, + { + "epoch": 1.2122078043753222, + "grad_norm": 0.5810634707833259, + "learning_rate": 1.7730123516448799e-06, + "loss": 0.2704, + "step": 25877 + }, + { + "epoch": 1.2122546493652504, + "grad_norm": 0.5911663193973035, + "learning_rate": 1.7728308992496462e-06, + "loss": 0.2903, + "step": 25878 + }, + { + "epoch": 1.2123014943551786, + "grad_norm": 0.5708835871869694, + "learning_rate": 1.7726494510390888e-06, + "loss": 0.2638, + "step": 25879 + }, + { + "epoch": 1.212348339345107, + "grad_norm": 0.5804390100685813, + "learning_rate": 1.7724680070142497e-06, + "loss": 0.2689, + "step": 25880 + }, + { + "epoch": 1.2123951843350353, + "grad_norm": 0.6638810091591248, + "learning_rate": 1.7722865671761753e-06, + "loss": 0.2845, + "step": 25881 + }, + { + "epoch": 1.2124420293249636, + "grad_norm": 0.5570752485417411, + "learning_rate": 1.772105131525908e-06, + "loss": 0.2567, + "step": 25882 + }, + { + "epoch": 1.212488874314892, + "grad_norm": 0.5603999481960111, + "learning_rate": 1.7719237000644928e-06, + "loss": 0.268, + "step": 25883 + }, + { + "epoch": 1.2125357193048203, + "grad_norm": 0.610701186936275, + "learning_rate": 1.7717422727929742e-06, + "loss": 0.2879, + "step": 25884 + }, + { + "epoch": 1.2125825642947488, + "grad_norm": 0.6129023600371073, + "learning_rate": 1.7715608497123965e-06, + "loss": 0.2587, + "step": 25885 + }, + { + "epoch": 1.212629409284677, + "grad_norm": 0.5568767514105365, + "learning_rate": 1.7713794308238016e-06, + "loss": 0.2487, + "step": 25886 + }, + { + "epoch": 1.2126762542746052, + "grad_norm": 0.5615652113028895, + "learning_rate": 1.7711980161282352e-06, + "loss": 0.2671, + "step": 25887 + }, + { + "epoch": 1.2127230992645337, + "grad_norm": 0.5970126799111046, + "learning_rate": 1.7710166056267413e-06, + "loss": 0.2909, + "step": 25888 + }, + { + "epoch": 1.212769944254462, + "grad_norm": 0.6143690190151667, + "learning_rate": 1.7708351993203635e-06, + "loss": 0.2856, + "step": 25889 + }, + { + "epoch": 1.2128167892443904, + "grad_norm": 0.6438586142654898, + "learning_rate": 1.7706537972101455e-06, + "loss": 0.2801, + "step": 25890 + }, + { + "epoch": 1.2128636342343186, + "grad_norm": 0.5963350066436783, + "learning_rate": 1.770472399297133e-06, + "loss": 0.2711, + "step": 25891 + }, + { + "epoch": 1.2129104792242469, + "grad_norm": 0.5714507569073114, + "learning_rate": 1.7702910055823677e-06, + "loss": 0.2629, + "step": 25892 + }, + { + "epoch": 1.2129573242141753, + "grad_norm": 0.585762704366766, + "learning_rate": 1.7701096160668937e-06, + "loss": 0.2529, + "step": 25893 + }, + { + "epoch": 1.2130041692041036, + "grad_norm": 0.5713904226075605, + "learning_rate": 1.7699282307517556e-06, + "loss": 0.2744, + "step": 25894 + }, + { + "epoch": 1.2130510141940318, + "grad_norm": 0.5859312711433073, + "learning_rate": 1.7697468496379975e-06, + "loss": 0.2802, + "step": 25895 + }, + { + "epoch": 1.2130978591839603, + "grad_norm": 0.5662152274427963, + "learning_rate": 1.769565472726662e-06, + "loss": 0.2672, + "step": 25896 + }, + { + "epoch": 1.2131447041738885, + "grad_norm": 0.6058102474653916, + "learning_rate": 1.7693841000187951e-06, + "loss": 0.278, + "step": 25897 + }, + { + "epoch": 1.213191549163817, + "grad_norm": 0.5761226165710576, + "learning_rate": 1.7692027315154375e-06, + "loss": 0.258, + "step": 25898 + }, + { + "epoch": 1.2132383941537452, + "grad_norm": 0.6176041322973709, + "learning_rate": 1.7690213672176354e-06, + "loss": 0.2998, + "step": 25899 + }, + { + "epoch": 1.2132852391436737, + "grad_norm": 0.5850656495015557, + "learning_rate": 1.7688400071264308e-06, + "loss": 0.2795, + "step": 25900 + }, + { + "epoch": 1.213332084133602, + "grad_norm": 0.6230393151251339, + "learning_rate": 1.7686586512428682e-06, + "loss": 0.2983, + "step": 25901 + }, + { + "epoch": 1.2133789291235302, + "grad_norm": 0.616910931847539, + "learning_rate": 1.7684772995679921e-06, + "loss": 0.2825, + "step": 25902 + }, + { + "epoch": 1.2134257741134586, + "grad_norm": 0.587179563701707, + "learning_rate": 1.7682959521028457e-06, + "loss": 0.2795, + "step": 25903 + }, + { + "epoch": 1.2134726191033869, + "grad_norm": 0.6078505688827592, + "learning_rate": 1.7681146088484711e-06, + "loss": 0.2701, + "step": 25904 + }, + { + "epoch": 1.2135194640933151, + "grad_norm": 0.5832345962693174, + "learning_rate": 1.7679332698059126e-06, + "loss": 0.268, + "step": 25905 + }, + { + "epoch": 1.2135663090832436, + "grad_norm": 0.5668496785266779, + "learning_rate": 1.7677519349762152e-06, + "loss": 0.27, + "step": 25906 + }, + { + "epoch": 1.2136131540731718, + "grad_norm": 0.6139096356183223, + "learning_rate": 1.7675706043604207e-06, + "loss": 0.2727, + "step": 25907 + }, + { + "epoch": 1.2136599990631003, + "grad_norm": 0.6007988301445893, + "learning_rate": 1.767389277959573e-06, + "loss": 0.2707, + "step": 25908 + }, + { + "epoch": 1.2137068440530285, + "grad_norm": 0.5572344030346361, + "learning_rate": 1.7672079557747175e-06, + "loss": 0.268, + "step": 25909 + }, + { + "epoch": 1.2137536890429568, + "grad_norm": 0.6185440059767174, + "learning_rate": 1.7670266378068952e-06, + "loss": 0.2898, + "step": 25910 + }, + { + "epoch": 1.2138005340328852, + "grad_norm": 0.5815764573218266, + "learning_rate": 1.7668453240571496e-06, + "loss": 0.2852, + "step": 25911 + }, + { + "epoch": 1.2138473790228135, + "grad_norm": 0.5851212787086946, + "learning_rate": 1.766664014526525e-06, + "loss": 0.264, + "step": 25912 + }, + { + "epoch": 1.213894224012742, + "grad_norm": 0.5544307554555072, + "learning_rate": 1.7664827092160652e-06, + "loss": 0.2791, + "step": 25913 + }, + { + "epoch": 1.2139410690026702, + "grad_norm": 0.5717433983711663, + "learning_rate": 1.7663014081268126e-06, + "loss": 0.2746, + "step": 25914 + }, + { + "epoch": 1.2139879139925984, + "grad_norm": 0.5512644432551081, + "learning_rate": 1.7661201112598116e-06, + "loss": 0.2626, + "step": 25915 + }, + { + "epoch": 1.2140347589825269, + "grad_norm": 0.5676575815270078, + "learning_rate": 1.7659388186161048e-06, + "loss": 0.26, + "step": 25916 + }, + { + "epoch": 1.2140816039724551, + "grad_norm": 0.5802621904399384, + "learning_rate": 1.7657575301967357e-06, + "loss": 0.279, + "step": 25917 + }, + { + "epoch": 1.2141284489623834, + "grad_norm": 0.6163888205299884, + "learning_rate": 1.765576246002747e-06, + "loss": 0.2854, + "step": 25918 + }, + { + "epoch": 1.2141752939523118, + "grad_norm": 0.6291197820958772, + "learning_rate": 1.7653949660351826e-06, + "loss": 0.2694, + "step": 25919 + }, + { + "epoch": 1.21422213894224, + "grad_norm": 0.5455636281260386, + "learning_rate": 1.7652136902950853e-06, + "loss": 0.2567, + "step": 25920 + }, + { + "epoch": 1.2142689839321685, + "grad_norm": 0.6211319372192632, + "learning_rate": 1.7650324187834984e-06, + "loss": 0.286, + "step": 25921 + }, + { + "epoch": 1.2143158289220968, + "grad_norm": 0.5497244548011299, + "learning_rate": 1.7648511515014663e-06, + "loss": 0.2519, + "step": 25922 + }, + { + "epoch": 1.214362673912025, + "grad_norm": 0.5511780367245778, + "learning_rate": 1.7646698884500308e-06, + "loss": 0.2637, + "step": 25923 + }, + { + "epoch": 1.2144095189019535, + "grad_norm": 0.5320840314964593, + "learning_rate": 1.7644886296302344e-06, + "loss": 0.2426, + "step": 25924 + }, + { + "epoch": 1.2144563638918817, + "grad_norm": 0.5532751658800785, + "learning_rate": 1.7643073750431211e-06, + "loss": 0.2688, + "step": 25925 + }, + { + "epoch": 1.2145032088818102, + "grad_norm": 0.5956598927016842, + "learning_rate": 1.7641261246897346e-06, + "loss": 0.2619, + "step": 25926 + }, + { + "epoch": 1.2145500538717384, + "grad_norm": 0.6184511638806218, + "learning_rate": 1.7639448785711165e-06, + "loss": 0.282, + "step": 25927 + }, + { + "epoch": 1.2145968988616667, + "grad_norm": 0.6130468905983847, + "learning_rate": 1.763763636688312e-06, + "loss": 0.2756, + "step": 25928 + }, + { + "epoch": 1.2146437438515951, + "grad_norm": 0.6430404116765804, + "learning_rate": 1.7635823990423615e-06, + "loss": 0.2956, + "step": 25929 + }, + { + "epoch": 1.2146905888415234, + "grad_norm": 0.6187247172617898, + "learning_rate": 1.7634011656343097e-06, + "loss": 0.2881, + "step": 25930 + }, + { + "epoch": 1.2147374338314516, + "grad_norm": 0.6011957617168767, + "learning_rate": 1.7632199364651986e-06, + "loss": 0.2801, + "step": 25931 + }, + { + "epoch": 1.21478427882138, + "grad_norm": 0.5958050225662547, + "learning_rate": 1.7630387115360715e-06, + "loss": 0.2705, + "step": 25932 + }, + { + "epoch": 1.2148311238113083, + "grad_norm": 0.5692726788678327, + "learning_rate": 1.762857490847972e-06, + "loss": 0.2743, + "step": 25933 + }, + { + "epoch": 1.2148779688012368, + "grad_norm": 0.6256947720983513, + "learning_rate": 1.762676274401943e-06, + "loss": 0.2769, + "step": 25934 + }, + { + "epoch": 1.214924813791165, + "grad_norm": 0.5765815404242461, + "learning_rate": 1.7624950621990256e-06, + "loss": 0.2629, + "step": 25935 + }, + { + "epoch": 1.2149716587810935, + "grad_norm": 0.6091192488754289, + "learning_rate": 1.7623138542402636e-06, + "loss": 0.2818, + "step": 25936 + }, + { + "epoch": 1.2150185037710217, + "grad_norm": 0.5953011750068982, + "learning_rate": 1.7621326505267006e-06, + "loss": 0.273, + "step": 25937 + }, + { + "epoch": 1.21506534876095, + "grad_norm": 0.6076418558023745, + "learning_rate": 1.7619514510593783e-06, + "loss": 0.269, + "step": 25938 + }, + { + "epoch": 1.2151121937508784, + "grad_norm": 0.5789011555001707, + "learning_rate": 1.7617702558393396e-06, + "loss": 0.2676, + "step": 25939 + }, + { + "epoch": 1.2151590387408067, + "grad_norm": 0.575021113380517, + "learning_rate": 1.761589064867629e-06, + "loss": 0.2802, + "step": 25940 + }, + { + "epoch": 1.215205883730735, + "grad_norm": 0.5498863802914772, + "learning_rate": 1.761407878145287e-06, + "loss": 0.2545, + "step": 25941 + }, + { + "epoch": 1.2152527287206634, + "grad_norm": 0.5851337543069867, + "learning_rate": 1.7612266956733564e-06, + "loss": 0.2572, + "step": 25942 + }, + { + "epoch": 1.2152995737105916, + "grad_norm": 0.644593738216295, + "learning_rate": 1.7610455174528806e-06, + "loss": 0.275, + "step": 25943 + }, + { + "epoch": 1.21534641870052, + "grad_norm": 0.5982238816020119, + "learning_rate": 1.7608643434849027e-06, + "loss": 0.2654, + "step": 25944 + }, + { + "epoch": 1.2153932636904483, + "grad_norm": 0.6006433691470121, + "learning_rate": 1.7606831737704641e-06, + "loss": 0.284, + "step": 25945 + }, + { + "epoch": 1.2154401086803766, + "grad_norm": 0.6194927803831995, + "learning_rate": 1.7605020083106089e-06, + "loss": 0.2819, + "step": 25946 + }, + { + "epoch": 1.215486953670305, + "grad_norm": 0.5844895967679464, + "learning_rate": 1.7603208471063783e-06, + "loss": 0.2643, + "step": 25947 + }, + { + "epoch": 1.2155337986602333, + "grad_norm": 0.5735765100246029, + "learning_rate": 1.7601396901588152e-06, + "loss": 0.2682, + "step": 25948 + }, + { + "epoch": 1.2155806436501617, + "grad_norm": 0.5703288328840617, + "learning_rate": 1.759958537468962e-06, + "loss": 0.2695, + "step": 25949 + }, + { + "epoch": 1.21562748864009, + "grad_norm": 0.5651472261231173, + "learning_rate": 1.7597773890378614e-06, + "loss": 0.2593, + "step": 25950 + }, + { + "epoch": 1.2156743336300182, + "grad_norm": 0.565052274376068, + "learning_rate": 1.7595962448665562e-06, + "loss": 0.259, + "step": 25951 + }, + { + "epoch": 1.2157211786199467, + "grad_norm": 0.5634008018270054, + "learning_rate": 1.7594151049560893e-06, + "loss": 0.2629, + "step": 25952 + }, + { + "epoch": 1.215768023609875, + "grad_norm": 0.6037200924317827, + "learning_rate": 1.759233969307501e-06, + "loss": 0.2801, + "step": 25953 + }, + { + "epoch": 1.2158148685998031, + "grad_norm": 0.5989658491645146, + "learning_rate": 1.7590528379218354e-06, + "loss": 0.2656, + "step": 25954 + }, + { + "epoch": 1.2158617135897316, + "grad_norm": 0.6177768261326061, + "learning_rate": 1.758871710800135e-06, + "loss": 0.2789, + "step": 25955 + }, + { + "epoch": 1.2159085585796598, + "grad_norm": 0.6064918788194681, + "learning_rate": 1.7586905879434408e-06, + "loss": 0.2767, + "step": 25956 + }, + { + "epoch": 1.2159554035695883, + "grad_norm": 0.5573564021070276, + "learning_rate": 1.758509469352796e-06, + "loss": 0.267, + "step": 25957 + }, + { + "epoch": 1.2160022485595166, + "grad_norm": 0.5970306649159477, + "learning_rate": 1.758328355029244e-06, + "loss": 0.272, + "step": 25958 + }, + { + "epoch": 1.2160490935494448, + "grad_norm": 0.591726478467628, + "learning_rate": 1.7581472449738255e-06, + "loss": 0.2865, + "step": 25959 + }, + { + "epoch": 1.2160959385393733, + "grad_norm": 0.5738671833976277, + "learning_rate": 1.7579661391875825e-06, + "loss": 0.2752, + "step": 25960 + }, + { + "epoch": 1.2161427835293015, + "grad_norm": 0.5834314642328904, + "learning_rate": 1.7577850376715578e-06, + "loss": 0.2687, + "step": 25961 + }, + { + "epoch": 1.21618962851923, + "grad_norm": 0.5591124195102896, + "learning_rate": 1.757603940426794e-06, + "loss": 0.261, + "step": 25962 + }, + { + "epoch": 1.2162364735091582, + "grad_norm": 0.5662052211474963, + "learning_rate": 1.7574228474543329e-06, + "loss": 0.2857, + "step": 25963 + }, + { + "epoch": 1.2162833184990864, + "grad_norm": 0.6276032672357352, + "learning_rate": 1.7572417587552173e-06, + "loss": 0.282, + "step": 25964 + }, + { + "epoch": 1.216330163489015, + "grad_norm": 0.5892331028419968, + "learning_rate": 1.757060674330488e-06, + "loss": 0.2585, + "step": 25965 + }, + { + "epoch": 1.2163770084789431, + "grad_norm": 0.6028198025202631, + "learning_rate": 1.7568795941811877e-06, + "loss": 0.2817, + "step": 25966 + }, + { + "epoch": 1.2164238534688714, + "grad_norm": 0.5698228030648436, + "learning_rate": 1.7566985183083587e-06, + "loss": 0.2763, + "step": 25967 + }, + { + "epoch": 1.2164706984587998, + "grad_norm": 0.6634020135322548, + "learning_rate": 1.756517446713043e-06, + "loss": 0.3022, + "step": 25968 + }, + { + "epoch": 1.216517543448728, + "grad_norm": 0.6095837830541768, + "learning_rate": 1.7563363793962824e-06, + "loss": 0.2759, + "step": 25969 + }, + { + "epoch": 1.2165643884386566, + "grad_norm": 0.616156789938302, + "learning_rate": 1.7561553163591199e-06, + "loss": 0.2539, + "step": 25970 + }, + { + "epoch": 1.2166112334285848, + "grad_norm": 0.6278179671839953, + "learning_rate": 1.7559742576025954e-06, + "loss": 0.2836, + "step": 25971 + }, + { + "epoch": 1.2166580784185133, + "grad_norm": 0.5393216128483813, + "learning_rate": 1.7557932031277525e-06, + "loss": 0.2583, + "step": 25972 + }, + { + "epoch": 1.2167049234084415, + "grad_norm": 0.6048215855358166, + "learning_rate": 1.7556121529356324e-06, + "loss": 0.2786, + "step": 25973 + }, + { + "epoch": 1.2167517683983697, + "grad_norm": 0.6141746911548127, + "learning_rate": 1.7554311070272772e-06, + "loss": 0.2848, + "step": 25974 + }, + { + "epoch": 1.2167986133882982, + "grad_norm": 0.6166218970464011, + "learning_rate": 1.7552500654037293e-06, + "loss": 0.277, + "step": 25975 + }, + { + "epoch": 1.2168454583782264, + "grad_norm": 0.583167740758436, + "learning_rate": 1.7550690280660307e-06, + "loss": 0.2771, + "step": 25976 + }, + { + "epoch": 1.2168923033681547, + "grad_norm": 0.5544613130987516, + "learning_rate": 1.7548879950152215e-06, + "loss": 0.2604, + "step": 25977 + }, + { + "epoch": 1.2169391483580831, + "grad_norm": 0.5658701034959006, + "learning_rate": 1.7547069662523446e-06, + "loss": 0.253, + "step": 25978 + }, + { + "epoch": 1.2169859933480114, + "grad_norm": 0.6032491004117287, + "learning_rate": 1.7545259417784424e-06, + "loss": 0.2744, + "step": 25979 + }, + { + "epoch": 1.2170328383379398, + "grad_norm": 0.5988308771325724, + "learning_rate": 1.7543449215945554e-06, + "loss": 0.2731, + "step": 25980 + }, + { + "epoch": 1.217079683327868, + "grad_norm": 0.6290633586767145, + "learning_rate": 1.754163905701726e-06, + "loss": 0.2769, + "step": 25981 + }, + { + "epoch": 1.2171265283177963, + "grad_norm": 0.576157987323093, + "learning_rate": 1.7539828941009962e-06, + "loss": 0.2854, + "step": 25982 + }, + { + "epoch": 1.2171733733077248, + "grad_norm": 0.6437751863564656, + "learning_rate": 1.753801886793408e-06, + "loss": 0.2904, + "step": 25983 + }, + { + "epoch": 1.217220218297653, + "grad_norm": 0.5677207191477317, + "learning_rate": 1.7536208837800018e-06, + "loss": 0.2685, + "step": 25984 + }, + { + "epoch": 1.2172670632875815, + "grad_norm": 0.6507503075812017, + "learning_rate": 1.7534398850618192e-06, + "loss": 0.3086, + "step": 25985 + }, + { + "epoch": 1.2173139082775097, + "grad_norm": 0.6068254620374407, + "learning_rate": 1.7532588906399035e-06, + "loss": 0.2698, + "step": 25986 + }, + { + "epoch": 1.217360753267438, + "grad_norm": 0.5825878408576443, + "learning_rate": 1.7530779005152943e-06, + "loss": 0.2674, + "step": 25987 + }, + { + "epoch": 1.2174075982573664, + "grad_norm": 0.6269938133197055, + "learning_rate": 1.752896914689034e-06, + "loss": 0.2916, + "step": 25988 + }, + { + "epoch": 1.2174544432472947, + "grad_norm": 0.6500784015271419, + "learning_rate": 1.7527159331621652e-06, + "loss": 0.2758, + "step": 25989 + }, + { + "epoch": 1.217501288237223, + "grad_norm": 0.5831599006644594, + "learning_rate": 1.752534955935728e-06, + "loss": 0.2764, + "step": 25990 + }, + { + "epoch": 1.2175481332271514, + "grad_norm": 0.6158237592900563, + "learning_rate": 1.7523539830107639e-06, + "loss": 0.2771, + "step": 25991 + }, + { + "epoch": 1.2175949782170796, + "grad_norm": 0.6699438952401415, + "learning_rate": 1.7521730143883143e-06, + "loss": 0.283, + "step": 25992 + }, + { + "epoch": 1.217641823207008, + "grad_norm": 0.5738852740840359, + "learning_rate": 1.7519920500694218e-06, + "loss": 0.2587, + "step": 25993 + }, + { + "epoch": 1.2176886681969363, + "grad_norm": 0.5724045276549338, + "learning_rate": 1.7518110900551267e-06, + "loss": 0.2709, + "step": 25994 + }, + { + "epoch": 1.2177355131868646, + "grad_norm": 0.5419650259750068, + "learning_rate": 1.7516301343464713e-06, + "loss": 0.2585, + "step": 25995 + }, + { + "epoch": 1.217782358176793, + "grad_norm": 0.5865904818493409, + "learning_rate": 1.7514491829444957e-06, + "loss": 0.2612, + "step": 25996 + }, + { + "epoch": 1.2178292031667213, + "grad_norm": 0.6097542776060367, + "learning_rate": 1.7512682358502425e-06, + "loss": 0.2766, + "step": 25997 + }, + { + "epoch": 1.2178760481566497, + "grad_norm": 0.6028541318072985, + "learning_rate": 1.7510872930647517e-06, + "loss": 0.2781, + "step": 25998 + }, + { + "epoch": 1.217922893146578, + "grad_norm": 0.5797134246002382, + "learning_rate": 1.7509063545890653e-06, + "loss": 0.2701, + "step": 25999 + }, + { + "epoch": 1.2179697381365062, + "grad_norm": 0.6154876537137826, + "learning_rate": 1.7507254204242251e-06, + "loss": 0.2779, + "step": 26000 + }, + { + "epoch": 1.2180165831264347, + "grad_norm": 0.6673873299669136, + "learning_rate": 1.7505444905712723e-06, + "loss": 0.2776, + "step": 26001 + }, + { + "epoch": 1.218063428116363, + "grad_norm": 0.6543067940160323, + "learning_rate": 1.7503635650312467e-06, + "loss": 0.2946, + "step": 26002 + }, + { + "epoch": 1.2181102731062912, + "grad_norm": 0.5693267499521442, + "learning_rate": 1.75018264380519e-06, + "loss": 0.2641, + "step": 26003 + }, + { + "epoch": 1.2181571180962196, + "grad_norm": 0.5381830763246981, + "learning_rate": 1.7500017268941446e-06, + "loss": 0.2825, + "step": 26004 + }, + { + "epoch": 1.2182039630861479, + "grad_norm": 0.5844535173503919, + "learning_rate": 1.74982081429915e-06, + "loss": 0.2654, + "step": 26005 + }, + { + "epoch": 1.2182508080760763, + "grad_norm": 0.5922052457171687, + "learning_rate": 1.7496399060212483e-06, + "loss": 0.2825, + "step": 26006 + }, + { + "epoch": 1.2182976530660046, + "grad_norm": 0.6274637078128609, + "learning_rate": 1.7494590020614813e-06, + "loss": 0.2848, + "step": 26007 + }, + { + "epoch": 1.218344498055933, + "grad_norm": 0.615176220152075, + "learning_rate": 1.7492781024208884e-06, + "loss": 0.2897, + "step": 26008 + }, + { + "epoch": 1.2183913430458613, + "grad_norm": 0.6058177447625297, + "learning_rate": 1.749097207100511e-06, + "loss": 0.278, + "step": 26009 + }, + { + "epoch": 1.2184381880357895, + "grad_norm": 0.5574570537862085, + "learning_rate": 1.7489163161013905e-06, + "loss": 0.2683, + "step": 26010 + }, + { + "epoch": 1.218485033025718, + "grad_norm": 0.5921455282753266, + "learning_rate": 1.7487354294245685e-06, + "loss": 0.2828, + "step": 26011 + }, + { + "epoch": 1.2185318780156462, + "grad_norm": 0.5978578581907587, + "learning_rate": 1.7485545470710841e-06, + "loss": 0.2872, + "step": 26012 + }, + { + "epoch": 1.2185787230055745, + "grad_norm": 0.5466909249036748, + "learning_rate": 1.7483736690419812e-06, + "loss": 0.2607, + "step": 26013 + }, + { + "epoch": 1.218625567995503, + "grad_norm": 0.6135266468729494, + "learning_rate": 1.7481927953382974e-06, + "loss": 0.289, + "step": 26014 + }, + { + "epoch": 1.2186724129854312, + "grad_norm": 0.6017342179876533, + "learning_rate": 1.7480119259610755e-06, + "loss": 0.2887, + "step": 26015 + }, + { + "epoch": 1.2187192579753596, + "grad_norm": 0.6446447682461541, + "learning_rate": 1.7478310609113558e-06, + "loss": 0.2937, + "step": 26016 + }, + { + "epoch": 1.2187661029652879, + "grad_norm": 0.6089351200659985, + "learning_rate": 1.7476502001901799e-06, + "loss": 0.2695, + "step": 26017 + }, + { + "epoch": 1.218812947955216, + "grad_norm": 0.556787849667404, + "learning_rate": 1.7474693437985874e-06, + "loss": 0.2657, + "step": 26018 + }, + { + "epoch": 1.2188597929451446, + "grad_norm": 0.5730532375167555, + "learning_rate": 1.7472884917376205e-06, + "loss": 0.2566, + "step": 26019 + }, + { + "epoch": 1.2189066379350728, + "grad_norm": 0.5701409475707367, + "learning_rate": 1.7471076440083182e-06, + "loss": 0.2613, + "step": 26020 + }, + { + "epoch": 1.2189534829250013, + "grad_norm": 0.6178851956768098, + "learning_rate": 1.746926800611723e-06, + "loss": 0.2924, + "step": 26021 + }, + { + "epoch": 1.2190003279149295, + "grad_norm": 0.6255727503955724, + "learning_rate": 1.7467459615488737e-06, + "loss": 0.2654, + "step": 26022 + }, + { + "epoch": 1.2190471729048578, + "grad_norm": 0.5732867800922107, + "learning_rate": 1.7465651268208123e-06, + "loss": 0.2679, + "step": 26023 + }, + { + "epoch": 1.2190940178947862, + "grad_norm": 0.5684235062086865, + "learning_rate": 1.74638429642858e-06, + "loss": 0.2694, + "step": 26024 + }, + { + "epoch": 1.2191408628847145, + "grad_norm": 0.5839267926901163, + "learning_rate": 1.746203470373217e-06, + "loss": 0.2675, + "step": 26025 + }, + { + "epoch": 1.2191877078746427, + "grad_norm": 0.582492772156586, + "learning_rate": 1.7460226486557624e-06, + "loss": 0.2746, + "step": 26026 + }, + { + "epoch": 1.2192345528645712, + "grad_norm": 0.5634047456926781, + "learning_rate": 1.745841831277258e-06, + "loss": 0.2681, + "step": 26027 + }, + { + "epoch": 1.2192813978544994, + "grad_norm": 0.6319239627496914, + "learning_rate": 1.745661018238745e-06, + "loss": 0.2744, + "step": 26028 + }, + { + "epoch": 1.2193282428444279, + "grad_norm": 0.5979102590978349, + "learning_rate": 1.7454802095412627e-06, + "loss": 0.2862, + "step": 26029 + }, + { + "epoch": 1.219375087834356, + "grad_norm": 0.5964385309997723, + "learning_rate": 1.7452994051858518e-06, + "loss": 0.2663, + "step": 26030 + }, + { + "epoch": 1.2194219328242843, + "grad_norm": 0.6068315941401972, + "learning_rate": 1.7451186051735548e-06, + "loss": 0.292, + "step": 26031 + }, + { + "epoch": 1.2194687778142128, + "grad_norm": 0.6345206448580034, + "learning_rate": 1.7449378095054092e-06, + "loss": 0.3035, + "step": 26032 + }, + { + "epoch": 1.219515622804141, + "grad_norm": 0.5864351090169142, + "learning_rate": 1.744757018182457e-06, + "loss": 0.2645, + "step": 26033 + }, + { + "epoch": 1.2195624677940695, + "grad_norm": 0.6629359546090334, + "learning_rate": 1.744576231205738e-06, + "loss": 0.3097, + "step": 26034 + }, + { + "epoch": 1.2196093127839978, + "grad_norm": 0.549960232788721, + "learning_rate": 1.7443954485762932e-06, + "loss": 0.2543, + "step": 26035 + }, + { + "epoch": 1.219656157773926, + "grad_norm": 0.5772819162917726, + "learning_rate": 1.7442146702951624e-06, + "loss": 0.2664, + "step": 26036 + }, + { + "epoch": 1.2197030027638545, + "grad_norm": 0.6035912057652106, + "learning_rate": 1.7440338963633874e-06, + "loss": 0.2751, + "step": 26037 + }, + { + "epoch": 1.2197498477537827, + "grad_norm": 0.569148158021884, + "learning_rate": 1.7438531267820058e-06, + "loss": 0.2582, + "step": 26038 + }, + { + "epoch": 1.219796692743711, + "grad_norm": 0.5397430884456861, + "learning_rate": 1.7436723615520604e-06, + "loss": 0.2484, + "step": 26039 + }, + { + "epoch": 1.2198435377336394, + "grad_norm": 0.6231248396877014, + "learning_rate": 1.7434916006745897e-06, + "loss": 0.2773, + "step": 26040 + }, + { + "epoch": 1.2198903827235676, + "grad_norm": 0.6038278490121461, + "learning_rate": 1.7433108441506347e-06, + "loss": 0.2716, + "step": 26041 + }, + { + "epoch": 1.219937227713496, + "grad_norm": 0.5903385240192448, + "learning_rate": 1.7431300919812363e-06, + "loss": 0.2724, + "step": 26042 + }, + { + "epoch": 1.2199840727034243, + "grad_norm": 0.6195180891957092, + "learning_rate": 1.7429493441674344e-06, + "loss": 0.2799, + "step": 26043 + }, + { + "epoch": 1.2200309176933528, + "grad_norm": 0.6080376709876308, + "learning_rate": 1.742768600710268e-06, + "loss": 0.2756, + "step": 26044 + }, + { + "epoch": 1.220077762683281, + "grad_norm": 0.6237354427432007, + "learning_rate": 1.7425878616107774e-06, + "loss": 0.2801, + "step": 26045 + }, + { + "epoch": 1.2201246076732093, + "grad_norm": 0.5932828919597548, + "learning_rate": 1.7424071268700043e-06, + "loss": 0.2864, + "step": 26046 + }, + { + "epoch": 1.2201714526631378, + "grad_norm": 0.6303306974204309, + "learning_rate": 1.7422263964889869e-06, + "loss": 0.2774, + "step": 26047 + }, + { + "epoch": 1.220218297653066, + "grad_norm": 0.5267055145857392, + "learning_rate": 1.7420456704687663e-06, + "loss": 0.2735, + "step": 26048 + }, + { + "epoch": 1.2202651426429942, + "grad_norm": 0.6018557161263128, + "learning_rate": 1.7418649488103828e-06, + "loss": 0.2732, + "step": 26049 + }, + { + "epoch": 1.2203119876329227, + "grad_norm": 0.5719018832985965, + "learning_rate": 1.7416842315148767e-06, + "loss": 0.2632, + "step": 26050 + }, + { + "epoch": 1.220358832622851, + "grad_norm": 0.5698012748925645, + "learning_rate": 1.741503518583286e-06, + "loss": 0.2666, + "step": 26051 + }, + { + "epoch": 1.2204056776127794, + "grad_norm": 0.631099368876178, + "learning_rate": 1.7413228100166517e-06, + "loss": 0.2692, + "step": 26052 + }, + { + "epoch": 1.2204525226027076, + "grad_norm": 0.5620591320195849, + "learning_rate": 1.741142105816015e-06, + "loss": 0.2776, + "step": 26053 + }, + { + "epoch": 1.2204993675926359, + "grad_norm": 0.5807439745016115, + "learning_rate": 1.740961405982414e-06, + "loss": 0.2645, + "step": 26054 + }, + { + "epoch": 1.2205462125825643, + "grad_norm": 0.6057032007100354, + "learning_rate": 1.7407807105168898e-06, + "loss": 0.269, + "step": 26055 + }, + { + "epoch": 1.2205930575724926, + "grad_norm": 0.5992698855151988, + "learning_rate": 1.7406000194204816e-06, + "loss": 0.2536, + "step": 26056 + }, + { + "epoch": 1.220639902562421, + "grad_norm": 0.5944876299841414, + "learning_rate": 1.7404193326942298e-06, + "loss": 0.2769, + "step": 26057 + }, + { + "epoch": 1.2206867475523493, + "grad_norm": 0.5964831372647115, + "learning_rate": 1.7402386503391733e-06, + "loss": 0.2631, + "step": 26058 + }, + { + "epoch": 1.2207335925422775, + "grad_norm": 0.6133166729500732, + "learning_rate": 1.7400579723563526e-06, + "loss": 0.2864, + "step": 26059 + }, + { + "epoch": 1.220780437532206, + "grad_norm": 0.6321366221760154, + "learning_rate": 1.7398772987468068e-06, + "loss": 0.2975, + "step": 26060 + }, + { + "epoch": 1.2208272825221342, + "grad_norm": 0.5319718665434336, + "learning_rate": 1.7396966295115763e-06, + "loss": 0.2504, + "step": 26061 + }, + { + "epoch": 1.2208741275120625, + "grad_norm": 0.5813200278662313, + "learning_rate": 1.7395159646517016e-06, + "loss": 0.2753, + "step": 26062 + }, + { + "epoch": 1.220920972501991, + "grad_norm": 0.6019158798445541, + "learning_rate": 1.7393353041682207e-06, + "loss": 0.2598, + "step": 26063 + }, + { + "epoch": 1.2209678174919192, + "grad_norm": 0.5822167310688432, + "learning_rate": 1.7391546480621735e-06, + "loss": 0.2625, + "step": 26064 + }, + { + "epoch": 1.2210146624818476, + "grad_norm": 0.5843904732927586, + "learning_rate": 1.7389739963346004e-06, + "loss": 0.2683, + "step": 26065 + }, + { + "epoch": 1.2210615074717759, + "grad_norm": 0.6687805895326835, + "learning_rate": 1.7387933489865405e-06, + "loss": 0.313, + "step": 26066 + }, + { + "epoch": 1.2211083524617041, + "grad_norm": 0.6109938812944189, + "learning_rate": 1.7386127060190334e-06, + "loss": 0.2835, + "step": 26067 + }, + { + "epoch": 1.2211551974516326, + "grad_norm": 0.5754823697197483, + "learning_rate": 1.73843206743312e-06, + "loss": 0.2729, + "step": 26068 + }, + { + "epoch": 1.2212020424415608, + "grad_norm": 0.576022857699867, + "learning_rate": 1.7382514332298376e-06, + "loss": 0.2677, + "step": 26069 + }, + { + "epoch": 1.2212488874314893, + "grad_norm": 0.5911510512027424, + "learning_rate": 1.738070803410227e-06, + "loss": 0.2776, + "step": 26070 + }, + { + "epoch": 1.2212957324214175, + "grad_norm": 0.547029835324453, + "learning_rate": 1.737890177975327e-06, + "loss": 0.2643, + "step": 26071 + }, + { + "epoch": 1.2213425774113458, + "grad_norm": 0.5932480712878335, + "learning_rate": 1.7377095569261776e-06, + "loss": 0.2763, + "step": 26072 + }, + { + "epoch": 1.2213894224012742, + "grad_norm": 0.594958151997035, + "learning_rate": 1.7375289402638184e-06, + "loss": 0.2717, + "step": 26073 + }, + { + "epoch": 1.2214362673912025, + "grad_norm": 0.61394843383219, + "learning_rate": 1.7373483279892895e-06, + "loss": 0.2879, + "step": 26074 + }, + { + "epoch": 1.2214831123811307, + "grad_norm": 0.606796812784005, + "learning_rate": 1.7371677201036278e-06, + "loss": 0.2706, + "step": 26075 + }, + { + "epoch": 1.2215299573710592, + "grad_norm": 0.5632389774427602, + "learning_rate": 1.7369871166078742e-06, + "loss": 0.2634, + "step": 26076 + }, + { + "epoch": 1.2215768023609874, + "grad_norm": 0.59373916226781, + "learning_rate": 1.7368065175030684e-06, + "loss": 0.2671, + "step": 26077 + }, + { + "epoch": 1.2216236473509159, + "grad_norm": 0.5677944246009328, + "learning_rate": 1.7366259227902487e-06, + "loss": 0.2646, + "step": 26078 + }, + { + "epoch": 1.2216704923408441, + "grad_norm": 0.5410197597201001, + "learning_rate": 1.7364453324704556e-06, + "loss": 0.2612, + "step": 26079 + }, + { + "epoch": 1.2217173373307726, + "grad_norm": 0.5960062926691773, + "learning_rate": 1.736264746544728e-06, + "loss": 0.267, + "step": 26080 + }, + { + "epoch": 1.2217641823207008, + "grad_norm": 0.6321962311403814, + "learning_rate": 1.7360841650141046e-06, + "loss": 0.2943, + "step": 26081 + }, + { + "epoch": 1.221811027310629, + "grad_norm": 0.5930929399661916, + "learning_rate": 1.7359035878796244e-06, + "loss": 0.2764, + "step": 26082 + }, + { + "epoch": 1.2218578723005575, + "grad_norm": 0.6182612701720286, + "learning_rate": 1.7357230151423269e-06, + "loss": 0.2678, + "step": 26083 + }, + { + "epoch": 1.2219047172904858, + "grad_norm": 0.6513003353464949, + "learning_rate": 1.7355424468032517e-06, + "loss": 0.2829, + "step": 26084 + }, + { + "epoch": 1.221951562280414, + "grad_norm": 0.5883931000068057, + "learning_rate": 1.7353618828634372e-06, + "loss": 0.2849, + "step": 26085 + }, + { + "epoch": 1.2219984072703425, + "grad_norm": 0.5983888192064767, + "learning_rate": 1.735181323323924e-06, + "loss": 0.2506, + "step": 26086 + }, + { + "epoch": 1.2220452522602707, + "grad_norm": 0.6137138790727394, + "learning_rate": 1.735000768185749e-06, + "loss": 0.2726, + "step": 26087 + }, + { + "epoch": 1.2220920972501992, + "grad_norm": 0.6049112343493653, + "learning_rate": 1.7348202174499529e-06, + "loss": 0.2699, + "step": 26088 + }, + { + "epoch": 1.2221389422401274, + "grad_norm": 0.5577445822466465, + "learning_rate": 1.7346396711175734e-06, + "loss": 0.266, + "step": 26089 + }, + { + "epoch": 1.2221857872300557, + "grad_norm": 0.5928668832473202, + "learning_rate": 1.7344591291896504e-06, + "loss": 0.2708, + "step": 26090 + }, + { + "epoch": 1.2222326322199841, + "grad_norm": 0.577856733219101, + "learning_rate": 1.7342785916672231e-06, + "loss": 0.2768, + "step": 26091 + }, + { + "epoch": 1.2222794772099124, + "grad_norm": 0.5792311196410981, + "learning_rate": 1.7340980585513306e-06, + "loss": 0.2696, + "step": 26092 + }, + { + "epoch": 1.2223263221998408, + "grad_norm": 0.5975954933496619, + "learning_rate": 1.7339175298430102e-06, + "loss": 0.2715, + "step": 26093 + }, + { + "epoch": 1.222373167189769, + "grad_norm": 0.6149395956139196, + "learning_rate": 1.733737005543302e-06, + "loss": 0.2805, + "step": 26094 + }, + { + "epoch": 1.2224200121796973, + "grad_norm": 0.5890894378994781, + "learning_rate": 1.733556485653245e-06, + "loss": 0.2815, + "step": 26095 + }, + { + "epoch": 1.2224668571696258, + "grad_norm": 0.6266792422653861, + "learning_rate": 1.7333759701738775e-06, + "loss": 0.28, + "step": 26096 + }, + { + "epoch": 1.222513702159554, + "grad_norm": 0.6286790955515754, + "learning_rate": 1.7331954591062386e-06, + "loss": 0.28, + "step": 26097 + }, + { + "epoch": 1.2225605471494823, + "grad_norm": 0.6556546166940866, + "learning_rate": 1.7330149524513682e-06, + "loss": 0.2888, + "step": 26098 + }, + { + "epoch": 1.2226073921394107, + "grad_norm": 0.6048219871406838, + "learning_rate": 1.7328344502103034e-06, + "loss": 0.2756, + "step": 26099 + }, + { + "epoch": 1.222654237129339, + "grad_norm": 0.5953018863474261, + "learning_rate": 1.732653952384083e-06, + "loss": 0.2741, + "step": 26100 + }, + { + "epoch": 1.2227010821192674, + "grad_norm": 0.6049052506599802, + "learning_rate": 1.7324734589737457e-06, + "loss": 0.2814, + "step": 26101 + }, + { + "epoch": 1.2227479271091957, + "grad_norm": 0.6201632688827275, + "learning_rate": 1.732292969980332e-06, + "loss": 0.2924, + "step": 26102 + }, + { + "epoch": 1.222794772099124, + "grad_norm": 0.5691235377866397, + "learning_rate": 1.7321124854048786e-06, + "loss": 0.2759, + "step": 26103 + }, + { + "epoch": 1.2228416170890524, + "grad_norm": 0.5793533964876255, + "learning_rate": 1.7319320052484257e-06, + "loss": 0.2774, + "step": 26104 + }, + { + "epoch": 1.2228884620789806, + "grad_norm": 0.6189364423250431, + "learning_rate": 1.7317515295120102e-06, + "loss": 0.2912, + "step": 26105 + }, + { + "epoch": 1.222935307068909, + "grad_norm": 0.5443263479684236, + "learning_rate": 1.7315710581966719e-06, + "loss": 0.2695, + "step": 26106 + }, + { + "epoch": 1.2229821520588373, + "grad_norm": 0.6168091251530761, + "learning_rate": 1.7313905913034484e-06, + "loss": 0.284, + "step": 26107 + }, + { + "epoch": 1.2230289970487656, + "grad_norm": 0.5781369846007484, + "learning_rate": 1.73121012883338e-06, + "loss": 0.2753, + "step": 26108 + }, + { + "epoch": 1.223075842038694, + "grad_norm": 0.5485476973904571, + "learning_rate": 1.731029670787503e-06, + "loss": 0.2764, + "step": 26109 + }, + { + "epoch": 1.2231226870286223, + "grad_norm": 0.6753315376867879, + "learning_rate": 1.730849217166858e-06, + "loss": 0.2774, + "step": 26110 + }, + { + "epoch": 1.2231695320185505, + "grad_norm": 0.6314783445103602, + "learning_rate": 1.7306687679724816e-06, + "loss": 0.2812, + "step": 26111 + }, + { + "epoch": 1.223216377008479, + "grad_norm": 0.5965417703424409, + "learning_rate": 1.7304883232054136e-06, + "loss": 0.2741, + "step": 26112 + }, + { + "epoch": 1.2232632219984072, + "grad_norm": 0.6139547858894436, + "learning_rate": 1.7303078828666913e-06, + "loss": 0.2892, + "step": 26113 + }, + { + "epoch": 1.2233100669883357, + "grad_norm": 0.6723097641963008, + "learning_rate": 1.7301274469573537e-06, + "loss": 0.3018, + "step": 26114 + }, + { + "epoch": 1.223356911978264, + "grad_norm": 0.6191072524031505, + "learning_rate": 1.7299470154784397e-06, + "loss": 0.2856, + "step": 26115 + }, + { + "epoch": 1.2234037569681924, + "grad_norm": 0.6323140047536019, + "learning_rate": 1.7297665884309867e-06, + "loss": 0.2788, + "step": 26116 + }, + { + "epoch": 1.2234506019581206, + "grad_norm": 0.6497856368118344, + "learning_rate": 1.729586165816034e-06, + "loss": 0.3009, + "step": 26117 + }, + { + "epoch": 1.2234974469480489, + "grad_norm": 0.5936897024654773, + "learning_rate": 1.7294057476346188e-06, + "loss": 0.2779, + "step": 26118 + }, + { + "epoch": 1.2235442919379773, + "grad_norm": 0.7091453188329289, + "learning_rate": 1.7292253338877799e-06, + "loss": 0.286, + "step": 26119 + }, + { + "epoch": 1.2235911369279056, + "grad_norm": 0.5739043649903267, + "learning_rate": 1.729044924576555e-06, + "loss": 0.2686, + "step": 26120 + }, + { + "epoch": 1.2236379819178338, + "grad_norm": 0.6130642335261937, + "learning_rate": 1.728864519701983e-06, + "loss": 0.2844, + "step": 26121 + }, + { + "epoch": 1.2236848269077623, + "grad_norm": 0.5714051891948773, + "learning_rate": 1.7286841192651022e-06, + "loss": 0.2749, + "step": 26122 + }, + { + "epoch": 1.2237316718976905, + "grad_norm": 0.5795720412999563, + "learning_rate": 1.728503723266951e-06, + "loss": 0.2605, + "step": 26123 + }, + { + "epoch": 1.223778516887619, + "grad_norm": 0.5882551853685772, + "learning_rate": 1.728323331708566e-06, + "loss": 0.2769, + "step": 26124 + }, + { + "epoch": 1.2238253618775472, + "grad_norm": 0.5625209514950622, + "learning_rate": 1.7281429445909865e-06, + "loss": 0.271, + "step": 26125 + }, + { + "epoch": 1.2238722068674754, + "grad_norm": 0.6032725908215312, + "learning_rate": 1.7279625619152505e-06, + "loss": 0.274, + "step": 26126 + }, + { + "epoch": 1.223919051857404, + "grad_norm": 0.5882190237205654, + "learning_rate": 1.7277821836823954e-06, + "loss": 0.268, + "step": 26127 + }, + { + "epoch": 1.2239658968473321, + "grad_norm": 0.6176965698127627, + "learning_rate": 1.72760180989346e-06, + "loss": 0.2772, + "step": 26128 + }, + { + "epoch": 1.2240127418372606, + "grad_norm": 0.6139977150572598, + "learning_rate": 1.7274214405494826e-06, + "loss": 0.2755, + "step": 26129 + }, + { + "epoch": 1.2240595868271889, + "grad_norm": 0.6366898467067238, + "learning_rate": 1.7272410756515007e-06, + "loss": 0.2757, + "step": 26130 + }, + { + "epoch": 1.224106431817117, + "grad_norm": 0.5792322468121037, + "learning_rate": 1.7270607152005514e-06, + "loss": 0.2792, + "step": 26131 + }, + { + "epoch": 1.2241532768070456, + "grad_norm": 0.611144825600717, + "learning_rate": 1.7268803591976735e-06, + "loss": 0.2845, + "step": 26132 + }, + { + "epoch": 1.2242001217969738, + "grad_norm": 0.6040165652335414, + "learning_rate": 1.726700007643905e-06, + "loss": 0.2734, + "step": 26133 + }, + { + "epoch": 1.224246966786902, + "grad_norm": 0.5733743164978196, + "learning_rate": 1.7265196605402834e-06, + "loss": 0.2635, + "step": 26134 + }, + { + "epoch": 1.2242938117768305, + "grad_norm": 0.5987068690612821, + "learning_rate": 1.726339317887848e-06, + "loss": 0.2795, + "step": 26135 + }, + { + "epoch": 1.2243406567667587, + "grad_norm": 0.6268566253230896, + "learning_rate": 1.7261589796876339e-06, + "loss": 0.305, + "step": 26136 + }, + { + "epoch": 1.2243875017566872, + "grad_norm": 0.6799384250577549, + "learning_rate": 1.7259786459406807e-06, + "loss": 0.2863, + "step": 26137 + }, + { + "epoch": 1.2244343467466154, + "grad_norm": 0.6274844023961169, + "learning_rate": 1.7257983166480258e-06, + "loss": 0.276, + "step": 26138 + }, + { + "epoch": 1.2244811917365437, + "grad_norm": 0.5799823265846459, + "learning_rate": 1.7256179918107069e-06, + "loss": 0.2709, + "step": 26139 + }, + { + "epoch": 1.2245280367264721, + "grad_norm": 0.6260916514476941, + "learning_rate": 1.7254376714297625e-06, + "loss": 0.2961, + "step": 26140 + }, + { + "epoch": 1.2245748817164004, + "grad_norm": 0.5962908009931028, + "learning_rate": 1.72525735550623e-06, + "loss": 0.275, + "step": 26141 + }, + { + "epoch": 1.2246217267063289, + "grad_norm": 0.5904896212792329, + "learning_rate": 1.7250770440411454e-06, + "loss": 0.2561, + "step": 26142 + }, + { + "epoch": 1.224668571696257, + "grad_norm": 0.5769562038491214, + "learning_rate": 1.724896737035548e-06, + "loss": 0.2878, + "step": 26143 + }, + { + "epoch": 1.2247154166861853, + "grad_norm": 0.5447008412305546, + "learning_rate": 1.7247164344904754e-06, + "loss": 0.2559, + "step": 26144 + }, + { + "epoch": 1.2247622616761138, + "grad_norm": 0.548747848365307, + "learning_rate": 1.7245361364069644e-06, + "loss": 0.2489, + "step": 26145 + }, + { + "epoch": 1.224809106666042, + "grad_norm": 0.5932326781996865, + "learning_rate": 1.724355842786053e-06, + "loss": 0.2838, + "step": 26146 + }, + { + "epoch": 1.2248559516559703, + "grad_norm": 0.6029765223334512, + "learning_rate": 1.7241755536287795e-06, + "loss": 0.2772, + "step": 26147 + }, + { + "epoch": 1.2249027966458987, + "grad_norm": 0.5526298366086472, + "learning_rate": 1.7239952689361807e-06, + "loss": 0.2758, + "step": 26148 + }, + { + "epoch": 1.224949641635827, + "grad_norm": 0.6208102424175297, + "learning_rate": 1.7238149887092936e-06, + "loss": 0.2803, + "step": 26149 + }, + { + "epoch": 1.2249964866257554, + "grad_norm": 0.5533576544783478, + "learning_rate": 1.723634712949156e-06, + "loss": 0.2634, + "step": 26150 + }, + { + "epoch": 1.2250433316156837, + "grad_norm": 0.5901455820938234, + "learning_rate": 1.7234544416568058e-06, + "loss": 0.2828, + "step": 26151 + }, + { + "epoch": 1.2250901766056121, + "grad_norm": 0.5895242131425235, + "learning_rate": 1.7232741748332799e-06, + "loss": 0.2995, + "step": 26152 + }, + { + "epoch": 1.2251370215955404, + "grad_norm": 0.6372475986456131, + "learning_rate": 1.7230939124796165e-06, + "loss": 0.2911, + "step": 26153 + }, + { + "epoch": 1.2251838665854686, + "grad_norm": 0.5957367602777027, + "learning_rate": 1.7229136545968516e-06, + "loss": 0.2665, + "step": 26154 + }, + { + "epoch": 1.225230711575397, + "grad_norm": 0.5448530550010983, + "learning_rate": 1.7227334011860239e-06, + "loss": 0.2636, + "step": 26155 + }, + { + "epoch": 1.2252775565653253, + "grad_norm": 0.5988050926551179, + "learning_rate": 1.7225531522481696e-06, + "loss": 0.284, + "step": 26156 + }, + { + "epoch": 1.2253244015552536, + "grad_norm": 0.5819564390507836, + "learning_rate": 1.7223729077843268e-06, + "loss": 0.268, + "step": 26157 + }, + { + "epoch": 1.225371246545182, + "grad_norm": 0.5912750098782839, + "learning_rate": 1.722192667795532e-06, + "loss": 0.2752, + "step": 26158 + }, + { + "epoch": 1.2254180915351103, + "grad_norm": 0.5777281270255112, + "learning_rate": 1.722012432282824e-06, + "loss": 0.2666, + "step": 26159 + }, + { + "epoch": 1.2254649365250387, + "grad_norm": 0.6689772449103848, + "learning_rate": 1.721832201247238e-06, + "loss": 0.3112, + "step": 26160 + }, + { + "epoch": 1.225511781514967, + "grad_norm": 0.600672092724866, + "learning_rate": 1.7216519746898124e-06, + "loss": 0.3055, + "step": 26161 + }, + { + "epoch": 1.2255586265048952, + "grad_norm": 0.5916625395823376, + "learning_rate": 1.7214717526115838e-06, + "loss": 0.274, + "step": 26162 + }, + { + "epoch": 1.2256054714948237, + "grad_norm": 0.5969124342024791, + "learning_rate": 1.7212915350135894e-06, + "loss": 0.2824, + "step": 26163 + }, + { + "epoch": 1.225652316484752, + "grad_norm": 0.6058559061754609, + "learning_rate": 1.7211113218968673e-06, + "loss": 0.2866, + "step": 26164 + }, + { + "epoch": 1.2256991614746804, + "grad_norm": 0.5892884806485108, + "learning_rate": 1.720931113262454e-06, + "loss": 0.2793, + "step": 26165 + }, + { + "epoch": 1.2257460064646086, + "grad_norm": 0.5804886634211631, + "learning_rate": 1.720750909111385e-06, + "loss": 0.2771, + "step": 26166 + }, + { + "epoch": 1.2257928514545369, + "grad_norm": 0.5745565267574974, + "learning_rate": 1.7205707094446991e-06, + "loss": 0.2739, + "step": 26167 + }, + { + "epoch": 1.2258396964444653, + "grad_norm": 0.5876748754387379, + "learning_rate": 1.7203905142634334e-06, + "loss": 0.2642, + "step": 26168 + }, + { + "epoch": 1.2258865414343936, + "grad_norm": 0.6067512236806797, + "learning_rate": 1.7202103235686235e-06, + "loss": 0.2693, + "step": 26169 + }, + { + "epoch": 1.2259333864243218, + "grad_norm": 0.5883646837972769, + "learning_rate": 1.7200301373613076e-06, + "loss": 0.2628, + "step": 26170 + }, + { + "epoch": 1.2259802314142503, + "grad_norm": 0.591403508141441, + "learning_rate": 1.719849955642523e-06, + "loss": 0.2827, + "step": 26171 + }, + { + "epoch": 1.2260270764041785, + "grad_norm": 0.632004130091583, + "learning_rate": 1.7196697784133054e-06, + "loss": 0.2728, + "step": 26172 + }, + { + "epoch": 1.226073921394107, + "grad_norm": 0.6262546639863673, + "learning_rate": 1.7194896056746915e-06, + "loss": 0.2753, + "step": 26173 + }, + { + "epoch": 1.2261207663840352, + "grad_norm": 0.600339230990445, + "learning_rate": 1.7193094374277186e-06, + "loss": 0.2824, + "step": 26174 + }, + { + "epoch": 1.2261676113739635, + "grad_norm": 0.60022987426717, + "learning_rate": 1.7191292736734244e-06, + "loss": 0.2666, + "step": 26175 + }, + { + "epoch": 1.226214456363892, + "grad_norm": 0.5852205333626828, + "learning_rate": 1.7189491144128445e-06, + "loss": 0.2768, + "step": 26176 + }, + { + "epoch": 1.2262613013538202, + "grad_norm": 0.5939352222862839, + "learning_rate": 1.7187689596470172e-06, + "loss": 0.2722, + "step": 26177 + }, + { + "epoch": 1.2263081463437486, + "grad_norm": 0.6351773270557831, + "learning_rate": 1.718588809376977e-06, + "loss": 0.2864, + "step": 26178 + }, + { + "epoch": 1.2263549913336769, + "grad_norm": 0.5623446128634543, + "learning_rate": 1.7184086636037622e-06, + "loss": 0.2779, + "step": 26179 + }, + { + "epoch": 1.226401836323605, + "grad_norm": 0.5942636475650195, + "learning_rate": 1.718228522328409e-06, + "loss": 0.2578, + "step": 26180 + }, + { + "epoch": 1.2264486813135336, + "grad_norm": 0.6033318004301892, + "learning_rate": 1.718048385551954e-06, + "loss": 0.2659, + "step": 26181 + }, + { + "epoch": 1.2264955263034618, + "grad_norm": 0.5276937799567383, + "learning_rate": 1.7178682532754342e-06, + "loss": 0.2448, + "step": 26182 + }, + { + "epoch": 1.22654237129339, + "grad_norm": 0.6173093071489364, + "learning_rate": 1.7176881254998868e-06, + "loss": 0.2842, + "step": 26183 + }, + { + "epoch": 1.2265892162833185, + "grad_norm": 0.6329832283591216, + "learning_rate": 1.7175080022263466e-06, + "loss": 0.2822, + "step": 26184 + }, + { + "epoch": 1.2266360612732468, + "grad_norm": 0.5982476166420899, + "learning_rate": 1.717327883455851e-06, + "loss": 0.2755, + "step": 26185 + }, + { + "epoch": 1.2266829062631752, + "grad_norm": 0.5852746712673212, + "learning_rate": 1.7171477691894372e-06, + "loss": 0.264, + "step": 26186 + }, + { + "epoch": 1.2267297512531035, + "grad_norm": 0.5357296139139945, + "learning_rate": 1.716967659428141e-06, + "loss": 0.2685, + "step": 26187 + }, + { + "epoch": 1.226776596243032, + "grad_norm": 0.560728627115505, + "learning_rate": 1.7167875541729988e-06, + "loss": 0.2645, + "step": 26188 + }, + { + "epoch": 1.2268234412329602, + "grad_norm": 0.6258987450086464, + "learning_rate": 1.7166074534250482e-06, + "loss": 0.2728, + "step": 26189 + }, + { + "epoch": 1.2268702862228884, + "grad_norm": 0.578128837709396, + "learning_rate": 1.7164273571853251e-06, + "loss": 0.2664, + "step": 26190 + }, + { + "epoch": 1.2269171312128169, + "grad_norm": 0.5819740534570652, + "learning_rate": 1.7162472654548652e-06, + "loss": 0.2697, + "step": 26191 + }, + { + "epoch": 1.226963976202745, + "grad_norm": 0.6049804592667519, + "learning_rate": 1.7160671782347043e-06, + "loss": 0.2874, + "step": 26192 + }, + { + "epoch": 1.2270108211926734, + "grad_norm": 0.6284015057590724, + "learning_rate": 1.715887095525881e-06, + "loss": 0.294, + "step": 26193 + }, + { + "epoch": 1.2270576661826018, + "grad_norm": 0.6211948012111409, + "learning_rate": 1.7157070173294294e-06, + "loss": 0.278, + "step": 26194 + }, + { + "epoch": 1.22710451117253, + "grad_norm": 0.5913734535572684, + "learning_rate": 1.7155269436463875e-06, + "loss": 0.2744, + "step": 26195 + }, + { + "epoch": 1.2271513561624585, + "grad_norm": 0.6206115152002147, + "learning_rate": 1.715346874477791e-06, + "loss": 0.3031, + "step": 26196 + }, + { + "epoch": 1.2271982011523868, + "grad_norm": 0.5517149121242922, + "learning_rate": 1.715166809824676e-06, + "loss": 0.2647, + "step": 26197 + }, + { + "epoch": 1.227245046142315, + "grad_norm": 0.5726650608808271, + "learning_rate": 1.7149867496880779e-06, + "loss": 0.2703, + "step": 26198 + }, + { + "epoch": 1.2272918911322435, + "grad_norm": 0.5802715087922766, + "learning_rate": 1.7148066940690345e-06, + "loss": 0.2839, + "step": 26199 + }, + { + "epoch": 1.2273387361221717, + "grad_norm": 0.6100982713355068, + "learning_rate": 1.7146266429685808e-06, + "loss": 0.282, + "step": 26200 + }, + { + "epoch": 1.2273855811121002, + "grad_norm": 0.5698896015795866, + "learning_rate": 1.7144465963877531e-06, + "loss": 0.2751, + "step": 26201 + }, + { + "epoch": 1.2274324261020284, + "grad_norm": 0.6038541215637606, + "learning_rate": 1.7142665543275894e-06, + "loss": 0.2627, + "step": 26202 + }, + { + "epoch": 1.2274792710919566, + "grad_norm": 0.6002913702889735, + "learning_rate": 1.714086516789123e-06, + "loss": 0.2732, + "step": 26203 + }, + { + "epoch": 1.227526116081885, + "grad_norm": 0.6037673630269516, + "learning_rate": 1.713906483773391e-06, + "loss": 0.2797, + "step": 26204 + }, + { + "epoch": 1.2275729610718134, + "grad_norm": 0.6133356707288397, + "learning_rate": 1.7137264552814298e-06, + "loss": 0.275, + "step": 26205 + }, + { + "epoch": 1.2276198060617416, + "grad_norm": 0.5861167657977768, + "learning_rate": 1.7135464313142752e-06, + "loss": 0.2748, + "step": 26206 + }, + { + "epoch": 1.22766665105167, + "grad_norm": 0.6148550778323537, + "learning_rate": 1.7133664118729632e-06, + "loss": 0.2895, + "step": 26207 + }, + { + "epoch": 1.2277134960415983, + "grad_norm": 0.5654472054934745, + "learning_rate": 1.7131863969585306e-06, + "loss": 0.2803, + "step": 26208 + }, + { + "epoch": 1.2277603410315268, + "grad_norm": 0.6031573137804462, + "learning_rate": 1.7130063865720115e-06, + "loss": 0.2726, + "step": 26209 + }, + { + "epoch": 1.227807186021455, + "grad_norm": 0.6070382977514817, + "learning_rate": 1.7128263807144432e-06, + "loss": 0.2894, + "step": 26210 + }, + { + "epoch": 1.2278540310113832, + "grad_norm": 0.6748236826629964, + "learning_rate": 1.7126463793868608e-06, + "loss": 0.2819, + "step": 26211 + }, + { + "epoch": 1.2279008760013117, + "grad_norm": 0.5444041607279878, + "learning_rate": 1.712466382590301e-06, + "loss": 0.2523, + "step": 26212 + }, + { + "epoch": 1.22794772099124, + "grad_norm": 0.6102629876671263, + "learning_rate": 1.7122863903257992e-06, + "loss": 0.2608, + "step": 26213 + }, + { + "epoch": 1.2279945659811684, + "grad_norm": 0.6062037280830235, + "learning_rate": 1.712106402594392e-06, + "loss": 0.2648, + "step": 26214 + }, + { + "epoch": 1.2280414109710966, + "grad_norm": 0.5567007996052048, + "learning_rate": 1.7119264193971135e-06, + "loss": 0.2659, + "step": 26215 + }, + { + "epoch": 1.2280882559610249, + "grad_norm": 0.6000937269460009, + "learning_rate": 1.711746440735e-06, + "loss": 0.284, + "step": 26216 + }, + { + "epoch": 1.2281351009509534, + "grad_norm": 0.5801472645938522, + "learning_rate": 1.7115664666090886e-06, + "loss": 0.2529, + "step": 26217 + }, + { + "epoch": 1.2281819459408816, + "grad_norm": 0.5836594856872072, + "learning_rate": 1.711386497020413e-06, + "loss": 0.2798, + "step": 26218 + }, + { + "epoch": 1.2282287909308098, + "grad_norm": 0.5977714851953699, + "learning_rate": 1.7112065319700106e-06, + "loss": 0.2864, + "step": 26219 + }, + { + "epoch": 1.2282756359207383, + "grad_norm": 0.5902689591500705, + "learning_rate": 1.7110265714589169e-06, + "loss": 0.2957, + "step": 26220 + }, + { + "epoch": 1.2283224809106665, + "grad_norm": 0.5699548147429416, + "learning_rate": 1.7108466154881665e-06, + "loss": 0.2736, + "step": 26221 + }, + { + "epoch": 1.228369325900595, + "grad_norm": 0.6073823769349705, + "learning_rate": 1.7106666640587949e-06, + "loss": 0.2614, + "step": 26222 + }, + { + "epoch": 1.2284161708905232, + "grad_norm": 0.5871370235585592, + "learning_rate": 1.7104867171718382e-06, + "loss": 0.2724, + "step": 26223 + }, + { + "epoch": 1.2284630158804517, + "grad_norm": 0.5805235390181216, + "learning_rate": 1.7103067748283328e-06, + "loss": 0.2752, + "step": 26224 + }, + { + "epoch": 1.22850986087038, + "grad_norm": 0.593923310783502, + "learning_rate": 1.7101268370293128e-06, + "loss": 0.2846, + "step": 26225 + }, + { + "epoch": 1.2285567058603082, + "grad_norm": 0.5921380558113682, + "learning_rate": 1.7099469037758154e-06, + "loss": 0.2765, + "step": 26226 + }, + { + "epoch": 1.2286035508502366, + "grad_norm": 0.5697923238952484, + "learning_rate": 1.7097669750688738e-06, + "loss": 0.2538, + "step": 26227 + }, + { + "epoch": 1.2286503958401649, + "grad_norm": 0.5755965047201946, + "learning_rate": 1.7095870509095253e-06, + "loss": 0.2741, + "step": 26228 + }, + { + "epoch": 1.2286972408300931, + "grad_norm": 0.613232078724169, + "learning_rate": 1.7094071312988042e-06, + "loss": 0.2847, + "step": 26229 + }, + { + "epoch": 1.2287440858200216, + "grad_norm": 0.6123906409650559, + "learning_rate": 1.709227216237746e-06, + "loss": 0.2653, + "step": 26230 + }, + { + "epoch": 1.2287909308099498, + "grad_norm": 0.5999310119894861, + "learning_rate": 1.7090473057273875e-06, + "loss": 0.2636, + "step": 26231 + }, + { + "epoch": 1.2288377757998783, + "grad_norm": 0.5931065428586266, + "learning_rate": 1.708867399768763e-06, + "loss": 0.2801, + "step": 26232 + }, + { + "epoch": 1.2288846207898065, + "grad_norm": 0.6530782319188777, + "learning_rate": 1.708687498362907e-06, + "loss": 0.2924, + "step": 26233 + }, + { + "epoch": 1.2289314657797348, + "grad_norm": 0.6042045047097792, + "learning_rate": 1.7085076015108553e-06, + "loss": 0.2705, + "step": 26234 + }, + { + "epoch": 1.2289783107696632, + "grad_norm": 0.5930471367732443, + "learning_rate": 1.7083277092136442e-06, + "loss": 0.2797, + "step": 26235 + }, + { + "epoch": 1.2290251557595915, + "grad_norm": 0.6190799417296755, + "learning_rate": 1.7081478214723074e-06, + "loss": 0.2506, + "step": 26236 + }, + { + "epoch": 1.22907200074952, + "grad_norm": 0.5732682728581014, + "learning_rate": 1.707967938287881e-06, + "loss": 0.2699, + "step": 26237 + }, + { + "epoch": 1.2291188457394482, + "grad_norm": 0.5765405166646584, + "learning_rate": 1.707788059661401e-06, + "loss": 0.2558, + "step": 26238 + }, + { + "epoch": 1.2291656907293764, + "grad_norm": 0.5701800836355051, + "learning_rate": 1.7076081855939015e-06, + "loss": 0.2641, + "step": 26239 + }, + { + "epoch": 1.2292125357193049, + "grad_norm": 0.5789319170109003, + "learning_rate": 1.7074283160864166e-06, + "loss": 0.2668, + "step": 26240 + }, + { + "epoch": 1.2292593807092331, + "grad_norm": 0.6042319474088061, + "learning_rate": 1.7072484511399828e-06, + "loss": 0.2737, + "step": 26241 + }, + { + "epoch": 1.2293062256991614, + "grad_norm": 0.6090759107041342, + "learning_rate": 1.7070685907556357e-06, + "loss": 0.2747, + "step": 26242 + }, + { + "epoch": 1.2293530706890898, + "grad_norm": 0.6089794966451023, + "learning_rate": 1.7068887349344088e-06, + "loss": 0.2734, + "step": 26243 + }, + { + "epoch": 1.229399915679018, + "grad_norm": 0.5970997227235268, + "learning_rate": 1.7067088836773388e-06, + "loss": 0.273, + "step": 26244 + }, + { + "epoch": 1.2294467606689465, + "grad_norm": 0.569151024082196, + "learning_rate": 1.7065290369854586e-06, + "loss": 0.2688, + "step": 26245 + }, + { + "epoch": 1.2294936056588748, + "grad_norm": 0.603372093855421, + "learning_rate": 1.7063491948598054e-06, + "loss": 0.2584, + "step": 26246 + }, + { + "epoch": 1.229540450648803, + "grad_norm": 0.5750989238111046, + "learning_rate": 1.706169357301412e-06, + "loss": 0.2608, + "step": 26247 + }, + { + "epoch": 1.2295872956387315, + "grad_norm": 0.6394664086311198, + "learning_rate": 1.7059895243113157e-06, + "loss": 0.2984, + "step": 26248 + }, + { + "epoch": 1.2296341406286597, + "grad_norm": 0.5959252567837734, + "learning_rate": 1.705809695890549e-06, + "loss": 0.2858, + "step": 26249 + }, + { + "epoch": 1.2296809856185882, + "grad_norm": 0.6247225449455128, + "learning_rate": 1.7056298720401493e-06, + "loss": 0.261, + "step": 26250 + }, + { + "epoch": 1.2297278306085164, + "grad_norm": 0.6137173296436428, + "learning_rate": 1.7054500527611486e-06, + "loss": 0.2724, + "step": 26251 + }, + { + "epoch": 1.2297746755984447, + "grad_norm": 0.6394451094022645, + "learning_rate": 1.705270238054584e-06, + "loss": 0.3031, + "step": 26252 + }, + { + "epoch": 1.2298215205883731, + "grad_norm": 0.5732615653306314, + "learning_rate": 1.7050904279214885e-06, + "loss": 0.2609, + "step": 26253 + }, + { + "epoch": 1.2298683655783014, + "grad_norm": 0.5843958320568575, + "learning_rate": 1.704910622362898e-06, + "loss": 0.2695, + "step": 26254 + }, + { + "epoch": 1.2299152105682296, + "grad_norm": 0.6378192607932984, + "learning_rate": 1.704730821379848e-06, + "loss": 0.2824, + "step": 26255 + }, + { + "epoch": 1.229962055558158, + "grad_norm": 0.6234756897431504, + "learning_rate": 1.704551024973371e-06, + "loss": 0.2764, + "step": 26256 + }, + { + "epoch": 1.2300089005480863, + "grad_norm": 0.5863693331008871, + "learning_rate": 1.7043712331445045e-06, + "loss": 0.28, + "step": 26257 + }, + { + "epoch": 1.2300557455380148, + "grad_norm": 0.5818151720156172, + "learning_rate": 1.7041914458942804e-06, + "loss": 0.2854, + "step": 26258 + }, + { + "epoch": 1.230102590527943, + "grad_norm": 0.6048948213280173, + "learning_rate": 1.704011663223735e-06, + "loss": 0.2782, + "step": 26259 + }, + { + "epoch": 1.2301494355178715, + "grad_norm": 0.6100201416470767, + "learning_rate": 1.703831885133902e-06, + "loss": 0.2755, + "step": 26260 + }, + { + "epoch": 1.2301962805077997, + "grad_norm": 0.5992825028340134, + "learning_rate": 1.7036521116258158e-06, + "loss": 0.2653, + "step": 26261 + }, + { + "epoch": 1.230243125497728, + "grad_norm": 0.6078266588251502, + "learning_rate": 1.7034723427005129e-06, + "loss": 0.2791, + "step": 26262 + }, + { + "epoch": 1.2302899704876564, + "grad_norm": 0.5778733302162792, + "learning_rate": 1.7032925783590266e-06, + "loss": 0.2707, + "step": 26263 + }, + { + "epoch": 1.2303368154775847, + "grad_norm": 0.6320782818923385, + "learning_rate": 1.70311281860239e-06, + "loss": 0.3052, + "step": 26264 + }, + { + "epoch": 1.230383660467513, + "grad_norm": 0.6041110618797921, + "learning_rate": 1.7029330634316393e-06, + "loss": 0.2972, + "step": 26265 + }, + { + "epoch": 1.2304305054574414, + "grad_norm": 0.5842777021381698, + "learning_rate": 1.7027533128478085e-06, + "loss": 0.2684, + "step": 26266 + }, + { + "epoch": 1.2304773504473696, + "grad_norm": 0.5925973816487292, + "learning_rate": 1.702573566851932e-06, + "loss": 0.2751, + "step": 26267 + }, + { + "epoch": 1.230524195437298, + "grad_norm": 0.5925862588767601, + "learning_rate": 1.702393825445044e-06, + "loss": 0.2693, + "step": 26268 + }, + { + "epoch": 1.2305710404272263, + "grad_norm": 0.6139000484936771, + "learning_rate": 1.7022140886281798e-06, + "loss": 0.2801, + "step": 26269 + }, + { + "epoch": 1.2306178854171546, + "grad_norm": 0.6558413158721464, + "learning_rate": 1.7020343564023727e-06, + "loss": 0.3069, + "step": 26270 + }, + { + "epoch": 1.230664730407083, + "grad_norm": 0.6178666667474092, + "learning_rate": 1.7018546287686571e-06, + "loss": 0.2831, + "step": 26271 + }, + { + "epoch": 1.2307115753970113, + "grad_norm": 0.5579432138860879, + "learning_rate": 1.701674905728067e-06, + "loss": 0.2707, + "step": 26272 + }, + { + "epoch": 1.2307584203869397, + "grad_norm": 0.659584907851513, + "learning_rate": 1.7014951872816382e-06, + "loss": 0.2876, + "step": 26273 + }, + { + "epoch": 1.230805265376868, + "grad_norm": 0.5342288791021177, + "learning_rate": 1.7013154734304029e-06, + "loss": 0.2682, + "step": 26274 + }, + { + "epoch": 1.2308521103667962, + "grad_norm": 0.5919812839379051, + "learning_rate": 1.701135764175398e-06, + "loss": 0.2739, + "step": 26275 + }, + { + "epoch": 1.2308989553567247, + "grad_norm": 0.5844848745955261, + "learning_rate": 1.700956059517654e-06, + "loss": 0.2576, + "step": 26276 + }, + { + "epoch": 1.230945800346653, + "grad_norm": 0.5863317308529249, + "learning_rate": 1.7007763594582084e-06, + "loss": 0.2601, + "step": 26277 + }, + { + "epoch": 1.2309926453365811, + "grad_norm": 0.5799891888227486, + "learning_rate": 1.7005966639980932e-06, + "loss": 0.2715, + "step": 26278 + }, + { + "epoch": 1.2310394903265096, + "grad_norm": 0.5616639047565479, + "learning_rate": 1.700416973138343e-06, + "loss": 0.2791, + "step": 26279 + }, + { + "epoch": 1.2310863353164379, + "grad_norm": 0.5782212275725975, + "learning_rate": 1.7002372868799927e-06, + "loss": 0.2643, + "step": 26280 + }, + { + "epoch": 1.2311331803063663, + "grad_norm": 0.6408712261140826, + "learning_rate": 1.7000576052240767e-06, + "loss": 0.2863, + "step": 26281 + }, + { + "epoch": 1.2311800252962946, + "grad_norm": 0.5768858047932888, + "learning_rate": 1.6998779281716269e-06, + "loss": 0.2799, + "step": 26282 + }, + { + "epoch": 1.2312268702862228, + "grad_norm": 0.6109597635656903, + "learning_rate": 1.6996982557236786e-06, + "loss": 0.2571, + "step": 26283 + }, + { + "epoch": 1.2312737152761513, + "grad_norm": 0.5804157994195362, + "learning_rate": 1.699518587881266e-06, + "loss": 0.2744, + "step": 26284 + }, + { + "epoch": 1.2313205602660795, + "grad_norm": 0.6061266431397515, + "learning_rate": 1.6993389246454223e-06, + "loss": 0.2746, + "step": 26285 + }, + { + "epoch": 1.231367405256008, + "grad_norm": 0.6095097136914617, + "learning_rate": 1.6991592660171819e-06, + "loss": 0.2736, + "step": 26286 + }, + { + "epoch": 1.2314142502459362, + "grad_norm": 0.5627190288617001, + "learning_rate": 1.6989796119975795e-06, + "loss": 0.2678, + "step": 26287 + }, + { + "epoch": 1.2314610952358644, + "grad_norm": 0.6003387035809447, + "learning_rate": 1.6987999625876478e-06, + "loss": 0.2765, + "step": 26288 + }, + { + "epoch": 1.231507940225793, + "grad_norm": 0.556962790916313, + "learning_rate": 1.6986203177884202e-06, + "loss": 0.2625, + "step": 26289 + }, + { + "epoch": 1.2315547852157211, + "grad_norm": 0.5556387807000348, + "learning_rate": 1.6984406776009315e-06, + "loss": 0.2802, + "step": 26290 + }, + { + "epoch": 1.2316016302056494, + "grad_norm": 0.6134996082076537, + "learning_rate": 1.6982610420262157e-06, + "loss": 0.2819, + "step": 26291 + }, + { + "epoch": 1.2316484751955779, + "grad_norm": 0.6609938704570895, + "learning_rate": 1.6980814110653057e-06, + "loss": 0.2912, + "step": 26292 + }, + { + "epoch": 1.231695320185506, + "grad_norm": 0.597241575648566, + "learning_rate": 1.6979017847192364e-06, + "loss": 0.2874, + "step": 26293 + }, + { + "epoch": 1.2317421651754346, + "grad_norm": 0.635385826604625, + "learning_rate": 1.69772216298904e-06, + "loss": 0.2841, + "step": 26294 + }, + { + "epoch": 1.2317890101653628, + "grad_norm": 0.5612461214724538, + "learning_rate": 1.697542545875751e-06, + "loss": 0.264, + "step": 26295 + }, + { + "epoch": 1.2318358551552913, + "grad_norm": 0.6350569994967726, + "learning_rate": 1.6973629333804027e-06, + "loss": 0.2832, + "step": 26296 + }, + { + "epoch": 1.2318827001452195, + "grad_norm": 0.6180108214889437, + "learning_rate": 1.6971833255040295e-06, + "loss": 0.2766, + "step": 26297 + }, + { + "epoch": 1.2319295451351477, + "grad_norm": 0.6187401473108814, + "learning_rate": 1.697003722247664e-06, + "loss": 0.2847, + "step": 26298 + }, + { + "epoch": 1.2319763901250762, + "grad_norm": 0.6293438034821457, + "learning_rate": 1.6968241236123412e-06, + "loss": 0.2759, + "step": 26299 + }, + { + "epoch": 1.2320232351150044, + "grad_norm": 0.64075725353136, + "learning_rate": 1.6966445295990927e-06, + "loss": 0.2856, + "step": 26300 + }, + { + "epoch": 1.2320700801049327, + "grad_norm": 0.5821369729194041, + "learning_rate": 1.6964649402089534e-06, + "loss": 0.2888, + "step": 26301 + }, + { + "epoch": 1.2321169250948611, + "grad_norm": 0.6044950653251148, + "learning_rate": 1.696285355442956e-06, + "loss": 0.2939, + "step": 26302 + }, + { + "epoch": 1.2321637700847894, + "grad_norm": 0.6322900264532835, + "learning_rate": 1.6961057753021344e-06, + "loss": 0.2904, + "step": 26303 + }, + { + "epoch": 1.2322106150747179, + "grad_norm": 0.6207988979421201, + "learning_rate": 1.6959261997875225e-06, + "loss": 0.2785, + "step": 26304 + }, + { + "epoch": 1.232257460064646, + "grad_norm": 0.6355668252354386, + "learning_rate": 1.695746628900154e-06, + "loss": 0.2787, + "step": 26305 + }, + { + "epoch": 1.2323043050545743, + "grad_norm": 0.5717908973429392, + "learning_rate": 1.69556706264106e-06, + "loss": 0.2751, + "step": 26306 + }, + { + "epoch": 1.2323511500445028, + "grad_norm": 0.6246521126172684, + "learning_rate": 1.6953875010112759e-06, + "loss": 0.2845, + "step": 26307 + }, + { + "epoch": 1.232397995034431, + "grad_norm": 0.5936379534324914, + "learning_rate": 1.6952079440118346e-06, + "loss": 0.2798, + "step": 26308 + }, + { + "epoch": 1.2324448400243595, + "grad_norm": 0.5899353639201667, + "learning_rate": 1.6950283916437688e-06, + "loss": 0.2856, + "step": 26309 + }, + { + "epoch": 1.2324916850142877, + "grad_norm": 0.5535943209360267, + "learning_rate": 1.6948488439081124e-06, + "loss": 0.2707, + "step": 26310 + }, + { + "epoch": 1.232538530004216, + "grad_norm": 0.6281790707850984, + "learning_rate": 1.6946693008058996e-06, + "loss": 0.2893, + "step": 26311 + }, + { + "epoch": 1.2325853749941444, + "grad_norm": 0.5487805346297393, + "learning_rate": 1.6944897623381618e-06, + "loss": 0.2651, + "step": 26312 + }, + { + "epoch": 1.2326322199840727, + "grad_norm": 0.5507195791798336, + "learning_rate": 1.6943102285059326e-06, + "loss": 0.2632, + "step": 26313 + }, + { + "epoch": 1.232679064974001, + "grad_norm": 0.6250264683747798, + "learning_rate": 1.6941306993102458e-06, + "loss": 0.2761, + "step": 26314 + }, + { + "epoch": 1.2327259099639294, + "grad_norm": 0.558332893264138, + "learning_rate": 1.6939511747521345e-06, + "loss": 0.2719, + "step": 26315 + }, + { + "epoch": 1.2327727549538576, + "grad_norm": 0.5749091407076762, + "learning_rate": 1.6937716548326311e-06, + "loss": 0.2567, + "step": 26316 + }, + { + "epoch": 1.232819599943786, + "grad_norm": 0.617170033708973, + "learning_rate": 1.6935921395527705e-06, + "loss": 0.2628, + "step": 26317 + }, + { + "epoch": 1.2328664449337143, + "grad_norm": 0.5595526109912307, + "learning_rate": 1.693412628913583e-06, + "loss": 0.2686, + "step": 26318 + }, + { + "epoch": 1.2329132899236426, + "grad_norm": 0.5999140760250365, + "learning_rate": 1.6932331229161036e-06, + "loss": 0.2883, + "step": 26319 + }, + { + "epoch": 1.232960134913571, + "grad_norm": 0.6364094038158918, + "learning_rate": 1.6930536215613647e-06, + "loss": 0.2929, + "step": 26320 + }, + { + "epoch": 1.2330069799034993, + "grad_norm": 0.5739915548506144, + "learning_rate": 1.6928741248503991e-06, + "loss": 0.2744, + "step": 26321 + }, + { + "epoch": 1.2330538248934277, + "grad_norm": 0.5822044354927342, + "learning_rate": 1.6926946327842408e-06, + "loss": 0.2732, + "step": 26322 + }, + { + "epoch": 1.233100669883356, + "grad_norm": 0.5840616695512784, + "learning_rate": 1.692515145363921e-06, + "loss": 0.2708, + "step": 26323 + }, + { + "epoch": 1.2331475148732842, + "grad_norm": 0.6060277367019108, + "learning_rate": 1.6923356625904752e-06, + "loss": 0.2629, + "step": 26324 + }, + { + "epoch": 1.2331943598632127, + "grad_norm": 0.5857485032506247, + "learning_rate": 1.6921561844649332e-06, + "loss": 0.2771, + "step": 26325 + }, + { + "epoch": 1.233241204853141, + "grad_norm": 0.5948035248392232, + "learning_rate": 1.69197671098833e-06, + "loss": 0.2728, + "step": 26326 + }, + { + "epoch": 1.2332880498430692, + "grad_norm": 0.6011578566313898, + "learning_rate": 1.6917972421616973e-06, + "loss": 0.2619, + "step": 26327 + }, + { + "epoch": 1.2333348948329976, + "grad_norm": 0.640061247770228, + "learning_rate": 1.6916177779860682e-06, + "loss": 0.2855, + "step": 26328 + }, + { + "epoch": 1.2333817398229259, + "grad_norm": 0.5972522071108212, + "learning_rate": 1.691438318462476e-06, + "loss": 0.2694, + "step": 26329 + }, + { + "epoch": 1.2334285848128543, + "grad_norm": 0.5673202017188016, + "learning_rate": 1.6912588635919538e-06, + "loss": 0.2739, + "step": 26330 + }, + { + "epoch": 1.2334754298027826, + "grad_norm": 0.5932185765841342, + "learning_rate": 1.6910794133755327e-06, + "loss": 0.2563, + "step": 26331 + }, + { + "epoch": 1.233522274792711, + "grad_norm": 0.5875157473787851, + "learning_rate": 1.6908999678142458e-06, + "loss": 0.2654, + "step": 26332 + }, + { + "epoch": 1.2335691197826393, + "grad_norm": 0.5816566461592272, + "learning_rate": 1.6907205269091271e-06, + "loss": 0.2629, + "step": 26333 + }, + { + "epoch": 1.2336159647725675, + "grad_norm": 0.6044086782297642, + "learning_rate": 1.6905410906612076e-06, + "loss": 0.2673, + "step": 26334 + }, + { + "epoch": 1.233662809762496, + "grad_norm": 0.6132837442346594, + "learning_rate": 1.6903616590715216e-06, + "loss": 0.2765, + "step": 26335 + }, + { + "epoch": 1.2337096547524242, + "grad_norm": 0.5635117054211207, + "learning_rate": 1.6901822321411005e-06, + "loss": 0.26, + "step": 26336 + }, + { + "epoch": 1.2337564997423525, + "grad_norm": 0.6080602484642015, + "learning_rate": 1.690002809870977e-06, + "loss": 0.2671, + "step": 26337 + }, + { + "epoch": 1.233803344732281, + "grad_norm": 0.5686184394586471, + "learning_rate": 1.6898233922621832e-06, + "loss": 0.2707, + "step": 26338 + }, + { + "epoch": 1.2338501897222092, + "grad_norm": 0.6246241675962504, + "learning_rate": 1.6896439793157526e-06, + "loss": 0.2635, + "step": 26339 + }, + { + "epoch": 1.2338970347121376, + "grad_norm": 0.6143757663172986, + "learning_rate": 1.689464571032717e-06, + "loss": 0.279, + "step": 26340 + }, + { + "epoch": 1.2339438797020659, + "grad_norm": 0.5859083252050566, + "learning_rate": 1.6892851674141093e-06, + "loss": 0.2756, + "step": 26341 + }, + { + "epoch": 1.2339907246919941, + "grad_norm": 0.6378615880854535, + "learning_rate": 1.6891057684609621e-06, + "loss": 0.2917, + "step": 26342 + }, + { + "epoch": 1.2340375696819226, + "grad_norm": 0.6006205182593601, + "learning_rate": 1.6889263741743073e-06, + "loss": 0.2812, + "step": 26343 + }, + { + "epoch": 1.2340844146718508, + "grad_norm": 0.597490448550303, + "learning_rate": 1.6887469845551765e-06, + "loss": 0.2731, + "step": 26344 + }, + { + "epoch": 1.2341312596617793, + "grad_norm": 0.5640459267680874, + "learning_rate": 1.6885675996046031e-06, + "loss": 0.2711, + "step": 26345 + }, + { + "epoch": 1.2341781046517075, + "grad_norm": 0.6106301696399651, + "learning_rate": 1.6883882193236199e-06, + "loss": 0.2732, + "step": 26346 + }, + { + "epoch": 1.2342249496416358, + "grad_norm": 0.5978908305240517, + "learning_rate": 1.6882088437132576e-06, + "loss": 0.2826, + "step": 26347 + }, + { + "epoch": 1.2342717946315642, + "grad_norm": 0.5608195045434522, + "learning_rate": 1.6880294727745508e-06, + "loss": 0.2575, + "step": 26348 + }, + { + "epoch": 1.2343186396214925, + "grad_norm": 0.5760937212596452, + "learning_rate": 1.687850106508529e-06, + "loss": 0.2713, + "step": 26349 + }, + { + "epoch": 1.2343654846114207, + "grad_norm": 0.5514724257268077, + "learning_rate": 1.6876707449162266e-06, + "loss": 0.2664, + "step": 26350 + }, + { + "epoch": 1.2344123296013492, + "grad_norm": 0.6036782043939185, + "learning_rate": 1.6874913879986738e-06, + "loss": 0.2842, + "step": 26351 + }, + { + "epoch": 1.2344591745912774, + "grad_norm": 0.553945997220396, + "learning_rate": 1.6873120357569045e-06, + "loss": 0.2548, + "step": 26352 + }, + { + "epoch": 1.2345060195812059, + "grad_norm": 0.566316830144704, + "learning_rate": 1.6871326881919503e-06, + "loss": 0.2699, + "step": 26353 + }, + { + "epoch": 1.2345528645711341, + "grad_norm": 0.5972501021805445, + "learning_rate": 1.6869533453048437e-06, + "loss": 0.2766, + "step": 26354 + }, + { + "epoch": 1.2345997095610624, + "grad_norm": 0.6113521903098165, + "learning_rate": 1.686774007096615e-06, + "loss": 0.2832, + "step": 26355 + }, + { + "epoch": 1.2346465545509908, + "grad_norm": 0.561617075850221, + "learning_rate": 1.6865946735682978e-06, + "loss": 0.2661, + "step": 26356 + }, + { + "epoch": 1.234693399540919, + "grad_norm": 0.5686012156547315, + "learning_rate": 1.6864153447209242e-06, + "loss": 0.2769, + "step": 26357 + }, + { + "epoch": 1.2347402445308475, + "grad_norm": 0.579848294245655, + "learning_rate": 1.6862360205555256e-06, + "loss": 0.2741, + "step": 26358 + }, + { + "epoch": 1.2347870895207758, + "grad_norm": 0.5676923393925454, + "learning_rate": 1.6860567010731339e-06, + "loss": 0.2594, + "step": 26359 + }, + { + "epoch": 1.234833934510704, + "grad_norm": 0.5843166144679371, + "learning_rate": 1.6858773862747823e-06, + "loss": 0.2804, + "step": 26360 + }, + { + "epoch": 1.2348807795006325, + "grad_norm": 0.5769428306741208, + "learning_rate": 1.685698076161501e-06, + "loss": 0.2747, + "step": 26361 + }, + { + "epoch": 1.2349276244905607, + "grad_norm": 0.5658183167405039, + "learning_rate": 1.6855187707343226e-06, + "loss": 0.2693, + "step": 26362 + }, + { + "epoch": 1.234974469480489, + "grad_norm": 0.6068933412859683, + "learning_rate": 1.6853394699942787e-06, + "loss": 0.2672, + "step": 26363 + }, + { + "epoch": 1.2350213144704174, + "grad_norm": 0.5722237694384129, + "learning_rate": 1.685160173942402e-06, + "loss": 0.273, + "step": 26364 + }, + { + "epoch": 1.2350681594603456, + "grad_norm": 0.5991389376757273, + "learning_rate": 1.6849808825797232e-06, + "loss": 0.2881, + "step": 26365 + }, + { + "epoch": 1.2351150044502741, + "grad_norm": 0.5839308471985991, + "learning_rate": 1.6848015959072755e-06, + "loss": 0.2617, + "step": 26366 + }, + { + "epoch": 1.2351618494402024, + "grad_norm": 0.6323296608260205, + "learning_rate": 1.6846223139260887e-06, + "loss": 0.2829, + "step": 26367 + }, + { + "epoch": 1.2352086944301308, + "grad_norm": 0.5640905097853365, + "learning_rate": 1.6844430366371962e-06, + "loss": 0.2696, + "step": 26368 + }, + { + "epoch": 1.235255539420059, + "grad_norm": 0.5421259978764811, + "learning_rate": 1.6842637640416282e-06, + "loss": 0.2662, + "step": 26369 + }, + { + "epoch": 1.2353023844099873, + "grad_norm": 0.5865507080514356, + "learning_rate": 1.6840844961404173e-06, + "loss": 0.2912, + "step": 26370 + }, + { + "epoch": 1.2353492293999158, + "grad_norm": 0.6229146604451284, + "learning_rate": 1.6839052329345956e-06, + "loss": 0.2714, + "step": 26371 + }, + { + "epoch": 1.235396074389844, + "grad_norm": 0.6036101758218296, + "learning_rate": 1.683725974425195e-06, + "loss": 0.2836, + "step": 26372 + }, + { + "epoch": 1.2354429193797722, + "grad_norm": 0.6287618419701433, + "learning_rate": 1.683546720613245e-06, + "loss": 0.2977, + "step": 26373 + }, + { + "epoch": 1.2354897643697007, + "grad_norm": 0.6163725329059032, + "learning_rate": 1.6833674714997783e-06, + "loss": 0.2895, + "step": 26374 + }, + { + "epoch": 1.235536609359629, + "grad_norm": 0.6111374336923268, + "learning_rate": 1.683188227085827e-06, + "loss": 0.274, + "step": 26375 + }, + { + "epoch": 1.2355834543495574, + "grad_norm": 0.6189547067283826, + "learning_rate": 1.683008987372422e-06, + "loss": 0.2947, + "step": 26376 + }, + { + "epoch": 1.2356302993394856, + "grad_norm": 0.6067638619481039, + "learning_rate": 1.6828297523605946e-06, + "loss": 0.2858, + "step": 26377 + }, + { + "epoch": 1.235677144329414, + "grad_norm": 0.5610020561796781, + "learning_rate": 1.6826505220513777e-06, + "loss": 0.2743, + "step": 26378 + }, + { + "epoch": 1.2357239893193424, + "grad_norm": 0.5509384836700514, + "learning_rate": 1.6824712964458011e-06, + "loss": 0.2615, + "step": 26379 + }, + { + "epoch": 1.2357708343092706, + "grad_norm": 0.6455562611583263, + "learning_rate": 1.6822920755448962e-06, + "loss": 0.2906, + "step": 26380 + }, + { + "epoch": 1.235817679299199, + "grad_norm": 0.5739757398933995, + "learning_rate": 1.682112859349695e-06, + "loss": 0.2714, + "step": 26381 + }, + { + "epoch": 1.2358645242891273, + "grad_norm": 0.6732627954534103, + "learning_rate": 1.6819336478612291e-06, + "loss": 0.2953, + "step": 26382 + }, + { + "epoch": 1.2359113692790555, + "grad_norm": 0.6145088326361764, + "learning_rate": 1.6817544410805293e-06, + "loss": 0.2823, + "step": 26383 + }, + { + "epoch": 1.235958214268984, + "grad_norm": 0.5697417967781788, + "learning_rate": 1.6815752390086277e-06, + "loss": 0.2714, + "step": 26384 + }, + { + "epoch": 1.2360050592589122, + "grad_norm": 0.6360530661601796, + "learning_rate": 1.6813960416465538e-06, + "loss": 0.2969, + "step": 26385 + }, + { + "epoch": 1.2360519042488405, + "grad_norm": 0.5802333529831087, + "learning_rate": 1.6812168489953406e-06, + "loss": 0.269, + "step": 26386 + }, + { + "epoch": 1.236098749238769, + "grad_norm": 0.6019424500383884, + "learning_rate": 1.6810376610560181e-06, + "loss": 0.2719, + "step": 26387 + }, + { + "epoch": 1.2361455942286972, + "grad_norm": 0.5457708636061315, + "learning_rate": 1.6808584778296185e-06, + "loss": 0.2666, + "step": 26388 + }, + { + "epoch": 1.2361924392186256, + "grad_norm": 0.566078623858671, + "learning_rate": 1.6806792993171722e-06, + "loss": 0.2711, + "step": 26389 + }, + { + "epoch": 1.236239284208554, + "grad_norm": 0.5559116264145335, + "learning_rate": 1.6805001255197102e-06, + "loss": 0.2672, + "step": 26390 + }, + { + "epoch": 1.2362861291984821, + "grad_norm": 0.5967296187434855, + "learning_rate": 1.6803209564382653e-06, + "loss": 0.2664, + "step": 26391 + }, + { + "epoch": 1.2363329741884106, + "grad_norm": 0.5602402233238348, + "learning_rate": 1.6801417920738666e-06, + "loss": 0.2723, + "step": 26392 + }, + { + "epoch": 1.2363798191783388, + "grad_norm": 0.5612380346020465, + "learning_rate": 1.6799626324275454e-06, + "loss": 0.2535, + "step": 26393 + }, + { + "epoch": 1.2364266641682673, + "grad_norm": 0.5835357058139202, + "learning_rate": 1.6797834775003333e-06, + "loss": 0.277, + "step": 26394 + }, + { + "epoch": 1.2364735091581955, + "grad_norm": 0.611279908949327, + "learning_rate": 1.6796043272932613e-06, + "loss": 0.2832, + "step": 26395 + }, + { + "epoch": 1.2365203541481238, + "grad_norm": 0.6045892357664905, + "learning_rate": 1.6794251818073598e-06, + "loss": 0.2729, + "step": 26396 + }, + { + "epoch": 1.2365671991380522, + "grad_norm": 0.5751470530623235, + "learning_rate": 1.6792460410436614e-06, + "loss": 0.2668, + "step": 26397 + }, + { + "epoch": 1.2366140441279805, + "grad_norm": 0.596744757482888, + "learning_rate": 1.6790669050031946e-06, + "loss": 0.2524, + "step": 26398 + }, + { + "epoch": 1.2366608891179087, + "grad_norm": 0.5570498650085012, + "learning_rate": 1.678887773686992e-06, + "loss": 0.2655, + "step": 26399 + }, + { + "epoch": 1.2367077341078372, + "grad_norm": 0.5826453725143805, + "learning_rate": 1.678708647096083e-06, + "loss": 0.2627, + "step": 26400 + }, + { + "epoch": 1.2367545790977654, + "grad_norm": 0.591673902721073, + "learning_rate": 1.6785295252314998e-06, + "loss": 0.279, + "step": 26401 + }, + { + "epoch": 1.236801424087694, + "grad_norm": 0.548205562136069, + "learning_rate": 1.678350408094273e-06, + "loss": 0.2628, + "step": 26402 + }, + { + "epoch": 1.2368482690776221, + "grad_norm": 0.5642362083036583, + "learning_rate": 1.6781712956854335e-06, + "loss": 0.2666, + "step": 26403 + }, + { + "epoch": 1.2368951140675506, + "grad_norm": 0.5664444572431258, + "learning_rate": 1.6779921880060107e-06, + "loss": 0.2632, + "step": 26404 + }, + { + "epoch": 1.2369419590574788, + "grad_norm": 0.582474759264662, + "learning_rate": 1.6778130850570362e-06, + "loss": 0.2502, + "step": 26405 + }, + { + "epoch": 1.236988804047407, + "grad_norm": 0.6358035564610857, + "learning_rate": 1.6776339868395414e-06, + "loss": 0.2837, + "step": 26406 + }, + { + "epoch": 1.2370356490373355, + "grad_norm": 0.6227227407999345, + "learning_rate": 1.677454893354556e-06, + "loss": 0.2734, + "step": 26407 + }, + { + "epoch": 1.2370824940272638, + "grad_norm": 0.5628236591811102, + "learning_rate": 1.6772758046031105e-06, + "loss": 0.2567, + "step": 26408 + }, + { + "epoch": 1.237129339017192, + "grad_norm": 0.6113007208497374, + "learning_rate": 1.677096720586237e-06, + "loss": 0.2834, + "step": 26409 + }, + { + "epoch": 1.2371761840071205, + "grad_norm": 0.6510128765166956, + "learning_rate": 1.6769176413049648e-06, + "loss": 0.2851, + "step": 26410 + }, + { + "epoch": 1.2372230289970487, + "grad_norm": 0.626832885227421, + "learning_rate": 1.6767385667603242e-06, + "loss": 0.2765, + "step": 26411 + }, + { + "epoch": 1.2372698739869772, + "grad_norm": 0.5500051573584265, + "learning_rate": 1.6765594969533462e-06, + "loss": 0.2593, + "step": 26412 + }, + { + "epoch": 1.2373167189769054, + "grad_norm": 0.604809339721457, + "learning_rate": 1.6763804318850616e-06, + "loss": 0.2659, + "step": 26413 + }, + { + "epoch": 1.2373635639668337, + "grad_norm": 0.5571345264128418, + "learning_rate": 1.6762013715565002e-06, + "loss": 0.2677, + "step": 26414 + }, + { + "epoch": 1.2374104089567621, + "grad_norm": 0.587360103001398, + "learning_rate": 1.6760223159686939e-06, + "loss": 0.2607, + "step": 26415 + }, + { + "epoch": 1.2374572539466904, + "grad_norm": 0.5918660488356655, + "learning_rate": 1.6758432651226712e-06, + "loss": 0.2856, + "step": 26416 + }, + { + "epoch": 1.2375040989366188, + "grad_norm": 0.5975363661375673, + "learning_rate": 1.6756642190194638e-06, + "loss": 0.2757, + "step": 26417 + }, + { + "epoch": 1.237550943926547, + "grad_norm": 0.6127034057858923, + "learning_rate": 1.675485177660101e-06, + "loss": 0.2791, + "step": 26418 + }, + { + "epoch": 1.2375977889164753, + "grad_norm": 0.5692875597362206, + "learning_rate": 1.675306141045614e-06, + "loss": 0.2738, + "step": 26419 + }, + { + "epoch": 1.2376446339064038, + "grad_norm": 0.5669981249700395, + "learning_rate": 1.6751271091770329e-06, + "loss": 0.2738, + "step": 26420 + }, + { + "epoch": 1.237691478896332, + "grad_norm": 0.6350279458421119, + "learning_rate": 1.674948082055389e-06, + "loss": 0.2908, + "step": 26421 + }, + { + "epoch": 1.2377383238862603, + "grad_norm": 0.5418537321613535, + "learning_rate": 1.67476905968171e-06, + "loss": 0.264, + "step": 26422 + }, + { + "epoch": 1.2377851688761887, + "grad_norm": 0.5878599688686476, + "learning_rate": 1.6745900420570283e-06, + "loss": 0.2869, + "step": 26423 + }, + { + "epoch": 1.237832013866117, + "grad_norm": 0.6247861678488318, + "learning_rate": 1.6744110291823735e-06, + "loss": 0.2829, + "step": 26424 + }, + { + "epoch": 1.2378788588560454, + "grad_norm": 0.5768062151709697, + "learning_rate": 1.6742320210587754e-06, + "loss": 0.2814, + "step": 26425 + }, + { + "epoch": 1.2379257038459737, + "grad_norm": 0.6492396558903968, + "learning_rate": 1.6740530176872649e-06, + "loss": 0.2678, + "step": 26426 + }, + { + "epoch": 1.237972548835902, + "grad_norm": 0.5738470470548863, + "learning_rate": 1.6738740190688715e-06, + "loss": 0.2682, + "step": 26427 + }, + { + "epoch": 1.2380193938258304, + "grad_norm": 0.6046186812134605, + "learning_rate": 1.6736950252046258e-06, + "loss": 0.2759, + "step": 26428 + }, + { + "epoch": 1.2380662388157586, + "grad_norm": 0.5988827859604149, + "learning_rate": 1.6735160360955568e-06, + "loss": 0.2868, + "step": 26429 + }, + { + "epoch": 1.238113083805687, + "grad_norm": 0.6206747267487259, + "learning_rate": 1.6733370517426956e-06, + "loss": 0.2853, + "step": 26430 + }, + { + "epoch": 1.2381599287956153, + "grad_norm": 0.6870780662304427, + "learning_rate": 1.673158072147072e-06, + "loss": 0.2917, + "step": 26431 + }, + { + "epoch": 1.2382067737855436, + "grad_norm": 0.6771679653097166, + "learning_rate": 1.6729790973097155e-06, + "loss": 0.2747, + "step": 26432 + }, + { + "epoch": 1.238253618775472, + "grad_norm": 0.5707536548242311, + "learning_rate": 1.6728001272316576e-06, + "loss": 0.2722, + "step": 26433 + }, + { + "epoch": 1.2383004637654003, + "grad_norm": 0.5499179508807938, + "learning_rate": 1.672621161913926e-06, + "loss": 0.2552, + "step": 26434 + }, + { + "epoch": 1.2383473087553285, + "grad_norm": 0.5610635223355652, + "learning_rate": 1.672442201357552e-06, + "loss": 0.2579, + "step": 26435 + }, + { + "epoch": 1.238394153745257, + "grad_norm": 0.6179879659427732, + "learning_rate": 1.672263245563565e-06, + "loss": 0.272, + "step": 26436 + }, + { + "epoch": 1.2384409987351852, + "grad_norm": 0.5707802842803962, + "learning_rate": 1.672084294532995e-06, + "loss": 0.2641, + "step": 26437 + }, + { + "epoch": 1.2384878437251137, + "grad_norm": 0.62588192846847, + "learning_rate": 1.6719053482668719e-06, + "loss": 0.2882, + "step": 26438 + }, + { + "epoch": 1.238534688715042, + "grad_norm": 0.5779227750407611, + "learning_rate": 1.6717264067662259e-06, + "loss": 0.2539, + "step": 26439 + }, + { + "epoch": 1.2385815337049704, + "grad_norm": 0.6066229158398208, + "learning_rate": 1.6715474700320852e-06, + "loss": 0.2778, + "step": 26440 + }, + { + "epoch": 1.2386283786948986, + "grad_norm": 0.5694404441765808, + "learning_rate": 1.6713685380654812e-06, + "loss": 0.2586, + "step": 26441 + }, + { + "epoch": 1.2386752236848269, + "grad_norm": 0.6043346191008053, + "learning_rate": 1.6711896108674424e-06, + "loss": 0.2823, + "step": 26442 + }, + { + "epoch": 1.2387220686747553, + "grad_norm": 0.6180567571899753, + "learning_rate": 1.6710106884389994e-06, + "loss": 0.2862, + "step": 26443 + }, + { + "epoch": 1.2387689136646836, + "grad_norm": 0.6022168794662826, + "learning_rate": 1.6708317707811816e-06, + "loss": 0.2644, + "step": 26444 + }, + { + "epoch": 1.2388157586546118, + "grad_norm": 0.6126247296051053, + "learning_rate": 1.6706528578950196e-06, + "loss": 0.2798, + "step": 26445 + }, + { + "epoch": 1.2388626036445403, + "grad_norm": 0.570665855105203, + "learning_rate": 1.6704739497815406e-06, + "loss": 0.2653, + "step": 26446 + }, + { + "epoch": 1.2389094486344685, + "grad_norm": 0.6142561160637651, + "learning_rate": 1.6702950464417755e-06, + "loss": 0.2953, + "step": 26447 + }, + { + "epoch": 1.238956293624397, + "grad_norm": 0.6239113679160359, + "learning_rate": 1.6701161478767546e-06, + "loss": 0.2744, + "step": 26448 + }, + { + "epoch": 1.2390031386143252, + "grad_norm": 0.601335717266593, + "learning_rate": 1.6699372540875058e-06, + "loss": 0.278, + "step": 26449 + }, + { + "epoch": 1.2390499836042534, + "grad_norm": 0.6003270974500481, + "learning_rate": 1.6697583650750598e-06, + "loss": 0.2983, + "step": 26450 + }, + { + "epoch": 1.239096828594182, + "grad_norm": 0.5539494872938953, + "learning_rate": 1.6695794808404466e-06, + "loss": 0.2654, + "step": 26451 + }, + { + "epoch": 1.2391436735841102, + "grad_norm": 0.6111380802464241, + "learning_rate": 1.6694006013846944e-06, + "loss": 0.2856, + "step": 26452 + }, + { + "epoch": 1.2391905185740386, + "grad_norm": 0.5930078072574066, + "learning_rate": 1.6692217267088324e-06, + "loss": 0.271, + "step": 26453 + }, + { + "epoch": 1.2392373635639669, + "grad_norm": 0.6011684860570732, + "learning_rate": 1.6690428568138905e-06, + "loss": 0.2776, + "step": 26454 + }, + { + "epoch": 1.239284208553895, + "grad_norm": 0.6028216503589803, + "learning_rate": 1.6688639917008985e-06, + "loss": 0.2706, + "step": 26455 + }, + { + "epoch": 1.2393310535438236, + "grad_norm": 0.5762501568635213, + "learning_rate": 1.6686851313708852e-06, + "loss": 0.2522, + "step": 26456 + }, + { + "epoch": 1.2393778985337518, + "grad_norm": 0.6146765508899776, + "learning_rate": 1.6685062758248798e-06, + "loss": 0.2948, + "step": 26457 + }, + { + "epoch": 1.23942474352368, + "grad_norm": 0.5642553031153273, + "learning_rate": 1.668327425063913e-06, + "loss": 0.2622, + "step": 26458 + }, + { + "epoch": 1.2394715885136085, + "grad_norm": 0.6348481678207957, + "learning_rate": 1.6681485790890118e-06, + "loss": 0.275, + "step": 26459 + }, + { + "epoch": 1.2395184335035367, + "grad_norm": 0.6011191968840769, + "learning_rate": 1.6679697379012064e-06, + "loss": 0.278, + "step": 26460 + }, + { + "epoch": 1.2395652784934652, + "grad_norm": 0.5902488388328002, + "learning_rate": 1.667790901501526e-06, + "loss": 0.2716, + "step": 26461 + }, + { + "epoch": 1.2396121234833934, + "grad_norm": 0.6376326692108002, + "learning_rate": 1.6676120698910003e-06, + "loss": 0.2727, + "step": 26462 + }, + { + "epoch": 1.2396589684733217, + "grad_norm": 0.5493453274892772, + "learning_rate": 1.6674332430706573e-06, + "loss": 0.2701, + "step": 26463 + }, + { + "epoch": 1.2397058134632502, + "grad_norm": 0.619377854101626, + "learning_rate": 1.6672544210415275e-06, + "loss": 0.2835, + "step": 26464 + }, + { + "epoch": 1.2397526584531784, + "grad_norm": 0.58866997015956, + "learning_rate": 1.6670756038046387e-06, + "loss": 0.2804, + "step": 26465 + }, + { + "epoch": 1.2397995034431069, + "grad_norm": 0.5780484025103778, + "learning_rate": 1.6668967913610207e-06, + "loss": 0.2793, + "step": 26466 + }, + { + "epoch": 1.239846348433035, + "grad_norm": 0.6534676417359663, + "learning_rate": 1.6667179837117018e-06, + "loss": 0.2972, + "step": 26467 + }, + { + "epoch": 1.2398931934229633, + "grad_norm": 0.6035325705002454, + "learning_rate": 1.6665391808577114e-06, + "loss": 0.2629, + "step": 26468 + }, + { + "epoch": 1.2399400384128918, + "grad_norm": 0.5820998575534526, + "learning_rate": 1.666360382800079e-06, + "loss": 0.2745, + "step": 26469 + }, + { + "epoch": 1.23998688340282, + "grad_norm": 0.5987101101080522, + "learning_rate": 1.6661815895398336e-06, + "loss": 0.2768, + "step": 26470 + }, + { + "epoch": 1.2400337283927483, + "grad_norm": 0.6154606345865574, + "learning_rate": 1.6660028010780027e-06, + "loss": 0.286, + "step": 26471 + }, + { + "epoch": 1.2400805733826767, + "grad_norm": 0.6862681734499625, + "learning_rate": 1.665824017415616e-06, + "loss": 0.2897, + "step": 26472 + }, + { + "epoch": 1.240127418372605, + "grad_norm": 0.5883989741164033, + "learning_rate": 1.665645238553703e-06, + "loss": 0.2655, + "step": 26473 + }, + { + "epoch": 1.2401742633625334, + "grad_norm": 0.6018555900287534, + "learning_rate": 1.6654664644932913e-06, + "loss": 0.2764, + "step": 26474 + }, + { + "epoch": 1.2402211083524617, + "grad_norm": 0.590894408936874, + "learning_rate": 1.6652876952354111e-06, + "loss": 0.2755, + "step": 26475 + }, + { + "epoch": 1.2402679533423902, + "grad_norm": 0.5997851107352646, + "learning_rate": 1.6651089307810903e-06, + "loss": 0.2654, + "step": 26476 + }, + { + "epoch": 1.2403147983323184, + "grad_norm": 0.5808775019948442, + "learning_rate": 1.6649301711313577e-06, + "loss": 0.2525, + "step": 26477 + }, + { + "epoch": 1.2403616433222466, + "grad_norm": 0.5559148026689165, + "learning_rate": 1.6647514162872414e-06, + "loss": 0.2414, + "step": 26478 + }, + { + "epoch": 1.240408488312175, + "grad_norm": 0.589908527585212, + "learning_rate": 1.6645726662497712e-06, + "loss": 0.2712, + "step": 26479 + }, + { + "epoch": 1.2404553333021033, + "grad_norm": 0.5927970918635002, + "learning_rate": 1.6643939210199753e-06, + "loss": 0.2714, + "step": 26480 + }, + { + "epoch": 1.2405021782920316, + "grad_norm": 0.6248719780943746, + "learning_rate": 1.664215180598882e-06, + "loss": 0.2851, + "step": 26481 + }, + { + "epoch": 1.24054902328196, + "grad_norm": 0.592659818194347, + "learning_rate": 1.6640364449875215e-06, + "loss": 0.2842, + "step": 26482 + }, + { + "epoch": 1.2405958682718883, + "grad_norm": 0.5605697821693392, + "learning_rate": 1.6638577141869205e-06, + "loss": 0.2609, + "step": 26483 + }, + { + "epoch": 1.2406427132618167, + "grad_norm": 0.5578907679880034, + "learning_rate": 1.663678988198108e-06, + "loss": 0.2598, + "step": 26484 + }, + { + "epoch": 1.240689558251745, + "grad_norm": 0.6226755645934163, + "learning_rate": 1.6635002670221123e-06, + "loss": 0.2833, + "step": 26485 + }, + { + "epoch": 1.2407364032416732, + "grad_norm": 0.6516597225884981, + "learning_rate": 1.6633215506599632e-06, + "loss": 0.2842, + "step": 26486 + }, + { + "epoch": 1.2407832482316017, + "grad_norm": 0.628830204891315, + "learning_rate": 1.6631428391126874e-06, + "loss": 0.2725, + "step": 26487 + }, + { + "epoch": 1.24083009322153, + "grad_norm": 0.6126424666957655, + "learning_rate": 1.6629641323813153e-06, + "loss": 0.2781, + "step": 26488 + }, + { + "epoch": 1.2408769382114584, + "grad_norm": 0.5573507553231827, + "learning_rate": 1.6627854304668734e-06, + "loss": 0.2668, + "step": 26489 + }, + { + "epoch": 1.2409237832013866, + "grad_norm": 0.5877718302478039, + "learning_rate": 1.6626067333703914e-06, + "loss": 0.2769, + "step": 26490 + }, + { + "epoch": 1.2409706281913149, + "grad_norm": 0.6252564796964443, + "learning_rate": 1.6624280410928968e-06, + "loss": 0.2753, + "step": 26491 + }, + { + "epoch": 1.2410174731812433, + "grad_norm": 0.6470987472915976, + "learning_rate": 1.6622493536354178e-06, + "loss": 0.2859, + "step": 26492 + }, + { + "epoch": 1.2410643181711716, + "grad_norm": 0.634203082184924, + "learning_rate": 1.6620706709989841e-06, + "loss": 0.2819, + "step": 26493 + }, + { + "epoch": 1.2411111631610998, + "grad_norm": 0.5593898147785155, + "learning_rate": 1.6618919931846235e-06, + "loss": 0.2652, + "step": 26494 + }, + { + "epoch": 1.2411580081510283, + "grad_norm": 0.5616340803313749, + "learning_rate": 1.661713320193363e-06, + "loss": 0.2729, + "step": 26495 + }, + { + "epoch": 1.2412048531409565, + "grad_norm": 0.5731804151176094, + "learning_rate": 1.6615346520262316e-06, + "loss": 0.266, + "step": 26496 + }, + { + "epoch": 1.241251698130885, + "grad_norm": 0.6093263592069005, + "learning_rate": 1.6613559886842579e-06, + "loss": 0.2776, + "step": 26497 + }, + { + "epoch": 1.2412985431208132, + "grad_norm": 0.5996395338707153, + "learning_rate": 1.6611773301684691e-06, + "loss": 0.2824, + "step": 26498 + }, + { + "epoch": 1.2413453881107415, + "grad_norm": 0.5975702613329424, + "learning_rate": 1.660998676479894e-06, + "loss": 0.2844, + "step": 26499 + }, + { + "epoch": 1.24139223310067, + "grad_norm": 0.61211947165928, + "learning_rate": 1.6608200276195618e-06, + "loss": 0.2761, + "step": 26500 + }, + { + "epoch": 1.2414390780905982, + "grad_norm": 0.6281406633754532, + "learning_rate": 1.6606413835884987e-06, + "loss": 0.2778, + "step": 26501 + }, + { + "epoch": 1.2414859230805266, + "grad_norm": 0.6179825510467366, + "learning_rate": 1.6604627443877333e-06, + "loss": 0.2688, + "step": 26502 + }, + { + "epoch": 1.2415327680704549, + "grad_norm": 0.6390426943165821, + "learning_rate": 1.6602841100182938e-06, + "loss": 0.2768, + "step": 26503 + }, + { + "epoch": 1.2415796130603831, + "grad_norm": 0.5880642823561965, + "learning_rate": 1.6601054804812083e-06, + "loss": 0.2814, + "step": 26504 + }, + { + "epoch": 1.2416264580503116, + "grad_norm": 0.6207873174226639, + "learning_rate": 1.6599268557775046e-06, + "loss": 0.2809, + "step": 26505 + }, + { + "epoch": 1.2416733030402398, + "grad_norm": 0.5696618003647514, + "learning_rate": 1.6597482359082113e-06, + "loss": 0.2686, + "step": 26506 + }, + { + "epoch": 1.241720148030168, + "grad_norm": 0.5912930318444389, + "learning_rate": 1.659569620874355e-06, + "loss": 0.2838, + "step": 26507 + }, + { + "epoch": 1.2417669930200965, + "grad_norm": 0.5886927794559996, + "learning_rate": 1.6593910106769646e-06, + "loss": 0.2673, + "step": 26508 + }, + { + "epoch": 1.2418138380100248, + "grad_norm": 0.6457413206138136, + "learning_rate": 1.6592124053170672e-06, + "loss": 0.2819, + "step": 26509 + }, + { + "epoch": 1.2418606829999532, + "grad_norm": 0.6254505129720371, + "learning_rate": 1.659033804795691e-06, + "loss": 0.2734, + "step": 26510 + }, + { + "epoch": 1.2419075279898815, + "grad_norm": 0.5555186535842428, + "learning_rate": 1.6588552091138643e-06, + "loss": 0.2542, + "step": 26511 + }, + { + "epoch": 1.24195437297981, + "grad_norm": 0.5748889244584475, + "learning_rate": 1.6586766182726154e-06, + "loss": 0.267, + "step": 26512 + }, + { + "epoch": 1.2420012179697382, + "grad_norm": 0.5624362041833648, + "learning_rate": 1.6584980322729699e-06, + "loss": 0.2568, + "step": 26513 + }, + { + "epoch": 1.2420480629596664, + "grad_norm": 0.63174375917913, + "learning_rate": 1.6583194511159567e-06, + "loss": 0.2694, + "step": 26514 + }, + { + "epoch": 1.2420949079495949, + "grad_norm": 0.6334999199709153, + "learning_rate": 1.658140874802604e-06, + "loss": 0.2915, + "step": 26515 + }, + { + "epoch": 1.2421417529395231, + "grad_norm": 0.571593407722221, + "learning_rate": 1.657962303333938e-06, + "loss": 0.2597, + "step": 26516 + }, + { + "epoch": 1.2421885979294514, + "grad_norm": 0.6158797075897169, + "learning_rate": 1.6577837367109881e-06, + "loss": 0.2804, + "step": 26517 + }, + { + "epoch": 1.2422354429193798, + "grad_norm": 0.6023157250596124, + "learning_rate": 1.657605174934782e-06, + "loss": 0.2892, + "step": 26518 + }, + { + "epoch": 1.242282287909308, + "grad_norm": 0.6375554966019789, + "learning_rate": 1.6574266180063453e-06, + "loss": 0.2979, + "step": 26519 + }, + { + "epoch": 1.2423291328992365, + "grad_norm": 0.5786335231886988, + "learning_rate": 1.6572480659267065e-06, + "loss": 0.2702, + "step": 26520 + }, + { + "epoch": 1.2423759778891648, + "grad_norm": 0.601686943411658, + "learning_rate": 1.6570695186968933e-06, + "loss": 0.2836, + "step": 26521 + }, + { + "epoch": 1.242422822879093, + "grad_norm": 0.6210560470494257, + "learning_rate": 1.6568909763179337e-06, + "loss": 0.268, + "step": 26522 + }, + { + "epoch": 1.2424696678690215, + "grad_norm": 0.6230087621469463, + "learning_rate": 1.6567124387908539e-06, + "loss": 0.2637, + "step": 26523 + }, + { + "epoch": 1.2425165128589497, + "grad_norm": 0.5512627203422857, + "learning_rate": 1.6565339061166836e-06, + "loss": 0.265, + "step": 26524 + }, + { + "epoch": 1.2425633578488782, + "grad_norm": 0.5933377309754797, + "learning_rate": 1.656355378296447e-06, + "loss": 0.2856, + "step": 26525 + }, + { + "epoch": 1.2426102028388064, + "grad_norm": 0.5992200777975195, + "learning_rate": 1.6561768553311741e-06, + "loss": 0.2803, + "step": 26526 + }, + { + "epoch": 1.2426570478287347, + "grad_norm": 0.6030443734385681, + "learning_rate": 1.6559983372218908e-06, + "loss": 0.2987, + "step": 26527 + }, + { + "epoch": 1.2427038928186631, + "grad_norm": 0.6006201473473919, + "learning_rate": 1.655819823969625e-06, + "loss": 0.277, + "step": 26528 + }, + { + "epoch": 1.2427507378085914, + "grad_norm": 0.6221192371841475, + "learning_rate": 1.655641315575404e-06, + "loss": 0.2823, + "step": 26529 + }, + { + "epoch": 1.2427975827985196, + "grad_norm": 0.6242757228093694, + "learning_rate": 1.6554628120402545e-06, + "loss": 0.2946, + "step": 26530 + }, + { + "epoch": 1.242844427788448, + "grad_norm": 0.5615688242065827, + "learning_rate": 1.6552843133652059e-06, + "loss": 0.2519, + "step": 26531 + }, + { + "epoch": 1.2428912727783763, + "grad_norm": 0.6184684261658058, + "learning_rate": 1.6551058195512826e-06, + "loss": 0.2894, + "step": 26532 + }, + { + "epoch": 1.2429381177683048, + "grad_norm": 0.6669026362971541, + "learning_rate": 1.6549273305995128e-06, + "loss": 0.2951, + "step": 26533 + }, + { + "epoch": 1.242984962758233, + "grad_norm": 0.6164104216251582, + "learning_rate": 1.6547488465109235e-06, + "loss": 0.2765, + "step": 26534 + }, + { + "epoch": 1.2430318077481612, + "grad_norm": 0.6146795666162386, + "learning_rate": 1.654570367286543e-06, + "loss": 0.2869, + "step": 26535 + }, + { + "epoch": 1.2430786527380897, + "grad_norm": 0.6209465535189522, + "learning_rate": 1.6543918929273967e-06, + "loss": 0.2742, + "step": 26536 + }, + { + "epoch": 1.243125497728018, + "grad_norm": 0.5280141376245514, + "learning_rate": 1.654213423434514e-06, + "loss": 0.2534, + "step": 26537 + }, + { + "epoch": 1.2431723427179464, + "grad_norm": 0.5786237324079017, + "learning_rate": 1.654034958808919e-06, + "loss": 0.2796, + "step": 26538 + }, + { + "epoch": 1.2432191877078747, + "grad_norm": 0.5646089759130284, + "learning_rate": 1.6538564990516409e-06, + "loss": 0.2669, + "step": 26539 + }, + { + "epoch": 1.243266032697803, + "grad_norm": 0.5903971344670423, + "learning_rate": 1.6536780441637053e-06, + "loss": 0.2767, + "step": 26540 + }, + { + "epoch": 1.2433128776877314, + "grad_norm": 0.5760525034875109, + "learning_rate": 1.6534995941461402e-06, + "loss": 0.2815, + "step": 26541 + }, + { + "epoch": 1.2433597226776596, + "grad_norm": 0.6116575729956267, + "learning_rate": 1.6533211489999723e-06, + "loss": 0.2816, + "step": 26542 + }, + { + "epoch": 1.2434065676675878, + "grad_norm": 0.5835730922433223, + "learning_rate": 1.6531427087262291e-06, + "loss": 0.2711, + "step": 26543 + }, + { + "epoch": 1.2434534126575163, + "grad_norm": 0.5975873009727739, + "learning_rate": 1.652964273325936e-06, + "loss": 0.277, + "step": 26544 + }, + { + "epoch": 1.2435002576474445, + "grad_norm": 0.5722886219326597, + "learning_rate": 1.6527858428001204e-06, + "loss": 0.2809, + "step": 26545 + }, + { + "epoch": 1.243547102637373, + "grad_norm": 0.5763904039514222, + "learning_rate": 1.6526074171498096e-06, + "loss": 0.2792, + "step": 26546 + }, + { + "epoch": 1.2435939476273012, + "grad_norm": 0.6279466506463264, + "learning_rate": 1.6524289963760299e-06, + "loss": 0.2915, + "step": 26547 + }, + { + "epoch": 1.2436407926172297, + "grad_norm": 0.5986602324772659, + "learning_rate": 1.6522505804798079e-06, + "loss": 0.285, + "step": 26548 + }, + { + "epoch": 1.243687637607158, + "grad_norm": 0.5423281986562644, + "learning_rate": 1.6520721694621723e-06, + "loss": 0.2578, + "step": 26549 + }, + { + "epoch": 1.2437344825970862, + "grad_norm": 0.6161857519298684, + "learning_rate": 1.6518937633241472e-06, + "loss": 0.2708, + "step": 26550 + }, + { + "epoch": 1.2437813275870147, + "grad_norm": 0.6320484547224491, + "learning_rate": 1.6517153620667602e-06, + "loss": 0.29, + "step": 26551 + }, + { + "epoch": 1.243828172576943, + "grad_norm": 0.5927530605543513, + "learning_rate": 1.651536965691038e-06, + "loss": 0.2679, + "step": 26552 + }, + { + "epoch": 1.2438750175668711, + "grad_norm": 0.5814920035570835, + "learning_rate": 1.6513585741980076e-06, + "loss": 0.2597, + "step": 26553 + }, + { + "epoch": 1.2439218625567996, + "grad_norm": 0.599057708141295, + "learning_rate": 1.651180187588695e-06, + "loss": 0.2684, + "step": 26554 + }, + { + "epoch": 1.2439687075467278, + "grad_norm": 0.5899202056856009, + "learning_rate": 1.651001805864128e-06, + "loss": 0.27, + "step": 26555 + }, + { + "epoch": 1.2440155525366563, + "grad_norm": 0.5925191887889616, + "learning_rate": 1.6508234290253317e-06, + "loss": 0.2746, + "step": 26556 + }, + { + "epoch": 1.2440623975265845, + "grad_norm": 0.5915672859245196, + "learning_rate": 1.650645057073333e-06, + "loss": 0.2721, + "step": 26557 + }, + { + "epoch": 1.2441092425165128, + "grad_norm": 0.6260366866981187, + "learning_rate": 1.6504666900091581e-06, + "loss": 0.2668, + "step": 26558 + }, + { + "epoch": 1.2441560875064412, + "grad_norm": 0.6170816810360058, + "learning_rate": 1.6502883278338345e-06, + "loss": 0.2795, + "step": 26559 + }, + { + "epoch": 1.2442029324963695, + "grad_norm": 0.5801601096961616, + "learning_rate": 1.6501099705483878e-06, + "loss": 0.2673, + "step": 26560 + }, + { + "epoch": 1.244249777486298, + "grad_norm": 0.5898429692659741, + "learning_rate": 1.6499316181538455e-06, + "loss": 0.2603, + "step": 26561 + }, + { + "epoch": 1.2442966224762262, + "grad_norm": 0.5860901540171377, + "learning_rate": 1.6497532706512317e-06, + "loss": 0.2821, + "step": 26562 + }, + { + "epoch": 1.2443434674661544, + "grad_norm": 0.5735624438926276, + "learning_rate": 1.6495749280415747e-06, + "loss": 0.2787, + "step": 26563 + }, + { + "epoch": 1.244390312456083, + "grad_norm": 0.6123420223005672, + "learning_rate": 1.6493965903259001e-06, + "loss": 0.2793, + "step": 26564 + }, + { + "epoch": 1.2444371574460111, + "grad_norm": 0.6085145981321638, + "learning_rate": 1.649218257505234e-06, + "loss": 0.2792, + "step": 26565 + }, + { + "epoch": 1.2444840024359394, + "grad_norm": 0.6099540658704349, + "learning_rate": 1.6490399295806037e-06, + "loss": 0.2931, + "step": 26566 + }, + { + "epoch": 1.2445308474258678, + "grad_norm": 0.607362017580814, + "learning_rate": 1.6488616065530347e-06, + "loss": 0.2647, + "step": 26567 + }, + { + "epoch": 1.244577692415796, + "grad_norm": 0.556342709007632, + "learning_rate": 1.6486832884235532e-06, + "loss": 0.2656, + "step": 26568 + }, + { + "epoch": 1.2446245374057245, + "grad_norm": 0.5806269338555009, + "learning_rate": 1.6485049751931848e-06, + "loss": 0.2805, + "step": 26569 + }, + { + "epoch": 1.2446713823956528, + "grad_norm": 0.5934351537368119, + "learning_rate": 1.6483266668629562e-06, + "loss": 0.2727, + "step": 26570 + }, + { + "epoch": 1.244718227385581, + "grad_norm": 0.6487630551604299, + "learning_rate": 1.6481483634338943e-06, + "loss": 0.2989, + "step": 26571 + }, + { + "epoch": 1.2447650723755095, + "grad_norm": 0.5732811439083588, + "learning_rate": 1.647970064907024e-06, + "loss": 0.2759, + "step": 26572 + }, + { + "epoch": 1.2448119173654377, + "grad_norm": 0.6435153782861751, + "learning_rate": 1.6477917712833724e-06, + "loss": 0.2833, + "step": 26573 + }, + { + "epoch": 1.2448587623553662, + "grad_norm": 0.5717508874324624, + "learning_rate": 1.6476134825639644e-06, + "loss": 0.2613, + "step": 26574 + }, + { + "epoch": 1.2449056073452944, + "grad_norm": 0.5871399383786923, + "learning_rate": 1.6474351987498271e-06, + "loss": 0.2721, + "step": 26575 + }, + { + "epoch": 1.2449524523352227, + "grad_norm": 0.5740437777583671, + "learning_rate": 1.6472569198419853e-06, + "loss": 0.2619, + "step": 26576 + }, + { + "epoch": 1.2449992973251511, + "grad_norm": 0.6204085267506632, + "learning_rate": 1.6470786458414658e-06, + "loss": 0.2882, + "step": 26577 + }, + { + "epoch": 1.2450461423150794, + "grad_norm": 0.6333245909220784, + "learning_rate": 1.6469003767492942e-06, + "loss": 0.2886, + "step": 26578 + }, + { + "epoch": 1.2450929873050076, + "grad_norm": 0.620059839452665, + "learning_rate": 1.6467221125664973e-06, + "loss": 0.2816, + "step": 26579 + }, + { + "epoch": 1.245139832294936, + "grad_norm": 0.56428140979336, + "learning_rate": 1.6465438532940991e-06, + "loss": 0.2622, + "step": 26580 + }, + { + "epoch": 1.2451866772848643, + "grad_norm": 0.5673916074086911, + "learning_rate": 1.6463655989331272e-06, + "loss": 0.2711, + "step": 26581 + }, + { + "epoch": 1.2452335222747928, + "grad_norm": 0.5865164867238811, + "learning_rate": 1.6461873494846064e-06, + "loss": 0.2958, + "step": 26582 + }, + { + "epoch": 1.245280367264721, + "grad_norm": 0.5857637455067297, + "learning_rate": 1.6460091049495627e-06, + "loss": 0.274, + "step": 26583 + }, + { + "epoch": 1.2453272122546495, + "grad_norm": 0.5814641427287514, + "learning_rate": 1.6458308653290224e-06, + "loss": 0.2609, + "step": 26584 + }, + { + "epoch": 1.2453740572445777, + "grad_norm": 0.6046060559529748, + "learning_rate": 1.6456526306240112e-06, + "loss": 0.2593, + "step": 26585 + }, + { + "epoch": 1.245420902234506, + "grad_norm": 0.5389781551008745, + "learning_rate": 1.6454744008355534e-06, + "loss": 0.2617, + "step": 26586 + }, + { + "epoch": 1.2454677472244344, + "grad_norm": 0.5348402497430808, + "learning_rate": 1.6452961759646758e-06, + "loss": 0.2645, + "step": 26587 + }, + { + "epoch": 1.2455145922143627, + "grad_norm": 0.6127618601022358, + "learning_rate": 1.6451179560124042e-06, + "loss": 0.2665, + "step": 26588 + }, + { + "epoch": 1.245561437204291, + "grad_norm": 0.5639542833337365, + "learning_rate": 1.6449397409797635e-06, + "loss": 0.2712, + "step": 26589 + }, + { + "epoch": 1.2456082821942194, + "grad_norm": 0.5552713954445767, + "learning_rate": 1.6447615308677794e-06, + "loss": 0.2581, + "step": 26590 + }, + { + "epoch": 1.2456551271841476, + "grad_norm": 0.6144505572673203, + "learning_rate": 1.6445833256774793e-06, + "loss": 0.2713, + "step": 26591 + }, + { + "epoch": 1.245701972174076, + "grad_norm": 0.6053291032367717, + "learning_rate": 1.6444051254098864e-06, + "loss": 0.2601, + "step": 26592 + }, + { + "epoch": 1.2457488171640043, + "grad_norm": 0.6133701847687304, + "learning_rate": 1.6442269300660264e-06, + "loss": 0.2892, + "step": 26593 + }, + { + "epoch": 1.2457956621539326, + "grad_norm": 0.6023432971536878, + "learning_rate": 1.6440487396469252e-06, + "loss": 0.2693, + "step": 26594 + }, + { + "epoch": 1.245842507143861, + "grad_norm": 0.5873114782079936, + "learning_rate": 1.643870554153609e-06, + "loss": 0.2752, + "step": 26595 + }, + { + "epoch": 1.2458893521337893, + "grad_norm": 0.6334412507471185, + "learning_rate": 1.6436923735871018e-06, + "loss": 0.2818, + "step": 26596 + }, + { + "epoch": 1.2459361971237177, + "grad_norm": 0.6001458295832749, + "learning_rate": 1.6435141979484304e-06, + "loss": 0.2711, + "step": 26597 + }, + { + "epoch": 1.245983042113646, + "grad_norm": 0.574310938780641, + "learning_rate": 1.6433360272386201e-06, + "loss": 0.2681, + "step": 26598 + }, + { + "epoch": 1.2460298871035742, + "grad_norm": 0.5679013832914848, + "learning_rate": 1.6431578614586952e-06, + "loss": 0.281, + "step": 26599 + }, + { + "epoch": 1.2460767320935027, + "grad_norm": 0.5503033393457739, + "learning_rate": 1.642979700609681e-06, + "loss": 0.2559, + "step": 26600 + }, + { + "epoch": 1.246123577083431, + "grad_norm": 0.5853752055924502, + "learning_rate": 1.6428015446926032e-06, + "loss": 0.2659, + "step": 26601 + }, + { + "epoch": 1.2461704220733592, + "grad_norm": 0.635187682029858, + "learning_rate": 1.6426233937084874e-06, + "loss": 0.2812, + "step": 26602 + }, + { + "epoch": 1.2462172670632876, + "grad_norm": 0.5429048030037332, + "learning_rate": 1.642445247658358e-06, + "loss": 0.2676, + "step": 26603 + }, + { + "epoch": 1.2462641120532159, + "grad_norm": 0.5903523045434331, + "learning_rate": 1.642267106543242e-06, + "loss": 0.2798, + "step": 26604 + }, + { + "epoch": 1.2463109570431443, + "grad_norm": 0.5781538054469004, + "learning_rate": 1.6420889703641618e-06, + "loss": 0.2692, + "step": 26605 + }, + { + "epoch": 1.2463578020330726, + "grad_norm": 0.5706750547107221, + "learning_rate": 1.641910839122145e-06, + "loss": 0.2661, + "step": 26606 + }, + { + "epoch": 1.2464046470230008, + "grad_norm": 0.6467969740731226, + "learning_rate": 1.6417327128182147e-06, + "loss": 0.2975, + "step": 26607 + }, + { + "epoch": 1.2464514920129293, + "grad_norm": 0.5995028462354318, + "learning_rate": 1.641554591453397e-06, + "loss": 0.2913, + "step": 26608 + }, + { + "epoch": 1.2464983370028575, + "grad_norm": 0.5631344753290587, + "learning_rate": 1.6413764750287174e-06, + "loss": 0.2607, + "step": 26609 + }, + { + "epoch": 1.246545181992786, + "grad_norm": 0.6433277052732054, + "learning_rate": 1.6411983635452005e-06, + "loss": 0.2753, + "step": 26610 + }, + { + "epoch": 1.2465920269827142, + "grad_norm": 0.5698166445272629, + "learning_rate": 1.6410202570038709e-06, + "loss": 0.2698, + "step": 26611 + }, + { + "epoch": 1.2466388719726424, + "grad_norm": 0.6321569166791494, + "learning_rate": 1.6408421554057533e-06, + "loss": 0.283, + "step": 26612 + }, + { + "epoch": 1.246685716962571, + "grad_norm": 0.6514958209814472, + "learning_rate": 1.6406640587518738e-06, + "loss": 0.2792, + "step": 26613 + }, + { + "epoch": 1.2467325619524992, + "grad_norm": 0.6266957640994492, + "learning_rate": 1.640485967043256e-06, + "loss": 0.2761, + "step": 26614 + }, + { + "epoch": 1.2467794069424274, + "grad_norm": 0.6032879361751603, + "learning_rate": 1.6403078802809263e-06, + "loss": 0.2633, + "step": 26615 + }, + { + "epoch": 1.2468262519323559, + "grad_norm": 0.5796697170360401, + "learning_rate": 1.6401297984659084e-06, + "loss": 0.285, + "step": 26616 + }, + { + "epoch": 1.246873096922284, + "grad_norm": 0.6048056356971854, + "learning_rate": 1.6399517215992276e-06, + "loss": 0.2778, + "step": 26617 + }, + { + "epoch": 1.2469199419122126, + "grad_norm": 0.5994502683760972, + "learning_rate": 1.6397736496819079e-06, + "loss": 0.2797, + "step": 26618 + }, + { + "epoch": 1.2469667869021408, + "grad_norm": 0.565316879579416, + "learning_rate": 1.6395955827149748e-06, + "loss": 0.2713, + "step": 26619 + }, + { + "epoch": 1.2470136318920693, + "grad_norm": 0.5929010945956854, + "learning_rate": 1.6394175206994529e-06, + "loss": 0.2928, + "step": 26620 + }, + { + "epoch": 1.2470604768819975, + "grad_norm": 0.6152483828030106, + "learning_rate": 1.6392394636363661e-06, + "loss": 0.2715, + "step": 26621 + }, + { + "epoch": 1.2471073218719257, + "grad_norm": 0.5561254245951586, + "learning_rate": 1.6390614115267416e-06, + "loss": 0.262, + "step": 26622 + }, + { + "epoch": 1.2471541668618542, + "grad_norm": 0.613165928601972, + "learning_rate": 1.6388833643716017e-06, + "loss": 0.2847, + "step": 26623 + }, + { + "epoch": 1.2472010118517824, + "grad_norm": 0.6260343133700619, + "learning_rate": 1.6387053221719707e-06, + "loss": 0.2816, + "step": 26624 + }, + { + "epoch": 1.2472478568417107, + "grad_norm": 0.6189527228778976, + "learning_rate": 1.6385272849288745e-06, + "loss": 0.2753, + "step": 26625 + }, + { + "epoch": 1.2472947018316392, + "grad_norm": 0.563457764864793, + "learning_rate": 1.6383492526433376e-06, + "loss": 0.2784, + "step": 26626 + }, + { + "epoch": 1.2473415468215674, + "grad_norm": 0.6575834524108674, + "learning_rate": 1.6381712253163835e-06, + "loss": 0.2885, + "step": 26627 + }, + { + "epoch": 1.2473883918114959, + "grad_norm": 0.6229082427632857, + "learning_rate": 1.6379932029490387e-06, + "loss": 0.2776, + "step": 26628 + }, + { + "epoch": 1.247435236801424, + "grad_norm": 0.5638043606553098, + "learning_rate": 1.6378151855423253e-06, + "loss": 0.2597, + "step": 26629 + }, + { + "epoch": 1.2474820817913523, + "grad_norm": 0.5691518769113706, + "learning_rate": 1.637637173097269e-06, + "loss": 0.2753, + "step": 26630 + }, + { + "epoch": 1.2475289267812808, + "grad_norm": 0.5478961017089012, + "learning_rate": 1.6374591656148936e-06, + "loss": 0.2633, + "step": 26631 + }, + { + "epoch": 1.247575771771209, + "grad_norm": 0.5950398830194596, + "learning_rate": 1.6372811630962244e-06, + "loss": 0.2778, + "step": 26632 + }, + { + "epoch": 1.2476226167611375, + "grad_norm": 0.5864995457034576, + "learning_rate": 1.6371031655422852e-06, + "loss": 0.2649, + "step": 26633 + }, + { + "epoch": 1.2476694617510657, + "grad_norm": 0.6264416945956975, + "learning_rate": 1.6369251729541013e-06, + "loss": 0.2884, + "step": 26634 + }, + { + "epoch": 1.247716306740994, + "grad_norm": 0.5889566639979913, + "learning_rate": 1.636747185332695e-06, + "loss": 0.2633, + "step": 26635 + }, + { + "epoch": 1.2477631517309224, + "grad_norm": 0.6003529859645343, + "learning_rate": 1.6365692026790913e-06, + "loss": 0.2757, + "step": 26636 + }, + { + "epoch": 1.2478099967208507, + "grad_norm": 0.6278968910329872, + "learning_rate": 1.6363912249943157e-06, + "loss": 0.2648, + "step": 26637 + }, + { + "epoch": 1.247856841710779, + "grad_norm": 0.6551056085047143, + "learning_rate": 1.6362132522793912e-06, + "loss": 0.2983, + "step": 26638 + }, + { + "epoch": 1.2479036867007074, + "grad_norm": 0.5537968923657777, + "learning_rate": 1.6360352845353417e-06, + "loss": 0.2717, + "step": 26639 + }, + { + "epoch": 1.2479505316906356, + "grad_norm": 0.5634089706467696, + "learning_rate": 1.6358573217631934e-06, + "loss": 0.267, + "step": 26640 + }, + { + "epoch": 1.247997376680564, + "grad_norm": 0.6110411701971121, + "learning_rate": 1.6356793639639687e-06, + "loss": 0.2715, + "step": 26641 + }, + { + "epoch": 1.2480442216704923, + "grad_norm": 0.5652690205083709, + "learning_rate": 1.6355014111386913e-06, + "loss": 0.2879, + "step": 26642 + }, + { + "epoch": 1.2480910666604206, + "grad_norm": 0.5838579545181272, + "learning_rate": 1.6353234632883858e-06, + "loss": 0.2761, + "step": 26643 + }, + { + "epoch": 1.248137911650349, + "grad_norm": 0.6376981438127219, + "learning_rate": 1.6351455204140773e-06, + "loss": 0.2913, + "step": 26644 + }, + { + "epoch": 1.2481847566402773, + "grad_norm": 0.6470744883378523, + "learning_rate": 1.6349675825167883e-06, + "loss": 0.2968, + "step": 26645 + }, + { + "epoch": 1.2482316016302057, + "grad_norm": 0.6343360353162558, + "learning_rate": 1.6347896495975445e-06, + "loss": 0.2717, + "step": 26646 + }, + { + "epoch": 1.248278446620134, + "grad_norm": 0.599524289580626, + "learning_rate": 1.6346117216573675e-06, + "loss": 0.2882, + "step": 26647 + }, + { + "epoch": 1.2483252916100622, + "grad_norm": 0.6639132396119541, + "learning_rate": 1.6344337986972836e-06, + "loss": 0.2799, + "step": 26648 + }, + { + "epoch": 1.2483721365999907, + "grad_norm": 0.6030881347008181, + "learning_rate": 1.6342558807183149e-06, + "loss": 0.2656, + "step": 26649 + }, + { + "epoch": 1.248418981589919, + "grad_norm": 0.6072610807830768, + "learning_rate": 1.6340779677214857e-06, + "loss": 0.299, + "step": 26650 + }, + { + "epoch": 1.2484658265798472, + "grad_norm": 0.6577090455590741, + "learning_rate": 1.6339000597078208e-06, + "loss": 0.2855, + "step": 26651 + }, + { + "epoch": 1.2485126715697756, + "grad_norm": 0.6308432486329001, + "learning_rate": 1.6337221566783443e-06, + "loss": 0.2888, + "step": 26652 + }, + { + "epoch": 1.2485595165597039, + "grad_norm": 0.6349830872049945, + "learning_rate": 1.6335442586340778e-06, + "loss": 0.2987, + "step": 26653 + }, + { + "epoch": 1.2486063615496323, + "grad_norm": 0.5820950694222213, + "learning_rate": 1.6333663655760463e-06, + "loss": 0.2803, + "step": 26654 + }, + { + "epoch": 1.2486532065395606, + "grad_norm": 0.5880985729067402, + "learning_rate": 1.6331884775052737e-06, + "loss": 0.2808, + "step": 26655 + }, + { + "epoch": 1.248700051529489, + "grad_norm": 0.6102229036875075, + "learning_rate": 1.6330105944227833e-06, + "loss": 0.2809, + "step": 26656 + }, + { + "epoch": 1.2487468965194173, + "grad_norm": 0.6117717525550262, + "learning_rate": 1.6328327163295993e-06, + "loss": 0.2756, + "step": 26657 + }, + { + "epoch": 1.2487937415093455, + "grad_norm": 0.5600163797197251, + "learning_rate": 1.6326548432267459e-06, + "loss": 0.2683, + "step": 26658 + }, + { + "epoch": 1.248840586499274, + "grad_norm": 0.5960320597906225, + "learning_rate": 1.632476975115245e-06, + "loss": 0.2748, + "step": 26659 + }, + { + "epoch": 1.2488874314892022, + "grad_norm": 0.6480342319852795, + "learning_rate": 1.632299111996121e-06, + "loss": 0.2869, + "step": 26660 + }, + { + "epoch": 1.2489342764791305, + "grad_norm": 0.6374315455744781, + "learning_rate": 1.6321212538703974e-06, + "loss": 0.2637, + "step": 26661 + }, + { + "epoch": 1.248981121469059, + "grad_norm": 0.5939837729489464, + "learning_rate": 1.6319434007390984e-06, + "loss": 0.2599, + "step": 26662 + }, + { + "epoch": 1.2490279664589872, + "grad_norm": 0.603753813746966, + "learning_rate": 1.631765552603246e-06, + "loss": 0.2787, + "step": 26663 + }, + { + "epoch": 1.2490748114489156, + "grad_norm": 0.5853008961354903, + "learning_rate": 1.6315877094638657e-06, + "loss": 0.2633, + "step": 26664 + }, + { + "epoch": 1.2491216564388439, + "grad_norm": 0.6171864586483963, + "learning_rate": 1.6314098713219795e-06, + "loss": 0.2828, + "step": 26665 + }, + { + "epoch": 1.2491685014287721, + "grad_norm": 0.580579892899765, + "learning_rate": 1.6312320381786114e-06, + "loss": 0.2663, + "step": 26666 + }, + { + "epoch": 1.2492153464187006, + "grad_norm": 0.5603768817658312, + "learning_rate": 1.6310542100347843e-06, + "loss": 0.2649, + "step": 26667 + }, + { + "epoch": 1.2492621914086288, + "grad_norm": 0.6037717594843576, + "learning_rate": 1.6308763868915217e-06, + "loss": 0.2909, + "step": 26668 + }, + { + "epoch": 1.2493090363985573, + "grad_norm": 0.5585064049874722, + "learning_rate": 1.6306985687498471e-06, + "loss": 0.2662, + "step": 26669 + }, + { + "epoch": 1.2493558813884855, + "grad_norm": 0.5843684677556354, + "learning_rate": 1.6305207556107833e-06, + "loss": 0.27, + "step": 26670 + }, + { + "epoch": 1.2494027263784138, + "grad_norm": 0.5781029678720575, + "learning_rate": 1.6303429474753554e-06, + "loss": 0.2789, + "step": 26671 + }, + { + "epoch": 1.2494495713683422, + "grad_norm": 0.5634772621947921, + "learning_rate": 1.630165144344585e-06, + "loss": 0.2768, + "step": 26672 + }, + { + "epoch": 1.2494964163582705, + "grad_norm": 0.6082143183231203, + "learning_rate": 1.6299873462194943e-06, + "loss": 0.2661, + "step": 26673 + }, + { + "epoch": 1.2495432613481987, + "grad_norm": 0.5572955986363437, + "learning_rate": 1.6298095531011083e-06, + "loss": 0.2502, + "step": 26674 + }, + { + "epoch": 1.2495901063381272, + "grad_norm": 0.6057623158788906, + "learning_rate": 1.6296317649904497e-06, + "loss": 0.2755, + "step": 26675 + }, + { + "epoch": 1.2496369513280554, + "grad_norm": 0.5945231279502424, + "learning_rate": 1.6294539818885412e-06, + "loss": 0.2677, + "step": 26676 + }, + { + "epoch": 1.2496837963179839, + "grad_norm": 0.6211353821034926, + "learning_rate": 1.6292762037964075e-06, + "loss": 0.2679, + "step": 26677 + }, + { + "epoch": 1.2497306413079121, + "grad_norm": 0.6191062369786771, + "learning_rate": 1.6290984307150693e-06, + "loss": 0.2757, + "step": 26678 + }, + { + "epoch": 1.2497774862978404, + "grad_norm": 0.5266609235899757, + "learning_rate": 1.6289206626455509e-06, + "loss": 0.2494, + "step": 26679 + }, + { + "epoch": 1.2498243312877688, + "grad_norm": 0.5719597782413988, + "learning_rate": 1.628742899588875e-06, + "loss": 0.2518, + "step": 26680 + }, + { + "epoch": 1.249871176277697, + "grad_norm": 0.5419016048738616, + "learning_rate": 1.6285651415460646e-06, + "loss": 0.2565, + "step": 26681 + }, + { + "epoch": 1.2499180212676255, + "grad_norm": 0.5966062678578078, + "learning_rate": 1.6283873885181433e-06, + "loss": 0.2741, + "step": 26682 + }, + { + "epoch": 1.2499648662575538, + "grad_norm": 0.6167299440340818, + "learning_rate": 1.6282096405061337e-06, + "loss": 0.2828, + "step": 26683 + }, + { + "epoch": 1.250011711247482, + "grad_norm": 0.5890248738536739, + "learning_rate": 1.628031897511058e-06, + "loss": 0.2835, + "step": 26684 + }, + { + "epoch": 1.2500585562374105, + "grad_norm": 0.5817392905967184, + "learning_rate": 1.6278541595339392e-06, + "loss": 0.2794, + "step": 26685 + }, + { + "epoch": 1.2501054012273387, + "grad_norm": 0.6247251355626727, + "learning_rate": 1.6276764265758013e-06, + "loss": 0.2922, + "step": 26686 + }, + { + "epoch": 1.250152246217267, + "grad_norm": 0.588380885153067, + "learning_rate": 1.6274986986376657e-06, + "loss": 0.2705, + "step": 26687 + }, + { + "epoch": 1.2501990912071954, + "grad_norm": 0.5794675187405501, + "learning_rate": 1.627320975720556e-06, + "loss": 0.2688, + "step": 26688 + }, + { + "epoch": 1.2502459361971237, + "grad_norm": 0.6019586902927229, + "learning_rate": 1.6271432578254954e-06, + "loss": 0.2784, + "step": 26689 + }, + { + "epoch": 1.2502927811870521, + "grad_norm": 0.6091599098414455, + "learning_rate": 1.6269655449535054e-06, + "loss": 0.2737, + "step": 26690 + }, + { + "epoch": 1.2503396261769804, + "grad_norm": 0.5492233708880335, + "learning_rate": 1.626787837105609e-06, + "loss": 0.2674, + "step": 26691 + }, + { + "epoch": 1.2503864711669088, + "grad_norm": 0.5753977447200911, + "learning_rate": 1.6266101342828291e-06, + "loss": 0.2689, + "step": 26692 + }, + { + "epoch": 1.250433316156837, + "grad_norm": 0.5464744324445916, + "learning_rate": 1.6264324364861888e-06, + "loss": 0.2604, + "step": 26693 + }, + { + "epoch": 1.2504801611467653, + "grad_norm": 0.5965933404142181, + "learning_rate": 1.6262547437167098e-06, + "loss": 0.2587, + "step": 26694 + }, + { + "epoch": 1.2505270061366938, + "grad_norm": 0.5816907958792178, + "learning_rate": 1.6260770559754163e-06, + "loss": 0.2758, + "step": 26695 + }, + { + "epoch": 1.250573851126622, + "grad_norm": 0.6169149314901825, + "learning_rate": 1.6258993732633285e-06, + "loss": 0.2706, + "step": 26696 + }, + { + "epoch": 1.2506206961165502, + "grad_norm": 0.6147310130461697, + "learning_rate": 1.625721695581471e-06, + "loss": 0.2864, + "step": 26697 + }, + { + "epoch": 1.2506675411064787, + "grad_norm": 0.6303233493231356, + "learning_rate": 1.6255440229308644e-06, + "loss": 0.2847, + "step": 26698 + }, + { + "epoch": 1.250714386096407, + "grad_norm": 0.5767241739231682, + "learning_rate": 1.625366355312532e-06, + "loss": 0.2706, + "step": 26699 + }, + { + "epoch": 1.2507612310863352, + "grad_norm": 0.5398478364826595, + "learning_rate": 1.6251886927274973e-06, + "loss": 0.2568, + "step": 26700 + }, + { + "epoch": 1.2508080760762637, + "grad_norm": 0.6369468565643746, + "learning_rate": 1.6250110351767824e-06, + "loss": 0.2819, + "step": 26701 + }, + { + "epoch": 1.250854921066192, + "grad_norm": 0.6241387228339464, + "learning_rate": 1.6248333826614076e-06, + "loss": 0.2767, + "step": 26702 + }, + { + "epoch": 1.2509017660561204, + "grad_norm": 0.5856364861861707, + "learning_rate": 1.6246557351823972e-06, + "loss": 0.2832, + "step": 26703 + }, + { + "epoch": 1.2509486110460486, + "grad_norm": 0.5778533684697467, + "learning_rate": 1.6244780927407733e-06, + "loss": 0.2712, + "step": 26704 + }, + { + "epoch": 1.250995456035977, + "grad_norm": 0.6008739778161167, + "learning_rate": 1.6243004553375574e-06, + "loss": 0.29, + "step": 26705 + }, + { + "epoch": 1.2510423010259053, + "grad_norm": 0.6159892622931888, + "learning_rate": 1.624122822973773e-06, + "loss": 0.2707, + "step": 26706 + }, + { + "epoch": 1.2510891460158335, + "grad_norm": 0.6040560235808595, + "learning_rate": 1.6239451956504414e-06, + "loss": 0.2738, + "step": 26707 + }, + { + "epoch": 1.251135991005762, + "grad_norm": 0.6030764756487335, + "learning_rate": 1.6237675733685847e-06, + "loss": 0.2866, + "step": 26708 + }, + { + "epoch": 1.2511828359956902, + "grad_norm": 0.6206176752147863, + "learning_rate": 1.6235899561292257e-06, + "loss": 0.2744, + "step": 26709 + }, + { + "epoch": 1.2512296809856185, + "grad_norm": 0.5635634367706727, + "learning_rate": 1.623412343933386e-06, + "loss": 0.2779, + "step": 26710 + }, + { + "epoch": 1.251276525975547, + "grad_norm": 0.521375392159375, + "learning_rate": 1.6232347367820877e-06, + "loss": 0.2465, + "step": 26711 + }, + { + "epoch": 1.2513233709654752, + "grad_norm": 0.6342012732415093, + "learning_rate": 1.6230571346763532e-06, + "loss": 0.2988, + "step": 26712 + }, + { + "epoch": 1.2513702159554034, + "grad_norm": 0.6027857051634908, + "learning_rate": 1.6228795376172055e-06, + "loss": 0.2834, + "step": 26713 + }, + { + "epoch": 1.251417060945332, + "grad_norm": 0.5891968733006251, + "learning_rate": 1.6227019456056645e-06, + "loss": 0.2845, + "step": 26714 + }, + { + "epoch": 1.2514639059352604, + "grad_norm": 0.6169843544607497, + "learning_rate": 1.6225243586427536e-06, + "loss": 0.2926, + "step": 26715 + }, + { + "epoch": 1.2515107509251886, + "grad_norm": 0.6095386215177415, + "learning_rate": 1.6223467767294942e-06, + "loss": 0.2833, + "step": 26716 + }, + { + "epoch": 1.2515575959151168, + "grad_norm": 0.5517568311451316, + "learning_rate": 1.6221691998669093e-06, + "loss": 0.2522, + "step": 26717 + }, + { + "epoch": 1.2516044409050453, + "grad_norm": 0.6058858981466945, + "learning_rate": 1.6219916280560187e-06, + "loss": 0.2654, + "step": 26718 + }, + { + "epoch": 1.2516512858949735, + "grad_norm": 0.6108599383094158, + "learning_rate": 1.6218140612978474e-06, + "loss": 0.2927, + "step": 26719 + }, + { + "epoch": 1.2516981308849018, + "grad_norm": 0.6605954771111968, + "learning_rate": 1.6216364995934141e-06, + "loss": 0.2747, + "step": 26720 + }, + { + "epoch": 1.2517449758748302, + "grad_norm": 0.5945928043125636, + "learning_rate": 1.6214589429437427e-06, + "loss": 0.2767, + "step": 26721 + }, + { + "epoch": 1.2517918208647585, + "grad_norm": 0.6179257994999848, + "learning_rate": 1.6212813913498538e-06, + "loss": 0.2811, + "step": 26722 + }, + { + "epoch": 1.2518386658546867, + "grad_norm": 0.6261079298292525, + "learning_rate": 1.6211038448127692e-06, + "loss": 0.2974, + "step": 26723 + }, + { + "epoch": 1.2518855108446152, + "grad_norm": 0.6134073325113012, + "learning_rate": 1.6209263033335118e-06, + "loss": 0.2772, + "step": 26724 + }, + { + "epoch": 1.2519323558345434, + "grad_norm": 0.6040204809320304, + "learning_rate": 1.6207487669131033e-06, + "loss": 0.2823, + "step": 26725 + }, + { + "epoch": 1.251979200824472, + "grad_norm": 0.5986829279093879, + "learning_rate": 1.6205712355525632e-06, + "loss": 0.2839, + "step": 26726 + }, + { + "epoch": 1.2520260458144001, + "grad_norm": 0.6202822522264785, + "learning_rate": 1.6203937092529149e-06, + "loss": 0.296, + "step": 26727 + }, + { + "epoch": 1.2520728908043286, + "grad_norm": 0.6674831972586696, + "learning_rate": 1.6202161880151801e-06, + "loss": 0.2883, + "step": 26728 + }, + { + "epoch": 1.2521197357942568, + "grad_norm": 0.5674245334046565, + "learning_rate": 1.6200386718403799e-06, + "loss": 0.2615, + "step": 26729 + }, + { + "epoch": 1.252166580784185, + "grad_norm": 0.5536228922723304, + "learning_rate": 1.6198611607295356e-06, + "loss": 0.263, + "step": 26730 + }, + { + "epoch": 1.2522134257741135, + "grad_norm": 0.5854782898810496, + "learning_rate": 1.6196836546836696e-06, + "loss": 0.2722, + "step": 26731 + }, + { + "epoch": 1.2522602707640418, + "grad_norm": 0.6001120506610752, + "learning_rate": 1.6195061537038034e-06, + "loss": 0.2788, + "step": 26732 + }, + { + "epoch": 1.25230711575397, + "grad_norm": 0.5868088885102594, + "learning_rate": 1.6193286577909572e-06, + "loss": 0.2686, + "step": 26733 + }, + { + "epoch": 1.2523539607438985, + "grad_norm": 0.6263819438530257, + "learning_rate": 1.619151166946153e-06, + "loss": 0.2812, + "step": 26734 + }, + { + "epoch": 1.2524008057338267, + "grad_norm": 0.7119468612237051, + "learning_rate": 1.6189736811704129e-06, + "loss": 0.3161, + "step": 26735 + }, + { + "epoch": 1.252447650723755, + "grad_norm": 0.5274472452292958, + "learning_rate": 1.6187962004647575e-06, + "loss": 0.2543, + "step": 26736 + }, + { + "epoch": 1.2524944957136834, + "grad_norm": 0.6053217657185281, + "learning_rate": 1.6186187248302085e-06, + "loss": 0.2767, + "step": 26737 + }, + { + "epoch": 1.2525413407036117, + "grad_norm": 0.5926728347900428, + "learning_rate": 1.618441254267788e-06, + "loss": 0.2828, + "step": 26738 + }, + { + "epoch": 1.2525881856935401, + "grad_norm": 0.5795838801093278, + "learning_rate": 1.6182637887785162e-06, + "loss": 0.2601, + "step": 26739 + }, + { + "epoch": 1.2526350306834684, + "grad_norm": 0.5975931088555613, + "learning_rate": 1.6180863283634142e-06, + "loss": 0.2796, + "step": 26740 + }, + { + "epoch": 1.2526818756733968, + "grad_norm": 0.5735860297232533, + "learning_rate": 1.6179088730235038e-06, + "loss": 0.2681, + "step": 26741 + }, + { + "epoch": 1.252728720663325, + "grad_norm": 0.6373914271576372, + "learning_rate": 1.6177314227598062e-06, + "loss": 0.2916, + "step": 26742 + }, + { + "epoch": 1.2527755656532533, + "grad_norm": 0.5933042185093993, + "learning_rate": 1.6175539775733424e-06, + "loss": 0.2738, + "step": 26743 + }, + { + "epoch": 1.2528224106431818, + "grad_norm": 0.6428098920571426, + "learning_rate": 1.6173765374651345e-06, + "loss": 0.2963, + "step": 26744 + }, + { + "epoch": 1.25286925563311, + "grad_norm": 0.5664331994319846, + "learning_rate": 1.6171991024362021e-06, + "loss": 0.2619, + "step": 26745 + }, + { + "epoch": 1.2529161006230383, + "grad_norm": 0.6462415633324191, + "learning_rate": 1.617021672487567e-06, + "loss": 0.2943, + "step": 26746 + }, + { + "epoch": 1.2529629456129667, + "grad_norm": 0.6112037831063641, + "learning_rate": 1.61684424762025e-06, + "loss": 0.2619, + "step": 26747 + }, + { + "epoch": 1.253009790602895, + "grad_norm": 0.6123469628970891, + "learning_rate": 1.6166668278352727e-06, + "loss": 0.2785, + "step": 26748 + }, + { + "epoch": 1.2530566355928232, + "grad_norm": 0.5799562616961497, + "learning_rate": 1.6164894131336556e-06, + "loss": 0.2655, + "step": 26749 + }, + { + "epoch": 1.2531034805827517, + "grad_norm": 0.5814187720394466, + "learning_rate": 1.6163120035164209e-06, + "loss": 0.2668, + "step": 26750 + }, + { + "epoch": 1.2531503255726801, + "grad_norm": 0.5424982155354356, + "learning_rate": 1.6161345989845873e-06, + "loss": 0.2536, + "step": 26751 + }, + { + "epoch": 1.2531971705626084, + "grad_norm": 0.6039997098533636, + "learning_rate": 1.615957199539177e-06, + "loss": 0.2834, + "step": 26752 + }, + { + "epoch": 1.2532440155525366, + "grad_norm": 0.6183915205149891, + "learning_rate": 1.6157798051812112e-06, + "loss": 0.2854, + "step": 26753 + }, + { + "epoch": 1.253290860542465, + "grad_norm": 0.6300420988996912, + "learning_rate": 1.61560241591171e-06, + "loss": 0.2816, + "step": 26754 + }, + { + "epoch": 1.2533377055323933, + "grad_norm": 0.5708493870572419, + "learning_rate": 1.6154250317316949e-06, + "loss": 0.2536, + "step": 26755 + }, + { + "epoch": 1.2533845505223216, + "grad_norm": 0.6508955446324158, + "learning_rate": 1.6152476526421868e-06, + "loss": 0.285, + "step": 26756 + }, + { + "epoch": 1.25343139551225, + "grad_norm": 0.6044269203373461, + "learning_rate": 1.6150702786442058e-06, + "loss": 0.2773, + "step": 26757 + }, + { + "epoch": 1.2534782405021783, + "grad_norm": 0.5776851496835315, + "learning_rate": 1.6148929097387726e-06, + "loss": 0.275, + "step": 26758 + }, + { + "epoch": 1.2535250854921065, + "grad_norm": 0.6094671608768327, + "learning_rate": 1.6147155459269087e-06, + "loss": 0.2864, + "step": 26759 + }, + { + "epoch": 1.253571930482035, + "grad_norm": 0.5956124342917274, + "learning_rate": 1.6145381872096336e-06, + "loss": 0.2675, + "step": 26760 + }, + { + "epoch": 1.2536187754719632, + "grad_norm": 0.5995368930061736, + "learning_rate": 1.614360833587969e-06, + "loss": 0.2879, + "step": 26761 + }, + { + "epoch": 1.2536656204618917, + "grad_norm": 0.6495641068990862, + "learning_rate": 1.6141834850629359e-06, + "loss": 0.2773, + "step": 26762 + }, + { + "epoch": 1.25371246545182, + "grad_norm": 0.6301908440755764, + "learning_rate": 1.614006141635554e-06, + "loss": 0.2754, + "step": 26763 + }, + { + "epoch": 1.2537593104417484, + "grad_norm": 0.647671735530261, + "learning_rate": 1.6138288033068433e-06, + "loss": 0.2852, + "step": 26764 + }, + { + "epoch": 1.2538061554316766, + "grad_norm": 0.615680613173614, + "learning_rate": 1.6136514700778252e-06, + "loss": 0.2804, + "step": 26765 + }, + { + "epoch": 1.2538530004216049, + "grad_norm": 0.5652976570219843, + "learning_rate": 1.6134741419495205e-06, + "loss": 0.2694, + "step": 26766 + }, + { + "epoch": 1.2538998454115333, + "grad_norm": 0.578593360653583, + "learning_rate": 1.6132968189229493e-06, + "loss": 0.2587, + "step": 26767 + }, + { + "epoch": 1.2539466904014616, + "grad_norm": 0.5715143358444015, + "learning_rate": 1.6131195009991324e-06, + "loss": 0.2654, + "step": 26768 + }, + { + "epoch": 1.2539935353913898, + "grad_norm": 0.5606050390886781, + "learning_rate": 1.612942188179089e-06, + "loss": 0.2651, + "step": 26769 + }, + { + "epoch": 1.2540403803813183, + "grad_norm": 0.6442255040345787, + "learning_rate": 1.6127648804638413e-06, + "loss": 0.2903, + "step": 26770 + }, + { + "epoch": 1.2540872253712465, + "grad_norm": 0.5772537583462516, + "learning_rate": 1.6125875778544078e-06, + "loss": 0.2709, + "step": 26771 + }, + { + "epoch": 1.2541340703611747, + "grad_norm": 0.602629621451463, + "learning_rate": 1.6124102803518099e-06, + "loss": 0.2755, + "step": 26772 + }, + { + "epoch": 1.2541809153511032, + "grad_norm": 0.5670204682307204, + "learning_rate": 1.6122329879570682e-06, + "loss": 0.2602, + "step": 26773 + }, + { + "epoch": 1.2542277603410315, + "grad_norm": 0.5591836326759543, + "learning_rate": 1.612055700671203e-06, + "loss": 0.2691, + "step": 26774 + }, + { + "epoch": 1.25427460533096, + "grad_norm": 0.6273925406981343, + "learning_rate": 1.6118784184952329e-06, + "loss": 0.2865, + "step": 26775 + }, + { + "epoch": 1.2543214503208882, + "grad_norm": 0.6027832886812037, + "learning_rate": 1.6117011414301798e-06, + "loss": 0.2641, + "step": 26776 + }, + { + "epoch": 1.2543682953108166, + "grad_norm": 0.6184549534356087, + "learning_rate": 1.6115238694770636e-06, + "loss": 0.2631, + "step": 26777 + }, + { + "epoch": 1.2544151403007449, + "grad_norm": 0.5616924052196883, + "learning_rate": 1.6113466026369035e-06, + "loss": 0.2655, + "step": 26778 + }, + { + "epoch": 1.254461985290673, + "grad_norm": 0.5447026243750124, + "learning_rate": 1.6111693409107204e-06, + "loss": 0.2542, + "step": 26779 + }, + { + "epoch": 1.2545088302806016, + "grad_norm": 0.6710320597574914, + "learning_rate": 1.6109920842995356e-06, + "loss": 0.2719, + "step": 26780 + }, + { + "epoch": 1.2545556752705298, + "grad_norm": 0.6144206882479736, + "learning_rate": 1.6108148328043672e-06, + "loss": 0.2852, + "step": 26781 + }, + { + "epoch": 1.254602520260458, + "grad_norm": 0.5809527374783169, + "learning_rate": 1.6106375864262355e-06, + "loss": 0.265, + "step": 26782 + }, + { + "epoch": 1.2546493652503865, + "grad_norm": 0.5632280254859104, + "learning_rate": 1.610460345166161e-06, + "loss": 0.2764, + "step": 26783 + }, + { + "epoch": 1.2546962102403147, + "grad_norm": 0.6110769387466599, + "learning_rate": 1.6102831090251642e-06, + "loss": 0.2834, + "step": 26784 + }, + { + "epoch": 1.254743055230243, + "grad_norm": 0.6018229582523834, + "learning_rate": 1.610105878004264e-06, + "loss": 0.2762, + "step": 26785 + }, + { + "epoch": 1.2547899002201715, + "grad_norm": 0.6218114935303786, + "learning_rate": 1.6099286521044815e-06, + "loss": 0.2804, + "step": 26786 + }, + { + "epoch": 1.2548367452101, + "grad_norm": 0.5307792841768529, + "learning_rate": 1.6097514313268352e-06, + "loss": 0.2439, + "step": 26787 + }, + { + "epoch": 1.2548835902000282, + "grad_norm": 0.6360826265015406, + "learning_rate": 1.609574215672346e-06, + "loss": 0.2799, + "step": 26788 + }, + { + "epoch": 1.2549304351899564, + "grad_norm": 0.6041196815303643, + "learning_rate": 1.6093970051420329e-06, + "loss": 0.283, + "step": 26789 + }, + { + "epoch": 1.2549772801798849, + "grad_norm": 0.603768579650892, + "learning_rate": 1.6092197997369162e-06, + "loss": 0.2887, + "step": 26790 + }, + { + "epoch": 1.255024125169813, + "grad_norm": 0.5865271528862436, + "learning_rate": 1.6090425994580161e-06, + "loss": 0.2576, + "step": 26791 + }, + { + "epoch": 1.2550709701597413, + "grad_norm": 0.6229755120247242, + "learning_rate": 1.6088654043063528e-06, + "loss": 0.2611, + "step": 26792 + }, + { + "epoch": 1.2551178151496698, + "grad_norm": 0.6491925179322295, + "learning_rate": 1.6086882142829435e-06, + "loss": 0.2688, + "step": 26793 + }, + { + "epoch": 1.255164660139598, + "grad_norm": 0.5596730885479173, + "learning_rate": 1.6085110293888102e-06, + "loss": 0.2582, + "step": 26794 + }, + { + "epoch": 1.2552115051295263, + "grad_norm": 0.5659226627637672, + "learning_rate": 1.6083338496249718e-06, + "loss": 0.2532, + "step": 26795 + }, + { + "epoch": 1.2552583501194547, + "grad_norm": 0.5602914759908114, + "learning_rate": 1.6081566749924476e-06, + "loss": 0.2666, + "step": 26796 + }, + { + "epoch": 1.255305195109383, + "grad_norm": 0.5856598786264917, + "learning_rate": 1.6079795054922576e-06, + "loss": 0.276, + "step": 26797 + }, + { + "epoch": 1.2553520400993115, + "grad_norm": 0.5973521956309592, + "learning_rate": 1.6078023411254219e-06, + "loss": 0.2678, + "step": 26798 + }, + { + "epoch": 1.2553988850892397, + "grad_norm": 0.5939287903142628, + "learning_rate": 1.60762518189296e-06, + "loss": 0.2899, + "step": 26799 + }, + { + "epoch": 1.2554457300791682, + "grad_norm": 0.6211563804223321, + "learning_rate": 1.6074480277958898e-06, + "loss": 0.2947, + "step": 26800 + }, + { + "epoch": 1.2554925750690964, + "grad_norm": 0.6170120779868132, + "learning_rate": 1.607270878835232e-06, + "loss": 0.2654, + "step": 26801 + }, + { + "epoch": 1.2555394200590246, + "grad_norm": 0.6393006575334489, + "learning_rate": 1.6070937350120063e-06, + "loss": 0.2881, + "step": 26802 + }, + { + "epoch": 1.255586265048953, + "grad_norm": 0.6070227506168991, + "learning_rate": 1.6069165963272316e-06, + "loss": 0.2731, + "step": 26803 + }, + { + "epoch": 1.2556331100388813, + "grad_norm": 0.5638791213223312, + "learning_rate": 1.6067394627819272e-06, + "loss": 0.2645, + "step": 26804 + }, + { + "epoch": 1.2556799550288096, + "grad_norm": 0.5861673823121923, + "learning_rate": 1.606562334377113e-06, + "loss": 0.2712, + "step": 26805 + }, + { + "epoch": 1.255726800018738, + "grad_norm": 0.5992177639621173, + "learning_rate": 1.6063852111138084e-06, + "loss": 0.2778, + "step": 26806 + }, + { + "epoch": 1.2557736450086663, + "grad_norm": 0.5915756543656339, + "learning_rate": 1.6062080929930312e-06, + "loss": 0.2771, + "step": 26807 + }, + { + "epoch": 1.2558204899985945, + "grad_norm": 0.5752252965578062, + "learning_rate": 1.6060309800158028e-06, + "loss": 0.2802, + "step": 26808 + }, + { + "epoch": 1.255867334988523, + "grad_norm": 0.5739357141102625, + "learning_rate": 1.6058538721831407e-06, + "loss": 0.2603, + "step": 26809 + }, + { + "epoch": 1.2559141799784512, + "grad_norm": 0.577744049225492, + "learning_rate": 1.6056767694960645e-06, + "loss": 0.2784, + "step": 26810 + }, + { + "epoch": 1.2559610249683797, + "grad_norm": 0.6048830032882571, + "learning_rate": 1.6054996719555954e-06, + "loss": 0.2934, + "step": 26811 + }, + { + "epoch": 1.256007869958308, + "grad_norm": 0.6641592890133066, + "learning_rate": 1.6053225795627498e-06, + "loss": 0.2888, + "step": 26812 + }, + { + "epoch": 1.2560547149482364, + "grad_norm": 0.5719297401016784, + "learning_rate": 1.6051454923185479e-06, + "loss": 0.2755, + "step": 26813 + }, + { + "epoch": 1.2561015599381646, + "grad_norm": 0.6039352119516956, + "learning_rate": 1.604968410224008e-06, + "loss": 0.2941, + "step": 26814 + }, + { + "epoch": 1.2561484049280929, + "grad_norm": 0.6234819583760531, + "learning_rate": 1.6047913332801511e-06, + "loss": 0.2894, + "step": 26815 + }, + { + "epoch": 1.2561952499180213, + "grad_norm": 0.6787824569209888, + "learning_rate": 1.6046142614879945e-06, + "loss": 0.2928, + "step": 26816 + }, + { + "epoch": 1.2562420949079496, + "grad_norm": 0.630002183348034, + "learning_rate": 1.6044371948485585e-06, + "loss": 0.2996, + "step": 26817 + }, + { + "epoch": 1.2562889398978778, + "grad_norm": 0.5773427232970112, + "learning_rate": 1.6042601333628604e-06, + "loss": 0.2725, + "step": 26818 + }, + { + "epoch": 1.2563357848878063, + "grad_norm": 0.5766278449651383, + "learning_rate": 1.604083077031921e-06, + "loss": 0.2631, + "step": 26819 + }, + { + "epoch": 1.2563826298777345, + "grad_norm": 0.599212258418971, + "learning_rate": 1.6039060258567575e-06, + "loss": 0.2526, + "step": 26820 + }, + { + "epoch": 1.2564294748676628, + "grad_norm": 0.5917301828969688, + "learning_rate": 1.6037289798383892e-06, + "loss": 0.2724, + "step": 26821 + }, + { + "epoch": 1.2564763198575912, + "grad_norm": 0.6021003771094106, + "learning_rate": 1.6035519389778364e-06, + "loss": 0.2591, + "step": 26822 + }, + { + "epoch": 1.2565231648475197, + "grad_norm": 0.5655214219306964, + "learning_rate": 1.6033749032761174e-06, + "loss": 0.2703, + "step": 26823 + }, + { + "epoch": 1.256570009837448, + "grad_norm": 0.5589287280573029, + "learning_rate": 1.6031978727342493e-06, + "loss": 0.2711, + "step": 26824 + }, + { + "epoch": 1.2566168548273762, + "grad_norm": 0.6225155229601045, + "learning_rate": 1.6030208473532522e-06, + "loss": 0.2726, + "step": 26825 + }, + { + "epoch": 1.2566636998173046, + "grad_norm": 0.6717940184088862, + "learning_rate": 1.6028438271341448e-06, + "loss": 0.3067, + "step": 26826 + }, + { + "epoch": 1.2567105448072329, + "grad_norm": 0.6144899783296411, + "learning_rate": 1.6026668120779455e-06, + "loss": 0.2728, + "step": 26827 + }, + { + "epoch": 1.2567573897971611, + "grad_norm": 0.5638972379369056, + "learning_rate": 1.602489802185673e-06, + "loss": 0.2694, + "step": 26828 + }, + { + "epoch": 1.2568042347870896, + "grad_norm": 0.550664823545356, + "learning_rate": 1.6023127974583471e-06, + "loss": 0.2657, + "step": 26829 + }, + { + "epoch": 1.2568510797770178, + "grad_norm": 0.5986010833799272, + "learning_rate": 1.602135797896985e-06, + "loss": 0.2674, + "step": 26830 + }, + { + "epoch": 1.256897924766946, + "grad_norm": 0.5792114696867406, + "learning_rate": 1.601958803502605e-06, + "loss": 0.2822, + "step": 26831 + }, + { + "epoch": 1.2569447697568745, + "grad_norm": 0.6079712913017116, + "learning_rate": 1.6017818142762271e-06, + "loss": 0.2687, + "step": 26832 + }, + { + "epoch": 1.2569916147468028, + "grad_norm": 0.627280745002791, + "learning_rate": 1.6016048302188689e-06, + "loss": 0.2807, + "step": 26833 + }, + { + "epoch": 1.2570384597367312, + "grad_norm": 0.576898948498024, + "learning_rate": 1.6014278513315488e-06, + "loss": 0.2877, + "step": 26834 + }, + { + "epoch": 1.2570853047266595, + "grad_norm": 0.6366280407079443, + "learning_rate": 1.6012508776152862e-06, + "loss": 0.313, + "step": 26835 + }, + { + "epoch": 1.257132149716588, + "grad_norm": 0.5933405297273193, + "learning_rate": 1.6010739090710986e-06, + "loss": 0.2836, + "step": 26836 + }, + { + "epoch": 1.2571789947065162, + "grad_norm": 0.636576908255867, + "learning_rate": 1.6008969457000048e-06, + "loss": 0.3017, + "step": 26837 + }, + { + "epoch": 1.2572258396964444, + "grad_norm": 0.5901032028105568, + "learning_rate": 1.6007199875030228e-06, + "loss": 0.2731, + "step": 26838 + }, + { + "epoch": 1.2572726846863729, + "grad_norm": 0.6146456425676043, + "learning_rate": 1.6005430344811713e-06, + "loss": 0.2836, + "step": 26839 + }, + { + "epoch": 1.2573195296763011, + "grad_norm": 0.5830618171755448, + "learning_rate": 1.6003660866354686e-06, + "loss": 0.2747, + "step": 26840 + }, + { + "epoch": 1.2573663746662294, + "grad_norm": 0.6304858995278768, + "learning_rate": 1.6001891439669337e-06, + "loss": 0.2785, + "step": 26841 + }, + { + "epoch": 1.2574132196561578, + "grad_norm": 0.5797278300508294, + "learning_rate": 1.6000122064765832e-06, + "loss": 0.2689, + "step": 26842 + }, + { + "epoch": 1.257460064646086, + "grad_norm": 0.5652247428472293, + "learning_rate": 1.599835274165436e-06, + "loss": 0.2567, + "step": 26843 + }, + { + "epoch": 1.2575069096360143, + "grad_norm": 0.5943234345033725, + "learning_rate": 1.5996583470345114e-06, + "loss": 0.2574, + "step": 26844 + }, + { + "epoch": 1.2575537546259428, + "grad_norm": 0.5811267836624041, + "learning_rate": 1.5994814250848263e-06, + "loss": 0.2797, + "step": 26845 + }, + { + "epoch": 1.257600599615871, + "grad_norm": 0.6018629733132427, + "learning_rate": 1.5993045083173995e-06, + "loss": 0.2747, + "step": 26846 + }, + { + "epoch": 1.2576474446057995, + "grad_norm": 0.6220801303861413, + "learning_rate": 1.5991275967332487e-06, + "loss": 0.2829, + "step": 26847 + }, + { + "epoch": 1.2576942895957277, + "grad_norm": 0.6120603082860668, + "learning_rate": 1.5989506903333923e-06, + "loss": 0.2915, + "step": 26848 + }, + { + "epoch": 1.2577411345856562, + "grad_norm": 0.5675640689527507, + "learning_rate": 1.5987737891188482e-06, + "loss": 0.2835, + "step": 26849 + }, + { + "epoch": 1.2577879795755844, + "grad_norm": 0.6166815698769321, + "learning_rate": 1.5985968930906343e-06, + "loss": 0.2778, + "step": 26850 + }, + { + "epoch": 1.2578348245655127, + "grad_norm": 0.5642755424064189, + "learning_rate": 1.5984200022497684e-06, + "loss": 0.2556, + "step": 26851 + }, + { + "epoch": 1.2578816695554411, + "grad_norm": 0.618439681190948, + "learning_rate": 1.598243116597269e-06, + "loss": 0.2836, + "step": 26852 + }, + { + "epoch": 1.2579285145453694, + "grad_norm": 0.6518459471883353, + "learning_rate": 1.5980662361341545e-06, + "loss": 0.2965, + "step": 26853 + }, + { + "epoch": 1.2579753595352976, + "grad_norm": 0.5978556450994656, + "learning_rate": 1.597889360861441e-06, + "loss": 0.2759, + "step": 26854 + }, + { + "epoch": 1.258022204525226, + "grad_norm": 0.548924835117192, + "learning_rate": 1.5977124907801484e-06, + "loss": 0.2604, + "step": 26855 + }, + { + "epoch": 1.2580690495151543, + "grad_norm": 0.5965415749974488, + "learning_rate": 1.597535625891293e-06, + "loss": 0.279, + "step": 26856 + }, + { + "epoch": 1.2581158945050825, + "grad_norm": 0.5905958764705664, + "learning_rate": 1.5973587661958936e-06, + "loss": 0.273, + "step": 26857 + }, + { + "epoch": 1.258162739495011, + "grad_norm": 0.5905271375275721, + "learning_rate": 1.597181911694967e-06, + "loss": 0.2739, + "step": 26858 + }, + { + "epoch": 1.2582095844849395, + "grad_norm": 0.6159643489364561, + "learning_rate": 1.5970050623895334e-06, + "loss": 0.2797, + "step": 26859 + }, + { + "epoch": 1.2582564294748677, + "grad_norm": 0.5939615722490653, + "learning_rate": 1.596828218280607e-06, + "loss": 0.2754, + "step": 26860 + }, + { + "epoch": 1.258303274464796, + "grad_norm": 0.6694634243769981, + "learning_rate": 1.596651379369208e-06, + "loss": 0.2882, + "step": 26861 + }, + { + "epoch": 1.2583501194547244, + "grad_norm": 0.5328965308461514, + "learning_rate": 1.5964745456563525e-06, + "loss": 0.2626, + "step": 26862 + }, + { + "epoch": 1.2583969644446527, + "grad_norm": 0.5853723430487441, + "learning_rate": 1.5962977171430587e-06, + "loss": 0.2792, + "step": 26863 + }, + { + "epoch": 1.258443809434581, + "grad_norm": 0.5661085636352663, + "learning_rate": 1.5961208938303453e-06, + "loss": 0.261, + "step": 26864 + }, + { + "epoch": 1.2584906544245094, + "grad_norm": 0.6169987057797663, + "learning_rate": 1.5959440757192296e-06, + "loss": 0.2693, + "step": 26865 + }, + { + "epoch": 1.2585374994144376, + "grad_norm": 0.561103970409341, + "learning_rate": 1.595767262810727e-06, + "loss": 0.2739, + "step": 26866 + }, + { + "epoch": 1.2585843444043658, + "grad_norm": 0.5592731591214584, + "learning_rate": 1.5955904551058571e-06, + "loss": 0.2548, + "step": 26867 + }, + { + "epoch": 1.2586311893942943, + "grad_norm": 0.6496750813563559, + "learning_rate": 1.5954136526056368e-06, + "loss": 0.2923, + "step": 26868 + }, + { + "epoch": 1.2586780343842225, + "grad_norm": 0.582975087835073, + "learning_rate": 1.5952368553110834e-06, + "loss": 0.2616, + "step": 26869 + }, + { + "epoch": 1.258724879374151, + "grad_norm": 0.614955398134667, + "learning_rate": 1.5950600632232144e-06, + "loss": 0.2903, + "step": 26870 + }, + { + "epoch": 1.2587717243640792, + "grad_norm": 0.585143121627142, + "learning_rate": 1.5948832763430478e-06, + "loss": 0.2859, + "step": 26871 + }, + { + "epoch": 1.2588185693540077, + "grad_norm": 0.5405177402796313, + "learning_rate": 1.594706494671601e-06, + "loss": 0.2527, + "step": 26872 + }, + { + "epoch": 1.258865414343936, + "grad_norm": 0.5897868122216907, + "learning_rate": 1.5945297182098896e-06, + "loss": 0.2771, + "step": 26873 + }, + { + "epoch": 1.2589122593338642, + "grad_norm": 0.5888218295960608, + "learning_rate": 1.5943529469589323e-06, + "loss": 0.2719, + "step": 26874 + }, + { + "epoch": 1.2589591043237927, + "grad_norm": 0.5998149110259344, + "learning_rate": 1.5941761809197464e-06, + "loss": 0.2864, + "step": 26875 + }, + { + "epoch": 1.259005949313721, + "grad_norm": 0.5716886885669876, + "learning_rate": 1.5939994200933489e-06, + "loss": 0.2665, + "step": 26876 + }, + { + "epoch": 1.2590527943036491, + "grad_norm": 0.6188825148883434, + "learning_rate": 1.5938226644807564e-06, + "loss": 0.2968, + "step": 26877 + }, + { + "epoch": 1.2590996392935776, + "grad_norm": 0.6012219845403031, + "learning_rate": 1.5936459140829882e-06, + "loss": 0.2701, + "step": 26878 + }, + { + "epoch": 1.2591464842835058, + "grad_norm": 0.5862204881767449, + "learning_rate": 1.5934691689010595e-06, + "loss": 0.2778, + "step": 26879 + }, + { + "epoch": 1.259193329273434, + "grad_norm": 0.5684230382644012, + "learning_rate": 1.593292428935987e-06, + "loss": 0.2615, + "step": 26880 + }, + { + "epoch": 1.2592401742633625, + "grad_norm": 0.6228081117577906, + "learning_rate": 1.5931156941887888e-06, + "loss": 0.2806, + "step": 26881 + }, + { + "epoch": 1.2592870192532908, + "grad_norm": 0.6706792554846066, + "learning_rate": 1.5929389646604825e-06, + "loss": 0.2798, + "step": 26882 + }, + { + "epoch": 1.2593338642432192, + "grad_norm": 0.6152711886921444, + "learning_rate": 1.5927622403520837e-06, + "loss": 0.2694, + "step": 26883 + }, + { + "epoch": 1.2593807092331475, + "grad_norm": 0.5439243867364331, + "learning_rate": 1.5925855212646113e-06, + "loss": 0.2557, + "step": 26884 + }, + { + "epoch": 1.259427554223076, + "grad_norm": 0.608878833914181, + "learning_rate": 1.5924088073990803e-06, + "loss": 0.2743, + "step": 26885 + }, + { + "epoch": 1.2594743992130042, + "grad_norm": 0.6184379231471946, + "learning_rate": 1.5922320987565088e-06, + "loss": 0.26, + "step": 26886 + }, + { + "epoch": 1.2595212442029324, + "grad_norm": 0.5278464890039399, + "learning_rate": 1.592055395337913e-06, + "loss": 0.2439, + "step": 26887 + }, + { + "epoch": 1.259568089192861, + "grad_norm": 0.5770796880623948, + "learning_rate": 1.5918786971443103e-06, + "loss": 0.2596, + "step": 26888 + }, + { + "epoch": 1.2596149341827891, + "grad_norm": 0.594411652846992, + "learning_rate": 1.5917020041767178e-06, + "loss": 0.285, + "step": 26889 + }, + { + "epoch": 1.2596617791727174, + "grad_norm": 0.5703061113023979, + "learning_rate": 1.5915253164361528e-06, + "loss": 0.2776, + "step": 26890 + }, + { + "epoch": 1.2597086241626458, + "grad_norm": 0.6184838334478107, + "learning_rate": 1.59134863392363e-06, + "loss": 0.2812, + "step": 26891 + }, + { + "epoch": 1.259755469152574, + "grad_norm": 0.5893962998414656, + "learning_rate": 1.5911719566401674e-06, + "loss": 0.2608, + "step": 26892 + }, + { + "epoch": 1.2598023141425023, + "grad_norm": 0.5967762909292104, + "learning_rate": 1.590995284586782e-06, + "loss": 0.2725, + "step": 26893 + }, + { + "epoch": 1.2598491591324308, + "grad_norm": 0.5510094217945316, + "learning_rate": 1.5908186177644902e-06, + "loss": 0.2537, + "step": 26894 + }, + { + "epoch": 1.2598960041223592, + "grad_norm": 0.5885332825623154, + "learning_rate": 1.5906419561743092e-06, + "loss": 0.2717, + "step": 26895 + }, + { + "epoch": 1.2599428491122875, + "grad_norm": 0.6068978839265612, + "learning_rate": 1.5904652998172552e-06, + "loss": 0.286, + "step": 26896 + }, + { + "epoch": 1.2599896941022157, + "grad_norm": 0.6478751754715066, + "learning_rate": 1.5902886486943448e-06, + "loss": 0.2898, + "step": 26897 + }, + { + "epoch": 1.2600365390921442, + "grad_norm": 0.5580028575766297, + "learning_rate": 1.5901120028065936e-06, + "loss": 0.277, + "step": 26898 + }, + { + "epoch": 1.2600833840820724, + "grad_norm": 0.6126642914997599, + "learning_rate": 1.5899353621550201e-06, + "loss": 0.2635, + "step": 26899 + }, + { + "epoch": 1.2601302290720007, + "grad_norm": 0.5785180271706991, + "learning_rate": 1.5897587267406395e-06, + "loss": 0.2724, + "step": 26900 + }, + { + "epoch": 1.2601770740619291, + "grad_norm": 0.6083411706997168, + "learning_rate": 1.5895820965644681e-06, + "loss": 0.2825, + "step": 26901 + }, + { + "epoch": 1.2602239190518574, + "grad_norm": 0.5739905940526379, + "learning_rate": 1.5894054716275242e-06, + "loss": 0.2683, + "step": 26902 + }, + { + "epoch": 1.2602707640417856, + "grad_norm": 0.593715971333918, + "learning_rate": 1.5892288519308224e-06, + "loss": 0.27, + "step": 26903 + }, + { + "epoch": 1.260317609031714, + "grad_norm": 0.6233647120416126, + "learning_rate": 1.5890522374753793e-06, + "loss": 0.273, + "step": 26904 + }, + { + "epoch": 1.2603644540216423, + "grad_norm": 0.5860661284981212, + "learning_rate": 1.5888756282622114e-06, + "loss": 0.266, + "step": 26905 + }, + { + "epoch": 1.2604112990115708, + "grad_norm": 0.5938174037026959, + "learning_rate": 1.588699024292336e-06, + "loss": 0.2936, + "step": 26906 + }, + { + "epoch": 1.260458144001499, + "grad_norm": 0.6095996629646238, + "learning_rate": 1.588522425566768e-06, + "loss": 0.2715, + "step": 26907 + }, + { + "epoch": 1.2605049889914275, + "grad_norm": 0.6093044373667612, + "learning_rate": 1.5883458320865252e-06, + "loss": 0.2784, + "step": 26908 + }, + { + "epoch": 1.2605518339813557, + "grad_norm": 0.6146758730720384, + "learning_rate": 1.5881692438526225e-06, + "loss": 0.2792, + "step": 26909 + }, + { + "epoch": 1.260598678971284, + "grad_norm": 0.6317592173124298, + "learning_rate": 1.5879926608660767e-06, + "loss": 0.2834, + "step": 26910 + }, + { + "epoch": 1.2606455239612124, + "grad_norm": 0.6018804487037523, + "learning_rate": 1.5878160831279035e-06, + "loss": 0.2639, + "step": 26911 + }, + { + "epoch": 1.2606923689511407, + "grad_norm": 0.6248394817883354, + "learning_rate": 1.5876395106391196e-06, + "loss": 0.2841, + "step": 26912 + }, + { + "epoch": 1.260739213941069, + "grad_norm": 0.5695861180780938, + "learning_rate": 1.5874629434007412e-06, + "loss": 0.2727, + "step": 26913 + }, + { + "epoch": 1.2607860589309974, + "grad_norm": 0.6623187539453318, + "learning_rate": 1.5872863814137852e-06, + "loss": 0.2909, + "step": 26914 + }, + { + "epoch": 1.2608329039209256, + "grad_norm": 0.6165955509852161, + "learning_rate": 1.5871098246792652e-06, + "loss": 0.2719, + "step": 26915 + }, + { + "epoch": 1.2608797489108539, + "grad_norm": 0.5894162268550096, + "learning_rate": 1.586933273198199e-06, + "loss": 0.2856, + "step": 26916 + }, + { + "epoch": 1.2609265939007823, + "grad_norm": 0.6334768553960446, + "learning_rate": 1.5867567269716028e-06, + "loss": 0.2869, + "step": 26917 + }, + { + "epoch": 1.2609734388907106, + "grad_norm": 0.5650784680005447, + "learning_rate": 1.5865801860004914e-06, + "loss": 0.277, + "step": 26918 + }, + { + "epoch": 1.261020283880639, + "grad_norm": 0.5522388952513555, + "learning_rate": 1.5864036502858815e-06, + "loss": 0.2822, + "step": 26919 + }, + { + "epoch": 1.2610671288705673, + "grad_norm": 0.6161582188218448, + "learning_rate": 1.58622711982879e-06, + "loss": 0.2778, + "step": 26920 + }, + { + "epoch": 1.2611139738604957, + "grad_norm": 0.6309985392869166, + "learning_rate": 1.5860505946302313e-06, + "loss": 0.2815, + "step": 26921 + }, + { + "epoch": 1.261160818850424, + "grad_norm": 0.6570626104754155, + "learning_rate": 1.585874074691221e-06, + "loss": 0.2784, + "step": 26922 + }, + { + "epoch": 1.2612076638403522, + "grad_norm": 0.605053229364492, + "learning_rate": 1.5856975600127762e-06, + "loss": 0.278, + "step": 26923 + }, + { + "epoch": 1.2612545088302807, + "grad_norm": 0.611306954077933, + "learning_rate": 1.585521050595912e-06, + "loss": 0.2837, + "step": 26924 + }, + { + "epoch": 1.261301353820209, + "grad_norm": 0.663776659054447, + "learning_rate": 1.5853445464416442e-06, + "loss": 0.2802, + "step": 26925 + }, + { + "epoch": 1.2613481988101372, + "grad_norm": 0.6387783385220538, + "learning_rate": 1.5851680475509895e-06, + "loss": 0.2677, + "step": 26926 + }, + { + "epoch": 1.2613950438000656, + "grad_norm": 0.5488239107931839, + "learning_rate": 1.584991553924962e-06, + "loss": 0.2549, + "step": 26927 + }, + { + "epoch": 1.2614418887899939, + "grad_norm": 0.593322247715522, + "learning_rate": 1.5848150655645783e-06, + "loss": 0.2623, + "step": 26928 + }, + { + "epoch": 1.261488733779922, + "grad_norm": 0.6303344409543119, + "learning_rate": 1.5846385824708536e-06, + "loss": 0.2891, + "step": 26929 + }, + { + "epoch": 1.2615355787698506, + "grad_norm": 0.6020662922459624, + "learning_rate": 1.5844621046448034e-06, + "loss": 0.2611, + "step": 26930 + }, + { + "epoch": 1.261582423759779, + "grad_norm": 0.6204624228753242, + "learning_rate": 1.5842856320874445e-06, + "loss": 0.2788, + "step": 26931 + }, + { + "epoch": 1.2616292687497073, + "grad_norm": 0.6044965470823631, + "learning_rate": 1.5841091647997922e-06, + "loss": 0.2738, + "step": 26932 + }, + { + "epoch": 1.2616761137396355, + "grad_norm": 0.6142422656191391, + "learning_rate": 1.5839327027828605e-06, + "loss": 0.2693, + "step": 26933 + }, + { + "epoch": 1.261722958729564, + "grad_norm": 0.6040359164320702, + "learning_rate": 1.5837562460376655e-06, + "loss": 0.2783, + "step": 26934 + }, + { + "epoch": 1.2617698037194922, + "grad_norm": 0.5919869654199911, + "learning_rate": 1.5835797945652238e-06, + "loss": 0.2472, + "step": 26935 + }, + { + "epoch": 1.2618166487094205, + "grad_norm": 0.5997179674820153, + "learning_rate": 1.5834033483665495e-06, + "loss": 0.2796, + "step": 26936 + }, + { + "epoch": 1.261863493699349, + "grad_norm": 0.5854568157571737, + "learning_rate": 1.5832269074426584e-06, + "loss": 0.264, + "step": 26937 + }, + { + "epoch": 1.2619103386892772, + "grad_norm": 0.5877152744673071, + "learning_rate": 1.5830504717945666e-06, + "loss": 0.2607, + "step": 26938 + }, + { + "epoch": 1.2619571836792054, + "grad_norm": 0.6151267490901143, + "learning_rate": 1.5828740414232896e-06, + "loss": 0.2836, + "step": 26939 + }, + { + "epoch": 1.2620040286691339, + "grad_norm": 0.5750713214528438, + "learning_rate": 1.5826976163298407e-06, + "loss": 0.2705, + "step": 26940 + }, + { + "epoch": 1.262050873659062, + "grad_norm": 0.5877547286920252, + "learning_rate": 1.5825211965152365e-06, + "loss": 0.2702, + "step": 26941 + }, + { + "epoch": 1.2620977186489906, + "grad_norm": 0.6059851659154584, + "learning_rate": 1.5823447819804927e-06, + "loss": 0.2778, + "step": 26942 + }, + { + "epoch": 1.2621445636389188, + "grad_norm": 0.6205400398420255, + "learning_rate": 1.5821683727266235e-06, + "loss": 0.3038, + "step": 26943 + }, + { + "epoch": 1.2621914086288473, + "grad_norm": 0.5750494284402561, + "learning_rate": 1.5819919687546453e-06, + "loss": 0.2576, + "step": 26944 + }, + { + "epoch": 1.2622382536187755, + "grad_norm": 0.5923337302624871, + "learning_rate": 1.5818155700655724e-06, + "loss": 0.279, + "step": 26945 + }, + { + "epoch": 1.2622850986087037, + "grad_norm": 0.6705766114893638, + "learning_rate": 1.5816391766604206e-06, + "loss": 0.2867, + "step": 26946 + }, + { + "epoch": 1.2623319435986322, + "grad_norm": 0.5762492998235158, + "learning_rate": 1.5814627885402034e-06, + "loss": 0.2762, + "step": 26947 + }, + { + "epoch": 1.2623787885885605, + "grad_norm": 0.6019669326941348, + "learning_rate": 1.581286405705938e-06, + "loss": 0.2845, + "step": 26948 + }, + { + "epoch": 1.2624256335784887, + "grad_norm": 0.5262102906686803, + "learning_rate": 1.5811100281586371e-06, + "loss": 0.2368, + "step": 26949 + }, + { + "epoch": 1.2624724785684172, + "grad_norm": 0.6109036528980952, + "learning_rate": 1.580933655899318e-06, + "loss": 0.2845, + "step": 26950 + }, + { + "epoch": 1.2625193235583454, + "grad_norm": 0.5437191626948048, + "learning_rate": 1.580757288928995e-06, + "loss": 0.2641, + "step": 26951 + }, + { + "epoch": 1.2625661685482736, + "grad_norm": 0.5991127172282599, + "learning_rate": 1.5805809272486826e-06, + "loss": 0.2809, + "step": 26952 + }, + { + "epoch": 1.262613013538202, + "grad_norm": 0.6341101563562999, + "learning_rate": 1.5804045708593956e-06, + "loss": 0.272, + "step": 26953 + }, + { + "epoch": 1.2626598585281303, + "grad_norm": 0.604038360590819, + "learning_rate": 1.5802282197621487e-06, + "loss": 0.2729, + "step": 26954 + }, + { + "epoch": 1.2627067035180588, + "grad_norm": 0.6705466068750445, + "learning_rate": 1.580051873957958e-06, + "loss": 0.2841, + "step": 26955 + }, + { + "epoch": 1.262753548507987, + "grad_norm": 0.5940677734243541, + "learning_rate": 1.579875533447837e-06, + "loss": 0.2724, + "step": 26956 + }, + { + "epoch": 1.2628003934979155, + "grad_norm": 0.672970919490316, + "learning_rate": 1.5796991982328019e-06, + "loss": 0.2918, + "step": 26957 + }, + { + "epoch": 1.2628472384878437, + "grad_norm": 0.6610697265108836, + "learning_rate": 1.5795228683138658e-06, + "loss": 0.2948, + "step": 26958 + }, + { + "epoch": 1.262894083477772, + "grad_norm": 0.5989461909267245, + "learning_rate": 1.5793465436920446e-06, + "loss": 0.2857, + "step": 26959 + }, + { + "epoch": 1.2629409284677005, + "grad_norm": 0.5917847143887757, + "learning_rate": 1.579170224368352e-06, + "loss": 0.2567, + "step": 26960 + }, + { + "epoch": 1.2629877734576287, + "grad_norm": 0.6122869216552954, + "learning_rate": 1.578993910343804e-06, + "loss": 0.2788, + "step": 26961 + }, + { + "epoch": 1.263034618447557, + "grad_norm": 0.6120970141646598, + "learning_rate": 1.5788176016194145e-06, + "loss": 0.284, + "step": 26962 + }, + { + "epoch": 1.2630814634374854, + "grad_norm": 0.5707435708637223, + "learning_rate": 1.5786412981961986e-06, + "loss": 0.2762, + "step": 26963 + }, + { + "epoch": 1.2631283084274136, + "grad_norm": 0.602231079823529, + "learning_rate": 1.5784650000751694e-06, + "loss": 0.2914, + "step": 26964 + }, + { + "epoch": 1.2631751534173419, + "grad_norm": 0.6272674598120653, + "learning_rate": 1.5782887072573428e-06, + "loss": 0.2861, + "step": 26965 + }, + { + "epoch": 1.2632219984072703, + "grad_norm": 0.580877543856037, + "learning_rate": 1.5781124197437337e-06, + "loss": 0.2549, + "step": 26966 + }, + { + "epoch": 1.2632688433971988, + "grad_norm": 0.5746987782353952, + "learning_rate": 1.577936137535355e-06, + "loss": 0.2705, + "step": 26967 + }, + { + "epoch": 1.263315688387127, + "grad_norm": 0.5469751101652216, + "learning_rate": 1.5777598606332224e-06, + "loss": 0.2667, + "step": 26968 + }, + { + "epoch": 1.2633625333770553, + "grad_norm": 0.5644057179468625, + "learning_rate": 1.5775835890383507e-06, + "loss": 0.2837, + "step": 26969 + }, + { + "epoch": 1.2634093783669837, + "grad_norm": 0.5680332716957623, + "learning_rate": 1.5774073227517534e-06, + "loss": 0.2704, + "step": 26970 + }, + { + "epoch": 1.263456223356912, + "grad_norm": 0.558897547497184, + "learning_rate": 1.5772310617744442e-06, + "loss": 0.2629, + "step": 26971 + }, + { + "epoch": 1.2635030683468402, + "grad_norm": 0.613442226405582, + "learning_rate": 1.5770548061074386e-06, + "loss": 0.2704, + "step": 26972 + }, + { + "epoch": 1.2635499133367687, + "grad_norm": 0.59326298680493, + "learning_rate": 1.576878555751751e-06, + "loss": 0.2834, + "step": 26973 + }, + { + "epoch": 1.263596758326697, + "grad_norm": 0.655709594319533, + "learning_rate": 1.5767023107083948e-06, + "loss": 0.2901, + "step": 26974 + }, + { + "epoch": 1.2636436033166252, + "grad_norm": 0.5710041490757846, + "learning_rate": 1.576526070978386e-06, + "loss": 0.2646, + "step": 26975 + }, + { + "epoch": 1.2636904483065536, + "grad_norm": 0.6058119540288279, + "learning_rate": 1.576349836562736e-06, + "loss": 0.2584, + "step": 26976 + }, + { + "epoch": 1.2637372932964819, + "grad_norm": 0.6001855591617228, + "learning_rate": 1.5761736074624615e-06, + "loss": 0.2873, + "step": 26977 + }, + { + "epoch": 1.2637841382864103, + "grad_norm": 0.6327053824047175, + "learning_rate": 1.575997383678575e-06, + "loss": 0.2616, + "step": 26978 + }, + { + "epoch": 1.2638309832763386, + "grad_norm": 0.5791244833046655, + "learning_rate": 1.5758211652120914e-06, + "loss": 0.2588, + "step": 26979 + }, + { + "epoch": 1.263877828266267, + "grad_norm": 0.6115130299959365, + "learning_rate": 1.5756449520640251e-06, + "loss": 0.2783, + "step": 26980 + }, + { + "epoch": 1.2639246732561953, + "grad_norm": 0.5536409886874329, + "learning_rate": 1.5754687442353906e-06, + "loss": 0.2649, + "step": 26981 + }, + { + "epoch": 1.2639715182461235, + "grad_norm": 0.6102621131051456, + "learning_rate": 1.5752925417271997e-06, + "loss": 0.2778, + "step": 26982 + }, + { + "epoch": 1.264018363236052, + "grad_norm": 0.5649806398103838, + "learning_rate": 1.575116344540468e-06, + "loss": 0.2695, + "step": 26983 + }, + { + "epoch": 1.2640652082259802, + "grad_norm": 0.5897060653626136, + "learning_rate": 1.5749401526762098e-06, + "loss": 0.2527, + "step": 26984 + }, + { + "epoch": 1.2641120532159085, + "grad_norm": 0.5634871910643353, + "learning_rate": 1.5747639661354381e-06, + "loss": 0.2708, + "step": 26985 + }, + { + "epoch": 1.264158898205837, + "grad_norm": 0.5660550558265393, + "learning_rate": 1.5745877849191674e-06, + "loss": 0.2761, + "step": 26986 + }, + { + "epoch": 1.2642057431957652, + "grad_norm": 0.5730415484857825, + "learning_rate": 1.5744116090284118e-06, + "loss": 0.273, + "step": 26987 + }, + { + "epoch": 1.2642525881856934, + "grad_norm": 0.6183936302602353, + "learning_rate": 1.5742354384641846e-06, + "loss": 0.2796, + "step": 26988 + }, + { + "epoch": 1.2642994331756219, + "grad_norm": 0.6087640784790879, + "learning_rate": 1.574059273227499e-06, + "loss": 0.2604, + "step": 26989 + }, + { + "epoch": 1.2643462781655501, + "grad_norm": 0.5968110458275517, + "learning_rate": 1.5738831133193703e-06, + "loss": 0.2908, + "step": 26990 + }, + { + "epoch": 1.2643931231554786, + "grad_norm": 0.5806799764441567, + "learning_rate": 1.573706958740811e-06, + "loss": 0.2715, + "step": 26991 + }, + { + "epoch": 1.2644399681454068, + "grad_norm": 0.5718217909626699, + "learning_rate": 1.5735308094928352e-06, + "loss": 0.2611, + "step": 26992 + }, + { + "epoch": 1.2644868131353353, + "grad_norm": 0.5906847447448295, + "learning_rate": 1.5733546655764578e-06, + "loss": 0.2678, + "step": 26993 + }, + { + "epoch": 1.2645336581252635, + "grad_norm": 0.575395838525883, + "learning_rate": 1.5731785269926907e-06, + "loss": 0.2615, + "step": 26994 + }, + { + "epoch": 1.2645805031151918, + "grad_norm": 0.607932784977069, + "learning_rate": 1.5730023937425482e-06, + "loss": 0.2609, + "step": 26995 + }, + { + "epoch": 1.2646273481051202, + "grad_norm": 0.6181522975727306, + "learning_rate": 1.5728262658270437e-06, + "loss": 0.2811, + "step": 26996 + }, + { + "epoch": 1.2646741930950485, + "grad_norm": 0.6828071625222966, + "learning_rate": 1.5726501432471914e-06, + "loss": 0.2876, + "step": 26997 + }, + { + "epoch": 1.2647210380849767, + "grad_norm": 0.5919122261997886, + "learning_rate": 1.5724740260040041e-06, + "loss": 0.2698, + "step": 26998 + }, + { + "epoch": 1.2647678830749052, + "grad_norm": 0.574999609268829, + "learning_rate": 1.5722979140984968e-06, + "loss": 0.2669, + "step": 26999 + }, + { + "epoch": 1.2648147280648334, + "grad_norm": 0.6435336858065697, + "learning_rate": 1.5721218075316808e-06, + "loss": 0.281, + "step": 27000 + }, + { + "epoch": 1.2648615730547617, + "grad_norm": 0.6406886847795209, + "learning_rate": 1.5719457063045707e-06, + "loss": 0.2911, + "step": 27001 + }, + { + "epoch": 1.2649084180446901, + "grad_norm": 0.6003891429919309, + "learning_rate": 1.5717696104181795e-06, + "loss": 0.2829, + "step": 27002 + }, + { + "epoch": 1.2649552630346186, + "grad_norm": 0.6131986594241754, + "learning_rate": 1.571593519873521e-06, + "loss": 0.2766, + "step": 27003 + }, + { + "epoch": 1.2650021080245468, + "grad_norm": 0.5953197553267172, + "learning_rate": 1.5714174346716089e-06, + "loss": 0.2775, + "step": 27004 + }, + { + "epoch": 1.265048953014475, + "grad_norm": 0.5798946765131866, + "learning_rate": 1.5712413548134553e-06, + "loss": 0.2577, + "step": 27005 + }, + { + "epoch": 1.2650957980044035, + "grad_norm": 0.6126382703318001, + "learning_rate": 1.571065280300076e-06, + "loss": 0.2705, + "step": 27006 + }, + { + "epoch": 1.2651426429943318, + "grad_norm": 0.6073898583977987, + "learning_rate": 1.5708892111324808e-06, + "loss": 0.2707, + "step": 27007 + }, + { + "epoch": 1.26518948798426, + "grad_norm": 0.6033432580132908, + "learning_rate": 1.5707131473116855e-06, + "loss": 0.2829, + "step": 27008 + }, + { + "epoch": 1.2652363329741885, + "grad_norm": 0.5841112338561916, + "learning_rate": 1.5705370888387016e-06, + "loss": 0.2685, + "step": 27009 + }, + { + "epoch": 1.2652831779641167, + "grad_norm": 0.6199087513922964, + "learning_rate": 1.5703610357145435e-06, + "loss": 0.2943, + "step": 27010 + }, + { + "epoch": 1.265330022954045, + "grad_norm": 0.5872533023725061, + "learning_rate": 1.5701849879402244e-06, + "loss": 0.2721, + "step": 27011 + }, + { + "epoch": 1.2653768679439734, + "grad_norm": 0.6000927147816526, + "learning_rate": 1.5700089455167578e-06, + "loss": 0.2817, + "step": 27012 + }, + { + "epoch": 1.2654237129339017, + "grad_norm": 0.5832410916059101, + "learning_rate": 1.5698329084451545e-06, + "loss": 0.272, + "step": 27013 + }, + { + "epoch": 1.2654705579238301, + "grad_norm": 0.5781801142493861, + "learning_rate": 1.5696568767264293e-06, + "loss": 0.2758, + "step": 27014 + }, + { + "epoch": 1.2655174029137584, + "grad_norm": 0.5984440425687739, + "learning_rate": 1.5694808503615951e-06, + "loss": 0.2859, + "step": 27015 + }, + { + "epoch": 1.2655642479036868, + "grad_norm": 0.5911375749021668, + "learning_rate": 1.5693048293516644e-06, + "loss": 0.2772, + "step": 27016 + }, + { + "epoch": 1.265611092893615, + "grad_norm": 0.6240202409813419, + "learning_rate": 1.5691288136976506e-06, + "loss": 0.2797, + "step": 27017 + }, + { + "epoch": 1.2656579378835433, + "grad_norm": 0.5823350013968439, + "learning_rate": 1.5689528034005675e-06, + "loss": 0.2696, + "step": 27018 + }, + { + "epoch": 1.2657047828734718, + "grad_norm": 0.5687590350207451, + "learning_rate": 1.5687767984614265e-06, + "loss": 0.2652, + "step": 27019 + }, + { + "epoch": 1.2657516278634, + "grad_norm": 0.5633162062823335, + "learning_rate": 1.5686007988812404e-06, + "loss": 0.273, + "step": 27020 + }, + { + "epoch": 1.2657984728533282, + "grad_norm": 0.6377213655177447, + "learning_rate": 1.5684248046610229e-06, + "loss": 0.2985, + "step": 27021 + }, + { + "epoch": 1.2658453178432567, + "grad_norm": 0.602105043374863, + "learning_rate": 1.5682488158017866e-06, + "loss": 0.281, + "step": 27022 + }, + { + "epoch": 1.265892162833185, + "grad_norm": 0.6147728875426747, + "learning_rate": 1.568072832304544e-06, + "loss": 0.2752, + "step": 27023 + }, + { + "epoch": 1.2659390078231132, + "grad_norm": 0.5841515733174619, + "learning_rate": 1.5678968541703088e-06, + "loss": 0.2722, + "step": 27024 + }, + { + "epoch": 1.2659858528130417, + "grad_norm": 0.5975356517027255, + "learning_rate": 1.5677208814000919e-06, + "loss": 0.2718, + "step": 27025 + }, + { + "epoch": 1.26603269780297, + "grad_norm": 0.5790823372805594, + "learning_rate": 1.5675449139949075e-06, + "loss": 0.2735, + "step": 27026 + }, + { + "epoch": 1.2660795427928984, + "grad_norm": 0.5903227068866109, + "learning_rate": 1.5673689519557671e-06, + "loss": 0.2855, + "step": 27027 + }, + { + "epoch": 1.2661263877828266, + "grad_norm": 0.5663886145440656, + "learning_rate": 1.5671929952836846e-06, + "loss": 0.2686, + "step": 27028 + }, + { + "epoch": 1.266173232772755, + "grad_norm": 0.6165837410923803, + "learning_rate": 1.567017043979672e-06, + "loss": 0.2742, + "step": 27029 + }, + { + "epoch": 1.2662200777626833, + "grad_norm": 0.6175761482942956, + "learning_rate": 1.5668410980447424e-06, + "loss": 0.2717, + "step": 27030 + }, + { + "epoch": 1.2662669227526115, + "grad_norm": 0.5836928768507846, + "learning_rate": 1.566665157479907e-06, + "loss": 0.268, + "step": 27031 + }, + { + "epoch": 1.26631376774254, + "grad_norm": 0.5869986094795541, + "learning_rate": 1.5664892222861784e-06, + "loss": 0.2813, + "step": 27032 + }, + { + "epoch": 1.2663606127324682, + "grad_norm": 0.5600292701500069, + "learning_rate": 1.5663132924645708e-06, + "loss": 0.2686, + "step": 27033 + }, + { + "epoch": 1.2664074577223965, + "grad_norm": 0.5998654204442471, + "learning_rate": 1.5661373680160946e-06, + "loss": 0.2883, + "step": 27034 + }, + { + "epoch": 1.266454302712325, + "grad_norm": 0.6338824995046518, + "learning_rate": 1.5659614489417637e-06, + "loss": 0.2695, + "step": 27035 + }, + { + "epoch": 1.2665011477022532, + "grad_norm": 0.595947508563773, + "learning_rate": 1.5657855352425903e-06, + "loss": 0.2603, + "step": 27036 + }, + { + "epoch": 1.2665479926921814, + "grad_norm": 0.6003924822537529, + "learning_rate": 1.5656096269195858e-06, + "loss": 0.2835, + "step": 27037 + }, + { + "epoch": 1.26659483768211, + "grad_norm": 0.5307384561418319, + "learning_rate": 1.565433723973763e-06, + "loss": 0.2571, + "step": 27038 + }, + { + "epoch": 1.2666416826720384, + "grad_norm": 0.5098945333160293, + "learning_rate": 1.5652578264061345e-06, + "loss": 0.2488, + "step": 27039 + }, + { + "epoch": 1.2666885276619666, + "grad_norm": 0.6422477947422903, + "learning_rate": 1.5650819342177118e-06, + "loss": 0.2767, + "step": 27040 + }, + { + "epoch": 1.2667353726518948, + "grad_norm": 0.5982187529191282, + "learning_rate": 1.5649060474095075e-06, + "loss": 0.2857, + "step": 27041 + }, + { + "epoch": 1.2667822176418233, + "grad_norm": 0.6086070226908643, + "learning_rate": 1.5647301659825348e-06, + "loss": 0.2859, + "step": 27042 + }, + { + "epoch": 1.2668290626317515, + "grad_norm": 0.5991770923485209, + "learning_rate": 1.5645542899378047e-06, + "loss": 0.2851, + "step": 27043 + }, + { + "epoch": 1.2668759076216798, + "grad_norm": 0.5988562844631711, + "learning_rate": 1.564378419276329e-06, + "loss": 0.2811, + "step": 27044 + }, + { + "epoch": 1.2669227526116082, + "grad_norm": 0.6300948454147192, + "learning_rate": 1.56420255399912e-06, + "loss": 0.2765, + "step": 27045 + }, + { + "epoch": 1.2669695976015365, + "grad_norm": 0.6046872405756538, + "learning_rate": 1.5640266941071914e-06, + "loss": 0.2707, + "step": 27046 + }, + { + "epoch": 1.2670164425914647, + "grad_norm": 0.6384569153750091, + "learning_rate": 1.5638508396015525e-06, + "loss": 0.2899, + "step": 27047 + }, + { + "epoch": 1.2670632875813932, + "grad_norm": 0.5752669014702195, + "learning_rate": 1.5636749904832182e-06, + "loss": 0.2805, + "step": 27048 + }, + { + "epoch": 1.2671101325713214, + "grad_norm": 0.5950859212793856, + "learning_rate": 1.563499146753198e-06, + "loss": 0.2706, + "step": 27049 + }, + { + "epoch": 1.26715697756125, + "grad_norm": 0.6746639695369354, + "learning_rate": 1.5633233084125052e-06, + "loss": 0.2988, + "step": 27050 + }, + { + "epoch": 1.2672038225511781, + "grad_norm": 0.6065565156383024, + "learning_rate": 1.563147475462151e-06, + "loss": 0.2711, + "step": 27051 + }, + { + "epoch": 1.2672506675411066, + "grad_norm": 0.6267694225662555, + "learning_rate": 1.5629716479031473e-06, + "loss": 0.2807, + "step": 27052 + }, + { + "epoch": 1.2672975125310348, + "grad_norm": 0.5587736284968823, + "learning_rate": 1.5627958257365069e-06, + "loss": 0.262, + "step": 27053 + }, + { + "epoch": 1.267344357520963, + "grad_norm": 0.6048184686142263, + "learning_rate": 1.5626200089632414e-06, + "loss": 0.2911, + "step": 27054 + }, + { + "epoch": 1.2673912025108915, + "grad_norm": 0.6213401262056318, + "learning_rate": 1.5624441975843612e-06, + "loss": 0.2932, + "step": 27055 + }, + { + "epoch": 1.2674380475008198, + "grad_norm": 0.5993913349712785, + "learning_rate": 1.562268391600879e-06, + "loss": 0.2779, + "step": 27056 + }, + { + "epoch": 1.267484892490748, + "grad_norm": 0.5905558852215042, + "learning_rate": 1.5620925910138074e-06, + "loss": 0.268, + "step": 27057 + }, + { + "epoch": 1.2675317374806765, + "grad_norm": 0.6291978441400002, + "learning_rate": 1.5619167958241564e-06, + "loss": 0.2891, + "step": 27058 + }, + { + "epoch": 1.2675785824706047, + "grad_norm": 0.6719446406410846, + "learning_rate": 1.5617410060329383e-06, + "loss": 0.301, + "step": 27059 + }, + { + "epoch": 1.267625427460533, + "grad_norm": 0.6433812846533629, + "learning_rate": 1.5615652216411658e-06, + "loss": 0.2812, + "step": 27060 + }, + { + "epoch": 1.2676722724504614, + "grad_norm": 0.555123808167367, + "learning_rate": 1.5613894426498494e-06, + "loss": 0.2637, + "step": 27061 + }, + { + "epoch": 1.2677191174403897, + "grad_norm": 0.6161688744092895, + "learning_rate": 1.5612136690600005e-06, + "loss": 0.2804, + "step": 27062 + }, + { + "epoch": 1.2677659624303181, + "grad_norm": 0.5903751535163769, + "learning_rate": 1.5610379008726307e-06, + "loss": 0.2773, + "step": 27063 + }, + { + "epoch": 1.2678128074202464, + "grad_norm": 0.6198105171508316, + "learning_rate": 1.5608621380887524e-06, + "loss": 0.2805, + "step": 27064 + }, + { + "epoch": 1.2678596524101748, + "grad_norm": 0.5936463436036821, + "learning_rate": 1.5606863807093758e-06, + "loss": 0.2689, + "step": 27065 + }, + { + "epoch": 1.267906497400103, + "grad_norm": 0.6129719937559751, + "learning_rate": 1.5605106287355138e-06, + "loss": 0.3047, + "step": 27066 + }, + { + "epoch": 1.2679533423900313, + "grad_norm": 0.6300010655021784, + "learning_rate": 1.5603348821681763e-06, + "loss": 0.2775, + "step": 27067 + }, + { + "epoch": 1.2680001873799598, + "grad_norm": 0.5697331339843872, + "learning_rate": 1.5601591410083761e-06, + "loss": 0.2669, + "step": 27068 + }, + { + "epoch": 1.268047032369888, + "grad_norm": 0.6050815327156912, + "learning_rate": 1.5599834052571227e-06, + "loss": 0.2834, + "step": 27069 + }, + { + "epoch": 1.2680938773598163, + "grad_norm": 0.5997102860950763, + "learning_rate": 1.559807674915429e-06, + "loss": 0.2757, + "step": 27070 + }, + { + "epoch": 1.2681407223497447, + "grad_norm": 0.5892537833473536, + "learning_rate": 1.5596319499843063e-06, + "loss": 0.2787, + "step": 27071 + }, + { + "epoch": 1.268187567339673, + "grad_norm": 0.6712807222800456, + "learning_rate": 1.5594562304647648e-06, + "loss": 0.3026, + "step": 27072 + }, + { + "epoch": 1.2682344123296012, + "grad_norm": 0.6043770694602504, + "learning_rate": 1.5592805163578173e-06, + "loss": 0.2584, + "step": 27073 + }, + { + "epoch": 1.2682812573195297, + "grad_norm": 0.5793942007595603, + "learning_rate": 1.5591048076644728e-06, + "loss": 0.2592, + "step": 27074 + }, + { + "epoch": 1.2683281023094581, + "grad_norm": 0.6009924995019017, + "learning_rate": 1.5589291043857444e-06, + "loss": 0.28, + "step": 27075 + }, + { + "epoch": 1.2683749472993864, + "grad_norm": 0.6200358911709568, + "learning_rate": 1.5587534065226417e-06, + "loss": 0.2974, + "step": 27076 + }, + { + "epoch": 1.2684217922893146, + "grad_norm": 0.5916160643948215, + "learning_rate": 1.558577714076177e-06, + "loss": 0.2952, + "step": 27077 + }, + { + "epoch": 1.268468637279243, + "grad_norm": 0.5740248512131083, + "learning_rate": 1.558402027047361e-06, + "loss": 0.2791, + "step": 27078 + }, + { + "epoch": 1.2685154822691713, + "grad_norm": 0.6148681948121949, + "learning_rate": 1.5582263454372055e-06, + "loss": 0.2812, + "step": 27079 + }, + { + "epoch": 1.2685623272590996, + "grad_norm": 0.6029579930355924, + "learning_rate": 1.5580506692467196e-06, + "loss": 0.2804, + "step": 27080 + }, + { + "epoch": 1.268609172249028, + "grad_norm": 0.6012268106159996, + "learning_rate": 1.5578749984769154e-06, + "loss": 0.2683, + "step": 27081 + }, + { + "epoch": 1.2686560172389563, + "grad_norm": 0.5802198866133428, + "learning_rate": 1.557699333128804e-06, + "loss": 0.2728, + "step": 27082 + }, + { + "epoch": 1.2687028622288845, + "grad_norm": 0.5712218270899727, + "learning_rate": 1.557523673203396e-06, + "loss": 0.2748, + "step": 27083 + }, + { + "epoch": 1.268749707218813, + "grad_norm": 0.5972590973867551, + "learning_rate": 1.5573480187017026e-06, + "loss": 0.2734, + "step": 27084 + }, + { + "epoch": 1.2687965522087412, + "grad_norm": 0.5660568234942642, + "learning_rate": 1.5571723696247344e-06, + "loss": 0.2602, + "step": 27085 + }, + { + "epoch": 1.2688433971986697, + "grad_norm": 0.6143653792611727, + "learning_rate": 1.5569967259735025e-06, + "loss": 0.2863, + "step": 27086 + }, + { + "epoch": 1.268890242188598, + "grad_norm": 0.5873561663796132, + "learning_rate": 1.5568210877490164e-06, + "loss": 0.2691, + "step": 27087 + }, + { + "epoch": 1.2689370871785264, + "grad_norm": 0.6521900271669919, + "learning_rate": 1.556645454952289e-06, + "loss": 0.272, + "step": 27088 + }, + { + "epoch": 1.2689839321684546, + "grad_norm": 0.6193505527227973, + "learning_rate": 1.5564698275843292e-06, + "loss": 0.2764, + "step": 27089 + }, + { + "epoch": 1.2690307771583829, + "grad_norm": 0.5481108606289309, + "learning_rate": 1.5562942056461484e-06, + "loss": 0.2467, + "step": 27090 + }, + { + "epoch": 1.2690776221483113, + "grad_norm": 0.5627525343207573, + "learning_rate": 1.5561185891387582e-06, + "loss": 0.2791, + "step": 27091 + }, + { + "epoch": 1.2691244671382396, + "grad_norm": 0.5763766886916585, + "learning_rate": 1.5559429780631677e-06, + "loss": 0.2727, + "step": 27092 + }, + { + "epoch": 1.2691713121281678, + "grad_norm": 0.5854927495621207, + "learning_rate": 1.5557673724203875e-06, + "loss": 0.2757, + "step": 27093 + }, + { + "epoch": 1.2692181571180963, + "grad_norm": 0.5771379733207245, + "learning_rate": 1.555591772211429e-06, + "loss": 0.2845, + "step": 27094 + }, + { + "epoch": 1.2692650021080245, + "grad_norm": 0.567678099007048, + "learning_rate": 1.5554161774373029e-06, + "loss": 0.2702, + "step": 27095 + }, + { + "epoch": 1.2693118470979528, + "grad_norm": 0.6426532430044036, + "learning_rate": 1.5552405880990188e-06, + "loss": 0.2877, + "step": 27096 + }, + { + "epoch": 1.2693586920878812, + "grad_norm": 0.5949891492843657, + "learning_rate": 1.5550650041975885e-06, + "loss": 0.2784, + "step": 27097 + }, + { + "epoch": 1.2694055370778095, + "grad_norm": 0.5890569747876313, + "learning_rate": 1.5548894257340208e-06, + "loss": 0.28, + "step": 27098 + }, + { + "epoch": 1.269452382067738, + "grad_norm": 0.5816949948055468, + "learning_rate": 1.5547138527093276e-06, + "loss": 0.2842, + "step": 27099 + }, + { + "epoch": 1.2694992270576662, + "grad_norm": 0.5919244920208638, + "learning_rate": 1.5545382851245178e-06, + "loss": 0.2755, + "step": 27100 + }, + { + "epoch": 1.2695460720475946, + "grad_norm": 0.6284661377561898, + "learning_rate": 1.5543627229806024e-06, + "loss": 0.2851, + "step": 27101 + }, + { + "epoch": 1.2695929170375229, + "grad_norm": 0.5694523087128073, + "learning_rate": 1.5541871662785924e-06, + "loss": 0.2668, + "step": 27102 + }, + { + "epoch": 1.269639762027451, + "grad_norm": 0.5478901567681639, + "learning_rate": 1.5540116150194984e-06, + "loss": 0.2558, + "step": 27103 + }, + { + "epoch": 1.2696866070173796, + "grad_norm": 0.5853031617591655, + "learning_rate": 1.5538360692043286e-06, + "loss": 0.2755, + "step": 27104 + }, + { + "epoch": 1.2697334520073078, + "grad_norm": 0.6154262219802379, + "learning_rate": 1.5536605288340941e-06, + "loss": 0.27, + "step": 27105 + }, + { + "epoch": 1.269780296997236, + "grad_norm": 0.6033742376292432, + "learning_rate": 1.5534849939098063e-06, + "loss": 0.2845, + "step": 27106 + }, + { + "epoch": 1.2698271419871645, + "grad_norm": 0.6045871374074973, + "learning_rate": 1.5533094644324737e-06, + "loss": 0.2942, + "step": 27107 + }, + { + "epoch": 1.2698739869770928, + "grad_norm": 0.5488761120887152, + "learning_rate": 1.5531339404031073e-06, + "loss": 0.2713, + "step": 27108 + }, + { + "epoch": 1.269920831967021, + "grad_norm": 0.5726607620820041, + "learning_rate": 1.5529584218227185e-06, + "loss": 0.2682, + "step": 27109 + }, + { + "epoch": 1.2699676769569495, + "grad_norm": 0.5722454038011002, + "learning_rate": 1.5527829086923146e-06, + "loss": 0.2772, + "step": 27110 + }, + { + "epoch": 1.270014521946878, + "grad_norm": 0.5904676803877543, + "learning_rate": 1.5526074010129071e-06, + "loss": 0.2821, + "step": 27111 + }, + { + "epoch": 1.2700613669368062, + "grad_norm": 0.5642868333634052, + "learning_rate": 1.5524318987855058e-06, + "loss": 0.2725, + "step": 27112 + }, + { + "epoch": 1.2701082119267344, + "grad_norm": 0.5772580400645886, + "learning_rate": 1.552256402011121e-06, + "loss": 0.2707, + "step": 27113 + }, + { + "epoch": 1.2701550569166629, + "grad_norm": 0.5638431296858532, + "learning_rate": 1.5520809106907623e-06, + "loss": 0.2604, + "step": 27114 + }, + { + "epoch": 1.270201901906591, + "grad_norm": 0.6426187634451824, + "learning_rate": 1.5519054248254407e-06, + "loss": 0.2718, + "step": 27115 + }, + { + "epoch": 1.2702487468965193, + "grad_norm": 0.5594465321251897, + "learning_rate": 1.5517299444161637e-06, + "loss": 0.2584, + "step": 27116 + }, + { + "epoch": 1.2702955918864478, + "grad_norm": 0.5687379412891853, + "learning_rate": 1.551554469463943e-06, + "loss": 0.2736, + "step": 27117 + }, + { + "epoch": 1.270342436876376, + "grad_norm": 0.6407992257162531, + "learning_rate": 1.551378999969788e-06, + "loss": 0.2932, + "step": 27118 + }, + { + "epoch": 1.2703892818663043, + "grad_norm": 0.5815150121525254, + "learning_rate": 1.5512035359347077e-06, + "loss": 0.2641, + "step": 27119 + }, + { + "epoch": 1.2704361268562328, + "grad_norm": 0.628344682273713, + "learning_rate": 1.5510280773597137e-06, + "loss": 0.277, + "step": 27120 + }, + { + "epoch": 1.270482971846161, + "grad_norm": 0.6316328020078209, + "learning_rate": 1.550852624245815e-06, + "loss": 0.2813, + "step": 27121 + }, + { + "epoch": 1.2705298168360895, + "grad_norm": 0.6469294240197254, + "learning_rate": 1.5506771765940196e-06, + "loss": 0.3016, + "step": 27122 + }, + { + "epoch": 1.2705766618260177, + "grad_norm": 0.5938815767363127, + "learning_rate": 1.5505017344053387e-06, + "loss": 0.2813, + "step": 27123 + }, + { + "epoch": 1.2706235068159462, + "grad_norm": 0.6368075966611771, + "learning_rate": 1.550326297680782e-06, + "loss": 0.2843, + "step": 27124 + }, + { + "epoch": 1.2706703518058744, + "grad_norm": 0.6231189991705989, + "learning_rate": 1.5501508664213583e-06, + "loss": 0.267, + "step": 27125 + }, + { + "epoch": 1.2707171967958026, + "grad_norm": 0.5650765489201249, + "learning_rate": 1.5499754406280781e-06, + "loss": 0.2717, + "step": 27126 + }, + { + "epoch": 1.270764041785731, + "grad_norm": 0.5907168551225455, + "learning_rate": 1.5498000203019506e-06, + "loss": 0.2774, + "step": 27127 + }, + { + "epoch": 1.2708108867756593, + "grad_norm": 0.6251541806781548, + "learning_rate": 1.549624605443985e-06, + "loss": 0.2847, + "step": 27128 + }, + { + "epoch": 1.2708577317655876, + "grad_norm": 0.5585233762875256, + "learning_rate": 1.5494491960551904e-06, + "loss": 0.2575, + "step": 27129 + }, + { + "epoch": 1.270904576755516, + "grad_norm": 0.5815326818709515, + "learning_rate": 1.5492737921365774e-06, + "loss": 0.2654, + "step": 27130 + }, + { + "epoch": 1.2709514217454443, + "grad_norm": 0.6210728867537245, + "learning_rate": 1.5490983936891546e-06, + "loss": 0.2784, + "step": 27131 + }, + { + "epoch": 1.2709982667353725, + "grad_norm": 0.597151538974653, + "learning_rate": 1.5489230007139311e-06, + "loss": 0.2725, + "step": 27132 + }, + { + "epoch": 1.271045111725301, + "grad_norm": 0.6092793500151644, + "learning_rate": 1.5487476132119178e-06, + "loss": 0.2782, + "step": 27133 + }, + { + "epoch": 1.2710919567152292, + "grad_norm": 0.6847900228292334, + "learning_rate": 1.5485722311841224e-06, + "loss": 0.2965, + "step": 27134 + }, + { + "epoch": 1.2711388017051577, + "grad_norm": 0.6034014486368183, + "learning_rate": 1.5483968546315542e-06, + "loss": 0.2739, + "step": 27135 + }, + { + "epoch": 1.271185646695086, + "grad_norm": 0.5845411902670107, + "learning_rate": 1.5482214835552229e-06, + "loss": 0.2739, + "step": 27136 + }, + { + "epoch": 1.2712324916850144, + "grad_norm": 0.5600391654355278, + "learning_rate": 1.5480461179561381e-06, + "loss": 0.2678, + "step": 27137 + }, + { + "epoch": 1.2712793366749426, + "grad_norm": 0.6189778027314184, + "learning_rate": 1.5478707578353083e-06, + "loss": 0.2825, + "step": 27138 + }, + { + "epoch": 1.2713261816648709, + "grad_norm": 0.5710233155325473, + "learning_rate": 1.547695403193743e-06, + "loss": 0.2583, + "step": 27139 + }, + { + "epoch": 1.2713730266547993, + "grad_norm": 0.5714248068912813, + "learning_rate": 1.547520054032452e-06, + "loss": 0.2707, + "step": 27140 + }, + { + "epoch": 1.2714198716447276, + "grad_norm": 0.5649982674771915, + "learning_rate": 1.5473447103524436e-06, + "loss": 0.2745, + "step": 27141 + }, + { + "epoch": 1.2714667166346558, + "grad_norm": 0.555943819427214, + "learning_rate": 1.5471693721547263e-06, + "loss": 0.26, + "step": 27142 + }, + { + "epoch": 1.2715135616245843, + "grad_norm": 0.6446701855139749, + "learning_rate": 1.5469940394403098e-06, + "loss": 0.292, + "step": 27143 + }, + { + "epoch": 1.2715604066145125, + "grad_norm": 0.5885432745557387, + "learning_rate": 1.5468187122102036e-06, + "loss": 0.2762, + "step": 27144 + }, + { + "epoch": 1.2716072516044408, + "grad_norm": 0.579372281286474, + "learning_rate": 1.5466433904654154e-06, + "loss": 0.2694, + "step": 27145 + }, + { + "epoch": 1.2716540965943692, + "grad_norm": 0.6003320380543542, + "learning_rate": 1.546468074206956e-06, + "loss": 0.2791, + "step": 27146 + }, + { + "epoch": 1.2717009415842977, + "grad_norm": 0.5736359560988467, + "learning_rate": 1.5462927634358321e-06, + "loss": 0.284, + "step": 27147 + }, + { + "epoch": 1.271747786574226, + "grad_norm": 0.6118210488961237, + "learning_rate": 1.5461174581530543e-06, + "loss": 0.2697, + "step": 27148 + }, + { + "epoch": 1.2717946315641542, + "grad_norm": 0.5924942784515854, + "learning_rate": 1.5459421583596304e-06, + "loss": 0.2746, + "step": 27149 + }, + { + "epoch": 1.2718414765540826, + "grad_norm": 0.6492205794846911, + "learning_rate": 1.5457668640565693e-06, + "loss": 0.2966, + "step": 27150 + }, + { + "epoch": 1.2718883215440109, + "grad_norm": 0.5552481444245454, + "learning_rate": 1.5455915752448807e-06, + "loss": 0.2703, + "step": 27151 + }, + { + "epoch": 1.2719351665339391, + "grad_norm": 0.6002018154237324, + "learning_rate": 1.5454162919255732e-06, + "loss": 0.2794, + "step": 27152 + }, + { + "epoch": 1.2719820115238676, + "grad_norm": 0.5940383284979126, + "learning_rate": 1.545241014099654e-06, + "loss": 0.27, + "step": 27153 + }, + { + "epoch": 1.2720288565137958, + "grad_norm": 0.6170453042448623, + "learning_rate": 1.5450657417681328e-06, + "loss": 0.2567, + "step": 27154 + }, + { + "epoch": 1.272075701503724, + "grad_norm": 0.6105167674106937, + "learning_rate": 1.5448904749320187e-06, + "loss": 0.2729, + "step": 27155 + }, + { + "epoch": 1.2721225464936525, + "grad_norm": 0.6325719454809079, + "learning_rate": 1.5447152135923194e-06, + "loss": 0.2945, + "step": 27156 + }, + { + "epoch": 1.2721693914835808, + "grad_norm": 0.5894920562926109, + "learning_rate": 1.5445399577500439e-06, + "loss": 0.2897, + "step": 27157 + }, + { + "epoch": 1.2722162364735092, + "grad_norm": 0.6200742102407046, + "learning_rate": 1.5443647074062018e-06, + "loss": 0.282, + "step": 27158 + }, + { + "epoch": 1.2722630814634375, + "grad_norm": 0.5935865194252482, + "learning_rate": 1.5441894625618004e-06, + "loss": 0.2706, + "step": 27159 + }, + { + "epoch": 1.272309926453366, + "grad_norm": 0.6213856552467812, + "learning_rate": 1.5440142232178473e-06, + "loss": 0.2848, + "step": 27160 + }, + { + "epoch": 1.2723567714432942, + "grad_norm": 0.5815300886205298, + "learning_rate": 1.5438389893753525e-06, + "loss": 0.2727, + "step": 27161 + }, + { + "epoch": 1.2724036164332224, + "grad_norm": 0.5756293480043204, + "learning_rate": 1.5436637610353245e-06, + "loss": 0.278, + "step": 27162 + }, + { + "epoch": 1.2724504614231509, + "grad_norm": 0.5613180527381701, + "learning_rate": 1.5434885381987707e-06, + "loss": 0.2652, + "step": 27163 + }, + { + "epoch": 1.2724973064130791, + "grad_norm": 0.579800223736671, + "learning_rate": 1.5433133208667006e-06, + "loss": 0.2645, + "step": 27164 + }, + { + "epoch": 1.2725441514030074, + "grad_norm": 0.579229718850036, + "learning_rate": 1.5431381090401216e-06, + "loss": 0.2792, + "step": 27165 + }, + { + "epoch": 1.2725909963929358, + "grad_norm": 0.6359261552038152, + "learning_rate": 1.5429629027200422e-06, + "loss": 0.26, + "step": 27166 + }, + { + "epoch": 1.272637841382864, + "grad_norm": 0.6127937370356847, + "learning_rate": 1.5427877019074703e-06, + "loss": 0.3027, + "step": 27167 + }, + { + "epoch": 1.2726846863727923, + "grad_norm": 0.577178342624638, + "learning_rate": 1.5426125066034147e-06, + "loss": 0.2753, + "step": 27168 + }, + { + "epoch": 1.2727315313627208, + "grad_norm": 0.5806345911031187, + "learning_rate": 1.5424373168088842e-06, + "loss": 0.2748, + "step": 27169 + }, + { + "epoch": 1.272778376352649, + "grad_norm": 0.5806983413788636, + "learning_rate": 1.5422621325248863e-06, + "loss": 0.2697, + "step": 27170 + }, + { + "epoch": 1.2728252213425775, + "grad_norm": 0.6009750359182416, + "learning_rate": 1.5420869537524283e-06, + "loss": 0.2818, + "step": 27171 + }, + { + "epoch": 1.2728720663325057, + "grad_norm": 0.6146395777174508, + "learning_rate": 1.5419117804925191e-06, + "loss": 0.2763, + "step": 27172 + }, + { + "epoch": 1.2729189113224342, + "grad_norm": 0.5560196867558662, + "learning_rate": 1.5417366127461674e-06, + "loss": 0.2509, + "step": 27173 + }, + { + "epoch": 1.2729657563123624, + "grad_norm": 0.6041096459576647, + "learning_rate": 1.54156145051438e-06, + "loss": 0.2767, + "step": 27174 + }, + { + "epoch": 1.2730126013022907, + "grad_norm": 0.6821341834986917, + "learning_rate": 1.541386293798166e-06, + "loss": 0.2904, + "step": 27175 + }, + { + "epoch": 1.2730594462922191, + "grad_norm": 0.614042074772451, + "learning_rate": 1.5412111425985333e-06, + "loss": 0.2816, + "step": 27176 + }, + { + "epoch": 1.2731062912821474, + "grad_norm": 0.63546074231763, + "learning_rate": 1.5410359969164895e-06, + "loss": 0.2858, + "step": 27177 + }, + { + "epoch": 1.2731531362720756, + "grad_norm": 0.5706546050264011, + "learning_rate": 1.5408608567530418e-06, + "loss": 0.2687, + "step": 27178 + }, + { + "epoch": 1.273199981262004, + "grad_norm": 0.5929417547507007, + "learning_rate": 1.5406857221091993e-06, + "loss": 0.278, + "step": 27179 + }, + { + "epoch": 1.2732468262519323, + "grad_norm": 0.6034646849711286, + "learning_rate": 1.5405105929859688e-06, + "loss": 0.2824, + "step": 27180 + }, + { + "epoch": 1.2732936712418605, + "grad_norm": 0.6354015824869725, + "learning_rate": 1.5403354693843587e-06, + "loss": 0.2857, + "step": 27181 + }, + { + "epoch": 1.273340516231789, + "grad_norm": 0.5588999004783345, + "learning_rate": 1.5401603513053776e-06, + "loss": 0.2624, + "step": 27182 + }, + { + "epoch": 1.2733873612217175, + "grad_norm": 0.539181782036852, + "learning_rate": 1.5399852387500325e-06, + "loss": 0.2584, + "step": 27183 + }, + { + "epoch": 1.2734342062116457, + "grad_norm": 0.5834370530052179, + "learning_rate": 1.5398101317193299e-06, + "loss": 0.2751, + "step": 27184 + }, + { + "epoch": 1.273481051201574, + "grad_norm": 0.6167252223402565, + "learning_rate": 1.539635030214279e-06, + "loss": 0.2792, + "step": 27185 + }, + { + "epoch": 1.2735278961915024, + "grad_norm": 0.5853961270062918, + "learning_rate": 1.5394599342358876e-06, + "loss": 0.2775, + "step": 27186 + }, + { + "epoch": 1.2735747411814307, + "grad_norm": 0.5956300886416932, + "learning_rate": 1.5392848437851623e-06, + "loss": 0.2821, + "step": 27187 + }, + { + "epoch": 1.273621586171359, + "grad_norm": 0.6456820592369436, + "learning_rate": 1.5391097588631124e-06, + "loss": 0.2846, + "step": 27188 + }, + { + "epoch": 1.2736684311612874, + "grad_norm": 0.5721682032882648, + "learning_rate": 1.5389346794707433e-06, + "loss": 0.275, + "step": 27189 + }, + { + "epoch": 1.2737152761512156, + "grad_norm": 0.5533322346391188, + "learning_rate": 1.5387596056090636e-06, + "loss": 0.2504, + "step": 27190 + }, + { + "epoch": 1.2737621211411438, + "grad_norm": 0.6004523320609262, + "learning_rate": 1.538584537279081e-06, + "loss": 0.287, + "step": 27191 + }, + { + "epoch": 1.2738089661310723, + "grad_norm": 0.5771026809672848, + "learning_rate": 1.5384094744818023e-06, + "loss": 0.268, + "step": 27192 + }, + { + "epoch": 1.2738558111210005, + "grad_norm": 0.5795471149409799, + "learning_rate": 1.5382344172182359e-06, + "loss": 0.2597, + "step": 27193 + }, + { + "epoch": 1.273902656110929, + "grad_norm": 0.6157896900084274, + "learning_rate": 1.5380593654893894e-06, + "loss": 0.265, + "step": 27194 + }, + { + "epoch": 1.2739495011008573, + "grad_norm": 0.594762680942186, + "learning_rate": 1.5378843192962683e-06, + "loss": 0.2683, + "step": 27195 + }, + { + "epoch": 1.2739963460907857, + "grad_norm": 0.601169623826527, + "learning_rate": 1.5377092786398812e-06, + "loss": 0.2924, + "step": 27196 + }, + { + "epoch": 1.274043191080714, + "grad_norm": 0.60933201218184, + "learning_rate": 1.5375342435212358e-06, + "loss": 0.2549, + "step": 27197 + }, + { + "epoch": 1.2740900360706422, + "grad_norm": 0.5884065237522152, + "learning_rate": 1.5373592139413385e-06, + "loss": 0.2705, + "step": 27198 + }, + { + "epoch": 1.2741368810605707, + "grad_norm": 0.6321299754392304, + "learning_rate": 1.537184189901197e-06, + "loss": 0.2782, + "step": 27199 + }, + { + "epoch": 1.274183726050499, + "grad_norm": 0.6060447429377976, + "learning_rate": 1.5370091714018193e-06, + "loss": 0.2621, + "step": 27200 + }, + { + "epoch": 1.2742305710404271, + "grad_norm": 0.5765789289145181, + "learning_rate": 1.5368341584442115e-06, + "loss": 0.2649, + "step": 27201 + }, + { + "epoch": 1.2742774160303556, + "grad_norm": 0.6640642878567071, + "learning_rate": 1.5366591510293804e-06, + "loss": 0.3001, + "step": 27202 + }, + { + "epoch": 1.2743242610202838, + "grad_norm": 0.6158857712858814, + "learning_rate": 1.5364841491583338e-06, + "loss": 0.2836, + "step": 27203 + }, + { + "epoch": 1.274371106010212, + "grad_norm": 0.6156320406118271, + "learning_rate": 1.536309152832079e-06, + "loss": 0.2708, + "step": 27204 + }, + { + "epoch": 1.2744179510001405, + "grad_norm": 0.6357284276687346, + "learning_rate": 1.5361341620516227e-06, + "loss": 0.3044, + "step": 27205 + }, + { + "epoch": 1.2744647959900688, + "grad_norm": 0.5947273248897523, + "learning_rate": 1.5359591768179726e-06, + "loss": 0.2526, + "step": 27206 + }, + { + "epoch": 1.2745116409799973, + "grad_norm": 0.5891348867448639, + "learning_rate": 1.5357841971321347e-06, + "loss": 0.2598, + "step": 27207 + }, + { + "epoch": 1.2745584859699255, + "grad_norm": 0.6111481606658884, + "learning_rate": 1.5356092229951167e-06, + "loss": 0.2843, + "step": 27208 + }, + { + "epoch": 1.274605330959854, + "grad_norm": 0.6184497090495766, + "learning_rate": 1.5354342544079246e-06, + "loss": 0.2674, + "step": 27209 + }, + { + "epoch": 1.2746521759497822, + "grad_norm": 0.5902010734848684, + "learning_rate": 1.5352592913715658e-06, + "loss": 0.2701, + "step": 27210 + }, + { + "epoch": 1.2746990209397104, + "grad_norm": 0.6012090009245341, + "learning_rate": 1.535084333887048e-06, + "loss": 0.2808, + "step": 27211 + }, + { + "epoch": 1.274745865929639, + "grad_norm": 0.6009827859156015, + "learning_rate": 1.5349093819553767e-06, + "loss": 0.2831, + "step": 27212 + }, + { + "epoch": 1.2747927109195671, + "grad_norm": 0.6308967580897007, + "learning_rate": 1.5347344355775606e-06, + "loss": 0.2776, + "step": 27213 + }, + { + "epoch": 1.2748395559094954, + "grad_norm": 0.6233681955688436, + "learning_rate": 1.5345594947546042e-06, + "loss": 0.2716, + "step": 27214 + }, + { + "epoch": 1.2748864008994238, + "grad_norm": 0.5203189398133964, + "learning_rate": 1.5343845594875156e-06, + "loss": 0.2521, + "step": 27215 + }, + { + "epoch": 1.274933245889352, + "grad_norm": 0.5806847262892025, + "learning_rate": 1.5342096297773007e-06, + "loss": 0.2706, + "step": 27216 + }, + { + "epoch": 1.2749800908792803, + "grad_norm": 0.5906720874949982, + "learning_rate": 1.5340347056249666e-06, + "loss": 0.2886, + "step": 27217 + }, + { + "epoch": 1.2750269358692088, + "grad_norm": 0.5765588089352621, + "learning_rate": 1.5338597870315206e-06, + "loss": 0.2555, + "step": 27218 + }, + { + "epoch": 1.2750737808591373, + "grad_norm": 0.6095159206485803, + "learning_rate": 1.533684873997969e-06, + "loss": 0.2876, + "step": 27219 + }, + { + "epoch": 1.2751206258490655, + "grad_norm": 0.5938070200615, + "learning_rate": 1.5335099665253173e-06, + "loss": 0.2855, + "step": 27220 + }, + { + "epoch": 1.2751674708389937, + "grad_norm": 0.5688154988261985, + "learning_rate": 1.5333350646145725e-06, + "loss": 0.2686, + "step": 27221 + }, + { + "epoch": 1.2752143158289222, + "grad_norm": 0.5540092006673911, + "learning_rate": 1.5331601682667425e-06, + "loss": 0.2599, + "step": 27222 + }, + { + "epoch": 1.2752611608188504, + "grad_norm": 0.6042209552264488, + "learning_rate": 1.5329852774828317e-06, + "loss": 0.2719, + "step": 27223 + }, + { + "epoch": 1.2753080058087787, + "grad_norm": 0.571976289896342, + "learning_rate": 1.5328103922638482e-06, + "loss": 0.2619, + "step": 27224 + }, + { + "epoch": 1.2753548507987071, + "grad_norm": 0.5892359459071581, + "learning_rate": 1.5326355126107978e-06, + "loss": 0.2842, + "step": 27225 + }, + { + "epoch": 1.2754016957886354, + "grad_norm": 0.5996045878443289, + "learning_rate": 1.532460638524687e-06, + "loss": 0.2673, + "step": 27226 + }, + { + "epoch": 1.2754485407785636, + "grad_norm": 0.5855361876970449, + "learning_rate": 1.5322857700065213e-06, + "loss": 0.2831, + "step": 27227 + }, + { + "epoch": 1.275495385768492, + "grad_norm": 0.5695627731973397, + "learning_rate": 1.5321109070573087e-06, + "loss": 0.2641, + "step": 27228 + }, + { + "epoch": 1.2755422307584203, + "grad_norm": 0.5867295153730671, + "learning_rate": 1.5319360496780538e-06, + "loss": 0.2616, + "step": 27229 + }, + { + "epoch": 1.2755890757483488, + "grad_norm": 0.6121925602298687, + "learning_rate": 1.5317611978697636e-06, + "loss": 0.2773, + "step": 27230 + }, + { + "epoch": 1.275635920738277, + "grad_norm": 0.5894451358912784, + "learning_rate": 1.5315863516334453e-06, + "loss": 0.2729, + "step": 27231 + }, + { + "epoch": 1.2756827657282055, + "grad_norm": 0.5978901347093925, + "learning_rate": 1.531411510970104e-06, + "loss": 0.2782, + "step": 27232 + }, + { + "epoch": 1.2757296107181337, + "grad_norm": 0.5974872440635017, + "learning_rate": 1.531236675880745e-06, + "loss": 0.27, + "step": 27233 + }, + { + "epoch": 1.275776455708062, + "grad_norm": 0.5606411749058978, + "learning_rate": 1.5310618463663758e-06, + "loss": 0.2653, + "step": 27234 + }, + { + "epoch": 1.2758233006979904, + "grad_norm": 0.635768962542008, + "learning_rate": 1.5308870224280023e-06, + "loss": 0.2828, + "step": 27235 + }, + { + "epoch": 1.2758701456879187, + "grad_norm": 0.6019213201208383, + "learning_rate": 1.53071220406663e-06, + "loss": 0.2703, + "step": 27236 + }, + { + "epoch": 1.275916990677847, + "grad_norm": 0.6334072768044992, + "learning_rate": 1.5305373912832667e-06, + "loss": 0.2821, + "step": 27237 + }, + { + "epoch": 1.2759638356677754, + "grad_norm": 0.5607104621002527, + "learning_rate": 1.5303625840789157e-06, + "loss": 0.273, + "step": 27238 + }, + { + "epoch": 1.2760106806577036, + "grad_norm": 0.5824011732269281, + "learning_rate": 1.530187782454585e-06, + "loss": 0.2639, + "step": 27239 + }, + { + "epoch": 1.2760575256476319, + "grad_norm": 0.5865887486311716, + "learning_rate": 1.5300129864112792e-06, + "loss": 0.2612, + "step": 27240 + }, + { + "epoch": 1.2761043706375603, + "grad_norm": 0.6247946122670519, + "learning_rate": 1.529838195950005e-06, + "loss": 0.2819, + "step": 27241 + }, + { + "epoch": 1.2761512156274886, + "grad_norm": 0.6229804278061855, + "learning_rate": 1.5296634110717683e-06, + "loss": 0.2898, + "step": 27242 + }, + { + "epoch": 1.276198060617417, + "grad_norm": 0.594545058442663, + "learning_rate": 1.529488631777576e-06, + "loss": 0.2663, + "step": 27243 + }, + { + "epoch": 1.2762449056073453, + "grad_norm": 0.6087077297506353, + "learning_rate": 1.5293138580684311e-06, + "loss": 0.2662, + "step": 27244 + }, + { + "epoch": 1.2762917505972737, + "grad_norm": 0.6234142225558184, + "learning_rate": 1.5291390899453412e-06, + "loss": 0.2768, + "step": 27245 + }, + { + "epoch": 1.276338595587202, + "grad_norm": 0.6032428455093143, + "learning_rate": 1.5289643274093123e-06, + "loss": 0.2945, + "step": 27246 + }, + { + "epoch": 1.2763854405771302, + "grad_norm": 0.6223536071267453, + "learning_rate": 1.5287895704613491e-06, + "loss": 0.2932, + "step": 27247 + }, + { + "epoch": 1.2764322855670587, + "grad_norm": 0.6108269625348143, + "learning_rate": 1.5286148191024577e-06, + "loss": 0.2772, + "step": 27248 + }, + { + "epoch": 1.276479130556987, + "grad_norm": 0.5835234977283822, + "learning_rate": 1.5284400733336453e-06, + "loss": 0.2614, + "step": 27249 + }, + { + "epoch": 1.2765259755469152, + "grad_norm": 0.5939036343536054, + "learning_rate": 1.528265333155915e-06, + "loss": 0.2833, + "step": 27250 + }, + { + "epoch": 1.2765728205368436, + "grad_norm": 0.5875560967913408, + "learning_rate": 1.5280905985702732e-06, + "loss": 0.2774, + "step": 27251 + }, + { + "epoch": 1.2766196655267719, + "grad_norm": 0.5841197373071848, + "learning_rate": 1.5279158695777257e-06, + "loss": 0.2676, + "step": 27252 + }, + { + "epoch": 1.2766665105167, + "grad_norm": 0.5491096795662257, + "learning_rate": 1.5277411461792787e-06, + "loss": 0.2651, + "step": 27253 + }, + { + "epoch": 1.2767133555066286, + "grad_norm": 0.6120950084201273, + "learning_rate": 1.5275664283759367e-06, + "loss": 0.2748, + "step": 27254 + }, + { + "epoch": 1.276760200496557, + "grad_norm": 0.5964604937672044, + "learning_rate": 1.5273917161687063e-06, + "loss": 0.2579, + "step": 27255 + }, + { + "epoch": 1.2768070454864853, + "grad_norm": 0.5842232876202046, + "learning_rate": 1.5272170095585908e-06, + "loss": 0.266, + "step": 27256 + }, + { + "epoch": 1.2768538904764135, + "grad_norm": 0.5854410815443541, + "learning_rate": 1.5270423085465977e-06, + "loss": 0.2759, + "step": 27257 + }, + { + "epoch": 1.276900735466342, + "grad_norm": 0.6104366769800198, + "learning_rate": 1.5268676131337311e-06, + "loss": 0.2921, + "step": 27258 + }, + { + "epoch": 1.2769475804562702, + "grad_norm": 0.5966329810578976, + "learning_rate": 1.5266929233209969e-06, + "loss": 0.2536, + "step": 27259 + }, + { + "epoch": 1.2769944254461985, + "grad_norm": 0.5919955881930417, + "learning_rate": 1.5265182391094006e-06, + "loss": 0.2677, + "step": 27260 + }, + { + "epoch": 1.277041270436127, + "grad_norm": 0.589153344895682, + "learning_rate": 1.5263435604999478e-06, + "loss": 0.2709, + "step": 27261 + }, + { + "epoch": 1.2770881154260552, + "grad_norm": 0.6450536962186834, + "learning_rate": 1.5261688874936417e-06, + "loss": 0.2796, + "step": 27262 + }, + { + "epoch": 1.2771349604159834, + "grad_norm": 0.586300180210138, + "learning_rate": 1.5259942200914894e-06, + "loss": 0.29, + "step": 27263 + }, + { + "epoch": 1.2771818054059119, + "grad_norm": 0.6191364431470991, + "learning_rate": 1.5258195582944956e-06, + "loss": 0.2698, + "step": 27264 + }, + { + "epoch": 1.27722865039584, + "grad_norm": 0.6135671700700815, + "learning_rate": 1.525644902103665e-06, + "loss": 0.2757, + "step": 27265 + }, + { + "epoch": 1.2772754953857686, + "grad_norm": 0.6019613963145894, + "learning_rate": 1.5254702515200037e-06, + "loss": 0.2732, + "step": 27266 + }, + { + "epoch": 1.2773223403756968, + "grad_norm": 0.5936870813929813, + "learning_rate": 1.5252956065445162e-06, + "loss": 0.2718, + "step": 27267 + }, + { + "epoch": 1.2773691853656253, + "grad_norm": 0.5950775609435456, + "learning_rate": 1.5251209671782076e-06, + "loss": 0.2702, + "step": 27268 + }, + { + "epoch": 1.2774160303555535, + "grad_norm": 0.6112841883057217, + "learning_rate": 1.524946333422082e-06, + "loss": 0.2801, + "step": 27269 + }, + { + "epoch": 1.2774628753454818, + "grad_norm": 0.6117030434336244, + "learning_rate": 1.524771705277146e-06, + "loss": 0.2996, + "step": 27270 + }, + { + "epoch": 1.2775097203354102, + "grad_norm": 0.558704746578558, + "learning_rate": 1.5245970827444032e-06, + "loss": 0.2513, + "step": 27271 + }, + { + "epoch": 1.2775565653253385, + "grad_norm": 0.6147635639598467, + "learning_rate": 1.5244224658248591e-06, + "loss": 0.2832, + "step": 27272 + }, + { + "epoch": 1.2776034103152667, + "grad_norm": 0.5972543413086921, + "learning_rate": 1.524247854519519e-06, + "loss": 0.2879, + "step": 27273 + }, + { + "epoch": 1.2776502553051952, + "grad_norm": 0.6103615400322663, + "learning_rate": 1.5240732488293874e-06, + "loss": 0.2646, + "step": 27274 + }, + { + "epoch": 1.2776971002951234, + "grad_norm": 0.5820229459939472, + "learning_rate": 1.523898648755468e-06, + "loss": 0.2663, + "step": 27275 + }, + { + "epoch": 1.2777439452850516, + "grad_norm": 0.5588997566996465, + "learning_rate": 1.5237240542987667e-06, + "loss": 0.2556, + "step": 27276 + }, + { + "epoch": 1.27779079027498, + "grad_norm": 0.582169716274845, + "learning_rate": 1.5235494654602884e-06, + "loss": 0.28, + "step": 27277 + }, + { + "epoch": 1.2778376352649083, + "grad_norm": 0.5570330833619702, + "learning_rate": 1.5233748822410375e-06, + "loss": 0.2543, + "step": 27278 + }, + { + "epoch": 1.2778844802548368, + "grad_norm": 0.5406295698994467, + "learning_rate": 1.5232003046420183e-06, + "loss": 0.2597, + "step": 27279 + }, + { + "epoch": 1.277931325244765, + "grad_norm": 0.6052899172820406, + "learning_rate": 1.5230257326642367e-06, + "loss": 0.2803, + "step": 27280 + }, + { + "epoch": 1.2779781702346935, + "grad_norm": 0.5903430891921513, + "learning_rate": 1.5228511663086964e-06, + "loss": 0.2718, + "step": 27281 + }, + { + "epoch": 1.2780250152246218, + "grad_norm": 0.5693329301870083, + "learning_rate": 1.522676605576401e-06, + "loss": 0.2736, + "step": 27282 + }, + { + "epoch": 1.27807186021455, + "grad_norm": 0.6015700681388191, + "learning_rate": 1.5225020504683566e-06, + "loss": 0.2601, + "step": 27283 + }, + { + "epoch": 1.2781187052044785, + "grad_norm": 0.6355739544173994, + "learning_rate": 1.5223275009855676e-06, + "loss": 0.2938, + "step": 27284 + }, + { + "epoch": 1.2781655501944067, + "grad_norm": 0.5980762622238098, + "learning_rate": 1.5221529571290371e-06, + "loss": 0.285, + "step": 27285 + }, + { + "epoch": 1.278212395184335, + "grad_norm": 0.600529940061588, + "learning_rate": 1.5219784188997722e-06, + "loss": 0.2685, + "step": 27286 + }, + { + "epoch": 1.2782592401742634, + "grad_norm": 0.6030756929686245, + "learning_rate": 1.5218038862987745e-06, + "loss": 0.2906, + "step": 27287 + }, + { + "epoch": 1.2783060851641916, + "grad_norm": 0.5626074869825687, + "learning_rate": 1.52162935932705e-06, + "loss": 0.2625, + "step": 27288 + }, + { + "epoch": 1.2783529301541199, + "grad_norm": 0.6159132793982556, + "learning_rate": 1.5214548379856024e-06, + "loss": 0.2856, + "step": 27289 + }, + { + "epoch": 1.2783997751440483, + "grad_norm": 0.618154350795558, + "learning_rate": 1.5212803222754358e-06, + "loss": 0.288, + "step": 27290 + }, + { + "epoch": 1.2784466201339768, + "grad_norm": 0.6112774151929818, + "learning_rate": 1.5211058121975559e-06, + "loss": 0.2794, + "step": 27291 + }, + { + "epoch": 1.278493465123905, + "grad_norm": 0.5729061551771428, + "learning_rate": 1.520931307752966e-06, + "loss": 0.2793, + "step": 27292 + }, + { + "epoch": 1.2785403101138333, + "grad_norm": 0.5853612895669833, + "learning_rate": 1.5207568089426697e-06, + "loss": 0.2677, + "step": 27293 + }, + { + "epoch": 1.2785871551037618, + "grad_norm": 0.5723419944813023, + "learning_rate": 1.520582315767672e-06, + "loss": 0.2738, + "step": 27294 + }, + { + "epoch": 1.27863400009369, + "grad_norm": 0.5953461012890529, + "learning_rate": 1.5204078282289768e-06, + "loss": 0.2604, + "step": 27295 + }, + { + "epoch": 1.2786808450836182, + "grad_norm": 0.6025903551413241, + "learning_rate": 1.5202333463275884e-06, + "loss": 0.2747, + "step": 27296 + }, + { + "epoch": 1.2787276900735467, + "grad_norm": 0.5702851127670109, + "learning_rate": 1.5200588700645108e-06, + "loss": 0.272, + "step": 27297 + }, + { + "epoch": 1.278774535063475, + "grad_norm": 0.5660079008883845, + "learning_rate": 1.5198843994407488e-06, + "loss": 0.2672, + "step": 27298 + }, + { + "epoch": 1.2788213800534032, + "grad_norm": 0.6099161170739476, + "learning_rate": 1.5197099344573055e-06, + "loss": 0.2763, + "step": 27299 + }, + { + "epoch": 1.2788682250433316, + "grad_norm": 0.5963848315275903, + "learning_rate": 1.5195354751151845e-06, + "loss": 0.2635, + "step": 27300 + }, + { + "epoch": 1.2789150700332599, + "grad_norm": 0.6077806627082896, + "learning_rate": 1.5193610214153904e-06, + "loss": 0.2833, + "step": 27301 + }, + { + "epoch": 1.2789619150231883, + "grad_norm": 0.5344566429572645, + "learning_rate": 1.5191865733589278e-06, + "loss": 0.2534, + "step": 27302 + }, + { + "epoch": 1.2790087600131166, + "grad_norm": 0.6403381631361127, + "learning_rate": 1.5190121309467992e-06, + "loss": 0.274, + "step": 27303 + }, + { + "epoch": 1.279055605003045, + "grad_norm": 0.6229052418108401, + "learning_rate": 1.5188376941800103e-06, + "loss": 0.2787, + "step": 27304 + }, + { + "epoch": 1.2791024499929733, + "grad_norm": 0.5534694591576896, + "learning_rate": 1.5186632630595631e-06, + "loss": 0.2685, + "step": 27305 + }, + { + "epoch": 1.2791492949829015, + "grad_norm": 0.6339623470136561, + "learning_rate": 1.5184888375864625e-06, + "loss": 0.2916, + "step": 27306 + }, + { + "epoch": 1.27919613997283, + "grad_norm": 0.5647828163941009, + "learning_rate": 1.5183144177617116e-06, + "loss": 0.2532, + "step": 27307 + }, + { + "epoch": 1.2792429849627582, + "grad_norm": 0.6655261094990875, + "learning_rate": 1.5181400035863142e-06, + "loss": 0.3083, + "step": 27308 + }, + { + "epoch": 1.2792898299526865, + "grad_norm": 0.5874244067007162, + "learning_rate": 1.517965595061275e-06, + "loss": 0.2619, + "step": 27309 + }, + { + "epoch": 1.279336674942615, + "grad_norm": 0.6064916701584988, + "learning_rate": 1.5177911921875974e-06, + "loss": 0.2912, + "step": 27310 + }, + { + "epoch": 1.2793835199325432, + "grad_norm": 0.5825385732303927, + "learning_rate": 1.5176167949662834e-06, + "loss": 0.2628, + "step": 27311 + }, + { + "epoch": 1.2794303649224714, + "grad_norm": 0.5714199105138346, + "learning_rate": 1.517442403398338e-06, + "loss": 0.2807, + "step": 27312 + }, + { + "epoch": 1.2794772099123999, + "grad_norm": 0.606335631252506, + "learning_rate": 1.5172680174847654e-06, + "loss": 0.289, + "step": 27313 + }, + { + "epoch": 1.2795240549023281, + "grad_norm": 0.652948140132629, + "learning_rate": 1.5170936372265677e-06, + "loss": 0.3076, + "step": 27314 + }, + { + "epoch": 1.2795708998922566, + "grad_norm": 0.6550964385461506, + "learning_rate": 1.5169192626247495e-06, + "loss": 0.3122, + "step": 27315 + }, + { + "epoch": 1.2796177448821848, + "grad_norm": 0.6114115037923121, + "learning_rate": 1.5167448936803137e-06, + "loss": 0.2849, + "step": 27316 + }, + { + "epoch": 1.2796645898721133, + "grad_norm": 0.5841997384268385, + "learning_rate": 1.516570530394264e-06, + "loss": 0.242, + "step": 27317 + }, + { + "epoch": 1.2797114348620415, + "grad_norm": 0.624124240849446, + "learning_rate": 1.5163961727676036e-06, + "loss": 0.2661, + "step": 27318 + }, + { + "epoch": 1.2797582798519698, + "grad_norm": 0.6641047896339036, + "learning_rate": 1.5162218208013363e-06, + "loss": 0.2925, + "step": 27319 + }, + { + "epoch": 1.2798051248418982, + "grad_norm": 0.5500572148272469, + "learning_rate": 1.5160474744964642e-06, + "loss": 0.2639, + "step": 27320 + }, + { + "epoch": 1.2798519698318265, + "grad_norm": 0.5907088637034104, + "learning_rate": 1.5158731338539922e-06, + "loss": 0.2705, + "step": 27321 + }, + { + "epoch": 1.2798988148217547, + "grad_norm": 0.6333206920629513, + "learning_rate": 1.5156987988749237e-06, + "loss": 0.2869, + "step": 27322 + }, + { + "epoch": 1.2799456598116832, + "grad_norm": 0.6102568587782818, + "learning_rate": 1.5155244695602605e-06, + "loss": 0.2742, + "step": 27323 + }, + { + "epoch": 1.2799925048016114, + "grad_norm": 0.5899979846396758, + "learning_rate": 1.5153501459110064e-06, + "loss": 0.2647, + "step": 27324 + }, + { + "epoch": 1.2800393497915397, + "grad_norm": 0.640926708979096, + "learning_rate": 1.5151758279281645e-06, + "loss": 0.2606, + "step": 27325 + }, + { + "epoch": 1.2800861947814681, + "grad_norm": 0.6330711816348112, + "learning_rate": 1.515001515612739e-06, + "loss": 0.2903, + "step": 27326 + }, + { + "epoch": 1.2801330397713966, + "grad_norm": 0.6100407813036894, + "learning_rate": 1.5148272089657312e-06, + "loss": 0.2818, + "step": 27327 + }, + { + "epoch": 1.2801798847613248, + "grad_norm": 0.6322939487127188, + "learning_rate": 1.5146529079881468e-06, + "loss": 0.2772, + "step": 27328 + }, + { + "epoch": 1.280226729751253, + "grad_norm": 0.5879419532460474, + "learning_rate": 1.5144786126809857e-06, + "loss": 0.264, + "step": 27329 + }, + { + "epoch": 1.2802735747411815, + "grad_norm": 0.600315185796651, + "learning_rate": 1.5143043230452531e-06, + "loss": 0.2546, + "step": 27330 + }, + { + "epoch": 1.2803204197311098, + "grad_norm": 0.5933540126642978, + "learning_rate": 1.5141300390819511e-06, + "loss": 0.2827, + "step": 27331 + }, + { + "epoch": 1.280367264721038, + "grad_norm": 0.6042723350538087, + "learning_rate": 1.513955760792083e-06, + "loss": 0.2579, + "step": 27332 + }, + { + "epoch": 1.2804141097109665, + "grad_norm": 0.5855705530297571, + "learning_rate": 1.5137814881766517e-06, + "loss": 0.2648, + "step": 27333 + }, + { + "epoch": 1.2804609547008947, + "grad_norm": 0.5614193648250791, + "learning_rate": 1.5136072212366608e-06, + "loss": 0.2779, + "step": 27334 + }, + { + "epoch": 1.280507799690823, + "grad_norm": 0.6058594205723883, + "learning_rate": 1.5134329599731117e-06, + "loss": 0.2894, + "step": 27335 + }, + { + "epoch": 1.2805546446807514, + "grad_norm": 0.5809765483678129, + "learning_rate": 1.5132587043870076e-06, + "loss": 0.2786, + "step": 27336 + }, + { + "epoch": 1.2806014896706797, + "grad_norm": 0.5832185827635123, + "learning_rate": 1.5130844544793521e-06, + "loss": 0.2857, + "step": 27337 + }, + { + "epoch": 1.2806483346606081, + "grad_norm": 0.6109019259247072, + "learning_rate": 1.5129102102511473e-06, + "loss": 0.2719, + "step": 27338 + }, + { + "epoch": 1.2806951796505364, + "grad_norm": 0.6041605439793651, + "learning_rate": 1.5127359717033964e-06, + "loss": 0.2706, + "step": 27339 + }, + { + "epoch": 1.2807420246404648, + "grad_norm": 0.5827895227601495, + "learning_rate": 1.5125617388371022e-06, + "loss": 0.2786, + "step": 27340 + }, + { + "epoch": 1.280788869630393, + "grad_norm": 0.5932358982890227, + "learning_rate": 1.5123875116532666e-06, + "loss": 0.271, + "step": 27341 + }, + { + "epoch": 1.2808357146203213, + "grad_norm": 0.5792571289755201, + "learning_rate": 1.5122132901528924e-06, + "loss": 0.2613, + "step": 27342 + }, + { + "epoch": 1.2808825596102498, + "grad_norm": 0.6108047943829631, + "learning_rate": 1.5120390743369823e-06, + "loss": 0.2689, + "step": 27343 + }, + { + "epoch": 1.280929404600178, + "grad_norm": 0.6163532987905797, + "learning_rate": 1.5118648642065398e-06, + "loss": 0.2758, + "step": 27344 + }, + { + "epoch": 1.2809762495901063, + "grad_norm": 0.6257557430462245, + "learning_rate": 1.5116906597625657e-06, + "loss": 0.281, + "step": 27345 + }, + { + "epoch": 1.2810230945800347, + "grad_norm": 0.6161378618708833, + "learning_rate": 1.511516461006064e-06, + "loss": 0.278, + "step": 27346 + }, + { + "epoch": 1.281069939569963, + "grad_norm": 0.5916323702717635, + "learning_rate": 1.511342267938037e-06, + "loss": 0.2745, + "step": 27347 + }, + { + "epoch": 1.2811167845598912, + "grad_norm": 0.6052407366565411, + "learning_rate": 1.5111680805594867e-06, + "loss": 0.2658, + "step": 27348 + }, + { + "epoch": 1.2811636295498197, + "grad_norm": 0.6034035597471376, + "learning_rate": 1.510993898871415e-06, + "loss": 0.2816, + "step": 27349 + }, + { + "epoch": 1.281210474539748, + "grad_norm": 0.5911647099647616, + "learning_rate": 1.5108197228748245e-06, + "loss": 0.2758, + "step": 27350 + }, + { + "epoch": 1.2812573195296764, + "grad_norm": 0.5537494178000253, + "learning_rate": 1.5106455525707186e-06, + "loss": 0.2545, + "step": 27351 + }, + { + "epoch": 1.2813041645196046, + "grad_norm": 0.5609564836993105, + "learning_rate": 1.5104713879600986e-06, + "loss": 0.2669, + "step": 27352 + }, + { + "epoch": 1.281351009509533, + "grad_norm": 0.5901029244834024, + "learning_rate": 1.5102972290439678e-06, + "loss": 0.2589, + "step": 27353 + }, + { + "epoch": 1.2813978544994613, + "grad_norm": 0.5814942391384723, + "learning_rate": 1.5101230758233267e-06, + "loss": 0.2563, + "step": 27354 + }, + { + "epoch": 1.2814446994893895, + "grad_norm": 0.6006079364289273, + "learning_rate": 1.5099489282991792e-06, + "loss": 0.28, + "step": 27355 + }, + { + "epoch": 1.281491544479318, + "grad_norm": 0.5283527442974219, + "learning_rate": 1.509774786472526e-06, + "loss": 0.246, + "step": 27356 + }, + { + "epoch": 1.2815383894692463, + "grad_norm": 0.5824249903202315, + "learning_rate": 1.5096006503443703e-06, + "loss": 0.2713, + "step": 27357 + }, + { + "epoch": 1.2815852344591745, + "grad_norm": 0.6144153272772095, + "learning_rate": 1.5094265199157143e-06, + "loss": 0.2843, + "step": 27358 + }, + { + "epoch": 1.281632079449103, + "grad_norm": 0.5771584044462849, + "learning_rate": 1.50925239518756e-06, + "loss": 0.276, + "step": 27359 + }, + { + "epoch": 1.2816789244390312, + "grad_norm": 0.6025555635680591, + "learning_rate": 1.5090782761609086e-06, + "loss": 0.2649, + "step": 27360 + }, + { + "epoch": 1.2817257694289594, + "grad_norm": 0.5881159785864232, + "learning_rate": 1.5089041628367624e-06, + "loss": 0.2842, + "step": 27361 + }, + { + "epoch": 1.281772614418888, + "grad_norm": 0.5817430043993327, + "learning_rate": 1.5087300552161238e-06, + "loss": 0.2666, + "step": 27362 + }, + { + "epoch": 1.2818194594088164, + "grad_norm": 0.6396396786861747, + "learning_rate": 1.5085559532999948e-06, + "loss": 0.2768, + "step": 27363 + }, + { + "epoch": 1.2818663043987446, + "grad_norm": 0.5886185091041454, + "learning_rate": 1.5083818570893772e-06, + "loss": 0.2725, + "step": 27364 + }, + { + "epoch": 1.2819131493886728, + "grad_norm": 0.6452129252731713, + "learning_rate": 1.5082077665852727e-06, + "loss": 0.2809, + "step": 27365 + }, + { + "epoch": 1.2819599943786013, + "grad_norm": 0.5651362408764306, + "learning_rate": 1.508033681788683e-06, + "loss": 0.255, + "step": 27366 + }, + { + "epoch": 1.2820068393685295, + "grad_norm": 0.6619084872867474, + "learning_rate": 1.5078596027006104e-06, + "loss": 0.2773, + "step": 27367 + }, + { + "epoch": 1.2820536843584578, + "grad_norm": 0.5931456633925692, + "learning_rate": 1.5076855293220562e-06, + "loss": 0.2706, + "step": 27368 + }, + { + "epoch": 1.2821005293483863, + "grad_norm": 0.6180682592050283, + "learning_rate": 1.5075114616540224e-06, + "loss": 0.2658, + "step": 27369 + }, + { + "epoch": 1.2821473743383145, + "grad_norm": 0.576735245801214, + "learning_rate": 1.5073373996975104e-06, + "loss": 0.276, + "step": 27370 + }, + { + "epoch": 1.2821942193282427, + "grad_norm": 0.622542528492815, + "learning_rate": 1.5071633434535233e-06, + "loss": 0.2752, + "step": 27371 + }, + { + "epoch": 1.2822410643181712, + "grad_norm": 0.5908823806113349, + "learning_rate": 1.506989292923061e-06, + "loss": 0.2826, + "step": 27372 + }, + { + "epoch": 1.2822879093080994, + "grad_norm": 0.5915814636138402, + "learning_rate": 1.5068152481071253e-06, + "loss": 0.2655, + "step": 27373 + }, + { + "epoch": 1.282334754298028, + "grad_norm": 0.6463932000801423, + "learning_rate": 1.506641209006718e-06, + "loss": 0.2811, + "step": 27374 + }, + { + "epoch": 1.2823815992879561, + "grad_norm": 0.6318816681503793, + "learning_rate": 1.5064671756228417e-06, + "loss": 0.2734, + "step": 27375 + }, + { + "epoch": 1.2824284442778846, + "grad_norm": 0.5981811689725985, + "learning_rate": 1.5062931479564962e-06, + "loss": 0.2695, + "step": 27376 + }, + { + "epoch": 1.2824752892678128, + "grad_norm": 0.5760841842008938, + "learning_rate": 1.5061191260086855e-06, + "loss": 0.2691, + "step": 27377 + }, + { + "epoch": 1.282522134257741, + "grad_norm": 0.5722432744337572, + "learning_rate": 1.5059451097804078e-06, + "loss": 0.2904, + "step": 27378 + }, + { + "epoch": 1.2825689792476695, + "grad_norm": 0.5939960405119917, + "learning_rate": 1.505771099272667e-06, + "loss": 0.2715, + "step": 27379 + }, + { + "epoch": 1.2826158242375978, + "grad_norm": 0.5723447535131566, + "learning_rate": 1.5055970944864627e-06, + "loss": 0.2734, + "step": 27380 + }, + { + "epoch": 1.282662669227526, + "grad_norm": 0.6273883969906269, + "learning_rate": 1.5054230954227978e-06, + "loss": 0.2879, + "step": 27381 + }, + { + "epoch": 1.2827095142174545, + "grad_norm": 0.6206495138291177, + "learning_rate": 1.5052491020826732e-06, + "loss": 0.2776, + "step": 27382 + }, + { + "epoch": 1.2827563592073827, + "grad_norm": 0.625682739933733, + "learning_rate": 1.5050751144670905e-06, + "loss": 0.2774, + "step": 27383 + }, + { + "epoch": 1.282803204197311, + "grad_norm": 0.6092439105085393, + "learning_rate": 1.5049011325770495e-06, + "loss": 0.2966, + "step": 27384 + }, + { + "epoch": 1.2828500491872394, + "grad_norm": 0.6006396836771064, + "learning_rate": 1.5047271564135524e-06, + "loss": 0.2671, + "step": 27385 + }, + { + "epoch": 1.2828968941771677, + "grad_norm": 0.5882435981125158, + "learning_rate": 1.5045531859776008e-06, + "loss": 0.2575, + "step": 27386 + }, + { + "epoch": 1.2829437391670961, + "grad_norm": 0.6026746224581339, + "learning_rate": 1.5043792212701949e-06, + "loss": 0.3, + "step": 27387 + }, + { + "epoch": 1.2829905841570244, + "grad_norm": 0.5751296402731559, + "learning_rate": 1.5042052622923364e-06, + "loss": 0.2831, + "step": 27388 + }, + { + "epoch": 1.2830374291469528, + "grad_norm": 0.5868139144288941, + "learning_rate": 1.5040313090450273e-06, + "loss": 0.2787, + "step": 27389 + }, + { + "epoch": 1.283084274136881, + "grad_norm": 0.53738133801179, + "learning_rate": 1.5038573615292674e-06, + "loss": 0.2554, + "step": 27390 + }, + { + "epoch": 1.2831311191268093, + "grad_norm": 0.5824990722192159, + "learning_rate": 1.503683419746057e-06, + "loss": 0.2747, + "step": 27391 + }, + { + "epoch": 1.2831779641167378, + "grad_norm": 0.5911862077068534, + "learning_rate": 1.5035094836963988e-06, + "loss": 0.2566, + "step": 27392 + }, + { + "epoch": 1.283224809106666, + "grad_norm": 0.6221166017249901, + "learning_rate": 1.503335553381293e-06, + "loss": 0.2708, + "step": 27393 + }, + { + "epoch": 1.2832716540965943, + "grad_norm": 0.5732752365823276, + "learning_rate": 1.5031616288017404e-06, + "loss": 0.2706, + "step": 27394 + }, + { + "epoch": 1.2833184990865227, + "grad_norm": 0.621110728904487, + "learning_rate": 1.5029877099587429e-06, + "loss": 0.2794, + "step": 27395 + }, + { + "epoch": 1.283365344076451, + "grad_norm": 0.596482582972717, + "learning_rate": 1.5028137968532996e-06, + "loss": 0.2576, + "step": 27396 + }, + { + "epoch": 1.2834121890663792, + "grad_norm": 0.6077781505359606, + "learning_rate": 1.5026398894864125e-06, + "loss": 0.2782, + "step": 27397 + }, + { + "epoch": 1.2834590340563077, + "grad_norm": 0.629995200792319, + "learning_rate": 1.502465987859082e-06, + "loss": 0.2984, + "step": 27398 + }, + { + "epoch": 1.2835058790462361, + "grad_norm": 0.6022464341948346, + "learning_rate": 1.502292091972309e-06, + "loss": 0.2524, + "step": 27399 + }, + { + "epoch": 1.2835527240361644, + "grad_norm": 0.578679354578485, + "learning_rate": 1.5021182018270947e-06, + "loss": 0.2793, + "step": 27400 + }, + { + "epoch": 1.2835995690260926, + "grad_norm": 0.6202883295397962, + "learning_rate": 1.5019443174244396e-06, + "loss": 0.2915, + "step": 27401 + }, + { + "epoch": 1.283646414016021, + "grad_norm": 0.5877151351508998, + "learning_rate": 1.5017704387653435e-06, + "loss": 0.266, + "step": 27402 + }, + { + "epoch": 1.2836932590059493, + "grad_norm": 0.6197828152936309, + "learning_rate": 1.5015965658508074e-06, + "loss": 0.2623, + "step": 27403 + }, + { + "epoch": 1.2837401039958776, + "grad_norm": 0.5619661897569471, + "learning_rate": 1.5014226986818326e-06, + "loss": 0.2598, + "step": 27404 + }, + { + "epoch": 1.283786948985806, + "grad_norm": 0.6288015438640165, + "learning_rate": 1.5012488372594186e-06, + "loss": 0.2686, + "step": 27405 + }, + { + "epoch": 1.2838337939757343, + "grad_norm": 0.5994743259859638, + "learning_rate": 1.5010749815845674e-06, + "loss": 0.2867, + "step": 27406 + }, + { + "epoch": 1.2838806389656625, + "grad_norm": 0.5860540108577584, + "learning_rate": 1.5009011316582783e-06, + "loss": 0.2773, + "step": 27407 + }, + { + "epoch": 1.283927483955591, + "grad_norm": 0.5666391083583622, + "learning_rate": 1.500727287481552e-06, + "loss": 0.2781, + "step": 27408 + }, + { + "epoch": 1.2839743289455192, + "grad_norm": 0.6093400343088157, + "learning_rate": 1.5005534490553889e-06, + "loss": 0.2912, + "step": 27409 + }, + { + "epoch": 1.2840211739354477, + "grad_norm": 0.6266744797654784, + "learning_rate": 1.5003796163807898e-06, + "loss": 0.2892, + "step": 27410 + }, + { + "epoch": 1.284068018925376, + "grad_norm": 0.5931682209563139, + "learning_rate": 1.500205789458754e-06, + "loss": 0.2875, + "step": 27411 + }, + { + "epoch": 1.2841148639153044, + "grad_norm": 0.6332241115395565, + "learning_rate": 1.500031968290283e-06, + "loss": 0.2764, + "step": 27412 + }, + { + "epoch": 1.2841617089052326, + "grad_norm": 0.6191773168275437, + "learning_rate": 1.499858152876377e-06, + "loss": 0.2777, + "step": 27413 + }, + { + "epoch": 1.2842085538951609, + "grad_norm": 0.5652234357793671, + "learning_rate": 1.4996843432180363e-06, + "loss": 0.2726, + "step": 27414 + }, + { + "epoch": 1.2842553988850893, + "grad_norm": 0.5865024835115843, + "learning_rate": 1.49951053931626e-06, + "loss": 0.2637, + "step": 27415 + }, + { + "epoch": 1.2843022438750176, + "grad_norm": 0.5907977116245408, + "learning_rate": 1.4993367411720489e-06, + "loss": 0.2586, + "step": 27416 + }, + { + "epoch": 1.2843490888649458, + "grad_norm": 0.6737575082220858, + "learning_rate": 1.499162948786404e-06, + "loss": 0.2834, + "step": 27417 + }, + { + "epoch": 1.2843959338548743, + "grad_norm": 0.6154649696492103, + "learning_rate": 1.498989162160324e-06, + "loss": 0.2808, + "step": 27418 + }, + { + "epoch": 1.2844427788448025, + "grad_norm": 0.5925483989803826, + "learning_rate": 1.4988153812948104e-06, + "loss": 0.2731, + "step": 27419 + }, + { + "epoch": 1.2844896238347308, + "grad_norm": 0.611080453391747, + "learning_rate": 1.4986416061908633e-06, + "loss": 0.2817, + "step": 27420 + }, + { + "epoch": 1.2845364688246592, + "grad_norm": 0.6228239561929019, + "learning_rate": 1.4984678368494814e-06, + "loss": 0.2887, + "step": 27421 + }, + { + "epoch": 1.2845833138145875, + "grad_norm": 0.6668963688921935, + "learning_rate": 1.498294073271665e-06, + "loss": 0.2715, + "step": 27422 + }, + { + "epoch": 1.284630158804516, + "grad_norm": 0.6596608803244877, + "learning_rate": 1.4981203154584144e-06, + "loss": 0.2994, + "step": 27423 + }, + { + "epoch": 1.2846770037944442, + "grad_norm": 0.5547006979462646, + "learning_rate": 1.4979465634107304e-06, + "loss": 0.2651, + "step": 27424 + }, + { + "epoch": 1.2847238487843726, + "grad_norm": 0.5998875679331666, + "learning_rate": 1.4977728171296113e-06, + "loss": 0.2779, + "step": 27425 + }, + { + "epoch": 1.2847706937743009, + "grad_norm": 0.5983081168379797, + "learning_rate": 1.4975990766160586e-06, + "loss": 0.271, + "step": 27426 + }, + { + "epoch": 1.284817538764229, + "grad_norm": 0.6005708861441155, + "learning_rate": 1.4974253418710704e-06, + "loss": 0.2819, + "step": 27427 + }, + { + "epoch": 1.2848643837541576, + "grad_norm": 0.5861729504225986, + "learning_rate": 1.497251612895648e-06, + "loss": 0.2815, + "step": 27428 + }, + { + "epoch": 1.2849112287440858, + "grad_norm": 0.5464770159149501, + "learning_rate": 1.49707788969079e-06, + "loss": 0.2592, + "step": 27429 + }, + { + "epoch": 1.284958073734014, + "grad_norm": 0.5929682601791819, + "learning_rate": 1.4969041722574964e-06, + "loss": 0.2626, + "step": 27430 + }, + { + "epoch": 1.2850049187239425, + "grad_norm": 0.623519821480276, + "learning_rate": 1.4967304605967681e-06, + "loss": 0.2941, + "step": 27431 + }, + { + "epoch": 1.2850517637138708, + "grad_norm": 0.5605409223376161, + "learning_rate": 1.4965567547096038e-06, + "loss": 0.2804, + "step": 27432 + }, + { + "epoch": 1.285098608703799, + "grad_norm": 0.5901920535460106, + "learning_rate": 1.4963830545970027e-06, + "loss": 0.2935, + "step": 27433 + }, + { + "epoch": 1.2851454536937275, + "grad_norm": 0.613235186359934, + "learning_rate": 1.4962093602599644e-06, + "loss": 0.2639, + "step": 27434 + }, + { + "epoch": 1.285192298683656, + "grad_norm": 0.6122346694737458, + "learning_rate": 1.4960356716994898e-06, + "loss": 0.2663, + "step": 27435 + }, + { + "epoch": 1.2852391436735842, + "grad_norm": 0.6242681173107151, + "learning_rate": 1.4958619889165768e-06, + "loss": 0.2819, + "step": 27436 + }, + { + "epoch": 1.2852859886635124, + "grad_norm": 0.6062289925138787, + "learning_rate": 1.4956883119122259e-06, + "loss": 0.2639, + "step": 27437 + }, + { + "epoch": 1.2853328336534409, + "grad_norm": 0.65594747522935, + "learning_rate": 1.4955146406874372e-06, + "loss": 0.2772, + "step": 27438 + }, + { + "epoch": 1.285379678643369, + "grad_norm": 0.5895344116102164, + "learning_rate": 1.4953409752432088e-06, + "loss": 0.2854, + "step": 27439 + }, + { + "epoch": 1.2854265236332973, + "grad_norm": 0.6670303588200798, + "learning_rate": 1.49516731558054e-06, + "loss": 0.278, + "step": 27440 + }, + { + "epoch": 1.2854733686232258, + "grad_norm": 0.554386309368001, + "learning_rate": 1.4949936617004309e-06, + "loss": 0.2543, + "step": 27441 + }, + { + "epoch": 1.285520213613154, + "grad_norm": 0.6164319954880366, + "learning_rate": 1.4948200136038813e-06, + "loss": 0.2683, + "step": 27442 + }, + { + "epoch": 1.2855670586030823, + "grad_norm": 0.5912476485181134, + "learning_rate": 1.494646371291889e-06, + "loss": 0.2826, + "step": 27443 + }, + { + "epoch": 1.2856139035930108, + "grad_norm": 0.5728890207822676, + "learning_rate": 1.4944727347654552e-06, + "loss": 0.2811, + "step": 27444 + }, + { + "epoch": 1.285660748582939, + "grad_norm": 0.6167570959553849, + "learning_rate": 1.4942991040255774e-06, + "loss": 0.2699, + "step": 27445 + }, + { + "epoch": 1.2857075935728675, + "grad_norm": 0.6471448161519547, + "learning_rate": 1.4941254790732559e-06, + "loss": 0.2838, + "step": 27446 + }, + { + "epoch": 1.2857544385627957, + "grad_norm": 0.6368609376047488, + "learning_rate": 1.4939518599094887e-06, + "loss": 0.2824, + "step": 27447 + }, + { + "epoch": 1.2858012835527242, + "grad_norm": 0.5796591860823185, + "learning_rate": 1.4937782465352762e-06, + "loss": 0.2655, + "step": 27448 + }, + { + "epoch": 1.2858481285426524, + "grad_norm": 0.5928610045129388, + "learning_rate": 1.493604638951617e-06, + "loss": 0.281, + "step": 27449 + }, + { + "epoch": 1.2858949735325806, + "grad_norm": 0.5768066428412788, + "learning_rate": 1.4934310371595107e-06, + "loss": 0.263, + "step": 27450 + }, + { + "epoch": 1.285941818522509, + "grad_norm": 0.5543335088577545, + "learning_rate": 1.4932574411599548e-06, + "loss": 0.2631, + "step": 27451 + }, + { + "epoch": 1.2859886635124373, + "grad_norm": 0.6583774434905071, + "learning_rate": 1.4930838509539497e-06, + "loss": 0.317, + "step": 27452 + }, + { + "epoch": 1.2860355085023656, + "grad_norm": 0.5688856501192148, + "learning_rate": 1.4929102665424938e-06, + "loss": 0.2698, + "step": 27453 + }, + { + "epoch": 1.286082353492294, + "grad_norm": 0.5894984801424027, + "learning_rate": 1.4927366879265862e-06, + "loss": 0.2687, + "step": 27454 + }, + { + "epoch": 1.2861291984822223, + "grad_norm": 0.5950654764166453, + "learning_rate": 1.4925631151072262e-06, + "loss": 0.2724, + "step": 27455 + }, + { + "epoch": 1.2861760434721505, + "grad_norm": 0.5860345342611525, + "learning_rate": 1.4923895480854124e-06, + "loss": 0.2873, + "step": 27456 + }, + { + "epoch": 1.286222888462079, + "grad_norm": 0.560674362481353, + "learning_rate": 1.492215986862143e-06, + "loss": 0.264, + "step": 27457 + }, + { + "epoch": 1.2862697334520072, + "grad_norm": 0.6079605195829969, + "learning_rate": 1.4920424314384172e-06, + "loss": 0.2878, + "step": 27458 + }, + { + "epoch": 1.2863165784419357, + "grad_norm": 0.6183215196647187, + "learning_rate": 1.491868881815234e-06, + "loss": 0.2868, + "step": 27459 + }, + { + "epoch": 1.286363423431864, + "grad_norm": 0.6467597910779243, + "learning_rate": 1.4916953379935919e-06, + "loss": 0.2962, + "step": 27460 + }, + { + "epoch": 1.2864102684217924, + "grad_norm": 0.5763082085631914, + "learning_rate": 1.4915217999744896e-06, + "loss": 0.2683, + "step": 27461 + }, + { + "epoch": 1.2864571134117206, + "grad_norm": 0.5564652065137861, + "learning_rate": 1.491348267758927e-06, + "loss": 0.2624, + "step": 27462 + }, + { + "epoch": 1.2865039584016489, + "grad_norm": 0.5975795314000065, + "learning_rate": 1.4911747413479005e-06, + "loss": 0.2776, + "step": 27463 + }, + { + "epoch": 1.2865508033915773, + "grad_norm": 0.5764455133331392, + "learning_rate": 1.49100122074241e-06, + "loss": 0.2659, + "step": 27464 + }, + { + "epoch": 1.2865976483815056, + "grad_norm": 0.5962919515875414, + "learning_rate": 1.4908277059434535e-06, + "loss": 0.2597, + "step": 27465 + }, + { + "epoch": 1.2866444933714338, + "grad_norm": 0.6274177078954378, + "learning_rate": 1.4906541969520306e-06, + "loss": 0.2867, + "step": 27466 + }, + { + "epoch": 1.2866913383613623, + "grad_norm": 0.5735056565835341, + "learning_rate": 1.4904806937691384e-06, + "loss": 0.2508, + "step": 27467 + }, + { + "epoch": 1.2867381833512905, + "grad_norm": 0.6447935265214949, + "learning_rate": 1.4903071963957772e-06, + "loss": 0.2973, + "step": 27468 + }, + { + "epoch": 1.2867850283412188, + "grad_norm": 0.6038624074314747, + "learning_rate": 1.4901337048329434e-06, + "loss": 0.2808, + "step": 27469 + }, + { + "epoch": 1.2868318733311472, + "grad_norm": 0.5919221637682647, + "learning_rate": 1.4899602190816365e-06, + "loss": 0.2735, + "step": 27470 + }, + { + "epoch": 1.2868787183210757, + "grad_norm": 0.6004535432060987, + "learning_rate": 1.4897867391428544e-06, + "loss": 0.2644, + "step": 27471 + }, + { + "epoch": 1.286925563311004, + "grad_norm": 0.5930766698839324, + "learning_rate": 1.4896132650175959e-06, + "loss": 0.2785, + "step": 27472 + }, + { + "epoch": 1.2869724083009322, + "grad_norm": 0.6292983784438112, + "learning_rate": 1.4894397967068592e-06, + "loss": 0.2875, + "step": 27473 + }, + { + "epoch": 1.2870192532908606, + "grad_norm": 0.6292073410366956, + "learning_rate": 1.4892663342116432e-06, + "loss": 0.2767, + "step": 27474 + }, + { + "epoch": 1.2870660982807889, + "grad_norm": 0.5737231878534628, + "learning_rate": 1.4890928775329444e-06, + "loss": 0.2755, + "step": 27475 + }, + { + "epoch": 1.2871129432707171, + "grad_norm": 0.6105486343733483, + "learning_rate": 1.4889194266717621e-06, + "loss": 0.2847, + "step": 27476 + }, + { + "epoch": 1.2871597882606456, + "grad_norm": 0.5514550108522452, + "learning_rate": 1.4887459816290948e-06, + "loss": 0.2734, + "step": 27477 + }, + { + "epoch": 1.2872066332505738, + "grad_norm": 0.5650207262478116, + "learning_rate": 1.4885725424059394e-06, + "loss": 0.2574, + "step": 27478 + }, + { + "epoch": 1.287253478240502, + "grad_norm": 0.5832799352963021, + "learning_rate": 1.488399109003295e-06, + "loss": 0.2567, + "step": 27479 + }, + { + "epoch": 1.2873003232304305, + "grad_norm": 0.6178197214452688, + "learning_rate": 1.4882256814221603e-06, + "loss": 0.2797, + "step": 27480 + }, + { + "epoch": 1.2873471682203588, + "grad_norm": 0.6472373234740824, + "learning_rate": 1.4880522596635327e-06, + "loss": 0.2775, + "step": 27481 + }, + { + "epoch": 1.2873940132102872, + "grad_norm": 0.6283177841741963, + "learning_rate": 1.4878788437284092e-06, + "loss": 0.2863, + "step": 27482 + }, + { + "epoch": 1.2874408582002155, + "grad_norm": 0.6363456995480811, + "learning_rate": 1.4877054336177882e-06, + "loss": 0.2799, + "step": 27483 + }, + { + "epoch": 1.287487703190144, + "grad_norm": 0.5801490541894334, + "learning_rate": 1.487532029332669e-06, + "loss": 0.2786, + "step": 27484 + }, + { + "epoch": 1.2875345481800722, + "grad_norm": 0.601466857511389, + "learning_rate": 1.4873586308740476e-06, + "loss": 0.2711, + "step": 27485 + }, + { + "epoch": 1.2875813931700004, + "grad_norm": 0.586548440700456, + "learning_rate": 1.487185238242923e-06, + "loss": 0.2704, + "step": 27486 + }, + { + "epoch": 1.2876282381599289, + "grad_norm": 0.5966537044321186, + "learning_rate": 1.4870118514402937e-06, + "loss": 0.278, + "step": 27487 + }, + { + "epoch": 1.2876750831498571, + "grad_norm": 0.61578016005997, + "learning_rate": 1.486838470467156e-06, + "loss": 0.2735, + "step": 27488 + }, + { + "epoch": 1.2877219281397854, + "grad_norm": 0.6073151081068612, + "learning_rate": 1.4866650953245076e-06, + "loss": 0.287, + "step": 27489 + }, + { + "epoch": 1.2877687731297138, + "grad_norm": 0.6209604808163807, + "learning_rate": 1.486491726013347e-06, + "loss": 0.2752, + "step": 27490 + }, + { + "epoch": 1.287815618119642, + "grad_norm": 0.5881587601509205, + "learning_rate": 1.4863183625346728e-06, + "loss": 0.2638, + "step": 27491 + }, + { + "epoch": 1.2878624631095703, + "grad_norm": 0.5781135638531094, + "learning_rate": 1.4861450048894803e-06, + "loss": 0.2655, + "step": 27492 + }, + { + "epoch": 1.2879093080994988, + "grad_norm": 0.5766519835082732, + "learning_rate": 1.48597165307877e-06, + "loss": 0.2633, + "step": 27493 + }, + { + "epoch": 1.287956153089427, + "grad_norm": 0.5607856567279768, + "learning_rate": 1.485798307103537e-06, + "loss": 0.2583, + "step": 27494 + }, + { + "epoch": 1.2880029980793555, + "grad_norm": 0.6338698329447637, + "learning_rate": 1.4856249669647802e-06, + "loss": 0.2871, + "step": 27495 + }, + { + "epoch": 1.2880498430692837, + "grad_norm": 0.5830440138101033, + "learning_rate": 1.4854516326634963e-06, + "loss": 0.2689, + "step": 27496 + }, + { + "epoch": 1.2880966880592122, + "grad_norm": 0.562749607284343, + "learning_rate": 1.485278304200683e-06, + "loss": 0.2719, + "step": 27497 + }, + { + "epoch": 1.2881435330491404, + "grad_norm": 0.5420279493072446, + "learning_rate": 1.4851049815773389e-06, + "loss": 0.2639, + "step": 27498 + }, + { + "epoch": 1.2881903780390687, + "grad_norm": 0.5887537497712366, + "learning_rate": 1.4849316647944608e-06, + "loss": 0.2601, + "step": 27499 + }, + { + "epoch": 1.2882372230289971, + "grad_norm": 0.5765729538217141, + "learning_rate": 1.484758353853045e-06, + "loss": 0.2691, + "step": 27500 + }, + { + "epoch": 1.2882840680189254, + "grad_norm": 0.6045082795891134, + "learning_rate": 1.4845850487540896e-06, + "loss": 0.2776, + "step": 27501 + }, + { + "epoch": 1.2883309130088536, + "grad_norm": 0.6065278769206637, + "learning_rate": 1.4844117494985927e-06, + "loss": 0.2756, + "step": 27502 + }, + { + "epoch": 1.288377757998782, + "grad_norm": 0.5934572180593306, + "learning_rate": 1.48423845608755e-06, + "loss": 0.2687, + "step": 27503 + }, + { + "epoch": 1.2884246029887103, + "grad_norm": 0.5349669867843707, + "learning_rate": 1.4840651685219607e-06, + "loss": 0.2503, + "step": 27504 + }, + { + "epoch": 1.2884714479786386, + "grad_norm": 0.5573740824014897, + "learning_rate": 1.4838918868028207e-06, + "loss": 0.2764, + "step": 27505 + }, + { + "epoch": 1.288518292968567, + "grad_norm": 0.5856065991407268, + "learning_rate": 1.4837186109311275e-06, + "loss": 0.2628, + "step": 27506 + }, + { + "epoch": 1.2885651379584955, + "grad_norm": 0.6104021331472596, + "learning_rate": 1.4835453409078779e-06, + "loss": 0.2721, + "step": 27507 + }, + { + "epoch": 1.2886119829484237, + "grad_norm": 0.5749995216162671, + "learning_rate": 1.4833720767340699e-06, + "loss": 0.2661, + "step": 27508 + }, + { + "epoch": 1.288658827938352, + "grad_norm": 0.5777585282506822, + "learning_rate": 1.4831988184106994e-06, + "loss": 0.2579, + "step": 27509 + }, + { + "epoch": 1.2887056729282804, + "grad_norm": 0.5479063574520533, + "learning_rate": 1.4830255659387644e-06, + "loss": 0.2621, + "step": 27510 + }, + { + "epoch": 1.2887525179182087, + "grad_norm": 0.6345040935527019, + "learning_rate": 1.4828523193192623e-06, + "loss": 0.2783, + "step": 27511 + }, + { + "epoch": 1.288799362908137, + "grad_norm": 0.5548882293093076, + "learning_rate": 1.482679078553189e-06, + "loss": 0.2729, + "step": 27512 + }, + { + "epoch": 1.2888462078980654, + "grad_norm": 0.6376195386699858, + "learning_rate": 1.4825058436415418e-06, + "loss": 0.2974, + "step": 27513 + }, + { + "epoch": 1.2888930528879936, + "grad_norm": 0.5404613987549954, + "learning_rate": 1.4823326145853173e-06, + "loss": 0.26, + "step": 27514 + }, + { + "epoch": 1.2889398978779218, + "grad_norm": 0.5754938155556594, + "learning_rate": 1.4821593913855136e-06, + "loss": 0.283, + "step": 27515 + }, + { + "epoch": 1.2889867428678503, + "grad_norm": 0.5981239394374624, + "learning_rate": 1.4819861740431262e-06, + "loss": 0.266, + "step": 27516 + }, + { + "epoch": 1.2890335878577786, + "grad_norm": 0.5893652908763285, + "learning_rate": 1.4818129625591537e-06, + "loss": 0.2915, + "step": 27517 + }, + { + "epoch": 1.289080432847707, + "grad_norm": 0.5455101963337134, + "learning_rate": 1.4816397569345903e-06, + "loss": 0.2566, + "step": 27518 + }, + { + "epoch": 1.2891272778376353, + "grad_norm": 0.5838101620708928, + "learning_rate": 1.4814665571704351e-06, + "loss": 0.2758, + "step": 27519 + }, + { + "epoch": 1.2891741228275637, + "grad_norm": 0.5929081540352543, + "learning_rate": 1.4812933632676828e-06, + "loss": 0.2675, + "step": 27520 + }, + { + "epoch": 1.289220967817492, + "grad_norm": 0.5696670697369877, + "learning_rate": 1.4811201752273317e-06, + "loss": 0.2604, + "step": 27521 + }, + { + "epoch": 1.2892678128074202, + "grad_norm": 0.6021430997543775, + "learning_rate": 1.480946993050378e-06, + "loss": 0.2812, + "step": 27522 + }, + { + "epoch": 1.2893146577973487, + "grad_norm": 0.5995163866503584, + "learning_rate": 1.4807738167378188e-06, + "loss": 0.2736, + "step": 27523 + }, + { + "epoch": 1.289361502787277, + "grad_norm": 0.608906955229948, + "learning_rate": 1.480600646290649e-06, + "loss": 0.2839, + "step": 27524 + }, + { + "epoch": 1.2894083477772051, + "grad_norm": 0.5974002682034616, + "learning_rate": 1.4804274817098669e-06, + "loss": 0.2821, + "step": 27525 + }, + { + "epoch": 1.2894551927671336, + "grad_norm": 0.5621036819271923, + "learning_rate": 1.4802543229964683e-06, + "loss": 0.2585, + "step": 27526 + }, + { + "epoch": 1.2895020377570618, + "grad_norm": 0.65298884587199, + "learning_rate": 1.4800811701514494e-06, + "loss": 0.2748, + "step": 27527 + }, + { + "epoch": 1.28954888274699, + "grad_norm": 0.58729721624668, + "learning_rate": 1.479908023175807e-06, + "loss": 0.2741, + "step": 27528 + }, + { + "epoch": 1.2895957277369186, + "grad_norm": 0.5570506054374016, + "learning_rate": 1.4797348820705385e-06, + "loss": 0.2724, + "step": 27529 + }, + { + "epoch": 1.2896425727268468, + "grad_norm": 0.5995323485197322, + "learning_rate": 1.479561746836639e-06, + "loss": 0.2765, + "step": 27530 + }, + { + "epoch": 1.2896894177167753, + "grad_norm": 0.5655731205862934, + "learning_rate": 1.4793886174751042e-06, + "loss": 0.2584, + "step": 27531 + }, + { + "epoch": 1.2897362627067035, + "grad_norm": 0.6024596988528554, + "learning_rate": 1.4792154939869318e-06, + "loss": 0.274, + "step": 27532 + }, + { + "epoch": 1.289783107696632, + "grad_norm": 0.6452974115080797, + "learning_rate": 1.4790423763731181e-06, + "loss": 0.2961, + "step": 27533 + }, + { + "epoch": 1.2898299526865602, + "grad_norm": 0.6162001371283989, + "learning_rate": 1.4788692646346585e-06, + "loss": 0.2756, + "step": 27534 + }, + { + "epoch": 1.2898767976764884, + "grad_norm": 0.6510572520734821, + "learning_rate": 1.4786961587725508e-06, + "loss": 0.2873, + "step": 27535 + }, + { + "epoch": 1.289923642666417, + "grad_norm": 0.6103749460700978, + "learning_rate": 1.4785230587877886e-06, + "loss": 0.281, + "step": 27536 + }, + { + "epoch": 1.2899704876563451, + "grad_norm": 0.5985283411052517, + "learning_rate": 1.4783499646813701e-06, + "loss": 0.2811, + "step": 27537 + }, + { + "epoch": 1.2900173326462734, + "grad_norm": 0.5310302471590997, + "learning_rate": 1.4781768764542903e-06, + "loss": 0.2543, + "step": 27538 + }, + { + "epoch": 1.2900641776362018, + "grad_norm": 0.5836291923787786, + "learning_rate": 1.4780037941075457e-06, + "loss": 0.2722, + "step": 27539 + }, + { + "epoch": 1.29011102262613, + "grad_norm": 0.5784842167277411, + "learning_rate": 1.4778307176421331e-06, + "loss": 0.2549, + "step": 27540 + }, + { + "epoch": 1.2901578676160583, + "grad_norm": 0.5675498198987984, + "learning_rate": 1.4776576470590482e-06, + "loss": 0.2678, + "step": 27541 + }, + { + "epoch": 1.2902047126059868, + "grad_norm": 0.6375128862528665, + "learning_rate": 1.4774845823592853e-06, + "loss": 0.2885, + "step": 27542 + }, + { + "epoch": 1.2902515575959153, + "grad_norm": 0.5524426468224979, + "learning_rate": 1.4773115235438417e-06, + "loss": 0.2548, + "step": 27543 + }, + { + "epoch": 1.2902984025858435, + "grad_norm": 0.5939794180944464, + "learning_rate": 1.477138470613714e-06, + "loss": 0.2701, + "step": 27544 + }, + { + "epoch": 1.2903452475757717, + "grad_norm": 0.5887335171723703, + "learning_rate": 1.4769654235698965e-06, + "loss": 0.2752, + "step": 27545 + }, + { + "epoch": 1.2903920925657002, + "grad_norm": 0.5816625364524309, + "learning_rate": 1.4767923824133867e-06, + "loss": 0.2789, + "step": 27546 + }, + { + "epoch": 1.2904389375556284, + "grad_norm": 0.606071960119756, + "learning_rate": 1.4766193471451795e-06, + "loss": 0.2684, + "step": 27547 + }, + { + "epoch": 1.2904857825455567, + "grad_norm": 0.6142330659050066, + "learning_rate": 1.4764463177662708e-06, + "loss": 0.2832, + "step": 27548 + }, + { + "epoch": 1.2905326275354851, + "grad_norm": 0.5760757880809566, + "learning_rate": 1.4762732942776556e-06, + "loss": 0.2749, + "step": 27549 + }, + { + "epoch": 1.2905794725254134, + "grad_norm": 0.6079569771399932, + "learning_rate": 1.4761002766803306e-06, + "loss": 0.2894, + "step": 27550 + }, + { + "epoch": 1.2906263175153416, + "grad_norm": 0.6159121585979717, + "learning_rate": 1.475927264975291e-06, + "loss": 0.2811, + "step": 27551 + }, + { + "epoch": 1.29067316250527, + "grad_norm": 0.6021063682771318, + "learning_rate": 1.4757542591635327e-06, + "loss": 0.2757, + "step": 27552 + }, + { + "epoch": 1.2907200074951983, + "grad_norm": 0.5977437085952174, + "learning_rate": 1.4755812592460514e-06, + "loss": 0.2849, + "step": 27553 + }, + { + "epoch": 1.2907668524851268, + "grad_norm": 0.5973795608861523, + "learning_rate": 1.475408265223843e-06, + "loss": 0.2754, + "step": 27554 + }, + { + "epoch": 1.290813697475055, + "grad_norm": 0.5776636162757606, + "learning_rate": 1.4752352770979018e-06, + "loss": 0.2635, + "step": 27555 + }, + { + "epoch": 1.2908605424649835, + "grad_norm": 0.6297171257470423, + "learning_rate": 1.4750622948692236e-06, + "loss": 0.2883, + "step": 27556 + }, + { + "epoch": 1.2909073874549117, + "grad_norm": 0.5967426239549763, + "learning_rate": 1.4748893185388053e-06, + "loss": 0.2779, + "step": 27557 + }, + { + "epoch": 1.29095423244484, + "grad_norm": 0.5994123470249909, + "learning_rate": 1.4747163481076404e-06, + "loss": 0.2702, + "step": 27558 + }, + { + "epoch": 1.2910010774347684, + "grad_norm": 0.5960596668130835, + "learning_rate": 1.4745433835767254e-06, + "loss": 0.276, + "step": 27559 + }, + { + "epoch": 1.2910479224246967, + "grad_norm": 0.5506051301456535, + "learning_rate": 1.4743704249470562e-06, + "loss": 0.2611, + "step": 27560 + }, + { + "epoch": 1.291094767414625, + "grad_norm": 0.5951927397966513, + "learning_rate": 1.4741974722196273e-06, + "loss": 0.296, + "step": 27561 + }, + { + "epoch": 1.2911416124045534, + "grad_norm": 0.6065024588964464, + "learning_rate": 1.4740245253954332e-06, + "loss": 0.2644, + "step": 27562 + }, + { + "epoch": 1.2911884573944816, + "grad_norm": 0.5892812013934352, + "learning_rate": 1.4738515844754702e-06, + "loss": 0.2703, + "step": 27563 + }, + { + "epoch": 1.2912353023844099, + "grad_norm": 0.6263664333398073, + "learning_rate": 1.4736786494607342e-06, + "loss": 0.286, + "step": 27564 + }, + { + "epoch": 1.2912821473743383, + "grad_norm": 0.5961640539265688, + "learning_rate": 1.4735057203522191e-06, + "loss": 0.2785, + "step": 27565 + }, + { + "epoch": 1.2913289923642666, + "grad_norm": 0.5973540035168032, + "learning_rate": 1.4733327971509216e-06, + "loss": 0.2695, + "step": 27566 + }, + { + "epoch": 1.291375837354195, + "grad_norm": 0.5969905542677506, + "learning_rate": 1.4731598798578345e-06, + "loss": 0.2813, + "step": 27567 + }, + { + "epoch": 1.2914226823441233, + "grad_norm": 0.6326771415448451, + "learning_rate": 1.472986968473955e-06, + "loss": 0.2771, + "step": 27568 + }, + { + "epoch": 1.2914695273340517, + "grad_norm": 0.5735899815211293, + "learning_rate": 1.4728140630002765e-06, + "loss": 0.2791, + "step": 27569 + }, + { + "epoch": 1.29151637232398, + "grad_norm": 0.5803430000160277, + "learning_rate": 1.4726411634377951e-06, + "loss": 0.2784, + "step": 27570 + }, + { + "epoch": 1.2915632173139082, + "grad_norm": 0.559600946270327, + "learning_rate": 1.4724682697875065e-06, + "loss": 0.2722, + "step": 27571 + }, + { + "epoch": 1.2916100623038367, + "grad_norm": 0.5862291657560978, + "learning_rate": 1.4722953820504047e-06, + "loss": 0.274, + "step": 27572 + }, + { + "epoch": 1.291656907293765, + "grad_norm": 0.6071334602213591, + "learning_rate": 1.472122500227484e-06, + "loss": 0.2831, + "step": 27573 + }, + { + "epoch": 1.2917037522836932, + "grad_norm": 0.6063497005888908, + "learning_rate": 1.4719496243197395e-06, + "loss": 0.2822, + "step": 27574 + }, + { + "epoch": 1.2917505972736216, + "grad_norm": 0.6117155105262894, + "learning_rate": 1.4717767543281674e-06, + "loss": 0.2841, + "step": 27575 + }, + { + "epoch": 1.2917974422635499, + "grad_norm": 0.589021169850765, + "learning_rate": 1.471603890253761e-06, + "loss": 0.2695, + "step": 27576 + }, + { + "epoch": 1.291844287253478, + "grad_norm": 0.5822088626637973, + "learning_rate": 1.4714310320975159e-06, + "loss": 0.2708, + "step": 27577 + }, + { + "epoch": 1.2918911322434066, + "grad_norm": 0.5989291478301053, + "learning_rate": 1.4712581798604275e-06, + "loss": 0.2659, + "step": 27578 + }, + { + "epoch": 1.291937977233335, + "grad_norm": 0.5735797705715432, + "learning_rate": 1.4710853335434894e-06, + "loss": 0.2672, + "step": 27579 + }, + { + "epoch": 1.2919848222232633, + "grad_norm": 0.611789633448181, + "learning_rate": 1.470912493147696e-06, + "loss": 0.283, + "step": 27580 + }, + { + "epoch": 1.2920316672131915, + "grad_norm": 0.5592603375679303, + "learning_rate": 1.4707396586740424e-06, + "loss": 0.2681, + "step": 27581 + }, + { + "epoch": 1.29207851220312, + "grad_norm": 0.5609405347777896, + "learning_rate": 1.4705668301235242e-06, + "loss": 0.2652, + "step": 27582 + }, + { + "epoch": 1.2921253571930482, + "grad_norm": 0.6149876606443955, + "learning_rate": 1.4703940074971345e-06, + "loss": 0.2819, + "step": 27583 + }, + { + "epoch": 1.2921722021829765, + "grad_norm": 0.5884710549741751, + "learning_rate": 1.4702211907958697e-06, + "loss": 0.2692, + "step": 27584 + }, + { + "epoch": 1.292219047172905, + "grad_norm": 0.5694320250475546, + "learning_rate": 1.4700483800207217e-06, + "loss": 0.2625, + "step": 27585 + }, + { + "epoch": 1.2922658921628332, + "grad_norm": 0.5971958546665053, + "learning_rate": 1.4698755751726873e-06, + "loss": 0.2789, + "step": 27586 + }, + { + "epoch": 1.2923127371527614, + "grad_norm": 0.591189761900104, + "learning_rate": 1.4697027762527593e-06, + "loss": 0.268, + "step": 27587 + }, + { + "epoch": 1.2923595821426899, + "grad_norm": 0.584324979314272, + "learning_rate": 1.4695299832619328e-06, + "loss": 0.253, + "step": 27588 + }, + { + "epoch": 1.292406427132618, + "grad_norm": 0.6259575346761218, + "learning_rate": 1.469357196201203e-06, + "loss": 0.2807, + "step": 27589 + }, + { + "epoch": 1.2924532721225466, + "grad_norm": 0.5830765018093115, + "learning_rate": 1.4691844150715639e-06, + "loss": 0.2615, + "step": 27590 + }, + { + "epoch": 1.2925001171124748, + "grad_norm": 0.6004322324713888, + "learning_rate": 1.4690116398740084e-06, + "loss": 0.2777, + "step": 27591 + }, + { + "epoch": 1.2925469621024033, + "grad_norm": 0.5927008681292272, + "learning_rate": 1.4688388706095317e-06, + "loss": 0.2709, + "step": 27592 + }, + { + "epoch": 1.2925938070923315, + "grad_norm": 0.6203571858653864, + "learning_rate": 1.4686661072791287e-06, + "loss": 0.2809, + "step": 27593 + }, + { + "epoch": 1.2926406520822598, + "grad_norm": 0.6117552983189918, + "learning_rate": 1.4684933498837922e-06, + "loss": 0.2851, + "step": 27594 + }, + { + "epoch": 1.2926874970721882, + "grad_norm": 0.6469420781742833, + "learning_rate": 1.4683205984245183e-06, + "loss": 0.3033, + "step": 27595 + }, + { + "epoch": 1.2927343420621165, + "grad_norm": 0.5742424018568628, + "learning_rate": 1.4681478529022996e-06, + "loss": 0.266, + "step": 27596 + }, + { + "epoch": 1.2927811870520447, + "grad_norm": 0.6861039136832642, + "learning_rate": 1.4679751133181308e-06, + "loss": 0.2995, + "step": 27597 + }, + { + "epoch": 1.2928280320419732, + "grad_norm": 0.5663563141143082, + "learning_rate": 1.4678023796730056e-06, + "loss": 0.2731, + "step": 27598 + }, + { + "epoch": 1.2928748770319014, + "grad_norm": 0.6181492714035051, + "learning_rate": 1.4676296519679183e-06, + "loss": 0.2865, + "step": 27599 + }, + { + "epoch": 1.2929217220218296, + "grad_norm": 0.5397024055229772, + "learning_rate": 1.4674569302038628e-06, + "loss": 0.2511, + "step": 27600 + }, + { + "epoch": 1.292968567011758, + "grad_norm": 0.5618824144342051, + "learning_rate": 1.467284214381833e-06, + "loss": 0.2718, + "step": 27601 + }, + { + "epoch": 1.2930154120016863, + "grad_norm": 0.5437796179200569, + "learning_rate": 1.4671115045028239e-06, + "loss": 0.2643, + "step": 27602 + }, + { + "epoch": 1.2930622569916148, + "grad_norm": 0.5657299826347113, + "learning_rate": 1.466938800567828e-06, + "loss": 0.2788, + "step": 27603 + }, + { + "epoch": 1.293109101981543, + "grad_norm": 0.6302159723301134, + "learning_rate": 1.4667661025778393e-06, + "loss": 0.2869, + "step": 27604 + }, + { + "epoch": 1.2931559469714715, + "grad_norm": 0.5488901587714218, + "learning_rate": 1.466593410533852e-06, + "loss": 0.2628, + "step": 27605 + }, + { + "epoch": 1.2932027919613998, + "grad_norm": 0.5994499763120861, + "learning_rate": 1.46642072443686e-06, + "loss": 0.2705, + "step": 27606 + }, + { + "epoch": 1.293249636951328, + "grad_norm": 0.5736739436212254, + "learning_rate": 1.4662480442878568e-06, + "loss": 0.2718, + "step": 27607 + }, + { + "epoch": 1.2932964819412565, + "grad_norm": 0.6058419936567712, + "learning_rate": 1.4660753700878372e-06, + "loss": 0.2754, + "step": 27608 + }, + { + "epoch": 1.2933433269311847, + "grad_norm": 0.5894527403656131, + "learning_rate": 1.465902701837793e-06, + "loss": 0.2769, + "step": 27609 + }, + { + "epoch": 1.293390171921113, + "grad_norm": 0.6059845821747274, + "learning_rate": 1.4657300395387195e-06, + "loss": 0.2761, + "step": 27610 + }, + { + "epoch": 1.2934370169110414, + "grad_norm": 0.5443001178657169, + "learning_rate": 1.465557383191609e-06, + "loss": 0.256, + "step": 27611 + }, + { + "epoch": 1.2934838619009696, + "grad_norm": 0.621603264934878, + "learning_rate": 1.4653847327974558e-06, + "loss": 0.2923, + "step": 27612 + }, + { + "epoch": 1.2935307068908979, + "grad_norm": 0.5688852586160987, + "learning_rate": 1.4652120883572537e-06, + "loss": 0.2775, + "step": 27613 + }, + { + "epoch": 1.2935775518808263, + "grad_norm": 0.5941218637645621, + "learning_rate": 1.4650394498719967e-06, + "loss": 0.2856, + "step": 27614 + }, + { + "epoch": 1.2936243968707548, + "grad_norm": 0.5512321143673815, + "learning_rate": 1.4648668173426765e-06, + "loss": 0.2668, + "step": 27615 + }, + { + "epoch": 1.293671241860683, + "grad_norm": 0.6391014136734996, + "learning_rate": 1.4646941907702876e-06, + "loss": 0.279, + "step": 27616 + }, + { + "epoch": 1.2937180868506113, + "grad_norm": 0.5981910068107914, + "learning_rate": 1.4645215701558236e-06, + "loss": 0.2789, + "step": 27617 + }, + { + "epoch": 1.2937649318405398, + "grad_norm": 0.5959735965321036, + "learning_rate": 1.4643489555002777e-06, + "loss": 0.2632, + "step": 27618 + }, + { + "epoch": 1.293811776830468, + "grad_norm": 0.5749725272484371, + "learning_rate": 1.4641763468046426e-06, + "loss": 0.2716, + "step": 27619 + }, + { + "epoch": 1.2938586218203962, + "grad_norm": 0.545207681803951, + "learning_rate": 1.4640037440699134e-06, + "loss": 0.262, + "step": 27620 + }, + { + "epoch": 1.2939054668103247, + "grad_norm": 0.6110524871306252, + "learning_rate": 1.463831147297083e-06, + "loss": 0.2845, + "step": 27621 + }, + { + "epoch": 1.293952311800253, + "grad_norm": 0.5736384587796282, + "learning_rate": 1.4636585564871423e-06, + "loss": 0.2693, + "step": 27622 + }, + { + "epoch": 1.2939991567901812, + "grad_norm": 0.5844768609816504, + "learning_rate": 1.4634859716410863e-06, + "loss": 0.2654, + "step": 27623 + }, + { + "epoch": 1.2940460017801096, + "grad_norm": 0.6083167665465654, + "learning_rate": 1.4633133927599085e-06, + "loss": 0.2808, + "step": 27624 + }, + { + "epoch": 1.2940928467700379, + "grad_norm": 0.5941153994134127, + "learning_rate": 1.4631408198446013e-06, + "loss": 0.2696, + "step": 27625 + }, + { + "epoch": 1.2941396917599663, + "grad_norm": 0.6164480893947863, + "learning_rate": 1.4629682528961575e-06, + "loss": 0.2652, + "step": 27626 + }, + { + "epoch": 1.2941865367498946, + "grad_norm": 0.5817215421262889, + "learning_rate": 1.4627956919155722e-06, + "loss": 0.272, + "step": 27627 + }, + { + "epoch": 1.294233381739823, + "grad_norm": 0.5814350220373046, + "learning_rate": 1.4626231369038365e-06, + "loss": 0.2699, + "step": 27628 + }, + { + "epoch": 1.2942802267297513, + "grad_norm": 0.5601053882590222, + "learning_rate": 1.4624505878619432e-06, + "loss": 0.2676, + "step": 27629 + }, + { + "epoch": 1.2943270717196795, + "grad_norm": 0.5679091878089126, + "learning_rate": 1.4622780447908862e-06, + "loss": 0.2598, + "step": 27630 + }, + { + "epoch": 1.294373916709608, + "grad_norm": 0.5891258448415921, + "learning_rate": 1.462105507691659e-06, + "loss": 0.2708, + "step": 27631 + }, + { + "epoch": 1.2944207616995362, + "grad_norm": 0.5847202580815205, + "learning_rate": 1.461932976565253e-06, + "loss": 0.2828, + "step": 27632 + }, + { + "epoch": 1.2944676066894645, + "grad_norm": 0.5813335402679708, + "learning_rate": 1.4617604514126627e-06, + "loss": 0.2765, + "step": 27633 + }, + { + "epoch": 1.294514451679393, + "grad_norm": 0.5364391819696017, + "learning_rate": 1.4615879322348788e-06, + "loss": 0.2627, + "step": 27634 + }, + { + "epoch": 1.2945612966693212, + "grad_norm": 0.6031983266208569, + "learning_rate": 1.461415419032896e-06, + "loss": 0.2791, + "step": 27635 + }, + { + "epoch": 1.2946081416592494, + "grad_norm": 0.6019720313286483, + "learning_rate": 1.4612429118077063e-06, + "loss": 0.2761, + "step": 27636 + }, + { + "epoch": 1.2946549866491779, + "grad_norm": 0.6078009179687358, + "learning_rate": 1.4610704105603024e-06, + "loss": 0.2856, + "step": 27637 + }, + { + "epoch": 1.2947018316391061, + "grad_norm": 0.6021171636848706, + "learning_rate": 1.4608979152916775e-06, + "loss": 0.2934, + "step": 27638 + }, + { + "epoch": 1.2947486766290346, + "grad_norm": 0.6338062588896843, + "learning_rate": 1.4607254260028244e-06, + "loss": 0.2928, + "step": 27639 + }, + { + "epoch": 1.2947955216189628, + "grad_norm": 0.5938096836531555, + "learning_rate": 1.4605529426947345e-06, + "loss": 0.2741, + "step": 27640 + }, + { + "epoch": 1.2948423666088913, + "grad_norm": 0.6346485798923902, + "learning_rate": 1.4603804653684009e-06, + "loss": 0.2836, + "step": 27641 + }, + { + "epoch": 1.2948892115988195, + "grad_norm": 0.565068721620488, + "learning_rate": 1.460207994024817e-06, + "loss": 0.2638, + "step": 27642 + }, + { + "epoch": 1.2949360565887478, + "grad_norm": 0.6090577654198017, + "learning_rate": 1.4600355286649741e-06, + "loss": 0.2745, + "step": 27643 + }, + { + "epoch": 1.2949829015786762, + "grad_norm": 0.5697421523511644, + "learning_rate": 1.4598630692898663e-06, + "loss": 0.2689, + "step": 27644 + }, + { + "epoch": 1.2950297465686045, + "grad_norm": 0.5928469504718281, + "learning_rate": 1.4596906159004849e-06, + "loss": 0.2771, + "step": 27645 + }, + { + "epoch": 1.2950765915585327, + "grad_norm": 0.6240751125729452, + "learning_rate": 1.4595181684978227e-06, + "loss": 0.2772, + "step": 27646 + }, + { + "epoch": 1.2951234365484612, + "grad_norm": 0.5693237011571073, + "learning_rate": 1.4593457270828709e-06, + "loss": 0.2757, + "step": 27647 + }, + { + "epoch": 1.2951702815383894, + "grad_norm": 0.6259761050790782, + "learning_rate": 1.459173291656623e-06, + "loss": 0.2821, + "step": 27648 + }, + { + "epoch": 1.2952171265283177, + "grad_norm": 0.6251993679495786, + "learning_rate": 1.459000862220072e-06, + "loss": 0.2858, + "step": 27649 + }, + { + "epoch": 1.2952639715182461, + "grad_norm": 0.6051037330367535, + "learning_rate": 1.4588284387742097e-06, + "loss": 0.275, + "step": 27650 + }, + { + "epoch": 1.2953108165081746, + "grad_norm": 0.6126145906167023, + "learning_rate": 1.458656021320028e-06, + "loss": 0.2755, + "step": 27651 + }, + { + "epoch": 1.2953576614981028, + "grad_norm": 0.6339867033887406, + "learning_rate": 1.4584836098585181e-06, + "loss": 0.2682, + "step": 27652 + }, + { + "epoch": 1.295404506488031, + "grad_norm": 0.5962409651811265, + "learning_rate": 1.4583112043906737e-06, + "loss": 0.2809, + "step": 27653 + }, + { + "epoch": 1.2954513514779595, + "grad_norm": 0.5460515983875535, + "learning_rate": 1.458138804917486e-06, + "loss": 0.2474, + "step": 27654 + }, + { + "epoch": 1.2954981964678878, + "grad_norm": 0.5692917588309722, + "learning_rate": 1.4579664114399481e-06, + "loss": 0.2695, + "step": 27655 + }, + { + "epoch": 1.295545041457816, + "grad_norm": 0.556135547894557, + "learning_rate": 1.4577940239590512e-06, + "loss": 0.2607, + "step": 27656 + }, + { + "epoch": 1.2955918864477445, + "grad_norm": 0.5864643942130131, + "learning_rate": 1.4576216424757888e-06, + "loss": 0.2689, + "step": 27657 + }, + { + "epoch": 1.2956387314376727, + "grad_norm": 0.6193124288134081, + "learning_rate": 1.4574492669911522e-06, + "loss": 0.2803, + "step": 27658 + }, + { + "epoch": 1.295685576427601, + "grad_norm": 0.5927587854218368, + "learning_rate": 1.4572768975061318e-06, + "loss": 0.2773, + "step": 27659 + }, + { + "epoch": 1.2957324214175294, + "grad_norm": 0.5859291155758158, + "learning_rate": 1.4571045340217208e-06, + "loss": 0.2778, + "step": 27660 + }, + { + "epoch": 1.2957792664074577, + "grad_norm": 0.5824810826841231, + "learning_rate": 1.456932176538911e-06, + "loss": 0.2664, + "step": 27661 + }, + { + "epoch": 1.2958261113973861, + "grad_norm": 0.5884071128865221, + "learning_rate": 1.4567598250586945e-06, + "loss": 0.278, + "step": 27662 + }, + { + "epoch": 1.2958729563873144, + "grad_norm": 0.6104491625390182, + "learning_rate": 1.4565874795820638e-06, + "loss": 0.2655, + "step": 27663 + }, + { + "epoch": 1.2959198013772428, + "grad_norm": 0.5839675460685518, + "learning_rate": 1.456415140110009e-06, + "loss": 0.2621, + "step": 27664 + }, + { + "epoch": 1.295966646367171, + "grad_norm": 0.6077870984649897, + "learning_rate": 1.4562428066435242e-06, + "loss": 0.2774, + "step": 27665 + }, + { + "epoch": 1.2960134913570993, + "grad_norm": 0.5556560926085244, + "learning_rate": 1.456070479183598e-06, + "loss": 0.2609, + "step": 27666 + }, + { + "epoch": 1.2960603363470278, + "grad_norm": 0.6107125301923464, + "learning_rate": 1.4558981577312242e-06, + "loss": 0.2674, + "step": 27667 + }, + { + "epoch": 1.296107181336956, + "grad_norm": 0.615291067762554, + "learning_rate": 1.4557258422873939e-06, + "loss": 0.2958, + "step": 27668 + }, + { + "epoch": 1.2961540263268843, + "grad_norm": 0.5935523642525965, + "learning_rate": 1.4555535328531001e-06, + "loss": 0.2908, + "step": 27669 + }, + { + "epoch": 1.2962008713168127, + "grad_norm": 0.5541320201423899, + "learning_rate": 1.455381229429332e-06, + "loss": 0.2606, + "step": 27670 + }, + { + "epoch": 1.296247716306741, + "grad_norm": 0.6082276764405095, + "learning_rate": 1.4552089320170837e-06, + "loss": 0.2689, + "step": 27671 + }, + { + "epoch": 1.2962945612966692, + "grad_norm": 0.5993685752056497, + "learning_rate": 1.4550366406173438e-06, + "loss": 0.2765, + "step": 27672 + }, + { + "epoch": 1.2963414062865977, + "grad_norm": 0.6075425148678859, + "learning_rate": 1.454864355231106e-06, + "loss": 0.2755, + "step": 27673 + }, + { + "epoch": 1.296388251276526, + "grad_norm": 0.5781317212926885, + "learning_rate": 1.4546920758593608e-06, + "loss": 0.2764, + "step": 27674 + }, + { + "epoch": 1.2964350962664544, + "grad_norm": 0.614860228204433, + "learning_rate": 1.4545198025031012e-06, + "loss": 0.28, + "step": 27675 + }, + { + "epoch": 1.2964819412563826, + "grad_norm": 0.622487882741663, + "learning_rate": 1.4543475351633164e-06, + "loss": 0.2693, + "step": 27676 + }, + { + "epoch": 1.296528786246311, + "grad_norm": 0.5690594955735987, + "learning_rate": 1.4541752738409987e-06, + "loss": 0.2556, + "step": 27677 + }, + { + "epoch": 1.2965756312362393, + "grad_norm": 0.6054238736052412, + "learning_rate": 1.4540030185371401e-06, + "loss": 0.2778, + "step": 27678 + }, + { + "epoch": 1.2966224762261676, + "grad_norm": 0.6101286606994611, + "learning_rate": 1.4538307692527304e-06, + "loss": 0.2815, + "step": 27679 + }, + { + "epoch": 1.296669321216096, + "grad_norm": 0.5977291839377793, + "learning_rate": 1.4536585259887616e-06, + "loss": 0.2745, + "step": 27680 + }, + { + "epoch": 1.2967161662060243, + "grad_norm": 0.6260976927937503, + "learning_rate": 1.4534862887462265e-06, + "loss": 0.2697, + "step": 27681 + }, + { + "epoch": 1.2967630111959525, + "grad_norm": 0.6030806955038774, + "learning_rate": 1.4533140575261131e-06, + "loss": 0.2759, + "step": 27682 + }, + { + "epoch": 1.296809856185881, + "grad_norm": 0.6044890759841198, + "learning_rate": 1.4531418323294143e-06, + "loss": 0.2939, + "step": 27683 + }, + { + "epoch": 1.2968567011758092, + "grad_norm": 0.5786965836735632, + "learning_rate": 1.4529696131571217e-06, + "loss": 0.2613, + "step": 27684 + }, + { + "epoch": 1.2969035461657374, + "grad_norm": 0.6010136327645056, + "learning_rate": 1.4527974000102262e-06, + "loss": 0.2829, + "step": 27685 + }, + { + "epoch": 1.296950391155666, + "grad_norm": 0.6384430177419302, + "learning_rate": 1.4526251928897179e-06, + "loss": 0.2865, + "step": 27686 + }, + { + "epoch": 1.2969972361455941, + "grad_norm": 0.5760692512103524, + "learning_rate": 1.4524529917965882e-06, + "loss": 0.2699, + "step": 27687 + }, + { + "epoch": 1.2970440811355226, + "grad_norm": 0.6051085509875783, + "learning_rate": 1.4522807967318291e-06, + "loss": 0.2717, + "step": 27688 + }, + { + "epoch": 1.2970909261254508, + "grad_norm": 0.5834824730218842, + "learning_rate": 1.4521086076964303e-06, + "loss": 0.2653, + "step": 27689 + }, + { + "epoch": 1.2971377711153793, + "grad_norm": 0.6198839564248965, + "learning_rate": 1.4519364246913826e-06, + "loss": 0.2753, + "step": 27690 + }, + { + "epoch": 1.2971846161053076, + "grad_norm": 0.5727844235072612, + "learning_rate": 1.451764247717677e-06, + "loss": 0.2741, + "step": 27691 + }, + { + "epoch": 1.2972314610952358, + "grad_norm": 0.6235917341117274, + "learning_rate": 1.4515920767763062e-06, + "loss": 0.2841, + "step": 27692 + }, + { + "epoch": 1.2972783060851643, + "grad_norm": 0.6044197045474136, + "learning_rate": 1.4514199118682582e-06, + "loss": 0.2638, + "step": 27693 + }, + { + "epoch": 1.2973251510750925, + "grad_norm": 0.6032184935642209, + "learning_rate": 1.4512477529945263e-06, + "loss": 0.2744, + "step": 27694 + }, + { + "epoch": 1.2973719960650207, + "grad_norm": 0.5857863327951777, + "learning_rate": 1.4510756001560988e-06, + "loss": 0.2799, + "step": 27695 + }, + { + "epoch": 1.2974188410549492, + "grad_norm": 0.638101803770823, + "learning_rate": 1.450903453353968e-06, + "loss": 0.2814, + "step": 27696 + }, + { + "epoch": 1.2974656860448774, + "grad_norm": 0.6048512519563495, + "learning_rate": 1.4507313125891237e-06, + "loss": 0.2631, + "step": 27697 + }, + { + "epoch": 1.297512531034806, + "grad_norm": 0.5820746914300596, + "learning_rate": 1.4505591778625571e-06, + "loss": 0.2773, + "step": 27698 + }, + { + "epoch": 1.2975593760247341, + "grad_norm": 0.5498279214143235, + "learning_rate": 1.4503870491752594e-06, + "loss": 0.2758, + "step": 27699 + }, + { + "epoch": 1.2976062210146626, + "grad_norm": 0.5658142099482645, + "learning_rate": 1.4502149265282208e-06, + "loss": 0.2613, + "step": 27700 + }, + { + "epoch": 1.2976530660045908, + "grad_norm": 0.5712360371525356, + "learning_rate": 1.4500428099224304e-06, + "loss": 0.2681, + "step": 27701 + }, + { + "epoch": 1.297699910994519, + "grad_norm": 0.5961141410436395, + "learning_rate": 1.4498706993588796e-06, + "loss": 0.2571, + "step": 27702 + }, + { + "epoch": 1.2977467559844476, + "grad_norm": 0.5914440482848139, + "learning_rate": 1.4496985948385589e-06, + "loss": 0.2741, + "step": 27703 + }, + { + "epoch": 1.2977936009743758, + "grad_norm": 0.5513505732194668, + "learning_rate": 1.449526496362459e-06, + "loss": 0.2566, + "step": 27704 + }, + { + "epoch": 1.297840445964304, + "grad_norm": 0.5662340656469299, + "learning_rate": 1.449354403931571e-06, + "loss": 0.2631, + "step": 27705 + }, + { + "epoch": 1.2978872909542325, + "grad_norm": 0.5814122509337107, + "learning_rate": 1.4491823175468834e-06, + "loss": 0.2733, + "step": 27706 + }, + { + "epoch": 1.2979341359441607, + "grad_norm": 0.6164918694606929, + "learning_rate": 1.4490102372093884e-06, + "loss": 0.2748, + "step": 27707 + }, + { + "epoch": 1.297980980934089, + "grad_norm": 0.6295720387582336, + "learning_rate": 1.4488381629200742e-06, + "loss": 0.2735, + "step": 27708 + }, + { + "epoch": 1.2980278259240174, + "grad_norm": 0.5679083401610839, + "learning_rate": 1.4486660946799324e-06, + "loss": 0.2654, + "step": 27709 + }, + { + "epoch": 1.2980746709139457, + "grad_norm": 0.6014000155001181, + "learning_rate": 1.448494032489953e-06, + "loss": 0.2839, + "step": 27710 + }, + { + "epoch": 1.2981215159038741, + "grad_norm": 0.6055020282401927, + "learning_rate": 1.4483219763511261e-06, + "loss": 0.2789, + "step": 27711 + }, + { + "epoch": 1.2981683608938024, + "grad_norm": 0.6117224840357133, + "learning_rate": 1.4481499262644426e-06, + "loss": 0.2785, + "step": 27712 + }, + { + "epoch": 1.2982152058837308, + "grad_norm": 0.6068391189488844, + "learning_rate": 1.4479778822308916e-06, + "loss": 0.2746, + "step": 27713 + }, + { + "epoch": 1.298262050873659, + "grad_norm": 0.582051319208891, + "learning_rate": 1.4478058442514637e-06, + "loss": 0.2752, + "step": 27714 + }, + { + "epoch": 1.2983088958635873, + "grad_norm": 0.5865826530753034, + "learning_rate": 1.4476338123271482e-06, + "loss": 0.2833, + "step": 27715 + }, + { + "epoch": 1.2983557408535158, + "grad_norm": 0.639626011564291, + "learning_rate": 1.4474617864589351e-06, + "loss": 0.2952, + "step": 27716 + }, + { + "epoch": 1.298402585843444, + "grad_norm": 0.5978833360568995, + "learning_rate": 1.4472897666478153e-06, + "loss": 0.281, + "step": 27717 + }, + { + "epoch": 1.2984494308333723, + "grad_norm": 0.5602267109721171, + "learning_rate": 1.4471177528947795e-06, + "loss": 0.2693, + "step": 27718 + }, + { + "epoch": 1.2984962758233007, + "grad_norm": 0.5518211166943968, + "learning_rate": 1.446945745200815e-06, + "loss": 0.2752, + "step": 27719 + }, + { + "epoch": 1.298543120813229, + "grad_norm": 0.6010262707555872, + "learning_rate": 1.4467737435669143e-06, + "loss": 0.2756, + "step": 27720 + }, + { + "epoch": 1.2985899658031572, + "grad_norm": 0.5236819141291021, + "learning_rate": 1.4466017479940647e-06, + "loss": 0.2554, + "step": 27721 + }, + { + "epoch": 1.2986368107930857, + "grad_norm": 0.6365786576178353, + "learning_rate": 1.4464297584832577e-06, + "loss": 0.2766, + "step": 27722 + }, + { + "epoch": 1.298683655783014, + "grad_norm": 0.5728414273443795, + "learning_rate": 1.4462577750354823e-06, + "loss": 0.2584, + "step": 27723 + }, + { + "epoch": 1.2987305007729424, + "grad_norm": 0.6081029515007632, + "learning_rate": 1.4460857976517294e-06, + "loss": 0.2802, + "step": 27724 + }, + { + "epoch": 1.2987773457628706, + "grad_norm": 0.6194543886908355, + "learning_rate": 1.4459138263329873e-06, + "loss": 0.2777, + "step": 27725 + }, + { + "epoch": 1.298824190752799, + "grad_norm": 0.6023712993626043, + "learning_rate": 1.4457418610802459e-06, + "loss": 0.2724, + "step": 27726 + }, + { + "epoch": 1.2988710357427273, + "grad_norm": 0.588568894377646, + "learning_rate": 1.4455699018944964e-06, + "loss": 0.277, + "step": 27727 + }, + { + "epoch": 1.2989178807326556, + "grad_norm": 0.6624126449035779, + "learning_rate": 1.4453979487767255e-06, + "loss": 0.2775, + "step": 27728 + }, + { + "epoch": 1.298964725722584, + "grad_norm": 0.6214489489744485, + "learning_rate": 1.4452260017279248e-06, + "loss": 0.305, + "step": 27729 + }, + { + "epoch": 1.2990115707125123, + "grad_norm": 0.5798564417322181, + "learning_rate": 1.4450540607490843e-06, + "loss": 0.2685, + "step": 27730 + }, + { + "epoch": 1.2990584157024405, + "grad_norm": 0.5869182665952056, + "learning_rate": 1.4448821258411916e-06, + "loss": 0.2838, + "step": 27731 + }, + { + "epoch": 1.299105260692369, + "grad_norm": 0.5919564001858949, + "learning_rate": 1.444710197005237e-06, + "loss": 0.2576, + "step": 27732 + }, + { + "epoch": 1.2991521056822972, + "grad_norm": 0.5876732381754662, + "learning_rate": 1.4445382742422097e-06, + "loss": 0.2757, + "step": 27733 + }, + { + "epoch": 1.2991989506722257, + "grad_norm": 0.5864308493729099, + "learning_rate": 1.4443663575531008e-06, + "loss": 0.2617, + "step": 27734 + }, + { + "epoch": 1.299245795662154, + "grad_norm": 0.5987666773923225, + "learning_rate": 1.444194446938897e-06, + "loss": 0.2625, + "step": 27735 + }, + { + "epoch": 1.2992926406520824, + "grad_norm": 0.5512854134053596, + "learning_rate": 1.4440225424005897e-06, + "loss": 0.2599, + "step": 27736 + }, + { + "epoch": 1.2993394856420106, + "grad_norm": 0.6722494777969632, + "learning_rate": 1.4438506439391665e-06, + "loss": 0.2815, + "step": 27737 + }, + { + "epoch": 1.2993863306319389, + "grad_norm": 0.6263980121331653, + "learning_rate": 1.4436787515556171e-06, + "loss": 0.2837, + "step": 27738 + }, + { + "epoch": 1.2994331756218673, + "grad_norm": 0.596064484635121, + "learning_rate": 1.443506865250931e-06, + "loss": 0.2676, + "step": 27739 + }, + { + "epoch": 1.2994800206117956, + "grad_norm": 0.6232592673366182, + "learning_rate": 1.4433349850260975e-06, + "loss": 0.278, + "step": 27740 + }, + { + "epoch": 1.2995268656017238, + "grad_norm": 0.554590848605432, + "learning_rate": 1.4431631108821065e-06, + "loss": 0.265, + "step": 27741 + }, + { + "epoch": 1.2995737105916523, + "grad_norm": 0.6079454770994477, + "learning_rate": 1.4429912428199463e-06, + "loss": 0.2632, + "step": 27742 + }, + { + "epoch": 1.2996205555815805, + "grad_norm": 0.6016841143916898, + "learning_rate": 1.442819380840605e-06, + "loss": 0.2669, + "step": 27743 + }, + { + "epoch": 1.2996674005715088, + "grad_norm": 0.6274682074428429, + "learning_rate": 1.442647524945072e-06, + "loss": 0.2858, + "step": 27744 + }, + { + "epoch": 1.2997142455614372, + "grad_norm": 0.6343670760784683, + "learning_rate": 1.4424756751343368e-06, + "loss": 0.2798, + "step": 27745 + }, + { + "epoch": 1.2997610905513655, + "grad_norm": 0.5871855580256081, + "learning_rate": 1.4423038314093884e-06, + "loss": 0.2817, + "step": 27746 + }, + { + "epoch": 1.299807935541294, + "grad_norm": 0.6328027953646946, + "learning_rate": 1.442131993771216e-06, + "loss": 0.2772, + "step": 27747 + }, + { + "epoch": 1.2998547805312222, + "grad_norm": 0.5986422345702656, + "learning_rate": 1.4419601622208084e-06, + "loss": 0.2721, + "step": 27748 + }, + { + "epoch": 1.2999016255211506, + "grad_norm": 0.6142185688923133, + "learning_rate": 1.4417883367591545e-06, + "loss": 0.2983, + "step": 27749 + }, + { + "epoch": 1.2999484705110789, + "grad_norm": 0.6695697139756387, + "learning_rate": 1.4416165173872418e-06, + "loss": 0.2878, + "step": 27750 + }, + { + "epoch": 1.299995315501007, + "grad_norm": 0.5812985980560689, + "learning_rate": 1.4414447041060598e-06, + "loss": 0.2795, + "step": 27751 + }, + { + "epoch": 1.3000421604909356, + "grad_norm": 0.5490717687496144, + "learning_rate": 1.4412728969165979e-06, + "loss": 0.2584, + "step": 27752 + }, + { + "epoch": 1.3000890054808638, + "grad_norm": 0.5573242886312808, + "learning_rate": 1.441101095819844e-06, + "loss": 0.2805, + "step": 27753 + }, + { + "epoch": 1.300135850470792, + "grad_norm": 0.5638247341053707, + "learning_rate": 1.4409293008167882e-06, + "loss": 0.2631, + "step": 27754 + }, + { + "epoch": 1.3001826954607205, + "grad_norm": 0.5928200118558713, + "learning_rate": 1.4407575119084172e-06, + "loss": 0.2806, + "step": 27755 + }, + { + "epoch": 1.3002295404506488, + "grad_norm": 0.58786716425952, + "learning_rate": 1.4405857290957215e-06, + "loss": 0.2726, + "step": 27756 + }, + { + "epoch": 1.300276385440577, + "grad_norm": 0.578377113081704, + "learning_rate": 1.4404139523796872e-06, + "loss": 0.2685, + "step": 27757 + }, + { + "epoch": 1.3003232304305055, + "grad_norm": 0.5745656039327495, + "learning_rate": 1.440242181761305e-06, + "loss": 0.2614, + "step": 27758 + }, + { + "epoch": 1.3003700754204337, + "grad_norm": 0.5991140954075251, + "learning_rate": 1.4400704172415623e-06, + "loss": 0.2704, + "step": 27759 + }, + { + "epoch": 1.3004169204103622, + "grad_norm": 0.5813062165601672, + "learning_rate": 1.439898658821448e-06, + "loss": 0.2646, + "step": 27760 + }, + { + "epoch": 1.3004637654002904, + "grad_norm": 0.6088745749314893, + "learning_rate": 1.4397269065019514e-06, + "loss": 0.2754, + "step": 27761 + }, + { + "epoch": 1.3005106103902189, + "grad_norm": 0.5949790198466018, + "learning_rate": 1.4395551602840603e-06, + "loss": 0.2657, + "step": 27762 + }, + { + "epoch": 1.300557455380147, + "grad_norm": 0.567212086782185, + "learning_rate": 1.4393834201687613e-06, + "loss": 0.2656, + "step": 27763 + }, + { + "epoch": 1.3006043003700754, + "grad_norm": 0.5998524809239667, + "learning_rate": 1.439211686157044e-06, + "loss": 0.2757, + "step": 27764 + }, + { + "epoch": 1.3006511453600038, + "grad_norm": 0.5954214526181527, + "learning_rate": 1.439039958249897e-06, + "loss": 0.2664, + "step": 27765 + }, + { + "epoch": 1.300697990349932, + "grad_norm": 0.6101296906428378, + "learning_rate": 1.4388682364483086e-06, + "loss": 0.2756, + "step": 27766 + }, + { + "epoch": 1.3007448353398603, + "grad_norm": 0.5679143500189542, + "learning_rate": 1.4386965207532676e-06, + "loss": 0.2668, + "step": 27767 + }, + { + "epoch": 1.3007916803297888, + "grad_norm": 0.608808635412202, + "learning_rate": 1.4385248111657607e-06, + "loss": 0.2834, + "step": 27768 + }, + { + "epoch": 1.300838525319717, + "grad_norm": 0.5613932165128299, + "learning_rate": 1.4383531076867774e-06, + "loss": 0.2732, + "step": 27769 + }, + { + "epoch": 1.3008853703096455, + "grad_norm": 0.6337530426749451, + "learning_rate": 1.4381814103173042e-06, + "loss": 0.2895, + "step": 27770 + }, + { + "epoch": 1.3009322152995737, + "grad_norm": 0.5870660739036729, + "learning_rate": 1.4380097190583303e-06, + "loss": 0.2728, + "step": 27771 + }, + { + "epoch": 1.3009790602895022, + "grad_norm": 0.5950311200740502, + "learning_rate": 1.4378380339108434e-06, + "loss": 0.2692, + "step": 27772 + }, + { + "epoch": 1.3010259052794304, + "grad_norm": 0.6117625105609246, + "learning_rate": 1.4376663548758324e-06, + "loss": 0.2952, + "step": 27773 + }, + { + "epoch": 1.3010727502693586, + "grad_norm": 0.5877328594312221, + "learning_rate": 1.4374946819542837e-06, + "loss": 0.2713, + "step": 27774 + }, + { + "epoch": 1.301119595259287, + "grad_norm": 0.5896246370113994, + "learning_rate": 1.437323015147186e-06, + "loss": 0.2707, + "step": 27775 + }, + { + "epoch": 1.3011664402492154, + "grad_norm": 0.5507660884680802, + "learning_rate": 1.4371513544555285e-06, + "loss": 0.2513, + "step": 27776 + }, + { + "epoch": 1.3012132852391436, + "grad_norm": 0.6423518992031054, + "learning_rate": 1.4369796998802965e-06, + "loss": 0.294, + "step": 27777 + }, + { + "epoch": 1.301260130229072, + "grad_norm": 0.6315864760127947, + "learning_rate": 1.4368080514224793e-06, + "loss": 0.28, + "step": 27778 + }, + { + "epoch": 1.3013069752190003, + "grad_norm": 0.5970668346257095, + "learning_rate": 1.4366364090830655e-06, + "loss": 0.2732, + "step": 27779 + }, + { + "epoch": 1.3013538202089285, + "grad_norm": 0.6373549352030181, + "learning_rate": 1.436464772863041e-06, + "loss": 0.2766, + "step": 27780 + }, + { + "epoch": 1.301400665198857, + "grad_norm": 0.5857932133667486, + "learning_rate": 1.4362931427633944e-06, + "loss": 0.2765, + "step": 27781 + }, + { + "epoch": 1.3014475101887852, + "grad_norm": 0.5262714232418337, + "learning_rate": 1.4361215187851136e-06, + "loss": 0.2538, + "step": 27782 + }, + { + "epoch": 1.3014943551787137, + "grad_norm": 0.6311296420254493, + "learning_rate": 1.4359499009291867e-06, + "loss": 0.2929, + "step": 27783 + }, + { + "epoch": 1.301541200168642, + "grad_norm": 0.6407429949290805, + "learning_rate": 1.4357782891966e-06, + "loss": 0.2789, + "step": 27784 + }, + { + "epoch": 1.3015880451585704, + "grad_norm": 0.5586917228979101, + "learning_rate": 1.4356066835883426e-06, + "loss": 0.2542, + "step": 27785 + }, + { + "epoch": 1.3016348901484986, + "grad_norm": 0.6166898722313122, + "learning_rate": 1.4354350841054e-06, + "loss": 0.2803, + "step": 27786 + }, + { + "epoch": 1.3016817351384269, + "grad_norm": 0.626572441782757, + "learning_rate": 1.4352634907487612e-06, + "loss": 0.278, + "step": 27787 + }, + { + "epoch": 1.3017285801283554, + "grad_norm": 0.5740020975591081, + "learning_rate": 1.4350919035194133e-06, + "loss": 0.2568, + "step": 27788 + }, + { + "epoch": 1.3017754251182836, + "grad_norm": 0.5633102777532749, + "learning_rate": 1.434920322418344e-06, + "loss": 0.2753, + "step": 27789 + }, + { + "epoch": 1.3018222701082118, + "grad_norm": 0.54813288356251, + "learning_rate": 1.434748747446541e-06, + "loss": 0.271, + "step": 27790 + }, + { + "epoch": 1.3018691150981403, + "grad_norm": 0.6627601084477189, + "learning_rate": 1.4345771786049916e-06, + "loss": 0.2843, + "step": 27791 + }, + { + "epoch": 1.3019159600880685, + "grad_norm": 0.5930220353208937, + "learning_rate": 1.434405615894682e-06, + "loss": 0.2787, + "step": 27792 + }, + { + "epoch": 1.3019628050779968, + "grad_norm": 0.5726099438050488, + "learning_rate": 1.4342340593166e-06, + "loss": 0.2767, + "step": 27793 + }, + { + "epoch": 1.3020096500679252, + "grad_norm": 0.5592843176243757, + "learning_rate": 1.434062508871733e-06, + "loss": 0.274, + "step": 27794 + }, + { + "epoch": 1.3020564950578535, + "grad_norm": 0.5813030393356701, + "learning_rate": 1.4338909645610689e-06, + "loss": 0.2788, + "step": 27795 + }, + { + "epoch": 1.302103340047782, + "grad_norm": 0.5961051675359342, + "learning_rate": 1.4337194263855936e-06, + "loss": 0.2739, + "step": 27796 + }, + { + "epoch": 1.3021501850377102, + "grad_norm": 0.6117991769410582, + "learning_rate": 1.4335478943462965e-06, + "loss": 0.277, + "step": 27797 + }, + { + "epoch": 1.3021970300276386, + "grad_norm": 0.5685568959509975, + "learning_rate": 1.4333763684441633e-06, + "loss": 0.2615, + "step": 27798 + }, + { + "epoch": 1.3022438750175669, + "grad_norm": 0.6013703745336572, + "learning_rate": 1.4332048486801794e-06, + "loss": 0.2784, + "step": 27799 + }, + { + "epoch": 1.3022907200074951, + "grad_norm": 0.6157470481261149, + "learning_rate": 1.433033335055334e-06, + "loss": 0.2704, + "step": 27800 + }, + { + "epoch": 1.3023375649974236, + "grad_norm": 0.6274636206555428, + "learning_rate": 1.4328618275706135e-06, + "loss": 0.2768, + "step": 27801 + }, + { + "epoch": 1.3023844099873518, + "grad_norm": 0.597924955479411, + "learning_rate": 1.4326903262270047e-06, + "loss": 0.2627, + "step": 27802 + }, + { + "epoch": 1.30243125497728, + "grad_norm": 0.5988131735956111, + "learning_rate": 1.4325188310254962e-06, + "loss": 0.2784, + "step": 27803 + }, + { + "epoch": 1.3024780999672085, + "grad_norm": 0.5850499836319222, + "learning_rate": 1.4323473419670723e-06, + "loss": 0.2747, + "step": 27804 + }, + { + "epoch": 1.3025249449571368, + "grad_norm": 0.5471677003721395, + "learning_rate": 1.4321758590527219e-06, + "loss": 0.249, + "step": 27805 + }, + { + "epoch": 1.3025717899470652, + "grad_norm": 0.5611673012272422, + "learning_rate": 1.4320043822834304e-06, + "loss": 0.2529, + "step": 27806 + }, + { + "epoch": 1.3026186349369935, + "grad_norm": 0.6050300570379513, + "learning_rate": 1.4318329116601854e-06, + "loss": 0.2801, + "step": 27807 + }, + { + "epoch": 1.302665479926922, + "grad_norm": 0.5330391936291076, + "learning_rate": 1.431661447183973e-06, + "loss": 0.2597, + "step": 27808 + }, + { + "epoch": 1.3027123249168502, + "grad_norm": 0.5684226907075424, + "learning_rate": 1.4314899888557818e-06, + "loss": 0.2613, + "step": 27809 + }, + { + "epoch": 1.3027591699067784, + "grad_norm": 0.6158983506369393, + "learning_rate": 1.4313185366765958e-06, + "loss": 0.2796, + "step": 27810 + }, + { + "epoch": 1.3028060148967069, + "grad_norm": 0.5627550894808993, + "learning_rate": 1.4311470906474045e-06, + "loss": 0.2402, + "step": 27811 + }, + { + "epoch": 1.3028528598866351, + "grad_norm": 0.5585427820424202, + "learning_rate": 1.4309756507691918e-06, + "loss": 0.2582, + "step": 27812 + }, + { + "epoch": 1.3028997048765634, + "grad_norm": 0.5839839152356177, + "learning_rate": 1.4308042170429453e-06, + "loss": 0.278, + "step": 27813 + }, + { + "epoch": 1.3029465498664918, + "grad_norm": 0.5687357077695554, + "learning_rate": 1.4306327894696522e-06, + "loss": 0.2672, + "step": 27814 + }, + { + "epoch": 1.30299339485642, + "grad_norm": 0.5641128929458408, + "learning_rate": 1.4304613680502995e-06, + "loss": 0.2613, + "step": 27815 + }, + { + "epoch": 1.3030402398463483, + "grad_norm": 0.571390076551263, + "learning_rate": 1.430289952785872e-06, + "loss": 0.2546, + "step": 27816 + }, + { + "epoch": 1.3030870848362768, + "grad_norm": 0.5842221727843409, + "learning_rate": 1.430118543677357e-06, + "loss": 0.281, + "step": 27817 + }, + { + "epoch": 1.303133929826205, + "grad_norm": 0.6238318528995372, + "learning_rate": 1.4299471407257414e-06, + "loss": 0.2846, + "step": 27818 + }, + { + "epoch": 1.3031807748161335, + "grad_norm": 0.5865200516480515, + "learning_rate": 1.4297757439320103e-06, + "loss": 0.2582, + "step": 27819 + }, + { + "epoch": 1.3032276198060617, + "grad_norm": 0.6397658702384716, + "learning_rate": 1.4296043532971507e-06, + "loss": 0.2919, + "step": 27820 + }, + { + "epoch": 1.3032744647959902, + "grad_norm": 0.6406029055332652, + "learning_rate": 1.4294329688221492e-06, + "loss": 0.293, + "step": 27821 + }, + { + "epoch": 1.3033213097859184, + "grad_norm": 0.6158230364517264, + "learning_rate": 1.4292615905079926e-06, + "loss": 0.2727, + "step": 27822 + }, + { + "epoch": 1.3033681547758467, + "grad_norm": 0.5484861747352657, + "learning_rate": 1.4290902183556656e-06, + "loss": 0.2536, + "step": 27823 + }, + { + "epoch": 1.3034149997657751, + "grad_norm": 0.5857525388572967, + "learning_rate": 1.4289188523661554e-06, + "loss": 0.2754, + "step": 27824 + }, + { + "epoch": 1.3034618447557034, + "grad_norm": 0.5814811707738791, + "learning_rate": 1.4287474925404488e-06, + "loss": 0.2697, + "step": 27825 + }, + { + "epoch": 1.3035086897456316, + "grad_norm": 0.5860204948987147, + "learning_rate": 1.4285761388795302e-06, + "loss": 0.2708, + "step": 27826 + }, + { + "epoch": 1.30355553473556, + "grad_norm": 0.5596888038515095, + "learning_rate": 1.4284047913843868e-06, + "loss": 0.2613, + "step": 27827 + }, + { + "epoch": 1.3036023797254883, + "grad_norm": 0.6540985200964856, + "learning_rate": 1.4282334500560051e-06, + "loss": 0.2986, + "step": 27828 + }, + { + "epoch": 1.3036492247154166, + "grad_norm": 0.6183674407430422, + "learning_rate": 1.4280621148953697e-06, + "loss": 0.2784, + "step": 27829 + }, + { + "epoch": 1.303696069705345, + "grad_norm": 0.5863504727384536, + "learning_rate": 1.4278907859034674e-06, + "loss": 0.2772, + "step": 27830 + }, + { + "epoch": 1.3037429146952733, + "grad_norm": 0.607827622674293, + "learning_rate": 1.4277194630812841e-06, + "loss": 0.2748, + "step": 27831 + }, + { + "epoch": 1.3037897596852017, + "grad_norm": 0.5830523263190932, + "learning_rate": 1.4275481464298068e-06, + "loss": 0.2818, + "step": 27832 + }, + { + "epoch": 1.30383660467513, + "grad_norm": 0.5864223610853564, + "learning_rate": 1.4273768359500195e-06, + "loss": 0.2763, + "step": 27833 + }, + { + "epoch": 1.3038834496650584, + "grad_norm": 0.6013351485267644, + "learning_rate": 1.42720553164291e-06, + "loss": 0.2799, + "step": 27834 + }, + { + "epoch": 1.3039302946549867, + "grad_norm": 0.566756535376819, + "learning_rate": 1.4270342335094618e-06, + "loss": 0.2601, + "step": 27835 + }, + { + "epoch": 1.303977139644915, + "grad_norm": 0.5498415887686615, + "learning_rate": 1.4268629415506618e-06, + "loss": 0.2665, + "step": 27836 + }, + { + "epoch": 1.3040239846348434, + "grad_norm": 0.5799370238338726, + "learning_rate": 1.4266916557674965e-06, + "loss": 0.2797, + "step": 27837 + }, + { + "epoch": 1.3040708296247716, + "grad_norm": 0.6305017524930692, + "learning_rate": 1.4265203761609503e-06, + "loss": 0.2974, + "step": 27838 + }, + { + "epoch": 1.3041176746146999, + "grad_norm": 0.565469190339049, + "learning_rate": 1.4263491027320108e-06, + "loss": 0.2581, + "step": 27839 + }, + { + "epoch": 1.3041645196046283, + "grad_norm": 0.6429415207217711, + "learning_rate": 1.426177835481662e-06, + "loss": 0.2799, + "step": 27840 + }, + { + "epoch": 1.3042113645945566, + "grad_norm": 0.5910829077863168, + "learning_rate": 1.4260065744108892e-06, + "loss": 0.2777, + "step": 27841 + }, + { + "epoch": 1.304258209584485, + "grad_norm": 0.6543980263212701, + "learning_rate": 1.4258353195206786e-06, + "loss": 0.2788, + "step": 27842 + }, + { + "epoch": 1.3043050545744133, + "grad_norm": 0.6098944274565417, + "learning_rate": 1.4256640708120156e-06, + "loss": 0.2767, + "step": 27843 + }, + { + "epoch": 1.3043518995643417, + "grad_norm": 0.5476131153481338, + "learning_rate": 1.4254928282858859e-06, + "loss": 0.2538, + "step": 27844 + }, + { + "epoch": 1.30439874455427, + "grad_norm": 0.6084542806108402, + "learning_rate": 1.4253215919432762e-06, + "loss": 0.2685, + "step": 27845 + }, + { + "epoch": 1.3044455895441982, + "grad_norm": 0.5978151707510455, + "learning_rate": 1.4251503617851694e-06, + "loss": 0.2632, + "step": 27846 + }, + { + "epoch": 1.3044924345341267, + "grad_norm": 0.6520642009559342, + "learning_rate": 1.4249791378125533e-06, + "loss": 0.2637, + "step": 27847 + }, + { + "epoch": 1.304539279524055, + "grad_norm": 0.6039566090975422, + "learning_rate": 1.424807920026411e-06, + "loss": 0.2516, + "step": 27848 + }, + { + "epoch": 1.3045861245139831, + "grad_norm": 0.6166355512367764, + "learning_rate": 1.4246367084277284e-06, + "loss": 0.2901, + "step": 27849 + }, + { + "epoch": 1.3046329695039116, + "grad_norm": 0.5745144912214439, + "learning_rate": 1.4244655030174916e-06, + "loss": 0.2769, + "step": 27850 + }, + { + "epoch": 1.3046798144938399, + "grad_norm": 0.5814279341510511, + "learning_rate": 1.4242943037966856e-06, + "loss": 0.2647, + "step": 27851 + }, + { + "epoch": 1.304726659483768, + "grad_norm": 0.5800118801276212, + "learning_rate": 1.4241231107662965e-06, + "loss": 0.2677, + "step": 27852 + }, + { + "epoch": 1.3047735044736966, + "grad_norm": 0.6046772605714057, + "learning_rate": 1.4239519239273076e-06, + "loss": 0.2758, + "step": 27853 + }, + { + "epoch": 1.3048203494636248, + "grad_norm": 0.5688565001344804, + "learning_rate": 1.4237807432807054e-06, + "loss": 0.2778, + "step": 27854 + }, + { + "epoch": 1.3048671944535533, + "grad_norm": 0.6198833799032097, + "learning_rate": 1.4236095688274743e-06, + "loss": 0.2839, + "step": 27855 + }, + { + "epoch": 1.3049140394434815, + "grad_norm": 0.6092941959509607, + "learning_rate": 1.423438400568599e-06, + "loss": 0.2825, + "step": 27856 + }, + { + "epoch": 1.30496088443341, + "grad_norm": 0.5961279954139814, + "learning_rate": 1.4232672385050654e-06, + "loss": 0.2767, + "step": 27857 + }, + { + "epoch": 1.3050077294233382, + "grad_norm": 0.549833988719755, + "learning_rate": 1.423096082637859e-06, + "loss": 0.2657, + "step": 27858 + }, + { + "epoch": 1.3050545744132664, + "grad_norm": 0.555719131112854, + "learning_rate": 1.4229249329679628e-06, + "loss": 0.2685, + "step": 27859 + }, + { + "epoch": 1.305101419403195, + "grad_norm": 0.5585808275221585, + "learning_rate": 1.4227537894963644e-06, + "loss": 0.2683, + "step": 27860 + }, + { + "epoch": 1.3051482643931231, + "grad_norm": 0.6041370543586846, + "learning_rate": 1.422582652224046e-06, + "loss": 0.2765, + "step": 27861 + }, + { + "epoch": 1.3051951093830514, + "grad_norm": 0.5656877520553683, + "learning_rate": 1.4224115211519934e-06, + "loss": 0.2685, + "step": 27862 + }, + { + "epoch": 1.3052419543729799, + "grad_norm": 0.5880511001653389, + "learning_rate": 1.422240396281192e-06, + "loss": 0.2783, + "step": 27863 + }, + { + "epoch": 1.305288799362908, + "grad_norm": 0.5579791888284287, + "learning_rate": 1.4220692776126271e-06, + "loss": 0.2661, + "step": 27864 + }, + { + "epoch": 1.3053356443528363, + "grad_norm": 0.5836423697717215, + "learning_rate": 1.4218981651472816e-06, + "loss": 0.2609, + "step": 27865 + }, + { + "epoch": 1.3053824893427648, + "grad_norm": 0.6237667216480576, + "learning_rate": 1.4217270588861409e-06, + "loss": 0.2875, + "step": 27866 + }, + { + "epoch": 1.305429334332693, + "grad_norm": 0.5747183243199914, + "learning_rate": 1.4215559588301914e-06, + "loss": 0.2685, + "step": 27867 + }, + { + "epoch": 1.3054761793226215, + "grad_norm": 0.6306841374191376, + "learning_rate": 1.4213848649804152e-06, + "loss": 0.2913, + "step": 27868 + }, + { + "epoch": 1.3055230243125497, + "grad_norm": 0.5700409065532558, + "learning_rate": 1.421213777337798e-06, + "loss": 0.2601, + "step": 27869 + }, + { + "epoch": 1.3055698693024782, + "grad_norm": 0.6172443116048058, + "learning_rate": 1.4210426959033253e-06, + "loss": 0.2601, + "step": 27870 + }, + { + "epoch": 1.3056167142924064, + "grad_norm": 0.6067684309036909, + "learning_rate": 1.4208716206779796e-06, + "loss": 0.2726, + "step": 27871 + }, + { + "epoch": 1.3056635592823347, + "grad_norm": 0.5619870683441184, + "learning_rate": 1.4207005516627465e-06, + "loss": 0.2758, + "step": 27872 + }, + { + "epoch": 1.3057104042722631, + "grad_norm": 0.5706555470974527, + "learning_rate": 1.4205294888586109e-06, + "loss": 0.2629, + "step": 27873 + }, + { + "epoch": 1.3057572492621914, + "grad_norm": 0.5924925149150707, + "learning_rate": 1.420358432266557e-06, + "loss": 0.2776, + "step": 27874 + }, + { + "epoch": 1.3058040942521196, + "grad_norm": 0.5994277764829311, + "learning_rate": 1.4201873818875684e-06, + "loss": 0.2749, + "step": 27875 + }, + { + "epoch": 1.305850939242048, + "grad_norm": 0.596685298570345, + "learning_rate": 1.4200163377226312e-06, + "loss": 0.2861, + "step": 27876 + }, + { + "epoch": 1.3058977842319763, + "grad_norm": 0.565328485731893, + "learning_rate": 1.4198452997727272e-06, + "loss": 0.2685, + "step": 27877 + }, + { + "epoch": 1.3059446292219048, + "grad_norm": 0.5567485670928269, + "learning_rate": 1.419674268038842e-06, + "loss": 0.2611, + "step": 27878 + }, + { + "epoch": 1.305991474211833, + "grad_norm": 0.5665270327275551, + "learning_rate": 1.4195032425219602e-06, + "loss": 0.259, + "step": 27879 + }, + { + "epoch": 1.3060383192017615, + "grad_norm": 0.5571504816722962, + "learning_rate": 1.4193322232230655e-06, + "loss": 0.2577, + "step": 27880 + }, + { + "epoch": 1.3060851641916897, + "grad_norm": 0.5917742066154041, + "learning_rate": 1.4191612101431433e-06, + "loss": 0.2701, + "step": 27881 + }, + { + "epoch": 1.306132009181618, + "grad_norm": 0.5986487048806918, + "learning_rate": 1.4189902032831765e-06, + "loss": 0.2657, + "step": 27882 + }, + { + "epoch": 1.3061788541715464, + "grad_norm": 0.6100628299014067, + "learning_rate": 1.4188192026441483e-06, + "loss": 0.2754, + "step": 27883 + }, + { + "epoch": 1.3062256991614747, + "grad_norm": 0.5842782893001659, + "learning_rate": 1.4186482082270442e-06, + "loss": 0.2661, + "step": 27884 + }, + { + "epoch": 1.306272544151403, + "grad_norm": 0.5730403185220966, + "learning_rate": 1.4184772200328475e-06, + "loss": 0.273, + "step": 27885 + }, + { + "epoch": 1.3063193891413314, + "grad_norm": 0.611320567472519, + "learning_rate": 1.4183062380625428e-06, + "loss": 0.2764, + "step": 27886 + }, + { + "epoch": 1.3063662341312596, + "grad_norm": 0.6181683210796206, + "learning_rate": 1.4181352623171137e-06, + "loss": 0.2913, + "step": 27887 + }, + { + "epoch": 1.3064130791211879, + "grad_norm": 0.5919054221896666, + "learning_rate": 1.4179642927975453e-06, + "loss": 0.2766, + "step": 27888 + }, + { + "epoch": 1.3064599241111163, + "grad_norm": 0.5549513429284589, + "learning_rate": 1.4177933295048207e-06, + "loss": 0.2577, + "step": 27889 + }, + { + "epoch": 1.3065067691010446, + "grad_norm": 0.5960549652576554, + "learning_rate": 1.417622372439922e-06, + "loss": 0.2679, + "step": 27890 + }, + { + "epoch": 1.306553614090973, + "grad_norm": 0.6124230468406167, + "learning_rate": 1.4174514216038348e-06, + "loss": 0.2761, + "step": 27891 + }, + { + "epoch": 1.3066004590809013, + "grad_norm": 0.642245589174932, + "learning_rate": 1.4172804769975425e-06, + "loss": 0.2881, + "step": 27892 + }, + { + "epoch": 1.3066473040708297, + "grad_norm": 0.6141677732870888, + "learning_rate": 1.417109538622029e-06, + "loss": 0.2667, + "step": 27893 + }, + { + "epoch": 1.306694149060758, + "grad_norm": 0.526374367990353, + "learning_rate": 1.4169386064782789e-06, + "loss": 0.2584, + "step": 27894 + }, + { + "epoch": 1.3067409940506862, + "grad_norm": 0.5714114274921687, + "learning_rate": 1.4167676805672738e-06, + "loss": 0.2595, + "step": 27895 + }, + { + "epoch": 1.3067878390406147, + "grad_norm": 0.6007454865476454, + "learning_rate": 1.4165967608899998e-06, + "loss": 0.2728, + "step": 27896 + }, + { + "epoch": 1.306834684030543, + "grad_norm": 0.6226462990525474, + "learning_rate": 1.4164258474474377e-06, + "loss": 0.2877, + "step": 27897 + }, + { + "epoch": 1.3068815290204712, + "grad_norm": 0.6269827527121132, + "learning_rate": 1.416254940240573e-06, + "loss": 0.2779, + "step": 27898 + }, + { + "epoch": 1.3069283740103996, + "grad_norm": 0.5839649191999854, + "learning_rate": 1.416084039270388e-06, + "loss": 0.2831, + "step": 27899 + }, + { + "epoch": 1.3069752190003279, + "grad_norm": 0.5704230420163032, + "learning_rate": 1.4159131445378673e-06, + "loss": 0.2616, + "step": 27900 + }, + { + "epoch": 1.3070220639902561, + "grad_norm": 0.598873571041569, + "learning_rate": 1.4157422560439951e-06, + "loss": 0.2671, + "step": 27901 + }, + { + "epoch": 1.3070689089801846, + "grad_norm": 0.6501754152534533, + "learning_rate": 1.4155713737897537e-06, + "loss": 0.2959, + "step": 27902 + }, + { + "epoch": 1.3071157539701128, + "grad_norm": 0.5933207111085105, + "learning_rate": 1.4154004977761255e-06, + "loss": 0.2763, + "step": 27903 + }, + { + "epoch": 1.3071625989600413, + "grad_norm": 0.6441233906739413, + "learning_rate": 1.4152296280040945e-06, + "loss": 0.2803, + "step": 27904 + }, + { + "epoch": 1.3072094439499695, + "grad_norm": 0.5960069143998955, + "learning_rate": 1.4150587644746444e-06, + "loss": 0.2679, + "step": 27905 + }, + { + "epoch": 1.307256288939898, + "grad_norm": 0.5580601678540613, + "learning_rate": 1.4148879071887586e-06, + "loss": 0.2489, + "step": 27906 + }, + { + "epoch": 1.3073031339298262, + "grad_norm": 0.5652511584376997, + "learning_rate": 1.414717056147421e-06, + "loss": 0.2676, + "step": 27907 + }, + { + "epoch": 1.3073499789197545, + "grad_norm": 0.6025358534329642, + "learning_rate": 1.4145462113516133e-06, + "loss": 0.2754, + "step": 27908 + }, + { + "epoch": 1.307396823909683, + "grad_norm": 0.6347718170726249, + "learning_rate": 1.4143753728023202e-06, + "loss": 0.2873, + "step": 27909 + }, + { + "epoch": 1.3074436688996112, + "grad_norm": 0.5589278884961029, + "learning_rate": 1.4142045405005229e-06, + "loss": 0.2652, + "step": 27910 + }, + { + "epoch": 1.3074905138895394, + "grad_norm": 0.5673245861152123, + "learning_rate": 1.4140337144472055e-06, + "loss": 0.2682, + "step": 27911 + }, + { + "epoch": 1.3075373588794679, + "grad_norm": 0.6112512369588021, + "learning_rate": 1.4138628946433514e-06, + "loss": 0.2659, + "step": 27912 + }, + { + "epoch": 1.3075842038693961, + "grad_norm": 0.6168198780765074, + "learning_rate": 1.413692081089944e-06, + "loss": 0.2861, + "step": 27913 + }, + { + "epoch": 1.3076310488593246, + "grad_norm": 0.6607679423747903, + "learning_rate": 1.4135212737879648e-06, + "loss": 0.2692, + "step": 27914 + }, + { + "epoch": 1.3076778938492528, + "grad_norm": 0.5897085393045494, + "learning_rate": 1.4133504727383974e-06, + "loss": 0.2595, + "step": 27915 + }, + { + "epoch": 1.3077247388391813, + "grad_norm": 0.6079961449535312, + "learning_rate": 1.4131796779422263e-06, + "loss": 0.2798, + "step": 27916 + }, + { + "epoch": 1.3077715838291095, + "grad_norm": 0.634697325161298, + "learning_rate": 1.4130088894004319e-06, + "loss": 0.2873, + "step": 27917 + }, + { + "epoch": 1.3078184288190378, + "grad_norm": 0.5880481059682022, + "learning_rate": 1.412838107113998e-06, + "loss": 0.2798, + "step": 27918 + }, + { + "epoch": 1.3078652738089662, + "grad_norm": 0.5767313366839578, + "learning_rate": 1.4126673310839084e-06, + "loss": 0.2735, + "step": 27919 + }, + { + "epoch": 1.3079121187988945, + "grad_norm": 0.5845895090065663, + "learning_rate": 1.4124965613111442e-06, + "loss": 0.2768, + "step": 27920 + }, + { + "epoch": 1.3079589637888227, + "grad_norm": 0.5955585025730306, + "learning_rate": 1.4123257977966886e-06, + "loss": 0.2789, + "step": 27921 + }, + { + "epoch": 1.3080058087787512, + "grad_norm": 0.6028976295176435, + "learning_rate": 1.4121550405415253e-06, + "loss": 0.2739, + "step": 27922 + }, + { + "epoch": 1.3080526537686794, + "grad_norm": 0.5888983349400522, + "learning_rate": 1.4119842895466368e-06, + "loss": 0.2831, + "step": 27923 + }, + { + "epoch": 1.3080994987586076, + "grad_norm": 0.6048535257597788, + "learning_rate": 1.4118135448130046e-06, + "loss": 0.2673, + "step": 27924 + }, + { + "epoch": 1.3081463437485361, + "grad_norm": 0.5801703566509009, + "learning_rate": 1.4116428063416126e-06, + "loss": 0.2519, + "step": 27925 + }, + { + "epoch": 1.3081931887384644, + "grad_norm": 0.5894491161237936, + "learning_rate": 1.411472074133442e-06, + "loss": 0.2758, + "step": 27926 + }, + { + "epoch": 1.3082400337283928, + "grad_norm": 0.6262408572629132, + "learning_rate": 1.4113013481894761e-06, + "loss": 0.2814, + "step": 27927 + }, + { + "epoch": 1.308286878718321, + "grad_norm": 0.6133205648494019, + "learning_rate": 1.4111306285106972e-06, + "loss": 0.2774, + "step": 27928 + }, + { + "epoch": 1.3083337237082495, + "grad_norm": 0.6119481561696878, + "learning_rate": 1.4109599150980876e-06, + "loss": 0.2801, + "step": 27929 + }, + { + "epoch": 1.3083805686981778, + "grad_norm": 0.6460649363759282, + "learning_rate": 1.4107892079526315e-06, + "loss": 0.2743, + "step": 27930 + }, + { + "epoch": 1.308427413688106, + "grad_norm": 0.5684532457435068, + "learning_rate": 1.4106185070753093e-06, + "loss": 0.2626, + "step": 27931 + }, + { + "epoch": 1.3084742586780345, + "grad_norm": 0.5655309601752108, + "learning_rate": 1.4104478124671028e-06, + "loss": 0.2515, + "step": 27932 + }, + { + "epoch": 1.3085211036679627, + "grad_norm": 0.5998154659260512, + "learning_rate": 1.4102771241289954e-06, + "loss": 0.2806, + "step": 27933 + }, + { + "epoch": 1.308567948657891, + "grad_norm": 0.5657545186797048, + "learning_rate": 1.4101064420619693e-06, + "loss": 0.2768, + "step": 27934 + }, + { + "epoch": 1.3086147936478194, + "grad_norm": 0.6210689613050682, + "learning_rate": 1.4099357662670066e-06, + "loss": 0.2786, + "step": 27935 + }, + { + "epoch": 1.3086616386377476, + "grad_norm": 0.6047097783832556, + "learning_rate": 1.4097650967450896e-06, + "loss": 0.2815, + "step": 27936 + }, + { + "epoch": 1.308708483627676, + "grad_norm": 0.5201310248755401, + "learning_rate": 1.4095944334972014e-06, + "loss": 0.253, + "step": 27937 + }, + { + "epoch": 1.3087553286176044, + "grad_norm": 0.5581614108466948, + "learning_rate": 1.4094237765243234e-06, + "loss": 0.2638, + "step": 27938 + }, + { + "epoch": 1.3088021736075326, + "grad_norm": 0.5702963504348322, + "learning_rate": 1.409253125827436e-06, + "loss": 0.2837, + "step": 27939 + }, + { + "epoch": 1.308849018597461, + "grad_norm": 0.5692122144810553, + "learning_rate": 1.4090824814075233e-06, + "loss": 0.2643, + "step": 27940 + }, + { + "epoch": 1.3088958635873893, + "grad_norm": 0.607846050575053, + "learning_rate": 1.4089118432655663e-06, + "loss": 0.2821, + "step": 27941 + }, + { + "epoch": 1.3089427085773178, + "grad_norm": 0.5697552875605054, + "learning_rate": 1.4087412114025476e-06, + "loss": 0.2613, + "step": 27942 + }, + { + "epoch": 1.308989553567246, + "grad_norm": 0.6047274378009991, + "learning_rate": 1.4085705858194498e-06, + "loss": 0.2753, + "step": 27943 + }, + { + "epoch": 1.3090363985571742, + "grad_norm": 0.6017808289982837, + "learning_rate": 1.4083999665172532e-06, + "loss": 0.2738, + "step": 27944 + }, + { + "epoch": 1.3090832435471027, + "grad_norm": 0.5554793032067925, + "learning_rate": 1.4082293534969413e-06, + "loss": 0.2629, + "step": 27945 + }, + { + "epoch": 1.309130088537031, + "grad_norm": 0.5672757573612994, + "learning_rate": 1.4080587467594942e-06, + "loss": 0.2712, + "step": 27946 + }, + { + "epoch": 1.3091769335269592, + "grad_norm": 0.6509499526142885, + "learning_rate": 1.4078881463058942e-06, + "loss": 0.288, + "step": 27947 + }, + { + "epoch": 1.3092237785168876, + "grad_norm": 0.6190462878143445, + "learning_rate": 1.4077175521371236e-06, + "loss": 0.2829, + "step": 27948 + }, + { + "epoch": 1.309270623506816, + "grad_norm": 0.5999726411887562, + "learning_rate": 1.4075469642541651e-06, + "loss": 0.2823, + "step": 27949 + }, + { + "epoch": 1.3093174684967444, + "grad_norm": 0.6110349552756065, + "learning_rate": 1.407376382657998e-06, + "loss": 0.2849, + "step": 27950 + }, + { + "epoch": 1.3093643134866726, + "grad_norm": 0.5428930318043151, + "learning_rate": 1.4072058073496063e-06, + "loss": 0.2614, + "step": 27951 + }, + { + "epoch": 1.309411158476601, + "grad_norm": 0.6294345933503117, + "learning_rate": 1.4070352383299695e-06, + "loss": 0.2714, + "step": 27952 + }, + { + "epoch": 1.3094580034665293, + "grad_norm": 0.5942477430885867, + "learning_rate": 1.4068646756000704e-06, + "loss": 0.2829, + "step": 27953 + }, + { + "epoch": 1.3095048484564575, + "grad_norm": 0.6028417043289461, + "learning_rate": 1.40669411916089e-06, + "loss": 0.2795, + "step": 27954 + }, + { + "epoch": 1.309551693446386, + "grad_norm": 0.58011430023621, + "learning_rate": 1.4065235690134115e-06, + "loss": 0.2682, + "step": 27955 + }, + { + "epoch": 1.3095985384363142, + "grad_norm": 0.5945191232062013, + "learning_rate": 1.4063530251586139e-06, + "loss": 0.2706, + "step": 27956 + }, + { + "epoch": 1.3096453834262425, + "grad_norm": 0.5666376259712625, + "learning_rate": 1.40618248759748e-06, + "loss": 0.2711, + "step": 27957 + }, + { + "epoch": 1.309692228416171, + "grad_norm": 0.5426598275711091, + "learning_rate": 1.4060119563309916e-06, + "loss": 0.2505, + "step": 27958 + }, + { + "epoch": 1.3097390734060992, + "grad_norm": 0.5999510798300043, + "learning_rate": 1.4058414313601285e-06, + "loss": 0.2594, + "step": 27959 + }, + { + "epoch": 1.3097859183960274, + "grad_norm": 0.5929418791013262, + "learning_rate": 1.4056709126858732e-06, + "loss": 0.293, + "step": 27960 + }, + { + "epoch": 1.309832763385956, + "grad_norm": 0.5577085062398481, + "learning_rate": 1.4055004003092063e-06, + "loss": 0.2621, + "step": 27961 + }, + { + "epoch": 1.3098796083758841, + "grad_norm": 0.5679482183723934, + "learning_rate": 1.4053298942311105e-06, + "loss": 0.2496, + "step": 27962 + }, + { + "epoch": 1.3099264533658126, + "grad_norm": 0.6248493674064638, + "learning_rate": 1.4051593944525655e-06, + "loss": 0.2842, + "step": 27963 + }, + { + "epoch": 1.3099732983557408, + "grad_norm": 0.6444451445957242, + "learning_rate": 1.4049889009745526e-06, + "loss": 0.2971, + "step": 27964 + }, + { + "epoch": 1.3100201433456693, + "grad_norm": 0.5674528768899407, + "learning_rate": 1.4048184137980548e-06, + "loss": 0.2639, + "step": 27965 + }, + { + "epoch": 1.3100669883355975, + "grad_norm": 0.5859283619094556, + "learning_rate": 1.4046479329240504e-06, + "loss": 0.2811, + "step": 27966 + }, + { + "epoch": 1.3101138333255258, + "grad_norm": 0.5734774586867741, + "learning_rate": 1.4044774583535217e-06, + "loss": 0.2431, + "step": 27967 + }, + { + "epoch": 1.3101606783154542, + "grad_norm": 0.5930305537034649, + "learning_rate": 1.4043069900874513e-06, + "loss": 0.2718, + "step": 27968 + }, + { + "epoch": 1.3102075233053825, + "grad_norm": 0.588391778825993, + "learning_rate": 1.4041365281268178e-06, + "loss": 0.2633, + "step": 27969 + }, + { + "epoch": 1.3102543682953107, + "grad_norm": 0.5954360125232578, + "learning_rate": 1.4039660724726027e-06, + "loss": 0.2614, + "step": 27970 + }, + { + "epoch": 1.3103012132852392, + "grad_norm": 0.621829304470453, + "learning_rate": 1.4037956231257877e-06, + "loss": 0.2939, + "step": 27971 + }, + { + "epoch": 1.3103480582751674, + "grad_norm": 0.5896458848855136, + "learning_rate": 1.403625180087354e-06, + "loss": 0.2775, + "step": 27972 + }, + { + "epoch": 1.3103949032650957, + "grad_norm": 0.5723154279901288, + "learning_rate": 1.4034547433582812e-06, + "loss": 0.2752, + "step": 27973 + }, + { + "epoch": 1.3104417482550241, + "grad_norm": 0.5737120942429244, + "learning_rate": 1.4032843129395516e-06, + "loss": 0.274, + "step": 27974 + }, + { + "epoch": 1.3104885932449524, + "grad_norm": 0.6205423915656182, + "learning_rate": 1.4031138888321442e-06, + "loss": 0.288, + "step": 27975 + }, + { + "epoch": 1.3105354382348808, + "grad_norm": 0.5828495406653186, + "learning_rate": 1.4029434710370405e-06, + "loss": 0.2714, + "step": 27976 + }, + { + "epoch": 1.310582283224809, + "grad_norm": 0.5558764181955906, + "learning_rate": 1.4027730595552216e-06, + "loss": 0.2622, + "step": 27977 + }, + { + "epoch": 1.3106291282147375, + "grad_norm": 0.6012181273034222, + "learning_rate": 1.4026026543876682e-06, + "loss": 0.2832, + "step": 27978 + }, + { + "epoch": 1.3106759732046658, + "grad_norm": 0.5747006141412674, + "learning_rate": 1.4024322555353612e-06, + "loss": 0.2814, + "step": 27979 + }, + { + "epoch": 1.310722818194594, + "grad_norm": 0.6449015947909095, + "learning_rate": 1.4022618629992806e-06, + "loss": 0.2676, + "step": 27980 + }, + { + "epoch": 1.3107696631845225, + "grad_norm": 0.63343193528664, + "learning_rate": 1.4020914767804067e-06, + "loss": 0.296, + "step": 27981 + }, + { + "epoch": 1.3108165081744507, + "grad_norm": 0.587601080367697, + "learning_rate": 1.4019210968797201e-06, + "loss": 0.2631, + "step": 27982 + }, + { + "epoch": 1.310863353164379, + "grad_norm": 0.6084739429454542, + "learning_rate": 1.4017507232982015e-06, + "loss": 0.2843, + "step": 27983 + }, + { + "epoch": 1.3109101981543074, + "grad_norm": 0.661558995565211, + "learning_rate": 1.4015803560368315e-06, + "loss": 0.3079, + "step": 27984 + }, + { + "epoch": 1.3109570431442357, + "grad_norm": 0.6026680298947471, + "learning_rate": 1.4014099950965914e-06, + "loss": 0.2669, + "step": 27985 + }, + { + "epoch": 1.3110038881341641, + "grad_norm": 0.600891865348145, + "learning_rate": 1.4012396404784601e-06, + "loss": 0.2661, + "step": 27986 + }, + { + "epoch": 1.3110507331240924, + "grad_norm": 0.5755227566734965, + "learning_rate": 1.401069292183419e-06, + "loss": 0.2604, + "step": 27987 + }, + { + "epoch": 1.3110975781140208, + "grad_norm": 0.621747640870489, + "learning_rate": 1.400898950212447e-06, + "loss": 0.2721, + "step": 27988 + }, + { + "epoch": 1.311144423103949, + "grad_norm": 0.5678521993050036, + "learning_rate": 1.4007286145665255e-06, + "loss": 0.2822, + "step": 27989 + }, + { + "epoch": 1.3111912680938773, + "grad_norm": 0.5779434045459424, + "learning_rate": 1.4005582852466344e-06, + "loss": 0.288, + "step": 27990 + }, + { + "epoch": 1.3112381130838058, + "grad_norm": 0.5943607492103088, + "learning_rate": 1.4003879622537543e-06, + "loss": 0.2667, + "step": 27991 + }, + { + "epoch": 1.311284958073734, + "grad_norm": 0.5760776491331151, + "learning_rate": 1.4002176455888655e-06, + "loss": 0.257, + "step": 27992 + }, + { + "epoch": 1.3113318030636623, + "grad_norm": 0.616020293860383, + "learning_rate": 1.400047335252947e-06, + "loss": 0.2703, + "step": 27993 + }, + { + "epoch": 1.3113786480535907, + "grad_norm": 0.6075554416739314, + "learning_rate": 1.399877031246981e-06, + "loss": 0.2752, + "step": 27994 + }, + { + "epoch": 1.311425493043519, + "grad_norm": 0.6181557677424252, + "learning_rate": 1.3997067335719447e-06, + "loss": 0.2845, + "step": 27995 + }, + { + "epoch": 1.3114723380334472, + "grad_norm": 0.5706954762184531, + "learning_rate": 1.39953644222882e-06, + "loss": 0.2542, + "step": 27996 + }, + { + "epoch": 1.3115191830233757, + "grad_norm": 0.6308174794180446, + "learning_rate": 1.399366157218586e-06, + "loss": 0.2758, + "step": 27997 + }, + { + "epoch": 1.311566028013304, + "grad_norm": 0.5838580400881191, + "learning_rate": 1.3991958785422243e-06, + "loss": 0.2789, + "step": 27998 + }, + { + "epoch": 1.3116128730032324, + "grad_norm": 0.6109790094054504, + "learning_rate": 1.399025606200713e-06, + "loss": 0.2651, + "step": 27999 + }, + { + "epoch": 1.3116597179931606, + "grad_norm": 0.5990631261456437, + "learning_rate": 1.3988553401950334e-06, + "loss": 0.2776, + "step": 28000 + }, + { + "epoch": 1.311706562983089, + "grad_norm": 0.5667408639432052, + "learning_rate": 1.3986850805261632e-06, + "loss": 0.2744, + "step": 28001 + }, + { + "epoch": 1.3117534079730173, + "grad_norm": 0.639167421201948, + "learning_rate": 1.398514827195084e-06, + "loss": 0.2817, + "step": 28002 + }, + { + "epoch": 1.3118002529629456, + "grad_norm": 0.6085911863984125, + "learning_rate": 1.398344580202775e-06, + "loss": 0.2635, + "step": 28003 + }, + { + "epoch": 1.311847097952874, + "grad_norm": 0.6387823205173214, + "learning_rate": 1.398174339550217e-06, + "loss": 0.2838, + "step": 28004 + }, + { + "epoch": 1.3118939429428023, + "grad_norm": 0.6417114911992788, + "learning_rate": 1.3980041052383879e-06, + "loss": 0.2732, + "step": 28005 + }, + { + "epoch": 1.3119407879327305, + "grad_norm": 0.6324603553890441, + "learning_rate": 1.3978338772682682e-06, + "loss": 0.2845, + "step": 28006 + }, + { + "epoch": 1.311987632922659, + "grad_norm": 0.6154132635873448, + "learning_rate": 1.3976636556408382e-06, + "loss": 0.2783, + "step": 28007 + }, + { + "epoch": 1.3120344779125872, + "grad_norm": 0.5650914805911821, + "learning_rate": 1.3974934403570761e-06, + "loss": 0.2694, + "step": 28008 + }, + { + "epoch": 1.3120813229025154, + "grad_norm": 0.6247151868099226, + "learning_rate": 1.3973232314179619e-06, + "loss": 0.2795, + "step": 28009 + }, + { + "epoch": 1.312128167892444, + "grad_norm": 0.5821307464004046, + "learning_rate": 1.3971530288244767e-06, + "loss": 0.2798, + "step": 28010 + }, + { + "epoch": 1.3121750128823721, + "grad_norm": 0.5652891960611124, + "learning_rate": 1.3969828325775972e-06, + "loss": 0.2636, + "step": 28011 + }, + { + "epoch": 1.3122218578723006, + "grad_norm": 0.5882234131374967, + "learning_rate": 1.3968126426783046e-06, + "loss": 0.2713, + "step": 28012 + }, + { + "epoch": 1.3122687028622289, + "grad_norm": 0.634837542096158, + "learning_rate": 1.3966424591275778e-06, + "loss": 0.2634, + "step": 28013 + }, + { + "epoch": 1.3123155478521573, + "grad_norm": 0.5720300491713498, + "learning_rate": 1.3964722819263974e-06, + "loss": 0.2692, + "step": 28014 + }, + { + "epoch": 1.3123623928420856, + "grad_norm": 0.6042624071341146, + "learning_rate": 1.3963021110757408e-06, + "loss": 0.2738, + "step": 28015 + }, + { + "epoch": 1.3124092378320138, + "grad_norm": 0.598751317407997, + "learning_rate": 1.3961319465765888e-06, + "loss": 0.2778, + "step": 28016 + }, + { + "epoch": 1.3124560828219423, + "grad_norm": 0.5891887763464321, + "learning_rate": 1.3959617884299193e-06, + "loss": 0.2516, + "step": 28017 + }, + { + "epoch": 1.3125029278118705, + "grad_norm": 0.6210759790557401, + "learning_rate": 1.395791636636712e-06, + "loss": 0.2984, + "step": 28018 + }, + { + "epoch": 1.3125497728017987, + "grad_norm": 0.6044885640364903, + "learning_rate": 1.3956214911979465e-06, + "loss": 0.281, + "step": 28019 + }, + { + "epoch": 1.3125966177917272, + "grad_norm": 0.5607428514008747, + "learning_rate": 1.3954513521146016e-06, + "loss": 0.2582, + "step": 28020 + }, + { + "epoch": 1.3126434627816554, + "grad_norm": 0.5943740866139128, + "learning_rate": 1.3952812193876575e-06, + "loss": 0.273, + "step": 28021 + }, + { + "epoch": 1.312690307771584, + "grad_norm": 0.5881772331516375, + "learning_rate": 1.3951110930180925e-06, + "loss": 0.2603, + "step": 28022 + }, + { + "epoch": 1.3127371527615121, + "grad_norm": 0.6345078445753661, + "learning_rate": 1.3949409730068843e-06, + "loss": 0.2893, + "step": 28023 + }, + { + "epoch": 1.3127839977514406, + "grad_norm": 0.6112653176722486, + "learning_rate": 1.394770859355013e-06, + "loss": 0.2788, + "step": 28024 + }, + { + "epoch": 1.3128308427413689, + "grad_norm": 0.5852269714227338, + "learning_rate": 1.394600752063458e-06, + "loss": 0.2713, + "step": 28025 + }, + { + "epoch": 1.312877687731297, + "grad_norm": 0.5687070605001497, + "learning_rate": 1.3944306511331977e-06, + "loss": 0.2679, + "step": 28026 + }, + { + "epoch": 1.3129245327212256, + "grad_norm": 0.5978042634309616, + "learning_rate": 1.3942605565652107e-06, + "loss": 0.2795, + "step": 28027 + }, + { + "epoch": 1.3129713777111538, + "grad_norm": 0.5713315584301936, + "learning_rate": 1.3940904683604778e-06, + "loss": 0.2585, + "step": 28028 + }, + { + "epoch": 1.313018222701082, + "grad_norm": 0.6146077433582503, + "learning_rate": 1.3939203865199761e-06, + "loss": 0.2722, + "step": 28029 + }, + { + "epoch": 1.3130650676910105, + "grad_norm": 0.6098688985160157, + "learning_rate": 1.3937503110446835e-06, + "loss": 0.2747, + "step": 28030 + }, + { + "epoch": 1.3131119126809387, + "grad_norm": 0.5879051878642237, + "learning_rate": 1.3935802419355797e-06, + "loss": 0.2697, + "step": 28031 + }, + { + "epoch": 1.313158757670867, + "grad_norm": 0.6572807475034895, + "learning_rate": 1.3934101791936439e-06, + "loss": 0.3064, + "step": 28032 + }, + { + "epoch": 1.3132056026607954, + "grad_norm": 0.5620563746162627, + "learning_rate": 1.3932401228198541e-06, + "loss": 0.2719, + "step": 28033 + }, + { + "epoch": 1.3132524476507237, + "grad_norm": 0.5975185737297614, + "learning_rate": 1.3930700728151903e-06, + "loss": 0.2886, + "step": 28034 + }, + { + "epoch": 1.3132992926406521, + "grad_norm": 0.5933990119487323, + "learning_rate": 1.3929000291806292e-06, + "loss": 0.2829, + "step": 28035 + }, + { + "epoch": 1.3133461376305804, + "grad_norm": 0.5708937287853265, + "learning_rate": 1.3927299919171508e-06, + "loss": 0.2541, + "step": 28036 + }, + { + "epoch": 1.3133929826205089, + "grad_norm": 0.6271670228197515, + "learning_rate": 1.3925599610257324e-06, + "loss": 0.2727, + "step": 28037 + }, + { + "epoch": 1.313439827610437, + "grad_norm": 0.5879573314621223, + "learning_rate": 1.392389936507353e-06, + "loss": 0.2635, + "step": 28038 + }, + { + "epoch": 1.3134866726003653, + "grad_norm": 0.584026364020446, + "learning_rate": 1.392219918362991e-06, + "loss": 0.2888, + "step": 28039 + }, + { + "epoch": 1.3135335175902938, + "grad_norm": 0.5992637250720961, + "learning_rate": 1.3920499065936246e-06, + "loss": 0.2708, + "step": 28040 + }, + { + "epoch": 1.313580362580222, + "grad_norm": 0.5414211308706164, + "learning_rate": 1.3918799012002337e-06, + "loss": 0.264, + "step": 28041 + }, + { + "epoch": 1.3136272075701503, + "grad_norm": 0.6323612267176653, + "learning_rate": 1.391709902183796e-06, + "loss": 0.3018, + "step": 28042 + }, + { + "epoch": 1.3136740525600787, + "grad_norm": 0.6179660769140434, + "learning_rate": 1.3915399095452876e-06, + "loss": 0.2739, + "step": 28043 + }, + { + "epoch": 1.313720897550007, + "grad_norm": 0.5651175869519961, + "learning_rate": 1.3913699232856887e-06, + "loss": 0.2636, + "step": 28044 + }, + { + "epoch": 1.3137677425399352, + "grad_norm": 0.5874203584624627, + "learning_rate": 1.3911999434059772e-06, + "loss": 0.2811, + "step": 28045 + }, + { + "epoch": 1.3138145875298637, + "grad_norm": 0.5634373573743474, + "learning_rate": 1.3910299699071315e-06, + "loss": 0.259, + "step": 28046 + }, + { + "epoch": 1.313861432519792, + "grad_norm": 0.6071213010260551, + "learning_rate": 1.3908600027901304e-06, + "loss": 0.2732, + "step": 28047 + }, + { + "epoch": 1.3139082775097204, + "grad_norm": 0.5749146023354079, + "learning_rate": 1.3906900420559503e-06, + "loss": 0.2594, + "step": 28048 + }, + { + "epoch": 1.3139551224996486, + "grad_norm": 0.5841770483004719, + "learning_rate": 1.390520087705571e-06, + "loss": 0.2765, + "step": 28049 + }, + { + "epoch": 1.314001967489577, + "grad_norm": 0.5821011025525125, + "learning_rate": 1.390350139739969e-06, + "loss": 0.2841, + "step": 28050 + }, + { + "epoch": 1.3140488124795053, + "grad_norm": 0.5971501946590695, + "learning_rate": 1.3901801981601231e-06, + "loss": 0.2721, + "step": 28051 + }, + { + "epoch": 1.3140956574694336, + "grad_norm": 0.6065465056066797, + "learning_rate": 1.390010262967011e-06, + "loss": 0.2936, + "step": 28052 + }, + { + "epoch": 1.314142502459362, + "grad_norm": 0.577982374926317, + "learning_rate": 1.3898403341616118e-06, + "loss": 0.288, + "step": 28053 + }, + { + "epoch": 1.3141893474492903, + "grad_norm": 0.6047401848450629, + "learning_rate": 1.3896704117449017e-06, + "loss": 0.2827, + "step": 28054 + }, + { + "epoch": 1.3142361924392185, + "grad_norm": 0.6212832041441885, + "learning_rate": 1.3895004957178594e-06, + "loss": 0.2973, + "step": 28055 + }, + { + "epoch": 1.314283037429147, + "grad_norm": 0.5572101784555713, + "learning_rate": 1.3893305860814632e-06, + "loss": 0.2461, + "step": 28056 + }, + { + "epoch": 1.3143298824190752, + "grad_norm": 0.6164591709820462, + "learning_rate": 1.3891606828366893e-06, + "loss": 0.2734, + "step": 28057 + }, + { + "epoch": 1.3143767274090037, + "grad_norm": 0.5942544599683152, + "learning_rate": 1.388990785984517e-06, + "loss": 0.2722, + "step": 28058 + }, + { + "epoch": 1.314423572398932, + "grad_norm": 0.5914785480145813, + "learning_rate": 1.388820895525924e-06, + "loss": 0.2772, + "step": 28059 + }, + { + "epoch": 1.3144704173888604, + "grad_norm": 0.6272885498927764, + "learning_rate": 1.3886510114618866e-06, + "loss": 0.2898, + "step": 28060 + }, + { + "epoch": 1.3145172623787886, + "grad_norm": 0.5713930796669023, + "learning_rate": 1.3884811337933833e-06, + "loss": 0.2645, + "step": 28061 + }, + { + "epoch": 1.3145641073687169, + "grad_norm": 0.5917411000117576, + "learning_rate": 1.3883112625213919e-06, + "loss": 0.2511, + "step": 28062 + }, + { + "epoch": 1.3146109523586453, + "grad_norm": 0.619189605055701, + "learning_rate": 1.3881413976468905e-06, + "loss": 0.2669, + "step": 28063 + }, + { + "epoch": 1.3146577973485736, + "grad_norm": 0.5720450299076427, + "learning_rate": 1.387971539170855e-06, + "loss": 0.2657, + "step": 28064 + }, + { + "epoch": 1.3147046423385018, + "grad_norm": 0.6002270544999775, + "learning_rate": 1.3878016870942652e-06, + "loss": 0.2569, + "step": 28065 + }, + { + "epoch": 1.3147514873284303, + "grad_norm": 0.5627131994697535, + "learning_rate": 1.3876318414180955e-06, + "loss": 0.2678, + "step": 28066 + }, + { + "epoch": 1.3147983323183585, + "grad_norm": 0.6091246379514973, + "learning_rate": 1.3874620021433252e-06, + "loss": 0.2827, + "step": 28067 + }, + { + "epoch": 1.3148451773082868, + "grad_norm": 0.6496904821686064, + "learning_rate": 1.3872921692709314e-06, + "loss": 0.291, + "step": 28068 + }, + { + "epoch": 1.3148920222982152, + "grad_norm": 0.5814657791632674, + "learning_rate": 1.3871223428018918e-06, + "loss": 0.2554, + "step": 28069 + }, + { + "epoch": 1.3149388672881435, + "grad_norm": 0.5760764137355695, + "learning_rate": 1.3869525227371842e-06, + "loss": 0.2664, + "step": 28070 + }, + { + "epoch": 1.314985712278072, + "grad_norm": 0.5971069016508538, + "learning_rate": 1.386782709077785e-06, + "loss": 0.2877, + "step": 28071 + }, + { + "epoch": 1.3150325572680002, + "grad_norm": 0.6608093803493097, + "learning_rate": 1.3866129018246705e-06, + "loss": 0.2952, + "step": 28072 + }, + { + "epoch": 1.3150794022579286, + "grad_norm": 0.6302893908188287, + "learning_rate": 1.386443100978819e-06, + "loss": 0.2877, + "step": 28073 + }, + { + "epoch": 1.3151262472478569, + "grad_norm": 0.5695416273988078, + "learning_rate": 1.3862733065412076e-06, + "loss": 0.2688, + "step": 28074 + }, + { + "epoch": 1.3151730922377851, + "grad_norm": 0.6106750607484611, + "learning_rate": 1.3861035185128134e-06, + "loss": 0.2811, + "step": 28075 + }, + { + "epoch": 1.3152199372277136, + "grad_norm": 0.601943830461928, + "learning_rate": 1.3859337368946135e-06, + "loss": 0.2794, + "step": 28076 + }, + { + "epoch": 1.3152667822176418, + "grad_norm": 0.5709883658212882, + "learning_rate": 1.3857639616875857e-06, + "loss": 0.2686, + "step": 28077 + }, + { + "epoch": 1.31531362720757, + "grad_norm": 0.664473494069148, + "learning_rate": 1.3855941928927063e-06, + "loss": 0.2796, + "step": 28078 + }, + { + "epoch": 1.3153604721974985, + "grad_norm": 0.6074376325259495, + "learning_rate": 1.3854244305109515e-06, + "loss": 0.2657, + "step": 28079 + }, + { + "epoch": 1.3154073171874268, + "grad_norm": 0.628484537208586, + "learning_rate": 1.3852546745432988e-06, + "loss": 0.2612, + "step": 28080 + }, + { + "epoch": 1.315454162177355, + "grad_norm": 0.5793283791595741, + "learning_rate": 1.3850849249907256e-06, + "loss": 0.2899, + "step": 28081 + }, + { + "epoch": 1.3155010071672835, + "grad_norm": 0.6055625123208626, + "learning_rate": 1.3849151818542077e-06, + "loss": 0.2789, + "step": 28082 + }, + { + "epoch": 1.3155478521572117, + "grad_norm": 0.5828840637678042, + "learning_rate": 1.3847454451347242e-06, + "loss": 0.2634, + "step": 28083 + }, + { + "epoch": 1.3155946971471402, + "grad_norm": 0.5994117921693419, + "learning_rate": 1.3845757148332494e-06, + "loss": 0.2754, + "step": 28084 + }, + { + "epoch": 1.3156415421370684, + "grad_norm": 0.59438936048093, + "learning_rate": 1.3844059909507618e-06, + "loss": 0.2808, + "step": 28085 + }, + { + "epoch": 1.3156883871269969, + "grad_norm": 0.634657221778771, + "learning_rate": 1.3842362734882363e-06, + "loss": 0.2867, + "step": 28086 + }, + { + "epoch": 1.3157352321169251, + "grad_norm": 0.5818601911397261, + "learning_rate": 1.384066562446651e-06, + "loss": 0.2606, + "step": 28087 + }, + { + "epoch": 1.3157820771068534, + "grad_norm": 0.5809619305264332, + "learning_rate": 1.3838968578269817e-06, + "loss": 0.2723, + "step": 28088 + }, + { + "epoch": 1.3158289220967818, + "grad_norm": 0.5874588146905688, + "learning_rate": 1.3837271596302066e-06, + "loss": 0.2572, + "step": 28089 + }, + { + "epoch": 1.31587576708671, + "grad_norm": 0.5508173961942856, + "learning_rate": 1.3835574678573e-06, + "loss": 0.2528, + "step": 28090 + }, + { + "epoch": 1.3159226120766383, + "grad_norm": 0.6743726660119665, + "learning_rate": 1.3833877825092408e-06, + "loss": 0.2841, + "step": 28091 + }, + { + "epoch": 1.3159694570665668, + "grad_norm": 0.5601097112720499, + "learning_rate": 1.383218103587003e-06, + "loss": 0.265, + "step": 28092 + }, + { + "epoch": 1.316016302056495, + "grad_norm": 0.6235527908801822, + "learning_rate": 1.3830484310915646e-06, + "loss": 0.2843, + "step": 28093 + }, + { + "epoch": 1.3160631470464235, + "grad_norm": 0.5970437511785893, + "learning_rate": 1.3828787650239017e-06, + "loss": 0.2735, + "step": 28094 + }, + { + "epoch": 1.3161099920363517, + "grad_norm": 0.6162323099382394, + "learning_rate": 1.3827091053849907e-06, + "loss": 0.2674, + "step": 28095 + }, + { + "epoch": 1.3161568370262802, + "grad_norm": 0.5547597138342403, + "learning_rate": 1.3825394521758087e-06, + "loss": 0.2657, + "step": 28096 + }, + { + "epoch": 1.3162036820162084, + "grad_norm": 0.5625681661777547, + "learning_rate": 1.3823698053973306e-06, + "loss": 0.2631, + "step": 28097 + }, + { + "epoch": 1.3162505270061367, + "grad_norm": 0.5586740062618702, + "learning_rate": 1.3822001650505343e-06, + "loss": 0.2486, + "step": 28098 + }, + { + "epoch": 1.3162973719960651, + "grad_norm": 0.6287644869921407, + "learning_rate": 1.382030531136394e-06, + "loss": 0.2804, + "step": 28099 + }, + { + "epoch": 1.3163442169859934, + "grad_norm": 0.5722557424200644, + "learning_rate": 1.3818609036558873e-06, + "loss": 0.26, + "step": 28100 + }, + { + "epoch": 1.3163910619759216, + "grad_norm": 0.5731133962173963, + "learning_rate": 1.38169128260999e-06, + "loss": 0.2805, + "step": 28101 + }, + { + "epoch": 1.31643790696585, + "grad_norm": 0.9753583171849168, + "learning_rate": 1.381521667999679e-06, + "loss": 0.2884, + "step": 28102 + }, + { + "epoch": 1.3164847519557783, + "grad_norm": 0.6139340532450118, + "learning_rate": 1.3813520598259286e-06, + "loss": 0.2746, + "step": 28103 + }, + { + "epoch": 1.3165315969457065, + "grad_norm": 0.5862047227746982, + "learning_rate": 1.3811824580897161e-06, + "loss": 0.2941, + "step": 28104 + }, + { + "epoch": 1.316578441935635, + "grad_norm": 0.6059864325481624, + "learning_rate": 1.3810128627920186e-06, + "loss": 0.2718, + "step": 28105 + }, + { + "epoch": 1.3166252869255632, + "grad_norm": 0.5944179570930197, + "learning_rate": 1.3808432739338095e-06, + "loss": 0.2703, + "step": 28106 + }, + { + "epoch": 1.3166721319154917, + "grad_norm": 0.5556560742860327, + "learning_rate": 1.3806736915160662e-06, + "loss": 0.2555, + "step": 28107 + }, + { + "epoch": 1.31671897690542, + "grad_norm": 0.6227849493771976, + "learning_rate": 1.3805041155397656e-06, + "loss": 0.2653, + "step": 28108 + }, + { + "epoch": 1.3167658218953484, + "grad_norm": 0.5889464200657597, + "learning_rate": 1.3803345460058815e-06, + "loss": 0.2774, + "step": 28109 + }, + { + "epoch": 1.3168126668852767, + "grad_norm": 0.6316180149111333, + "learning_rate": 1.3801649829153902e-06, + "loss": 0.2852, + "step": 28110 + }, + { + "epoch": 1.316859511875205, + "grad_norm": 0.6272301130223339, + "learning_rate": 1.3799954262692682e-06, + "loss": 0.2787, + "step": 28111 + }, + { + "epoch": 1.3169063568651334, + "grad_norm": 0.5853191332997411, + "learning_rate": 1.3798258760684923e-06, + "loss": 0.2528, + "step": 28112 + }, + { + "epoch": 1.3169532018550616, + "grad_norm": 0.5996138881712978, + "learning_rate": 1.3796563323140354e-06, + "loss": 0.2843, + "step": 28113 + }, + { + "epoch": 1.3170000468449898, + "grad_norm": 0.584906429100769, + "learning_rate": 1.3794867950068764e-06, + "loss": 0.2615, + "step": 28114 + }, + { + "epoch": 1.3170468918349183, + "grad_norm": 0.640742693210019, + "learning_rate": 1.379317264147988e-06, + "loss": 0.2768, + "step": 28115 + }, + { + "epoch": 1.3170937368248465, + "grad_norm": 0.6115902159172527, + "learning_rate": 1.3791477397383469e-06, + "loss": 0.2961, + "step": 28116 + }, + { + "epoch": 1.3171405818147748, + "grad_norm": 0.5698185280406444, + "learning_rate": 1.3789782217789289e-06, + "loss": 0.2569, + "step": 28117 + }, + { + "epoch": 1.3171874268047032, + "grad_norm": 0.5744502689958307, + "learning_rate": 1.3788087102707093e-06, + "loss": 0.2546, + "step": 28118 + }, + { + "epoch": 1.3172342717946315, + "grad_norm": 0.6100134020462639, + "learning_rate": 1.3786392052146649e-06, + "loss": 0.2847, + "step": 28119 + }, + { + "epoch": 1.31728111678456, + "grad_norm": 0.6460789195986939, + "learning_rate": 1.3784697066117702e-06, + "loss": 0.2829, + "step": 28120 + }, + { + "epoch": 1.3173279617744882, + "grad_norm": 0.5709883157861911, + "learning_rate": 1.3783002144629992e-06, + "loss": 0.2649, + "step": 28121 + }, + { + "epoch": 1.3173748067644167, + "grad_norm": 0.5825212617878575, + "learning_rate": 1.3781307287693288e-06, + "loss": 0.2653, + "step": 28122 + }, + { + "epoch": 1.317421651754345, + "grad_norm": 0.6448354416023723, + "learning_rate": 1.377961249531734e-06, + "loss": 0.2991, + "step": 28123 + }, + { + "epoch": 1.3174684967442731, + "grad_norm": 0.613535854382667, + "learning_rate": 1.37779177675119e-06, + "loss": 0.2795, + "step": 28124 + }, + { + "epoch": 1.3175153417342016, + "grad_norm": 0.5829737933927157, + "learning_rate": 1.3776223104286734e-06, + "loss": 0.2622, + "step": 28125 + }, + { + "epoch": 1.3175621867241298, + "grad_norm": 0.5852554658479043, + "learning_rate": 1.3774528505651573e-06, + "loss": 0.279, + "step": 28126 + }, + { + "epoch": 1.317609031714058, + "grad_norm": 0.6222647930784287, + "learning_rate": 1.377283397161619e-06, + "loss": 0.2687, + "step": 28127 + }, + { + "epoch": 1.3176558767039865, + "grad_norm": 0.6167998106216055, + "learning_rate": 1.3771139502190316e-06, + "loss": 0.2883, + "step": 28128 + }, + { + "epoch": 1.3177027216939148, + "grad_norm": 0.5761250410868698, + "learning_rate": 1.376944509738371e-06, + "loss": 0.2745, + "step": 28129 + }, + { + "epoch": 1.3177495666838432, + "grad_norm": 0.604632274448763, + "learning_rate": 1.3767750757206129e-06, + "loss": 0.2834, + "step": 28130 + }, + { + "epoch": 1.3177964116737715, + "grad_norm": 0.5955090437753056, + "learning_rate": 1.376605648166731e-06, + "loss": 0.2803, + "step": 28131 + }, + { + "epoch": 1.3178432566637, + "grad_norm": 0.5917949166906711, + "learning_rate": 1.3764362270777033e-06, + "loss": 0.2701, + "step": 28132 + }, + { + "epoch": 1.3178901016536282, + "grad_norm": 0.6277268950711075, + "learning_rate": 1.376266812454501e-06, + "loss": 0.2859, + "step": 28133 + }, + { + "epoch": 1.3179369466435564, + "grad_norm": 0.591650739015368, + "learning_rate": 1.376097404298102e-06, + "loss": 0.2646, + "step": 28134 + }, + { + "epoch": 1.317983791633485, + "grad_norm": 0.5790661288495008, + "learning_rate": 1.375928002609479e-06, + "loss": 0.2545, + "step": 28135 + }, + { + "epoch": 1.3180306366234131, + "grad_norm": 0.5937176850772108, + "learning_rate": 1.375758607389608e-06, + "loss": 0.27, + "step": 28136 + }, + { + "epoch": 1.3180774816133414, + "grad_norm": 0.5548254424782942, + "learning_rate": 1.3755892186394631e-06, + "loss": 0.2684, + "step": 28137 + }, + { + "epoch": 1.3181243266032698, + "grad_norm": 0.5982697625670219, + "learning_rate": 1.3754198363600214e-06, + "loss": 0.2773, + "step": 28138 + }, + { + "epoch": 1.318171171593198, + "grad_norm": 0.5881598877795124, + "learning_rate": 1.3752504605522543e-06, + "loss": 0.2556, + "step": 28139 + }, + { + "epoch": 1.3182180165831263, + "grad_norm": 0.5912383420973832, + "learning_rate": 1.3750810912171391e-06, + "loss": 0.2668, + "step": 28140 + }, + { + "epoch": 1.3182648615730548, + "grad_norm": 0.5997189714499225, + "learning_rate": 1.3749117283556487e-06, + "loss": 0.2667, + "step": 28141 + }, + { + "epoch": 1.318311706562983, + "grad_norm": 0.5602083396410623, + "learning_rate": 1.3747423719687586e-06, + "loss": 0.2641, + "step": 28142 + }, + { + "epoch": 1.3183585515529115, + "grad_norm": 0.5713393311379269, + "learning_rate": 1.3745730220574433e-06, + "loss": 0.2799, + "step": 28143 + }, + { + "epoch": 1.3184053965428397, + "grad_norm": 0.5799768274003487, + "learning_rate": 1.374403678622678e-06, + "loss": 0.2633, + "step": 28144 + }, + { + "epoch": 1.3184522415327682, + "grad_norm": 0.5907903913918718, + "learning_rate": 1.3742343416654357e-06, + "loss": 0.2716, + "step": 28145 + }, + { + "epoch": 1.3184990865226964, + "grad_norm": 0.5642132223976161, + "learning_rate": 1.3740650111866921e-06, + "loss": 0.2651, + "step": 28146 + }, + { + "epoch": 1.3185459315126247, + "grad_norm": 0.5744888419159544, + "learning_rate": 1.3738956871874226e-06, + "loss": 0.2546, + "step": 28147 + }, + { + "epoch": 1.3185927765025531, + "grad_norm": 0.5814626332978841, + "learning_rate": 1.3737263696685988e-06, + "loss": 0.2719, + "step": 28148 + }, + { + "epoch": 1.3186396214924814, + "grad_norm": 0.5605145639590979, + "learning_rate": 1.3735570586311968e-06, + "loss": 0.2713, + "step": 28149 + }, + { + "epoch": 1.3186864664824096, + "grad_norm": 0.5738141001819744, + "learning_rate": 1.3733877540761922e-06, + "loss": 0.2631, + "step": 28150 + }, + { + "epoch": 1.318733311472338, + "grad_norm": 0.5684879320114822, + "learning_rate": 1.3732184560045567e-06, + "loss": 0.2803, + "step": 28151 + }, + { + "epoch": 1.3187801564622663, + "grad_norm": 0.5723615717913718, + "learning_rate": 1.3730491644172656e-06, + "loss": 0.2611, + "step": 28152 + }, + { + "epoch": 1.3188270014521946, + "grad_norm": 0.6021727341062213, + "learning_rate": 1.3728798793152936e-06, + "loss": 0.2828, + "step": 28153 + }, + { + "epoch": 1.318873846442123, + "grad_norm": 0.6135977283680825, + "learning_rate": 1.3727106006996155e-06, + "loss": 0.2651, + "step": 28154 + }, + { + "epoch": 1.3189206914320513, + "grad_norm": 0.5788440522449378, + "learning_rate": 1.3725413285712032e-06, + "loss": 0.2606, + "step": 28155 + }, + { + "epoch": 1.3189675364219797, + "grad_norm": 0.6052497617235085, + "learning_rate": 1.3723720629310335e-06, + "loss": 0.2774, + "step": 28156 + }, + { + "epoch": 1.319014381411908, + "grad_norm": 0.6353935329532472, + "learning_rate": 1.3722028037800782e-06, + "loss": 0.2898, + "step": 28157 + }, + { + "epoch": 1.3190612264018364, + "grad_norm": 0.5749581199596302, + "learning_rate": 1.3720335511193122e-06, + "loss": 0.2785, + "step": 28158 + }, + { + "epoch": 1.3191080713917647, + "grad_norm": 0.6159155234901944, + "learning_rate": 1.37186430494971e-06, + "loss": 0.2835, + "step": 28159 + }, + { + "epoch": 1.319154916381693, + "grad_norm": 0.5803731403692182, + "learning_rate": 1.3716950652722446e-06, + "loss": 0.2691, + "step": 28160 + }, + { + "epoch": 1.3192017613716214, + "grad_norm": 0.6039112403940322, + "learning_rate": 1.3715258320878915e-06, + "loss": 0.2671, + "step": 28161 + }, + { + "epoch": 1.3192486063615496, + "grad_norm": 0.5988597065831434, + "learning_rate": 1.371356605397623e-06, + "loss": 0.277, + "step": 28162 + }, + { + "epoch": 1.3192954513514779, + "grad_norm": 0.5872639254053849, + "learning_rate": 1.3711873852024144e-06, + "loss": 0.2589, + "step": 28163 + }, + { + "epoch": 1.3193422963414063, + "grad_norm": 0.583950509539466, + "learning_rate": 1.3710181715032378e-06, + "loss": 0.2668, + "step": 28164 + }, + { + "epoch": 1.3193891413313346, + "grad_norm": 0.6154639740208316, + "learning_rate": 1.3708489643010677e-06, + "loss": 0.276, + "step": 28165 + }, + { + "epoch": 1.319435986321263, + "grad_norm": 0.5556204004151839, + "learning_rate": 1.3706797635968783e-06, + "loss": 0.2492, + "step": 28166 + }, + { + "epoch": 1.3194828313111913, + "grad_norm": 0.5652329763215599, + "learning_rate": 1.3705105693916426e-06, + "loss": 0.2783, + "step": 28167 + }, + { + "epoch": 1.3195296763011197, + "grad_norm": 0.6307889149864387, + "learning_rate": 1.3703413816863359e-06, + "loss": 0.2733, + "step": 28168 + }, + { + "epoch": 1.319576521291048, + "grad_norm": 0.621246794646428, + "learning_rate": 1.3701722004819306e-06, + "loss": 0.2853, + "step": 28169 + }, + { + "epoch": 1.3196233662809762, + "grad_norm": 0.5544422503957933, + "learning_rate": 1.3700030257793991e-06, + "loss": 0.2729, + "step": 28170 + }, + { + "epoch": 1.3196702112709047, + "grad_norm": 0.6018716327908469, + "learning_rate": 1.3698338575797164e-06, + "loss": 0.2739, + "step": 28171 + }, + { + "epoch": 1.319717056260833, + "grad_norm": 0.5883231372286171, + "learning_rate": 1.3696646958838562e-06, + "loss": 0.2759, + "step": 28172 + }, + { + "epoch": 1.3197639012507612, + "grad_norm": 0.5829880015351822, + "learning_rate": 1.3694955406927907e-06, + "loss": 0.2642, + "step": 28173 + }, + { + "epoch": 1.3198107462406896, + "grad_norm": 0.602213125033144, + "learning_rate": 1.3693263920074956e-06, + "loss": 0.271, + "step": 28174 + }, + { + "epoch": 1.3198575912306179, + "grad_norm": 0.6404044923852222, + "learning_rate": 1.3691572498289424e-06, + "loss": 0.2849, + "step": 28175 + }, + { + "epoch": 1.319904436220546, + "grad_norm": 0.6282207816282489, + "learning_rate": 1.3689881141581055e-06, + "loss": 0.2845, + "step": 28176 + }, + { + "epoch": 1.3199512812104746, + "grad_norm": 0.6283845348502701, + "learning_rate": 1.3688189849959566e-06, + "loss": 0.285, + "step": 28177 + }, + { + "epoch": 1.3199981262004028, + "grad_norm": 0.5477559263011711, + "learning_rate": 1.3686498623434705e-06, + "loss": 0.2609, + "step": 28178 + }, + { + "epoch": 1.3200449711903313, + "grad_norm": 0.6390418760846309, + "learning_rate": 1.3684807462016198e-06, + "loss": 0.2999, + "step": 28179 + }, + { + "epoch": 1.3200918161802595, + "grad_norm": 0.6158650598556513, + "learning_rate": 1.3683116365713783e-06, + "loss": 0.2843, + "step": 28180 + }, + { + "epoch": 1.320138661170188, + "grad_norm": 0.6328034154630297, + "learning_rate": 1.3681425334537197e-06, + "loss": 0.2924, + "step": 28181 + }, + { + "epoch": 1.3201855061601162, + "grad_norm": 0.5863620634570016, + "learning_rate": 1.3679734368496165e-06, + "loss": 0.2757, + "step": 28182 + }, + { + "epoch": 1.3202323511500444, + "grad_norm": 0.6413714309457883, + "learning_rate": 1.3678043467600405e-06, + "loss": 0.2887, + "step": 28183 + }, + { + "epoch": 1.320279196139973, + "grad_norm": 0.6001322568586421, + "learning_rate": 1.3676352631859658e-06, + "loss": 0.2728, + "step": 28184 + }, + { + "epoch": 1.3203260411299012, + "grad_norm": 0.5992136405280952, + "learning_rate": 1.367466186128366e-06, + "loss": 0.2726, + "step": 28185 + }, + { + "epoch": 1.3203728861198294, + "grad_norm": 0.5639505892625671, + "learning_rate": 1.3672971155882132e-06, + "loss": 0.2783, + "step": 28186 + }, + { + "epoch": 1.3204197311097579, + "grad_norm": 0.6086090389916315, + "learning_rate": 1.3671280515664819e-06, + "loss": 0.2781, + "step": 28187 + }, + { + "epoch": 1.320466576099686, + "grad_norm": 0.6101809081839598, + "learning_rate": 1.3669589940641432e-06, + "loss": 0.2745, + "step": 28188 + }, + { + "epoch": 1.3205134210896143, + "grad_norm": 0.6092105082628453, + "learning_rate": 1.3667899430821713e-06, + "loss": 0.2737, + "step": 28189 + }, + { + "epoch": 1.3205602660795428, + "grad_norm": 0.5990620060677969, + "learning_rate": 1.3666208986215377e-06, + "loss": 0.2803, + "step": 28190 + }, + { + "epoch": 1.320607111069471, + "grad_norm": 0.5715220246854343, + "learning_rate": 1.3664518606832158e-06, + "loss": 0.2639, + "step": 28191 + }, + { + "epoch": 1.3206539560593995, + "grad_norm": 0.6241409557255737, + "learning_rate": 1.3662828292681787e-06, + "loss": 0.269, + "step": 28192 + }, + { + "epoch": 1.3207008010493277, + "grad_norm": 0.6087619452489427, + "learning_rate": 1.3661138043774e-06, + "loss": 0.2765, + "step": 28193 + }, + { + "epoch": 1.3207476460392562, + "grad_norm": 0.5519789874397564, + "learning_rate": 1.3659447860118502e-06, + "loss": 0.2768, + "step": 28194 + }, + { + "epoch": 1.3207944910291844, + "grad_norm": 0.6364812931318612, + "learning_rate": 1.365775774172503e-06, + "loss": 0.275, + "step": 28195 + }, + { + "epoch": 1.3208413360191127, + "grad_norm": 0.613102636075386, + "learning_rate": 1.3656067688603325e-06, + "loss": 0.2774, + "step": 28196 + }, + { + "epoch": 1.3208881810090412, + "grad_norm": 0.6085311423924324, + "learning_rate": 1.3654377700763089e-06, + "loss": 0.2724, + "step": 28197 + }, + { + "epoch": 1.3209350259989694, + "grad_norm": 0.6314540676209819, + "learning_rate": 1.3652687778214058e-06, + "loss": 0.281, + "step": 28198 + }, + { + "epoch": 1.3209818709888976, + "grad_norm": 0.5958191955145968, + "learning_rate": 1.3650997920965964e-06, + "loss": 0.2671, + "step": 28199 + }, + { + "epoch": 1.321028715978826, + "grad_norm": 0.6438078855120829, + "learning_rate": 1.3649308129028516e-06, + "loss": 0.2944, + "step": 28200 + }, + { + "epoch": 1.3210755609687543, + "grad_norm": 0.5877879114399911, + "learning_rate": 1.3647618402411445e-06, + "loss": 0.2648, + "step": 28201 + }, + { + "epoch": 1.3211224059586828, + "grad_norm": 0.6150208837577739, + "learning_rate": 1.3645928741124479e-06, + "loss": 0.2836, + "step": 28202 + }, + { + "epoch": 1.321169250948611, + "grad_norm": 0.605461219472512, + "learning_rate": 1.3644239145177349e-06, + "loss": 0.2813, + "step": 28203 + }, + { + "epoch": 1.3212160959385395, + "grad_norm": 0.6051801003534516, + "learning_rate": 1.3642549614579759e-06, + "loss": 0.2911, + "step": 28204 + }, + { + "epoch": 1.3212629409284677, + "grad_norm": 0.6662776367507769, + "learning_rate": 1.364086014934145e-06, + "loss": 0.3002, + "step": 28205 + }, + { + "epoch": 1.321309785918396, + "grad_norm": 0.5680447977216004, + "learning_rate": 1.3639170749472125e-06, + "loss": 0.2636, + "step": 28206 + }, + { + "epoch": 1.3213566309083244, + "grad_norm": 0.5834502968848536, + "learning_rate": 1.3637481414981517e-06, + "loss": 0.2734, + "step": 28207 + }, + { + "epoch": 1.3214034758982527, + "grad_norm": 0.5932076016039666, + "learning_rate": 1.363579214587935e-06, + "loss": 0.2716, + "step": 28208 + }, + { + "epoch": 1.321450320888181, + "grad_norm": 0.5807525104116443, + "learning_rate": 1.3634102942175342e-06, + "loss": 0.2539, + "step": 28209 + }, + { + "epoch": 1.3214971658781094, + "grad_norm": 0.5841692355543883, + "learning_rate": 1.3632413803879223e-06, + "loss": 0.2732, + "step": 28210 + }, + { + "epoch": 1.3215440108680376, + "grad_norm": 0.5859192128743274, + "learning_rate": 1.3630724731000711e-06, + "loss": 0.2707, + "step": 28211 + }, + { + "epoch": 1.3215908558579659, + "grad_norm": 0.6163276374282829, + "learning_rate": 1.3629035723549506e-06, + "loss": 0.2737, + "step": 28212 + }, + { + "epoch": 1.3216377008478943, + "grad_norm": 0.6096329209702422, + "learning_rate": 1.3627346781535344e-06, + "loss": 0.2734, + "step": 28213 + }, + { + "epoch": 1.3216845458378226, + "grad_norm": 0.639705901188742, + "learning_rate": 1.3625657904967942e-06, + "loss": 0.2819, + "step": 28214 + }, + { + "epoch": 1.321731390827751, + "grad_norm": 0.569716826079041, + "learning_rate": 1.362396909385702e-06, + "loss": 0.258, + "step": 28215 + }, + { + "epoch": 1.3217782358176793, + "grad_norm": 0.5838549657464646, + "learning_rate": 1.3622280348212297e-06, + "loss": 0.2626, + "step": 28216 + }, + { + "epoch": 1.3218250808076077, + "grad_norm": 0.5962161301622027, + "learning_rate": 1.3620591668043498e-06, + "loss": 0.2847, + "step": 28217 + }, + { + "epoch": 1.321871925797536, + "grad_norm": 0.5925587646329976, + "learning_rate": 1.3618903053360338e-06, + "loss": 0.2799, + "step": 28218 + }, + { + "epoch": 1.3219187707874642, + "grad_norm": 0.604588059411216, + "learning_rate": 1.361721450417252e-06, + "loss": 0.2903, + "step": 28219 + }, + { + "epoch": 1.3219656157773927, + "grad_norm": 0.6045254400010929, + "learning_rate": 1.3615526020489767e-06, + "loss": 0.2758, + "step": 28220 + }, + { + "epoch": 1.322012460767321, + "grad_norm": 0.5578044461953233, + "learning_rate": 1.3613837602321806e-06, + "loss": 0.2672, + "step": 28221 + }, + { + "epoch": 1.3220593057572492, + "grad_norm": 0.5809156367944702, + "learning_rate": 1.3612149249678344e-06, + "loss": 0.2675, + "step": 28222 + }, + { + "epoch": 1.3221061507471776, + "grad_norm": 0.5738839758132738, + "learning_rate": 1.3610460962569111e-06, + "loss": 0.2616, + "step": 28223 + }, + { + "epoch": 1.3221529957371059, + "grad_norm": 0.6113395553526142, + "learning_rate": 1.3608772741003807e-06, + "loss": 0.2873, + "step": 28224 + }, + { + "epoch": 1.3221998407270341, + "grad_norm": 0.6047050641841809, + "learning_rate": 1.360708458499216e-06, + "loss": 0.2712, + "step": 28225 + }, + { + "epoch": 1.3222466857169626, + "grad_norm": 0.6013935179405241, + "learning_rate": 1.3605396494543865e-06, + "loss": 0.2852, + "step": 28226 + }, + { + "epoch": 1.3222935307068908, + "grad_norm": 0.5939526369457367, + "learning_rate": 1.3603708469668653e-06, + "loss": 0.2547, + "step": 28227 + }, + { + "epoch": 1.3223403756968193, + "grad_norm": 0.5983554287068903, + "learning_rate": 1.3602020510376231e-06, + "loss": 0.2794, + "step": 28228 + }, + { + "epoch": 1.3223872206867475, + "grad_norm": 0.565900620000808, + "learning_rate": 1.3600332616676328e-06, + "loss": 0.2623, + "step": 28229 + }, + { + "epoch": 1.322434065676676, + "grad_norm": 0.559910346331596, + "learning_rate": 1.3598644788578635e-06, + "loss": 0.2601, + "step": 28230 + }, + { + "epoch": 1.3224809106666042, + "grad_norm": 0.5687680764595483, + "learning_rate": 1.3596957026092888e-06, + "loss": 0.2866, + "step": 28231 + }, + { + "epoch": 1.3225277556565325, + "grad_norm": 0.5927441505491456, + "learning_rate": 1.3595269329228777e-06, + "loss": 0.2675, + "step": 28232 + }, + { + "epoch": 1.322574600646461, + "grad_norm": 0.5965273086133357, + "learning_rate": 1.3593581697996023e-06, + "loss": 0.2711, + "step": 28233 + }, + { + "epoch": 1.3226214456363892, + "grad_norm": 0.5547041500149873, + "learning_rate": 1.3591894132404338e-06, + "loss": 0.2626, + "step": 28234 + }, + { + "epoch": 1.3226682906263174, + "grad_norm": 0.6270183416880603, + "learning_rate": 1.3590206632463438e-06, + "loss": 0.2875, + "step": 28235 + }, + { + "epoch": 1.3227151356162459, + "grad_norm": 0.626894979525286, + "learning_rate": 1.358851919818304e-06, + "loss": 0.2844, + "step": 28236 + }, + { + "epoch": 1.3227619806061741, + "grad_norm": 0.6011692223696915, + "learning_rate": 1.3586831829572834e-06, + "loss": 0.2767, + "step": 28237 + }, + { + "epoch": 1.3228088255961026, + "grad_norm": 0.5862014263106974, + "learning_rate": 1.3585144526642554e-06, + "loss": 0.2676, + "step": 28238 + }, + { + "epoch": 1.3228556705860308, + "grad_norm": 0.5908200424001105, + "learning_rate": 1.3583457289401888e-06, + "loss": 0.2811, + "step": 28239 + }, + { + "epoch": 1.3229025155759593, + "grad_norm": 0.6024534760347942, + "learning_rate": 1.3581770117860554e-06, + "loss": 0.2769, + "step": 28240 + }, + { + "epoch": 1.3229493605658875, + "grad_norm": 0.5573649332355455, + "learning_rate": 1.3580083012028266e-06, + "loss": 0.2514, + "step": 28241 + }, + { + "epoch": 1.3229962055558158, + "grad_norm": 0.6017915381637374, + "learning_rate": 1.357839597191474e-06, + "loss": 0.2905, + "step": 28242 + }, + { + "epoch": 1.3230430505457442, + "grad_norm": 0.6038420499655871, + "learning_rate": 1.3576708997529665e-06, + "loss": 0.2745, + "step": 28243 + }, + { + "epoch": 1.3230898955356725, + "grad_norm": 0.6180199933538196, + "learning_rate": 1.3575022088882755e-06, + "loss": 0.2795, + "step": 28244 + }, + { + "epoch": 1.3231367405256007, + "grad_norm": 0.5530317201819236, + "learning_rate": 1.3573335245983733e-06, + "loss": 0.2459, + "step": 28245 + }, + { + "epoch": 1.3231835855155292, + "grad_norm": 0.5629959332764147, + "learning_rate": 1.3571648468842286e-06, + "loss": 0.2639, + "step": 28246 + }, + { + "epoch": 1.3232304305054574, + "grad_norm": 0.5555109747440206, + "learning_rate": 1.3569961757468127e-06, + "loss": 0.2728, + "step": 28247 + }, + { + "epoch": 1.3232772754953857, + "grad_norm": 0.5535070750230027, + "learning_rate": 1.3568275111870978e-06, + "loss": 0.2611, + "step": 28248 + }, + { + "epoch": 1.3233241204853141, + "grad_norm": 0.6337814024730835, + "learning_rate": 1.356658853206052e-06, + "loss": 0.2628, + "step": 28249 + }, + { + "epoch": 1.3233709654752424, + "grad_norm": 0.6091021849295747, + "learning_rate": 1.3564902018046474e-06, + "loss": 0.2678, + "step": 28250 + }, + { + "epoch": 1.3234178104651708, + "grad_norm": 0.5614848269476229, + "learning_rate": 1.3563215569838543e-06, + "loss": 0.2536, + "step": 28251 + }, + { + "epoch": 1.323464655455099, + "grad_norm": 0.6138625980656908, + "learning_rate": 1.3561529187446437e-06, + "loss": 0.2685, + "step": 28252 + }, + { + "epoch": 1.3235115004450275, + "grad_norm": 0.5858605684187983, + "learning_rate": 1.3559842870879852e-06, + "loss": 0.2738, + "step": 28253 + }, + { + "epoch": 1.3235583454349558, + "grad_norm": 0.5609234365746556, + "learning_rate": 1.3558156620148502e-06, + "loss": 0.261, + "step": 28254 + }, + { + "epoch": 1.323605190424884, + "grad_norm": 0.6171306049195202, + "learning_rate": 1.3556470435262076e-06, + "loss": 0.2818, + "step": 28255 + }, + { + "epoch": 1.3236520354148125, + "grad_norm": 0.6288103226201148, + "learning_rate": 1.3554784316230285e-06, + "loss": 0.2868, + "step": 28256 + }, + { + "epoch": 1.3236988804047407, + "grad_norm": 0.6210763396898624, + "learning_rate": 1.3553098263062835e-06, + "loss": 0.2699, + "step": 28257 + }, + { + "epoch": 1.323745725394669, + "grad_norm": 0.6044718403272995, + "learning_rate": 1.3551412275769426e-06, + "loss": 0.2712, + "step": 28258 + }, + { + "epoch": 1.3237925703845974, + "grad_norm": 0.6194919581935252, + "learning_rate": 1.3549726354359775e-06, + "loss": 0.2843, + "step": 28259 + }, + { + "epoch": 1.3238394153745257, + "grad_norm": 0.5692802813983939, + "learning_rate": 1.3548040498843567e-06, + "loss": 0.2569, + "step": 28260 + }, + { + "epoch": 1.323886260364454, + "grad_norm": 0.6093060094491209, + "learning_rate": 1.3546354709230502e-06, + "loss": 0.2859, + "step": 28261 + }, + { + "epoch": 1.3239331053543824, + "grad_norm": 0.589909054408253, + "learning_rate": 1.3544668985530286e-06, + "loss": 0.2763, + "step": 28262 + }, + { + "epoch": 1.3239799503443106, + "grad_norm": 0.5949263634838254, + "learning_rate": 1.3542983327752617e-06, + "loss": 0.2615, + "step": 28263 + }, + { + "epoch": 1.324026795334239, + "grad_norm": 0.6267486205982157, + "learning_rate": 1.35412977359072e-06, + "loss": 0.2944, + "step": 28264 + }, + { + "epoch": 1.3240736403241673, + "grad_norm": 0.6166331500191592, + "learning_rate": 1.3539612210003747e-06, + "loss": 0.2726, + "step": 28265 + }, + { + "epoch": 1.3241204853140958, + "grad_norm": 0.5928721614374225, + "learning_rate": 1.3537926750051932e-06, + "loss": 0.2777, + "step": 28266 + }, + { + "epoch": 1.324167330304024, + "grad_norm": 0.6050054910327042, + "learning_rate": 1.353624135606148e-06, + "loss": 0.281, + "step": 28267 + }, + { + "epoch": 1.3242141752939522, + "grad_norm": 0.5766472076600303, + "learning_rate": 1.3534556028042067e-06, + "loss": 0.2669, + "step": 28268 + }, + { + "epoch": 1.3242610202838807, + "grad_norm": 0.5553177529343367, + "learning_rate": 1.3532870766003407e-06, + "loss": 0.2668, + "step": 28269 + }, + { + "epoch": 1.324307865273809, + "grad_norm": 0.6038366505429145, + "learning_rate": 1.3531185569955189e-06, + "loss": 0.2685, + "step": 28270 + }, + { + "epoch": 1.3243547102637372, + "grad_norm": 0.5586102237153266, + "learning_rate": 1.3529500439907115e-06, + "loss": 0.2713, + "step": 28271 + }, + { + "epoch": 1.3244015552536657, + "grad_norm": 0.6079226047584304, + "learning_rate": 1.3527815375868895e-06, + "loss": 0.2741, + "step": 28272 + }, + { + "epoch": 1.324448400243594, + "grad_norm": 0.6371781136816638, + "learning_rate": 1.352613037785021e-06, + "loss": 0.27, + "step": 28273 + }, + { + "epoch": 1.3244952452335224, + "grad_norm": 0.6141859408562237, + "learning_rate": 1.3524445445860756e-06, + "loss": 0.283, + "step": 28274 + }, + { + "epoch": 1.3245420902234506, + "grad_norm": 0.5847657589093911, + "learning_rate": 1.3522760579910233e-06, + "loss": 0.2586, + "step": 28275 + }, + { + "epoch": 1.324588935213379, + "grad_norm": 0.5997534500829009, + "learning_rate": 1.3521075780008336e-06, + "loss": 0.2761, + "step": 28276 + }, + { + "epoch": 1.3246357802033073, + "grad_norm": 0.6021194045597836, + "learning_rate": 1.3519391046164764e-06, + "loss": 0.2663, + "step": 28277 + }, + { + "epoch": 1.3246826251932355, + "grad_norm": 0.581061768065009, + "learning_rate": 1.3517706378389222e-06, + "loss": 0.2638, + "step": 28278 + }, + { + "epoch": 1.324729470183164, + "grad_norm": 0.5659384416981265, + "learning_rate": 1.3516021776691383e-06, + "loss": 0.2693, + "step": 28279 + }, + { + "epoch": 1.3247763151730922, + "grad_norm": 0.6547503106979119, + "learning_rate": 1.351433724108096e-06, + "loss": 0.2989, + "step": 28280 + }, + { + "epoch": 1.3248231601630205, + "grad_norm": 0.5637670374493259, + "learning_rate": 1.351265277156763e-06, + "loss": 0.2723, + "step": 28281 + }, + { + "epoch": 1.324870005152949, + "grad_norm": 0.6035047847438817, + "learning_rate": 1.3510968368161097e-06, + "loss": 0.274, + "step": 28282 + }, + { + "epoch": 1.3249168501428772, + "grad_norm": 0.5680860659371458, + "learning_rate": 1.3509284030871056e-06, + "loss": 0.2621, + "step": 28283 + }, + { + "epoch": 1.3249636951328054, + "grad_norm": 0.5911010605497911, + "learning_rate": 1.3507599759707207e-06, + "loss": 0.2771, + "step": 28284 + }, + { + "epoch": 1.325010540122734, + "grad_norm": 0.601060543714394, + "learning_rate": 1.350591555467922e-06, + "loss": 0.2761, + "step": 28285 + }, + { + "epoch": 1.3250573851126621, + "grad_norm": 0.583986010090479, + "learning_rate": 1.3504231415796801e-06, + "loss": 0.2838, + "step": 28286 + }, + { + "epoch": 1.3251042301025906, + "grad_norm": 0.583258467140793, + "learning_rate": 1.3502547343069649e-06, + "loss": 0.28, + "step": 28287 + }, + { + "epoch": 1.3251510750925188, + "grad_norm": 0.5888401508494769, + "learning_rate": 1.3500863336507442e-06, + "loss": 0.2707, + "step": 28288 + }, + { + "epoch": 1.3251979200824473, + "grad_norm": 0.5823026623537698, + "learning_rate": 1.3499179396119872e-06, + "loss": 0.2762, + "step": 28289 + }, + { + "epoch": 1.3252447650723755, + "grad_norm": 0.5236284731474834, + "learning_rate": 1.3497495521916649e-06, + "loss": 0.2453, + "step": 28290 + }, + { + "epoch": 1.3252916100623038, + "grad_norm": 0.6063397123081052, + "learning_rate": 1.349581171390743e-06, + "loss": 0.2717, + "step": 28291 + }, + { + "epoch": 1.3253384550522322, + "grad_norm": 0.5473707209547081, + "learning_rate": 1.3494127972101928e-06, + "loss": 0.2622, + "step": 28292 + }, + { + "epoch": 1.3253853000421605, + "grad_norm": 0.6209716930262406, + "learning_rate": 1.349244429650983e-06, + "loss": 0.2669, + "step": 28293 + }, + { + "epoch": 1.3254321450320887, + "grad_norm": 0.6221620393557549, + "learning_rate": 1.349076068714083e-06, + "loss": 0.2623, + "step": 28294 + }, + { + "epoch": 1.3254789900220172, + "grad_norm": 0.5653616163477861, + "learning_rate": 1.3489077144004598e-06, + "loss": 0.2784, + "step": 28295 + }, + { + "epoch": 1.3255258350119454, + "grad_norm": 0.5987234534507457, + "learning_rate": 1.3487393667110848e-06, + "loss": 0.2713, + "step": 28296 + }, + { + "epoch": 1.3255726800018737, + "grad_norm": 0.6021297648410928, + "learning_rate": 1.3485710256469243e-06, + "loss": 0.2657, + "step": 28297 + }, + { + "epoch": 1.3256195249918021, + "grad_norm": 0.5883478090291459, + "learning_rate": 1.3484026912089476e-06, + "loss": 0.2603, + "step": 28298 + }, + { + "epoch": 1.3256663699817304, + "grad_norm": 0.5962132476949683, + "learning_rate": 1.3482343633981242e-06, + "loss": 0.2684, + "step": 28299 + }, + { + "epoch": 1.3257132149716588, + "grad_norm": 0.5759754377740465, + "learning_rate": 1.3480660422154228e-06, + "loss": 0.2544, + "step": 28300 + }, + { + "epoch": 1.325760059961587, + "grad_norm": 0.6320949461767349, + "learning_rate": 1.347897727661813e-06, + "loss": 0.2687, + "step": 28301 + }, + { + "epoch": 1.3258069049515155, + "grad_norm": 0.5780891669825114, + "learning_rate": 1.3477294197382607e-06, + "loss": 0.2704, + "step": 28302 + }, + { + "epoch": 1.3258537499414438, + "grad_norm": 0.6163047720637848, + "learning_rate": 1.3475611184457373e-06, + "loss": 0.286, + "step": 28303 + }, + { + "epoch": 1.325900594931372, + "grad_norm": 0.5762841833586235, + "learning_rate": 1.347392823785209e-06, + "loss": 0.2787, + "step": 28304 + }, + { + "epoch": 1.3259474399213005, + "grad_norm": 0.5991528152683122, + "learning_rate": 1.3472245357576453e-06, + "loss": 0.2615, + "step": 28305 + }, + { + "epoch": 1.3259942849112287, + "grad_norm": 0.6270672988056935, + "learning_rate": 1.3470562543640144e-06, + "loss": 0.2813, + "step": 28306 + }, + { + "epoch": 1.326041129901157, + "grad_norm": 0.5857480222172243, + "learning_rate": 1.3468879796052852e-06, + "loss": 0.2741, + "step": 28307 + }, + { + "epoch": 1.3260879748910854, + "grad_norm": 0.6039199787934633, + "learning_rate": 1.3467197114824268e-06, + "loss": 0.2696, + "step": 28308 + }, + { + "epoch": 1.3261348198810137, + "grad_norm": 0.5446545726076247, + "learning_rate": 1.3465514499964067e-06, + "loss": 0.2709, + "step": 28309 + }, + { + "epoch": 1.3261816648709421, + "grad_norm": 0.5609904252661001, + "learning_rate": 1.346383195148192e-06, + "loss": 0.2682, + "step": 28310 + }, + { + "epoch": 1.3262285098608704, + "grad_norm": 0.5692205916669469, + "learning_rate": 1.346214946938752e-06, + "loss": 0.2789, + "step": 28311 + }, + { + "epoch": 1.3262753548507988, + "grad_norm": 0.6089257264499772, + "learning_rate": 1.346046705369055e-06, + "loss": 0.2881, + "step": 28312 + }, + { + "epoch": 1.326322199840727, + "grad_norm": 0.5588051631061005, + "learning_rate": 1.3458784704400691e-06, + "loss": 0.2507, + "step": 28313 + }, + { + "epoch": 1.3263690448306553, + "grad_norm": 0.6066957987450797, + "learning_rate": 1.3457102421527638e-06, + "loss": 0.271, + "step": 28314 + }, + { + "epoch": 1.3264158898205838, + "grad_norm": 0.5810181171049464, + "learning_rate": 1.3455420205081048e-06, + "loss": 0.272, + "step": 28315 + }, + { + "epoch": 1.326462734810512, + "grad_norm": 0.5907635117948576, + "learning_rate": 1.345373805507062e-06, + "loss": 0.2874, + "step": 28316 + }, + { + "epoch": 1.3265095798004403, + "grad_norm": 0.5727199947625172, + "learning_rate": 1.3452055971506024e-06, + "loss": 0.2687, + "step": 28317 + }, + { + "epoch": 1.3265564247903687, + "grad_norm": 0.6103782883379072, + "learning_rate": 1.345037395439694e-06, + "loss": 0.2826, + "step": 28318 + }, + { + "epoch": 1.326603269780297, + "grad_norm": 0.5969020778957811, + "learning_rate": 1.344869200375305e-06, + "loss": 0.2675, + "step": 28319 + }, + { + "epoch": 1.3266501147702252, + "grad_norm": 0.617646343614558, + "learning_rate": 1.3447010119584034e-06, + "loss": 0.2648, + "step": 28320 + }, + { + "epoch": 1.3266969597601537, + "grad_norm": 0.6310643078631222, + "learning_rate": 1.344532830189958e-06, + "loss": 0.2819, + "step": 28321 + }, + { + "epoch": 1.326743804750082, + "grad_norm": 0.625110914852347, + "learning_rate": 1.344364655070936e-06, + "loss": 0.2811, + "step": 28322 + }, + { + "epoch": 1.3267906497400104, + "grad_norm": 0.5688378127015175, + "learning_rate": 1.3441964866023039e-06, + "loss": 0.2707, + "step": 28323 + }, + { + "epoch": 1.3268374947299386, + "grad_norm": 0.6272786527715585, + "learning_rate": 1.3440283247850306e-06, + "loss": 0.2816, + "step": 28324 + }, + { + "epoch": 1.326884339719867, + "grad_norm": 0.553080116592239, + "learning_rate": 1.3438601696200836e-06, + "loss": 0.2573, + "step": 28325 + }, + { + "epoch": 1.3269311847097953, + "grad_norm": 0.5601144062580493, + "learning_rate": 1.343692021108431e-06, + "loss": 0.2552, + "step": 28326 + }, + { + "epoch": 1.3269780296997236, + "grad_norm": 0.5954270804354056, + "learning_rate": 1.3435238792510407e-06, + "loss": 0.254, + "step": 28327 + }, + { + "epoch": 1.327024874689652, + "grad_norm": 0.5655980725464981, + "learning_rate": 1.3433557440488792e-06, + "loss": 0.2534, + "step": 28328 + }, + { + "epoch": 1.3270717196795803, + "grad_norm": 0.5819665508205506, + "learning_rate": 1.3431876155029156e-06, + "loss": 0.2559, + "step": 28329 + }, + { + "epoch": 1.3271185646695085, + "grad_norm": 0.5908347079621403, + "learning_rate": 1.3430194936141156e-06, + "loss": 0.2727, + "step": 28330 + }, + { + "epoch": 1.327165409659437, + "grad_norm": 0.6551648835913432, + "learning_rate": 1.3428513783834473e-06, + "loss": 0.2898, + "step": 28331 + }, + { + "epoch": 1.3272122546493652, + "grad_norm": 0.5976409128026505, + "learning_rate": 1.3426832698118788e-06, + "loss": 0.2788, + "step": 28332 + }, + { + "epoch": 1.3272590996392934, + "grad_norm": 0.5972962777716481, + "learning_rate": 1.3425151679003782e-06, + "loss": 0.2514, + "step": 28333 + }, + { + "epoch": 1.327305944629222, + "grad_norm": 0.587229334816796, + "learning_rate": 1.3423470726499105e-06, + "loss": 0.2715, + "step": 28334 + }, + { + "epoch": 1.3273527896191502, + "grad_norm": 0.5880475360130296, + "learning_rate": 1.3421789840614449e-06, + "loss": 0.2766, + "step": 28335 + }, + { + "epoch": 1.3273996346090786, + "grad_norm": 0.5662024238203703, + "learning_rate": 1.342010902135949e-06, + "loss": 0.2577, + "step": 28336 + }, + { + "epoch": 1.3274464795990069, + "grad_norm": 0.6080324102518658, + "learning_rate": 1.3418428268743882e-06, + "loss": 0.2728, + "step": 28337 + }, + { + "epoch": 1.3274933245889353, + "grad_norm": 0.5811507936089996, + "learning_rate": 1.3416747582777312e-06, + "loss": 0.2702, + "step": 28338 + }, + { + "epoch": 1.3275401695788636, + "grad_norm": 0.5625129458417655, + "learning_rate": 1.3415066963469457e-06, + "loss": 0.2833, + "step": 28339 + }, + { + "epoch": 1.3275870145687918, + "grad_norm": 0.718563786159953, + "learning_rate": 1.341338641082997e-06, + "loss": 0.2858, + "step": 28340 + }, + { + "epoch": 1.3276338595587203, + "grad_norm": 0.5904834061552554, + "learning_rate": 1.341170592486853e-06, + "loss": 0.2779, + "step": 28341 + }, + { + "epoch": 1.3276807045486485, + "grad_norm": 0.5822939240665683, + "learning_rate": 1.3410025505594808e-06, + "loss": 0.2759, + "step": 28342 + }, + { + "epoch": 1.3277275495385767, + "grad_norm": 0.6686182170689089, + "learning_rate": 1.3408345153018492e-06, + "loss": 0.2892, + "step": 28343 + }, + { + "epoch": 1.3277743945285052, + "grad_norm": 0.5856212517994281, + "learning_rate": 1.3406664867149222e-06, + "loss": 0.2743, + "step": 28344 + }, + { + "epoch": 1.3278212395184334, + "grad_norm": 0.5739123632123017, + "learning_rate": 1.3404984647996695e-06, + "loss": 0.2742, + "step": 28345 + }, + { + "epoch": 1.327868084508362, + "grad_norm": 0.5985895347351783, + "learning_rate": 1.3403304495570554e-06, + "loss": 0.2715, + "step": 28346 + }, + { + "epoch": 1.3279149294982902, + "grad_norm": 0.6196207231793786, + "learning_rate": 1.340162440988048e-06, + "loss": 0.2816, + "step": 28347 + }, + { + "epoch": 1.3279617744882186, + "grad_norm": 0.5422873895268039, + "learning_rate": 1.3399944390936149e-06, + "loss": 0.2628, + "step": 28348 + }, + { + "epoch": 1.3280086194781469, + "grad_norm": 0.6018272340360176, + "learning_rate": 1.3398264438747216e-06, + "loss": 0.2847, + "step": 28349 + }, + { + "epoch": 1.328055464468075, + "grad_norm": 0.593953228301944, + "learning_rate": 1.3396584553323366e-06, + "loss": 0.2673, + "step": 28350 + }, + { + "epoch": 1.3281023094580036, + "grad_norm": 0.5648372331162772, + "learning_rate": 1.3394904734674257e-06, + "loss": 0.2597, + "step": 28351 + }, + { + "epoch": 1.3281491544479318, + "grad_norm": 0.5760804090797528, + "learning_rate": 1.3393224982809544e-06, + "loss": 0.2687, + "step": 28352 + }, + { + "epoch": 1.32819599943786, + "grad_norm": 0.5839176983752905, + "learning_rate": 1.3391545297738905e-06, + "loss": 0.2598, + "step": 28353 + }, + { + "epoch": 1.3282428444277885, + "grad_norm": 0.5564016313855287, + "learning_rate": 1.3389865679472002e-06, + "loss": 0.2597, + "step": 28354 + }, + { + "epoch": 1.3282896894177167, + "grad_norm": 0.5649535688381413, + "learning_rate": 1.338818612801851e-06, + "loss": 0.2732, + "step": 28355 + }, + { + "epoch": 1.328336534407645, + "grad_norm": 0.6039116924737304, + "learning_rate": 1.3386506643388083e-06, + "loss": 0.2765, + "step": 28356 + }, + { + "epoch": 1.3283833793975734, + "grad_norm": 0.5763202552905098, + "learning_rate": 1.3384827225590402e-06, + "loss": 0.2546, + "step": 28357 + }, + { + "epoch": 1.3284302243875017, + "grad_norm": 0.5770190081424669, + "learning_rate": 1.3383147874635122e-06, + "loss": 0.2663, + "step": 28358 + }, + { + "epoch": 1.3284770693774302, + "grad_norm": 0.6398950957396529, + "learning_rate": 1.3381468590531896e-06, + "loss": 0.2619, + "step": 28359 + }, + { + "epoch": 1.3285239143673584, + "grad_norm": 0.535563111207199, + "learning_rate": 1.3379789373290398e-06, + "loss": 0.2635, + "step": 28360 + }, + { + "epoch": 1.3285707593572869, + "grad_norm": 0.6024354193858443, + "learning_rate": 1.3378110222920293e-06, + "loss": 0.26, + "step": 28361 + }, + { + "epoch": 1.328617604347215, + "grad_norm": 0.5974995053013274, + "learning_rate": 1.3376431139431239e-06, + "loss": 0.2779, + "step": 28362 + }, + { + "epoch": 1.3286644493371433, + "grad_norm": 0.5835572188634143, + "learning_rate": 1.3374752122832914e-06, + "loss": 0.2763, + "step": 28363 + }, + { + "epoch": 1.3287112943270718, + "grad_norm": 0.606158213913463, + "learning_rate": 1.3373073173134959e-06, + "loss": 0.2627, + "step": 28364 + }, + { + "epoch": 1.328758139317, + "grad_norm": 0.594641452244491, + "learning_rate": 1.3371394290347055e-06, + "loss": 0.276, + "step": 28365 + }, + { + "epoch": 1.3288049843069283, + "grad_norm": 0.5885777007397003, + "learning_rate": 1.3369715474478844e-06, + "loss": 0.2686, + "step": 28366 + }, + { + "epoch": 1.3288518292968567, + "grad_norm": 0.5824830061959972, + "learning_rate": 1.336803672554e-06, + "loss": 0.2789, + "step": 28367 + }, + { + "epoch": 1.328898674286785, + "grad_norm": 0.5806119349916721, + "learning_rate": 1.3366358043540177e-06, + "loss": 0.2478, + "step": 28368 + }, + { + "epoch": 1.3289455192767132, + "grad_norm": 0.6096558753244259, + "learning_rate": 1.3364679428489045e-06, + "loss": 0.2606, + "step": 28369 + }, + { + "epoch": 1.3289923642666417, + "grad_norm": 0.5964876323866696, + "learning_rate": 1.3363000880396264e-06, + "loss": 0.2821, + "step": 28370 + }, + { + "epoch": 1.32903920925657, + "grad_norm": 0.5940422511144603, + "learning_rate": 1.3361322399271489e-06, + "loss": 0.2844, + "step": 28371 + }, + { + "epoch": 1.3290860542464984, + "grad_norm": 0.6231038273629104, + "learning_rate": 1.3359643985124367e-06, + "loss": 0.2855, + "step": 28372 + }, + { + "epoch": 1.3291328992364266, + "grad_norm": 0.5831316605552583, + "learning_rate": 1.3357965637964571e-06, + "loss": 0.2608, + "step": 28373 + }, + { + "epoch": 1.329179744226355, + "grad_norm": 0.6190026980526552, + "learning_rate": 1.3356287357801757e-06, + "loss": 0.2892, + "step": 28374 + }, + { + "epoch": 1.3292265892162833, + "grad_norm": 0.5877320672168799, + "learning_rate": 1.335460914464558e-06, + "loss": 0.2727, + "step": 28375 + }, + { + "epoch": 1.3292734342062116, + "grad_norm": 0.6119720448889744, + "learning_rate": 1.3352930998505708e-06, + "loss": 0.2804, + "step": 28376 + }, + { + "epoch": 1.32932027919614, + "grad_norm": 0.61114155346204, + "learning_rate": 1.3351252919391788e-06, + "loss": 0.2772, + "step": 28377 + }, + { + "epoch": 1.3293671241860683, + "grad_norm": 1.192232709029196, + "learning_rate": 1.3349574907313485e-06, + "loss": 0.2898, + "step": 28378 + }, + { + "epoch": 1.3294139691759965, + "grad_norm": 0.5623503687926058, + "learning_rate": 1.3347896962280442e-06, + "loss": 0.2596, + "step": 28379 + }, + { + "epoch": 1.329460814165925, + "grad_norm": 0.6236129159744546, + "learning_rate": 1.334621908430232e-06, + "loss": 0.2822, + "step": 28380 + }, + { + "epoch": 1.3295076591558532, + "grad_norm": 0.5868997356490927, + "learning_rate": 1.334454127338878e-06, + "loss": 0.2786, + "step": 28381 + }, + { + "epoch": 1.3295545041457817, + "grad_norm": 0.5761300830939942, + "learning_rate": 1.3342863529549489e-06, + "loss": 0.2643, + "step": 28382 + }, + { + "epoch": 1.32960134913571, + "grad_norm": 0.6318197102828178, + "learning_rate": 1.3341185852794072e-06, + "loss": 0.2671, + "step": 28383 + }, + { + "epoch": 1.3296481941256384, + "grad_norm": 0.5643218719614893, + "learning_rate": 1.3339508243132205e-06, + "loss": 0.2637, + "step": 28384 + }, + { + "epoch": 1.3296950391155666, + "grad_norm": 0.581214243419309, + "learning_rate": 1.3337830700573545e-06, + "loss": 0.2691, + "step": 28385 + }, + { + "epoch": 1.3297418841054949, + "grad_norm": 0.6067017518217221, + "learning_rate": 1.333615322512773e-06, + "loss": 0.2861, + "step": 28386 + }, + { + "epoch": 1.3297887290954233, + "grad_norm": 0.5914092379940252, + "learning_rate": 1.333447581680442e-06, + "loss": 0.2805, + "step": 28387 + }, + { + "epoch": 1.3298355740853516, + "grad_norm": 0.6238083983027067, + "learning_rate": 1.333279847561328e-06, + "loss": 0.2744, + "step": 28388 + }, + { + "epoch": 1.3298824190752798, + "grad_norm": 0.57488031806806, + "learning_rate": 1.333112120156394e-06, + "loss": 0.2724, + "step": 28389 + }, + { + "epoch": 1.3299292640652083, + "grad_norm": 0.5878188071099185, + "learning_rate": 1.332944399466607e-06, + "loss": 0.2587, + "step": 28390 + }, + { + "epoch": 1.3299761090551365, + "grad_norm": 0.5934780505478645, + "learning_rate": 1.3327766854929315e-06, + "loss": 0.2772, + "step": 28391 + }, + { + "epoch": 1.3300229540450648, + "grad_norm": 0.6088521833240879, + "learning_rate": 1.3326089782363333e-06, + "loss": 0.2641, + "step": 28392 + }, + { + "epoch": 1.3300697990349932, + "grad_norm": 0.5770780585442296, + "learning_rate": 1.3324412776977763e-06, + "loss": 0.2617, + "step": 28393 + }, + { + "epoch": 1.3301166440249215, + "grad_norm": 0.5873287647058122, + "learning_rate": 1.3322735838782274e-06, + "loss": 0.2873, + "step": 28394 + }, + { + "epoch": 1.33016348901485, + "grad_norm": 0.597128884485493, + "learning_rate": 1.3321058967786496e-06, + "loss": 0.2725, + "step": 28395 + }, + { + "epoch": 1.3302103340047782, + "grad_norm": 0.6429457121599484, + "learning_rate": 1.3319382164000088e-06, + "loss": 0.2826, + "step": 28396 + }, + { + "epoch": 1.3302571789947066, + "grad_norm": 0.6279873249099125, + "learning_rate": 1.3317705427432697e-06, + "loss": 0.2776, + "step": 28397 + }, + { + "epoch": 1.3303040239846349, + "grad_norm": 0.6402525483551352, + "learning_rate": 1.331602875809398e-06, + "loss": 0.2801, + "step": 28398 + }, + { + "epoch": 1.3303508689745631, + "grad_norm": 0.6192967184692235, + "learning_rate": 1.3314352155993586e-06, + "loss": 0.2858, + "step": 28399 + }, + { + "epoch": 1.3303977139644916, + "grad_norm": 0.6166344258664112, + "learning_rate": 1.331267562114116e-06, + "loss": 0.2784, + "step": 28400 + }, + { + "epoch": 1.3304445589544198, + "grad_norm": 0.6074946742050646, + "learning_rate": 1.331099915354634e-06, + "loss": 0.2684, + "step": 28401 + }, + { + "epoch": 1.330491403944348, + "grad_norm": 0.588902755408647, + "learning_rate": 1.330932275321878e-06, + "loss": 0.2825, + "step": 28402 + }, + { + "epoch": 1.3305382489342765, + "grad_norm": 0.6486677937518438, + "learning_rate": 1.330764642016813e-06, + "loss": 0.2871, + "step": 28403 + }, + { + "epoch": 1.3305850939242048, + "grad_norm": 0.5885098320906103, + "learning_rate": 1.3305970154404034e-06, + "loss": 0.2795, + "step": 28404 + }, + { + "epoch": 1.330631938914133, + "grad_norm": 0.6312974393504662, + "learning_rate": 1.3304293955936153e-06, + "loss": 0.2827, + "step": 28405 + }, + { + "epoch": 1.3306787839040615, + "grad_norm": 0.6123352736236322, + "learning_rate": 1.3302617824774111e-06, + "loss": 0.2688, + "step": 28406 + }, + { + "epoch": 1.3307256288939897, + "grad_norm": 0.5824109114387676, + "learning_rate": 1.3300941760927571e-06, + "loss": 0.269, + "step": 28407 + }, + { + "epoch": 1.3307724738839182, + "grad_norm": 0.5758430727062354, + "learning_rate": 1.329926576440616e-06, + "loss": 0.2714, + "step": 28408 + }, + { + "epoch": 1.3308193188738464, + "grad_norm": 0.5474064894316909, + "learning_rate": 1.3297589835219537e-06, + "loss": 0.2609, + "step": 28409 + }, + { + "epoch": 1.3308661638637749, + "grad_norm": 0.5677727566731151, + "learning_rate": 1.3295913973377345e-06, + "loss": 0.2565, + "step": 28410 + }, + { + "epoch": 1.3309130088537031, + "grad_norm": 0.6108260500587, + "learning_rate": 1.3294238178889224e-06, + "loss": 0.2708, + "step": 28411 + }, + { + "epoch": 1.3309598538436314, + "grad_norm": 0.575596948966037, + "learning_rate": 1.3292562451764828e-06, + "loss": 0.2647, + "step": 28412 + }, + { + "epoch": 1.3310066988335598, + "grad_norm": 0.5952752876457849, + "learning_rate": 1.3290886792013794e-06, + "loss": 0.2601, + "step": 28413 + }, + { + "epoch": 1.331053543823488, + "grad_norm": 0.5408990980903959, + "learning_rate": 1.3289211199645752e-06, + "loss": 0.2527, + "step": 28414 + }, + { + "epoch": 1.3311003888134163, + "grad_norm": 0.5914922693740994, + "learning_rate": 1.328753567467036e-06, + "loss": 0.2649, + "step": 28415 + }, + { + "epoch": 1.3311472338033448, + "grad_norm": 0.592869781186407, + "learning_rate": 1.3285860217097252e-06, + "loss": 0.2717, + "step": 28416 + }, + { + "epoch": 1.331194078793273, + "grad_norm": 0.5887305287722505, + "learning_rate": 1.3284184826936075e-06, + "loss": 0.2737, + "step": 28417 + }, + { + "epoch": 1.3312409237832015, + "grad_norm": 0.615658887583836, + "learning_rate": 1.3282509504196483e-06, + "loss": 0.2622, + "step": 28418 + }, + { + "epoch": 1.3312877687731297, + "grad_norm": 0.6058890699747861, + "learning_rate": 1.328083424888809e-06, + "loss": 0.2749, + "step": 28419 + }, + { + "epoch": 1.3313346137630582, + "grad_norm": 0.6201899007561436, + "learning_rate": 1.3279159061020564e-06, + "loss": 0.278, + "step": 28420 + }, + { + "epoch": 1.3313814587529864, + "grad_norm": 0.6208118652686149, + "learning_rate": 1.327748394060352e-06, + "loss": 0.2885, + "step": 28421 + }, + { + "epoch": 1.3314283037429147, + "grad_norm": 0.6129823254016976, + "learning_rate": 1.3275808887646608e-06, + "loss": 0.2778, + "step": 28422 + }, + { + "epoch": 1.3314751487328431, + "grad_norm": 0.5891079855337172, + "learning_rate": 1.3274133902159469e-06, + "loss": 0.2715, + "step": 28423 + }, + { + "epoch": 1.3315219937227714, + "grad_norm": 0.5778613500859596, + "learning_rate": 1.3272458984151753e-06, + "loss": 0.265, + "step": 28424 + }, + { + "epoch": 1.3315688387126996, + "grad_norm": 0.5824745786295833, + "learning_rate": 1.327078413363308e-06, + "loss": 0.2913, + "step": 28425 + }, + { + "epoch": 1.331615683702628, + "grad_norm": 0.5954012686645916, + "learning_rate": 1.3269109350613096e-06, + "loss": 0.2917, + "step": 28426 + }, + { + "epoch": 1.3316625286925563, + "grad_norm": 0.6133775217667105, + "learning_rate": 1.3267434635101448e-06, + "loss": 0.2667, + "step": 28427 + }, + { + "epoch": 1.3317093736824845, + "grad_norm": 0.5708138509799244, + "learning_rate": 1.3265759987107757e-06, + "loss": 0.2639, + "step": 28428 + }, + { + "epoch": 1.331756218672413, + "grad_norm": 0.6007160251586555, + "learning_rate": 1.3264085406641663e-06, + "loss": 0.2725, + "step": 28429 + }, + { + "epoch": 1.3318030636623412, + "grad_norm": 0.5980351924712788, + "learning_rate": 1.326241089371282e-06, + "loss": 0.2642, + "step": 28430 + }, + { + "epoch": 1.3318499086522697, + "grad_norm": 0.5965978080839796, + "learning_rate": 1.3260736448330846e-06, + "loss": 0.2635, + "step": 28431 + }, + { + "epoch": 1.331896753642198, + "grad_norm": 0.6243854133355002, + "learning_rate": 1.325906207050538e-06, + "loss": 0.2671, + "step": 28432 + }, + { + "epoch": 1.3319435986321264, + "grad_norm": 0.6112709824558508, + "learning_rate": 1.325738776024606e-06, + "loss": 0.2797, + "step": 28433 + }, + { + "epoch": 1.3319904436220547, + "grad_norm": 0.5767941302665753, + "learning_rate": 1.3255713517562533e-06, + "loss": 0.2554, + "step": 28434 + }, + { + "epoch": 1.332037288611983, + "grad_norm": 0.5906630377599639, + "learning_rate": 1.3254039342464412e-06, + "loss": 0.2712, + "step": 28435 + }, + { + "epoch": 1.3320841336019114, + "grad_norm": 0.7031974145868016, + "learning_rate": 1.3252365234961346e-06, + "loss": 0.3159, + "step": 28436 + }, + { + "epoch": 1.3321309785918396, + "grad_norm": 0.6001611570660328, + "learning_rate": 1.325069119506297e-06, + "loss": 0.2834, + "step": 28437 + }, + { + "epoch": 1.3321778235817678, + "grad_norm": 0.5582595148143771, + "learning_rate": 1.3249017222778905e-06, + "loss": 0.2682, + "step": 28438 + }, + { + "epoch": 1.3322246685716963, + "grad_norm": 0.6263958376125817, + "learning_rate": 1.3247343318118793e-06, + "loss": 0.267, + "step": 28439 + }, + { + "epoch": 1.3322715135616245, + "grad_norm": 0.6202806387608711, + "learning_rate": 1.3245669481092265e-06, + "loss": 0.2811, + "step": 28440 + }, + { + "epoch": 1.3323183585515528, + "grad_norm": 0.622498948074656, + "learning_rate": 1.3243995711708967e-06, + "loss": 0.2773, + "step": 28441 + }, + { + "epoch": 1.3323652035414812, + "grad_norm": 0.6014171440650072, + "learning_rate": 1.3242322009978508e-06, + "loss": 0.2826, + "step": 28442 + }, + { + "epoch": 1.3324120485314095, + "grad_norm": 0.5726154501990058, + "learning_rate": 1.3240648375910543e-06, + "loss": 0.2587, + "step": 28443 + }, + { + "epoch": 1.332458893521338, + "grad_norm": 0.5658384454402456, + "learning_rate": 1.3238974809514676e-06, + "loss": 0.2641, + "step": 28444 + }, + { + "epoch": 1.3325057385112662, + "grad_norm": 0.570355913671634, + "learning_rate": 1.3237301310800557e-06, + "loss": 0.2795, + "step": 28445 + }, + { + "epoch": 1.3325525835011947, + "grad_norm": 0.5574035152529503, + "learning_rate": 1.3235627879777813e-06, + "loss": 0.2722, + "step": 28446 + }, + { + "epoch": 1.332599428491123, + "grad_norm": 0.6095789060076743, + "learning_rate": 1.323395451645607e-06, + "loss": 0.2529, + "step": 28447 + }, + { + "epoch": 1.3326462734810511, + "grad_norm": 0.5893779648656139, + "learning_rate": 1.3232281220844973e-06, + "loss": 0.2773, + "step": 28448 + }, + { + "epoch": 1.3326931184709796, + "grad_norm": 0.5987606878684985, + "learning_rate": 1.3230607992954138e-06, + "loss": 0.2797, + "step": 28449 + }, + { + "epoch": 1.3327399634609078, + "grad_norm": 0.6366397353299036, + "learning_rate": 1.322893483279319e-06, + "loss": 0.2907, + "step": 28450 + }, + { + "epoch": 1.332786808450836, + "grad_norm": 0.622621795098955, + "learning_rate": 1.322726174037176e-06, + "loss": 0.2839, + "step": 28451 + }, + { + "epoch": 1.3328336534407645, + "grad_norm": 0.5976877716402381, + "learning_rate": 1.3225588715699484e-06, + "loss": 0.2764, + "step": 28452 + }, + { + "epoch": 1.3328804984306928, + "grad_norm": 0.5933820044732895, + "learning_rate": 1.3223915758785981e-06, + "loss": 0.2568, + "step": 28453 + }, + { + "epoch": 1.3329273434206212, + "grad_norm": 0.5925187675292292, + "learning_rate": 1.3222242869640895e-06, + "loss": 0.2684, + "step": 28454 + }, + { + "epoch": 1.3329741884105495, + "grad_norm": 0.622466227359238, + "learning_rate": 1.3220570048273832e-06, + "loss": 0.2865, + "step": 28455 + }, + { + "epoch": 1.333021033400478, + "grad_norm": 0.5887438923827113, + "learning_rate": 1.3218897294694436e-06, + "loss": 0.2602, + "step": 28456 + }, + { + "epoch": 1.3330678783904062, + "grad_norm": 0.5662280404629817, + "learning_rate": 1.3217224608912315e-06, + "loss": 0.2674, + "step": 28457 + }, + { + "epoch": 1.3331147233803344, + "grad_norm": 0.6368927525548533, + "learning_rate": 1.3215551990937106e-06, + "loss": 0.295, + "step": 28458 + }, + { + "epoch": 1.333161568370263, + "grad_norm": 0.6027684760602515, + "learning_rate": 1.3213879440778432e-06, + "loss": 0.2646, + "step": 28459 + }, + { + "epoch": 1.3332084133601911, + "grad_norm": 0.619980762025483, + "learning_rate": 1.3212206958445922e-06, + "loss": 0.282, + "step": 28460 + }, + { + "epoch": 1.3332552583501194, + "grad_norm": 0.6038911045233761, + "learning_rate": 1.3210534543949205e-06, + "loss": 0.2862, + "step": 28461 + }, + { + "epoch": 1.3333021033400478, + "grad_norm": 0.5672210469270988, + "learning_rate": 1.32088621972979e-06, + "loss": 0.2698, + "step": 28462 + }, + { + "epoch": 1.333348948329976, + "grad_norm": 0.6172270516736384, + "learning_rate": 1.320718991850162e-06, + "loss": 0.2864, + "step": 28463 + }, + { + "epoch": 1.3333957933199043, + "grad_norm": 0.5462396706081376, + "learning_rate": 1.3205517707569998e-06, + "loss": 0.2595, + "step": 28464 + }, + { + "epoch": 1.3334426383098328, + "grad_norm": 0.6294875131930353, + "learning_rate": 1.3203845564512657e-06, + "loss": 0.2999, + "step": 28465 + }, + { + "epoch": 1.333489483299761, + "grad_norm": 0.5720180448782184, + "learning_rate": 1.3202173489339221e-06, + "loss": 0.2682, + "step": 28466 + }, + { + "epoch": 1.3335363282896895, + "grad_norm": 0.5773709870097409, + "learning_rate": 1.320050148205932e-06, + "loss": 0.2739, + "step": 28467 + }, + { + "epoch": 1.3335831732796177, + "grad_norm": 0.6173365038492703, + "learning_rate": 1.3198829542682556e-06, + "loss": 0.2778, + "step": 28468 + }, + { + "epoch": 1.3336300182695462, + "grad_norm": 0.5661354522718082, + "learning_rate": 1.319715767121857e-06, + "loss": 0.2503, + "step": 28469 + }, + { + "epoch": 1.3336768632594744, + "grad_norm": 0.5811062518041166, + "learning_rate": 1.3195485867676972e-06, + "loss": 0.271, + "step": 28470 + }, + { + "epoch": 1.3337237082494027, + "grad_norm": 0.6032390089929867, + "learning_rate": 1.319381413206738e-06, + "loss": 0.2766, + "step": 28471 + }, + { + "epoch": 1.3337705532393311, + "grad_norm": 0.5782505649855482, + "learning_rate": 1.3192142464399422e-06, + "loss": 0.2604, + "step": 28472 + }, + { + "epoch": 1.3338173982292594, + "grad_norm": 0.5920103826360275, + "learning_rate": 1.3190470864682725e-06, + "loss": 0.2845, + "step": 28473 + }, + { + "epoch": 1.3338642432191876, + "grad_norm": 0.6549070444629768, + "learning_rate": 1.3188799332926894e-06, + "loss": 0.295, + "step": 28474 + }, + { + "epoch": 1.333911088209116, + "grad_norm": 0.5927330106960862, + "learning_rate": 1.3187127869141552e-06, + "loss": 0.2701, + "step": 28475 + }, + { + "epoch": 1.3339579331990443, + "grad_norm": 0.6150925421064309, + "learning_rate": 1.3185456473336328e-06, + "loss": 0.275, + "step": 28476 + }, + { + "epoch": 1.3340047781889726, + "grad_norm": 0.6020852577649353, + "learning_rate": 1.3183785145520825e-06, + "loss": 0.2575, + "step": 28477 + }, + { + "epoch": 1.334051623178901, + "grad_norm": 0.6288863833150048, + "learning_rate": 1.3182113885704667e-06, + "loss": 0.2892, + "step": 28478 + }, + { + "epoch": 1.3340984681688293, + "grad_norm": 0.5947856777504655, + "learning_rate": 1.3180442693897485e-06, + "loss": 0.2681, + "step": 28479 + }, + { + "epoch": 1.3341453131587577, + "grad_norm": 0.6328528196412677, + "learning_rate": 1.3178771570108873e-06, + "loss": 0.2656, + "step": 28480 + }, + { + "epoch": 1.334192158148686, + "grad_norm": 0.5546460242942153, + "learning_rate": 1.3177100514348462e-06, + "loss": 0.2656, + "step": 28481 + }, + { + "epoch": 1.3342390031386144, + "grad_norm": 0.5914339205770228, + "learning_rate": 1.3175429526625865e-06, + "loss": 0.2909, + "step": 28482 + }, + { + "epoch": 1.3342858481285427, + "grad_norm": 0.5707230191392056, + "learning_rate": 1.3173758606950708e-06, + "loss": 0.2503, + "step": 28483 + }, + { + "epoch": 1.334332693118471, + "grad_norm": 0.6037859482119269, + "learning_rate": 1.3172087755332585e-06, + "loss": 0.2884, + "step": 28484 + }, + { + "epoch": 1.3343795381083994, + "grad_norm": 0.6142368828067202, + "learning_rate": 1.3170416971781139e-06, + "loss": 0.2753, + "step": 28485 + }, + { + "epoch": 1.3344263830983276, + "grad_norm": 0.5827372975592473, + "learning_rate": 1.316874625630596e-06, + "loss": 0.275, + "step": 28486 + }, + { + "epoch": 1.3344732280882559, + "grad_norm": 0.6091152795081788, + "learning_rate": 1.3167075608916669e-06, + "loss": 0.2715, + "step": 28487 + }, + { + "epoch": 1.3345200730781843, + "grad_norm": 0.5915780094230337, + "learning_rate": 1.3165405029622885e-06, + "loss": 0.2776, + "step": 28488 + }, + { + "epoch": 1.3345669180681126, + "grad_norm": 0.6035153853968268, + "learning_rate": 1.3163734518434224e-06, + "loss": 0.2737, + "step": 28489 + }, + { + "epoch": 1.334613763058041, + "grad_norm": 0.6802318278428194, + "learning_rate": 1.3162064075360304e-06, + "loss": 0.2899, + "step": 28490 + }, + { + "epoch": 1.3346606080479693, + "grad_norm": 0.6151710569040615, + "learning_rate": 1.316039370041073e-06, + "loss": 0.2911, + "step": 28491 + }, + { + "epoch": 1.3347074530378977, + "grad_norm": 0.612490517051796, + "learning_rate": 1.3158723393595101e-06, + "loss": 0.2788, + "step": 28492 + }, + { + "epoch": 1.334754298027826, + "grad_norm": 0.5555675509656343, + "learning_rate": 1.3157053154923043e-06, + "loss": 0.2654, + "step": 28493 + }, + { + "epoch": 1.3348011430177542, + "grad_norm": 0.6284574435885283, + "learning_rate": 1.3155382984404173e-06, + "loss": 0.2841, + "step": 28494 + }, + { + "epoch": 1.3348479880076827, + "grad_norm": 0.5916818986436317, + "learning_rate": 1.3153712882048091e-06, + "loss": 0.2729, + "step": 28495 + }, + { + "epoch": 1.334894832997611, + "grad_norm": 0.5957073550340451, + "learning_rate": 1.3152042847864415e-06, + "loss": 0.2692, + "step": 28496 + }, + { + "epoch": 1.3349416779875392, + "grad_norm": 0.6176256283097907, + "learning_rate": 1.315037288186277e-06, + "loss": 0.2681, + "step": 28497 + }, + { + "epoch": 1.3349885229774676, + "grad_norm": 0.5359080185468956, + "learning_rate": 1.3148702984052747e-06, + "loss": 0.2741, + "step": 28498 + }, + { + "epoch": 1.3350353679673959, + "grad_norm": 0.5552864843519955, + "learning_rate": 1.3147033154443947e-06, + "loss": 0.2532, + "step": 28499 + }, + { + "epoch": 1.335082212957324, + "grad_norm": 0.6121982576220524, + "learning_rate": 1.3145363393045995e-06, + "loss": 0.2892, + "step": 28500 + }, + { + "epoch": 1.3351290579472526, + "grad_norm": 0.5672548917068735, + "learning_rate": 1.3143693699868497e-06, + "loss": 0.2847, + "step": 28501 + }, + { + "epoch": 1.3351759029371808, + "grad_norm": 0.5574271153755835, + "learning_rate": 1.3142024074921062e-06, + "loss": 0.2669, + "step": 28502 + }, + { + "epoch": 1.3352227479271093, + "grad_norm": 0.5386935355898006, + "learning_rate": 1.3140354518213306e-06, + "loss": 0.2548, + "step": 28503 + }, + { + "epoch": 1.3352695929170375, + "grad_norm": 0.5285843367289051, + "learning_rate": 1.3138685029754822e-06, + "loss": 0.2595, + "step": 28504 + }, + { + "epoch": 1.335316437906966, + "grad_norm": 0.627975070376952, + "learning_rate": 1.3137015609555232e-06, + "loss": 0.2854, + "step": 28505 + }, + { + "epoch": 1.3353632828968942, + "grad_norm": 0.5641923967030209, + "learning_rate": 1.3135346257624126e-06, + "loss": 0.2633, + "step": 28506 + }, + { + "epoch": 1.3354101278868225, + "grad_norm": 0.6053060768984927, + "learning_rate": 1.3133676973971122e-06, + "loss": 0.2809, + "step": 28507 + }, + { + "epoch": 1.335456972876751, + "grad_norm": 0.616059684615228, + "learning_rate": 1.3132007758605822e-06, + "loss": 0.2699, + "step": 28508 + }, + { + "epoch": 1.3355038178666792, + "grad_norm": 0.593746634475666, + "learning_rate": 1.3130338611537836e-06, + "loss": 0.2616, + "step": 28509 + }, + { + "epoch": 1.3355506628566074, + "grad_norm": 0.6475879055390457, + "learning_rate": 1.3128669532776778e-06, + "loss": 0.2897, + "step": 28510 + }, + { + "epoch": 1.3355975078465359, + "grad_norm": 0.6047394048738665, + "learning_rate": 1.3127000522332239e-06, + "loss": 0.2901, + "step": 28511 + }, + { + "epoch": 1.335644352836464, + "grad_norm": 0.5784823151759643, + "learning_rate": 1.3125331580213824e-06, + "loss": 0.26, + "step": 28512 + }, + { + "epoch": 1.3356911978263923, + "grad_norm": 0.5963328799888407, + "learning_rate": 1.312366270643114e-06, + "loss": 0.2698, + "step": 28513 + }, + { + "epoch": 1.3357380428163208, + "grad_norm": 0.5738285941955789, + "learning_rate": 1.312199390099379e-06, + "loss": 0.27, + "step": 28514 + }, + { + "epoch": 1.335784887806249, + "grad_norm": 0.6073891464772355, + "learning_rate": 1.312032516391138e-06, + "loss": 0.278, + "step": 28515 + }, + { + "epoch": 1.3358317327961775, + "grad_norm": 0.5828367562313961, + "learning_rate": 1.3118656495193522e-06, + "loss": 0.2721, + "step": 28516 + }, + { + "epoch": 1.3358785777861057, + "grad_norm": 0.589439716472458, + "learning_rate": 1.3116987894849804e-06, + "loss": 0.2801, + "step": 28517 + }, + { + "epoch": 1.3359254227760342, + "grad_norm": 0.5837143504952294, + "learning_rate": 1.3115319362889838e-06, + "loss": 0.2657, + "step": 28518 + }, + { + "epoch": 1.3359722677659625, + "grad_norm": 0.6171389369286733, + "learning_rate": 1.3113650899323215e-06, + "loss": 0.2699, + "step": 28519 + }, + { + "epoch": 1.3360191127558907, + "grad_norm": 0.6088331454169965, + "learning_rate": 1.3111982504159542e-06, + "loss": 0.2719, + "step": 28520 + }, + { + "epoch": 1.3360659577458192, + "grad_norm": 0.615383985966849, + "learning_rate": 1.3110314177408423e-06, + "loss": 0.2715, + "step": 28521 + }, + { + "epoch": 1.3361128027357474, + "grad_norm": 0.6054310861200619, + "learning_rate": 1.310864591907947e-06, + "loss": 0.2694, + "step": 28522 + }, + { + "epoch": 1.3361596477256756, + "grad_norm": 0.5897278973732197, + "learning_rate": 1.3106977729182258e-06, + "loss": 0.2784, + "step": 28523 + }, + { + "epoch": 1.336206492715604, + "grad_norm": 0.5994061981203875, + "learning_rate": 1.3105309607726402e-06, + "loss": 0.2558, + "step": 28524 + }, + { + "epoch": 1.3362533377055323, + "grad_norm": 0.5629175112171289, + "learning_rate": 1.3103641554721509e-06, + "loss": 0.2643, + "step": 28525 + }, + { + "epoch": 1.3363001826954608, + "grad_norm": 0.5854108719192584, + "learning_rate": 1.3101973570177157e-06, + "loss": 0.271, + "step": 28526 + }, + { + "epoch": 1.336347027685389, + "grad_norm": 0.6370494435932358, + "learning_rate": 1.3100305654102957e-06, + "loss": 0.2749, + "step": 28527 + }, + { + "epoch": 1.3363938726753175, + "grad_norm": 0.5921166510425103, + "learning_rate": 1.309863780650852e-06, + "loss": 0.279, + "step": 28528 + }, + { + "epoch": 1.3364407176652457, + "grad_norm": 0.5666335136034318, + "learning_rate": 1.3096970027403416e-06, + "loss": 0.264, + "step": 28529 + }, + { + "epoch": 1.336487562655174, + "grad_norm": 0.5566638376481758, + "learning_rate": 1.309530231679726e-06, + "loss": 0.2693, + "step": 28530 + }, + { + "epoch": 1.3365344076451025, + "grad_norm": 0.5652951228418214, + "learning_rate": 1.3093634674699646e-06, + "loss": 0.2666, + "step": 28531 + }, + { + "epoch": 1.3365812526350307, + "grad_norm": 0.6024294650815728, + "learning_rate": 1.3091967101120184e-06, + "loss": 0.2805, + "step": 28532 + }, + { + "epoch": 1.336628097624959, + "grad_norm": 0.5861190240919697, + "learning_rate": 1.3090299596068451e-06, + "loss": 0.2582, + "step": 28533 + }, + { + "epoch": 1.3366749426148874, + "grad_norm": 0.624395772942898, + "learning_rate": 1.3088632159554055e-06, + "loss": 0.2837, + "step": 28534 + }, + { + "epoch": 1.3367217876048156, + "grad_norm": 0.6465003955959796, + "learning_rate": 1.308696479158658e-06, + "loss": 0.3032, + "step": 28535 + }, + { + "epoch": 1.3367686325947439, + "grad_norm": 0.5868375090962158, + "learning_rate": 1.3085297492175628e-06, + "loss": 0.2974, + "step": 28536 + }, + { + "epoch": 1.3368154775846723, + "grad_norm": 0.601193781298603, + "learning_rate": 1.3083630261330792e-06, + "loss": 0.2836, + "step": 28537 + }, + { + "epoch": 1.3368623225746006, + "grad_norm": 0.5459487581490192, + "learning_rate": 1.3081963099061674e-06, + "loss": 0.2598, + "step": 28538 + }, + { + "epoch": 1.336909167564529, + "grad_norm": 0.6252501692374182, + "learning_rate": 1.3080296005377867e-06, + "loss": 0.2527, + "step": 28539 + }, + { + "epoch": 1.3369560125544573, + "grad_norm": 0.5507924634861433, + "learning_rate": 1.3078628980288966e-06, + "loss": 0.2537, + "step": 28540 + }, + { + "epoch": 1.3370028575443857, + "grad_norm": 0.6633556775821063, + "learning_rate": 1.3076962023804546e-06, + "loss": 0.3017, + "step": 28541 + }, + { + "epoch": 1.337049702534314, + "grad_norm": 0.582006012554237, + "learning_rate": 1.3075295135934213e-06, + "loss": 0.2662, + "step": 28542 + }, + { + "epoch": 1.3370965475242422, + "grad_norm": 0.6056694865769542, + "learning_rate": 1.307362831668756e-06, + "loss": 0.2708, + "step": 28543 + }, + { + "epoch": 1.3371433925141707, + "grad_norm": 0.5995537384841242, + "learning_rate": 1.3071961566074177e-06, + "loss": 0.2723, + "step": 28544 + }, + { + "epoch": 1.337190237504099, + "grad_norm": 0.5872235218197296, + "learning_rate": 1.3070294884103669e-06, + "loss": 0.2734, + "step": 28545 + }, + { + "epoch": 1.3372370824940272, + "grad_norm": 0.6464403366884133, + "learning_rate": 1.3068628270785603e-06, + "loss": 0.2862, + "step": 28546 + }, + { + "epoch": 1.3372839274839556, + "grad_norm": 0.6085306476833818, + "learning_rate": 1.3066961726129596e-06, + "loss": 0.2621, + "step": 28547 + }, + { + "epoch": 1.3373307724738839, + "grad_norm": 0.6487796129022695, + "learning_rate": 1.306529525014521e-06, + "loss": 0.2979, + "step": 28548 + }, + { + "epoch": 1.3373776174638121, + "grad_norm": 0.5899775809638604, + "learning_rate": 1.3063628842842051e-06, + "loss": 0.2784, + "step": 28549 + }, + { + "epoch": 1.3374244624537406, + "grad_norm": 0.6004182091271387, + "learning_rate": 1.3061962504229714e-06, + "loss": 0.2801, + "step": 28550 + }, + { + "epoch": 1.3374713074436688, + "grad_norm": 0.59082001812799, + "learning_rate": 1.3060296234317777e-06, + "loss": 0.2698, + "step": 28551 + }, + { + "epoch": 1.3375181524335973, + "grad_norm": 0.5670685948385183, + "learning_rate": 1.3058630033115847e-06, + "loss": 0.2528, + "step": 28552 + }, + { + "epoch": 1.3375649974235255, + "grad_norm": 0.5939798370051893, + "learning_rate": 1.3056963900633497e-06, + "loss": 0.2833, + "step": 28553 + }, + { + "epoch": 1.337611842413454, + "grad_norm": 0.6122251169224378, + "learning_rate": 1.3055297836880309e-06, + "loss": 0.274, + "step": 28554 + }, + { + "epoch": 1.3376586874033822, + "grad_norm": 0.6038266669167076, + "learning_rate": 1.3053631841865883e-06, + "loss": 0.2885, + "step": 28555 + }, + { + "epoch": 1.3377055323933105, + "grad_norm": 0.610902688169486, + "learning_rate": 1.3051965915599799e-06, + "loss": 0.2768, + "step": 28556 + }, + { + "epoch": 1.337752377383239, + "grad_norm": 0.5782648239608287, + "learning_rate": 1.3050300058091647e-06, + "loss": 0.2707, + "step": 28557 + }, + { + "epoch": 1.3377992223731672, + "grad_norm": 0.5468088359499227, + "learning_rate": 1.3048634269351029e-06, + "loss": 0.2641, + "step": 28558 + }, + { + "epoch": 1.3378460673630954, + "grad_norm": 0.6277246031171413, + "learning_rate": 1.3046968549387507e-06, + "loss": 0.2715, + "step": 28559 + }, + { + "epoch": 1.3378929123530239, + "grad_norm": 0.6180699604096527, + "learning_rate": 1.3045302898210682e-06, + "loss": 0.2754, + "step": 28560 + }, + { + "epoch": 1.3379397573429521, + "grad_norm": 0.5900788681872678, + "learning_rate": 1.3043637315830126e-06, + "loss": 0.2708, + "step": 28561 + }, + { + "epoch": 1.3379866023328806, + "grad_norm": 0.5992740015545768, + "learning_rate": 1.3041971802255436e-06, + "loss": 0.271, + "step": 28562 + }, + { + "epoch": 1.3380334473228088, + "grad_norm": 0.6141567047198782, + "learning_rate": 1.3040306357496188e-06, + "loss": 0.2894, + "step": 28563 + }, + { + "epoch": 1.3380802923127373, + "grad_norm": 0.6118206632633213, + "learning_rate": 1.3038640981561985e-06, + "loss": 0.2743, + "step": 28564 + }, + { + "epoch": 1.3381271373026655, + "grad_norm": 0.605847684372031, + "learning_rate": 1.3036975674462382e-06, + "loss": 0.2713, + "step": 28565 + }, + { + "epoch": 1.3381739822925938, + "grad_norm": 0.6641761880040669, + "learning_rate": 1.303531043620698e-06, + "loss": 0.2818, + "step": 28566 + }, + { + "epoch": 1.3382208272825222, + "grad_norm": 0.5694924734043119, + "learning_rate": 1.303364526680537e-06, + "loss": 0.2626, + "step": 28567 + }, + { + "epoch": 1.3382676722724505, + "grad_norm": 0.6037624536546157, + "learning_rate": 1.303198016626711e-06, + "loss": 0.2612, + "step": 28568 + }, + { + "epoch": 1.3383145172623787, + "grad_norm": 0.5935430739701519, + "learning_rate": 1.30303151346018e-06, + "loss": 0.2801, + "step": 28569 + }, + { + "epoch": 1.3383613622523072, + "grad_norm": 0.6136777524986865, + "learning_rate": 1.302865017181903e-06, + "loss": 0.2747, + "step": 28570 + }, + { + "epoch": 1.3384082072422354, + "grad_norm": 0.6281047739473512, + "learning_rate": 1.3026985277928356e-06, + "loss": 0.2735, + "step": 28571 + }, + { + "epoch": 1.3384550522321637, + "grad_norm": 0.588320047627743, + "learning_rate": 1.3025320452939373e-06, + "loss": 0.2833, + "step": 28572 + }, + { + "epoch": 1.3385018972220921, + "grad_norm": 0.5718663694938998, + "learning_rate": 1.3023655696861659e-06, + "loss": 0.2636, + "step": 28573 + }, + { + "epoch": 1.3385487422120204, + "grad_norm": 0.5801263158366092, + "learning_rate": 1.3021991009704807e-06, + "loss": 0.2531, + "step": 28574 + }, + { + "epoch": 1.3385955872019488, + "grad_norm": 0.6129402627500778, + "learning_rate": 1.3020326391478376e-06, + "loss": 0.2791, + "step": 28575 + }, + { + "epoch": 1.338642432191877, + "grad_norm": 0.6115088590989346, + "learning_rate": 1.301866184219196e-06, + "loss": 0.2777, + "step": 28576 + }, + { + "epoch": 1.3386892771818055, + "grad_norm": 0.618316272015917, + "learning_rate": 1.3016997361855138e-06, + "loss": 0.2738, + "step": 28577 + }, + { + "epoch": 1.3387361221717338, + "grad_norm": 0.5957987968103093, + "learning_rate": 1.3015332950477478e-06, + "loss": 0.2749, + "step": 28578 + }, + { + "epoch": 1.338782967161662, + "grad_norm": 0.5629892876876268, + "learning_rate": 1.3013668608068563e-06, + "loss": 0.2619, + "step": 28579 + }, + { + "epoch": 1.3388298121515905, + "grad_norm": 0.6119056319136233, + "learning_rate": 1.3012004334637972e-06, + "loss": 0.2677, + "step": 28580 + }, + { + "epoch": 1.3388766571415187, + "grad_norm": 0.5823922303835856, + "learning_rate": 1.3010340130195296e-06, + "loss": 0.2723, + "step": 28581 + }, + { + "epoch": 1.338923502131447, + "grad_norm": 0.5989111444268276, + "learning_rate": 1.3008675994750087e-06, + "loss": 0.2715, + "step": 28582 + }, + { + "epoch": 1.3389703471213754, + "grad_norm": 0.5683024286604641, + "learning_rate": 1.3007011928311945e-06, + "loss": 0.2634, + "step": 28583 + }, + { + "epoch": 1.3390171921113037, + "grad_norm": 0.5705271748182003, + "learning_rate": 1.3005347930890422e-06, + "loss": 0.2487, + "step": 28584 + }, + { + "epoch": 1.339064037101232, + "grad_norm": 0.5875825360809964, + "learning_rate": 1.300368400249511e-06, + "loss": 0.2736, + "step": 28585 + }, + { + "epoch": 1.3391108820911604, + "grad_norm": 0.6146618184437319, + "learning_rate": 1.300202014313558e-06, + "loss": 0.2794, + "step": 28586 + }, + { + "epoch": 1.3391577270810886, + "grad_norm": 0.5706897163323391, + "learning_rate": 1.3000356352821408e-06, + "loss": 0.2557, + "step": 28587 + }, + { + "epoch": 1.339204572071017, + "grad_norm": 0.5795882497579065, + "learning_rate": 1.299869263156218e-06, + "loss": 0.2666, + "step": 28588 + }, + { + "epoch": 1.3392514170609453, + "grad_norm": 0.5710721472534779, + "learning_rate": 1.299702897936746e-06, + "loss": 0.2845, + "step": 28589 + }, + { + "epoch": 1.3392982620508738, + "grad_norm": 0.5960002129541495, + "learning_rate": 1.2995365396246812e-06, + "loss": 0.2571, + "step": 28590 + }, + { + "epoch": 1.339345107040802, + "grad_norm": 0.6248983160331356, + "learning_rate": 1.2993701882209814e-06, + "loss": 0.2803, + "step": 28591 + }, + { + "epoch": 1.3393919520307302, + "grad_norm": 0.5731578126437549, + "learning_rate": 1.299203843726605e-06, + "loss": 0.2747, + "step": 28592 + }, + { + "epoch": 1.3394387970206587, + "grad_norm": 0.5652470703496365, + "learning_rate": 1.2990375061425085e-06, + "loss": 0.2502, + "step": 28593 + }, + { + "epoch": 1.339485642010587, + "grad_norm": 0.5528214957139215, + "learning_rate": 1.29887117546965e-06, + "loss": 0.2692, + "step": 28594 + }, + { + "epoch": 1.3395324870005152, + "grad_norm": 0.6333936468831325, + "learning_rate": 1.2987048517089853e-06, + "loss": 0.2834, + "step": 28595 + }, + { + "epoch": 1.3395793319904437, + "grad_norm": 0.6085822280464404, + "learning_rate": 1.2985385348614731e-06, + "loss": 0.2816, + "step": 28596 + }, + { + "epoch": 1.339626176980372, + "grad_norm": 0.5559430890980346, + "learning_rate": 1.298372224928069e-06, + "loss": 0.2594, + "step": 28597 + }, + { + "epoch": 1.3396730219703004, + "grad_norm": 0.5983293146011912, + "learning_rate": 1.2982059219097304e-06, + "loss": 0.2715, + "step": 28598 + }, + { + "epoch": 1.3397198669602286, + "grad_norm": 0.6210448667067205, + "learning_rate": 1.2980396258074146e-06, + "loss": 0.2878, + "step": 28599 + }, + { + "epoch": 1.339766711950157, + "grad_norm": 0.6125163344346969, + "learning_rate": 1.297873336622079e-06, + "loss": 0.2735, + "step": 28600 + }, + { + "epoch": 1.3398135569400853, + "grad_norm": 0.5803128486314857, + "learning_rate": 1.2977070543546807e-06, + "loss": 0.2825, + "step": 28601 + }, + { + "epoch": 1.3398604019300135, + "grad_norm": 0.5766305665702589, + "learning_rate": 1.2975407790061764e-06, + "loss": 0.2838, + "step": 28602 + }, + { + "epoch": 1.339907246919942, + "grad_norm": 0.6076972176860203, + "learning_rate": 1.2973745105775218e-06, + "loss": 0.2744, + "step": 28603 + }, + { + "epoch": 1.3399540919098702, + "grad_norm": 0.6007314925051093, + "learning_rate": 1.2972082490696744e-06, + "loss": 0.271, + "step": 28604 + }, + { + "epoch": 1.3400009368997985, + "grad_norm": 0.6133952869844999, + "learning_rate": 1.2970419944835913e-06, + "loss": 0.2662, + "step": 28605 + }, + { + "epoch": 1.340047781889727, + "grad_norm": 0.5935917507497533, + "learning_rate": 1.2968757468202291e-06, + "loss": 0.2742, + "step": 28606 + }, + { + "epoch": 1.3400946268796552, + "grad_norm": 0.6263987047018705, + "learning_rate": 1.2967095060805456e-06, + "loss": 0.2778, + "step": 28607 + }, + { + "epoch": 1.3401414718695834, + "grad_norm": 0.6052383260026528, + "learning_rate": 1.2965432722654958e-06, + "loss": 0.2656, + "step": 28608 + }, + { + "epoch": 1.340188316859512, + "grad_norm": 0.5967338702080762, + "learning_rate": 1.2963770453760378e-06, + "loss": 0.2697, + "step": 28609 + }, + { + "epoch": 1.3402351618494401, + "grad_norm": 0.5709762890532645, + "learning_rate": 1.296210825413126e-06, + "loss": 0.259, + "step": 28610 + }, + { + "epoch": 1.3402820068393686, + "grad_norm": 0.5967747550123698, + "learning_rate": 1.2960446123777187e-06, + "loss": 0.265, + "step": 28611 + }, + { + "epoch": 1.3403288518292968, + "grad_norm": 0.6080230607118947, + "learning_rate": 1.2958784062707723e-06, + "loss": 0.2678, + "step": 28612 + }, + { + "epoch": 1.3403756968192253, + "grad_norm": 0.6180968734721477, + "learning_rate": 1.2957122070932438e-06, + "loss": 0.2685, + "step": 28613 + }, + { + "epoch": 1.3404225418091535, + "grad_norm": 0.590927258855897, + "learning_rate": 1.2955460148460874e-06, + "loss": 0.279, + "step": 28614 + }, + { + "epoch": 1.3404693867990818, + "grad_norm": 0.5905001916688855, + "learning_rate": 1.2953798295302616e-06, + "loss": 0.2624, + "step": 28615 + }, + { + "epoch": 1.3405162317890102, + "grad_norm": 0.5864494559298555, + "learning_rate": 1.2952136511467228e-06, + "loss": 0.2667, + "step": 28616 + }, + { + "epoch": 1.3405630767789385, + "grad_norm": 0.5717228846181449, + "learning_rate": 1.2950474796964257e-06, + "loss": 0.2693, + "step": 28617 + }, + { + "epoch": 1.3406099217688667, + "grad_norm": 0.5769294930505947, + "learning_rate": 1.2948813151803273e-06, + "loss": 0.277, + "step": 28618 + }, + { + "epoch": 1.3406567667587952, + "grad_norm": 0.6380514195669006, + "learning_rate": 1.2947151575993854e-06, + "loss": 0.2826, + "step": 28619 + }, + { + "epoch": 1.3407036117487234, + "grad_norm": 0.6505089359437155, + "learning_rate": 1.2945490069545536e-06, + "loss": 0.2935, + "step": 28620 + }, + { + "epoch": 1.3407504567386517, + "grad_norm": 0.5581795179317851, + "learning_rate": 1.2943828632467892e-06, + "loss": 0.2562, + "step": 28621 + }, + { + "epoch": 1.3407973017285801, + "grad_norm": 0.662496529715681, + "learning_rate": 1.2942167264770487e-06, + "loss": 0.2657, + "step": 28622 + }, + { + "epoch": 1.3408441467185084, + "grad_norm": 0.5940563471635356, + "learning_rate": 1.2940505966462884e-06, + "loss": 0.2792, + "step": 28623 + }, + { + "epoch": 1.3408909917084368, + "grad_norm": 0.5324471137155604, + "learning_rate": 1.2938844737554635e-06, + "loss": 0.2603, + "step": 28624 + }, + { + "epoch": 1.340937836698365, + "grad_norm": 0.5771244527558642, + "learning_rate": 1.2937183578055307e-06, + "loss": 0.2778, + "step": 28625 + }, + { + "epoch": 1.3409846816882935, + "grad_norm": 0.6491160724850942, + "learning_rate": 1.293552248797445e-06, + "loss": 0.2809, + "step": 28626 + }, + { + "epoch": 1.3410315266782218, + "grad_norm": 0.563956440341517, + "learning_rate": 1.2933861467321627e-06, + "loss": 0.2564, + "step": 28627 + }, + { + "epoch": 1.34107837166815, + "grad_norm": 0.6020234685502612, + "learning_rate": 1.2932200516106398e-06, + "loss": 0.2766, + "step": 28628 + }, + { + "epoch": 1.3411252166580785, + "grad_norm": 0.6042328098980166, + "learning_rate": 1.2930539634338322e-06, + "loss": 0.2704, + "step": 28629 + }, + { + "epoch": 1.3411720616480067, + "grad_norm": 0.6179543474802938, + "learning_rate": 1.2928878822026967e-06, + "loss": 0.2756, + "step": 28630 + }, + { + "epoch": 1.341218906637935, + "grad_norm": 0.5914850518455724, + "learning_rate": 1.292721807918188e-06, + "loss": 0.2582, + "step": 28631 + }, + { + "epoch": 1.3412657516278634, + "grad_norm": 0.5732782652655989, + "learning_rate": 1.2925557405812608e-06, + "loss": 0.2818, + "step": 28632 + }, + { + "epoch": 1.3413125966177917, + "grad_norm": 0.5574530335318071, + "learning_rate": 1.292389680192872e-06, + "loss": 0.2722, + "step": 28633 + }, + { + "epoch": 1.34135944160772, + "grad_norm": 0.6200945154827882, + "learning_rate": 1.292223626753977e-06, + "loss": 0.2812, + "step": 28634 + }, + { + "epoch": 1.3414062865976484, + "grad_norm": 0.5941412173347925, + "learning_rate": 1.2920575802655316e-06, + "loss": 0.272, + "step": 28635 + }, + { + "epoch": 1.3414531315875768, + "grad_norm": 0.604210699944704, + "learning_rate": 1.291891540728491e-06, + "loss": 0.2784, + "step": 28636 + }, + { + "epoch": 1.341499976577505, + "grad_norm": 0.6041117392548451, + "learning_rate": 1.291725508143812e-06, + "loss": 0.2696, + "step": 28637 + }, + { + "epoch": 1.3415468215674333, + "grad_norm": 0.6106587875621201, + "learning_rate": 1.2915594825124488e-06, + "loss": 0.2635, + "step": 28638 + }, + { + "epoch": 1.3415936665573618, + "grad_norm": 0.6023371789698011, + "learning_rate": 1.2913934638353565e-06, + "loss": 0.2661, + "step": 28639 + }, + { + "epoch": 1.34164051154729, + "grad_norm": 0.588700954590226, + "learning_rate": 1.291227452113491e-06, + "loss": 0.2661, + "step": 28640 + }, + { + "epoch": 1.3416873565372183, + "grad_norm": 0.6055799853937454, + "learning_rate": 1.2910614473478072e-06, + "loss": 0.2976, + "step": 28641 + }, + { + "epoch": 1.3417342015271467, + "grad_norm": 0.6128950333810627, + "learning_rate": 1.2908954495392615e-06, + "loss": 0.2705, + "step": 28642 + }, + { + "epoch": 1.341781046517075, + "grad_norm": 0.6022923283413785, + "learning_rate": 1.2907294586888092e-06, + "loss": 0.2831, + "step": 28643 + }, + { + "epoch": 1.3418278915070032, + "grad_norm": 0.5611251720646885, + "learning_rate": 1.2905634747974045e-06, + "loss": 0.2628, + "step": 28644 + }, + { + "epoch": 1.3418747364969317, + "grad_norm": 0.6060209145420058, + "learning_rate": 1.2903974978660034e-06, + "loss": 0.2839, + "step": 28645 + }, + { + "epoch": 1.34192158148686, + "grad_norm": 0.6060136308486547, + "learning_rate": 1.2902315278955595e-06, + "loss": 0.264, + "step": 28646 + }, + { + "epoch": 1.3419684264767884, + "grad_norm": 0.5571867208087404, + "learning_rate": 1.2900655648870298e-06, + "loss": 0.2663, + "step": 28647 + }, + { + "epoch": 1.3420152714667166, + "grad_norm": 0.570210836018471, + "learning_rate": 1.2898996088413684e-06, + "loss": 0.2602, + "step": 28648 + }, + { + "epoch": 1.342062116456645, + "grad_norm": 0.5625901817993398, + "learning_rate": 1.2897336597595306e-06, + "loss": 0.2557, + "step": 28649 + }, + { + "epoch": 1.3421089614465733, + "grad_norm": 0.6013439256953239, + "learning_rate": 1.289567717642472e-06, + "loss": 0.2731, + "step": 28650 + }, + { + "epoch": 1.3421558064365016, + "grad_norm": 0.630584828331849, + "learning_rate": 1.2894017824911476e-06, + "loss": 0.2751, + "step": 28651 + }, + { + "epoch": 1.34220265142643, + "grad_norm": 0.5878807368429848, + "learning_rate": 1.2892358543065104e-06, + "loss": 0.28, + "step": 28652 + }, + { + "epoch": 1.3422494964163583, + "grad_norm": 0.6270573041659971, + "learning_rate": 1.2890699330895163e-06, + "loss": 0.2845, + "step": 28653 + }, + { + "epoch": 1.3422963414062865, + "grad_norm": 0.5907208948359439, + "learning_rate": 1.2889040188411207e-06, + "loss": 0.2805, + "step": 28654 + }, + { + "epoch": 1.342343186396215, + "grad_norm": 0.5876733350459485, + "learning_rate": 1.2887381115622777e-06, + "loss": 0.2727, + "step": 28655 + }, + { + "epoch": 1.3423900313861432, + "grad_norm": 0.6085600053154991, + "learning_rate": 1.288572211253944e-06, + "loss": 0.2879, + "step": 28656 + }, + { + "epoch": 1.3424368763760715, + "grad_norm": 0.5641597887543987, + "learning_rate": 1.288406317917071e-06, + "loss": 0.2699, + "step": 28657 + }, + { + "epoch": 1.342483721366, + "grad_norm": 0.6470710159240495, + "learning_rate": 1.2882404315526168e-06, + "loss": 0.2942, + "step": 28658 + }, + { + "epoch": 1.3425305663559282, + "grad_norm": 0.581790498277443, + "learning_rate": 1.2880745521615328e-06, + "loss": 0.2759, + "step": 28659 + }, + { + "epoch": 1.3425774113458566, + "grad_norm": 0.6012331451357757, + "learning_rate": 1.2879086797447758e-06, + "loss": 0.2722, + "step": 28660 + }, + { + "epoch": 1.3426242563357849, + "grad_norm": 0.6635783192143772, + "learning_rate": 1.2877428143032995e-06, + "loss": 0.2841, + "step": 28661 + }, + { + "epoch": 1.3426711013257133, + "grad_norm": 0.6022563915558388, + "learning_rate": 1.2875769558380592e-06, + "loss": 0.2671, + "step": 28662 + }, + { + "epoch": 1.3427179463156416, + "grad_norm": 0.5741326577033027, + "learning_rate": 1.2874111043500082e-06, + "loss": 0.2792, + "step": 28663 + }, + { + "epoch": 1.3427647913055698, + "grad_norm": 0.6064884873612186, + "learning_rate": 1.2872452598401011e-06, + "loss": 0.283, + "step": 28664 + }, + { + "epoch": 1.3428116362954983, + "grad_norm": 0.6232930556897988, + "learning_rate": 1.2870794223092941e-06, + "loss": 0.2791, + "step": 28665 + }, + { + "epoch": 1.3428584812854265, + "grad_norm": 0.5840925693512686, + "learning_rate": 1.286913591758539e-06, + "loss": 0.2662, + "step": 28666 + }, + { + "epoch": 1.3429053262753547, + "grad_norm": 0.6180899134646405, + "learning_rate": 1.2867477681887913e-06, + "loss": 0.2666, + "step": 28667 + }, + { + "epoch": 1.3429521712652832, + "grad_norm": 0.606755280248863, + "learning_rate": 1.2865819516010065e-06, + "loss": 0.2726, + "step": 28668 + }, + { + "epoch": 1.3429990162552115, + "grad_norm": 0.6231648612002215, + "learning_rate": 1.2864161419961362e-06, + "loss": 0.2832, + "step": 28669 + }, + { + "epoch": 1.3430458612451397, + "grad_norm": 0.613177614352387, + "learning_rate": 1.286250339375136e-06, + "loss": 0.2909, + "step": 28670 + }, + { + "epoch": 1.3430927062350682, + "grad_norm": 0.6159781043442912, + "learning_rate": 1.2860845437389602e-06, + "loss": 0.2784, + "step": 28671 + }, + { + "epoch": 1.3431395512249966, + "grad_norm": 0.6207708446614563, + "learning_rate": 1.2859187550885639e-06, + "loss": 0.2796, + "step": 28672 + }, + { + "epoch": 1.3431863962149249, + "grad_norm": 0.5774072121861484, + "learning_rate": 1.2857529734248986e-06, + "loss": 0.2799, + "step": 28673 + }, + { + "epoch": 1.343233241204853, + "grad_norm": 0.6084078049261011, + "learning_rate": 1.285587198748921e-06, + "loss": 0.301, + "step": 28674 + }, + { + "epoch": 1.3432800861947816, + "grad_norm": 0.6003265302141462, + "learning_rate": 1.285421431061583e-06, + "loss": 0.2697, + "step": 28675 + }, + { + "epoch": 1.3433269311847098, + "grad_norm": 0.5722637200328177, + "learning_rate": 1.2852556703638391e-06, + "loss": 0.252, + "step": 28676 + }, + { + "epoch": 1.343373776174638, + "grad_norm": 0.6045994788971787, + "learning_rate": 1.285089916656644e-06, + "loss": 0.2717, + "step": 28677 + }, + { + "epoch": 1.3434206211645665, + "grad_norm": 0.5893269589735242, + "learning_rate": 1.2849241699409503e-06, + "loss": 0.2791, + "step": 28678 + }, + { + "epoch": 1.3434674661544947, + "grad_norm": 0.5618787687989106, + "learning_rate": 1.284758430217714e-06, + "loss": 0.2498, + "step": 28679 + }, + { + "epoch": 1.343514311144423, + "grad_norm": 0.5812652182046917, + "learning_rate": 1.2845926974878874e-06, + "loss": 0.2699, + "step": 28680 + }, + { + "epoch": 1.3435611561343515, + "grad_norm": 0.5689797522702926, + "learning_rate": 1.2844269717524239e-06, + "loss": 0.2715, + "step": 28681 + }, + { + "epoch": 1.3436080011242797, + "grad_norm": 0.6440023516231437, + "learning_rate": 1.284261253012277e-06, + "loss": 0.3049, + "step": 28682 + }, + { + "epoch": 1.3436548461142082, + "grad_norm": 0.5588509164085572, + "learning_rate": 1.2840955412684014e-06, + "loss": 0.2508, + "step": 28683 + }, + { + "epoch": 1.3437016911041364, + "grad_norm": 0.5994661308532094, + "learning_rate": 1.28392983652175e-06, + "loss": 0.2797, + "step": 28684 + }, + { + "epoch": 1.3437485360940649, + "grad_norm": 0.575478154960199, + "learning_rate": 1.283764138773278e-06, + "loss": 0.2697, + "step": 28685 + }, + { + "epoch": 1.343795381083993, + "grad_norm": 0.563426929716019, + "learning_rate": 1.2835984480239367e-06, + "loss": 0.2666, + "step": 28686 + }, + { + "epoch": 1.3438422260739213, + "grad_norm": 0.55893561985093, + "learning_rate": 1.2834327642746813e-06, + "loss": 0.2689, + "step": 28687 + }, + { + "epoch": 1.3438890710638498, + "grad_norm": 0.5163059718754708, + "learning_rate": 1.2832670875264638e-06, + "loss": 0.2378, + "step": 28688 + }, + { + "epoch": 1.343935916053778, + "grad_norm": 0.609193505511221, + "learning_rate": 1.2831014177802385e-06, + "loss": 0.2925, + "step": 28689 + }, + { + "epoch": 1.3439827610437063, + "grad_norm": 0.606800026277877, + "learning_rate": 1.2829357550369586e-06, + "loss": 0.2783, + "step": 28690 + }, + { + "epoch": 1.3440296060336347, + "grad_norm": 0.6052002184390458, + "learning_rate": 1.2827700992975772e-06, + "loss": 0.2703, + "step": 28691 + }, + { + "epoch": 1.344076451023563, + "grad_norm": 0.6028699826036703, + "learning_rate": 1.2826044505630495e-06, + "loss": 0.2816, + "step": 28692 + }, + { + "epoch": 1.3441232960134912, + "grad_norm": 0.6213451335299749, + "learning_rate": 1.2824388088343268e-06, + "loss": 0.2892, + "step": 28693 + }, + { + "epoch": 1.3441701410034197, + "grad_norm": 0.5430780155418362, + "learning_rate": 1.2822731741123618e-06, + "loss": 0.2518, + "step": 28694 + }, + { + "epoch": 1.344216985993348, + "grad_norm": 0.5876268570392783, + "learning_rate": 1.2821075463981087e-06, + "loss": 0.281, + "step": 28695 + }, + { + "epoch": 1.3442638309832764, + "grad_norm": 0.6093110822774352, + "learning_rate": 1.281941925692521e-06, + "loss": 0.2795, + "step": 28696 + }, + { + "epoch": 1.3443106759732046, + "grad_norm": 0.5774344564453707, + "learning_rate": 1.2817763119965505e-06, + "loss": 0.2653, + "step": 28697 + }, + { + "epoch": 1.344357520963133, + "grad_norm": 0.5881751357815573, + "learning_rate": 1.2816107053111526e-06, + "loss": 0.26, + "step": 28698 + }, + { + "epoch": 1.3444043659530613, + "grad_norm": 0.6830527148838588, + "learning_rate": 1.281445105637278e-06, + "loss": 0.2876, + "step": 28699 + }, + { + "epoch": 1.3444512109429896, + "grad_norm": 0.584671183217539, + "learning_rate": 1.2812795129758812e-06, + "loss": 0.2569, + "step": 28700 + }, + { + "epoch": 1.344498055932918, + "grad_norm": 0.6136931441101683, + "learning_rate": 1.281113927327914e-06, + "loss": 0.2918, + "step": 28701 + }, + { + "epoch": 1.3445449009228463, + "grad_norm": 0.5982988268584575, + "learning_rate": 1.2809483486943291e-06, + "loss": 0.276, + "step": 28702 + }, + { + "epoch": 1.3445917459127745, + "grad_norm": 0.5930535070081673, + "learning_rate": 1.2807827770760805e-06, + "loss": 0.2602, + "step": 28703 + }, + { + "epoch": 1.344638590902703, + "grad_norm": 0.6411056420510514, + "learning_rate": 1.2806172124741214e-06, + "loss": 0.2965, + "step": 28704 + }, + { + "epoch": 1.3446854358926312, + "grad_norm": 0.6030605782945991, + "learning_rate": 1.280451654889403e-06, + "loss": 0.2801, + "step": 28705 + }, + { + "epoch": 1.3447322808825595, + "grad_norm": 0.5544349618865506, + "learning_rate": 1.2802861043228786e-06, + "loss": 0.2555, + "step": 28706 + }, + { + "epoch": 1.344779125872488, + "grad_norm": 0.5554753558827698, + "learning_rate": 1.2801205607755022e-06, + "loss": 0.2534, + "step": 28707 + }, + { + "epoch": 1.3448259708624164, + "grad_norm": 0.6142767559495599, + "learning_rate": 1.2799550242482242e-06, + "loss": 0.2861, + "step": 28708 + }, + { + "epoch": 1.3448728158523446, + "grad_norm": 0.5720564515431753, + "learning_rate": 1.2797894947419986e-06, + "loss": 0.268, + "step": 28709 + }, + { + "epoch": 1.3449196608422729, + "grad_norm": 0.6137504435465746, + "learning_rate": 1.2796239722577775e-06, + "loss": 0.2707, + "step": 28710 + }, + { + "epoch": 1.3449665058322013, + "grad_norm": 0.6575628951799584, + "learning_rate": 1.2794584567965151e-06, + "loss": 0.2872, + "step": 28711 + }, + { + "epoch": 1.3450133508221296, + "grad_norm": 0.5970523713940236, + "learning_rate": 1.2792929483591613e-06, + "loss": 0.2761, + "step": 28712 + }, + { + "epoch": 1.3450601958120578, + "grad_norm": 0.5746742672355029, + "learning_rate": 1.2791274469466696e-06, + "loss": 0.2644, + "step": 28713 + }, + { + "epoch": 1.3451070408019863, + "grad_norm": 0.5758425140991562, + "learning_rate": 1.2789619525599938e-06, + "loss": 0.2606, + "step": 28714 + }, + { + "epoch": 1.3451538857919145, + "grad_norm": 0.6250532458821655, + "learning_rate": 1.278796465200084e-06, + "loss": 0.2841, + "step": 28715 + }, + { + "epoch": 1.3452007307818428, + "grad_norm": 0.5974754440730263, + "learning_rate": 1.2786309848678938e-06, + "loss": 0.2763, + "step": 28716 + }, + { + "epoch": 1.3452475757717712, + "grad_norm": 0.5909135796048216, + "learning_rate": 1.278465511564376e-06, + "loss": 0.2688, + "step": 28717 + }, + { + "epoch": 1.3452944207616995, + "grad_norm": 0.5830568905110786, + "learning_rate": 1.2783000452904814e-06, + "loss": 0.2639, + "step": 28718 + }, + { + "epoch": 1.345341265751628, + "grad_norm": 0.5636704446048252, + "learning_rate": 1.2781345860471628e-06, + "loss": 0.2746, + "step": 28719 + }, + { + "epoch": 1.3453881107415562, + "grad_norm": 0.5751698352014526, + "learning_rate": 1.2779691338353727e-06, + "loss": 0.2706, + "step": 28720 + }, + { + "epoch": 1.3454349557314846, + "grad_norm": 0.5865991005696979, + "learning_rate": 1.277803688656064e-06, + "loss": 0.2763, + "step": 28721 + }, + { + "epoch": 1.3454818007214129, + "grad_norm": 0.6091117060986208, + "learning_rate": 1.277638250510187e-06, + "loss": 0.267, + "step": 28722 + }, + { + "epoch": 1.3455286457113411, + "grad_norm": 0.6264918442019026, + "learning_rate": 1.2774728193986953e-06, + "loss": 0.2827, + "step": 28723 + }, + { + "epoch": 1.3455754907012696, + "grad_norm": 0.5463775191936153, + "learning_rate": 1.2773073953225396e-06, + "loss": 0.2441, + "step": 28724 + }, + { + "epoch": 1.3456223356911978, + "grad_norm": 0.573970823686965, + "learning_rate": 1.277141978282672e-06, + "loss": 0.2655, + "step": 28725 + }, + { + "epoch": 1.345669180681126, + "grad_norm": 0.6147493065906556, + "learning_rate": 1.2769765682800455e-06, + "loss": 0.2615, + "step": 28726 + }, + { + "epoch": 1.3457160256710545, + "grad_norm": 0.6211001751465357, + "learning_rate": 1.2768111653156118e-06, + "loss": 0.2743, + "step": 28727 + }, + { + "epoch": 1.3457628706609828, + "grad_norm": 0.6047065437309422, + "learning_rate": 1.2766457693903228e-06, + "loss": 0.2728, + "step": 28728 + }, + { + "epoch": 1.345809715650911, + "grad_norm": 0.6284148121471494, + "learning_rate": 1.2764803805051302e-06, + "loss": 0.2731, + "step": 28729 + }, + { + "epoch": 1.3458565606408395, + "grad_norm": 0.5643007037818848, + "learning_rate": 1.276314998660984e-06, + "loss": 0.262, + "step": 28730 + }, + { + "epoch": 1.3459034056307677, + "grad_norm": 0.6246411310864927, + "learning_rate": 1.2761496238588377e-06, + "loss": 0.2842, + "step": 28731 + }, + { + "epoch": 1.3459502506206962, + "grad_norm": 0.562883614436771, + "learning_rate": 1.275984256099643e-06, + "loss": 0.2651, + "step": 28732 + }, + { + "epoch": 1.3459970956106244, + "grad_norm": 0.5733543435015315, + "learning_rate": 1.275818895384351e-06, + "loss": 0.2679, + "step": 28733 + }, + { + "epoch": 1.3460439406005529, + "grad_norm": 0.5900201382664215, + "learning_rate": 1.2756535417139146e-06, + "loss": 0.2622, + "step": 28734 + }, + { + "epoch": 1.3460907855904811, + "grad_norm": 0.58566533781895, + "learning_rate": 1.2754881950892834e-06, + "loss": 0.2669, + "step": 28735 + }, + { + "epoch": 1.3461376305804094, + "grad_norm": 0.662505206174523, + "learning_rate": 1.2753228555114109e-06, + "loss": 0.28, + "step": 28736 + }, + { + "epoch": 1.3461844755703378, + "grad_norm": 0.5679281797456612, + "learning_rate": 1.2751575229812463e-06, + "loss": 0.271, + "step": 28737 + }, + { + "epoch": 1.346231320560266, + "grad_norm": 0.5525957473068087, + "learning_rate": 1.2749921974997425e-06, + "loss": 0.2709, + "step": 28738 + }, + { + "epoch": 1.3462781655501943, + "grad_norm": 0.5865857579407566, + "learning_rate": 1.2748268790678508e-06, + "loss": 0.2699, + "step": 28739 + }, + { + "epoch": 1.3463250105401228, + "grad_norm": 0.6378469973284768, + "learning_rate": 1.2746615676865224e-06, + "loss": 0.2829, + "step": 28740 + }, + { + "epoch": 1.346371855530051, + "grad_norm": 0.592705632948997, + "learning_rate": 1.2744962633567096e-06, + "loss": 0.2598, + "step": 28741 + }, + { + "epoch": 1.3464187005199793, + "grad_norm": 0.5749527200415776, + "learning_rate": 1.2743309660793629e-06, + "loss": 0.2688, + "step": 28742 + }, + { + "epoch": 1.3464655455099077, + "grad_norm": 0.6212031418048303, + "learning_rate": 1.2741656758554322e-06, + "loss": 0.275, + "step": 28743 + }, + { + "epoch": 1.3465123904998362, + "grad_norm": 0.5717052949424862, + "learning_rate": 1.2740003926858703e-06, + "loss": 0.2787, + "step": 28744 + }, + { + "epoch": 1.3465592354897644, + "grad_norm": 0.5769620746193417, + "learning_rate": 1.273835116571628e-06, + "loss": 0.2531, + "step": 28745 + }, + { + "epoch": 1.3466060804796927, + "grad_norm": 0.611408460406283, + "learning_rate": 1.273669847513656e-06, + "loss": 0.2714, + "step": 28746 + }, + { + "epoch": 1.3466529254696211, + "grad_norm": 0.5819379622859899, + "learning_rate": 1.2735045855129068e-06, + "loss": 0.2732, + "step": 28747 + }, + { + "epoch": 1.3466997704595494, + "grad_norm": 0.554507260162585, + "learning_rate": 1.2733393305703298e-06, + "loss": 0.2598, + "step": 28748 + }, + { + "epoch": 1.3467466154494776, + "grad_norm": 0.581184729223013, + "learning_rate": 1.2731740826868776e-06, + "loss": 0.2665, + "step": 28749 + }, + { + "epoch": 1.346793460439406, + "grad_norm": 0.5961150269207328, + "learning_rate": 1.273008841863499e-06, + "loss": 0.2735, + "step": 28750 + }, + { + "epoch": 1.3468403054293343, + "grad_norm": 0.602719629095094, + "learning_rate": 1.2728436081011464e-06, + "loss": 0.2804, + "step": 28751 + }, + { + "epoch": 1.3468871504192625, + "grad_norm": 0.5786785383784256, + "learning_rate": 1.2726783814007702e-06, + "loss": 0.2729, + "step": 28752 + }, + { + "epoch": 1.346933995409191, + "grad_norm": 0.6291597234843672, + "learning_rate": 1.2725131617633225e-06, + "loss": 0.2838, + "step": 28753 + }, + { + "epoch": 1.3469808403991193, + "grad_norm": 0.5966281739678908, + "learning_rate": 1.272347949189752e-06, + "loss": 0.282, + "step": 28754 + }, + { + "epoch": 1.3470276853890477, + "grad_norm": 0.5872810888141682, + "learning_rate": 1.2721827436810103e-06, + "loss": 0.2739, + "step": 28755 + }, + { + "epoch": 1.347074530378976, + "grad_norm": 0.5539454536411065, + "learning_rate": 1.2720175452380496e-06, + "loss": 0.2567, + "step": 28756 + }, + { + "epoch": 1.3471213753689044, + "grad_norm": 0.6062435565455973, + "learning_rate": 1.2718523538618182e-06, + "loss": 0.2817, + "step": 28757 + }, + { + "epoch": 1.3471682203588327, + "grad_norm": 0.6251456873000477, + "learning_rate": 1.271687169553268e-06, + "loss": 0.2868, + "step": 28758 + }, + { + "epoch": 1.347215065348761, + "grad_norm": 0.5882287681576306, + "learning_rate": 1.27152199231335e-06, + "loss": 0.2751, + "step": 28759 + }, + { + "epoch": 1.3472619103386894, + "grad_norm": 0.6165079493462233, + "learning_rate": 1.2713568221430133e-06, + "loss": 0.272, + "step": 28760 + }, + { + "epoch": 1.3473087553286176, + "grad_norm": 0.6449095881541741, + "learning_rate": 1.2711916590432096e-06, + "loss": 0.2848, + "step": 28761 + }, + { + "epoch": 1.3473556003185458, + "grad_norm": 0.6020714497129324, + "learning_rate": 1.2710265030148888e-06, + "loss": 0.2763, + "step": 28762 + }, + { + "epoch": 1.3474024453084743, + "grad_norm": 0.6101209141464011, + "learning_rate": 1.2708613540590026e-06, + "loss": 0.2647, + "step": 28763 + }, + { + "epoch": 1.3474492902984025, + "grad_norm": 0.6344699844848186, + "learning_rate": 1.2706962121764992e-06, + "loss": 0.2774, + "step": 28764 + }, + { + "epoch": 1.3474961352883308, + "grad_norm": 0.6078659662536848, + "learning_rate": 1.2705310773683316e-06, + "loss": 0.2772, + "step": 28765 + }, + { + "epoch": 1.3475429802782593, + "grad_norm": 0.6015977110263798, + "learning_rate": 1.2703659496354475e-06, + "loss": 0.2697, + "step": 28766 + }, + { + "epoch": 1.3475898252681875, + "grad_norm": 0.5609148445298037, + "learning_rate": 1.2702008289787982e-06, + "loss": 0.2605, + "step": 28767 + }, + { + "epoch": 1.347636670258116, + "grad_norm": 0.6197647584968332, + "learning_rate": 1.2700357153993337e-06, + "loss": 0.2931, + "step": 28768 + }, + { + "epoch": 1.3476835152480442, + "grad_norm": 0.5690148322723806, + "learning_rate": 1.269870608898005e-06, + "loss": 0.2663, + "step": 28769 + }, + { + "epoch": 1.3477303602379727, + "grad_norm": 0.5397291739649626, + "learning_rate": 1.2697055094757626e-06, + "loss": 0.2489, + "step": 28770 + }, + { + "epoch": 1.347777205227901, + "grad_norm": 0.5692270148953845, + "learning_rate": 1.2695404171335558e-06, + "loss": 0.2634, + "step": 28771 + }, + { + "epoch": 1.3478240502178291, + "grad_norm": 0.6264141167747462, + "learning_rate": 1.2693753318723335e-06, + "loss": 0.2648, + "step": 28772 + }, + { + "epoch": 1.3478708952077576, + "grad_norm": 0.597259683356245, + "learning_rate": 1.2692102536930467e-06, + "loss": 0.2777, + "step": 28773 + }, + { + "epoch": 1.3479177401976858, + "grad_norm": 0.5864887797856013, + "learning_rate": 1.269045182596646e-06, + "loss": 0.2752, + "step": 28774 + }, + { + "epoch": 1.347964585187614, + "grad_norm": 0.5766885854555018, + "learning_rate": 1.2688801185840808e-06, + "loss": 0.2681, + "step": 28775 + }, + { + "epoch": 1.3480114301775425, + "grad_norm": 0.6028024028425569, + "learning_rate": 1.268715061656301e-06, + "loss": 0.2738, + "step": 28776 + }, + { + "epoch": 1.3480582751674708, + "grad_norm": 0.5942751791481762, + "learning_rate": 1.2685500118142575e-06, + "loss": 0.2749, + "step": 28777 + }, + { + "epoch": 1.348105120157399, + "grad_norm": 0.5933906946965698, + "learning_rate": 1.2683849690588989e-06, + "loss": 0.2684, + "step": 28778 + }, + { + "epoch": 1.3481519651473275, + "grad_norm": 0.6236280723137432, + "learning_rate": 1.2682199333911746e-06, + "loss": 0.2773, + "step": 28779 + }, + { + "epoch": 1.348198810137256, + "grad_norm": 0.6100871940542117, + "learning_rate": 1.268054904812035e-06, + "loss": 0.2722, + "step": 28780 + }, + { + "epoch": 1.3482456551271842, + "grad_norm": 0.605542782845018, + "learning_rate": 1.2678898833224297e-06, + "loss": 0.283, + "step": 28781 + }, + { + "epoch": 1.3482925001171124, + "grad_norm": 0.6061520112658403, + "learning_rate": 1.2677248689233084e-06, + "loss": 0.2725, + "step": 28782 + }, + { + "epoch": 1.348339345107041, + "grad_norm": 0.5744530439744702, + "learning_rate": 1.2675598616156218e-06, + "loss": 0.2696, + "step": 28783 + }, + { + "epoch": 1.3483861900969691, + "grad_norm": 0.5809591403311558, + "learning_rate": 1.2673948614003173e-06, + "loss": 0.2617, + "step": 28784 + }, + { + "epoch": 1.3484330350868974, + "grad_norm": 0.5555139093551348, + "learning_rate": 1.2672298682783467e-06, + "loss": 0.2705, + "step": 28785 + }, + { + "epoch": 1.3484798800768258, + "grad_norm": 0.5999649870901087, + "learning_rate": 1.2670648822506576e-06, + "loss": 0.2766, + "step": 28786 + }, + { + "epoch": 1.348526725066754, + "grad_norm": 0.6092930242281686, + "learning_rate": 1.2668999033181996e-06, + "loss": 0.2845, + "step": 28787 + }, + { + "epoch": 1.3485735700566823, + "grad_norm": 0.61322513493941, + "learning_rate": 1.2667349314819233e-06, + "loss": 0.2902, + "step": 28788 + }, + { + "epoch": 1.3486204150466108, + "grad_norm": 0.6344962082642042, + "learning_rate": 1.2665699667427771e-06, + "loss": 0.2788, + "step": 28789 + }, + { + "epoch": 1.348667260036539, + "grad_norm": 0.5804378726818219, + "learning_rate": 1.266405009101712e-06, + "loss": 0.271, + "step": 28790 + }, + { + "epoch": 1.3487141050264675, + "grad_norm": 0.6207112362552234, + "learning_rate": 1.2662400585596763e-06, + "loss": 0.2869, + "step": 28791 + }, + { + "epoch": 1.3487609500163957, + "grad_norm": 0.5857280775599105, + "learning_rate": 1.2660751151176176e-06, + "loss": 0.2669, + "step": 28792 + }, + { + "epoch": 1.3488077950063242, + "grad_norm": 0.6033015819552209, + "learning_rate": 1.2659101787764862e-06, + "loss": 0.2717, + "step": 28793 + }, + { + "epoch": 1.3488546399962524, + "grad_norm": 0.6014284266444685, + "learning_rate": 1.2657452495372319e-06, + "loss": 0.2748, + "step": 28794 + }, + { + "epoch": 1.3489014849861807, + "grad_norm": 0.556971787344387, + "learning_rate": 1.2655803274008036e-06, + "loss": 0.272, + "step": 28795 + }, + { + "epoch": 1.3489483299761091, + "grad_norm": 0.5566981782541163, + "learning_rate": 1.2654154123681512e-06, + "loss": 0.252, + "step": 28796 + }, + { + "epoch": 1.3489951749660374, + "grad_norm": 0.6036923292699126, + "learning_rate": 1.2652505044402215e-06, + "loss": 0.2736, + "step": 28797 + }, + { + "epoch": 1.3490420199559656, + "grad_norm": 0.6151241486648885, + "learning_rate": 1.265085603617966e-06, + "loss": 0.2802, + "step": 28798 + }, + { + "epoch": 1.349088864945894, + "grad_norm": 0.552388735523642, + "learning_rate": 1.2649207099023315e-06, + "loss": 0.2713, + "step": 28799 + }, + { + "epoch": 1.3491357099358223, + "grad_norm": 0.583893304514464, + "learning_rate": 1.2647558232942674e-06, + "loss": 0.2627, + "step": 28800 + }, + { + "epoch": 1.3491825549257506, + "grad_norm": 0.6276095216474576, + "learning_rate": 1.2645909437947238e-06, + "loss": 0.2773, + "step": 28801 + }, + { + "epoch": 1.349229399915679, + "grad_norm": 0.655461839804451, + "learning_rate": 1.2644260714046496e-06, + "loss": 0.284, + "step": 28802 + }, + { + "epoch": 1.3492762449056073, + "grad_norm": 0.61122405519163, + "learning_rate": 1.2642612061249916e-06, + "loss": 0.2748, + "step": 28803 + }, + { + "epoch": 1.3493230898955357, + "grad_norm": 0.6003840485628225, + "learning_rate": 1.2640963479566999e-06, + "loss": 0.2689, + "step": 28804 + }, + { + "epoch": 1.349369934885464, + "grad_norm": 0.5678201690267503, + "learning_rate": 1.263931496900724e-06, + "loss": 0.2648, + "step": 28805 + }, + { + "epoch": 1.3494167798753924, + "grad_norm": 0.5784544318850243, + "learning_rate": 1.2637666529580108e-06, + "loss": 0.2768, + "step": 28806 + }, + { + "epoch": 1.3494636248653207, + "grad_norm": 0.593822204571201, + "learning_rate": 1.26360181612951e-06, + "loss": 0.2804, + "step": 28807 + }, + { + "epoch": 1.349510469855249, + "grad_norm": 0.6190374569089692, + "learning_rate": 1.2634369864161705e-06, + "loss": 0.2815, + "step": 28808 + }, + { + "epoch": 1.3495573148451774, + "grad_norm": 0.5858996363680441, + "learning_rate": 1.2632721638189394e-06, + "loss": 0.2772, + "step": 28809 + }, + { + "epoch": 1.3496041598351056, + "grad_norm": 0.5766570148652883, + "learning_rate": 1.2631073483387666e-06, + "loss": 0.2648, + "step": 28810 + }, + { + "epoch": 1.3496510048250339, + "grad_norm": 0.5451705603940835, + "learning_rate": 1.2629425399765998e-06, + "loss": 0.2603, + "step": 28811 + }, + { + "epoch": 1.3496978498149623, + "grad_norm": 0.6296784632572928, + "learning_rate": 1.2627777387333886e-06, + "loss": 0.2836, + "step": 28812 + }, + { + "epoch": 1.3497446948048906, + "grad_norm": 0.6163424687641228, + "learning_rate": 1.2626129446100799e-06, + "loss": 0.2727, + "step": 28813 + }, + { + "epoch": 1.3497915397948188, + "grad_norm": 0.586881857350277, + "learning_rate": 1.2624481576076236e-06, + "loss": 0.2861, + "step": 28814 + }, + { + "epoch": 1.3498383847847473, + "grad_norm": 0.5928935002419649, + "learning_rate": 1.2622833777269662e-06, + "loss": 0.269, + "step": 28815 + }, + { + "epoch": 1.3498852297746757, + "grad_norm": 0.5592823725571054, + "learning_rate": 1.2621186049690564e-06, + "loss": 0.261, + "step": 28816 + }, + { + "epoch": 1.349932074764604, + "grad_norm": 0.5576366640575017, + "learning_rate": 1.2619538393348435e-06, + "loss": 0.2588, + "step": 28817 + }, + { + "epoch": 1.3499789197545322, + "grad_norm": 0.5967440724587505, + "learning_rate": 1.2617890808252747e-06, + "loss": 0.2713, + "step": 28818 + }, + { + "epoch": 1.3500257647444607, + "grad_norm": 0.6270730444839945, + "learning_rate": 1.2616243294412997e-06, + "loss": 0.2876, + "step": 28819 + }, + { + "epoch": 1.350072609734389, + "grad_norm": 0.6551777927375947, + "learning_rate": 1.2614595851838652e-06, + "loss": 0.2735, + "step": 28820 + }, + { + "epoch": 1.3501194547243172, + "grad_norm": 0.621928198866179, + "learning_rate": 1.261294848053919e-06, + "loss": 0.279, + "step": 28821 + }, + { + "epoch": 1.3501662997142456, + "grad_norm": 0.5675850239672734, + "learning_rate": 1.2611301180524092e-06, + "loss": 0.2584, + "step": 28822 + }, + { + "epoch": 1.3502131447041739, + "grad_norm": 0.6175339550946318, + "learning_rate": 1.2609653951802842e-06, + "loss": 0.2698, + "step": 28823 + }, + { + "epoch": 1.350259989694102, + "grad_norm": 0.5910194911100208, + "learning_rate": 1.260800679438492e-06, + "loss": 0.2721, + "step": 28824 + }, + { + "epoch": 1.3503068346840306, + "grad_norm": 0.5768213987420021, + "learning_rate": 1.2606359708279814e-06, + "loss": 0.2743, + "step": 28825 + }, + { + "epoch": 1.3503536796739588, + "grad_norm": 0.6027856795217137, + "learning_rate": 1.2604712693496985e-06, + "loss": 0.2698, + "step": 28826 + }, + { + "epoch": 1.3504005246638873, + "grad_norm": 0.5793251128284762, + "learning_rate": 1.2603065750045927e-06, + "loss": 0.2731, + "step": 28827 + }, + { + "epoch": 1.3504473696538155, + "grad_norm": 0.5905806838033221, + "learning_rate": 1.26014188779361e-06, + "loss": 0.2973, + "step": 28828 + }, + { + "epoch": 1.350494214643744, + "grad_norm": 0.6185737204191154, + "learning_rate": 1.259977207717699e-06, + "loss": 0.2703, + "step": 28829 + }, + { + "epoch": 1.3505410596336722, + "grad_norm": 0.6136422758978824, + "learning_rate": 1.2598125347778076e-06, + "loss": 0.2938, + "step": 28830 + }, + { + "epoch": 1.3505879046236005, + "grad_norm": 0.5971487264861737, + "learning_rate": 1.2596478689748834e-06, + "loss": 0.2881, + "step": 28831 + }, + { + "epoch": 1.350634749613529, + "grad_norm": 0.6394409645966107, + "learning_rate": 1.2594832103098748e-06, + "loss": 0.2836, + "step": 28832 + }, + { + "epoch": 1.3506815946034572, + "grad_norm": 0.6012484581791677, + "learning_rate": 1.2593185587837286e-06, + "loss": 0.2853, + "step": 28833 + }, + { + "epoch": 1.3507284395933854, + "grad_norm": 0.651043609815336, + "learning_rate": 1.2591539143973913e-06, + "loss": 0.2993, + "step": 28834 + }, + { + "epoch": 1.3507752845833139, + "grad_norm": 0.6417446796446062, + "learning_rate": 1.2589892771518115e-06, + "loss": 0.2948, + "step": 28835 + }, + { + "epoch": 1.350822129573242, + "grad_norm": 0.56229340900354, + "learning_rate": 1.2588246470479364e-06, + "loss": 0.256, + "step": 28836 + }, + { + "epoch": 1.3508689745631703, + "grad_norm": 0.5866161294260189, + "learning_rate": 1.2586600240867137e-06, + "loss": 0.2654, + "step": 28837 + }, + { + "epoch": 1.3509158195530988, + "grad_norm": 0.5755570051265402, + "learning_rate": 1.258495408269091e-06, + "loss": 0.2831, + "step": 28838 + }, + { + "epoch": 1.350962664543027, + "grad_norm": 0.5498740910591091, + "learning_rate": 1.2583307995960148e-06, + "loss": 0.2497, + "step": 28839 + }, + { + "epoch": 1.3510095095329555, + "grad_norm": 0.6130644012606192, + "learning_rate": 1.2581661980684333e-06, + "loss": 0.2828, + "step": 28840 + }, + { + "epoch": 1.3510563545228838, + "grad_norm": 0.6019334742739118, + "learning_rate": 1.2580016036872921e-06, + "loss": 0.2798, + "step": 28841 + }, + { + "epoch": 1.3511031995128122, + "grad_norm": 0.647582469861163, + "learning_rate": 1.25783701645354e-06, + "loss": 0.2825, + "step": 28842 + }, + { + "epoch": 1.3511500445027405, + "grad_norm": 0.6220218189792898, + "learning_rate": 1.2576724363681237e-06, + "loss": 0.2749, + "step": 28843 + }, + { + "epoch": 1.3511968894926687, + "grad_norm": 0.6009614655415426, + "learning_rate": 1.257507863431991e-06, + "loss": 0.2742, + "step": 28844 + }, + { + "epoch": 1.3512437344825972, + "grad_norm": 0.5656662968051349, + "learning_rate": 1.257343297646087e-06, + "loss": 0.2526, + "step": 28845 + }, + { + "epoch": 1.3512905794725254, + "grad_norm": 0.6074564752057577, + "learning_rate": 1.2571787390113605e-06, + "loss": 0.267, + "step": 28846 + }, + { + "epoch": 1.3513374244624536, + "grad_norm": 0.5816392042978057, + "learning_rate": 1.2570141875287587e-06, + "loss": 0.2742, + "step": 28847 + }, + { + "epoch": 1.351384269452382, + "grad_norm": 0.5930724047428065, + "learning_rate": 1.2568496431992266e-06, + "loss": 0.2726, + "step": 28848 + }, + { + "epoch": 1.3514311144423103, + "grad_norm": 0.6296469092313888, + "learning_rate": 1.2566851060237128e-06, + "loss": 0.2893, + "step": 28849 + }, + { + "epoch": 1.3514779594322386, + "grad_norm": 0.5594256002040087, + "learning_rate": 1.2565205760031637e-06, + "loss": 0.2748, + "step": 28850 + }, + { + "epoch": 1.351524804422167, + "grad_norm": 0.5897212300721009, + "learning_rate": 1.2563560531385272e-06, + "loss": 0.2726, + "step": 28851 + }, + { + "epoch": 1.3515716494120955, + "grad_norm": 0.587300232864941, + "learning_rate": 1.2561915374307476e-06, + "loss": 0.2754, + "step": 28852 + }, + { + "epoch": 1.3516184944020238, + "grad_norm": 0.6380314230626933, + "learning_rate": 1.2560270288807736e-06, + "loss": 0.2796, + "step": 28853 + }, + { + "epoch": 1.351665339391952, + "grad_norm": 0.6235019055378033, + "learning_rate": 1.255862527489552e-06, + "loss": 0.2841, + "step": 28854 + }, + { + "epoch": 1.3517121843818805, + "grad_norm": 0.5954297331110976, + "learning_rate": 1.255698033258028e-06, + "loss": 0.264, + "step": 28855 + }, + { + "epoch": 1.3517590293718087, + "grad_norm": 0.5594945074654005, + "learning_rate": 1.255533546187149e-06, + "loss": 0.2673, + "step": 28856 + }, + { + "epoch": 1.351805874361737, + "grad_norm": 0.6301814956655483, + "learning_rate": 1.255369066277863e-06, + "loss": 0.2648, + "step": 28857 + }, + { + "epoch": 1.3518527193516654, + "grad_norm": 0.61395934349696, + "learning_rate": 1.2552045935311135e-06, + "loss": 0.2789, + "step": 28858 + }, + { + "epoch": 1.3518995643415936, + "grad_norm": 0.6131437993130425, + "learning_rate": 1.2550401279478492e-06, + "loss": 0.2767, + "step": 28859 + }, + { + "epoch": 1.3519464093315219, + "grad_norm": 0.5890179100557908, + "learning_rate": 1.2548756695290159e-06, + "loss": 0.2771, + "step": 28860 + }, + { + "epoch": 1.3519932543214503, + "grad_norm": 0.560468445921004, + "learning_rate": 1.254711218275561e-06, + "loss": 0.2671, + "step": 28861 + }, + { + "epoch": 1.3520400993113786, + "grad_norm": 0.6059467154503473, + "learning_rate": 1.2545467741884295e-06, + "loss": 0.2683, + "step": 28862 + }, + { + "epoch": 1.352086944301307, + "grad_norm": 0.5682616762790254, + "learning_rate": 1.2543823372685694e-06, + "loss": 0.2482, + "step": 28863 + }, + { + "epoch": 1.3521337892912353, + "grad_norm": 0.6113461637617631, + "learning_rate": 1.2542179075169241e-06, + "loss": 0.2777, + "step": 28864 + }, + { + "epoch": 1.3521806342811638, + "grad_norm": 0.5622247150668531, + "learning_rate": 1.2540534849344421e-06, + "loss": 0.2634, + "step": 28865 + }, + { + "epoch": 1.352227479271092, + "grad_norm": 0.5686836705062365, + "learning_rate": 1.2538890695220692e-06, + "loss": 0.2557, + "step": 28866 + }, + { + "epoch": 1.3522743242610202, + "grad_norm": 0.5540245207630227, + "learning_rate": 1.2537246612807513e-06, + "loss": 0.2615, + "step": 28867 + }, + { + "epoch": 1.3523211692509487, + "grad_norm": 0.6201782908149333, + "learning_rate": 1.2535602602114362e-06, + "loss": 0.2646, + "step": 28868 + }, + { + "epoch": 1.352368014240877, + "grad_norm": 0.6019402615006303, + "learning_rate": 1.253395866315068e-06, + "loss": 0.2743, + "step": 28869 + }, + { + "epoch": 1.3524148592308052, + "grad_norm": 0.5937634366076731, + "learning_rate": 1.2532314795925924e-06, + "loss": 0.2783, + "step": 28870 + }, + { + "epoch": 1.3524617042207336, + "grad_norm": 0.5948825837428326, + "learning_rate": 1.2530671000449563e-06, + "loss": 0.2785, + "step": 28871 + }, + { + "epoch": 1.3525085492106619, + "grad_norm": 0.5684800248890934, + "learning_rate": 1.2529027276731057e-06, + "loss": 0.2822, + "step": 28872 + }, + { + "epoch": 1.3525553942005901, + "grad_norm": 0.6098280498983107, + "learning_rate": 1.2527383624779865e-06, + "loss": 0.2815, + "step": 28873 + }, + { + "epoch": 1.3526022391905186, + "grad_norm": 0.5938647479022552, + "learning_rate": 1.2525740044605454e-06, + "loss": 0.2706, + "step": 28874 + }, + { + "epoch": 1.3526490841804468, + "grad_norm": 0.5894660547328793, + "learning_rate": 1.2524096536217262e-06, + "loss": 0.2822, + "step": 28875 + }, + { + "epoch": 1.3526959291703753, + "grad_norm": 0.6296313792525003, + "learning_rate": 1.252245309962477e-06, + "loss": 0.2865, + "step": 28876 + }, + { + "epoch": 1.3527427741603035, + "grad_norm": 0.6264213824123828, + "learning_rate": 1.2520809734837415e-06, + "loss": 0.2776, + "step": 28877 + }, + { + "epoch": 1.352789619150232, + "grad_norm": 0.5716051772995067, + "learning_rate": 1.2519166441864667e-06, + "loss": 0.2716, + "step": 28878 + }, + { + "epoch": 1.3528364641401602, + "grad_norm": 0.5897802654961616, + "learning_rate": 1.2517523220715974e-06, + "loss": 0.2748, + "step": 28879 + }, + { + "epoch": 1.3528833091300885, + "grad_norm": 0.6095555271880807, + "learning_rate": 1.2515880071400798e-06, + "loss": 0.2773, + "step": 28880 + }, + { + "epoch": 1.352930154120017, + "grad_norm": 0.5807476915786838, + "learning_rate": 1.2514236993928608e-06, + "loss": 0.2819, + "step": 28881 + }, + { + "epoch": 1.3529769991099452, + "grad_norm": 0.6087177221003993, + "learning_rate": 1.2512593988308841e-06, + "loss": 0.2874, + "step": 28882 + }, + { + "epoch": 1.3530238440998734, + "grad_norm": 0.6830601880044661, + "learning_rate": 1.2510951054550952e-06, + "loss": 0.2868, + "step": 28883 + }, + { + "epoch": 1.3530706890898019, + "grad_norm": 0.5817822414739298, + "learning_rate": 1.2509308192664396e-06, + "loss": 0.2618, + "step": 28884 + }, + { + "epoch": 1.3531175340797301, + "grad_norm": 0.6032070726108888, + "learning_rate": 1.2507665402658636e-06, + "loss": 0.2688, + "step": 28885 + }, + { + "epoch": 1.3531643790696584, + "grad_norm": 0.5729740240804887, + "learning_rate": 1.2506022684543117e-06, + "loss": 0.2744, + "step": 28886 + }, + { + "epoch": 1.3532112240595868, + "grad_norm": 0.6120440624152949, + "learning_rate": 1.2504380038327312e-06, + "loss": 0.2758, + "step": 28887 + }, + { + "epoch": 1.3532580690495153, + "grad_norm": 0.5799234670014296, + "learning_rate": 1.2502737464020647e-06, + "loss": 0.2697, + "step": 28888 + }, + { + "epoch": 1.3533049140394435, + "grad_norm": 0.5543326221880416, + "learning_rate": 1.25010949616326e-06, + "loss": 0.2535, + "step": 28889 + }, + { + "epoch": 1.3533517590293718, + "grad_norm": 0.558143161736855, + "learning_rate": 1.2499452531172595e-06, + "loss": 0.2633, + "step": 28890 + }, + { + "epoch": 1.3533986040193002, + "grad_norm": 0.6070017058039586, + "learning_rate": 1.24978101726501e-06, + "loss": 0.2767, + "step": 28891 + }, + { + "epoch": 1.3534454490092285, + "grad_norm": 0.5799862248182684, + "learning_rate": 1.2496167886074567e-06, + "loss": 0.2742, + "step": 28892 + }, + { + "epoch": 1.3534922939991567, + "grad_norm": 0.5943063288206499, + "learning_rate": 1.2494525671455455e-06, + "loss": 0.2662, + "step": 28893 + }, + { + "epoch": 1.3535391389890852, + "grad_norm": 0.5941055905617558, + "learning_rate": 1.2492883528802194e-06, + "loss": 0.2788, + "step": 28894 + }, + { + "epoch": 1.3535859839790134, + "grad_norm": 0.6149609451845558, + "learning_rate": 1.2491241458124245e-06, + "loss": 0.2771, + "step": 28895 + }, + { + "epoch": 1.3536328289689417, + "grad_norm": 0.5566304231320548, + "learning_rate": 1.2489599459431065e-06, + "loss": 0.2518, + "step": 28896 + }, + { + "epoch": 1.3536796739588701, + "grad_norm": 0.5911538140360427, + "learning_rate": 1.248795753273209e-06, + "loss": 0.2756, + "step": 28897 + }, + { + "epoch": 1.3537265189487984, + "grad_norm": 0.5932260966977984, + "learning_rate": 1.2486315678036772e-06, + "loss": 0.2757, + "step": 28898 + }, + { + "epoch": 1.3537733639387268, + "grad_norm": 0.6067406647809966, + "learning_rate": 1.2484673895354571e-06, + "loss": 0.2657, + "step": 28899 + }, + { + "epoch": 1.353820208928655, + "grad_norm": 0.6220310365874268, + "learning_rate": 1.2483032184694917e-06, + "loss": 0.2888, + "step": 28900 + }, + { + "epoch": 1.3538670539185835, + "grad_norm": 0.6353554774138369, + "learning_rate": 1.2481390546067267e-06, + "loss": 0.2814, + "step": 28901 + }, + { + "epoch": 1.3539138989085118, + "grad_norm": 0.5780576824889563, + "learning_rate": 1.2479748979481071e-06, + "loss": 0.2727, + "step": 28902 + }, + { + "epoch": 1.35396074389844, + "grad_norm": 0.6367175989324808, + "learning_rate": 1.2478107484945778e-06, + "loss": 0.2888, + "step": 28903 + }, + { + "epoch": 1.3540075888883685, + "grad_norm": 0.6388226861236478, + "learning_rate": 1.2476466062470825e-06, + "loss": 0.2773, + "step": 28904 + }, + { + "epoch": 1.3540544338782967, + "grad_norm": 0.5982050823502476, + "learning_rate": 1.2474824712065667e-06, + "loss": 0.2583, + "step": 28905 + }, + { + "epoch": 1.354101278868225, + "grad_norm": 0.6499544120677946, + "learning_rate": 1.2473183433739736e-06, + "loss": 0.3129, + "step": 28906 + }, + { + "epoch": 1.3541481238581534, + "grad_norm": 0.5956494688338395, + "learning_rate": 1.2471542227502487e-06, + "loss": 0.29, + "step": 28907 + }, + { + "epoch": 1.3541949688480817, + "grad_norm": 0.5603300455759563, + "learning_rate": 1.2469901093363363e-06, + "loss": 0.2767, + "step": 28908 + }, + { + "epoch": 1.35424181383801, + "grad_norm": 0.6028050685008952, + "learning_rate": 1.2468260031331807e-06, + "loss": 0.2725, + "step": 28909 + }, + { + "epoch": 1.3542886588279384, + "grad_norm": 0.6350336811764662, + "learning_rate": 1.2466619041417274e-06, + "loss": 0.2879, + "step": 28910 + }, + { + "epoch": 1.3543355038178666, + "grad_norm": 0.584437289870965, + "learning_rate": 1.2464978123629201e-06, + "loss": 0.2675, + "step": 28911 + }, + { + "epoch": 1.354382348807795, + "grad_norm": 0.5830827131700843, + "learning_rate": 1.246333727797702e-06, + "loss": 0.2818, + "step": 28912 + }, + { + "epoch": 1.3544291937977233, + "grad_norm": 0.5657275267833181, + "learning_rate": 1.2461696504470177e-06, + "loss": 0.2799, + "step": 28913 + }, + { + "epoch": 1.3544760387876518, + "grad_norm": 0.5520962750883173, + "learning_rate": 1.2460055803118124e-06, + "loss": 0.2556, + "step": 28914 + }, + { + "epoch": 1.35452288377758, + "grad_norm": 0.5754889823063614, + "learning_rate": 1.2458415173930296e-06, + "loss": 0.2557, + "step": 28915 + }, + { + "epoch": 1.3545697287675083, + "grad_norm": 0.6362841174407641, + "learning_rate": 1.2456774616916137e-06, + "loss": 0.2788, + "step": 28916 + }, + { + "epoch": 1.3546165737574367, + "grad_norm": 0.6169885168284028, + "learning_rate": 1.2455134132085097e-06, + "loss": 0.2938, + "step": 28917 + }, + { + "epoch": 1.354663418747365, + "grad_norm": 0.5776237611231464, + "learning_rate": 1.2453493719446607e-06, + "loss": 0.2763, + "step": 28918 + }, + { + "epoch": 1.3547102637372932, + "grad_norm": 0.5769950316181947, + "learning_rate": 1.2451853379010097e-06, + "loss": 0.268, + "step": 28919 + }, + { + "epoch": 1.3547571087272217, + "grad_norm": 0.5475877403668965, + "learning_rate": 1.2450213110785018e-06, + "loss": 0.2552, + "step": 28920 + }, + { + "epoch": 1.35480395371715, + "grad_norm": 0.5911988844399162, + "learning_rate": 1.2448572914780808e-06, + "loss": 0.2628, + "step": 28921 + }, + { + "epoch": 1.3548507987070781, + "grad_norm": 0.6165825187629832, + "learning_rate": 1.2446932791006904e-06, + "loss": 0.2766, + "step": 28922 + }, + { + "epoch": 1.3548976436970066, + "grad_norm": 0.5571024997326187, + "learning_rate": 1.2445292739472758e-06, + "loss": 0.2768, + "step": 28923 + }, + { + "epoch": 1.354944488686935, + "grad_norm": 0.607721629241492, + "learning_rate": 1.2443652760187789e-06, + "loss": 0.2882, + "step": 28924 + }, + { + "epoch": 1.3549913336768633, + "grad_norm": 0.6484940215891678, + "learning_rate": 1.244201285316145e-06, + "loss": 0.2815, + "step": 28925 + }, + { + "epoch": 1.3550381786667915, + "grad_norm": 0.6144845679909323, + "learning_rate": 1.2440373018403162e-06, + "loss": 0.2793, + "step": 28926 + }, + { + "epoch": 1.35508502365672, + "grad_norm": 0.6652552784094198, + "learning_rate": 1.243873325592237e-06, + "loss": 0.2866, + "step": 28927 + }, + { + "epoch": 1.3551318686466483, + "grad_norm": 0.5819667868046072, + "learning_rate": 1.243709356572851e-06, + "loss": 0.2583, + "step": 28928 + }, + { + "epoch": 1.3551787136365765, + "grad_norm": 0.5923258488913776, + "learning_rate": 1.243545394783102e-06, + "loss": 0.2677, + "step": 28929 + }, + { + "epoch": 1.355225558626505, + "grad_norm": 0.5844411400501867, + "learning_rate": 1.2433814402239344e-06, + "loss": 0.2535, + "step": 28930 + }, + { + "epoch": 1.3552724036164332, + "grad_norm": 0.6016223987582384, + "learning_rate": 1.2432174928962908e-06, + "loss": 0.2578, + "step": 28931 + }, + { + "epoch": 1.3553192486063614, + "grad_norm": 0.5861598209392508, + "learning_rate": 1.2430535528011137e-06, + "loss": 0.2585, + "step": 28932 + }, + { + "epoch": 1.35536609359629, + "grad_norm": 0.6175358954780965, + "learning_rate": 1.2428896199393472e-06, + "loss": 0.2776, + "step": 28933 + }, + { + "epoch": 1.3554129385862181, + "grad_norm": 0.5674195087123702, + "learning_rate": 1.2427256943119353e-06, + "loss": 0.2638, + "step": 28934 + }, + { + "epoch": 1.3554597835761466, + "grad_norm": 0.5635964802633563, + "learning_rate": 1.2425617759198211e-06, + "loss": 0.2666, + "step": 28935 + }, + { + "epoch": 1.3555066285660748, + "grad_norm": 0.5913454938435678, + "learning_rate": 1.2423978647639486e-06, + "loss": 0.2857, + "step": 28936 + }, + { + "epoch": 1.3555534735560033, + "grad_norm": 0.5999816715292711, + "learning_rate": 1.2422339608452594e-06, + "loss": 0.2858, + "step": 28937 + }, + { + "epoch": 1.3556003185459315, + "grad_norm": 0.5744590113846315, + "learning_rate": 1.2420700641646985e-06, + "loss": 0.2594, + "step": 28938 + }, + { + "epoch": 1.3556471635358598, + "grad_norm": 0.5177095167939885, + "learning_rate": 1.2419061747232072e-06, + "loss": 0.2628, + "step": 28939 + }, + { + "epoch": 1.3556940085257883, + "grad_norm": 0.5975316077972513, + "learning_rate": 1.24174229252173e-06, + "loss": 0.2948, + "step": 28940 + }, + { + "epoch": 1.3557408535157165, + "grad_norm": 0.5741536667874083, + "learning_rate": 1.2415784175612094e-06, + "loss": 0.2666, + "step": 28941 + }, + { + "epoch": 1.3557876985056447, + "grad_norm": 0.5525861535154464, + "learning_rate": 1.2414145498425897e-06, + "loss": 0.255, + "step": 28942 + }, + { + "epoch": 1.3558345434955732, + "grad_norm": 0.6039548097760283, + "learning_rate": 1.2412506893668119e-06, + "loss": 0.2794, + "step": 28943 + }, + { + "epoch": 1.3558813884855014, + "grad_norm": 0.5630300059739491, + "learning_rate": 1.2410868361348203e-06, + "loss": 0.2777, + "step": 28944 + }, + { + "epoch": 1.3559282334754297, + "grad_norm": 0.604403124761153, + "learning_rate": 1.240922990147558e-06, + "loss": 0.278, + "step": 28945 + }, + { + "epoch": 1.3559750784653581, + "grad_norm": 0.5543731685479679, + "learning_rate": 1.240759151405967e-06, + "loss": 0.2563, + "step": 28946 + }, + { + "epoch": 1.3560219234552864, + "grad_norm": 0.5579298157297945, + "learning_rate": 1.2405953199109902e-06, + "loss": 0.2675, + "step": 28947 + }, + { + "epoch": 1.3560687684452148, + "grad_norm": 0.63910775572447, + "learning_rate": 1.2404314956635718e-06, + "loss": 0.288, + "step": 28948 + }, + { + "epoch": 1.356115613435143, + "grad_norm": 0.5675035854885269, + "learning_rate": 1.2402676786646529e-06, + "loss": 0.2463, + "step": 28949 + }, + { + "epoch": 1.3561624584250715, + "grad_norm": 0.5688251016318172, + "learning_rate": 1.2401038689151768e-06, + "loss": 0.2406, + "step": 28950 + }, + { + "epoch": 1.3562093034149998, + "grad_norm": 0.6012740803680168, + "learning_rate": 1.239940066416086e-06, + "loss": 0.279, + "step": 28951 + }, + { + "epoch": 1.356256148404928, + "grad_norm": 0.5667234351132043, + "learning_rate": 1.2397762711683245e-06, + "loss": 0.2556, + "step": 28952 + }, + { + "epoch": 1.3563029933948565, + "grad_norm": 0.6094367009037931, + "learning_rate": 1.2396124831728329e-06, + "loss": 0.2838, + "step": 28953 + }, + { + "epoch": 1.3563498383847847, + "grad_norm": 0.6065002370979489, + "learning_rate": 1.2394487024305557e-06, + "loss": 0.2863, + "step": 28954 + }, + { + "epoch": 1.356396683374713, + "grad_norm": 0.6244509013582542, + "learning_rate": 1.2392849289424333e-06, + "loss": 0.2804, + "step": 28955 + }, + { + "epoch": 1.3564435283646414, + "grad_norm": 0.5958918140264663, + "learning_rate": 1.2391211627094094e-06, + "loss": 0.2847, + "step": 28956 + }, + { + "epoch": 1.3564903733545697, + "grad_norm": 0.5437136007304492, + "learning_rate": 1.238957403732426e-06, + "loss": 0.2667, + "step": 28957 + }, + { + "epoch": 1.356537218344498, + "grad_norm": 0.6146913270363284, + "learning_rate": 1.2387936520124261e-06, + "loss": 0.2858, + "step": 28958 + }, + { + "epoch": 1.3565840633344264, + "grad_norm": 0.6201861200517865, + "learning_rate": 1.2386299075503527e-06, + "loss": 0.2809, + "step": 28959 + }, + { + "epoch": 1.3566309083243548, + "grad_norm": 0.594801567445389, + "learning_rate": 1.2384661703471471e-06, + "loss": 0.2803, + "step": 28960 + }, + { + "epoch": 1.356677753314283, + "grad_norm": 0.6123774065409776, + "learning_rate": 1.2383024404037506e-06, + "loss": 0.2719, + "step": 28961 + }, + { + "epoch": 1.3567245983042113, + "grad_norm": 0.6735488341905342, + "learning_rate": 1.2381387177211063e-06, + "loss": 0.2903, + "step": 28962 + }, + { + "epoch": 1.3567714432941398, + "grad_norm": 0.6055549089644432, + "learning_rate": 1.237975002300157e-06, + "loss": 0.2949, + "step": 28963 + }, + { + "epoch": 1.356818288284068, + "grad_norm": 0.5979745140142813, + "learning_rate": 1.2378112941418438e-06, + "loss": 0.2489, + "step": 28964 + }, + { + "epoch": 1.3568651332739963, + "grad_norm": 0.5822630071446047, + "learning_rate": 1.2376475932471104e-06, + "loss": 0.2673, + "step": 28965 + }, + { + "epoch": 1.3569119782639247, + "grad_norm": 0.5577650410223055, + "learning_rate": 1.2374838996168972e-06, + "loss": 0.2547, + "step": 28966 + }, + { + "epoch": 1.356958823253853, + "grad_norm": 0.58717358189809, + "learning_rate": 1.2373202132521475e-06, + "loss": 0.2851, + "step": 28967 + }, + { + "epoch": 1.3570056682437812, + "grad_norm": 0.6139012670279008, + "learning_rate": 1.237156534153802e-06, + "loss": 0.2729, + "step": 28968 + }, + { + "epoch": 1.3570525132337097, + "grad_norm": 0.6213158309827317, + "learning_rate": 1.236992862322803e-06, + "loss": 0.2918, + "step": 28969 + }, + { + "epoch": 1.357099358223638, + "grad_norm": 0.5640881062484434, + "learning_rate": 1.2368291977600923e-06, + "loss": 0.274, + "step": 28970 + }, + { + "epoch": 1.3571462032135664, + "grad_norm": 0.5403104284345225, + "learning_rate": 1.2366655404666128e-06, + "loss": 0.2585, + "step": 28971 + }, + { + "epoch": 1.3571930482034946, + "grad_norm": 0.5580770596573736, + "learning_rate": 1.236501890443306e-06, + "loss": 0.2596, + "step": 28972 + }, + { + "epoch": 1.357239893193423, + "grad_norm": 0.6197796980962471, + "learning_rate": 1.2363382476911135e-06, + "loss": 0.2784, + "step": 28973 + }, + { + "epoch": 1.3572867381833513, + "grad_norm": 0.5968384423002555, + "learning_rate": 1.2361746122109757e-06, + "loss": 0.2861, + "step": 28974 + }, + { + "epoch": 1.3573335831732796, + "grad_norm": 0.6022572551404508, + "learning_rate": 1.2360109840038353e-06, + "loss": 0.2628, + "step": 28975 + }, + { + "epoch": 1.357380428163208, + "grad_norm": 0.6072305249978007, + "learning_rate": 1.2358473630706341e-06, + "loss": 0.2777, + "step": 28976 + }, + { + "epoch": 1.3574272731531363, + "grad_norm": 0.5564462268581949, + "learning_rate": 1.2356837494123135e-06, + "loss": 0.2608, + "step": 28977 + }, + { + "epoch": 1.3574741181430645, + "grad_norm": 0.6043469360969868, + "learning_rate": 1.2355201430298164e-06, + "loss": 0.2815, + "step": 28978 + }, + { + "epoch": 1.357520963132993, + "grad_norm": 0.6045075005125059, + "learning_rate": 1.235356543924082e-06, + "loss": 0.2626, + "step": 28979 + }, + { + "epoch": 1.3575678081229212, + "grad_norm": 0.6296104245642369, + "learning_rate": 1.2351929520960534e-06, + "loss": 0.2888, + "step": 28980 + }, + { + "epoch": 1.3576146531128495, + "grad_norm": 0.5962057721695543, + "learning_rate": 1.235029367546671e-06, + "loss": 0.262, + "step": 28981 + }, + { + "epoch": 1.357661498102778, + "grad_norm": 0.5762347206591838, + "learning_rate": 1.2348657902768762e-06, + "loss": 0.2636, + "step": 28982 + }, + { + "epoch": 1.3577083430927062, + "grad_norm": 0.5606178656565525, + "learning_rate": 1.2347022202876113e-06, + "loss": 0.271, + "step": 28983 + }, + { + "epoch": 1.3577551880826346, + "grad_norm": 0.600315586306198, + "learning_rate": 1.2345386575798169e-06, + "loss": 0.276, + "step": 28984 + }, + { + "epoch": 1.3578020330725629, + "grad_norm": 0.6201502445462932, + "learning_rate": 1.2343751021544354e-06, + "loss": 0.2756, + "step": 28985 + }, + { + "epoch": 1.3578488780624913, + "grad_norm": 0.6002574936930598, + "learning_rate": 1.2342115540124058e-06, + "loss": 0.2605, + "step": 28986 + }, + { + "epoch": 1.3578957230524196, + "grad_norm": 0.545458891299663, + "learning_rate": 1.234048013154672e-06, + "loss": 0.2589, + "step": 28987 + }, + { + "epoch": 1.3579425680423478, + "grad_norm": 0.5791103622481348, + "learning_rate": 1.2338844795821726e-06, + "loss": 0.2623, + "step": 28988 + }, + { + "epoch": 1.3579894130322763, + "grad_norm": 0.6111303558136607, + "learning_rate": 1.2337209532958496e-06, + "loss": 0.2869, + "step": 28989 + }, + { + "epoch": 1.3580362580222045, + "grad_norm": 0.63210114412288, + "learning_rate": 1.2335574342966444e-06, + "loss": 0.2839, + "step": 28990 + }, + { + "epoch": 1.3580831030121328, + "grad_norm": 0.5914244729275538, + "learning_rate": 1.2333939225854987e-06, + "loss": 0.2846, + "step": 28991 + }, + { + "epoch": 1.3581299480020612, + "grad_norm": 0.5948249173684755, + "learning_rate": 1.233230418163352e-06, + "loss": 0.2613, + "step": 28992 + }, + { + "epoch": 1.3581767929919895, + "grad_norm": 0.5620293430124744, + "learning_rate": 1.2330669210311455e-06, + "loss": 0.2744, + "step": 28993 + }, + { + "epoch": 1.3582236379819177, + "grad_norm": 0.6315531934829719, + "learning_rate": 1.2329034311898217e-06, + "loss": 0.2835, + "step": 28994 + }, + { + "epoch": 1.3582704829718462, + "grad_norm": 0.5963942779272847, + "learning_rate": 1.2327399486403187e-06, + "loss": 0.2593, + "step": 28995 + }, + { + "epoch": 1.3583173279617746, + "grad_norm": 0.5964970885196956, + "learning_rate": 1.2325764733835792e-06, + "loss": 0.2716, + "step": 28996 + }, + { + "epoch": 1.3583641729517029, + "grad_norm": 0.6027605184513377, + "learning_rate": 1.2324130054205446e-06, + "loss": 0.2818, + "step": 28997 + }, + { + "epoch": 1.358411017941631, + "grad_norm": 0.5976354109482938, + "learning_rate": 1.2322495447521534e-06, + "loss": 0.2933, + "step": 28998 + }, + { + "epoch": 1.3584578629315596, + "grad_norm": 0.5880685444588823, + "learning_rate": 1.2320860913793476e-06, + "loss": 0.2826, + "step": 28999 + }, + { + "epoch": 1.3585047079214878, + "grad_norm": 0.5996148750779766, + "learning_rate": 1.2319226453030672e-06, + "loss": 0.2477, + "step": 29000 + }, + { + "epoch": 1.358551552911416, + "grad_norm": 0.6003004774968761, + "learning_rate": 1.2317592065242548e-06, + "loss": 0.2747, + "step": 29001 + }, + { + "epoch": 1.3585983979013445, + "grad_norm": 0.6473332169787341, + "learning_rate": 1.2315957750438482e-06, + "loss": 0.2825, + "step": 29002 + }, + { + "epoch": 1.3586452428912728, + "grad_norm": 0.5977813328471466, + "learning_rate": 1.2314323508627899e-06, + "loss": 0.2611, + "step": 29003 + }, + { + "epoch": 1.358692087881201, + "grad_norm": 0.6221152066417629, + "learning_rate": 1.231268933982019e-06, + "loss": 0.2825, + "step": 29004 + }, + { + "epoch": 1.3587389328711295, + "grad_norm": 0.652408605344263, + "learning_rate": 1.231105524402476e-06, + "loss": 0.283, + "step": 29005 + }, + { + "epoch": 1.3587857778610577, + "grad_norm": 0.5626538756940405, + "learning_rate": 1.2309421221251022e-06, + "loss": 0.26, + "step": 29006 + }, + { + "epoch": 1.3588326228509862, + "grad_norm": 0.6075766550161891, + "learning_rate": 1.2307787271508372e-06, + "loss": 0.2766, + "step": 29007 + }, + { + "epoch": 1.3588794678409144, + "grad_norm": 0.6043408784931963, + "learning_rate": 1.2306153394806228e-06, + "loss": 0.2775, + "step": 29008 + }, + { + "epoch": 1.3589263128308429, + "grad_norm": 0.5708945792256763, + "learning_rate": 1.2304519591153983e-06, + "loss": 0.2611, + "step": 29009 + }, + { + "epoch": 1.358973157820771, + "grad_norm": 0.6429961147686402, + "learning_rate": 1.2302885860561024e-06, + "loss": 0.2878, + "step": 29010 + }, + { + "epoch": 1.3590200028106993, + "grad_norm": 0.6284877093202198, + "learning_rate": 1.2301252203036764e-06, + "loss": 0.2823, + "step": 29011 + }, + { + "epoch": 1.3590668478006278, + "grad_norm": 0.6164918581517657, + "learning_rate": 1.229961861859061e-06, + "loss": 0.2731, + "step": 29012 + }, + { + "epoch": 1.359113692790556, + "grad_norm": 0.5833440376473015, + "learning_rate": 1.2297985107231954e-06, + "loss": 0.2615, + "step": 29013 + }, + { + "epoch": 1.3591605377804843, + "grad_norm": 0.5842150886481442, + "learning_rate": 1.2296351668970216e-06, + "loss": 0.2714, + "step": 29014 + }, + { + "epoch": 1.3592073827704128, + "grad_norm": 0.6160785282153937, + "learning_rate": 1.2294718303814767e-06, + "loss": 0.2825, + "step": 29015 + }, + { + "epoch": 1.359254227760341, + "grad_norm": 0.6027305353064473, + "learning_rate": 1.2293085011775036e-06, + "loss": 0.2763, + "step": 29016 + }, + { + "epoch": 1.3593010727502692, + "grad_norm": 0.5962644065090122, + "learning_rate": 1.2291451792860393e-06, + "loss": 0.2724, + "step": 29017 + }, + { + "epoch": 1.3593479177401977, + "grad_norm": 0.6182439361664948, + "learning_rate": 1.2289818647080252e-06, + "loss": 0.2616, + "step": 29018 + }, + { + "epoch": 1.359394762730126, + "grad_norm": 0.6258496942593972, + "learning_rate": 1.2288185574444008e-06, + "loss": 0.2874, + "step": 29019 + }, + { + "epoch": 1.3594416077200544, + "grad_norm": 0.5920296896577629, + "learning_rate": 1.2286552574961063e-06, + "loss": 0.2568, + "step": 29020 + }, + { + "epoch": 1.3594884527099826, + "grad_norm": 0.5958076288884208, + "learning_rate": 1.2284919648640823e-06, + "loss": 0.2844, + "step": 29021 + }, + { + "epoch": 1.359535297699911, + "grad_norm": 0.6119549876563517, + "learning_rate": 1.2283286795492675e-06, + "loss": 0.2796, + "step": 29022 + }, + { + "epoch": 1.3595821426898393, + "grad_norm": 0.572409331135026, + "learning_rate": 1.2281654015526004e-06, + "loss": 0.2742, + "step": 29023 + }, + { + "epoch": 1.3596289876797676, + "grad_norm": 0.5433253841983403, + "learning_rate": 1.2280021308750217e-06, + "loss": 0.2584, + "step": 29024 + }, + { + "epoch": 1.359675832669696, + "grad_norm": 0.618860628429254, + "learning_rate": 1.227838867517471e-06, + "loss": 0.2744, + "step": 29025 + }, + { + "epoch": 1.3597226776596243, + "grad_norm": 0.5844806595568021, + "learning_rate": 1.227675611480888e-06, + "loss": 0.2607, + "step": 29026 + }, + { + "epoch": 1.3597695226495525, + "grad_norm": 0.5918956720214166, + "learning_rate": 1.2275123627662131e-06, + "loss": 0.2838, + "step": 29027 + }, + { + "epoch": 1.359816367639481, + "grad_norm": 0.5850380676919034, + "learning_rate": 1.2273491213743836e-06, + "loss": 0.2685, + "step": 29028 + }, + { + "epoch": 1.3598632126294092, + "grad_norm": 0.5868614482364753, + "learning_rate": 1.2271858873063411e-06, + "loss": 0.2774, + "step": 29029 + }, + { + "epoch": 1.3599100576193375, + "grad_norm": 0.5999269243749266, + "learning_rate": 1.227022660563023e-06, + "loss": 0.2488, + "step": 29030 + }, + { + "epoch": 1.359956902609266, + "grad_norm": 0.6963385205897938, + "learning_rate": 1.2268594411453693e-06, + "loss": 0.2943, + "step": 29031 + }, + { + "epoch": 1.3600037475991944, + "grad_norm": 0.6169195787121452, + "learning_rate": 1.2266962290543196e-06, + "loss": 0.2708, + "step": 29032 + }, + { + "epoch": 1.3600505925891226, + "grad_norm": 0.5711457048965514, + "learning_rate": 1.2265330242908143e-06, + "loss": 0.2646, + "step": 29033 + }, + { + "epoch": 1.3600974375790509, + "grad_norm": 0.5923889762319253, + "learning_rate": 1.2263698268557903e-06, + "loss": 0.2735, + "step": 29034 + }, + { + "epoch": 1.3601442825689793, + "grad_norm": 0.6546446722391972, + "learning_rate": 1.2262066367501876e-06, + "loss": 0.281, + "step": 29035 + }, + { + "epoch": 1.3601911275589076, + "grad_norm": 0.5874590876016178, + "learning_rate": 1.2260434539749465e-06, + "loss": 0.2663, + "step": 29036 + }, + { + "epoch": 1.3602379725488358, + "grad_norm": 0.5710001466349682, + "learning_rate": 1.2258802785310045e-06, + "loss": 0.2545, + "step": 29037 + }, + { + "epoch": 1.3602848175387643, + "grad_norm": 0.603889545062353, + "learning_rate": 1.225717110419301e-06, + "loss": 0.2714, + "step": 29038 + }, + { + "epoch": 1.3603316625286925, + "grad_norm": 0.5782000179697773, + "learning_rate": 1.2255539496407765e-06, + "loss": 0.2771, + "step": 29039 + }, + { + "epoch": 1.3603785075186208, + "grad_norm": 0.6593493053135867, + "learning_rate": 1.2253907961963675e-06, + "loss": 0.3066, + "step": 29040 + }, + { + "epoch": 1.3604253525085492, + "grad_norm": 0.6181606312357397, + "learning_rate": 1.225227650087014e-06, + "loss": 0.2852, + "step": 29041 + }, + { + "epoch": 1.3604721974984775, + "grad_norm": 0.5602045759093338, + "learning_rate": 1.2250645113136553e-06, + "loss": 0.2783, + "step": 29042 + }, + { + "epoch": 1.360519042488406, + "grad_norm": 0.6168119188213312, + "learning_rate": 1.2249013798772305e-06, + "loss": 0.2702, + "step": 29043 + }, + { + "epoch": 1.3605658874783342, + "grad_norm": 0.6240177549328556, + "learning_rate": 1.2247382557786772e-06, + "loss": 0.272, + "step": 29044 + }, + { + "epoch": 1.3606127324682626, + "grad_norm": 0.6498734012727143, + "learning_rate": 1.2245751390189352e-06, + "loss": 0.2887, + "step": 29045 + }, + { + "epoch": 1.3606595774581909, + "grad_norm": 0.5557114769420135, + "learning_rate": 1.224412029598942e-06, + "loss": 0.2501, + "step": 29046 + }, + { + "epoch": 1.3607064224481191, + "grad_norm": 0.5780165311571674, + "learning_rate": 1.2242489275196371e-06, + "loss": 0.2685, + "step": 29047 + }, + { + "epoch": 1.3607532674380476, + "grad_norm": 0.5846670840127006, + "learning_rate": 1.2240858327819587e-06, + "loss": 0.2648, + "step": 29048 + }, + { + "epoch": 1.3608001124279758, + "grad_norm": 0.6172362077338011, + "learning_rate": 1.223922745386846e-06, + "loss": 0.2701, + "step": 29049 + }, + { + "epoch": 1.360846957417904, + "grad_norm": 0.5557642669864832, + "learning_rate": 1.223759665335238e-06, + "loss": 0.2502, + "step": 29050 + }, + { + "epoch": 1.3608938024078325, + "grad_norm": 0.5566562865123224, + "learning_rate": 1.2235965926280713e-06, + "loss": 0.2553, + "step": 29051 + }, + { + "epoch": 1.3609406473977608, + "grad_norm": 0.5854500534403594, + "learning_rate": 1.2234335272662862e-06, + "loss": 0.2637, + "step": 29052 + }, + { + "epoch": 1.360987492387689, + "grad_norm": 0.5785600088040952, + "learning_rate": 1.2232704692508195e-06, + "loss": 0.2596, + "step": 29053 + }, + { + "epoch": 1.3610343373776175, + "grad_norm": 0.6036951829879581, + "learning_rate": 1.2231074185826102e-06, + "loss": 0.2702, + "step": 29054 + }, + { + "epoch": 1.3610811823675457, + "grad_norm": 0.5750763832517009, + "learning_rate": 1.2229443752625973e-06, + "loss": 0.2611, + "step": 29055 + }, + { + "epoch": 1.3611280273574742, + "grad_norm": 0.5714313898194142, + "learning_rate": 1.222781339291718e-06, + "loss": 0.2686, + "step": 29056 + }, + { + "epoch": 1.3611748723474024, + "grad_norm": 0.6347608834960935, + "learning_rate": 1.2226183106709122e-06, + "loss": 0.2789, + "step": 29057 + }, + { + "epoch": 1.3612217173373309, + "grad_norm": 0.576950876480253, + "learning_rate": 1.2224552894011172e-06, + "loss": 0.2749, + "step": 29058 + }, + { + "epoch": 1.3612685623272591, + "grad_norm": 0.5881163662082425, + "learning_rate": 1.2222922754832695e-06, + "loss": 0.2752, + "step": 29059 + }, + { + "epoch": 1.3613154073171874, + "grad_norm": 0.6009757654410858, + "learning_rate": 1.222129268918309e-06, + "loss": 0.2705, + "step": 29060 + }, + { + "epoch": 1.3613622523071158, + "grad_norm": 0.6033989014269793, + "learning_rate": 1.2219662697071732e-06, + "loss": 0.2648, + "step": 29061 + }, + { + "epoch": 1.361409097297044, + "grad_norm": 0.5867616191539311, + "learning_rate": 1.2218032778508002e-06, + "loss": 0.282, + "step": 29062 + }, + { + "epoch": 1.3614559422869723, + "grad_norm": 0.5908922937837747, + "learning_rate": 1.2216402933501291e-06, + "loss": 0.2725, + "step": 29063 + }, + { + "epoch": 1.3615027872769008, + "grad_norm": 0.5956246743216175, + "learning_rate": 1.2214773162060962e-06, + "loss": 0.2644, + "step": 29064 + }, + { + "epoch": 1.361549632266829, + "grad_norm": 0.6672765513789662, + "learning_rate": 1.2213143464196407e-06, + "loss": 0.2863, + "step": 29065 + }, + { + "epoch": 1.3615964772567573, + "grad_norm": 0.6142891234621681, + "learning_rate": 1.2211513839916985e-06, + "loss": 0.2701, + "step": 29066 + }, + { + "epoch": 1.3616433222466857, + "grad_norm": 0.6316464531975047, + "learning_rate": 1.2209884289232087e-06, + "loss": 0.2799, + "step": 29067 + }, + { + "epoch": 1.3616901672366142, + "grad_norm": 0.6113369375152158, + "learning_rate": 1.2208254812151093e-06, + "loss": 0.2494, + "step": 29068 + }, + { + "epoch": 1.3617370122265424, + "grad_norm": 0.6033084666569863, + "learning_rate": 1.2206625408683378e-06, + "loss": 0.2636, + "step": 29069 + }, + { + "epoch": 1.3617838572164707, + "grad_norm": 0.6288628648772099, + "learning_rate": 1.2204996078838327e-06, + "loss": 0.2792, + "step": 29070 + }, + { + "epoch": 1.3618307022063991, + "grad_norm": 0.5958394845854641, + "learning_rate": 1.2203366822625307e-06, + "loss": 0.2848, + "step": 29071 + }, + { + "epoch": 1.3618775471963274, + "grad_norm": 0.6018629858594488, + "learning_rate": 1.2201737640053684e-06, + "loss": 0.2791, + "step": 29072 + }, + { + "epoch": 1.3619243921862556, + "grad_norm": 0.5639948567521378, + "learning_rate": 1.2200108531132848e-06, + "loss": 0.2745, + "step": 29073 + }, + { + "epoch": 1.361971237176184, + "grad_norm": 0.6365425172468716, + "learning_rate": 1.2198479495872167e-06, + "loss": 0.2739, + "step": 29074 + }, + { + "epoch": 1.3620180821661123, + "grad_norm": 0.6138633879360061, + "learning_rate": 1.2196850534281021e-06, + "loss": 0.2737, + "step": 29075 + }, + { + "epoch": 1.3620649271560406, + "grad_norm": 0.5861088334365614, + "learning_rate": 1.219522164636879e-06, + "loss": 0.2691, + "step": 29076 + }, + { + "epoch": 1.362111772145969, + "grad_norm": 0.6010146760017357, + "learning_rate": 1.2193592832144829e-06, + "loss": 0.2697, + "step": 29077 + }, + { + "epoch": 1.3621586171358973, + "grad_norm": 0.552238223923379, + "learning_rate": 1.2191964091618535e-06, + "loss": 0.2661, + "step": 29078 + }, + { + "epoch": 1.3622054621258257, + "grad_norm": 0.5862516873106949, + "learning_rate": 1.219033542479926e-06, + "loss": 0.2683, + "step": 29079 + }, + { + "epoch": 1.362252307115754, + "grad_norm": 0.6559966390663781, + "learning_rate": 1.2188706831696382e-06, + "loss": 0.2853, + "step": 29080 + }, + { + "epoch": 1.3622991521056824, + "grad_norm": 0.6415018497722904, + "learning_rate": 1.2187078312319278e-06, + "loss": 0.2977, + "step": 29081 + }, + { + "epoch": 1.3623459970956107, + "grad_norm": 0.5965184494743412, + "learning_rate": 1.2185449866677326e-06, + "loss": 0.279, + "step": 29082 + }, + { + "epoch": 1.362392842085539, + "grad_norm": 0.5979819344433209, + "learning_rate": 1.2183821494779879e-06, + "loss": 0.2718, + "step": 29083 + }, + { + "epoch": 1.3624396870754674, + "grad_norm": 0.5910051307053316, + "learning_rate": 1.2182193196636322e-06, + "loss": 0.2858, + "step": 29084 + }, + { + "epoch": 1.3624865320653956, + "grad_norm": 0.6222146297649028, + "learning_rate": 1.2180564972256027e-06, + "loss": 0.2871, + "step": 29085 + }, + { + "epoch": 1.3625333770553238, + "grad_norm": 0.5995702258528639, + "learning_rate": 1.2178936821648351e-06, + "loss": 0.2598, + "step": 29086 + }, + { + "epoch": 1.3625802220452523, + "grad_norm": 0.619834728359861, + "learning_rate": 1.2177308744822669e-06, + "loss": 0.2835, + "step": 29087 + }, + { + "epoch": 1.3626270670351806, + "grad_norm": 0.5913139601217284, + "learning_rate": 1.2175680741788364e-06, + "loss": 0.2575, + "step": 29088 + }, + { + "epoch": 1.3626739120251088, + "grad_norm": 0.5712049627520698, + "learning_rate": 1.2174052812554785e-06, + "loss": 0.2763, + "step": 29089 + }, + { + "epoch": 1.3627207570150373, + "grad_norm": 0.5992539090160777, + "learning_rate": 1.2172424957131304e-06, + "loss": 0.2731, + "step": 29090 + }, + { + "epoch": 1.3627676020049655, + "grad_norm": 0.5955592784214904, + "learning_rate": 1.2170797175527296e-06, + "loss": 0.271, + "step": 29091 + }, + { + "epoch": 1.362814446994894, + "grad_norm": 0.575874024738685, + "learning_rate": 1.2169169467752135e-06, + "loss": 0.2686, + "step": 29092 + }, + { + "epoch": 1.3628612919848222, + "grad_norm": 0.6268128300768678, + "learning_rate": 1.2167541833815168e-06, + "loss": 0.2842, + "step": 29093 + }, + { + "epoch": 1.3629081369747507, + "grad_norm": 0.6043125843577943, + "learning_rate": 1.2165914273725785e-06, + "loss": 0.2806, + "step": 29094 + }, + { + "epoch": 1.362954981964679, + "grad_norm": 0.5689921548557768, + "learning_rate": 1.216428678749333e-06, + "loss": 0.265, + "step": 29095 + }, + { + "epoch": 1.3630018269546071, + "grad_norm": 0.6077500481836824, + "learning_rate": 1.2162659375127176e-06, + "loss": 0.2693, + "step": 29096 + }, + { + "epoch": 1.3630486719445356, + "grad_norm": 0.5627811618751886, + "learning_rate": 1.2161032036636691e-06, + "loss": 0.2548, + "step": 29097 + }, + { + "epoch": 1.3630955169344638, + "grad_norm": 0.5815481855285465, + "learning_rate": 1.2159404772031239e-06, + "loss": 0.2759, + "step": 29098 + }, + { + "epoch": 1.363142361924392, + "grad_norm": 0.5630085249183385, + "learning_rate": 1.2157777581320197e-06, + "loss": 0.2588, + "step": 29099 + }, + { + "epoch": 1.3631892069143206, + "grad_norm": 0.5806911075460975, + "learning_rate": 1.2156150464512917e-06, + "loss": 0.2597, + "step": 29100 + }, + { + "epoch": 1.3632360519042488, + "grad_norm": 0.5962504267976828, + "learning_rate": 1.2154523421618752e-06, + "loss": 0.2833, + "step": 29101 + }, + { + "epoch": 1.363282896894177, + "grad_norm": 0.6316652649784161, + "learning_rate": 1.2152896452647076e-06, + "loss": 0.2791, + "step": 29102 + }, + { + "epoch": 1.3633297418841055, + "grad_norm": 0.5750551835797757, + "learning_rate": 1.215126955760725e-06, + "loss": 0.2513, + "step": 29103 + }, + { + "epoch": 1.363376586874034, + "grad_norm": 0.5773285800792658, + "learning_rate": 1.2149642736508643e-06, + "loss": 0.2689, + "step": 29104 + }, + { + "epoch": 1.3634234318639622, + "grad_norm": 0.6281262713114265, + "learning_rate": 1.2148015989360621e-06, + "loss": 0.2945, + "step": 29105 + }, + { + "epoch": 1.3634702768538904, + "grad_norm": 0.5994477569035743, + "learning_rate": 1.2146389316172525e-06, + "loss": 0.2733, + "step": 29106 + }, + { + "epoch": 1.363517121843819, + "grad_norm": 0.5510391137642667, + "learning_rate": 1.2144762716953736e-06, + "loss": 0.2591, + "step": 29107 + }, + { + "epoch": 1.3635639668337471, + "grad_norm": 0.6068170536579293, + "learning_rate": 1.21431361917136e-06, + "loss": 0.2881, + "step": 29108 + }, + { + "epoch": 1.3636108118236754, + "grad_norm": 0.5832317570306352, + "learning_rate": 1.2141509740461482e-06, + "loss": 0.2742, + "step": 29109 + }, + { + "epoch": 1.3636576568136038, + "grad_norm": 0.6271984985833686, + "learning_rate": 1.2139883363206744e-06, + "loss": 0.27, + "step": 29110 + }, + { + "epoch": 1.363704501803532, + "grad_norm": 0.5805480912510016, + "learning_rate": 1.2138257059958747e-06, + "loss": 0.27, + "step": 29111 + }, + { + "epoch": 1.3637513467934603, + "grad_norm": 0.6339265419855021, + "learning_rate": 1.2136630830726855e-06, + "loss": 0.3035, + "step": 29112 + }, + { + "epoch": 1.3637981917833888, + "grad_norm": 0.6049020071544814, + "learning_rate": 1.213500467552042e-06, + "loss": 0.2808, + "step": 29113 + }, + { + "epoch": 1.363845036773317, + "grad_norm": 0.5836610448846662, + "learning_rate": 1.213337859434879e-06, + "loss": 0.2872, + "step": 29114 + }, + { + "epoch": 1.3638918817632455, + "grad_norm": 0.5434374575934473, + "learning_rate": 1.2131752587221334e-06, + "loss": 0.2641, + "step": 29115 + }, + { + "epoch": 1.3639387267531737, + "grad_norm": 0.5575949784979847, + "learning_rate": 1.2130126654147405e-06, + "loss": 0.2546, + "step": 29116 + }, + { + "epoch": 1.3639855717431022, + "grad_norm": 0.5868349519286773, + "learning_rate": 1.2128500795136367e-06, + "loss": 0.2663, + "step": 29117 + }, + { + "epoch": 1.3640324167330304, + "grad_norm": 0.5859648352361219, + "learning_rate": 1.2126875010197568e-06, + "loss": 0.2743, + "step": 29118 + }, + { + "epoch": 1.3640792617229587, + "grad_norm": 0.6257302434773089, + "learning_rate": 1.2125249299340378e-06, + "loss": 0.2781, + "step": 29119 + }, + { + "epoch": 1.3641261067128871, + "grad_norm": 0.6075618814424998, + "learning_rate": 1.212362366257414e-06, + "loss": 0.2714, + "step": 29120 + }, + { + "epoch": 1.3641729517028154, + "grad_norm": 0.6105533477716107, + "learning_rate": 1.2121998099908205e-06, + "loss": 0.2657, + "step": 29121 + }, + { + "epoch": 1.3642197966927436, + "grad_norm": 0.6224345421403941, + "learning_rate": 1.2120372611351932e-06, + "loss": 0.2759, + "step": 29122 + }, + { + "epoch": 1.364266641682672, + "grad_norm": 0.6226063582096315, + "learning_rate": 1.211874719691468e-06, + "loss": 0.2715, + "step": 29123 + }, + { + "epoch": 1.3643134866726003, + "grad_norm": 0.6102526810812713, + "learning_rate": 1.2117121856605798e-06, + "loss": 0.269, + "step": 29124 + }, + { + "epoch": 1.3643603316625286, + "grad_norm": 0.6242468808798344, + "learning_rate": 1.2115496590434653e-06, + "loss": 0.2891, + "step": 29125 + }, + { + "epoch": 1.364407176652457, + "grad_norm": 0.5961717749703203, + "learning_rate": 1.2113871398410575e-06, + "loss": 0.2612, + "step": 29126 + }, + { + "epoch": 1.3644540216423853, + "grad_norm": 0.5659095908944362, + "learning_rate": 1.211224628054294e-06, + "loss": 0.276, + "step": 29127 + }, + { + "epoch": 1.3645008666323137, + "grad_norm": 0.6214438720100603, + "learning_rate": 1.211062123684108e-06, + "loss": 0.2789, + "step": 29128 + }, + { + "epoch": 1.364547711622242, + "grad_norm": 0.5894700725155911, + "learning_rate": 1.2108996267314352e-06, + "loss": 0.2724, + "step": 29129 + }, + { + "epoch": 1.3645945566121704, + "grad_norm": 0.6291829875874929, + "learning_rate": 1.2107371371972113e-06, + "loss": 0.2705, + "step": 29130 + }, + { + "epoch": 1.3646414016020987, + "grad_norm": 0.5879984770819248, + "learning_rate": 1.2105746550823718e-06, + "loss": 0.2701, + "step": 29131 + }, + { + "epoch": 1.364688246592027, + "grad_norm": 0.5977709801838321, + "learning_rate": 1.2104121803878505e-06, + "loss": 0.2802, + "step": 29132 + }, + { + "epoch": 1.3647350915819554, + "grad_norm": 0.5651278228918223, + "learning_rate": 1.2102497131145827e-06, + "loss": 0.2479, + "step": 29133 + }, + { + "epoch": 1.3647819365718836, + "grad_norm": 0.5561870136282357, + "learning_rate": 1.2100872532635046e-06, + "loss": 0.2508, + "step": 29134 + }, + { + "epoch": 1.3648287815618119, + "grad_norm": 0.6087428222483504, + "learning_rate": 1.2099248008355493e-06, + "loss": 0.2638, + "step": 29135 + }, + { + "epoch": 1.3648756265517403, + "grad_norm": 0.6123672210178613, + "learning_rate": 1.2097623558316523e-06, + "loss": 0.2577, + "step": 29136 + }, + { + "epoch": 1.3649224715416686, + "grad_norm": 0.6446959218945203, + "learning_rate": 1.20959991825275e-06, + "loss": 0.2753, + "step": 29137 + }, + { + "epoch": 1.3649693165315968, + "grad_norm": 0.6414120427226115, + "learning_rate": 1.2094374880997744e-06, + "loss": 0.285, + "step": 29138 + }, + { + "epoch": 1.3650161615215253, + "grad_norm": 0.599592224147624, + "learning_rate": 1.209275065373662e-06, + "loss": 0.2772, + "step": 29139 + }, + { + "epoch": 1.3650630065114537, + "grad_norm": 0.6488447430318753, + "learning_rate": 1.2091126500753473e-06, + "loss": 0.2866, + "step": 29140 + }, + { + "epoch": 1.365109851501382, + "grad_norm": 0.6274379241929482, + "learning_rate": 1.2089502422057656e-06, + "loss": 0.2871, + "step": 29141 + }, + { + "epoch": 1.3651566964913102, + "grad_norm": 0.596709294696737, + "learning_rate": 1.20878784176585e-06, + "loss": 0.2741, + "step": 29142 + }, + { + "epoch": 1.3652035414812387, + "grad_norm": 0.6196011560803254, + "learning_rate": 1.2086254487565365e-06, + "loss": 0.2844, + "step": 29143 + }, + { + "epoch": 1.365250386471167, + "grad_norm": 0.5590903098193261, + "learning_rate": 1.2084630631787582e-06, + "loss": 0.2526, + "step": 29144 + }, + { + "epoch": 1.3652972314610952, + "grad_norm": 0.5722076854262839, + "learning_rate": 1.2083006850334504e-06, + "loss": 0.2679, + "step": 29145 + }, + { + "epoch": 1.3653440764510236, + "grad_norm": 0.5801707929596815, + "learning_rate": 1.2081383143215472e-06, + "loss": 0.2571, + "step": 29146 + }, + { + "epoch": 1.3653909214409519, + "grad_norm": 0.6527592929264646, + "learning_rate": 1.2079759510439835e-06, + "loss": 0.296, + "step": 29147 + }, + { + "epoch": 1.36543776643088, + "grad_norm": 0.5984359527513707, + "learning_rate": 1.2078135952016944e-06, + "loss": 0.2635, + "step": 29148 + }, + { + "epoch": 1.3654846114208086, + "grad_norm": 0.586702084689599, + "learning_rate": 1.2076512467956135e-06, + "loss": 0.2672, + "step": 29149 + }, + { + "epoch": 1.3655314564107368, + "grad_norm": 0.587893664993095, + "learning_rate": 1.2074889058266736e-06, + "loss": 0.2674, + "step": 29150 + }, + { + "epoch": 1.3655783014006653, + "grad_norm": 0.603355463873995, + "learning_rate": 1.2073265722958103e-06, + "loss": 0.2663, + "step": 29151 + }, + { + "epoch": 1.3656251463905935, + "grad_norm": 0.6247273642514745, + "learning_rate": 1.2071642462039577e-06, + "loss": 0.2807, + "step": 29152 + }, + { + "epoch": 1.365671991380522, + "grad_norm": 0.5969199650316911, + "learning_rate": 1.2070019275520499e-06, + "loss": 0.2768, + "step": 29153 + }, + { + "epoch": 1.3657188363704502, + "grad_norm": 0.5489678623828514, + "learning_rate": 1.206839616341022e-06, + "loss": 0.26, + "step": 29154 + }, + { + "epoch": 1.3657656813603785, + "grad_norm": 0.5862947630806383, + "learning_rate": 1.206677312571806e-06, + "loss": 0.257, + "step": 29155 + }, + { + "epoch": 1.365812526350307, + "grad_norm": 0.5845489369400311, + "learning_rate": 1.2065150162453383e-06, + "loss": 0.2741, + "step": 29156 + }, + { + "epoch": 1.3658593713402352, + "grad_norm": 0.552775369609799, + "learning_rate": 1.2063527273625504e-06, + "loss": 0.2542, + "step": 29157 + }, + { + "epoch": 1.3659062163301634, + "grad_norm": 0.5863919068949246, + "learning_rate": 1.206190445924377e-06, + "loss": 0.2816, + "step": 29158 + }, + { + "epoch": 1.3659530613200919, + "grad_norm": 0.6459788061939999, + "learning_rate": 1.206028171931753e-06, + "loss": 0.3033, + "step": 29159 + }, + { + "epoch": 1.36599990631002, + "grad_norm": 0.6134047127739195, + "learning_rate": 1.2058659053856114e-06, + "loss": 0.2924, + "step": 29160 + }, + { + "epoch": 1.3660467512999483, + "grad_norm": 0.6132760887260504, + "learning_rate": 1.2057036462868871e-06, + "loss": 0.2795, + "step": 29161 + }, + { + "epoch": 1.3660935962898768, + "grad_norm": 0.5379270504396045, + "learning_rate": 1.2055413946365132e-06, + "loss": 0.2401, + "step": 29162 + }, + { + "epoch": 1.366140441279805, + "grad_norm": 0.5700643337629421, + "learning_rate": 1.2053791504354223e-06, + "loss": 0.2616, + "step": 29163 + }, + { + "epoch": 1.3661872862697335, + "grad_norm": 0.588059312328345, + "learning_rate": 1.2052169136845487e-06, + "loss": 0.2832, + "step": 29164 + }, + { + "epoch": 1.3662341312596618, + "grad_norm": 0.6047246524414641, + "learning_rate": 1.2050546843848266e-06, + "loss": 0.2612, + "step": 29165 + }, + { + "epoch": 1.3662809762495902, + "grad_norm": 0.6101027019654263, + "learning_rate": 1.2048924625371894e-06, + "loss": 0.2741, + "step": 29166 + }, + { + "epoch": 1.3663278212395185, + "grad_norm": 0.6106364376774922, + "learning_rate": 1.2047302481425712e-06, + "loss": 0.283, + "step": 29167 + }, + { + "epoch": 1.3663746662294467, + "grad_norm": 0.5679439858654054, + "learning_rate": 1.2045680412019042e-06, + "loss": 0.2561, + "step": 29168 + }, + { + "epoch": 1.3664215112193752, + "grad_norm": 0.5834479394515271, + "learning_rate": 1.2044058417161233e-06, + "loss": 0.2742, + "step": 29169 + }, + { + "epoch": 1.3664683562093034, + "grad_norm": 0.6056859632064653, + "learning_rate": 1.2042436496861603e-06, + "loss": 0.277, + "step": 29170 + }, + { + "epoch": 1.3665152011992316, + "grad_norm": 0.5438923244310194, + "learning_rate": 1.2040814651129493e-06, + "loss": 0.2526, + "step": 29171 + }, + { + "epoch": 1.36656204618916, + "grad_norm": 0.6350816701905184, + "learning_rate": 1.2039192879974237e-06, + "loss": 0.2583, + "step": 29172 + }, + { + "epoch": 1.3666088911790883, + "grad_norm": 0.5975743200406733, + "learning_rate": 1.2037571183405182e-06, + "loss": 0.2622, + "step": 29173 + }, + { + "epoch": 1.3666557361690166, + "grad_norm": 0.5639645515958829, + "learning_rate": 1.2035949561431633e-06, + "loss": 0.2577, + "step": 29174 + }, + { + "epoch": 1.366702581158945, + "grad_norm": 0.5755292245709924, + "learning_rate": 1.2034328014062938e-06, + "loss": 0.2662, + "step": 29175 + }, + { + "epoch": 1.3667494261488735, + "grad_norm": 0.6097813760634664, + "learning_rate": 1.2032706541308436e-06, + "loss": 0.2731, + "step": 29176 + }, + { + "epoch": 1.3667962711388018, + "grad_norm": 0.637896142295885, + "learning_rate": 1.2031085143177438e-06, + "loss": 0.2763, + "step": 29177 + }, + { + "epoch": 1.36684311612873, + "grad_norm": 0.6574633364232328, + "learning_rate": 1.2029463819679284e-06, + "loss": 0.2899, + "step": 29178 + }, + { + "epoch": 1.3668899611186585, + "grad_norm": 0.5973014687059663, + "learning_rate": 1.202784257082332e-06, + "loss": 0.2801, + "step": 29179 + }, + { + "epoch": 1.3669368061085867, + "grad_norm": 0.6255234632235842, + "learning_rate": 1.2026221396618848e-06, + "loss": 0.2902, + "step": 29180 + }, + { + "epoch": 1.366983651098515, + "grad_norm": 0.6233653331522421, + "learning_rate": 1.2024600297075212e-06, + "loss": 0.2784, + "step": 29181 + }, + { + "epoch": 1.3670304960884434, + "grad_norm": 0.5735247193029687, + "learning_rate": 1.202297927220174e-06, + "loss": 0.2757, + "step": 29182 + }, + { + "epoch": 1.3670773410783716, + "grad_norm": 0.6195646832915294, + "learning_rate": 1.202135832200777e-06, + "loss": 0.2931, + "step": 29183 + }, + { + "epoch": 1.3671241860682999, + "grad_norm": 0.572104001603703, + "learning_rate": 1.2019737446502614e-06, + "loss": 0.2525, + "step": 29184 + }, + { + "epoch": 1.3671710310582283, + "grad_norm": 0.5564876246607862, + "learning_rate": 1.2018116645695613e-06, + "loss": 0.268, + "step": 29185 + }, + { + "epoch": 1.3672178760481566, + "grad_norm": 0.5978505740109903, + "learning_rate": 1.2016495919596077e-06, + "loss": 0.2701, + "step": 29186 + }, + { + "epoch": 1.367264721038085, + "grad_norm": 0.5707323720199817, + "learning_rate": 1.2014875268213347e-06, + "loss": 0.2794, + "step": 29187 + }, + { + "epoch": 1.3673115660280133, + "grad_norm": 0.6237888115163098, + "learning_rate": 1.2013254691556745e-06, + "loss": 0.28, + "step": 29188 + }, + { + "epoch": 1.3673584110179418, + "grad_norm": 0.592238738909225, + "learning_rate": 1.2011634189635598e-06, + "loss": 0.2686, + "step": 29189 + }, + { + "epoch": 1.36740525600787, + "grad_norm": 0.6177727020253391, + "learning_rate": 1.201001376245924e-06, + "loss": 0.2834, + "step": 29190 + }, + { + "epoch": 1.3674521009977982, + "grad_norm": 0.6152657733045168, + "learning_rate": 1.200839341003698e-06, + "loss": 0.2765, + "step": 29191 + }, + { + "epoch": 1.3674989459877267, + "grad_norm": 0.5943503003617462, + "learning_rate": 1.2006773132378161e-06, + "loss": 0.2894, + "step": 29192 + }, + { + "epoch": 1.367545790977655, + "grad_norm": 0.5935268075109127, + "learning_rate": 1.2005152929492086e-06, + "loss": 0.2716, + "step": 29193 + }, + { + "epoch": 1.3675926359675832, + "grad_norm": 0.571745631279145, + "learning_rate": 1.2003532801388091e-06, + "loss": 0.2615, + "step": 29194 + }, + { + "epoch": 1.3676394809575116, + "grad_norm": 0.5950754380977566, + "learning_rate": 1.2001912748075498e-06, + "loss": 0.2597, + "step": 29195 + }, + { + "epoch": 1.3676863259474399, + "grad_norm": 0.5854652545458963, + "learning_rate": 1.2000292769563638e-06, + "loss": 0.2767, + "step": 29196 + }, + { + "epoch": 1.3677331709373681, + "grad_norm": 0.617777660219621, + "learning_rate": 1.1998672865861816e-06, + "loss": 0.2897, + "step": 29197 + }, + { + "epoch": 1.3677800159272966, + "grad_norm": 0.5757786821678755, + "learning_rate": 1.1997053036979376e-06, + "loss": 0.2837, + "step": 29198 + }, + { + "epoch": 1.3678268609172248, + "grad_norm": 0.5773195055668772, + "learning_rate": 1.1995433282925617e-06, + "loss": 0.2569, + "step": 29199 + }, + { + "epoch": 1.3678737059071533, + "grad_norm": 0.6075752291008638, + "learning_rate": 1.199381360370987e-06, + "loss": 0.2542, + "step": 29200 + }, + { + "epoch": 1.3679205508970815, + "grad_norm": 0.6255233941616745, + "learning_rate": 1.1992193999341457e-06, + "loss": 0.278, + "step": 29201 + }, + { + "epoch": 1.36796739588701, + "grad_norm": 0.6299036301913847, + "learning_rate": 1.1990574469829698e-06, + "loss": 0.2699, + "step": 29202 + }, + { + "epoch": 1.3680142408769382, + "grad_norm": 0.5874549213202673, + "learning_rate": 1.1988955015183924e-06, + "loss": 0.2689, + "step": 29203 + }, + { + "epoch": 1.3680610858668665, + "grad_norm": 0.6288550387665818, + "learning_rate": 1.1987335635413434e-06, + "loss": 0.2838, + "step": 29204 + }, + { + "epoch": 1.368107930856795, + "grad_norm": 0.5944048849709795, + "learning_rate": 1.1985716330527563e-06, + "loss": 0.2643, + "step": 29205 + }, + { + "epoch": 1.3681547758467232, + "grad_norm": 0.6212572532415427, + "learning_rate": 1.1984097100535616e-06, + "loss": 0.2775, + "step": 29206 + }, + { + "epoch": 1.3682016208366514, + "grad_norm": 0.63445845843183, + "learning_rate": 1.1982477945446921e-06, + "loss": 0.2802, + "step": 29207 + }, + { + "epoch": 1.3682484658265799, + "grad_norm": 0.6025434774547863, + "learning_rate": 1.1980858865270792e-06, + "loss": 0.2769, + "step": 29208 + }, + { + "epoch": 1.3682953108165081, + "grad_norm": 0.6256084829492662, + "learning_rate": 1.1979239860016549e-06, + "loss": 0.281, + "step": 29209 + }, + { + "epoch": 1.3683421558064364, + "grad_norm": 0.6324585700456072, + "learning_rate": 1.1977620929693514e-06, + "loss": 0.2821, + "step": 29210 + }, + { + "epoch": 1.3683890007963648, + "grad_norm": 0.5925292922277002, + "learning_rate": 1.1976002074311e-06, + "loss": 0.2773, + "step": 29211 + }, + { + "epoch": 1.3684358457862933, + "grad_norm": 0.5669562278700194, + "learning_rate": 1.1974383293878312e-06, + "loss": 0.2717, + "step": 29212 + }, + { + "epoch": 1.3684826907762215, + "grad_norm": 0.5987289619254369, + "learning_rate": 1.1972764588404772e-06, + "loss": 0.2793, + "step": 29213 + }, + { + "epoch": 1.3685295357661498, + "grad_norm": 0.571778199613023, + "learning_rate": 1.1971145957899699e-06, + "loss": 0.2613, + "step": 29214 + }, + { + "epoch": 1.3685763807560782, + "grad_norm": 0.6632335756592628, + "learning_rate": 1.1969527402372406e-06, + "loss": 0.3025, + "step": 29215 + }, + { + "epoch": 1.3686232257460065, + "grad_norm": 0.5908232149938345, + "learning_rate": 1.1967908921832216e-06, + "loss": 0.2613, + "step": 29216 + }, + { + "epoch": 1.3686700707359347, + "grad_norm": 0.5999457480683633, + "learning_rate": 1.196629051628843e-06, + "loss": 0.2704, + "step": 29217 + }, + { + "epoch": 1.3687169157258632, + "grad_norm": 0.5395364223727488, + "learning_rate": 1.196467218575037e-06, + "loss": 0.2424, + "step": 29218 + }, + { + "epoch": 1.3687637607157914, + "grad_norm": 0.6000318618052557, + "learning_rate": 1.1963053930227336e-06, + "loss": 0.2617, + "step": 29219 + }, + { + "epoch": 1.3688106057057197, + "grad_norm": 0.5856698896720611, + "learning_rate": 1.1961435749728654e-06, + "loss": 0.2549, + "step": 29220 + }, + { + "epoch": 1.3688574506956481, + "grad_norm": 0.5936649023469431, + "learning_rate": 1.195981764426363e-06, + "loss": 0.2494, + "step": 29221 + }, + { + "epoch": 1.3689042956855764, + "grad_norm": 0.5585497162842246, + "learning_rate": 1.1958199613841586e-06, + "loss": 0.2531, + "step": 29222 + }, + { + "epoch": 1.3689511406755048, + "grad_norm": 0.5823284809466782, + "learning_rate": 1.1956581658471816e-06, + "loss": 0.2845, + "step": 29223 + }, + { + "epoch": 1.368997985665433, + "grad_norm": 0.5668530566223402, + "learning_rate": 1.1954963778163641e-06, + "loss": 0.2541, + "step": 29224 + }, + { + "epoch": 1.3690448306553615, + "grad_norm": 0.6275302687992305, + "learning_rate": 1.1953345972926383e-06, + "loss": 0.2876, + "step": 29225 + }, + { + "epoch": 1.3690916756452898, + "grad_norm": 0.5682653555502732, + "learning_rate": 1.1951728242769329e-06, + "loss": 0.2645, + "step": 29226 + }, + { + "epoch": 1.369138520635218, + "grad_norm": 0.5766492880157145, + "learning_rate": 1.1950110587701796e-06, + "loss": 0.2701, + "step": 29227 + }, + { + "epoch": 1.3691853656251465, + "grad_norm": 0.5870746486081769, + "learning_rate": 1.1948493007733109e-06, + "loss": 0.2648, + "step": 29228 + }, + { + "epoch": 1.3692322106150747, + "grad_norm": 0.5778338625337012, + "learning_rate": 1.1946875502872555e-06, + "loss": 0.2715, + "step": 29229 + }, + { + "epoch": 1.369279055605003, + "grad_norm": 0.5769364132627044, + "learning_rate": 1.194525807312945e-06, + "loss": 0.2658, + "step": 29230 + }, + { + "epoch": 1.3693259005949314, + "grad_norm": 0.6468871094251418, + "learning_rate": 1.1943640718513106e-06, + "loss": 0.2745, + "step": 29231 + }, + { + "epoch": 1.3693727455848597, + "grad_norm": 0.5823423314574949, + "learning_rate": 1.1942023439032834e-06, + "loss": 0.2779, + "step": 29232 + }, + { + "epoch": 1.369419590574788, + "grad_norm": 0.6062954534922744, + "learning_rate": 1.1940406234697928e-06, + "loss": 0.2928, + "step": 29233 + }, + { + "epoch": 1.3694664355647164, + "grad_norm": 0.5735776335799389, + "learning_rate": 1.193878910551771e-06, + "loss": 0.2445, + "step": 29234 + }, + { + "epoch": 1.3695132805546446, + "grad_norm": 0.5741354803290403, + "learning_rate": 1.193717205150147e-06, + "loss": 0.2664, + "step": 29235 + }, + { + "epoch": 1.369560125544573, + "grad_norm": 0.6027050016473662, + "learning_rate": 1.1935555072658523e-06, + "loss": 0.2788, + "step": 29236 + }, + { + "epoch": 1.3696069705345013, + "grad_norm": 0.6337970474431951, + "learning_rate": 1.193393816899817e-06, + "loss": 0.2779, + "step": 29237 + }, + { + "epoch": 1.3696538155244298, + "grad_norm": 0.6148930000335261, + "learning_rate": 1.1932321340529717e-06, + "loss": 0.2742, + "step": 29238 + }, + { + "epoch": 1.369700660514358, + "grad_norm": 0.580470806551401, + "learning_rate": 1.1930704587262484e-06, + "loss": 0.2617, + "step": 29239 + }, + { + "epoch": 1.3697475055042863, + "grad_norm": 0.5486759762237845, + "learning_rate": 1.1929087909205761e-06, + "loss": 0.2621, + "step": 29240 + }, + { + "epoch": 1.3697943504942147, + "grad_norm": 0.5911491207601487, + "learning_rate": 1.192747130636884e-06, + "loss": 0.2642, + "step": 29241 + }, + { + "epoch": 1.369841195484143, + "grad_norm": 0.6258137047178862, + "learning_rate": 1.1925854778761037e-06, + "loss": 0.2757, + "step": 29242 + }, + { + "epoch": 1.3698880404740712, + "grad_norm": 0.5645197521300317, + "learning_rate": 1.1924238326391654e-06, + "loss": 0.2782, + "step": 29243 + }, + { + "epoch": 1.3699348854639997, + "grad_norm": 0.593771599207421, + "learning_rate": 1.192262194926999e-06, + "loss": 0.2817, + "step": 29244 + }, + { + "epoch": 1.369981730453928, + "grad_norm": 0.6121258561029403, + "learning_rate": 1.1921005647405365e-06, + "loss": 0.2901, + "step": 29245 + }, + { + "epoch": 1.3700285754438561, + "grad_norm": 0.5886111141996944, + "learning_rate": 1.191938942080705e-06, + "loss": 0.2791, + "step": 29246 + }, + { + "epoch": 1.3700754204337846, + "grad_norm": 0.5608583819285513, + "learning_rate": 1.1917773269484375e-06, + "loss": 0.2592, + "step": 29247 + }, + { + "epoch": 1.370122265423713, + "grad_norm": 0.6045781300265833, + "learning_rate": 1.1916157193446614e-06, + "loss": 0.2692, + "step": 29248 + }, + { + "epoch": 1.3701691104136413, + "grad_norm": 0.5567409132726102, + "learning_rate": 1.1914541192703084e-06, + "loss": 0.2643, + "step": 29249 + }, + { + "epoch": 1.3702159554035696, + "grad_norm": 0.5607786359102626, + "learning_rate": 1.1912925267263076e-06, + "loss": 0.2623, + "step": 29250 + }, + { + "epoch": 1.370262800393498, + "grad_norm": 0.5951319948453639, + "learning_rate": 1.1911309417135897e-06, + "loss": 0.2704, + "step": 29251 + }, + { + "epoch": 1.3703096453834263, + "grad_norm": 0.6713172612498842, + "learning_rate": 1.190969364233085e-06, + "loss": 0.2861, + "step": 29252 + }, + { + "epoch": 1.3703564903733545, + "grad_norm": 0.5668870430881304, + "learning_rate": 1.1908077942857226e-06, + "loss": 0.2661, + "step": 29253 + }, + { + "epoch": 1.370403335363283, + "grad_norm": 0.6161058745054301, + "learning_rate": 1.1906462318724316e-06, + "loss": 0.2854, + "step": 29254 + }, + { + "epoch": 1.3704501803532112, + "grad_norm": 0.6127725775841755, + "learning_rate": 1.1904846769941422e-06, + "loss": 0.2931, + "step": 29255 + }, + { + "epoch": 1.3704970253431394, + "grad_norm": 0.5824486740661622, + "learning_rate": 1.1903231296517844e-06, + "loss": 0.2665, + "step": 29256 + }, + { + "epoch": 1.370543870333068, + "grad_norm": 0.5638850826276346, + "learning_rate": 1.190161589846288e-06, + "loss": 0.2746, + "step": 29257 + }, + { + "epoch": 1.3705907153229961, + "grad_norm": 0.6069733473772663, + "learning_rate": 1.1900000575785819e-06, + "loss": 0.2691, + "step": 29258 + }, + { + "epoch": 1.3706375603129246, + "grad_norm": 0.626384561656022, + "learning_rate": 1.1898385328495976e-06, + "loss": 0.2995, + "step": 29259 + }, + { + "epoch": 1.3706844053028528, + "grad_norm": 0.6484809043999896, + "learning_rate": 1.1896770156602632e-06, + "loss": 0.2866, + "step": 29260 + }, + { + "epoch": 1.3707312502927813, + "grad_norm": 0.607090713937024, + "learning_rate": 1.1895155060115073e-06, + "loss": 0.2726, + "step": 29261 + }, + { + "epoch": 1.3707780952827096, + "grad_norm": 0.5666580886435928, + "learning_rate": 1.1893540039042603e-06, + "loss": 0.2773, + "step": 29262 + }, + { + "epoch": 1.3708249402726378, + "grad_norm": 0.6260644966364622, + "learning_rate": 1.1891925093394516e-06, + "loss": 0.2791, + "step": 29263 + }, + { + "epoch": 1.3708717852625663, + "grad_norm": 0.5453404268856212, + "learning_rate": 1.1890310223180105e-06, + "loss": 0.2708, + "step": 29264 + }, + { + "epoch": 1.3709186302524945, + "grad_norm": 0.631433021829474, + "learning_rate": 1.1888695428408672e-06, + "loss": 0.2784, + "step": 29265 + }, + { + "epoch": 1.3709654752424227, + "grad_norm": 0.6141110908827953, + "learning_rate": 1.1887080709089494e-06, + "loss": 0.2798, + "step": 29266 + }, + { + "epoch": 1.3710123202323512, + "grad_norm": 0.6154376373456023, + "learning_rate": 1.1885466065231877e-06, + "loss": 0.2765, + "step": 29267 + }, + { + "epoch": 1.3710591652222794, + "grad_norm": 0.5977238286067982, + "learning_rate": 1.18838514968451e-06, + "loss": 0.2708, + "step": 29268 + }, + { + "epoch": 1.3711060102122077, + "grad_norm": 0.6020709348771156, + "learning_rate": 1.188223700393846e-06, + "loss": 0.2623, + "step": 29269 + }, + { + "epoch": 1.3711528552021361, + "grad_norm": 0.6000374019302961, + "learning_rate": 1.1880622586521247e-06, + "loss": 0.2697, + "step": 29270 + }, + { + "epoch": 1.3711997001920644, + "grad_norm": 0.6035897238478782, + "learning_rate": 1.1879008244602766e-06, + "loss": 0.2786, + "step": 29271 + }, + { + "epoch": 1.3712465451819928, + "grad_norm": 0.6323212173254187, + "learning_rate": 1.1877393978192286e-06, + "loss": 0.2637, + "step": 29272 + }, + { + "epoch": 1.371293390171921, + "grad_norm": 0.611234163800244, + "learning_rate": 1.1875779787299105e-06, + "loss": 0.2936, + "step": 29273 + }, + { + "epoch": 1.3713402351618496, + "grad_norm": 0.6039893873362074, + "learning_rate": 1.187416567193252e-06, + "loss": 0.261, + "step": 29274 + }, + { + "epoch": 1.3713870801517778, + "grad_norm": 0.6041662622568069, + "learning_rate": 1.1872551632101804e-06, + "loss": 0.268, + "step": 29275 + }, + { + "epoch": 1.371433925141706, + "grad_norm": 0.5773005504245241, + "learning_rate": 1.1870937667816254e-06, + "loss": 0.2604, + "step": 29276 + }, + { + "epoch": 1.3714807701316345, + "grad_norm": 0.5930932558227259, + "learning_rate": 1.1869323779085168e-06, + "loss": 0.2661, + "step": 29277 + }, + { + "epoch": 1.3715276151215627, + "grad_norm": 0.591171530878943, + "learning_rate": 1.186770996591781e-06, + "loss": 0.288, + "step": 29278 + }, + { + "epoch": 1.371574460111491, + "grad_norm": 0.6027480214238115, + "learning_rate": 1.1866096228323487e-06, + "loss": 0.255, + "step": 29279 + }, + { + "epoch": 1.3716213051014194, + "grad_norm": 0.5877128619796048, + "learning_rate": 1.1864482566311474e-06, + "loss": 0.2763, + "step": 29280 + }, + { + "epoch": 1.3716681500913477, + "grad_norm": 0.6053822726054809, + "learning_rate": 1.1862868979891072e-06, + "loss": 0.2652, + "step": 29281 + }, + { + "epoch": 1.371714995081276, + "grad_norm": 0.5651750493277501, + "learning_rate": 1.1861255469071552e-06, + "loss": 0.2646, + "step": 29282 + }, + { + "epoch": 1.3717618400712044, + "grad_norm": 0.5753534916705002, + "learning_rate": 1.1859642033862212e-06, + "loss": 0.2691, + "step": 29283 + }, + { + "epoch": 1.3718086850611328, + "grad_norm": 0.6047666033169716, + "learning_rate": 1.1858028674272318e-06, + "loss": 0.2722, + "step": 29284 + }, + { + "epoch": 1.371855530051061, + "grad_norm": 0.5647830969714991, + "learning_rate": 1.1856415390311168e-06, + "loss": 0.2638, + "step": 29285 + }, + { + "epoch": 1.3719023750409893, + "grad_norm": 0.6348107899299443, + "learning_rate": 1.1854802181988043e-06, + "loss": 0.2795, + "step": 29286 + }, + { + "epoch": 1.3719492200309178, + "grad_norm": 0.5575642021417377, + "learning_rate": 1.1853189049312228e-06, + "loss": 0.2512, + "step": 29287 + }, + { + "epoch": 1.371996065020846, + "grad_norm": 0.6018803320152889, + "learning_rate": 1.1851575992293013e-06, + "loss": 0.2779, + "step": 29288 + }, + { + "epoch": 1.3720429100107743, + "grad_norm": 0.5954355950673048, + "learning_rate": 1.1849963010939674e-06, + "loss": 0.2729, + "step": 29289 + }, + { + "epoch": 1.3720897550007027, + "grad_norm": 0.6456822752911822, + "learning_rate": 1.1848350105261485e-06, + "loss": 0.2862, + "step": 29290 + }, + { + "epoch": 1.372136599990631, + "grad_norm": 0.6203714084998593, + "learning_rate": 1.1846737275267735e-06, + "loss": 0.2759, + "step": 29291 + }, + { + "epoch": 1.3721834449805592, + "grad_norm": 0.5665354793979802, + "learning_rate": 1.1845124520967707e-06, + "loss": 0.2681, + "step": 29292 + }, + { + "epoch": 1.3722302899704877, + "grad_norm": 0.6197659859022252, + "learning_rate": 1.1843511842370678e-06, + "loss": 0.287, + "step": 29293 + }, + { + "epoch": 1.372277134960416, + "grad_norm": 0.608065602479706, + "learning_rate": 1.1841899239485943e-06, + "loss": 0.2818, + "step": 29294 + }, + { + "epoch": 1.3723239799503444, + "grad_norm": 0.6430537465370427, + "learning_rate": 1.1840286712322763e-06, + "loss": 0.2915, + "step": 29295 + }, + { + "epoch": 1.3723708249402726, + "grad_norm": 0.60849778886861, + "learning_rate": 1.183867426089043e-06, + "loss": 0.2949, + "step": 29296 + }, + { + "epoch": 1.372417669930201, + "grad_norm": 0.598451882598282, + "learning_rate": 1.1837061885198214e-06, + "loss": 0.2784, + "step": 29297 + }, + { + "epoch": 1.3724645149201293, + "grad_norm": 0.6236065191384246, + "learning_rate": 1.18354495852554e-06, + "loss": 0.2828, + "step": 29298 + }, + { + "epoch": 1.3725113599100576, + "grad_norm": 0.5513186021873078, + "learning_rate": 1.183383736107126e-06, + "loss": 0.2682, + "step": 29299 + }, + { + "epoch": 1.372558204899986, + "grad_norm": 0.5802987967502945, + "learning_rate": 1.1832225212655078e-06, + "loss": 0.2704, + "step": 29300 + }, + { + "epoch": 1.3726050498899143, + "grad_norm": 0.5925604827432129, + "learning_rate": 1.183061314001614e-06, + "loss": 0.2724, + "step": 29301 + }, + { + "epoch": 1.3726518948798425, + "grad_norm": 0.5643042462548724, + "learning_rate": 1.1829001143163712e-06, + "loss": 0.2824, + "step": 29302 + }, + { + "epoch": 1.372698739869771, + "grad_norm": 0.5852822844814718, + "learning_rate": 1.1827389222107067e-06, + "loss": 0.2716, + "step": 29303 + }, + { + "epoch": 1.3727455848596992, + "grad_norm": 0.6040112548530201, + "learning_rate": 1.1825777376855482e-06, + "loss": 0.2801, + "step": 29304 + }, + { + "epoch": 1.3727924298496275, + "grad_norm": 0.5947903143673904, + "learning_rate": 1.1824165607418239e-06, + "loss": 0.2627, + "step": 29305 + }, + { + "epoch": 1.372839274839556, + "grad_norm": 0.589811322120179, + "learning_rate": 1.1822553913804611e-06, + "loss": 0.2727, + "step": 29306 + }, + { + "epoch": 1.3728861198294842, + "grad_norm": 0.5901125299687683, + "learning_rate": 1.1820942296023882e-06, + "loss": 0.2728, + "step": 29307 + }, + { + "epoch": 1.3729329648194126, + "grad_norm": 0.5316550826632355, + "learning_rate": 1.1819330754085309e-06, + "loss": 0.2498, + "step": 29308 + }, + { + "epoch": 1.3729798098093409, + "grad_norm": 0.6408677297317822, + "learning_rate": 1.1817719287998184e-06, + "loss": 0.2696, + "step": 29309 + }, + { + "epoch": 1.3730266547992693, + "grad_norm": 0.5643542037686695, + "learning_rate": 1.1816107897771761e-06, + "loss": 0.2502, + "step": 29310 + }, + { + "epoch": 1.3730734997891976, + "grad_norm": 0.5547610936630362, + "learning_rate": 1.1814496583415321e-06, + "loss": 0.2585, + "step": 29311 + }, + { + "epoch": 1.3731203447791258, + "grad_norm": 0.5685362250160275, + "learning_rate": 1.1812885344938144e-06, + "loss": 0.2728, + "step": 29312 + }, + { + "epoch": 1.3731671897690543, + "grad_norm": 0.5826495201876644, + "learning_rate": 1.1811274182349506e-06, + "loss": 0.2685, + "step": 29313 + }, + { + "epoch": 1.3732140347589825, + "grad_norm": 0.5865099846187259, + "learning_rate": 1.1809663095658657e-06, + "loss": 0.2775, + "step": 29314 + }, + { + "epoch": 1.3732608797489108, + "grad_norm": 0.6166122320470756, + "learning_rate": 1.1808052084874885e-06, + "loss": 0.2761, + "step": 29315 + }, + { + "epoch": 1.3733077247388392, + "grad_norm": 0.5756999114885638, + "learning_rate": 1.1806441150007467e-06, + "loss": 0.2694, + "step": 29316 + }, + { + "epoch": 1.3733545697287675, + "grad_norm": 0.5791983286307288, + "learning_rate": 1.1804830291065653e-06, + "loss": 0.2844, + "step": 29317 + }, + { + "epoch": 1.3734014147186957, + "grad_norm": 0.6484926843818062, + "learning_rate": 1.1803219508058724e-06, + "loss": 0.2709, + "step": 29318 + }, + { + "epoch": 1.3734482597086242, + "grad_norm": 0.61268019049991, + "learning_rate": 1.1801608800995962e-06, + "loss": 0.2771, + "step": 29319 + }, + { + "epoch": 1.3734951046985526, + "grad_norm": 0.6246987488607064, + "learning_rate": 1.1799998169886612e-06, + "loss": 0.2631, + "step": 29320 + }, + { + "epoch": 1.3735419496884809, + "grad_norm": 0.5943288036475941, + "learning_rate": 1.1798387614739956e-06, + "loss": 0.2697, + "step": 29321 + }, + { + "epoch": 1.373588794678409, + "grad_norm": 0.5493031259370326, + "learning_rate": 1.1796777135565263e-06, + "loss": 0.2647, + "step": 29322 + }, + { + "epoch": 1.3736356396683376, + "grad_norm": 0.5642841608173951, + "learning_rate": 1.1795166732371804e-06, + "loss": 0.2722, + "step": 29323 + }, + { + "epoch": 1.3736824846582658, + "grad_norm": 0.5504818253530753, + "learning_rate": 1.1793556405168835e-06, + "loss": 0.2635, + "step": 29324 + }, + { + "epoch": 1.373729329648194, + "grad_norm": 0.600346374937279, + "learning_rate": 1.179194615396563e-06, + "loss": 0.2821, + "step": 29325 + }, + { + "epoch": 1.3737761746381225, + "grad_norm": 0.5914306784548526, + "learning_rate": 1.1790335978771463e-06, + "loss": 0.2678, + "step": 29326 + }, + { + "epoch": 1.3738230196280508, + "grad_norm": 0.5698141876958615, + "learning_rate": 1.1788725879595583e-06, + "loss": 0.2718, + "step": 29327 + }, + { + "epoch": 1.373869864617979, + "grad_norm": 0.6381375630844465, + "learning_rate": 1.1787115856447268e-06, + "loss": 0.2939, + "step": 29328 + }, + { + "epoch": 1.3739167096079075, + "grad_norm": 0.5875069147805569, + "learning_rate": 1.1785505909335776e-06, + "loss": 0.2665, + "step": 29329 + }, + { + "epoch": 1.3739635545978357, + "grad_norm": 0.5845403958031943, + "learning_rate": 1.178389603827039e-06, + "loss": 0.2788, + "step": 29330 + }, + { + "epoch": 1.3740103995877642, + "grad_norm": 0.6301350366874529, + "learning_rate": 1.1782286243260347e-06, + "loss": 0.2711, + "step": 29331 + }, + { + "epoch": 1.3740572445776924, + "grad_norm": 0.5827003085638196, + "learning_rate": 1.1780676524314936e-06, + "loss": 0.2615, + "step": 29332 + }, + { + "epoch": 1.3741040895676209, + "grad_norm": 0.5702003718248125, + "learning_rate": 1.1779066881443398e-06, + "loss": 0.2575, + "step": 29333 + }, + { + "epoch": 1.374150934557549, + "grad_norm": 0.6109957409754826, + "learning_rate": 1.177745731465501e-06, + "loss": 0.2901, + "step": 29334 + }, + { + "epoch": 1.3741977795474773, + "grad_norm": 0.6166386008519354, + "learning_rate": 1.177584782395903e-06, + "loss": 0.287, + "step": 29335 + }, + { + "epoch": 1.3742446245374058, + "grad_norm": 0.5993780125540096, + "learning_rate": 1.1774238409364733e-06, + "loss": 0.2838, + "step": 29336 + }, + { + "epoch": 1.374291469527334, + "grad_norm": 0.6230579095128322, + "learning_rate": 1.1772629070881358e-06, + "loss": 0.2723, + "step": 29337 + }, + { + "epoch": 1.3743383145172623, + "grad_norm": 0.6067331446318801, + "learning_rate": 1.1771019808518188e-06, + "loss": 0.2761, + "step": 29338 + }, + { + "epoch": 1.3743851595071908, + "grad_norm": 0.581517824932715, + "learning_rate": 1.1769410622284467e-06, + "loss": 0.2681, + "step": 29339 + }, + { + "epoch": 1.374432004497119, + "grad_norm": 0.5586776479667236, + "learning_rate": 1.176780151218946e-06, + "loss": 0.27, + "step": 29340 + }, + { + "epoch": 1.3744788494870472, + "grad_norm": 0.5635221888322709, + "learning_rate": 1.176619247824243e-06, + "loss": 0.2576, + "step": 29341 + }, + { + "epoch": 1.3745256944769757, + "grad_norm": 0.5813429339296078, + "learning_rate": 1.1764583520452635e-06, + "loss": 0.2757, + "step": 29342 + }, + { + "epoch": 1.374572539466904, + "grad_norm": 0.6472162196465502, + "learning_rate": 1.1762974638829347e-06, + "loss": 0.2623, + "step": 29343 + }, + { + "epoch": 1.3746193844568324, + "grad_norm": 0.6221410150339144, + "learning_rate": 1.1761365833381801e-06, + "loss": 0.2764, + "step": 29344 + }, + { + "epoch": 1.3746662294467606, + "grad_norm": 0.6320345581197626, + "learning_rate": 1.175975710411928e-06, + "loss": 0.2791, + "step": 29345 + }, + { + "epoch": 1.374713074436689, + "grad_norm": 0.6354656639127472, + "learning_rate": 1.1758148451051015e-06, + "loss": 0.2786, + "step": 29346 + }, + { + "epoch": 1.3747599194266173, + "grad_norm": 0.5810442771229986, + "learning_rate": 1.1756539874186281e-06, + "loss": 0.2685, + "step": 29347 + }, + { + "epoch": 1.3748067644165456, + "grad_norm": 0.5563850963224471, + "learning_rate": 1.1754931373534332e-06, + "loss": 0.2589, + "step": 29348 + }, + { + "epoch": 1.374853609406474, + "grad_norm": 0.6090682619156168, + "learning_rate": 1.175332294910442e-06, + "loss": 0.2721, + "step": 29349 + }, + { + "epoch": 1.3749004543964023, + "grad_norm": 0.6434830111990949, + "learning_rate": 1.175171460090582e-06, + "loss": 0.2858, + "step": 29350 + }, + { + "epoch": 1.3749472993863305, + "grad_norm": 0.6004871198657262, + "learning_rate": 1.1750106328947768e-06, + "loss": 0.268, + "step": 29351 + }, + { + "epoch": 1.374994144376259, + "grad_norm": 0.6039659019647032, + "learning_rate": 1.1748498133239517e-06, + "loss": 0.2698, + "step": 29352 + }, + { + "epoch": 1.3750409893661872, + "grad_norm": 0.5993171153666725, + "learning_rate": 1.1746890013790326e-06, + "loss": 0.2669, + "step": 29353 + }, + { + "epoch": 1.3750878343561155, + "grad_norm": 0.573300462540321, + "learning_rate": 1.1745281970609453e-06, + "loss": 0.2694, + "step": 29354 + }, + { + "epoch": 1.375134679346044, + "grad_norm": 0.5785446506319352, + "learning_rate": 1.174367400370615e-06, + "loss": 0.2717, + "step": 29355 + }, + { + "epoch": 1.3751815243359724, + "grad_norm": 0.6044341995152898, + "learning_rate": 1.1742066113089685e-06, + "loss": 0.2928, + "step": 29356 + }, + { + "epoch": 1.3752283693259006, + "grad_norm": 0.5881324959403825, + "learning_rate": 1.1740458298769284e-06, + "loss": 0.2611, + "step": 29357 + }, + { + "epoch": 1.3752752143158289, + "grad_norm": 0.6120148004834457, + "learning_rate": 1.1738850560754222e-06, + "loss": 0.2757, + "step": 29358 + }, + { + "epoch": 1.3753220593057573, + "grad_norm": 0.575268432396064, + "learning_rate": 1.1737242899053736e-06, + "loss": 0.2667, + "step": 29359 + }, + { + "epoch": 1.3753689042956856, + "grad_norm": 0.609406915638802, + "learning_rate": 1.173563531367708e-06, + "loss": 0.2728, + "step": 29360 + }, + { + "epoch": 1.3754157492856138, + "grad_norm": 0.6360028652554438, + "learning_rate": 1.1734027804633511e-06, + "loss": 0.2801, + "step": 29361 + }, + { + "epoch": 1.3754625942755423, + "grad_norm": 0.6237511938792812, + "learning_rate": 1.1732420371932285e-06, + "loss": 0.2823, + "step": 29362 + }, + { + "epoch": 1.3755094392654705, + "grad_norm": 0.6237402616424504, + "learning_rate": 1.1730813015582636e-06, + "loss": 0.2853, + "step": 29363 + }, + { + "epoch": 1.3755562842553988, + "grad_norm": 0.6302228655384234, + "learning_rate": 1.1729205735593827e-06, + "loss": 0.3006, + "step": 29364 + }, + { + "epoch": 1.3756031292453272, + "grad_norm": 0.5649593506843701, + "learning_rate": 1.172759853197511e-06, + "loss": 0.2714, + "step": 29365 + }, + { + "epoch": 1.3756499742352555, + "grad_norm": 0.6100686995403848, + "learning_rate": 1.1725991404735717e-06, + "loss": 0.3036, + "step": 29366 + }, + { + "epoch": 1.375696819225184, + "grad_norm": 0.6082882005696599, + "learning_rate": 1.1724384353884908e-06, + "loss": 0.2865, + "step": 29367 + }, + { + "epoch": 1.3757436642151122, + "grad_norm": 0.6142531987046997, + "learning_rate": 1.172277737943194e-06, + "loss": 0.2705, + "step": 29368 + }, + { + "epoch": 1.3757905092050406, + "grad_norm": 0.5640071604287866, + "learning_rate": 1.172117048138604e-06, + "loss": 0.2557, + "step": 29369 + }, + { + "epoch": 1.3758373541949689, + "grad_norm": 0.6077290197987556, + "learning_rate": 1.171956365975647e-06, + "loss": 0.2743, + "step": 29370 + }, + { + "epoch": 1.3758841991848971, + "grad_norm": 0.6048539841711054, + "learning_rate": 1.171795691455247e-06, + "loss": 0.2829, + "step": 29371 + }, + { + "epoch": 1.3759310441748256, + "grad_norm": 0.6245711166487027, + "learning_rate": 1.17163502457833e-06, + "loss": 0.2778, + "step": 29372 + }, + { + "epoch": 1.3759778891647538, + "grad_norm": 0.5852624568207437, + "learning_rate": 1.1714743653458188e-06, + "loss": 0.2662, + "step": 29373 + }, + { + "epoch": 1.376024734154682, + "grad_norm": 0.5541936874407468, + "learning_rate": 1.1713137137586395e-06, + "loss": 0.2586, + "step": 29374 + }, + { + "epoch": 1.3760715791446105, + "grad_norm": 0.5529943260656442, + "learning_rate": 1.1711530698177148e-06, + "loss": 0.2516, + "step": 29375 + }, + { + "epoch": 1.3761184241345388, + "grad_norm": 0.5646142153639814, + "learning_rate": 1.1709924335239702e-06, + "loss": 0.2592, + "step": 29376 + }, + { + "epoch": 1.376165269124467, + "grad_norm": 0.6112812613091424, + "learning_rate": 1.1708318048783304e-06, + "loss": 0.2752, + "step": 29377 + }, + { + "epoch": 1.3762121141143955, + "grad_norm": 0.5798162970586184, + "learning_rate": 1.1706711838817192e-06, + "loss": 0.2687, + "step": 29378 + }, + { + "epoch": 1.3762589591043237, + "grad_norm": 0.6012281071101573, + "learning_rate": 1.1705105705350623e-06, + "loss": 0.2751, + "step": 29379 + }, + { + "epoch": 1.3763058040942522, + "grad_norm": 0.5792999641020248, + "learning_rate": 1.1703499648392826e-06, + "loss": 0.2616, + "step": 29380 + }, + { + "epoch": 1.3763526490841804, + "grad_norm": 0.6093827091893271, + "learning_rate": 1.170189366795304e-06, + "loss": 0.2671, + "step": 29381 + }, + { + "epoch": 1.3763994940741089, + "grad_norm": 0.5762890336580926, + "learning_rate": 1.1700287764040517e-06, + "loss": 0.2675, + "step": 29382 + }, + { + "epoch": 1.3764463390640371, + "grad_norm": 0.6117257558716032, + "learning_rate": 1.1698681936664488e-06, + "loss": 0.2768, + "step": 29383 + }, + { + "epoch": 1.3764931840539654, + "grad_norm": 0.554203774863581, + "learning_rate": 1.1697076185834205e-06, + "loss": 0.2545, + "step": 29384 + }, + { + "epoch": 1.3765400290438938, + "grad_norm": 0.5471940373218844, + "learning_rate": 1.1695470511558914e-06, + "loss": 0.2507, + "step": 29385 + }, + { + "epoch": 1.376586874033822, + "grad_norm": 0.591268165676421, + "learning_rate": 1.1693864913847838e-06, + "loss": 0.285, + "step": 29386 + }, + { + "epoch": 1.3766337190237503, + "grad_norm": 0.5595081799428284, + "learning_rate": 1.1692259392710234e-06, + "loss": 0.2623, + "step": 29387 + }, + { + "epoch": 1.3766805640136788, + "grad_norm": 0.5800417068222481, + "learning_rate": 1.1690653948155322e-06, + "loss": 0.29, + "step": 29388 + }, + { + "epoch": 1.376727409003607, + "grad_norm": 0.6090635553162176, + "learning_rate": 1.1689048580192353e-06, + "loss": 0.2836, + "step": 29389 + }, + { + "epoch": 1.3767742539935353, + "grad_norm": 0.5816596320046601, + "learning_rate": 1.168744328883056e-06, + "loss": 0.2696, + "step": 29390 + }, + { + "epoch": 1.3768210989834637, + "grad_norm": 0.5830613122391545, + "learning_rate": 1.168583807407919e-06, + "loss": 0.2658, + "step": 29391 + }, + { + "epoch": 1.3768679439733922, + "grad_norm": 0.5775795412665876, + "learning_rate": 1.1684232935947482e-06, + "loss": 0.2643, + "step": 29392 + }, + { + "epoch": 1.3769147889633204, + "grad_norm": 0.6712647206210313, + "learning_rate": 1.1682627874444667e-06, + "loss": 0.2987, + "step": 29393 + }, + { + "epoch": 1.3769616339532487, + "grad_norm": 0.6072260863325805, + "learning_rate": 1.1681022889579971e-06, + "loss": 0.2651, + "step": 29394 + }, + { + "epoch": 1.3770084789431771, + "grad_norm": 0.5798966574510951, + "learning_rate": 1.1679417981362642e-06, + "loss": 0.2731, + "step": 29395 + }, + { + "epoch": 1.3770553239331054, + "grad_norm": 0.6073253132500539, + "learning_rate": 1.1677813149801914e-06, + "loss": 0.2641, + "step": 29396 + }, + { + "epoch": 1.3771021689230336, + "grad_norm": 0.6147729757841639, + "learning_rate": 1.1676208394907024e-06, + "loss": 0.2773, + "step": 29397 + }, + { + "epoch": 1.377149013912962, + "grad_norm": 0.5962795836300332, + "learning_rate": 1.1674603716687204e-06, + "loss": 0.2839, + "step": 29398 + }, + { + "epoch": 1.3771958589028903, + "grad_norm": 0.5766521653853537, + "learning_rate": 1.1672999115151701e-06, + "loss": 0.2653, + "step": 29399 + }, + { + "epoch": 1.3772427038928186, + "grad_norm": 0.5294406337304405, + "learning_rate": 1.167139459030974e-06, + "loss": 0.2632, + "step": 29400 + }, + { + "epoch": 1.377289548882747, + "grad_norm": 0.6284206184153537, + "learning_rate": 1.166979014217054e-06, + "loss": 0.279, + "step": 29401 + }, + { + "epoch": 1.3773363938726753, + "grad_norm": 0.5738128288845804, + "learning_rate": 1.1668185770743351e-06, + "loss": 0.2709, + "step": 29402 + }, + { + "epoch": 1.3773832388626037, + "grad_norm": 0.6106028203918245, + "learning_rate": 1.16665814760374e-06, + "loss": 0.2908, + "step": 29403 + }, + { + "epoch": 1.377430083852532, + "grad_norm": 0.5645696435454672, + "learning_rate": 1.166497725806192e-06, + "loss": 0.2596, + "step": 29404 + }, + { + "epoch": 1.3774769288424604, + "grad_norm": 0.5835148369222777, + "learning_rate": 1.1663373116826157e-06, + "loss": 0.2692, + "step": 29405 + }, + { + "epoch": 1.3775237738323887, + "grad_norm": 0.5851463102018218, + "learning_rate": 1.166176905233932e-06, + "loss": 0.2605, + "step": 29406 + }, + { + "epoch": 1.377570618822317, + "grad_norm": 0.6352696954666687, + "learning_rate": 1.1660165064610657e-06, + "loss": 0.2735, + "step": 29407 + }, + { + "epoch": 1.3776174638122454, + "grad_norm": 0.5738887300833669, + "learning_rate": 1.165856115364938e-06, + "loss": 0.2691, + "step": 29408 + }, + { + "epoch": 1.3776643088021736, + "grad_norm": 0.5694455577061648, + "learning_rate": 1.1656957319464735e-06, + "loss": 0.2672, + "step": 29409 + }, + { + "epoch": 1.3777111537921019, + "grad_norm": 0.5900005718262634, + "learning_rate": 1.1655353562065943e-06, + "loss": 0.2735, + "step": 29410 + }, + { + "epoch": 1.3777579987820303, + "grad_norm": 0.5778900460247677, + "learning_rate": 1.165374988146225e-06, + "loss": 0.2722, + "step": 29411 + }, + { + "epoch": 1.3778048437719586, + "grad_norm": 0.6156801333814674, + "learning_rate": 1.165214627766286e-06, + "loss": 0.2565, + "step": 29412 + }, + { + "epoch": 1.3778516887618868, + "grad_norm": 0.5952274495095509, + "learning_rate": 1.1650542750677016e-06, + "loss": 0.2635, + "step": 29413 + }, + { + "epoch": 1.3778985337518153, + "grad_norm": 0.5691049922810187, + "learning_rate": 1.164893930051395e-06, + "loss": 0.2689, + "step": 29414 + }, + { + "epoch": 1.3779453787417435, + "grad_norm": 0.5609110491793223, + "learning_rate": 1.1647335927182873e-06, + "loss": 0.2693, + "step": 29415 + }, + { + "epoch": 1.377992223731672, + "grad_norm": 0.5483425649861702, + "learning_rate": 1.1645732630693025e-06, + "loss": 0.2539, + "step": 29416 + }, + { + "epoch": 1.3780390687216002, + "grad_norm": 0.5566837953509298, + "learning_rate": 1.1644129411053638e-06, + "loss": 0.2612, + "step": 29417 + }, + { + "epoch": 1.3780859137115287, + "grad_norm": 0.5637229390642705, + "learning_rate": 1.1642526268273916e-06, + "loss": 0.2657, + "step": 29418 + }, + { + "epoch": 1.378132758701457, + "grad_norm": 0.5627599828184606, + "learning_rate": 1.16409232023631e-06, + "loss": 0.2471, + "step": 29419 + }, + { + "epoch": 1.3781796036913851, + "grad_norm": 0.591690089405638, + "learning_rate": 1.1639320213330415e-06, + "loss": 0.2566, + "step": 29420 + }, + { + "epoch": 1.3782264486813136, + "grad_norm": 0.5919357591303882, + "learning_rate": 1.1637717301185091e-06, + "loss": 0.2722, + "step": 29421 + }, + { + "epoch": 1.3782732936712419, + "grad_norm": 0.6096775939406297, + "learning_rate": 1.1636114465936337e-06, + "loss": 0.2721, + "step": 29422 + }, + { + "epoch": 1.37832013866117, + "grad_norm": 0.6120208674050518, + "learning_rate": 1.1634511707593397e-06, + "loss": 0.2817, + "step": 29423 + }, + { + "epoch": 1.3783669836510986, + "grad_norm": 0.6034016566731673, + "learning_rate": 1.163290902616547e-06, + "loss": 0.2637, + "step": 29424 + }, + { + "epoch": 1.3784138286410268, + "grad_norm": 0.5808850590740062, + "learning_rate": 1.1631306421661795e-06, + "loss": 0.2686, + "step": 29425 + }, + { + "epoch": 1.378460673630955, + "grad_norm": 0.642490832271982, + "learning_rate": 1.162970389409159e-06, + "loss": 0.2771, + "step": 29426 + }, + { + "epoch": 1.3785075186208835, + "grad_norm": 0.6097253017504394, + "learning_rate": 1.162810144346408e-06, + "loss": 0.2812, + "step": 29427 + }, + { + "epoch": 1.378554363610812, + "grad_norm": 0.6264318362530853, + "learning_rate": 1.1626499069788491e-06, + "loss": 0.2868, + "step": 29428 + }, + { + "epoch": 1.3786012086007402, + "grad_norm": 0.5719410680096874, + "learning_rate": 1.162489677307404e-06, + "loss": 0.2585, + "step": 29429 + }, + { + "epoch": 1.3786480535906684, + "grad_norm": 0.6386892528787534, + "learning_rate": 1.162329455332994e-06, + "loss": 0.272, + "step": 29430 + }, + { + "epoch": 1.378694898580597, + "grad_norm": 0.6422426978226213, + "learning_rate": 1.1621692410565413e-06, + "loss": 0.2822, + "step": 29431 + }, + { + "epoch": 1.3787417435705251, + "grad_norm": 0.5828926314301778, + "learning_rate": 1.1620090344789685e-06, + "loss": 0.2711, + "step": 29432 + }, + { + "epoch": 1.3787885885604534, + "grad_norm": 0.6191045044617506, + "learning_rate": 1.1618488356011978e-06, + "loss": 0.2699, + "step": 29433 + }, + { + "epoch": 1.3788354335503819, + "grad_norm": 0.6024165510615062, + "learning_rate": 1.1616886444241512e-06, + "loss": 0.2725, + "step": 29434 + }, + { + "epoch": 1.37888227854031, + "grad_norm": 0.5682303989869942, + "learning_rate": 1.1615284609487495e-06, + "loss": 0.272, + "step": 29435 + }, + { + "epoch": 1.3789291235302383, + "grad_norm": 0.6437814733918497, + "learning_rate": 1.1613682851759159e-06, + "loss": 0.2869, + "step": 29436 + }, + { + "epoch": 1.3789759685201668, + "grad_norm": 0.5725001050287325, + "learning_rate": 1.1612081171065704e-06, + "loss": 0.2515, + "step": 29437 + }, + { + "epoch": 1.379022813510095, + "grad_norm": 0.6007823648584409, + "learning_rate": 1.1610479567416355e-06, + "loss": 0.2807, + "step": 29438 + }, + { + "epoch": 1.3790696585000235, + "grad_norm": 0.6260658023541146, + "learning_rate": 1.1608878040820334e-06, + "loss": 0.2803, + "step": 29439 + }, + { + "epoch": 1.3791165034899517, + "grad_norm": 0.6560406156967784, + "learning_rate": 1.1607276591286853e-06, + "loss": 0.284, + "step": 29440 + }, + { + "epoch": 1.3791633484798802, + "grad_norm": 0.6320151065705318, + "learning_rate": 1.1605675218825139e-06, + "loss": 0.2907, + "step": 29441 + }, + { + "epoch": 1.3792101934698084, + "grad_norm": 0.6257181884271029, + "learning_rate": 1.1604073923444395e-06, + "loss": 0.2852, + "step": 29442 + }, + { + "epoch": 1.3792570384597367, + "grad_norm": 0.6104538686962092, + "learning_rate": 1.160247270515383e-06, + "loss": 0.2896, + "step": 29443 + }, + { + "epoch": 1.3793038834496651, + "grad_norm": 0.5844685251786934, + "learning_rate": 1.1600871563962671e-06, + "loss": 0.2785, + "step": 29444 + }, + { + "epoch": 1.3793507284395934, + "grad_norm": 0.6274416079359278, + "learning_rate": 1.1599270499880126e-06, + "loss": 0.2757, + "step": 29445 + }, + { + "epoch": 1.3793975734295216, + "grad_norm": 0.562715019027914, + "learning_rate": 1.1597669512915409e-06, + "loss": 0.271, + "step": 29446 + }, + { + "epoch": 1.37944441841945, + "grad_norm": 0.6159195908057136, + "learning_rate": 1.1596068603077745e-06, + "loss": 0.2823, + "step": 29447 + }, + { + "epoch": 1.3794912634093783, + "grad_norm": 0.5711602903074904, + "learning_rate": 1.159446777037633e-06, + "loss": 0.2688, + "step": 29448 + }, + { + "epoch": 1.3795381083993066, + "grad_norm": 0.6403560677554881, + "learning_rate": 1.1592867014820395e-06, + "loss": 0.2812, + "step": 29449 + }, + { + "epoch": 1.379584953389235, + "grad_norm": 0.5925619588090175, + "learning_rate": 1.1591266336419127e-06, + "loss": 0.2691, + "step": 29450 + }, + { + "epoch": 1.3796317983791633, + "grad_norm": 0.6030731180367256, + "learning_rate": 1.1589665735181752e-06, + "loss": 0.2674, + "step": 29451 + }, + { + "epoch": 1.3796786433690917, + "grad_norm": 0.5973165416610288, + "learning_rate": 1.158806521111748e-06, + "loss": 0.2663, + "step": 29452 + }, + { + "epoch": 1.37972548835902, + "grad_norm": 0.573708419661372, + "learning_rate": 1.1586464764235534e-06, + "loss": 0.2699, + "step": 29453 + }, + { + "epoch": 1.3797723333489484, + "grad_norm": 0.6113904864972102, + "learning_rate": 1.15848643945451e-06, + "loss": 0.2687, + "step": 29454 + }, + { + "epoch": 1.3798191783388767, + "grad_norm": 0.5625946778023255, + "learning_rate": 1.15832641020554e-06, + "loss": 0.2642, + "step": 29455 + }, + { + "epoch": 1.379866023328805, + "grad_norm": 0.5895144413670006, + "learning_rate": 1.1581663886775652e-06, + "loss": 0.2785, + "step": 29456 + }, + { + "epoch": 1.3799128683187334, + "grad_norm": 0.5545712651729577, + "learning_rate": 1.1580063748715048e-06, + "loss": 0.2822, + "step": 29457 + }, + { + "epoch": 1.3799597133086616, + "grad_norm": 0.6358590416463753, + "learning_rate": 1.1578463687882802e-06, + "loss": 0.2799, + "step": 29458 + }, + { + "epoch": 1.3800065582985899, + "grad_norm": 0.5804218496591477, + "learning_rate": 1.1576863704288124e-06, + "loss": 0.2754, + "step": 29459 + }, + { + "epoch": 1.3800534032885183, + "grad_norm": 0.6400923348188691, + "learning_rate": 1.1575263797940233e-06, + "loss": 0.2857, + "step": 29460 + }, + { + "epoch": 1.3801002482784466, + "grad_norm": 0.5709230316163754, + "learning_rate": 1.1573663968848314e-06, + "loss": 0.2722, + "step": 29461 + }, + { + "epoch": 1.3801470932683748, + "grad_norm": 0.5636810286999766, + "learning_rate": 1.1572064217021586e-06, + "loss": 0.2839, + "step": 29462 + }, + { + "epoch": 1.3801939382583033, + "grad_norm": 0.5915342234477653, + "learning_rate": 1.1570464542469263e-06, + "loss": 0.2754, + "step": 29463 + }, + { + "epoch": 1.3802407832482317, + "grad_norm": 0.6105861095598863, + "learning_rate": 1.1568864945200528e-06, + "loss": 0.2644, + "step": 29464 + }, + { + "epoch": 1.38028762823816, + "grad_norm": 0.5761922929233422, + "learning_rate": 1.1567265425224603e-06, + "loss": 0.2758, + "step": 29465 + }, + { + "epoch": 1.3803344732280882, + "grad_norm": 0.5892831722849764, + "learning_rate": 1.1565665982550698e-06, + "loss": 0.2729, + "step": 29466 + }, + { + "epoch": 1.3803813182180167, + "grad_norm": 0.6823671303469252, + "learning_rate": 1.1564066617187998e-06, + "loss": 0.2749, + "step": 29467 + }, + { + "epoch": 1.380428163207945, + "grad_norm": 0.563823881167297, + "learning_rate": 1.1562467329145715e-06, + "loss": 0.2718, + "step": 29468 + }, + { + "epoch": 1.3804750081978732, + "grad_norm": 0.6430492677356406, + "learning_rate": 1.1560868118433062e-06, + "loss": 0.2758, + "step": 29469 + }, + { + "epoch": 1.3805218531878016, + "grad_norm": 0.5992325716768003, + "learning_rate": 1.1559268985059239e-06, + "loss": 0.2644, + "step": 29470 + }, + { + "epoch": 1.3805686981777299, + "grad_norm": 0.5272168022666679, + "learning_rate": 1.1557669929033438e-06, + "loss": 0.2586, + "step": 29471 + }, + { + "epoch": 1.380615543167658, + "grad_norm": 0.5400684194019882, + "learning_rate": 1.1556070950364876e-06, + "loss": 0.265, + "step": 29472 + }, + { + "epoch": 1.3806623881575866, + "grad_norm": 0.6521577935673761, + "learning_rate": 1.1554472049062737e-06, + "loss": 0.2857, + "step": 29473 + }, + { + "epoch": 1.3807092331475148, + "grad_norm": 0.5755779309420253, + "learning_rate": 1.1552873225136236e-06, + "loss": 0.2559, + "step": 29474 + }, + { + "epoch": 1.3807560781374433, + "grad_norm": 0.6063662697751916, + "learning_rate": 1.1551274478594565e-06, + "loss": 0.267, + "step": 29475 + }, + { + "epoch": 1.3808029231273715, + "grad_norm": 0.6190731722704612, + "learning_rate": 1.1549675809446938e-06, + "loss": 0.2737, + "step": 29476 + }, + { + "epoch": 1.3808497681173, + "grad_norm": 0.6262590617025299, + "learning_rate": 1.1548077217702542e-06, + "loss": 0.2812, + "step": 29477 + }, + { + "epoch": 1.3808966131072282, + "grad_norm": 0.5923033260570624, + "learning_rate": 1.1546478703370587e-06, + "loss": 0.2691, + "step": 29478 + }, + { + "epoch": 1.3809434580971565, + "grad_norm": 0.5931919488103087, + "learning_rate": 1.1544880266460256e-06, + "loss": 0.2772, + "step": 29479 + }, + { + "epoch": 1.380990303087085, + "grad_norm": 0.5763352668931637, + "learning_rate": 1.1543281906980757e-06, + "loss": 0.2674, + "step": 29480 + }, + { + "epoch": 1.3810371480770132, + "grad_norm": 0.609828372963716, + "learning_rate": 1.1541683624941289e-06, + "loss": 0.2782, + "step": 29481 + }, + { + "epoch": 1.3810839930669414, + "grad_norm": 0.5515632021930484, + "learning_rate": 1.154008542035105e-06, + "loss": 0.2536, + "step": 29482 + }, + { + "epoch": 1.3811308380568699, + "grad_norm": 0.5872380354934906, + "learning_rate": 1.1538487293219245e-06, + "loss": 0.273, + "step": 29483 + }, + { + "epoch": 1.381177683046798, + "grad_norm": 0.609378790604922, + "learning_rate": 1.1536889243555051e-06, + "loss": 0.2865, + "step": 29484 + }, + { + "epoch": 1.3812245280367264, + "grad_norm": 0.5749525716162, + "learning_rate": 1.1535291271367689e-06, + "loss": 0.2753, + "step": 29485 + }, + { + "epoch": 1.3812713730266548, + "grad_norm": 0.6245223906943609, + "learning_rate": 1.1533693376666328e-06, + "loss": 0.2876, + "step": 29486 + }, + { + "epoch": 1.381318218016583, + "grad_norm": 0.5556059905127191, + "learning_rate": 1.1532095559460177e-06, + "loss": 0.2529, + "step": 29487 + }, + { + "epoch": 1.3813650630065115, + "grad_norm": 0.5855861820975308, + "learning_rate": 1.1530497819758434e-06, + "loss": 0.2739, + "step": 29488 + }, + { + "epoch": 1.3814119079964398, + "grad_norm": 0.5499020403209991, + "learning_rate": 1.1528900157570288e-06, + "loss": 0.2637, + "step": 29489 + }, + { + "epoch": 1.3814587529863682, + "grad_norm": 0.5836307609963534, + "learning_rate": 1.1527302572904948e-06, + "loss": 0.2791, + "step": 29490 + }, + { + "epoch": 1.3815055979762965, + "grad_norm": 0.6308881816015358, + "learning_rate": 1.1525705065771595e-06, + "loss": 0.2828, + "step": 29491 + }, + { + "epoch": 1.3815524429662247, + "grad_norm": 0.6259432118269505, + "learning_rate": 1.152410763617941e-06, + "loss": 0.2798, + "step": 29492 + }, + { + "epoch": 1.3815992879561532, + "grad_norm": 0.5998389892874366, + "learning_rate": 1.1522510284137601e-06, + "loss": 0.2826, + "step": 29493 + }, + { + "epoch": 1.3816461329460814, + "grad_norm": 0.6271303946569384, + "learning_rate": 1.1520913009655358e-06, + "loss": 0.2565, + "step": 29494 + }, + { + "epoch": 1.3816929779360096, + "grad_norm": 0.5816359053360362, + "learning_rate": 1.1519315812741871e-06, + "loss": 0.2752, + "step": 29495 + }, + { + "epoch": 1.381739822925938, + "grad_norm": 0.570268389895226, + "learning_rate": 1.1517718693406346e-06, + "loss": 0.2746, + "step": 29496 + }, + { + "epoch": 1.3817866679158664, + "grad_norm": 0.5911427297488469, + "learning_rate": 1.151612165165795e-06, + "loss": 0.2654, + "step": 29497 + }, + { + "epoch": 1.3818335129057946, + "grad_norm": 0.6042369190278174, + "learning_rate": 1.1514524687505892e-06, + "loss": 0.2802, + "step": 29498 + }, + { + "epoch": 1.381880357895723, + "grad_norm": 0.5891353748825291, + "learning_rate": 1.1512927800959351e-06, + "loss": 0.2731, + "step": 29499 + }, + { + "epoch": 1.3819272028856515, + "grad_norm": 0.5903882748695994, + "learning_rate": 1.1511330992027517e-06, + "loss": 0.2757, + "step": 29500 + }, + { + "epoch": 1.3819740478755798, + "grad_norm": 0.6081073024201124, + "learning_rate": 1.1509734260719582e-06, + "loss": 0.2847, + "step": 29501 + }, + { + "epoch": 1.382020892865508, + "grad_norm": 0.60438974729331, + "learning_rate": 1.1508137607044748e-06, + "loss": 0.2706, + "step": 29502 + }, + { + "epoch": 1.3820677378554365, + "grad_norm": 0.5651779777848197, + "learning_rate": 1.1506541031012178e-06, + "loss": 0.2807, + "step": 29503 + }, + { + "epoch": 1.3821145828453647, + "grad_norm": 0.5552027030803123, + "learning_rate": 1.1504944532631076e-06, + "loss": 0.2537, + "step": 29504 + }, + { + "epoch": 1.382161427835293, + "grad_norm": 0.5995258990305101, + "learning_rate": 1.1503348111910633e-06, + "loss": 0.2681, + "step": 29505 + }, + { + "epoch": 1.3822082728252214, + "grad_norm": 0.6018829264465074, + "learning_rate": 1.1501751768860024e-06, + "loss": 0.2831, + "step": 29506 + }, + { + "epoch": 1.3822551178151496, + "grad_norm": 0.6093097081053286, + "learning_rate": 1.1500155503488436e-06, + "loss": 0.2775, + "step": 29507 + }, + { + "epoch": 1.3823019628050779, + "grad_norm": 0.5619731152250574, + "learning_rate": 1.1498559315805074e-06, + "loss": 0.2571, + "step": 29508 + }, + { + "epoch": 1.3823488077950064, + "grad_norm": 0.5776258366670847, + "learning_rate": 1.1496963205819097e-06, + "loss": 0.2636, + "step": 29509 + }, + { + "epoch": 1.3823956527849346, + "grad_norm": 0.5357329789555043, + "learning_rate": 1.1495367173539705e-06, + "loss": 0.2595, + "step": 29510 + }, + { + "epoch": 1.382442497774863, + "grad_norm": 0.5486636571374045, + "learning_rate": 1.1493771218976079e-06, + "loss": 0.2511, + "step": 29511 + }, + { + "epoch": 1.3824893427647913, + "grad_norm": 0.614947891387888, + "learning_rate": 1.1492175342137416e-06, + "loss": 0.2646, + "step": 29512 + }, + { + "epoch": 1.3825361877547198, + "grad_norm": 0.5822890487841749, + "learning_rate": 1.1490579543032879e-06, + "loss": 0.2743, + "step": 29513 + }, + { + "epoch": 1.382583032744648, + "grad_norm": 0.5946884554080959, + "learning_rate": 1.148898382167167e-06, + "loss": 0.2609, + "step": 29514 + }, + { + "epoch": 1.3826298777345762, + "grad_norm": 0.5723021948877588, + "learning_rate": 1.1487388178062953e-06, + "loss": 0.2591, + "step": 29515 + }, + { + "epoch": 1.3826767227245047, + "grad_norm": 0.5947923571570216, + "learning_rate": 1.1485792612215924e-06, + "loss": 0.2894, + "step": 29516 + }, + { + "epoch": 1.382723567714433, + "grad_norm": 0.6005540577817977, + "learning_rate": 1.148419712413976e-06, + "loss": 0.2838, + "step": 29517 + }, + { + "epoch": 1.3827704127043612, + "grad_norm": 0.6681327954053273, + "learning_rate": 1.1482601713843642e-06, + "loss": 0.2828, + "step": 29518 + }, + { + "epoch": 1.3828172576942896, + "grad_norm": 0.5784937103397173, + "learning_rate": 1.1481006381336765e-06, + "loss": 0.2621, + "step": 29519 + }, + { + "epoch": 1.3828641026842179, + "grad_norm": 0.6171443024987608, + "learning_rate": 1.1479411126628299e-06, + "loss": 0.2714, + "step": 29520 + }, + { + "epoch": 1.3829109476741461, + "grad_norm": 0.5970678776306731, + "learning_rate": 1.1477815949727413e-06, + "loss": 0.2495, + "step": 29521 + }, + { + "epoch": 1.3829577926640746, + "grad_norm": 0.6520901367710227, + "learning_rate": 1.14762208506433e-06, + "loss": 0.2827, + "step": 29522 + }, + { + "epoch": 1.3830046376540028, + "grad_norm": 0.6262767743226894, + "learning_rate": 1.1474625829385136e-06, + "loss": 0.2841, + "step": 29523 + }, + { + "epoch": 1.3830514826439313, + "grad_norm": 0.5892640460820672, + "learning_rate": 1.14730308859621e-06, + "loss": 0.2708, + "step": 29524 + }, + { + "epoch": 1.3830983276338595, + "grad_norm": 0.6390754518043755, + "learning_rate": 1.1471436020383382e-06, + "loss": 0.2902, + "step": 29525 + }, + { + "epoch": 1.383145172623788, + "grad_norm": 0.5852796999220516, + "learning_rate": 1.1469841232658139e-06, + "loss": 0.2593, + "step": 29526 + }, + { + "epoch": 1.3831920176137162, + "grad_norm": 0.6112116479452567, + "learning_rate": 1.1468246522795567e-06, + "loss": 0.272, + "step": 29527 + }, + { + "epoch": 1.3832388626036445, + "grad_norm": 0.6261919351035479, + "learning_rate": 1.1466651890804827e-06, + "loss": 0.2825, + "step": 29528 + }, + { + "epoch": 1.383285707593573, + "grad_norm": 0.6186196942324605, + "learning_rate": 1.1465057336695104e-06, + "loss": 0.275, + "step": 29529 + }, + { + "epoch": 1.3833325525835012, + "grad_norm": 0.5868886123768342, + "learning_rate": 1.1463462860475574e-06, + "loss": 0.2659, + "step": 29530 + }, + { + "epoch": 1.3833793975734294, + "grad_norm": 0.6238437690344388, + "learning_rate": 1.1461868462155416e-06, + "loss": 0.2681, + "step": 29531 + }, + { + "epoch": 1.3834262425633579, + "grad_norm": 0.5837281515478882, + "learning_rate": 1.1460274141743808e-06, + "loss": 0.2785, + "step": 29532 + }, + { + "epoch": 1.3834730875532861, + "grad_norm": 0.5965550841585213, + "learning_rate": 1.1458679899249919e-06, + "loss": 0.2802, + "step": 29533 + }, + { + "epoch": 1.3835199325432144, + "grad_norm": 0.5966797666716666, + "learning_rate": 1.1457085734682917e-06, + "loss": 0.2862, + "step": 29534 + }, + { + "epoch": 1.3835667775331428, + "grad_norm": 0.6383233083808625, + "learning_rate": 1.1455491648051981e-06, + "loss": 0.2752, + "step": 29535 + }, + { + "epoch": 1.3836136225230713, + "grad_norm": 0.6146347190070767, + "learning_rate": 1.1453897639366288e-06, + "loss": 0.2614, + "step": 29536 + }, + { + "epoch": 1.3836604675129995, + "grad_norm": 0.6537887520165038, + "learning_rate": 1.1452303708635007e-06, + "loss": 0.2913, + "step": 29537 + }, + { + "epoch": 1.3837073125029278, + "grad_norm": 0.5461992868462029, + "learning_rate": 1.1450709855867314e-06, + "loss": 0.2777, + "step": 29538 + }, + { + "epoch": 1.3837541574928562, + "grad_norm": 0.6040160946568731, + "learning_rate": 1.144911608107239e-06, + "loss": 0.2754, + "step": 29539 + }, + { + "epoch": 1.3838010024827845, + "grad_norm": 0.589657782330908, + "learning_rate": 1.1447522384259396e-06, + "loss": 0.2774, + "step": 29540 + }, + { + "epoch": 1.3838478474727127, + "grad_norm": 0.5699613325450944, + "learning_rate": 1.1445928765437496e-06, + "loss": 0.2714, + "step": 29541 + }, + { + "epoch": 1.3838946924626412, + "grad_norm": 0.6179354663396305, + "learning_rate": 1.1444335224615868e-06, + "loss": 0.2821, + "step": 29542 + }, + { + "epoch": 1.3839415374525694, + "grad_norm": 0.6286871956206675, + "learning_rate": 1.1442741761803686e-06, + "loss": 0.2786, + "step": 29543 + }, + { + "epoch": 1.3839883824424977, + "grad_norm": 0.594292963572957, + "learning_rate": 1.1441148377010116e-06, + "loss": 0.2629, + "step": 29544 + }, + { + "epoch": 1.3840352274324261, + "grad_norm": 0.5990197523786046, + "learning_rate": 1.143955507024434e-06, + "loss": 0.2853, + "step": 29545 + }, + { + "epoch": 1.3840820724223544, + "grad_norm": 0.6046072840556498, + "learning_rate": 1.1437961841515504e-06, + "loss": 0.2765, + "step": 29546 + }, + { + "epoch": 1.3841289174122828, + "grad_norm": 0.5697900033330923, + "learning_rate": 1.1436368690832802e-06, + "loss": 0.2899, + "step": 29547 + }, + { + "epoch": 1.384175762402211, + "grad_norm": 0.6016597210643391, + "learning_rate": 1.1434775618205377e-06, + "loss": 0.2868, + "step": 29548 + }, + { + "epoch": 1.3842226073921395, + "grad_norm": 0.5700019134228287, + "learning_rate": 1.143318262364241e-06, + "loss": 0.2619, + "step": 29549 + }, + { + "epoch": 1.3842694523820678, + "grad_norm": 0.6748270505857873, + "learning_rate": 1.1431589707153068e-06, + "loss": 0.2678, + "step": 29550 + }, + { + "epoch": 1.384316297371996, + "grad_norm": 0.5707676987601816, + "learning_rate": 1.1429996868746524e-06, + "loss": 0.2747, + "step": 29551 + }, + { + "epoch": 1.3843631423619245, + "grad_norm": 0.6223146415179595, + "learning_rate": 1.1428404108431929e-06, + "loss": 0.2655, + "step": 29552 + }, + { + "epoch": 1.3844099873518527, + "grad_norm": 0.5512517852535241, + "learning_rate": 1.1426811426218457e-06, + "loss": 0.2702, + "step": 29553 + }, + { + "epoch": 1.384456832341781, + "grad_norm": 0.5947927141361464, + "learning_rate": 1.1425218822115283e-06, + "loss": 0.2576, + "step": 29554 + }, + { + "epoch": 1.3845036773317094, + "grad_norm": 0.5930053939878986, + "learning_rate": 1.1423626296131554e-06, + "loss": 0.2781, + "step": 29555 + }, + { + "epoch": 1.3845505223216377, + "grad_norm": 0.5773925961563153, + "learning_rate": 1.1422033848276446e-06, + "loss": 0.2565, + "step": 29556 + }, + { + "epoch": 1.384597367311566, + "grad_norm": 0.5839876236093942, + "learning_rate": 1.1420441478559124e-06, + "loss": 0.2676, + "step": 29557 + }, + { + "epoch": 1.3846442123014944, + "grad_norm": 0.5667952024075934, + "learning_rate": 1.1418849186988743e-06, + "loss": 0.2514, + "step": 29558 + }, + { + "epoch": 1.3846910572914226, + "grad_norm": 0.6035675276076281, + "learning_rate": 1.141725697357447e-06, + "loss": 0.2825, + "step": 29559 + }, + { + "epoch": 1.384737902281351, + "grad_norm": 0.6095296602809338, + "learning_rate": 1.141566483832547e-06, + "loss": 0.2784, + "step": 29560 + }, + { + "epoch": 1.3847847472712793, + "grad_norm": 0.5758410718946653, + "learning_rate": 1.1414072781250914e-06, + "loss": 0.2648, + "step": 29561 + }, + { + "epoch": 1.3848315922612078, + "grad_norm": 0.6189189173711337, + "learning_rate": 1.1412480802359945e-06, + "loss": 0.2838, + "step": 29562 + }, + { + "epoch": 1.384878437251136, + "grad_norm": 0.6048947293402382, + "learning_rate": 1.1410888901661742e-06, + "loss": 0.2718, + "step": 29563 + }, + { + "epoch": 1.3849252822410643, + "grad_norm": 0.5628110534149138, + "learning_rate": 1.1409297079165452e-06, + "loss": 0.2713, + "step": 29564 + }, + { + "epoch": 1.3849721272309927, + "grad_norm": 0.5876665142911183, + "learning_rate": 1.1407705334880236e-06, + "loss": 0.2712, + "step": 29565 + }, + { + "epoch": 1.385018972220921, + "grad_norm": 0.5919772214528836, + "learning_rate": 1.1406113668815263e-06, + "loss": 0.269, + "step": 29566 + }, + { + "epoch": 1.3850658172108492, + "grad_norm": 0.6003478348708247, + "learning_rate": 1.1404522080979689e-06, + "loss": 0.2652, + "step": 29567 + }, + { + "epoch": 1.3851126622007777, + "grad_norm": 0.5924256220212544, + "learning_rate": 1.1402930571382682e-06, + "loss": 0.2556, + "step": 29568 + }, + { + "epoch": 1.385159507190706, + "grad_norm": 0.6063124538450317, + "learning_rate": 1.1401339140033393e-06, + "loss": 0.2842, + "step": 29569 + }, + { + "epoch": 1.3852063521806341, + "grad_norm": 0.5696831296016928, + "learning_rate": 1.1399747786940968e-06, + "loss": 0.2694, + "step": 29570 + }, + { + "epoch": 1.3852531971705626, + "grad_norm": 0.5956667730347548, + "learning_rate": 1.1398156512114578e-06, + "loss": 0.2614, + "step": 29571 + }, + { + "epoch": 1.385300042160491, + "grad_norm": 0.6255536405416806, + "learning_rate": 1.1396565315563378e-06, + "loss": 0.2936, + "step": 29572 + }, + { + "epoch": 1.3853468871504193, + "grad_norm": 0.5998951634271198, + "learning_rate": 1.1394974197296527e-06, + "loss": 0.2818, + "step": 29573 + }, + { + "epoch": 1.3853937321403476, + "grad_norm": 0.6155700416499746, + "learning_rate": 1.1393383157323187e-06, + "loss": 0.2662, + "step": 29574 + }, + { + "epoch": 1.385440577130276, + "grad_norm": 0.5705058158990771, + "learning_rate": 1.1391792195652498e-06, + "loss": 0.2654, + "step": 29575 + }, + { + "epoch": 1.3854874221202043, + "grad_norm": 0.600281796773426, + "learning_rate": 1.1390201312293637e-06, + "loss": 0.2742, + "step": 29576 + }, + { + "epoch": 1.3855342671101325, + "grad_norm": 0.5940073966340924, + "learning_rate": 1.1388610507255733e-06, + "loss": 0.2698, + "step": 29577 + }, + { + "epoch": 1.385581112100061, + "grad_norm": 0.5469122283196516, + "learning_rate": 1.1387019780547956e-06, + "loss": 0.2683, + "step": 29578 + }, + { + "epoch": 1.3856279570899892, + "grad_norm": 0.6063702661346432, + "learning_rate": 1.138542913217946e-06, + "loss": 0.2656, + "step": 29579 + }, + { + "epoch": 1.3856748020799174, + "grad_norm": 0.6132390400947342, + "learning_rate": 1.1383838562159397e-06, + "loss": 0.2858, + "step": 29580 + }, + { + "epoch": 1.385721647069846, + "grad_norm": 0.6406389741940726, + "learning_rate": 1.138224807049693e-06, + "loss": 0.2731, + "step": 29581 + }, + { + "epoch": 1.3857684920597741, + "grad_norm": 0.6093495176590259, + "learning_rate": 1.13806576572012e-06, + "loss": 0.2607, + "step": 29582 + }, + { + "epoch": 1.3858153370497026, + "grad_norm": 0.6061397498284602, + "learning_rate": 1.1379067322281356e-06, + "loss": 0.279, + "step": 29583 + }, + { + "epoch": 1.3858621820396309, + "grad_norm": 0.6311198201065461, + "learning_rate": 1.1377477065746557e-06, + "loss": 0.2794, + "step": 29584 + }, + { + "epoch": 1.3859090270295593, + "grad_norm": 0.5863993009108436, + "learning_rate": 1.1375886887605949e-06, + "loss": 0.2675, + "step": 29585 + }, + { + "epoch": 1.3859558720194876, + "grad_norm": 0.593404503295178, + "learning_rate": 1.137429678786869e-06, + "loss": 0.265, + "step": 29586 + }, + { + "epoch": 1.3860027170094158, + "grad_norm": 0.6006873114455821, + "learning_rate": 1.137270676654394e-06, + "loss": 0.2882, + "step": 29587 + }, + { + "epoch": 1.3860495619993443, + "grad_norm": 0.6138077578615114, + "learning_rate": 1.1371116823640824e-06, + "loss": 0.2734, + "step": 29588 + }, + { + "epoch": 1.3860964069892725, + "grad_norm": 0.5797243931196109, + "learning_rate": 1.1369526959168517e-06, + "loss": 0.2594, + "step": 29589 + }, + { + "epoch": 1.3861432519792007, + "grad_norm": 0.6211384837188899, + "learning_rate": 1.1367937173136148e-06, + "loss": 0.2897, + "step": 29590 + }, + { + "epoch": 1.3861900969691292, + "grad_norm": 0.6282045993037115, + "learning_rate": 1.1366347465552874e-06, + "loss": 0.3053, + "step": 29591 + }, + { + "epoch": 1.3862369419590574, + "grad_norm": 0.6100187059951802, + "learning_rate": 1.1364757836427843e-06, + "loss": 0.2679, + "step": 29592 + }, + { + "epoch": 1.3862837869489857, + "grad_norm": 0.5594625333952602, + "learning_rate": 1.136316828577021e-06, + "loss": 0.2475, + "step": 29593 + }, + { + "epoch": 1.3863306319389141, + "grad_norm": 0.6271218776463068, + "learning_rate": 1.1361578813589112e-06, + "loss": 0.2695, + "step": 29594 + }, + { + "epoch": 1.3863774769288424, + "grad_norm": 0.5999740433224969, + "learning_rate": 1.1359989419893697e-06, + "loss": 0.2669, + "step": 29595 + }, + { + "epoch": 1.3864243219187709, + "grad_norm": 0.6116229603650611, + "learning_rate": 1.1358400104693124e-06, + "loss": 0.2764, + "step": 29596 + }, + { + "epoch": 1.386471166908699, + "grad_norm": 0.5838934568929611, + "learning_rate": 1.135681086799652e-06, + "loss": 0.2722, + "step": 29597 + }, + { + "epoch": 1.3865180118986276, + "grad_norm": 0.6251550758111724, + "learning_rate": 1.1355221709813041e-06, + "loss": 0.2723, + "step": 29598 + }, + { + "epoch": 1.3865648568885558, + "grad_norm": 0.6096083485110525, + "learning_rate": 1.135363263015183e-06, + "loss": 0.2723, + "step": 29599 + }, + { + "epoch": 1.386611701878484, + "grad_norm": 0.6223653362416858, + "learning_rate": 1.1352043629022043e-06, + "loss": 0.2751, + "step": 29600 + }, + { + "epoch": 1.3866585468684125, + "grad_norm": 0.5752013769929187, + "learning_rate": 1.135045470643281e-06, + "loss": 0.266, + "step": 29601 + }, + { + "epoch": 1.3867053918583407, + "grad_norm": 0.628408923147131, + "learning_rate": 1.1348865862393274e-06, + "loss": 0.2644, + "step": 29602 + }, + { + "epoch": 1.386752236848269, + "grad_norm": 0.5962237376702215, + "learning_rate": 1.1347277096912595e-06, + "loss": 0.2751, + "step": 29603 + }, + { + "epoch": 1.3867990818381974, + "grad_norm": 0.6164791965454609, + "learning_rate": 1.1345688409999897e-06, + "loss": 0.2679, + "step": 29604 + }, + { + "epoch": 1.3868459268281257, + "grad_norm": 0.582679559952932, + "learning_rate": 1.134409980166433e-06, + "loss": 0.2731, + "step": 29605 + }, + { + "epoch": 1.386892771818054, + "grad_norm": 0.5606201116068154, + "learning_rate": 1.1342511271915043e-06, + "loss": 0.2749, + "step": 29606 + }, + { + "epoch": 1.3869396168079824, + "grad_norm": 0.619601756873515, + "learning_rate": 1.1340922820761163e-06, + "loss": 0.2573, + "step": 29607 + }, + { + "epoch": 1.3869864617979106, + "grad_norm": 0.5920994087270335, + "learning_rate": 1.133933444821184e-06, + "loss": 0.2787, + "step": 29608 + }, + { + "epoch": 1.387033306787839, + "grad_norm": 0.613776969370694, + "learning_rate": 1.1337746154276214e-06, + "loss": 0.273, + "step": 29609 + }, + { + "epoch": 1.3870801517777673, + "grad_norm": 0.6106859489561254, + "learning_rate": 1.1336157938963432e-06, + "loss": 0.2803, + "step": 29610 + }, + { + "epoch": 1.3871269967676958, + "grad_norm": 0.5810448430084867, + "learning_rate": 1.1334569802282619e-06, + "loss": 0.2527, + "step": 29611 + }, + { + "epoch": 1.387173841757624, + "grad_norm": 0.5762468827560091, + "learning_rate": 1.1332981744242932e-06, + "loss": 0.2724, + "step": 29612 + }, + { + "epoch": 1.3872206867475523, + "grad_norm": 0.5890123700117568, + "learning_rate": 1.1331393764853491e-06, + "loss": 0.2739, + "step": 29613 + }, + { + "epoch": 1.3872675317374807, + "grad_norm": 0.5926361269185855, + "learning_rate": 1.1329805864123442e-06, + "loss": 0.2773, + "step": 29614 + }, + { + "epoch": 1.387314376727409, + "grad_norm": 0.6030176674879127, + "learning_rate": 1.1328218042061925e-06, + "loss": 0.2785, + "step": 29615 + }, + { + "epoch": 1.3873612217173372, + "grad_norm": 0.6033518614082322, + "learning_rate": 1.1326630298678088e-06, + "loss": 0.2823, + "step": 29616 + }, + { + "epoch": 1.3874080667072657, + "grad_norm": 0.657477249272742, + "learning_rate": 1.1325042633981043e-06, + "loss": 0.297, + "step": 29617 + }, + { + "epoch": 1.387454911697194, + "grad_norm": 0.595287175901192, + "learning_rate": 1.1323455047979953e-06, + "loss": 0.2775, + "step": 29618 + }, + { + "epoch": 1.3875017566871224, + "grad_norm": 0.6293688694096001, + "learning_rate": 1.1321867540683934e-06, + "loss": 0.2887, + "step": 29619 + }, + { + "epoch": 1.3875486016770506, + "grad_norm": 0.6056137439612808, + "learning_rate": 1.1320280112102124e-06, + "loss": 0.2761, + "step": 29620 + }, + { + "epoch": 1.387595446666979, + "grad_norm": 0.6379636414153179, + "learning_rate": 1.1318692762243668e-06, + "loss": 0.2983, + "step": 29621 + }, + { + "epoch": 1.3876422916569073, + "grad_norm": 0.5752183426965224, + "learning_rate": 1.1317105491117697e-06, + "loss": 0.2773, + "step": 29622 + }, + { + "epoch": 1.3876891366468356, + "grad_norm": 0.5893309540869257, + "learning_rate": 1.1315518298733352e-06, + "loss": 0.2845, + "step": 29623 + }, + { + "epoch": 1.387735981636764, + "grad_norm": 0.6330059824966342, + "learning_rate": 1.1313931185099751e-06, + "loss": 0.2866, + "step": 29624 + }, + { + "epoch": 1.3877828266266923, + "grad_norm": 0.6095140421440997, + "learning_rate": 1.1312344150226046e-06, + "loss": 0.2752, + "step": 29625 + }, + { + "epoch": 1.3878296716166205, + "grad_norm": 0.5821388357949522, + "learning_rate": 1.1310757194121353e-06, + "loss": 0.2739, + "step": 29626 + }, + { + "epoch": 1.387876516606549, + "grad_norm": 0.6133379226756689, + "learning_rate": 1.1309170316794806e-06, + "loss": 0.2827, + "step": 29627 + }, + { + "epoch": 1.3879233615964772, + "grad_norm": 0.575044025759396, + "learning_rate": 1.1307583518255546e-06, + "loss": 0.2816, + "step": 29628 + }, + { + "epoch": 1.3879702065864055, + "grad_norm": 0.6062586226431045, + "learning_rate": 1.1305996798512703e-06, + "loss": 0.2741, + "step": 29629 + }, + { + "epoch": 1.388017051576334, + "grad_norm": 0.6212271349902524, + "learning_rate": 1.1304410157575412e-06, + "loss": 0.2814, + "step": 29630 + }, + { + "epoch": 1.3880638965662622, + "grad_norm": 0.634121318481067, + "learning_rate": 1.1302823595452803e-06, + "loss": 0.2822, + "step": 29631 + }, + { + "epoch": 1.3881107415561906, + "grad_norm": 0.5777180881539428, + "learning_rate": 1.130123711215399e-06, + "loss": 0.2577, + "step": 29632 + }, + { + "epoch": 1.3881575865461189, + "grad_norm": 0.5529122989671017, + "learning_rate": 1.1299650707688115e-06, + "loss": 0.2672, + "step": 29633 + }, + { + "epoch": 1.3882044315360473, + "grad_norm": 0.5670821042468378, + "learning_rate": 1.1298064382064306e-06, + "loss": 0.2531, + "step": 29634 + }, + { + "epoch": 1.3882512765259756, + "grad_norm": 0.5757498727024591, + "learning_rate": 1.129647813529169e-06, + "loss": 0.2707, + "step": 29635 + }, + { + "epoch": 1.3882981215159038, + "grad_norm": 0.6273607525619145, + "learning_rate": 1.129489196737941e-06, + "loss": 0.2949, + "step": 29636 + }, + { + "epoch": 1.3883449665058323, + "grad_norm": 0.5809871470974852, + "learning_rate": 1.1293305878336575e-06, + "loss": 0.2616, + "step": 29637 + }, + { + "epoch": 1.3883918114957605, + "grad_norm": 0.5445352000914466, + "learning_rate": 1.1291719868172324e-06, + "loss": 0.2682, + "step": 29638 + }, + { + "epoch": 1.3884386564856888, + "grad_norm": 0.5740665448630999, + "learning_rate": 1.1290133936895775e-06, + "loss": 0.2625, + "step": 29639 + }, + { + "epoch": 1.3884855014756172, + "grad_norm": 0.5755527298056249, + "learning_rate": 1.1288548084516056e-06, + "loss": 0.2805, + "step": 29640 + }, + { + "epoch": 1.3885323464655455, + "grad_norm": 0.5859722899403312, + "learning_rate": 1.1286962311042298e-06, + "loss": 0.2884, + "step": 29641 + }, + { + "epoch": 1.3885791914554737, + "grad_norm": 0.6620201698154751, + "learning_rate": 1.1285376616483635e-06, + "loss": 0.2894, + "step": 29642 + }, + { + "epoch": 1.3886260364454022, + "grad_norm": 0.5954466217188453, + "learning_rate": 1.1283791000849172e-06, + "loss": 0.2653, + "step": 29643 + }, + { + "epoch": 1.3886728814353304, + "grad_norm": 0.5833522952607274, + "learning_rate": 1.1282205464148044e-06, + "loss": 0.2731, + "step": 29644 + }, + { + "epoch": 1.3887197264252589, + "grad_norm": 0.6110128277910319, + "learning_rate": 1.1280620006389386e-06, + "loss": 0.2677, + "step": 29645 + }, + { + "epoch": 1.3887665714151871, + "grad_norm": 0.5882778696423144, + "learning_rate": 1.1279034627582303e-06, + "loss": 0.2736, + "step": 29646 + }, + { + "epoch": 1.3888134164051156, + "grad_norm": 0.5847166204958236, + "learning_rate": 1.1277449327735926e-06, + "loss": 0.2607, + "step": 29647 + }, + { + "epoch": 1.3888602613950438, + "grad_norm": 0.5757160174074853, + "learning_rate": 1.1275864106859392e-06, + "loss": 0.2631, + "step": 29648 + }, + { + "epoch": 1.388907106384972, + "grad_norm": 0.5589609422796669, + "learning_rate": 1.1274278964961796e-06, + "loss": 0.2672, + "step": 29649 + }, + { + "epoch": 1.3889539513749005, + "grad_norm": 0.6221770843403761, + "learning_rate": 1.1272693902052275e-06, + "loss": 0.2729, + "step": 29650 + }, + { + "epoch": 1.3890007963648288, + "grad_norm": 0.5698171191400194, + "learning_rate": 1.1271108918139953e-06, + "loss": 0.2606, + "step": 29651 + }, + { + "epoch": 1.389047641354757, + "grad_norm": 0.6704670327907666, + "learning_rate": 1.126952401323396e-06, + "loss": 0.2859, + "step": 29652 + }, + { + "epoch": 1.3890944863446855, + "grad_norm": 0.5854363877613689, + "learning_rate": 1.1267939187343391e-06, + "loss": 0.2622, + "step": 29653 + }, + { + "epoch": 1.3891413313346137, + "grad_norm": 0.5845450763220241, + "learning_rate": 1.1266354440477392e-06, + "loss": 0.2713, + "step": 29654 + }, + { + "epoch": 1.3891881763245422, + "grad_norm": 0.5681593822211569, + "learning_rate": 1.1264769772645063e-06, + "loss": 0.2604, + "step": 29655 + }, + { + "epoch": 1.3892350213144704, + "grad_norm": 0.6153567452949075, + "learning_rate": 1.126318518385553e-06, + "loss": 0.2824, + "step": 29656 + }, + { + "epoch": 1.3892818663043989, + "grad_norm": 0.5622580469903189, + "learning_rate": 1.1261600674117914e-06, + "loss": 0.2594, + "step": 29657 + }, + { + "epoch": 1.3893287112943271, + "grad_norm": 0.6155263776484958, + "learning_rate": 1.1260016243441335e-06, + "loss": 0.2723, + "step": 29658 + }, + { + "epoch": 1.3893755562842554, + "grad_norm": 0.6581053418122248, + "learning_rate": 1.1258431891834914e-06, + "loss": 0.2933, + "step": 29659 + }, + { + "epoch": 1.3894224012741838, + "grad_norm": 0.5998494136341928, + "learning_rate": 1.1256847619307767e-06, + "loss": 0.2765, + "step": 29660 + }, + { + "epoch": 1.389469246264112, + "grad_norm": 0.5771706563351385, + "learning_rate": 1.1255263425868997e-06, + "loss": 0.2636, + "step": 29661 + }, + { + "epoch": 1.3895160912540403, + "grad_norm": 0.5908328283587473, + "learning_rate": 1.1253679311527735e-06, + "loss": 0.2636, + "step": 29662 + }, + { + "epoch": 1.3895629362439688, + "grad_norm": 0.5785265106184991, + "learning_rate": 1.125209527629309e-06, + "loss": 0.2624, + "step": 29663 + }, + { + "epoch": 1.389609781233897, + "grad_norm": 0.5602570037291069, + "learning_rate": 1.1250511320174182e-06, + "loss": 0.2642, + "step": 29664 + }, + { + "epoch": 1.3896566262238252, + "grad_norm": 0.534300075101351, + "learning_rate": 1.1248927443180136e-06, + "loss": 0.2547, + "step": 29665 + }, + { + "epoch": 1.3897034712137537, + "grad_norm": 0.5790476908931654, + "learning_rate": 1.1247343645320046e-06, + "loss": 0.2742, + "step": 29666 + }, + { + "epoch": 1.389750316203682, + "grad_norm": 0.5441303068163202, + "learning_rate": 1.1245759926603045e-06, + "loss": 0.2532, + "step": 29667 + }, + { + "epoch": 1.3897971611936104, + "grad_norm": 0.5828972374438559, + "learning_rate": 1.124417628703823e-06, + "loss": 0.2716, + "step": 29668 + }, + { + "epoch": 1.3898440061835386, + "grad_norm": 0.57100193148888, + "learning_rate": 1.1242592726634725e-06, + "loss": 0.2803, + "step": 29669 + }, + { + "epoch": 1.3898908511734671, + "grad_norm": 0.6331436809746739, + "learning_rate": 1.124100924540164e-06, + "loss": 0.2759, + "step": 29670 + }, + { + "epoch": 1.3899376961633954, + "grad_norm": 0.5984249907505523, + "learning_rate": 1.1239425843348087e-06, + "loss": 0.2851, + "step": 29671 + }, + { + "epoch": 1.3899845411533236, + "grad_norm": 0.572403316315139, + "learning_rate": 1.1237842520483192e-06, + "loss": 0.2791, + "step": 29672 + }, + { + "epoch": 1.390031386143252, + "grad_norm": 0.5569243388174684, + "learning_rate": 1.123625927681605e-06, + "loss": 0.257, + "step": 29673 + }, + { + "epoch": 1.3900782311331803, + "grad_norm": 0.592168299768327, + "learning_rate": 1.1234676112355773e-06, + "loss": 0.2629, + "step": 29674 + }, + { + "epoch": 1.3901250761231085, + "grad_norm": 0.6130943954325627, + "learning_rate": 1.1233093027111468e-06, + "loss": 0.2645, + "step": 29675 + }, + { + "epoch": 1.390171921113037, + "grad_norm": 0.6264482973095411, + "learning_rate": 1.1231510021092258e-06, + "loss": 0.2856, + "step": 29676 + }, + { + "epoch": 1.3902187661029652, + "grad_norm": 0.5636918031551487, + "learning_rate": 1.1229927094307247e-06, + "loss": 0.2674, + "step": 29677 + }, + { + "epoch": 1.3902656110928935, + "grad_norm": 0.6075293150537621, + "learning_rate": 1.122834424676554e-06, + "loss": 0.282, + "step": 29678 + }, + { + "epoch": 1.390312456082822, + "grad_norm": 0.6109623935483255, + "learning_rate": 1.1226761478476263e-06, + "loss": 0.2665, + "step": 29679 + }, + { + "epoch": 1.3903593010727502, + "grad_norm": 0.6285933080873726, + "learning_rate": 1.1225178789448512e-06, + "loss": 0.2737, + "step": 29680 + }, + { + "epoch": 1.3904061460626786, + "grad_norm": 0.5961268751200941, + "learning_rate": 1.1223596179691387e-06, + "loss": 0.2773, + "step": 29681 + }, + { + "epoch": 1.390452991052607, + "grad_norm": 0.6324945119247162, + "learning_rate": 1.1222013649214002e-06, + "loss": 0.275, + "step": 29682 + }, + { + "epoch": 1.3904998360425354, + "grad_norm": 0.6054438873899866, + "learning_rate": 1.1220431198025464e-06, + "loss": 0.293, + "step": 29683 + }, + { + "epoch": 1.3905466810324636, + "grad_norm": 0.6367927330419841, + "learning_rate": 1.1218848826134884e-06, + "loss": 0.2891, + "step": 29684 + }, + { + "epoch": 1.3905935260223918, + "grad_norm": 0.5870919688464836, + "learning_rate": 1.1217266533551372e-06, + "loss": 0.2639, + "step": 29685 + }, + { + "epoch": 1.3906403710123203, + "grad_norm": 0.6508497796497152, + "learning_rate": 1.1215684320284017e-06, + "loss": 0.2941, + "step": 29686 + }, + { + "epoch": 1.3906872160022485, + "grad_norm": 0.5806009869525842, + "learning_rate": 1.1214102186341947e-06, + "loss": 0.2783, + "step": 29687 + }, + { + "epoch": 1.3907340609921768, + "grad_norm": 0.6526226003256452, + "learning_rate": 1.1212520131734241e-06, + "loss": 0.279, + "step": 29688 + }, + { + "epoch": 1.3907809059821052, + "grad_norm": 0.5939348513930253, + "learning_rate": 1.121093815647002e-06, + "loss": 0.274, + "step": 29689 + }, + { + "epoch": 1.3908277509720335, + "grad_norm": 0.5961198419794873, + "learning_rate": 1.1209356260558383e-06, + "loss": 0.2757, + "step": 29690 + }, + { + "epoch": 1.390874595961962, + "grad_norm": 0.601046381790709, + "learning_rate": 1.1207774444008442e-06, + "loss": 0.2648, + "step": 29691 + }, + { + "epoch": 1.3909214409518902, + "grad_norm": 0.5629582615388053, + "learning_rate": 1.1206192706829283e-06, + "loss": 0.2784, + "step": 29692 + }, + { + "epoch": 1.3909682859418186, + "grad_norm": 0.5889514269853897, + "learning_rate": 1.120461104903002e-06, + "loss": 0.2791, + "step": 29693 + }, + { + "epoch": 1.391015130931747, + "grad_norm": 0.5760345448518087, + "learning_rate": 1.1203029470619762e-06, + "loss": 0.2784, + "step": 29694 + }, + { + "epoch": 1.3910619759216751, + "grad_norm": 0.5982304060424258, + "learning_rate": 1.1201447971607593e-06, + "loss": 0.28, + "step": 29695 + }, + { + "epoch": 1.3911088209116036, + "grad_norm": 0.569658640689111, + "learning_rate": 1.1199866552002621e-06, + "loss": 0.2692, + "step": 29696 + }, + { + "epoch": 1.3911556659015318, + "grad_norm": 0.5759282142491817, + "learning_rate": 1.1198285211813958e-06, + "loss": 0.2675, + "step": 29697 + }, + { + "epoch": 1.39120251089146, + "grad_norm": 0.6078886433879973, + "learning_rate": 1.1196703951050686e-06, + "loss": 0.2737, + "step": 29698 + }, + { + "epoch": 1.3912493558813885, + "grad_norm": 0.6375403928475794, + "learning_rate": 1.1195122769721915e-06, + "loss": 0.2878, + "step": 29699 + }, + { + "epoch": 1.3912962008713168, + "grad_norm": 0.6062675684266833, + "learning_rate": 1.119354166783674e-06, + "loss": 0.2635, + "step": 29700 + }, + { + "epoch": 1.391343045861245, + "grad_norm": 0.5678964725062607, + "learning_rate": 1.1191960645404273e-06, + "loss": 0.2673, + "step": 29701 + }, + { + "epoch": 1.3913898908511735, + "grad_norm": 0.5863114597327764, + "learning_rate": 1.1190379702433592e-06, + "loss": 0.2642, + "step": 29702 + }, + { + "epoch": 1.3914367358411017, + "grad_norm": 0.5817822122176225, + "learning_rate": 1.1188798838933815e-06, + "loss": 0.2724, + "step": 29703 + }, + { + "epoch": 1.3914835808310302, + "grad_norm": 0.5743369711990238, + "learning_rate": 1.1187218054914021e-06, + "loss": 0.2657, + "step": 29704 + }, + { + "epoch": 1.3915304258209584, + "grad_norm": 0.6642722166366677, + "learning_rate": 1.1185637350383312e-06, + "loss": 0.2977, + "step": 29705 + }, + { + "epoch": 1.391577270810887, + "grad_norm": 0.6297119367542823, + "learning_rate": 1.1184056725350792e-06, + "loss": 0.2722, + "step": 29706 + }, + { + "epoch": 1.3916241158008151, + "grad_norm": 0.6091103342406484, + "learning_rate": 1.1182476179825552e-06, + "loss": 0.2856, + "step": 29707 + }, + { + "epoch": 1.3916709607907434, + "grad_norm": 0.5851484950278812, + "learning_rate": 1.1180895713816698e-06, + "loss": 0.2499, + "step": 29708 + }, + { + "epoch": 1.3917178057806718, + "grad_norm": 0.586095879511733, + "learning_rate": 1.1179315327333314e-06, + "loss": 0.2728, + "step": 29709 + }, + { + "epoch": 1.3917646507706, + "grad_norm": 0.592119663605037, + "learning_rate": 1.1177735020384489e-06, + "loss": 0.2716, + "step": 29710 + }, + { + "epoch": 1.3918114957605283, + "grad_norm": 0.583044505532845, + "learning_rate": 1.1176154792979324e-06, + "loss": 0.2648, + "step": 29711 + }, + { + "epoch": 1.3918583407504568, + "grad_norm": 0.5923622926841446, + "learning_rate": 1.1174574645126914e-06, + "loss": 0.2826, + "step": 29712 + }, + { + "epoch": 1.391905185740385, + "grad_norm": 0.5755331344792204, + "learning_rate": 1.1172994576836351e-06, + "loss": 0.2647, + "step": 29713 + }, + { + "epoch": 1.3919520307303133, + "grad_norm": 0.6196972490098219, + "learning_rate": 1.117141458811674e-06, + "loss": 0.2823, + "step": 29714 + }, + { + "epoch": 1.3919988757202417, + "grad_norm": 0.623473144081751, + "learning_rate": 1.116983467897715e-06, + "loss": 0.2911, + "step": 29715 + }, + { + "epoch": 1.39204572071017, + "grad_norm": 0.6027367257606576, + "learning_rate": 1.11682548494267e-06, + "loss": 0.2774, + "step": 29716 + }, + { + "epoch": 1.3920925657000984, + "grad_norm": 0.6254177242758541, + "learning_rate": 1.1166675099474453e-06, + "loss": 0.2753, + "step": 29717 + }, + { + "epoch": 1.3921394106900267, + "grad_norm": 0.5993158571676027, + "learning_rate": 1.1165095429129515e-06, + "loss": 0.2821, + "step": 29718 + }, + { + "epoch": 1.3921862556799551, + "grad_norm": 0.5584681396487211, + "learning_rate": 1.1163515838400977e-06, + "loss": 0.2713, + "step": 29719 + }, + { + "epoch": 1.3922331006698834, + "grad_norm": 0.590147014215753, + "learning_rate": 1.1161936327297926e-06, + "loss": 0.2638, + "step": 29720 + }, + { + "epoch": 1.3922799456598116, + "grad_norm": 0.60386831010198, + "learning_rate": 1.1160356895829462e-06, + "loss": 0.2622, + "step": 29721 + }, + { + "epoch": 1.39232679064974, + "grad_norm": 0.5955860090176754, + "learning_rate": 1.115877754400467e-06, + "loss": 0.2827, + "step": 29722 + }, + { + "epoch": 1.3923736356396683, + "grad_norm": 0.5764834703937824, + "learning_rate": 1.115719827183262e-06, + "loss": 0.2679, + "step": 29723 + }, + { + "epoch": 1.3924204806295966, + "grad_norm": 0.6166239421531254, + "learning_rate": 1.1155619079322419e-06, + "loss": 0.272, + "step": 29724 + }, + { + "epoch": 1.392467325619525, + "grad_norm": 0.6016214580496452, + "learning_rate": 1.1154039966483146e-06, + "loss": 0.2509, + "step": 29725 + }, + { + "epoch": 1.3925141706094533, + "grad_norm": 0.5933231330336104, + "learning_rate": 1.1152460933323897e-06, + "loss": 0.2657, + "step": 29726 + }, + { + "epoch": 1.3925610155993817, + "grad_norm": 0.5927598887850153, + "learning_rate": 1.115088197985376e-06, + "loss": 0.2673, + "step": 29727 + }, + { + "epoch": 1.39260786058931, + "grad_norm": 0.6172514407792663, + "learning_rate": 1.114930310608181e-06, + "loss": 0.2822, + "step": 29728 + }, + { + "epoch": 1.3926547055792384, + "grad_norm": 0.6012134530697637, + "learning_rate": 1.114772431201715e-06, + "loss": 0.2573, + "step": 29729 + }, + { + "epoch": 1.3927015505691667, + "grad_norm": 0.5710853027884022, + "learning_rate": 1.1146145597668842e-06, + "loss": 0.265, + "step": 29730 + }, + { + "epoch": 1.392748395559095, + "grad_norm": 0.6149695554572464, + "learning_rate": 1.1144566963045985e-06, + "loss": 0.2753, + "step": 29731 + }, + { + "epoch": 1.3927952405490234, + "grad_norm": 0.5810894943544335, + "learning_rate": 1.1142988408157665e-06, + "loss": 0.2671, + "step": 29732 + }, + { + "epoch": 1.3928420855389516, + "grad_norm": 0.5823785688600999, + "learning_rate": 1.1141409933012962e-06, + "loss": 0.2758, + "step": 29733 + }, + { + "epoch": 1.3928889305288799, + "grad_norm": 0.5697847166017564, + "learning_rate": 1.1139831537620972e-06, + "loss": 0.2723, + "step": 29734 + }, + { + "epoch": 1.3929357755188083, + "grad_norm": 0.5796326886805204, + "learning_rate": 1.1138253221990758e-06, + "loss": 0.281, + "step": 29735 + }, + { + "epoch": 1.3929826205087366, + "grad_norm": 0.5752279343126686, + "learning_rate": 1.113667498613142e-06, + "loss": 0.2705, + "step": 29736 + }, + { + "epoch": 1.3930294654986648, + "grad_norm": 0.6056994097550649, + "learning_rate": 1.1135096830052028e-06, + "loss": 0.2733, + "step": 29737 + }, + { + "epoch": 1.3930763104885933, + "grad_norm": 0.6266053227044766, + "learning_rate": 1.1133518753761665e-06, + "loss": 0.2881, + "step": 29738 + }, + { + "epoch": 1.3931231554785215, + "grad_norm": 0.5839766624638226, + "learning_rate": 1.1131940757269419e-06, + "loss": 0.273, + "step": 29739 + }, + { + "epoch": 1.39317000046845, + "grad_norm": 0.5778754988269362, + "learning_rate": 1.1130362840584375e-06, + "loss": 0.2854, + "step": 29740 + }, + { + "epoch": 1.3932168454583782, + "grad_norm": 0.6013681613462137, + "learning_rate": 1.11287850037156e-06, + "loss": 0.2564, + "step": 29741 + }, + { + "epoch": 1.3932636904483067, + "grad_norm": 0.6215581801746362, + "learning_rate": 1.1127207246672178e-06, + "loss": 0.2872, + "step": 29742 + }, + { + "epoch": 1.393310535438235, + "grad_norm": 0.6209709268882468, + "learning_rate": 1.1125629569463203e-06, + "loss": 0.279, + "step": 29743 + }, + { + "epoch": 1.3933573804281632, + "grad_norm": 0.6086879079137073, + "learning_rate": 1.1124051972097735e-06, + "loss": 0.2622, + "step": 29744 + }, + { + "epoch": 1.3934042254180916, + "grad_norm": 0.5692291194756435, + "learning_rate": 1.1122474454584858e-06, + "loss": 0.2718, + "step": 29745 + }, + { + "epoch": 1.3934510704080199, + "grad_norm": 0.5934254028740725, + "learning_rate": 1.1120897016933663e-06, + "loss": 0.2743, + "step": 29746 + }, + { + "epoch": 1.393497915397948, + "grad_norm": 0.6267286150903052, + "learning_rate": 1.1119319659153204e-06, + "loss": 0.2687, + "step": 29747 + }, + { + "epoch": 1.3935447603878766, + "grad_norm": 0.5957326481861784, + "learning_rate": 1.1117742381252578e-06, + "loss": 0.2714, + "step": 29748 + }, + { + "epoch": 1.3935916053778048, + "grad_norm": 0.5664587929358742, + "learning_rate": 1.111616518324085e-06, + "loss": 0.2691, + "step": 29749 + }, + { + "epoch": 1.393638450367733, + "grad_norm": 0.5835583058523302, + "learning_rate": 1.1114588065127113e-06, + "loss": 0.2676, + "step": 29750 + }, + { + "epoch": 1.3936852953576615, + "grad_norm": 0.6021022222396937, + "learning_rate": 1.1113011026920423e-06, + "loss": 0.2752, + "step": 29751 + }, + { + "epoch": 1.3937321403475897, + "grad_norm": 0.6190814540120342, + "learning_rate": 1.1111434068629876e-06, + "loss": 0.2687, + "step": 29752 + }, + { + "epoch": 1.3937789853375182, + "grad_norm": 0.5813892698923326, + "learning_rate": 1.1109857190264522e-06, + "loss": 0.2681, + "step": 29753 + }, + { + "epoch": 1.3938258303274464, + "grad_norm": 0.614210309687702, + "learning_rate": 1.1108280391833447e-06, + "loss": 0.2841, + "step": 29754 + }, + { + "epoch": 1.393872675317375, + "grad_norm": 0.5868567093877282, + "learning_rate": 1.1106703673345731e-06, + "loss": 0.2607, + "step": 29755 + }, + { + "epoch": 1.3939195203073032, + "grad_norm": 0.5505481910878008, + "learning_rate": 1.1105127034810453e-06, + "loss": 0.2673, + "step": 29756 + }, + { + "epoch": 1.3939663652972314, + "grad_norm": 0.5660551094108177, + "learning_rate": 1.1103550476236666e-06, + "loss": 0.2592, + "step": 29757 + }, + { + "epoch": 1.3940132102871599, + "grad_norm": 0.5832951511339468, + "learning_rate": 1.110197399763346e-06, + "loss": 0.2835, + "step": 29758 + }, + { + "epoch": 1.394060055277088, + "grad_norm": 0.5559063912450686, + "learning_rate": 1.1100397599009892e-06, + "loss": 0.2588, + "step": 29759 + }, + { + "epoch": 1.3941069002670163, + "grad_norm": 0.6055041624964386, + "learning_rate": 1.1098821280375046e-06, + "loss": 0.2714, + "step": 29760 + }, + { + "epoch": 1.3941537452569448, + "grad_norm": 0.6208264401340295, + "learning_rate": 1.1097245041737986e-06, + "loss": 0.274, + "step": 29761 + }, + { + "epoch": 1.394200590246873, + "grad_norm": 0.5624387034628998, + "learning_rate": 1.1095668883107788e-06, + "loss": 0.2655, + "step": 29762 + }, + { + "epoch": 1.3942474352368015, + "grad_norm": 0.5959906270768272, + "learning_rate": 1.109409280449353e-06, + "loss": 0.2714, + "step": 29763 + }, + { + "epoch": 1.3942942802267297, + "grad_norm": 0.5909892916425823, + "learning_rate": 1.1092516805904275e-06, + "loss": 0.2733, + "step": 29764 + }, + { + "epoch": 1.3943411252166582, + "grad_norm": 0.5694101591256202, + "learning_rate": 1.1090940887349079e-06, + "loss": 0.2778, + "step": 29765 + }, + { + "epoch": 1.3943879702065864, + "grad_norm": 0.5817724869529906, + "learning_rate": 1.108936504883702e-06, + "loss": 0.2753, + "step": 29766 + }, + { + "epoch": 1.3944348151965147, + "grad_norm": 0.6027247102299962, + "learning_rate": 1.108778929037717e-06, + "loss": 0.2915, + "step": 29767 + }, + { + "epoch": 1.3944816601864432, + "grad_norm": 0.636978789766095, + "learning_rate": 1.1086213611978596e-06, + "loss": 0.2709, + "step": 29768 + }, + { + "epoch": 1.3945285051763714, + "grad_norm": 0.6010648998802431, + "learning_rate": 1.1084638013650368e-06, + "loss": 0.2697, + "step": 29769 + }, + { + "epoch": 1.3945753501662996, + "grad_norm": 0.6233611026456608, + "learning_rate": 1.1083062495401558e-06, + "loss": 0.2753, + "step": 29770 + }, + { + "epoch": 1.394622195156228, + "grad_norm": 0.6521259585427222, + "learning_rate": 1.1081487057241225e-06, + "loss": 0.2936, + "step": 29771 + }, + { + "epoch": 1.3946690401461563, + "grad_norm": 0.5844494583875661, + "learning_rate": 1.1079911699178429e-06, + "loss": 0.2742, + "step": 29772 + }, + { + "epoch": 1.3947158851360846, + "grad_norm": 0.6013524629994239, + "learning_rate": 1.1078336421222237e-06, + "loss": 0.2826, + "step": 29773 + }, + { + "epoch": 1.394762730126013, + "grad_norm": 0.5614254702930778, + "learning_rate": 1.1076761223381726e-06, + "loss": 0.2552, + "step": 29774 + }, + { + "epoch": 1.3948095751159413, + "grad_norm": 0.6116457520838161, + "learning_rate": 1.1075186105665953e-06, + "loss": 0.2706, + "step": 29775 + }, + { + "epoch": 1.3948564201058697, + "grad_norm": 0.6145480903366007, + "learning_rate": 1.1073611068083993e-06, + "loss": 0.2918, + "step": 29776 + }, + { + "epoch": 1.394903265095798, + "grad_norm": 0.5976532374061284, + "learning_rate": 1.107203611064489e-06, + "loss": 0.2527, + "step": 29777 + }, + { + "epoch": 1.3949501100857264, + "grad_norm": 0.5764047094401163, + "learning_rate": 1.107046123335773e-06, + "loss": 0.2617, + "step": 29778 + }, + { + "epoch": 1.3949969550756547, + "grad_norm": 0.6089483047251991, + "learning_rate": 1.106888643623155e-06, + "loss": 0.2799, + "step": 29779 + }, + { + "epoch": 1.395043800065583, + "grad_norm": 0.5314589050066975, + "learning_rate": 1.1067311719275435e-06, + "loss": 0.245, + "step": 29780 + }, + { + "epoch": 1.3950906450555114, + "grad_norm": 0.5885715835917817, + "learning_rate": 1.1065737082498434e-06, + "loss": 0.2791, + "step": 29781 + }, + { + "epoch": 1.3951374900454396, + "grad_norm": 0.5912464385383314, + "learning_rate": 1.1064162525909624e-06, + "loss": 0.2767, + "step": 29782 + }, + { + "epoch": 1.3951843350353679, + "grad_norm": 0.5667376289271685, + "learning_rate": 1.106258804951805e-06, + "loss": 0.2626, + "step": 29783 + }, + { + "epoch": 1.3952311800252963, + "grad_norm": 0.6076543763289703, + "learning_rate": 1.1061013653332775e-06, + "loss": 0.2744, + "step": 29784 + }, + { + "epoch": 1.3952780250152246, + "grad_norm": 0.575305607940359, + "learning_rate": 1.1059439337362873e-06, + "loss": 0.2784, + "step": 29785 + }, + { + "epoch": 1.3953248700051528, + "grad_norm": 0.5548792350072297, + "learning_rate": 1.1057865101617384e-06, + "loss": 0.2593, + "step": 29786 + }, + { + "epoch": 1.3953717149950813, + "grad_norm": 0.5441075288668535, + "learning_rate": 1.105629094610538e-06, + "loss": 0.2605, + "step": 29787 + }, + { + "epoch": 1.3954185599850095, + "grad_norm": 0.5658139168594952, + "learning_rate": 1.1054716870835923e-06, + "loss": 0.2579, + "step": 29788 + }, + { + "epoch": 1.395465404974938, + "grad_norm": 0.5491739380315068, + "learning_rate": 1.105314287581806e-06, + "loss": 0.2647, + "step": 29789 + }, + { + "epoch": 1.3955122499648662, + "grad_norm": 0.5552690565947234, + "learning_rate": 1.1051568961060849e-06, + "loss": 0.2596, + "step": 29790 + }, + { + "epoch": 1.3955590949547947, + "grad_norm": 0.5731377575590033, + "learning_rate": 1.1049995126573356e-06, + "loss": 0.2597, + "step": 29791 + }, + { + "epoch": 1.395605939944723, + "grad_norm": 0.6076881856535457, + "learning_rate": 1.1048421372364646e-06, + "loss": 0.2904, + "step": 29792 + }, + { + "epoch": 1.3956527849346512, + "grad_norm": 0.605584298456466, + "learning_rate": 1.1046847698443753e-06, + "loss": 0.2699, + "step": 29793 + }, + { + "epoch": 1.3956996299245796, + "grad_norm": 0.5903869139473994, + "learning_rate": 1.1045274104819754e-06, + "loss": 0.2607, + "step": 29794 + }, + { + "epoch": 1.3957464749145079, + "grad_norm": 0.5742572023411688, + "learning_rate": 1.1043700591501682e-06, + "loss": 0.2816, + "step": 29795 + }, + { + "epoch": 1.3957933199044361, + "grad_norm": 0.5459924024197985, + "learning_rate": 1.1042127158498612e-06, + "loss": 0.2798, + "step": 29796 + }, + { + "epoch": 1.3958401648943646, + "grad_norm": 0.5552003954747945, + "learning_rate": 1.1040553805819587e-06, + "loss": 0.2457, + "step": 29797 + }, + { + "epoch": 1.3958870098842928, + "grad_norm": 0.5518928626767022, + "learning_rate": 1.103898053347367e-06, + "loss": 0.2557, + "step": 29798 + }, + { + "epoch": 1.3959338548742213, + "grad_norm": 0.5928584100303448, + "learning_rate": 1.1037407341469916e-06, + "loss": 0.2718, + "step": 29799 + }, + { + "epoch": 1.3959806998641495, + "grad_norm": 0.6128886978058324, + "learning_rate": 1.103583422981737e-06, + "loss": 0.2799, + "step": 29800 + }, + { + "epoch": 1.396027544854078, + "grad_norm": 0.6016962557037651, + "learning_rate": 1.1034261198525093e-06, + "loss": 0.2708, + "step": 29801 + }, + { + "epoch": 1.3960743898440062, + "grad_norm": 0.5858971886952604, + "learning_rate": 1.1032688247602124e-06, + "loss": 0.2758, + "step": 29802 + }, + { + "epoch": 1.3961212348339345, + "grad_norm": 0.5701869748834001, + "learning_rate": 1.1031115377057524e-06, + "loss": 0.2633, + "step": 29803 + }, + { + "epoch": 1.396168079823863, + "grad_norm": 0.6330957832302647, + "learning_rate": 1.1029542586900343e-06, + "loss": 0.2832, + "step": 29804 + }, + { + "epoch": 1.3962149248137912, + "grad_norm": 0.5766700521136793, + "learning_rate": 1.1027969877139643e-06, + "loss": 0.2833, + "step": 29805 + }, + { + "epoch": 1.3962617698037194, + "grad_norm": 0.6284558968474333, + "learning_rate": 1.1026397247784457e-06, + "loss": 0.2833, + "step": 29806 + }, + { + "epoch": 1.3963086147936479, + "grad_norm": 0.6356648214693893, + "learning_rate": 1.102482469884385e-06, + "loss": 0.2844, + "step": 29807 + }, + { + "epoch": 1.3963554597835761, + "grad_norm": 0.6009141405728191, + "learning_rate": 1.1023252230326857e-06, + "loss": 0.2773, + "step": 29808 + }, + { + "epoch": 1.3964023047735044, + "grad_norm": 0.5486098211359712, + "learning_rate": 1.1021679842242534e-06, + "loss": 0.2596, + "step": 29809 + }, + { + "epoch": 1.3964491497634328, + "grad_norm": 0.5794727739316722, + "learning_rate": 1.1020107534599928e-06, + "loss": 0.2742, + "step": 29810 + }, + { + "epoch": 1.396495994753361, + "grad_norm": 0.5872495175245307, + "learning_rate": 1.101853530740809e-06, + "loss": 0.2562, + "step": 29811 + }, + { + "epoch": 1.3965428397432895, + "grad_norm": 0.6430510667828419, + "learning_rate": 1.1016963160676078e-06, + "loss": 0.2909, + "step": 29812 + }, + { + "epoch": 1.3965896847332178, + "grad_norm": 0.6084113121160997, + "learning_rate": 1.1015391094412928e-06, + "loss": 0.2834, + "step": 29813 + }, + { + "epoch": 1.3966365297231462, + "grad_norm": 0.5837511779212353, + "learning_rate": 1.101381910862768e-06, + "loss": 0.2632, + "step": 29814 + }, + { + "epoch": 1.3966833747130745, + "grad_norm": 0.593655115366214, + "learning_rate": 1.1012247203329384e-06, + "loss": 0.2612, + "step": 29815 + }, + { + "epoch": 1.3967302197030027, + "grad_norm": 0.5995095507350657, + "learning_rate": 1.1010675378527091e-06, + "loss": 0.2938, + "step": 29816 + }, + { + "epoch": 1.3967770646929312, + "grad_norm": 0.5661380229062954, + "learning_rate": 1.1009103634229845e-06, + "loss": 0.2777, + "step": 29817 + }, + { + "epoch": 1.3968239096828594, + "grad_norm": 0.5879597509264703, + "learning_rate": 1.1007531970446694e-06, + "loss": 0.2829, + "step": 29818 + }, + { + "epoch": 1.3968707546727877, + "grad_norm": 0.6075687567820166, + "learning_rate": 1.1005960387186684e-06, + "loss": 0.2821, + "step": 29819 + }, + { + "epoch": 1.3969175996627161, + "grad_norm": 0.6361269620705873, + "learning_rate": 1.1004388884458859e-06, + "loss": 0.2739, + "step": 29820 + }, + { + "epoch": 1.3969644446526444, + "grad_norm": 0.6259463897532218, + "learning_rate": 1.1002817462272248e-06, + "loss": 0.2745, + "step": 29821 + }, + { + "epoch": 1.3970112896425726, + "grad_norm": 0.5646947552226474, + "learning_rate": 1.1001246120635903e-06, + "loss": 0.2749, + "step": 29822 + }, + { + "epoch": 1.397058134632501, + "grad_norm": 0.5759316915174484, + "learning_rate": 1.0999674859558868e-06, + "loss": 0.2756, + "step": 29823 + }, + { + "epoch": 1.3971049796224293, + "grad_norm": 0.5817454403979431, + "learning_rate": 1.0998103679050188e-06, + "loss": 0.2664, + "step": 29824 + }, + { + "epoch": 1.3971518246123578, + "grad_norm": 0.5952131973953096, + "learning_rate": 1.0996532579118907e-06, + "loss": 0.2656, + "step": 29825 + }, + { + "epoch": 1.397198669602286, + "grad_norm": 0.5887349835034271, + "learning_rate": 1.0994961559774057e-06, + "loss": 0.2669, + "step": 29826 + }, + { + "epoch": 1.3972455145922145, + "grad_norm": 0.6365282184876435, + "learning_rate": 1.0993390621024688e-06, + "loss": 0.2852, + "step": 29827 + }, + { + "epoch": 1.3972923595821427, + "grad_norm": 0.5791915180779351, + "learning_rate": 1.099181976287983e-06, + "loss": 0.2664, + "step": 29828 + }, + { + "epoch": 1.397339204572071, + "grad_norm": 0.6056753783686999, + "learning_rate": 1.0990248985348525e-06, + "loss": 0.2825, + "step": 29829 + }, + { + "epoch": 1.3973860495619994, + "grad_norm": 0.7026907246357151, + "learning_rate": 1.0988678288439819e-06, + "loss": 0.2718, + "step": 29830 + }, + { + "epoch": 1.3974328945519277, + "grad_norm": 0.5733622018868803, + "learning_rate": 1.0987107672162756e-06, + "loss": 0.2654, + "step": 29831 + }, + { + "epoch": 1.397479739541856, + "grad_norm": 0.5843682560808866, + "learning_rate": 1.0985537136526356e-06, + "loss": 0.2618, + "step": 29832 + }, + { + "epoch": 1.3975265845317844, + "grad_norm": 0.6189691816423445, + "learning_rate": 1.0983966681539668e-06, + "loss": 0.2723, + "step": 29833 + }, + { + "epoch": 1.3975734295217126, + "grad_norm": 0.6174549456710114, + "learning_rate": 1.098239630721174e-06, + "loss": 0.2775, + "step": 29834 + }, + { + "epoch": 1.397620274511641, + "grad_norm": 0.5890911388145771, + "learning_rate": 1.0980826013551584e-06, + "loss": 0.2605, + "step": 29835 + }, + { + "epoch": 1.3976671195015693, + "grad_norm": 0.6201464733066222, + "learning_rate": 1.0979255800568257e-06, + "loss": 0.2732, + "step": 29836 + }, + { + "epoch": 1.3977139644914978, + "grad_norm": 0.5720388046342866, + "learning_rate": 1.0977685668270794e-06, + "loss": 0.2596, + "step": 29837 + }, + { + "epoch": 1.397760809481426, + "grad_norm": 0.5724876750355405, + "learning_rate": 1.0976115616668218e-06, + "loss": 0.2534, + "step": 29838 + }, + { + "epoch": 1.3978076544713542, + "grad_norm": 0.587932547908333, + "learning_rate": 1.0974545645769571e-06, + "loss": 0.2688, + "step": 29839 + }, + { + "epoch": 1.3978544994612827, + "grad_norm": 0.5841387561045797, + "learning_rate": 1.097297575558389e-06, + "loss": 0.2713, + "step": 29840 + }, + { + "epoch": 1.397901344451211, + "grad_norm": 0.6085772797923472, + "learning_rate": 1.0971405946120215e-06, + "loss": 0.2721, + "step": 29841 + }, + { + "epoch": 1.3979481894411392, + "grad_norm": 0.5733965770313525, + "learning_rate": 1.0969836217387563e-06, + "loss": 0.2699, + "step": 29842 + }, + { + "epoch": 1.3979950344310677, + "grad_norm": 0.5616144898840822, + "learning_rate": 1.0968266569394988e-06, + "loss": 0.2715, + "step": 29843 + }, + { + "epoch": 1.398041879420996, + "grad_norm": 0.5953972569299989, + "learning_rate": 1.0966697002151506e-06, + "loss": 0.2646, + "step": 29844 + }, + { + "epoch": 1.3980887244109241, + "grad_norm": 0.5812946018863879, + "learning_rate": 1.0965127515666153e-06, + "loss": 0.2683, + "step": 29845 + }, + { + "epoch": 1.3981355694008526, + "grad_norm": 0.6100631562016818, + "learning_rate": 1.0963558109947962e-06, + "loss": 0.2727, + "step": 29846 + }, + { + "epoch": 1.3981824143907808, + "grad_norm": 0.5931269912000687, + "learning_rate": 1.0961988785005966e-06, + "loss": 0.2815, + "step": 29847 + }, + { + "epoch": 1.3982292593807093, + "grad_norm": 0.626279099452442, + "learning_rate": 1.0960419540849207e-06, + "loss": 0.2628, + "step": 29848 + }, + { + "epoch": 1.3982761043706375, + "grad_norm": 0.6185803949347827, + "learning_rate": 1.0958850377486708e-06, + "loss": 0.2644, + "step": 29849 + }, + { + "epoch": 1.398322949360566, + "grad_norm": 0.6066158125864481, + "learning_rate": 1.0957281294927483e-06, + "loss": 0.2752, + "step": 29850 + }, + { + "epoch": 1.3983697943504942, + "grad_norm": 0.586928153527705, + "learning_rate": 1.0955712293180573e-06, + "loss": 0.2943, + "step": 29851 + }, + { + "epoch": 1.3984166393404225, + "grad_norm": 0.5278246056564508, + "learning_rate": 1.095414337225501e-06, + "loss": 0.2673, + "step": 29852 + }, + { + "epoch": 1.398463484330351, + "grad_norm": 0.6037846003271276, + "learning_rate": 1.0952574532159824e-06, + "loss": 0.2819, + "step": 29853 + }, + { + "epoch": 1.3985103293202792, + "grad_norm": 0.631012865085019, + "learning_rate": 1.095100577290405e-06, + "loss": 0.28, + "step": 29854 + }, + { + "epoch": 1.3985571743102074, + "grad_norm": 0.5898768321511466, + "learning_rate": 1.0949437094496696e-06, + "loss": 0.2886, + "step": 29855 + }, + { + "epoch": 1.398604019300136, + "grad_norm": 0.5706745944865225, + "learning_rate": 1.0947868496946809e-06, + "loss": 0.2727, + "step": 29856 + }, + { + "epoch": 1.3986508642900641, + "grad_norm": 0.6001285472985879, + "learning_rate": 1.0946299980263398e-06, + "loss": 0.2636, + "step": 29857 + }, + { + "epoch": 1.3986977092799924, + "grad_norm": 0.6340264397832572, + "learning_rate": 1.0944731544455498e-06, + "loss": 0.2735, + "step": 29858 + }, + { + "epoch": 1.3987445542699208, + "grad_norm": 0.6338947873542726, + "learning_rate": 1.0943163189532136e-06, + "loss": 0.294, + "step": 29859 + }, + { + "epoch": 1.398791399259849, + "grad_norm": 0.6073055989073678, + "learning_rate": 1.0941594915502338e-06, + "loss": 0.2863, + "step": 29860 + }, + { + "epoch": 1.3988382442497775, + "grad_norm": 0.6031528833764671, + "learning_rate": 1.0940026722375133e-06, + "loss": 0.2696, + "step": 29861 + }, + { + "epoch": 1.3988850892397058, + "grad_norm": 0.5994162063324009, + "learning_rate": 1.0938458610159543e-06, + "loss": 0.2715, + "step": 29862 + }, + { + "epoch": 1.3989319342296342, + "grad_norm": 0.6033262803474633, + "learning_rate": 1.0936890578864581e-06, + "loss": 0.2611, + "step": 29863 + }, + { + "epoch": 1.3989787792195625, + "grad_norm": 0.610363912831851, + "learning_rate": 1.0935322628499278e-06, + "loss": 0.2669, + "step": 29864 + }, + { + "epoch": 1.3990256242094907, + "grad_norm": 0.6173398530131482, + "learning_rate": 1.093375475907266e-06, + "loss": 0.2971, + "step": 29865 + }, + { + "epoch": 1.3990724691994192, + "grad_norm": 0.5939001063439843, + "learning_rate": 1.0932186970593744e-06, + "loss": 0.2622, + "step": 29866 + }, + { + "epoch": 1.3991193141893474, + "grad_norm": 0.6150176920697366, + "learning_rate": 1.0930619263071571e-06, + "loss": 0.2826, + "step": 29867 + }, + { + "epoch": 1.3991661591792757, + "grad_norm": 0.5786905101236293, + "learning_rate": 1.0929051636515133e-06, + "loss": 0.2613, + "step": 29868 + }, + { + "epoch": 1.3992130041692041, + "grad_norm": 0.6306507976963378, + "learning_rate": 1.092748409093348e-06, + "loss": 0.2584, + "step": 29869 + }, + { + "epoch": 1.3992598491591324, + "grad_norm": 0.5954619916711915, + "learning_rate": 1.0925916626335605e-06, + "loss": 0.2622, + "step": 29870 + }, + { + "epoch": 1.3993066941490608, + "grad_norm": 0.6116393585971188, + "learning_rate": 1.0924349242730548e-06, + "loss": 0.2886, + "step": 29871 + }, + { + "epoch": 1.399353539138989, + "grad_norm": 0.5879392858493286, + "learning_rate": 1.092278194012732e-06, + "loss": 0.2626, + "step": 29872 + }, + { + "epoch": 1.3994003841289175, + "grad_norm": 0.6030936303758135, + "learning_rate": 1.0921214718534945e-06, + "loss": 0.2565, + "step": 29873 + }, + { + "epoch": 1.3994472291188458, + "grad_norm": 0.6039403396168138, + "learning_rate": 1.0919647577962451e-06, + "loss": 0.2744, + "step": 29874 + }, + { + "epoch": 1.399494074108774, + "grad_norm": 0.6027071549137933, + "learning_rate": 1.0918080518418834e-06, + "loss": 0.2833, + "step": 29875 + }, + { + "epoch": 1.3995409190987025, + "grad_norm": 0.5698056992991231, + "learning_rate": 1.0916513539913135e-06, + "loss": 0.262, + "step": 29876 + }, + { + "epoch": 1.3995877640886307, + "grad_norm": 0.621911901410353, + "learning_rate": 1.091494664245435e-06, + "loss": 0.2792, + "step": 29877 + }, + { + "epoch": 1.399634609078559, + "grad_norm": 0.5609183873019314, + "learning_rate": 1.091337982605151e-06, + "loss": 0.2696, + "step": 29878 + }, + { + "epoch": 1.3996814540684874, + "grad_norm": 0.5739907280310353, + "learning_rate": 1.0911813090713626e-06, + "loss": 0.2606, + "step": 29879 + }, + { + "epoch": 1.3997282990584157, + "grad_norm": 0.5761018728814837, + "learning_rate": 1.0910246436449725e-06, + "loss": 0.2659, + "step": 29880 + }, + { + "epoch": 1.399775144048344, + "grad_norm": 0.5602720339799977, + "learning_rate": 1.090867986326881e-06, + "loss": 0.2475, + "step": 29881 + }, + { + "epoch": 1.3998219890382724, + "grad_norm": 0.5680414924298836, + "learning_rate": 1.0907113371179897e-06, + "loss": 0.2657, + "step": 29882 + }, + { + "epoch": 1.3998688340282006, + "grad_norm": 0.5360684696313969, + "learning_rate": 1.0905546960192016e-06, + "loss": 0.2638, + "step": 29883 + }, + { + "epoch": 1.399915679018129, + "grad_norm": 0.5584394660514442, + "learning_rate": 1.0903980630314156e-06, + "loss": 0.2673, + "step": 29884 + }, + { + "epoch": 1.3999625240080573, + "grad_norm": 0.6438611681479202, + "learning_rate": 1.0902414381555349e-06, + "loss": 0.2704, + "step": 29885 + }, + { + "epoch": 1.4000093689979858, + "grad_norm": 0.6299421369356107, + "learning_rate": 1.0900848213924611e-06, + "loss": 0.2955, + "step": 29886 + }, + { + "epoch": 1.400056213987914, + "grad_norm": 0.6023963663659594, + "learning_rate": 1.0899282127430942e-06, + "loss": 0.2727, + "step": 29887 + }, + { + "epoch": 1.4001030589778423, + "grad_norm": 0.6417280388290211, + "learning_rate": 1.0897716122083355e-06, + "loss": 0.2749, + "step": 29888 + }, + { + "epoch": 1.4001499039677707, + "grad_norm": 0.6408430054627938, + "learning_rate": 1.089615019789087e-06, + "loss": 0.2808, + "step": 29889 + }, + { + "epoch": 1.400196748957699, + "grad_norm": 0.5972781441242503, + "learning_rate": 1.0894584354862505e-06, + "loss": 0.2702, + "step": 29890 + }, + { + "epoch": 1.4002435939476272, + "grad_norm": 0.5803712044077582, + "learning_rate": 1.089301859300725e-06, + "loss": 0.2578, + "step": 29891 + }, + { + "epoch": 1.4002904389375557, + "grad_norm": 0.5618065599910546, + "learning_rate": 1.089145291233414e-06, + "loss": 0.266, + "step": 29892 + }, + { + "epoch": 1.400337283927484, + "grad_norm": 0.5929546019824415, + "learning_rate": 1.0889887312852163e-06, + "loss": 0.2657, + "step": 29893 + }, + { + "epoch": 1.4003841289174122, + "grad_norm": 0.582362695907556, + "learning_rate": 1.0888321794570336e-06, + "loss": 0.2728, + "step": 29894 + }, + { + "epoch": 1.4004309739073406, + "grad_norm": 0.5517168147243992, + "learning_rate": 1.0886756357497672e-06, + "loss": 0.2564, + "step": 29895 + }, + { + "epoch": 1.4004778188972689, + "grad_norm": 0.5978119412556262, + "learning_rate": 1.0885191001643185e-06, + "loss": 0.2905, + "step": 29896 + }, + { + "epoch": 1.4005246638871973, + "grad_norm": 0.5995121930085107, + "learning_rate": 1.088362572701587e-06, + "loss": 0.2767, + "step": 29897 + }, + { + "epoch": 1.4005715088771256, + "grad_norm": 0.6053493689796496, + "learning_rate": 1.0882060533624749e-06, + "loss": 0.2815, + "step": 29898 + }, + { + "epoch": 1.400618353867054, + "grad_norm": 0.5714430621083604, + "learning_rate": 1.0880495421478813e-06, + "loss": 0.2726, + "step": 29899 + }, + { + "epoch": 1.4006651988569823, + "grad_norm": 0.6145318493192538, + "learning_rate": 1.0878930390587075e-06, + "loss": 0.2705, + "step": 29900 + }, + { + "epoch": 1.4007120438469105, + "grad_norm": 0.5811654146918113, + "learning_rate": 1.0877365440958543e-06, + "loss": 0.2663, + "step": 29901 + }, + { + "epoch": 1.400758888836839, + "grad_norm": 0.6027843757192949, + "learning_rate": 1.0875800572602224e-06, + "loss": 0.2782, + "step": 29902 + }, + { + "epoch": 1.4008057338267672, + "grad_norm": 0.594500159882675, + "learning_rate": 1.0874235785527135e-06, + "loss": 0.2752, + "step": 29903 + }, + { + "epoch": 1.4008525788166954, + "grad_norm": 0.5905261966489173, + "learning_rate": 1.0872671079742263e-06, + "loss": 0.2687, + "step": 29904 + }, + { + "epoch": 1.400899423806624, + "grad_norm": 0.5916300238391805, + "learning_rate": 1.0871106455256614e-06, + "loss": 0.2665, + "step": 29905 + }, + { + "epoch": 1.4009462687965522, + "grad_norm": 0.5401243734461024, + "learning_rate": 1.0869541912079194e-06, + "loss": 0.2565, + "step": 29906 + }, + { + "epoch": 1.4009931137864806, + "grad_norm": 0.5560145319675829, + "learning_rate": 1.0867977450219008e-06, + "loss": 0.2645, + "step": 29907 + }, + { + "epoch": 1.4010399587764089, + "grad_norm": 0.6462796003994082, + "learning_rate": 1.0866413069685062e-06, + "loss": 0.3027, + "step": 29908 + }, + { + "epoch": 1.4010868037663373, + "grad_norm": 0.588188897072693, + "learning_rate": 1.0864848770486356e-06, + "loss": 0.2692, + "step": 29909 + }, + { + "epoch": 1.4011336487562656, + "grad_norm": 0.6120065928207279, + "learning_rate": 1.08632845526319e-06, + "loss": 0.2726, + "step": 29910 + }, + { + "epoch": 1.4011804937461938, + "grad_norm": 0.649251083797211, + "learning_rate": 1.0861720416130689e-06, + "loss": 0.287, + "step": 29911 + }, + { + "epoch": 1.4012273387361223, + "grad_norm": 0.6041511618100736, + "learning_rate": 1.0860156360991713e-06, + "loss": 0.274, + "step": 29912 + }, + { + "epoch": 1.4012741837260505, + "grad_norm": 0.6086374198857515, + "learning_rate": 1.0858592387223984e-06, + "loss": 0.27, + "step": 29913 + }, + { + "epoch": 1.4013210287159787, + "grad_norm": 0.6182103763716541, + "learning_rate": 1.0857028494836504e-06, + "loss": 0.2772, + "step": 29914 + }, + { + "epoch": 1.4013678737059072, + "grad_norm": 0.5657913364187437, + "learning_rate": 1.0855464683838269e-06, + "loss": 0.2614, + "step": 29915 + }, + { + "epoch": 1.4014147186958354, + "grad_norm": 0.5702337109693462, + "learning_rate": 1.0853900954238288e-06, + "loss": 0.2817, + "step": 29916 + }, + { + "epoch": 1.4014615636857637, + "grad_norm": 0.6233646270853656, + "learning_rate": 1.0852337306045543e-06, + "loss": 0.2842, + "step": 29917 + }, + { + "epoch": 1.4015084086756922, + "grad_norm": 0.6605837027647271, + "learning_rate": 1.085077373926905e-06, + "loss": 0.2896, + "step": 29918 + }, + { + "epoch": 1.4015552536656204, + "grad_norm": 0.5859207397703563, + "learning_rate": 1.084921025391779e-06, + "loss": 0.2726, + "step": 29919 + }, + { + "epoch": 1.4016020986555489, + "grad_norm": 0.5731366770302703, + "learning_rate": 1.0847646850000765e-06, + "loss": 0.2789, + "step": 29920 + }, + { + "epoch": 1.401648943645477, + "grad_norm": 0.5875976455875452, + "learning_rate": 1.0846083527526976e-06, + "loss": 0.2677, + "step": 29921 + }, + { + "epoch": 1.4016957886354056, + "grad_norm": 0.6034556981628566, + "learning_rate": 1.0844520286505431e-06, + "loss": 0.2629, + "step": 29922 + }, + { + "epoch": 1.4017426336253338, + "grad_norm": 0.6110889984099547, + "learning_rate": 1.0842957126945103e-06, + "loss": 0.2622, + "step": 29923 + }, + { + "epoch": 1.401789478615262, + "grad_norm": 0.5864415672384111, + "learning_rate": 1.0841394048854998e-06, + "loss": 0.276, + "step": 29924 + }, + { + "epoch": 1.4018363236051905, + "grad_norm": 0.6018983152096364, + "learning_rate": 1.083983105224412e-06, + "loss": 0.2894, + "step": 29925 + }, + { + "epoch": 1.4018831685951187, + "grad_norm": 0.6133524869432797, + "learning_rate": 1.0838268137121447e-06, + "loss": 0.2771, + "step": 29926 + }, + { + "epoch": 1.401930013585047, + "grad_norm": 0.5792518664581683, + "learning_rate": 1.0836705303495982e-06, + "loss": 0.2557, + "step": 29927 + }, + { + "epoch": 1.4019768585749754, + "grad_norm": 0.6507156149995497, + "learning_rate": 1.0835142551376725e-06, + "loss": 0.2882, + "step": 29928 + }, + { + "epoch": 1.4020237035649037, + "grad_norm": 0.5903881579475325, + "learning_rate": 1.0833579880772656e-06, + "loss": 0.2577, + "step": 29929 + }, + { + "epoch": 1.402070548554832, + "grad_norm": 0.5825608149107931, + "learning_rate": 1.0832017291692775e-06, + "loss": 0.2642, + "step": 29930 + }, + { + "epoch": 1.4021173935447604, + "grad_norm": 0.6264799772212782, + "learning_rate": 1.0830454784146072e-06, + "loss": 0.2854, + "step": 29931 + }, + { + "epoch": 1.4021642385346886, + "grad_norm": 0.6230794080512595, + "learning_rate": 1.0828892358141548e-06, + "loss": 0.2985, + "step": 29932 + }, + { + "epoch": 1.402211083524617, + "grad_norm": 0.6250500082576854, + "learning_rate": 1.082733001368818e-06, + "loss": 0.2725, + "step": 29933 + }, + { + "epoch": 1.4022579285145453, + "grad_norm": 0.5959017468256301, + "learning_rate": 1.0825767750794974e-06, + "loss": 0.273, + "step": 29934 + }, + { + "epoch": 1.4023047735044738, + "grad_norm": 0.5930417820053634, + "learning_rate": 1.0824205569470902e-06, + "loss": 0.2704, + "step": 29935 + }, + { + "epoch": 1.402351618494402, + "grad_norm": 0.62074151761313, + "learning_rate": 1.0822643469724966e-06, + "loss": 0.2725, + "step": 29936 + }, + { + "epoch": 1.4023984634843303, + "grad_norm": 0.6367060714975165, + "learning_rate": 1.0821081451566151e-06, + "loss": 0.277, + "step": 29937 + }, + { + "epoch": 1.4024453084742587, + "grad_norm": 0.597980885790349, + "learning_rate": 1.0819519515003452e-06, + "loss": 0.2714, + "step": 29938 + }, + { + "epoch": 1.402492153464187, + "grad_norm": 0.5645966358674985, + "learning_rate": 1.081795766004586e-06, + "loss": 0.267, + "step": 29939 + }, + { + "epoch": 1.4025389984541152, + "grad_norm": 0.5835115427330893, + "learning_rate": 1.081639588670235e-06, + "loss": 0.2799, + "step": 29940 + }, + { + "epoch": 1.4025858434440437, + "grad_norm": 0.5913781544579864, + "learning_rate": 1.0814834194981924e-06, + "loss": 0.2855, + "step": 29941 + }, + { + "epoch": 1.402632688433972, + "grad_norm": 0.6449359696960251, + "learning_rate": 1.0813272584893552e-06, + "loss": 0.2869, + "step": 29942 + }, + { + "epoch": 1.4026795334239004, + "grad_norm": 0.58591343620565, + "learning_rate": 1.0811711056446234e-06, + "loss": 0.2779, + "step": 29943 + }, + { + "epoch": 1.4027263784138286, + "grad_norm": 0.6208303872547956, + "learning_rate": 1.0810149609648948e-06, + "loss": 0.2731, + "step": 29944 + }, + { + "epoch": 1.402773223403757, + "grad_norm": 0.5351744720701407, + "learning_rate": 1.0808588244510698e-06, + "loss": 0.2531, + "step": 29945 + }, + { + "epoch": 1.4028200683936853, + "grad_norm": 0.6088827320172274, + "learning_rate": 1.0807026961040446e-06, + "loss": 0.2639, + "step": 29946 + }, + { + "epoch": 1.4028669133836136, + "grad_norm": 0.5781462980743889, + "learning_rate": 1.0805465759247194e-06, + "loss": 0.2692, + "step": 29947 + }, + { + "epoch": 1.402913758373542, + "grad_norm": 0.5551122068880099, + "learning_rate": 1.0803904639139912e-06, + "loss": 0.2484, + "step": 29948 + }, + { + "epoch": 1.4029606033634703, + "grad_norm": 0.6000660705875361, + "learning_rate": 1.0802343600727588e-06, + "loss": 0.2663, + "step": 29949 + }, + { + "epoch": 1.4030074483533985, + "grad_norm": 0.6152396017427352, + "learning_rate": 1.080078264401921e-06, + "loss": 0.274, + "step": 29950 + }, + { + "epoch": 1.403054293343327, + "grad_norm": 0.6066585275434072, + "learning_rate": 1.0799221769023758e-06, + "loss": 0.2756, + "step": 29951 + }, + { + "epoch": 1.4031011383332552, + "grad_norm": 0.6232335524698238, + "learning_rate": 1.0797660975750227e-06, + "loss": 0.2596, + "step": 29952 + }, + { + "epoch": 1.4031479833231835, + "grad_norm": 0.6464136464414902, + "learning_rate": 1.0796100264207585e-06, + "loss": 0.2661, + "step": 29953 + }, + { + "epoch": 1.403194828313112, + "grad_norm": 0.5966463822758288, + "learning_rate": 1.0794539634404808e-06, + "loss": 0.2755, + "step": 29954 + }, + { + "epoch": 1.4032416733030402, + "grad_norm": 0.543640076750254, + "learning_rate": 1.0792979086350882e-06, + "loss": 0.253, + "step": 29955 + }, + { + "epoch": 1.4032885182929686, + "grad_norm": 0.6249383681266798, + "learning_rate": 1.0791418620054794e-06, + "loss": 0.2803, + "step": 29956 + }, + { + "epoch": 1.4033353632828969, + "grad_norm": 0.6016702476448996, + "learning_rate": 1.0789858235525519e-06, + "loss": 0.2805, + "step": 29957 + }, + { + "epoch": 1.4033822082728253, + "grad_norm": 0.6101315758914255, + "learning_rate": 1.0788297932772042e-06, + "loss": 0.281, + "step": 29958 + }, + { + "epoch": 1.4034290532627536, + "grad_norm": 0.5936482904573351, + "learning_rate": 1.0786737711803341e-06, + "loss": 0.2776, + "step": 29959 + }, + { + "epoch": 1.4034758982526818, + "grad_norm": 0.6270980578428192, + "learning_rate": 1.0785177572628397e-06, + "loss": 0.2792, + "step": 29960 + }, + { + "epoch": 1.4035227432426103, + "grad_norm": 0.5560879611439304, + "learning_rate": 1.0783617515256174e-06, + "loss": 0.261, + "step": 29961 + }, + { + "epoch": 1.4035695882325385, + "grad_norm": 0.6541715746591172, + "learning_rate": 1.0782057539695657e-06, + "loss": 0.2939, + "step": 29962 + }, + { + "epoch": 1.4036164332224668, + "grad_norm": 0.5980532905244295, + "learning_rate": 1.0780497645955825e-06, + "loss": 0.2609, + "step": 29963 + }, + { + "epoch": 1.4036632782123952, + "grad_norm": 0.5976832728994952, + "learning_rate": 1.077893783404566e-06, + "loss": 0.2583, + "step": 29964 + }, + { + "epoch": 1.4037101232023235, + "grad_norm": 0.5727187941555175, + "learning_rate": 1.0777378103974135e-06, + "loss": 0.2679, + "step": 29965 + }, + { + "epoch": 1.4037569681922517, + "grad_norm": 0.6151961091601922, + "learning_rate": 1.077581845575022e-06, + "loss": 0.2747, + "step": 29966 + }, + { + "epoch": 1.4038038131821802, + "grad_norm": 0.5422101850255969, + "learning_rate": 1.07742588893829e-06, + "loss": 0.2552, + "step": 29967 + }, + { + "epoch": 1.4038506581721084, + "grad_norm": 0.5947493531417543, + "learning_rate": 1.077269940488114e-06, + "loss": 0.261, + "step": 29968 + }, + { + "epoch": 1.4038975031620369, + "grad_norm": 0.5737567112631664, + "learning_rate": 1.0771140002253919e-06, + "loss": 0.2698, + "step": 29969 + }, + { + "epoch": 1.4039443481519651, + "grad_norm": 0.6114601564777744, + "learning_rate": 1.0769580681510208e-06, + "loss": 0.2692, + "step": 29970 + }, + { + "epoch": 1.4039911931418936, + "grad_norm": 0.5321610839529107, + "learning_rate": 1.0768021442658993e-06, + "loss": 0.2619, + "step": 29971 + }, + { + "epoch": 1.4040380381318218, + "grad_norm": 0.5668867139232086, + "learning_rate": 1.076646228570923e-06, + "loss": 0.2713, + "step": 29972 + }, + { + "epoch": 1.40408488312175, + "grad_norm": 0.5825925814082319, + "learning_rate": 1.07649032106699e-06, + "loss": 0.2841, + "step": 29973 + }, + { + "epoch": 1.4041317281116785, + "grad_norm": 0.5920117695921628, + "learning_rate": 1.0763344217549982e-06, + "loss": 0.2696, + "step": 29974 + }, + { + "epoch": 1.4041785731016068, + "grad_norm": 0.6141750363248867, + "learning_rate": 1.0761785306358427e-06, + "loss": 0.2713, + "step": 29975 + }, + { + "epoch": 1.404225418091535, + "grad_norm": 0.550515545515791, + "learning_rate": 1.0760226477104225e-06, + "loss": 0.2695, + "step": 29976 + }, + { + "epoch": 1.4042722630814635, + "grad_norm": 0.6456661269253651, + "learning_rate": 1.0758667729796346e-06, + "loss": 0.2876, + "step": 29977 + }, + { + "epoch": 1.4043191080713917, + "grad_norm": 0.6141766262633035, + "learning_rate": 1.0757109064443745e-06, + "loss": 0.2769, + "step": 29978 + }, + { + "epoch": 1.4043659530613202, + "grad_norm": 0.5627320469112088, + "learning_rate": 1.0755550481055404e-06, + "loss": 0.2532, + "step": 29979 + }, + { + "epoch": 1.4044127980512484, + "grad_norm": 0.5953902324260564, + "learning_rate": 1.0753991979640288e-06, + "loss": 0.2709, + "step": 29980 + }, + { + "epoch": 1.4044596430411769, + "grad_norm": 0.6351556815835797, + "learning_rate": 1.075243356020738e-06, + "loss": 0.283, + "step": 29981 + }, + { + "epoch": 1.4045064880311051, + "grad_norm": 0.6073325883882832, + "learning_rate": 1.0750875222765621e-06, + "loss": 0.2819, + "step": 29982 + }, + { + "epoch": 1.4045533330210334, + "grad_norm": 0.6073302198557182, + "learning_rate": 1.0749316967324009e-06, + "loss": 0.2581, + "step": 29983 + }, + { + "epoch": 1.4046001780109618, + "grad_norm": 0.6024853365046254, + "learning_rate": 1.0747758793891482e-06, + "loss": 0.2727, + "step": 29984 + }, + { + "epoch": 1.40464702300089, + "grad_norm": 0.6155817555417641, + "learning_rate": 1.0746200702477025e-06, + "loss": 0.2836, + "step": 29985 + }, + { + "epoch": 1.4046938679908183, + "grad_norm": 0.5881767002413939, + "learning_rate": 1.0744642693089598e-06, + "loss": 0.2679, + "step": 29986 + }, + { + "epoch": 1.4047407129807468, + "grad_norm": 0.5888745906327061, + "learning_rate": 1.074308476573817e-06, + "loss": 0.2726, + "step": 29987 + }, + { + "epoch": 1.404787557970675, + "grad_norm": 0.580401391293271, + "learning_rate": 1.0741526920431713e-06, + "loss": 0.2517, + "step": 29988 + }, + { + "epoch": 1.4048344029606032, + "grad_norm": 0.624636506099638, + "learning_rate": 1.073996915717919e-06, + "loss": 0.2736, + "step": 29989 + }, + { + "epoch": 1.4048812479505317, + "grad_norm": 0.5918739374664753, + "learning_rate": 1.0738411475989546e-06, + "loss": 0.2644, + "step": 29990 + }, + { + "epoch": 1.40492809294046, + "grad_norm": 0.5711546998095254, + "learning_rate": 1.073685387687176e-06, + "loss": 0.2614, + "step": 29991 + }, + { + "epoch": 1.4049749379303884, + "grad_norm": 0.5616338132607158, + "learning_rate": 1.0735296359834798e-06, + "loss": 0.2632, + "step": 29992 + }, + { + "epoch": 1.4050217829203167, + "grad_norm": 0.5916401031425889, + "learning_rate": 1.0733738924887619e-06, + "loss": 0.2872, + "step": 29993 + }, + { + "epoch": 1.4050686279102451, + "grad_norm": 0.5978066825142454, + "learning_rate": 1.0732181572039194e-06, + "loss": 0.2734, + "step": 29994 + }, + { + "epoch": 1.4051154729001734, + "grad_norm": 0.5827010359467174, + "learning_rate": 1.073062430129847e-06, + "loss": 0.2627, + "step": 29995 + }, + { + "epoch": 1.4051623178901016, + "grad_norm": 0.5473491777004675, + "learning_rate": 1.0729067112674426e-06, + "loss": 0.2519, + "step": 29996 + }, + { + "epoch": 1.40520916288003, + "grad_norm": 0.5915777428531827, + "learning_rate": 1.0727510006176006e-06, + "loss": 0.2803, + "step": 29997 + }, + { + "epoch": 1.4052560078699583, + "grad_norm": 0.624241028492728, + "learning_rate": 1.0725952981812176e-06, + "loss": 0.2708, + "step": 29998 + }, + { + "epoch": 1.4053028528598865, + "grad_norm": 0.5627528243096219, + "learning_rate": 1.0724396039591904e-06, + "loss": 0.2514, + "step": 29999 + }, + { + "epoch": 1.405349697849815, + "grad_norm": 0.5967285523323611, + "learning_rate": 1.0722839179524139e-06, + "loss": 0.2673, + "step": 30000 + }, + { + "epoch": 1.4053965428397432, + "grad_norm": 0.5822219540073114, + "learning_rate": 1.0721282401617859e-06, + "loss": 0.273, + "step": 30001 + }, + { + "epoch": 1.4054433878296715, + "grad_norm": 0.5925871070728955, + "learning_rate": 1.0719725705882005e-06, + "loss": 0.2649, + "step": 30002 + }, + { + "epoch": 1.4054902328196, + "grad_norm": 0.5668272088472605, + "learning_rate": 1.0718169092325536e-06, + "loss": 0.2526, + "step": 30003 + }, + { + "epoch": 1.4055370778095282, + "grad_norm": 0.5447562634550698, + "learning_rate": 1.0716612560957413e-06, + "loss": 0.2752, + "step": 30004 + }, + { + "epoch": 1.4055839227994567, + "grad_norm": 0.6473485486148474, + "learning_rate": 1.0715056111786595e-06, + "loss": 0.2731, + "step": 30005 + }, + { + "epoch": 1.405630767789385, + "grad_norm": 0.6103001395921558, + "learning_rate": 1.0713499744822036e-06, + "loss": 0.2667, + "step": 30006 + }, + { + "epoch": 1.4056776127793134, + "grad_norm": 0.5888350063386535, + "learning_rate": 1.07119434600727e-06, + "loss": 0.271, + "step": 30007 + }, + { + "epoch": 1.4057244577692416, + "grad_norm": 0.6100607324730436, + "learning_rate": 1.0710387257547544e-06, + "loss": 0.2677, + "step": 30008 + }, + { + "epoch": 1.4057713027591698, + "grad_norm": 0.5880903489407249, + "learning_rate": 1.0708831137255517e-06, + "loss": 0.2768, + "step": 30009 + }, + { + "epoch": 1.4058181477490983, + "grad_norm": 0.637313694470499, + "learning_rate": 1.0707275099205568e-06, + "loss": 0.2745, + "step": 30010 + }, + { + "epoch": 1.4058649927390265, + "grad_norm": 0.5954929857056818, + "learning_rate": 1.070571914340666e-06, + "loss": 0.2862, + "step": 30011 + }, + { + "epoch": 1.4059118377289548, + "grad_norm": 0.610600392625391, + "learning_rate": 1.0704163269867742e-06, + "loss": 0.2899, + "step": 30012 + }, + { + "epoch": 1.4059586827188832, + "grad_norm": 0.5733735266463345, + "learning_rate": 1.0702607478597774e-06, + "loss": 0.2708, + "step": 30013 + }, + { + "epoch": 1.4060055277088115, + "grad_norm": 0.5935385118795604, + "learning_rate": 1.0701051769605714e-06, + "loss": 0.2723, + "step": 30014 + }, + { + "epoch": 1.40605237269874, + "grad_norm": 0.5840884255906752, + "learning_rate": 1.0699496142900498e-06, + "loss": 0.2722, + "step": 30015 + }, + { + "epoch": 1.4060992176886682, + "grad_norm": 0.5993051657738511, + "learning_rate": 1.0697940598491097e-06, + "loss": 0.2792, + "step": 30016 + }, + { + "epoch": 1.4061460626785967, + "grad_norm": 0.5589631970027568, + "learning_rate": 1.0696385136386441e-06, + "loss": 0.2564, + "step": 30017 + }, + { + "epoch": 1.406192907668525, + "grad_norm": 0.5992572494494035, + "learning_rate": 1.0694829756595499e-06, + "loss": 0.2705, + "step": 30018 + }, + { + "epoch": 1.4062397526584531, + "grad_norm": 0.6241625309619188, + "learning_rate": 1.0693274459127212e-06, + "loss": 0.2885, + "step": 30019 + }, + { + "epoch": 1.4062865976483816, + "grad_norm": 0.5747863304127396, + "learning_rate": 1.0691719243990547e-06, + "loss": 0.2755, + "step": 30020 + }, + { + "epoch": 1.4063334426383098, + "grad_norm": 0.5963963928107553, + "learning_rate": 1.0690164111194428e-06, + "loss": 0.2756, + "step": 30021 + }, + { + "epoch": 1.406380287628238, + "grad_norm": 0.6123361512920805, + "learning_rate": 1.0688609060747823e-06, + "loss": 0.2831, + "step": 30022 + }, + { + "epoch": 1.4064271326181665, + "grad_norm": 0.6162472284966346, + "learning_rate": 1.0687054092659682e-06, + "loss": 0.2731, + "step": 30023 + }, + { + "epoch": 1.4064739776080948, + "grad_norm": 0.619911677974475, + "learning_rate": 1.068549920693894e-06, + "loss": 0.2716, + "step": 30024 + }, + { + "epoch": 1.406520822598023, + "grad_norm": 0.5540756805422408, + "learning_rate": 1.0683944403594551e-06, + "loss": 0.264, + "step": 30025 + }, + { + "epoch": 1.4065676675879515, + "grad_norm": 0.553771648077955, + "learning_rate": 1.0682389682635472e-06, + "loss": 0.259, + "step": 30026 + }, + { + "epoch": 1.4066145125778797, + "grad_norm": 0.6358600695596476, + "learning_rate": 1.0680835044070634e-06, + "loss": 0.2718, + "step": 30027 + }, + { + "epoch": 1.4066613575678082, + "grad_norm": 0.6243873547726275, + "learning_rate": 1.067928048790899e-06, + "loss": 0.292, + "step": 30028 + }, + { + "epoch": 1.4067082025577364, + "grad_norm": 0.5988495630119735, + "learning_rate": 1.0677726014159487e-06, + "loss": 0.2788, + "step": 30029 + }, + { + "epoch": 1.406755047547665, + "grad_norm": 0.557021029928546, + "learning_rate": 1.0676171622831082e-06, + "loss": 0.2625, + "step": 30030 + }, + { + "epoch": 1.4068018925375931, + "grad_norm": 0.6105993620347855, + "learning_rate": 1.0674617313932701e-06, + "loss": 0.2876, + "step": 30031 + }, + { + "epoch": 1.4068487375275214, + "grad_norm": 0.549283147151783, + "learning_rate": 1.0673063087473303e-06, + "loss": 0.2593, + "step": 30032 + }, + { + "epoch": 1.4068955825174498, + "grad_norm": 0.6138664092980475, + "learning_rate": 1.067150894346182e-06, + "loss": 0.2733, + "step": 30033 + }, + { + "epoch": 1.406942427507378, + "grad_norm": 0.5913274474346637, + "learning_rate": 1.06699548819072e-06, + "loss": 0.2769, + "step": 30034 + }, + { + "epoch": 1.4069892724973063, + "grad_norm": 0.6079669117259936, + "learning_rate": 1.0668400902818384e-06, + "loss": 0.2833, + "step": 30035 + }, + { + "epoch": 1.4070361174872348, + "grad_norm": 0.5500008379660325, + "learning_rate": 1.0666847006204333e-06, + "loss": 0.2643, + "step": 30036 + }, + { + "epoch": 1.407082962477163, + "grad_norm": 0.6638489207809204, + "learning_rate": 1.0665293192073965e-06, + "loss": 0.2759, + "step": 30037 + }, + { + "epoch": 1.4071298074670913, + "grad_norm": 0.5886596982996823, + "learning_rate": 1.066373946043624e-06, + "loss": 0.281, + "step": 30038 + }, + { + "epoch": 1.4071766524570197, + "grad_norm": 0.5873663271762336, + "learning_rate": 1.0662185811300083e-06, + "loss": 0.2827, + "step": 30039 + }, + { + "epoch": 1.407223497446948, + "grad_norm": 0.5957341808448466, + "learning_rate": 1.0660632244674443e-06, + "loss": 0.2671, + "step": 30040 + }, + { + "epoch": 1.4072703424368764, + "grad_norm": 0.6340653947375259, + "learning_rate": 1.065907876056826e-06, + "loss": 0.2872, + "step": 30041 + }, + { + "epoch": 1.4073171874268047, + "grad_norm": 0.528189799950723, + "learning_rate": 1.0657525358990472e-06, + "loss": 0.2429, + "step": 30042 + }, + { + "epoch": 1.4073640324167331, + "grad_norm": 0.5672928857214016, + "learning_rate": 1.0655972039950032e-06, + "loss": 0.2641, + "step": 30043 + }, + { + "epoch": 1.4074108774066614, + "grad_norm": 0.5916691041285194, + "learning_rate": 1.0654418803455868e-06, + "loss": 0.2713, + "step": 30044 + }, + { + "epoch": 1.4074577223965896, + "grad_norm": 0.5904755313757171, + "learning_rate": 1.0652865649516908e-06, + "loss": 0.2951, + "step": 30045 + }, + { + "epoch": 1.407504567386518, + "grad_norm": 0.5693996275345385, + "learning_rate": 1.06513125781421e-06, + "loss": 0.2682, + "step": 30046 + }, + { + "epoch": 1.4075514123764463, + "grad_norm": 0.6462218292253016, + "learning_rate": 1.0649759589340383e-06, + "loss": 0.2916, + "step": 30047 + }, + { + "epoch": 1.4075982573663746, + "grad_norm": 0.5819248651606064, + "learning_rate": 1.0648206683120693e-06, + "loss": 0.2651, + "step": 30048 + }, + { + "epoch": 1.407645102356303, + "grad_norm": 0.5770766754854596, + "learning_rate": 1.0646653859491964e-06, + "loss": 0.2695, + "step": 30049 + }, + { + "epoch": 1.4076919473462313, + "grad_norm": 0.6657485565056727, + "learning_rate": 1.0645101118463147e-06, + "loss": 0.3044, + "step": 30050 + }, + { + "epoch": 1.4077387923361597, + "grad_norm": 0.6403917283144294, + "learning_rate": 1.0643548460043163e-06, + "loss": 0.2775, + "step": 30051 + }, + { + "epoch": 1.407785637326088, + "grad_norm": 0.6149721366317309, + "learning_rate": 1.064199588424094e-06, + "loss": 0.274, + "step": 30052 + }, + { + "epoch": 1.4078324823160164, + "grad_norm": 0.5619191341273496, + "learning_rate": 1.0640443391065422e-06, + "loss": 0.2603, + "step": 30053 + }, + { + "epoch": 1.4078793273059447, + "grad_norm": 0.5862428335134929, + "learning_rate": 1.0638890980525545e-06, + "loss": 0.2741, + "step": 30054 + }, + { + "epoch": 1.407926172295873, + "grad_norm": 0.6243825498396769, + "learning_rate": 1.0637338652630238e-06, + "loss": 0.2887, + "step": 30055 + }, + { + "epoch": 1.4079730172858014, + "grad_norm": 0.6171398784984391, + "learning_rate": 1.063578640738845e-06, + "loss": 0.2779, + "step": 30056 + }, + { + "epoch": 1.4080198622757296, + "grad_norm": 0.630462650589446, + "learning_rate": 1.0634234244809088e-06, + "loss": 0.2918, + "step": 30057 + }, + { + "epoch": 1.4080667072656579, + "grad_norm": 0.6546520052024137, + "learning_rate": 1.0632682164901106e-06, + "loss": 0.2898, + "step": 30058 + }, + { + "epoch": 1.4081135522555863, + "grad_norm": 0.6647443156503268, + "learning_rate": 1.063113016767342e-06, + "loss": 0.259, + "step": 30059 + }, + { + "epoch": 1.4081603972455146, + "grad_norm": 0.6007399048170898, + "learning_rate": 1.062957825313497e-06, + "loss": 0.2854, + "step": 30060 + }, + { + "epoch": 1.4082072422354428, + "grad_norm": 0.5793733616804729, + "learning_rate": 1.0628026421294682e-06, + "loss": 0.2788, + "step": 30061 + }, + { + "epoch": 1.4082540872253713, + "grad_norm": 0.6180907265499334, + "learning_rate": 1.0626474672161502e-06, + "loss": 0.2834, + "step": 30062 + }, + { + "epoch": 1.4083009322152995, + "grad_norm": 0.5748998739588335, + "learning_rate": 1.0624923005744336e-06, + "loss": 0.2737, + "step": 30063 + }, + { + "epoch": 1.408347777205228, + "grad_norm": 0.6146265663491051, + "learning_rate": 1.0623371422052126e-06, + "loss": 0.279, + "step": 30064 + }, + { + "epoch": 1.4083946221951562, + "grad_norm": 0.5680719351607346, + "learning_rate": 1.0621819921093806e-06, + "loss": 0.2629, + "step": 30065 + }, + { + "epoch": 1.4084414671850847, + "grad_norm": 0.5670171452649029, + "learning_rate": 1.0620268502878296e-06, + "loss": 0.2731, + "step": 30066 + }, + { + "epoch": 1.408488312175013, + "grad_norm": 0.5723188013685058, + "learning_rate": 1.0618717167414522e-06, + "loss": 0.2525, + "step": 30067 + }, + { + "epoch": 1.4085351571649412, + "grad_norm": 0.6133089743475294, + "learning_rate": 1.0617165914711425e-06, + "loss": 0.2821, + "step": 30068 + }, + { + "epoch": 1.4085820021548696, + "grad_norm": 0.6246215869635647, + "learning_rate": 1.0615614744777915e-06, + "loss": 0.2696, + "step": 30069 + }, + { + "epoch": 1.4086288471447979, + "grad_norm": 0.631991513162819, + "learning_rate": 1.0614063657622928e-06, + "loss": 0.2936, + "step": 30070 + }, + { + "epoch": 1.408675692134726, + "grad_norm": 0.5778995693946701, + "learning_rate": 1.0612512653255385e-06, + "loss": 0.2751, + "step": 30071 + }, + { + "epoch": 1.4087225371246546, + "grad_norm": 0.5341636483867704, + "learning_rate": 1.061096173168423e-06, + "loss": 0.2587, + "step": 30072 + }, + { + "epoch": 1.4087693821145828, + "grad_norm": 0.5908878762730109, + "learning_rate": 1.0609410892918363e-06, + "loss": 0.2616, + "step": 30073 + }, + { + "epoch": 1.408816227104511, + "grad_norm": 0.5830856571590102, + "learning_rate": 1.0607860136966715e-06, + "loss": 0.2656, + "step": 30074 + }, + { + "epoch": 1.4088630720944395, + "grad_norm": 0.6566190989626799, + "learning_rate": 1.060630946383823e-06, + "loss": 0.2828, + "step": 30075 + }, + { + "epoch": 1.4089099170843677, + "grad_norm": 0.5518382737200197, + "learning_rate": 1.0604758873541804e-06, + "loss": 0.2568, + "step": 30076 + }, + { + "epoch": 1.4089567620742962, + "grad_norm": 0.582032423912807, + "learning_rate": 1.060320836608637e-06, + "loss": 0.2589, + "step": 30077 + }, + { + "epoch": 1.4090036070642245, + "grad_norm": 0.6080764618174204, + "learning_rate": 1.0601657941480858e-06, + "loss": 0.2696, + "step": 30078 + }, + { + "epoch": 1.409050452054153, + "grad_norm": 0.5999895143629311, + "learning_rate": 1.0600107599734193e-06, + "loss": 0.2704, + "step": 30079 + }, + { + "epoch": 1.4090972970440812, + "grad_norm": 0.5815601722351416, + "learning_rate": 1.059855734085528e-06, + "loss": 0.2666, + "step": 30080 + }, + { + "epoch": 1.4091441420340094, + "grad_norm": 0.5872068255189109, + "learning_rate": 1.0597007164853059e-06, + "loss": 0.2627, + "step": 30081 + }, + { + "epoch": 1.4091909870239379, + "grad_norm": 0.6213052224921579, + "learning_rate": 1.0595457071736433e-06, + "loss": 0.2753, + "step": 30082 + }, + { + "epoch": 1.409237832013866, + "grad_norm": 0.5582560918907581, + "learning_rate": 1.0593907061514331e-06, + "loss": 0.2632, + "step": 30083 + }, + { + "epoch": 1.4092846770037943, + "grad_norm": 0.5980222223850067, + "learning_rate": 1.0592357134195674e-06, + "loss": 0.2692, + "step": 30084 + }, + { + "epoch": 1.4093315219937228, + "grad_norm": 0.5485199883493475, + "learning_rate": 1.0590807289789388e-06, + "loss": 0.2586, + "step": 30085 + }, + { + "epoch": 1.409378366983651, + "grad_norm": 0.5644992848903598, + "learning_rate": 1.0589257528304376e-06, + "loss": 0.2564, + "step": 30086 + }, + { + "epoch": 1.4094252119735795, + "grad_norm": 0.5966774972690563, + "learning_rate": 1.0587707849749574e-06, + "loss": 0.283, + "step": 30087 + }, + { + "epoch": 1.4094720569635077, + "grad_norm": 0.5654625016774989, + "learning_rate": 1.0586158254133883e-06, + "loss": 0.2553, + "step": 30088 + }, + { + "epoch": 1.4095189019534362, + "grad_norm": 0.603667047525032, + "learning_rate": 1.0584608741466225e-06, + "loss": 0.2918, + "step": 30089 + }, + { + "epoch": 1.4095657469433645, + "grad_norm": 0.6195811994440614, + "learning_rate": 1.0583059311755521e-06, + "loss": 0.2791, + "step": 30090 + }, + { + "epoch": 1.4096125919332927, + "grad_norm": 0.6147781251325652, + "learning_rate": 1.0581509965010691e-06, + "loss": 0.2817, + "step": 30091 + }, + { + "epoch": 1.4096594369232212, + "grad_norm": 0.59063710797419, + "learning_rate": 1.0579960701240656e-06, + "loss": 0.2727, + "step": 30092 + }, + { + "epoch": 1.4097062819131494, + "grad_norm": 0.5876761698082927, + "learning_rate": 1.0578411520454317e-06, + "loss": 0.2571, + "step": 30093 + }, + { + "epoch": 1.4097531269030776, + "grad_norm": 0.6574804412651166, + "learning_rate": 1.057686242266059e-06, + "loss": 0.2991, + "step": 30094 + }, + { + "epoch": 1.409799971893006, + "grad_norm": 0.595615524299263, + "learning_rate": 1.0575313407868396e-06, + "loss": 0.2696, + "step": 30095 + }, + { + "epoch": 1.4098468168829343, + "grad_norm": 0.5953071354205736, + "learning_rate": 1.0573764476086648e-06, + "loss": 0.2716, + "step": 30096 + }, + { + "epoch": 1.4098936618728626, + "grad_norm": 0.6508786587439566, + "learning_rate": 1.0572215627324257e-06, + "loss": 0.2797, + "step": 30097 + }, + { + "epoch": 1.409940506862791, + "grad_norm": 0.5898710212421121, + "learning_rate": 1.057066686159014e-06, + "loss": 0.2637, + "step": 30098 + }, + { + "epoch": 1.4099873518527193, + "grad_norm": 0.5358099118667842, + "learning_rate": 1.0569118178893217e-06, + "loss": 0.2639, + "step": 30099 + }, + { + "epoch": 1.4100341968426477, + "grad_norm": 0.616424127945936, + "learning_rate": 1.0567569579242392e-06, + "loss": 0.2778, + "step": 30100 + }, + { + "epoch": 1.410081041832576, + "grad_norm": 0.6323377486971388, + "learning_rate": 1.0566021062646567e-06, + "loss": 0.2797, + "step": 30101 + }, + { + "epoch": 1.4101278868225045, + "grad_norm": 0.6252348783395006, + "learning_rate": 1.0564472629114664e-06, + "loss": 0.2752, + "step": 30102 + }, + { + "epoch": 1.4101747318124327, + "grad_norm": 0.5936188216031245, + "learning_rate": 1.0562924278655592e-06, + "loss": 0.2692, + "step": 30103 + }, + { + "epoch": 1.410221576802361, + "grad_norm": 0.5900370624856294, + "learning_rate": 1.0561376011278262e-06, + "loss": 0.2674, + "step": 30104 + }, + { + "epoch": 1.4102684217922894, + "grad_norm": 0.6084806311613954, + "learning_rate": 1.0559827826991596e-06, + "loss": 0.2669, + "step": 30105 + }, + { + "epoch": 1.4103152667822176, + "grad_norm": 0.6503470238211941, + "learning_rate": 1.0558279725804477e-06, + "loss": 0.2882, + "step": 30106 + }, + { + "epoch": 1.4103621117721459, + "grad_norm": 0.6389322881920979, + "learning_rate": 1.0556731707725842e-06, + "loss": 0.2819, + "step": 30107 + }, + { + "epoch": 1.4104089567620743, + "grad_norm": 0.5996924662500086, + "learning_rate": 1.0555183772764573e-06, + "loss": 0.2715, + "step": 30108 + }, + { + "epoch": 1.4104558017520026, + "grad_norm": 0.6183625190383548, + "learning_rate": 1.0553635920929595e-06, + "loss": 0.2748, + "step": 30109 + }, + { + "epoch": 1.4105026467419308, + "grad_norm": 0.6256256407145827, + "learning_rate": 1.0552088152229808e-06, + "loss": 0.2621, + "step": 30110 + }, + { + "epoch": 1.4105494917318593, + "grad_norm": 0.5854770582478587, + "learning_rate": 1.0550540466674133e-06, + "loss": 0.2662, + "step": 30111 + }, + { + "epoch": 1.4105963367217875, + "grad_norm": 0.6199525979389416, + "learning_rate": 1.0548992864271456e-06, + "loss": 0.2818, + "step": 30112 + }, + { + "epoch": 1.410643181711716, + "grad_norm": 0.589144778771844, + "learning_rate": 1.0547445345030694e-06, + "loss": 0.2627, + "step": 30113 + }, + { + "epoch": 1.4106900267016442, + "grad_norm": 0.5860524868317734, + "learning_rate": 1.0545897908960762e-06, + "loss": 0.262, + "step": 30114 + }, + { + "epoch": 1.4107368716915727, + "grad_norm": 0.6008731603229612, + "learning_rate": 1.0544350556070543e-06, + "loss": 0.3006, + "step": 30115 + }, + { + "epoch": 1.410783716681501, + "grad_norm": 0.631360914622149, + "learning_rate": 1.0542803286368953e-06, + "loss": 0.2832, + "step": 30116 + }, + { + "epoch": 1.4108305616714292, + "grad_norm": 0.5830880722711037, + "learning_rate": 1.0541256099864908e-06, + "loss": 0.2792, + "step": 30117 + }, + { + "epoch": 1.4108774066613576, + "grad_norm": 0.641804556799447, + "learning_rate": 1.0539708996567288e-06, + "loss": 0.2661, + "step": 30118 + }, + { + "epoch": 1.4109242516512859, + "grad_norm": 0.5819557626745984, + "learning_rate": 1.0538161976485009e-06, + "loss": 0.2755, + "step": 30119 + }, + { + "epoch": 1.4109710966412141, + "grad_norm": 0.6522614475916768, + "learning_rate": 1.0536615039626974e-06, + "loss": 0.2851, + "step": 30120 + }, + { + "epoch": 1.4110179416311426, + "grad_norm": 0.6114703541523916, + "learning_rate": 1.0535068186002094e-06, + "loss": 0.2805, + "step": 30121 + }, + { + "epoch": 1.4110647866210708, + "grad_norm": 0.6251500140706181, + "learning_rate": 1.053352141561925e-06, + "loss": 0.2835, + "step": 30122 + }, + { + "epoch": 1.4111116316109993, + "grad_norm": 0.5922445690505405, + "learning_rate": 1.0531974728487366e-06, + "loss": 0.2802, + "step": 30123 + }, + { + "epoch": 1.4111584766009275, + "grad_norm": 0.6083142495801319, + "learning_rate": 1.053042812461532e-06, + "loss": 0.3013, + "step": 30124 + }, + { + "epoch": 1.411205321590856, + "grad_norm": 0.6039381763968262, + "learning_rate": 1.0528881604012022e-06, + "loss": 0.2811, + "step": 30125 + }, + { + "epoch": 1.4112521665807842, + "grad_norm": 0.6105383675955273, + "learning_rate": 1.0527335166686376e-06, + "loss": 0.2866, + "step": 30126 + }, + { + "epoch": 1.4112990115707125, + "grad_norm": 0.682870929761392, + "learning_rate": 1.0525788812647275e-06, + "loss": 0.2857, + "step": 30127 + }, + { + "epoch": 1.411345856560641, + "grad_norm": 0.6898453148438813, + "learning_rate": 1.0524242541903634e-06, + "loss": 0.2858, + "step": 30128 + }, + { + "epoch": 1.4113927015505692, + "grad_norm": 0.5906438924011397, + "learning_rate": 1.0522696354464337e-06, + "loss": 0.2655, + "step": 30129 + }, + { + "epoch": 1.4114395465404974, + "grad_norm": 0.5716916055513914, + "learning_rate": 1.0521150250338274e-06, + "loss": 0.2753, + "step": 30130 + }, + { + "epoch": 1.4114863915304259, + "grad_norm": 0.5881842306898436, + "learning_rate": 1.0519604229534356e-06, + "loss": 0.2635, + "step": 30131 + }, + { + "epoch": 1.4115332365203541, + "grad_norm": 0.5781165573786403, + "learning_rate": 1.0518058292061472e-06, + "loss": 0.2795, + "step": 30132 + }, + { + "epoch": 1.4115800815102824, + "grad_norm": 0.6189841358217547, + "learning_rate": 1.0516512437928526e-06, + "loss": 0.264, + "step": 30133 + }, + { + "epoch": 1.4116269265002108, + "grad_norm": 0.6038406359160307, + "learning_rate": 1.0514966667144416e-06, + "loss": 0.2728, + "step": 30134 + }, + { + "epoch": 1.411673771490139, + "grad_norm": 0.5949911922701944, + "learning_rate": 1.0513420979718025e-06, + "loss": 0.2708, + "step": 30135 + }, + { + "epoch": 1.4117206164800675, + "grad_norm": 0.5609798486982536, + "learning_rate": 1.0511875375658265e-06, + "loss": 0.2659, + "step": 30136 + }, + { + "epoch": 1.4117674614699958, + "grad_norm": 0.6342890189980229, + "learning_rate": 1.0510329854974011e-06, + "loss": 0.2808, + "step": 30137 + }, + { + "epoch": 1.4118143064599242, + "grad_norm": 0.5479590080284971, + "learning_rate": 1.0508784417674165e-06, + "loss": 0.246, + "step": 30138 + }, + { + "epoch": 1.4118611514498525, + "grad_norm": 0.617879107384979, + "learning_rate": 1.0507239063767624e-06, + "loss": 0.271, + "step": 30139 + }, + { + "epoch": 1.4119079964397807, + "grad_norm": 0.5691767590812689, + "learning_rate": 1.050569379326328e-06, + "loss": 0.2636, + "step": 30140 + }, + { + "epoch": 1.4119548414297092, + "grad_norm": 0.6115138240775925, + "learning_rate": 1.0504148606170034e-06, + "loss": 0.2786, + "step": 30141 + }, + { + "epoch": 1.4120016864196374, + "grad_norm": 0.6192551243707708, + "learning_rate": 1.050260350249677e-06, + "loss": 0.2693, + "step": 30142 + }, + { + "epoch": 1.4120485314095657, + "grad_norm": 0.6102648303449318, + "learning_rate": 1.0501058482252368e-06, + "loss": 0.2782, + "step": 30143 + }, + { + "epoch": 1.4120953763994941, + "grad_norm": 0.6005396206097885, + "learning_rate": 1.0499513545445734e-06, + "loss": 0.2818, + "step": 30144 + }, + { + "epoch": 1.4121422213894224, + "grad_norm": 0.6212387278692346, + "learning_rate": 1.0497968692085754e-06, + "loss": 0.2567, + "step": 30145 + }, + { + "epoch": 1.4121890663793506, + "grad_norm": 0.5698978783567733, + "learning_rate": 1.0496423922181314e-06, + "loss": 0.2589, + "step": 30146 + }, + { + "epoch": 1.412235911369279, + "grad_norm": 0.6079572842363898, + "learning_rate": 1.0494879235741314e-06, + "loss": 0.28, + "step": 30147 + }, + { + "epoch": 1.4122827563592073, + "grad_norm": 0.6004373830975804, + "learning_rate": 1.0493334632774647e-06, + "loss": 0.2752, + "step": 30148 + }, + { + "epoch": 1.4123296013491358, + "grad_norm": 0.6459156533342679, + "learning_rate": 1.0491790113290192e-06, + "loss": 0.2705, + "step": 30149 + }, + { + "epoch": 1.412376446339064, + "grad_norm": 0.5816384429951913, + "learning_rate": 1.0490245677296828e-06, + "loss": 0.2747, + "step": 30150 + }, + { + "epoch": 1.4124232913289925, + "grad_norm": 0.582247485195872, + "learning_rate": 1.0488701324803457e-06, + "loss": 0.2716, + "step": 30151 + }, + { + "epoch": 1.4124701363189207, + "grad_norm": 0.5887099500818394, + "learning_rate": 1.048715705581896e-06, + "loss": 0.2672, + "step": 30152 + }, + { + "epoch": 1.412516981308849, + "grad_norm": 0.5903502703339728, + "learning_rate": 1.0485612870352227e-06, + "loss": 0.2708, + "step": 30153 + }, + { + "epoch": 1.4125638262987774, + "grad_norm": 0.6588883069523567, + "learning_rate": 1.0484068768412153e-06, + "loss": 0.3009, + "step": 30154 + }, + { + "epoch": 1.4126106712887057, + "grad_norm": 0.6081456876851156, + "learning_rate": 1.0482524750007605e-06, + "loss": 0.2841, + "step": 30155 + }, + { + "epoch": 1.412657516278634, + "grad_norm": 0.6223488927536923, + "learning_rate": 1.0480980815147488e-06, + "loss": 0.2709, + "step": 30156 + }, + { + "epoch": 1.4127043612685624, + "grad_norm": 0.5948628744614716, + "learning_rate": 1.047943696384067e-06, + "loss": 0.2666, + "step": 30157 + }, + { + "epoch": 1.4127512062584906, + "grad_norm": 0.5814686063305571, + "learning_rate": 1.047789319609604e-06, + "loss": 0.2671, + "step": 30158 + }, + { + "epoch": 1.412798051248419, + "grad_norm": 0.6226496489744497, + "learning_rate": 1.0476349511922485e-06, + "loss": 0.2837, + "step": 30159 + }, + { + "epoch": 1.4128448962383473, + "grad_norm": 0.5996584632778299, + "learning_rate": 1.0474805911328897e-06, + "loss": 0.2875, + "step": 30160 + }, + { + "epoch": 1.4128917412282758, + "grad_norm": 0.5832249533970911, + "learning_rate": 1.047326239432414e-06, + "loss": 0.2668, + "step": 30161 + }, + { + "epoch": 1.412938586218204, + "grad_norm": 0.5644928066153111, + "learning_rate": 1.047171896091711e-06, + "loss": 0.2697, + "step": 30162 + }, + { + "epoch": 1.4129854312081322, + "grad_norm": 0.6474991305430968, + "learning_rate": 1.0470175611116692e-06, + "loss": 0.3, + "step": 30163 + }, + { + "epoch": 1.4130322761980607, + "grad_norm": 0.6081284992200994, + "learning_rate": 1.0468632344931755e-06, + "loss": 0.2916, + "step": 30164 + }, + { + "epoch": 1.413079121187989, + "grad_norm": 0.5617463304589992, + "learning_rate": 1.0467089162371186e-06, + "loss": 0.2691, + "step": 30165 + }, + { + "epoch": 1.4131259661779172, + "grad_norm": 0.6086314864104192, + "learning_rate": 1.0465546063443874e-06, + "loss": 0.2876, + "step": 30166 + }, + { + "epoch": 1.4131728111678457, + "grad_norm": 0.6317679360925258, + "learning_rate": 1.0464003048158684e-06, + "loss": 0.2681, + "step": 30167 + }, + { + "epoch": 1.413219656157774, + "grad_norm": 0.6061967658404315, + "learning_rate": 1.04624601165245e-06, + "loss": 0.2768, + "step": 30168 + }, + { + "epoch": 1.4132665011477021, + "grad_norm": 0.6001901277412672, + "learning_rate": 1.0460917268550206e-06, + "loss": 0.2816, + "step": 30169 + }, + { + "epoch": 1.4133133461376306, + "grad_norm": 0.578905363378371, + "learning_rate": 1.0459374504244687e-06, + "loss": 0.2656, + "step": 30170 + }, + { + "epoch": 1.4133601911275588, + "grad_norm": 0.58301568605521, + "learning_rate": 1.0457831823616807e-06, + "loss": 0.2788, + "step": 30171 + }, + { + "epoch": 1.4134070361174873, + "grad_norm": 0.6402805151312819, + "learning_rate": 1.0456289226675457e-06, + "loss": 0.286, + "step": 30172 + }, + { + "epoch": 1.4134538811074155, + "grad_norm": 0.6055320941179168, + "learning_rate": 1.0454746713429498e-06, + "loss": 0.2916, + "step": 30173 + }, + { + "epoch": 1.413500726097344, + "grad_norm": 0.562235743752141, + "learning_rate": 1.0453204283887813e-06, + "loss": 0.2671, + "step": 30174 + }, + { + "epoch": 1.4135475710872722, + "grad_norm": 0.5947982584704352, + "learning_rate": 1.0451661938059284e-06, + "loss": 0.2721, + "step": 30175 + }, + { + "epoch": 1.4135944160772005, + "grad_norm": 0.5987788653648536, + "learning_rate": 1.0450119675952794e-06, + "loss": 0.2583, + "step": 30176 + }, + { + "epoch": 1.413641261067129, + "grad_norm": 0.5830494208302185, + "learning_rate": 1.0448577497577198e-06, + "loss": 0.2791, + "step": 30177 + }, + { + "epoch": 1.4136881060570572, + "grad_norm": 0.5996827597466864, + "learning_rate": 1.0447035402941388e-06, + "loss": 0.2743, + "step": 30178 + }, + { + "epoch": 1.4137349510469854, + "grad_norm": 0.6175948458513475, + "learning_rate": 1.0445493392054227e-06, + "loss": 0.2965, + "step": 30179 + }, + { + "epoch": 1.413781796036914, + "grad_norm": 0.5816387168688386, + "learning_rate": 1.044395146492459e-06, + "loss": 0.2632, + "step": 30180 + }, + { + "epoch": 1.4138286410268421, + "grad_norm": 0.5364985760611835, + "learning_rate": 1.0442409621561355e-06, + "loss": 0.2478, + "step": 30181 + }, + { + "epoch": 1.4138754860167704, + "grad_norm": 0.6315536268135568, + "learning_rate": 1.0440867861973394e-06, + "loss": 0.2858, + "step": 30182 + }, + { + "epoch": 1.4139223310066988, + "grad_norm": 0.6122792833102833, + "learning_rate": 1.0439326186169587e-06, + "loss": 0.2764, + "step": 30183 + }, + { + "epoch": 1.413969175996627, + "grad_norm": 0.5731857403485752, + "learning_rate": 1.0437784594158798e-06, + "loss": 0.2619, + "step": 30184 + }, + { + "epoch": 1.4140160209865555, + "grad_norm": 0.5849761261293981, + "learning_rate": 1.0436243085949891e-06, + "loss": 0.2622, + "step": 30185 + }, + { + "epoch": 1.4140628659764838, + "grad_norm": 0.5678355191771096, + "learning_rate": 1.0434701661551744e-06, + "loss": 0.2656, + "step": 30186 + }, + { + "epoch": 1.4141097109664122, + "grad_norm": 0.5764886690058053, + "learning_rate": 1.043316032097323e-06, + "loss": 0.2687, + "step": 30187 + }, + { + "epoch": 1.4141565559563405, + "grad_norm": 0.636526993971016, + "learning_rate": 1.0431619064223214e-06, + "loss": 0.2781, + "step": 30188 + }, + { + "epoch": 1.4142034009462687, + "grad_norm": 0.573618444611915, + "learning_rate": 1.043007789131057e-06, + "loss": 0.2547, + "step": 30189 + }, + { + "epoch": 1.4142502459361972, + "grad_norm": 0.6034866274276095, + "learning_rate": 1.0428536802244175e-06, + "loss": 0.2705, + "step": 30190 + }, + { + "epoch": 1.4142970909261254, + "grad_norm": 0.6079877787098691, + "learning_rate": 1.0426995797032888e-06, + "loss": 0.2819, + "step": 30191 + }, + { + "epoch": 1.4143439359160537, + "grad_norm": 0.6196894878501032, + "learning_rate": 1.0425454875685568e-06, + "loss": 0.2869, + "step": 30192 + }, + { + "epoch": 1.4143907809059821, + "grad_norm": 0.5783391335554909, + "learning_rate": 1.042391403821109e-06, + "loss": 0.2609, + "step": 30193 + }, + { + "epoch": 1.4144376258959104, + "grad_norm": 0.576855882499272, + "learning_rate": 1.0422373284618325e-06, + "loss": 0.2743, + "step": 30194 + }, + { + "epoch": 1.4144844708858388, + "grad_norm": 0.5619960008847791, + "learning_rate": 1.042083261491614e-06, + "loss": 0.2641, + "step": 30195 + }, + { + "epoch": 1.414531315875767, + "grad_norm": 0.5787651462502499, + "learning_rate": 1.0419292029113407e-06, + "loss": 0.2704, + "step": 30196 + }, + { + "epoch": 1.4145781608656955, + "grad_norm": 0.5571713227314007, + "learning_rate": 1.0417751527218972e-06, + "loss": 0.2651, + "step": 30197 + }, + { + "epoch": 1.4146250058556238, + "grad_norm": 0.5815528033113813, + "learning_rate": 1.0416211109241725e-06, + "loss": 0.2789, + "step": 30198 + }, + { + "epoch": 1.414671850845552, + "grad_norm": 0.6011161780113833, + "learning_rate": 1.0414670775190506e-06, + "loss": 0.2775, + "step": 30199 + }, + { + "epoch": 1.4147186958354805, + "grad_norm": 0.5844853923901051, + "learning_rate": 1.0413130525074191e-06, + "loss": 0.2661, + "step": 30200 + }, + { + "epoch": 1.4147655408254087, + "grad_norm": 0.5932126312257938, + "learning_rate": 1.0411590358901643e-06, + "loss": 0.2825, + "step": 30201 + }, + { + "epoch": 1.414812385815337, + "grad_norm": 0.6179644736749529, + "learning_rate": 1.0410050276681735e-06, + "loss": 0.2825, + "step": 30202 + }, + { + "epoch": 1.4148592308052654, + "grad_norm": 0.5883866777559456, + "learning_rate": 1.0408510278423313e-06, + "loss": 0.264, + "step": 30203 + }, + { + "epoch": 1.4149060757951937, + "grad_norm": 0.5567470197111186, + "learning_rate": 1.0406970364135248e-06, + "loss": 0.2644, + "step": 30204 + }, + { + "epoch": 1.414952920785122, + "grad_norm": 0.5953758685824702, + "learning_rate": 1.0405430533826406e-06, + "loss": 0.2821, + "step": 30205 + }, + { + "epoch": 1.4149997657750504, + "grad_norm": 0.6017364934371707, + "learning_rate": 1.0403890787505635e-06, + "loss": 0.2721, + "step": 30206 + }, + { + "epoch": 1.4150466107649786, + "grad_norm": 0.6011555494161329, + "learning_rate": 1.0402351125181804e-06, + "loss": 0.2799, + "step": 30207 + }, + { + "epoch": 1.415093455754907, + "grad_norm": 0.5500119894240202, + "learning_rate": 1.0400811546863784e-06, + "loss": 0.2423, + "step": 30208 + }, + { + "epoch": 1.4151403007448353, + "grad_norm": 0.5949870108628668, + "learning_rate": 1.0399272052560414e-06, + "loss": 0.2842, + "step": 30209 + }, + { + "epoch": 1.4151871457347638, + "grad_norm": 0.5685316223845513, + "learning_rate": 1.0397732642280564e-06, + "loss": 0.2586, + "step": 30210 + }, + { + "epoch": 1.415233990724692, + "grad_norm": 0.5657246985442204, + "learning_rate": 1.039619331603309e-06, + "loss": 0.2642, + "step": 30211 + }, + { + "epoch": 1.4152808357146203, + "grad_norm": 0.637048759546313, + "learning_rate": 1.0394654073826865e-06, + "loss": 0.2657, + "step": 30212 + }, + { + "epoch": 1.4153276807045487, + "grad_norm": 0.5977474180936513, + "learning_rate": 1.0393114915670727e-06, + "loss": 0.2751, + "step": 30213 + }, + { + "epoch": 1.415374525694477, + "grad_norm": 0.591573813843062, + "learning_rate": 1.0391575841573538e-06, + "loss": 0.2716, + "step": 30214 + }, + { + "epoch": 1.4154213706844052, + "grad_norm": 0.5850632162583148, + "learning_rate": 1.039003685154417e-06, + "loss": 0.2878, + "step": 30215 + }, + { + "epoch": 1.4154682156743337, + "grad_norm": 0.5971453353801435, + "learning_rate": 1.0388497945591456e-06, + "loss": 0.2642, + "step": 30216 + }, + { + "epoch": 1.415515060664262, + "grad_norm": 0.5777530309228789, + "learning_rate": 1.0386959123724265e-06, + "loss": 0.2536, + "step": 30217 + }, + { + "epoch": 1.4155619056541902, + "grad_norm": 0.6243000518756124, + "learning_rate": 1.0385420385951454e-06, + "loss": 0.2873, + "step": 30218 + }, + { + "epoch": 1.4156087506441186, + "grad_norm": 0.619595950659001, + "learning_rate": 1.038388173228188e-06, + "loss": 0.2765, + "step": 30219 + }, + { + "epoch": 1.4156555956340469, + "grad_norm": 0.5935207547301452, + "learning_rate": 1.0382343162724387e-06, + "loss": 0.2582, + "step": 30220 + }, + { + "epoch": 1.4157024406239753, + "grad_norm": 0.6124975614358217, + "learning_rate": 1.0380804677287844e-06, + "loss": 0.2583, + "step": 30221 + }, + { + "epoch": 1.4157492856139036, + "grad_norm": 0.5970438762014723, + "learning_rate": 1.0379266275981088e-06, + "loss": 0.2732, + "step": 30222 + }, + { + "epoch": 1.415796130603832, + "grad_norm": 0.5822913421026772, + "learning_rate": 1.0377727958812975e-06, + "loss": 0.2713, + "step": 30223 + }, + { + "epoch": 1.4158429755937603, + "grad_norm": 0.5802375293139486, + "learning_rate": 1.0376189725792365e-06, + "loss": 0.2609, + "step": 30224 + }, + { + "epoch": 1.4158898205836885, + "grad_norm": 0.6235269417569395, + "learning_rate": 1.037465157692812e-06, + "loss": 0.274, + "step": 30225 + }, + { + "epoch": 1.415936665573617, + "grad_norm": 0.5642561577869003, + "learning_rate": 1.0373113512229067e-06, + "loss": 0.2724, + "step": 30226 + }, + { + "epoch": 1.4159835105635452, + "grad_norm": 0.6076598738525836, + "learning_rate": 1.0371575531704076e-06, + "loss": 0.2777, + "step": 30227 + }, + { + "epoch": 1.4160303555534735, + "grad_norm": 0.6157464257594444, + "learning_rate": 1.0370037635361985e-06, + "loss": 0.2843, + "step": 30228 + }, + { + "epoch": 1.416077200543402, + "grad_norm": 0.6163815112794402, + "learning_rate": 1.036849982321165e-06, + "loss": 0.2703, + "step": 30229 + }, + { + "epoch": 1.4161240455333302, + "grad_norm": 0.5719369185073663, + "learning_rate": 1.036696209526192e-06, + "loss": 0.2753, + "step": 30230 + }, + { + "epoch": 1.4161708905232586, + "grad_norm": 0.6335875912676757, + "learning_rate": 1.0365424451521647e-06, + "loss": 0.2789, + "step": 30231 + }, + { + "epoch": 1.4162177355131869, + "grad_norm": 0.5629163347335665, + "learning_rate": 1.0363886891999687e-06, + "loss": 0.2566, + "step": 30232 + }, + { + "epoch": 1.4162645805031153, + "grad_norm": 0.5944812675640342, + "learning_rate": 1.036234941670488e-06, + "loss": 0.2709, + "step": 30233 + }, + { + "epoch": 1.4163114254930436, + "grad_norm": 0.5874651475230546, + "learning_rate": 1.036081202564606e-06, + "loss": 0.2794, + "step": 30234 + }, + { + "epoch": 1.4163582704829718, + "grad_norm": 0.6227345973185393, + "learning_rate": 1.035927471883209e-06, + "loss": 0.2858, + "step": 30235 + }, + { + "epoch": 1.4164051154729003, + "grad_norm": 0.5786436417631177, + "learning_rate": 1.0357737496271816e-06, + "loss": 0.2462, + "step": 30236 + }, + { + "epoch": 1.4164519604628285, + "grad_norm": 0.5957218051758345, + "learning_rate": 1.035620035797408e-06, + "loss": 0.2759, + "step": 30237 + }, + { + "epoch": 1.4164988054527567, + "grad_norm": 0.5639675178350845, + "learning_rate": 1.0354663303947732e-06, + "loss": 0.2738, + "step": 30238 + }, + { + "epoch": 1.4165456504426852, + "grad_norm": 0.6050901868459909, + "learning_rate": 1.035312633420162e-06, + "loss": 0.2782, + "step": 30239 + }, + { + "epoch": 1.4165924954326135, + "grad_norm": 0.5786607257384455, + "learning_rate": 1.0351589448744592e-06, + "loss": 0.2497, + "step": 30240 + }, + { + "epoch": 1.4166393404225417, + "grad_norm": 0.619893708800214, + "learning_rate": 1.035005264758547e-06, + "loss": 0.2707, + "step": 30241 + }, + { + "epoch": 1.4166861854124702, + "grad_norm": 0.5968441599096498, + "learning_rate": 1.0348515930733116e-06, + "loss": 0.265, + "step": 30242 + }, + { + "epoch": 1.4167330304023984, + "grad_norm": 0.6008533285067653, + "learning_rate": 1.034697929819637e-06, + "loss": 0.2701, + "step": 30243 + }, + { + "epoch": 1.4167798753923269, + "grad_norm": 0.564996856002132, + "learning_rate": 1.0345442749984076e-06, + "loss": 0.2687, + "step": 30244 + }, + { + "epoch": 1.416826720382255, + "grad_norm": 0.5887503021085883, + "learning_rate": 1.0343906286105082e-06, + "loss": 0.2747, + "step": 30245 + }, + { + "epoch": 1.4168735653721836, + "grad_norm": 0.6216882910014278, + "learning_rate": 1.0342369906568217e-06, + "loss": 0.2841, + "step": 30246 + }, + { + "epoch": 1.4169204103621118, + "grad_norm": 0.5669926956090235, + "learning_rate": 1.0340833611382337e-06, + "loss": 0.2659, + "step": 30247 + }, + { + "epoch": 1.41696725535204, + "grad_norm": 0.5932539788787674, + "learning_rate": 1.0339297400556266e-06, + "loss": 0.261, + "step": 30248 + }, + { + "epoch": 1.4170141003419685, + "grad_norm": 0.5585903520390477, + "learning_rate": 1.0337761274098855e-06, + "loss": 0.2632, + "step": 30249 + }, + { + "epoch": 1.4170609453318967, + "grad_norm": 0.5920399362879558, + "learning_rate": 1.033622523201894e-06, + "loss": 0.2693, + "step": 30250 + }, + { + "epoch": 1.417107790321825, + "grad_norm": 0.5994245549687474, + "learning_rate": 1.0334689274325373e-06, + "loss": 0.2831, + "step": 30251 + }, + { + "epoch": 1.4171546353117535, + "grad_norm": 0.6168834096457375, + "learning_rate": 1.0333153401026977e-06, + "loss": 0.2836, + "step": 30252 + }, + { + "epoch": 1.4172014803016817, + "grad_norm": 0.5381287463164905, + "learning_rate": 1.0331617612132594e-06, + "loss": 0.2631, + "step": 30253 + }, + { + "epoch": 1.41724832529161, + "grad_norm": 0.599398796330554, + "learning_rate": 1.0330081907651075e-06, + "loss": 0.2734, + "step": 30254 + }, + { + "epoch": 1.4172951702815384, + "grad_norm": 0.6197082629687605, + "learning_rate": 1.0328546287591237e-06, + "loss": 0.2861, + "step": 30255 + }, + { + "epoch": 1.4173420152714666, + "grad_norm": 0.607590421882926, + "learning_rate": 1.032701075196193e-06, + "loss": 0.2743, + "step": 30256 + }, + { + "epoch": 1.417388860261395, + "grad_norm": 0.6027536928776198, + "learning_rate": 1.0325475300771998e-06, + "loss": 0.2966, + "step": 30257 + }, + { + "epoch": 1.4174357052513233, + "grad_norm": 0.5817376338220376, + "learning_rate": 1.0323939934030255e-06, + "loss": 0.2613, + "step": 30258 + }, + { + "epoch": 1.4174825502412518, + "grad_norm": 0.5974116567119168, + "learning_rate": 1.0322404651745548e-06, + "loss": 0.2533, + "step": 30259 + }, + { + "epoch": 1.41752939523118, + "grad_norm": 0.6106910360314252, + "learning_rate": 1.0320869453926718e-06, + "loss": 0.2711, + "step": 30260 + }, + { + "epoch": 1.4175762402211083, + "grad_norm": 0.5715336238380545, + "learning_rate": 1.0319334340582602e-06, + "loss": 0.2674, + "step": 30261 + }, + { + "epoch": 1.4176230852110367, + "grad_norm": 0.5706259766920372, + "learning_rate": 1.0317799311722015e-06, + "loss": 0.257, + "step": 30262 + }, + { + "epoch": 1.417669930200965, + "grad_norm": 0.6830480432788281, + "learning_rate": 1.0316264367353815e-06, + "loss": 0.2836, + "step": 30263 + }, + { + "epoch": 1.4177167751908932, + "grad_norm": 0.5986700495131065, + "learning_rate": 1.0314729507486815e-06, + "loss": 0.2918, + "step": 30264 + }, + { + "epoch": 1.4177636201808217, + "grad_norm": 0.6069532093734196, + "learning_rate": 1.0313194732129852e-06, + "loss": 0.2835, + "step": 30265 + }, + { + "epoch": 1.41781046517075, + "grad_norm": 0.6039790201126459, + "learning_rate": 1.0311660041291762e-06, + "loss": 0.2602, + "step": 30266 + }, + { + "epoch": 1.4178573101606784, + "grad_norm": 0.6381645610125237, + "learning_rate": 1.031012543498138e-06, + "loss": 0.2758, + "step": 30267 + }, + { + "epoch": 1.4179041551506066, + "grad_norm": 0.6142605079572546, + "learning_rate": 1.0308590913207541e-06, + "loss": 0.2959, + "step": 30268 + }, + { + "epoch": 1.417951000140535, + "grad_norm": 0.621164510123923, + "learning_rate": 1.0307056475979068e-06, + "loss": 0.2917, + "step": 30269 + }, + { + "epoch": 1.4179978451304633, + "grad_norm": 0.6109064023491367, + "learning_rate": 1.0305522123304784e-06, + "loss": 0.2744, + "step": 30270 + }, + { + "epoch": 1.4180446901203916, + "grad_norm": 0.6402728578279734, + "learning_rate": 1.0303987855193528e-06, + "loss": 0.2774, + "step": 30271 + }, + { + "epoch": 1.41809153511032, + "grad_norm": 0.5917605740161442, + "learning_rate": 1.0302453671654129e-06, + "loss": 0.2607, + "step": 30272 + }, + { + "epoch": 1.4181383801002483, + "grad_norm": 0.5539414330319915, + "learning_rate": 1.0300919572695412e-06, + "loss": 0.2601, + "step": 30273 + }, + { + "epoch": 1.4181852250901765, + "grad_norm": 0.5725037408419896, + "learning_rate": 1.029938555832622e-06, + "loss": 0.2629, + "step": 30274 + }, + { + "epoch": 1.418232070080105, + "grad_norm": 0.5890541275975347, + "learning_rate": 1.0297851628555357e-06, + "loss": 0.2664, + "step": 30275 + }, + { + "epoch": 1.4182789150700332, + "grad_norm": 0.6087719690296802, + "learning_rate": 1.0296317783391674e-06, + "loss": 0.2732, + "step": 30276 + }, + { + "epoch": 1.4183257600599615, + "grad_norm": 0.6087737722643882, + "learning_rate": 1.0294784022843975e-06, + "loss": 0.2796, + "step": 30277 + }, + { + "epoch": 1.41837260504989, + "grad_norm": 0.5986887532621162, + "learning_rate": 1.0293250346921102e-06, + "loss": 0.2708, + "step": 30278 + }, + { + "epoch": 1.4184194500398182, + "grad_norm": 0.5992179605336808, + "learning_rate": 1.0291716755631876e-06, + "loss": 0.2713, + "step": 30279 + }, + { + "epoch": 1.4184662950297466, + "grad_norm": 0.6132081524130906, + "learning_rate": 1.029018324898512e-06, + "loss": 0.2933, + "step": 30280 + }, + { + "epoch": 1.4185131400196749, + "grad_norm": 0.6211722352179071, + "learning_rate": 1.0288649826989674e-06, + "loss": 0.2925, + "step": 30281 + }, + { + "epoch": 1.4185599850096033, + "grad_norm": 0.5681989403295756, + "learning_rate": 1.028711648965435e-06, + "loss": 0.2791, + "step": 30282 + }, + { + "epoch": 1.4186068299995316, + "grad_norm": 0.6598511929432845, + "learning_rate": 1.0285583236987964e-06, + "loss": 0.2795, + "step": 30283 + }, + { + "epoch": 1.4186536749894598, + "grad_norm": 0.5637349587013329, + "learning_rate": 1.0284050068999345e-06, + "loss": 0.2639, + "step": 30284 + }, + { + "epoch": 1.4187005199793883, + "grad_norm": 0.598351209307548, + "learning_rate": 1.0282516985697324e-06, + "loss": 0.2572, + "step": 30285 + }, + { + "epoch": 1.4187473649693165, + "grad_norm": 0.597655475336741, + "learning_rate": 1.0280983987090717e-06, + "loss": 0.2715, + "step": 30286 + }, + { + "epoch": 1.4187942099592448, + "grad_norm": 0.5524367702099322, + "learning_rate": 1.0279451073188346e-06, + "loss": 0.2662, + "step": 30287 + }, + { + "epoch": 1.4188410549491732, + "grad_norm": 0.5761810400636028, + "learning_rate": 1.0277918243999043e-06, + "loss": 0.274, + "step": 30288 + }, + { + "epoch": 1.4188878999391015, + "grad_norm": 0.6334356535799379, + "learning_rate": 1.0276385499531625e-06, + "loss": 0.2771, + "step": 30289 + }, + { + "epoch": 1.4189347449290297, + "grad_norm": 0.5596031670747614, + "learning_rate": 1.0274852839794894e-06, + "loss": 0.2602, + "step": 30290 + }, + { + "epoch": 1.4189815899189582, + "grad_norm": 0.6004872026918219, + "learning_rate": 1.0273320264797684e-06, + "loss": 0.261, + "step": 30291 + }, + { + "epoch": 1.4190284349088864, + "grad_norm": 0.583170928294683, + "learning_rate": 1.0271787774548815e-06, + "loss": 0.2744, + "step": 30292 + }, + { + "epoch": 1.4190752798988149, + "grad_norm": 0.5816510291313409, + "learning_rate": 1.027025536905711e-06, + "loss": 0.2718, + "step": 30293 + }, + { + "epoch": 1.4191221248887431, + "grad_norm": 0.622478366058456, + "learning_rate": 1.0268723048331386e-06, + "loss": 0.3047, + "step": 30294 + }, + { + "epoch": 1.4191689698786716, + "grad_norm": 0.5641990239298418, + "learning_rate": 1.0267190812380454e-06, + "loss": 0.2549, + "step": 30295 + }, + { + "epoch": 1.4192158148685998, + "grad_norm": 0.5465629624204135, + "learning_rate": 1.026565866121314e-06, + "loss": 0.2582, + "step": 30296 + }, + { + "epoch": 1.419262659858528, + "grad_norm": 0.645499811229811, + "learning_rate": 1.0264126594838253e-06, + "loss": 0.2871, + "step": 30297 + }, + { + "epoch": 1.4193095048484565, + "grad_norm": 0.6147586141307607, + "learning_rate": 1.0262594613264611e-06, + "loss": 0.2712, + "step": 30298 + }, + { + "epoch": 1.4193563498383848, + "grad_norm": 0.6118383140431858, + "learning_rate": 1.0261062716501033e-06, + "loss": 0.2878, + "step": 30299 + }, + { + "epoch": 1.419403194828313, + "grad_norm": 0.6428449558262902, + "learning_rate": 1.0259530904556344e-06, + "loss": 0.2731, + "step": 30300 + }, + { + "epoch": 1.4194500398182415, + "grad_norm": 0.6527489853433939, + "learning_rate": 1.025799917743934e-06, + "loss": 0.2888, + "step": 30301 + }, + { + "epoch": 1.4194968848081697, + "grad_norm": 0.6208997097183165, + "learning_rate": 1.0256467535158848e-06, + "loss": 0.2796, + "step": 30302 + }, + { + "epoch": 1.4195437297980982, + "grad_norm": 0.6813460248111409, + "learning_rate": 1.0254935977723688e-06, + "loss": 0.2918, + "step": 30303 + }, + { + "epoch": 1.4195905747880264, + "grad_norm": 0.6155132837507404, + "learning_rate": 1.0253404505142652e-06, + "loss": 0.276, + "step": 30304 + }, + { + "epoch": 1.4196374197779549, + "grad_norm": 0.5950583468687991, + "learning_rate": 1.0251873117424572e-06, + "loss": 0.2567, + "step": 30305 + }, + { + "epoch": 1.4196842647678831, + "grad_norm": 0.6166753530293817, + "learning_rate": 1.0250341814578263e-06, + "loss": 0.2753, + "step": 30306 + }, + { + "epoch": 1.4197311097578114, + "grad_norm": 0.5856095258269862, + "learning_rate": 1.0248810596612519e-06, + "loss": 0.2668, + "step": 30307 + }, + { + "epoch": 1.4197779547477398, + "grad_norm": 0.5783028449351001, + "learning_rate": 1.0247279463536164e-06, + "loss": 0.2692, + "step": 30308 + }, + { + "epoch": 1.419824799737668, + "grad_norm": 0.6116195825089398, + "learning_rate": 1.0245748415358007e-06, + "loss": 0.2653, + "step": 30309 + }, + { + "epoch": 1.4198716447275963, + "grad_norm": 0.5709274015782104, + "learning_rate": 1.0244217452086868e-06, + "loss": 0.2591, + "step": 30310 + }, + { + "epoch": 1.4199184897175248, + "grad_norm": 0.5574759478072925, + "learning_rate": 1.0242686573731542e-06, + "loss": 0.2613, + "step": 30311 + }, + { + "epoch": 1.419965334707453, + "grad_norm": 0.578901556174012, + "learning_rate": 1.0241155780300852e-06, + "loss": 0.2777, + "step": 30312 + }, + { + "epoch": 1.4200121796973812, + "grad_norm": 0.5526016395166535, + "learning_rate": 1.0239625071803595e-06, + "loss": 0.2615, + "step": 30313 + }, + { + "epoch": 1.4200590246873097, + "grad_norm": 0.6174717734116916, + "learning_rate": 1.0238094448248585e-06, + "loss": 0.2816, + "step": 30314 + }, + { + "epoch": 1.420105869677238, + "grad_norm": 0.5817105704610138, + "learning_rate": 1.0236563909644629e-06, + "loss": 0.2829, + "step": 30315 + }, + { + "epoch": 1.4201527146671664, + "grad_norm": 0.6001506407308554, + "learning_rate": 1.0235033456000546e-06, + "loss": 0.2656, + "step": 30316 + }, + { + "epoch": 1.4201995596570947, + "grad_norm": 0.6386951887357595, + "learning_rate": 1.0233503087325128e-06, + "loss": 0.28, + "step": 30317 + }, + { + "epoch": 1.4202464046470231, + "grad_norm": 0.5980068963049167, + "learning_rate": 1.0231972803627197e-06, + "loss": 0.2739, + "step": 30318 + }, + { + "epoch": 1.4202932496369514, + "grad_norm": 0.6191146095249037, + "learning_rate": 1.0230442604915538e-06, + "loss": 0.295, + "step": 30319 + }, + { + "epoch": 1.4203400946268796, + "grad_norm": 0.5660011825395543, + "learning_rate": 1.0228912491198972e-06, + "loss": 0.2565, + "step": 30320 + }, + { + "epoch": 1.420386939616808, + "grad_norm": 0.6353751943419964, + "learning_rate": 1.0227382462486304e-06, + "loss": 0.2785, + "step": 30321 + }, + { + "epoch": 1.4204337846067363, + "grad_norm": 0.6059618460855498, + "learning_rate": 1.0225852518786336e-06, + "loss": 0.2816, + "step": 30322 + }, + { + "epoch": 1.4204806295966645, + "grad_norm": 0.6047733198364628, + "learning_rate": 1.0224322660107883e-06, + "loss": 0.2797, + "step": 30323 + }, + { + "epoch": 1.420527474586593, + "grad_norm": 0.5981186536136762, + "learning_rate": 1.0222792886459738e-06, + "loss": 0.2902, + "step": 30324 + }, + { + "epoch": 1.4205743195765212, + "grad_norm": 0.6023846613248726, + "learning_rate": 1.0221263197850695e-06, + "loss": 0.261, + "step": 30325 + }, + { + "epoch": 1.4206211645664495, + "grad_norm": 0.5895743261883144, + "learning_rate": 1.0219733594289571e-06, + "loss": 0.2735, + "step": 30326 + }, + { + "epoch": 1.420668009556378, + "grad_norm": 0.5846369626927634, + "learning_rate": 1.0218204075785162e-06, + "loss": 0.2709, + "step": 30327 + }, + { + "epoch": 1.4207148545463062, + "grad_norm": 0.5944493622440961, + "learning_rate": 1.0216674642346277e-06, + "loss": 0.2813, + "step": 30328 + }, + { + "epoch": 1.4207616995362347, + "grad_norm": 0.6304589845318868, + "learning_rate": 1.0215145293981713e-06, + "loss": 0.2711, + "step": 30329 + }, + { + "epoch": 1.420808544526163, + "grad_norm": 0.5840385125022264, + "learning_rate": 1.021361603070028e-06, + "loss": 0.275, + "step": 30330 + }, + { + "epoch": 1.4208553895160914, + "grad_norm": 0.6247036013399504, + "learning_rate": 1.021208685251077e-06, + "loss": 0.2781, + "step": 30331 + }, + { + "epoch": 1.4209022345060196, + "grad_norm": 0.5973881036207289, + "learning_rate": 1.0210557759421973e-06, + "loss": 0.2726, + "step": 30332 + }, + { + "epoch": 1.4209490794959478, + "grad_norm": 0.610421796507623, + "learning_rate": 1.02090287514427e-06, + "loss": 0.2851, + "step": 30333 + }, + { + "epoch": 1.4209959244858763, + "grad_norm": 0.6373623439594187, + "learning_rate": 1.020749982858175e-06, + "loss": 0.2844, + "step": 30334 + }, + { + "epoch": 1.4210427694758045, + "grad_norm": 0.6092093760612571, + "learning_rate": 1.0205970990847922e-06, + "loss": 0.2886, + "step": 30335 + }, + { + "epoch": 1.4210896144657328, + "grad_norm": 0.5801002520774613, + "learning_rate": 1.0204442238250018e-06, + "loss": 0.2605, + "step": 30336 + }, + { + "epoch": 1.4211364594556612, + "grad_norm": 0.5747062539307239, + "learning_rate": 1.0202913570796824e-06, + "loss": 0.2777, + "step": 30337 + }, + { + "epoch": 1.4211833044455895, + "grad_norm": 0.6419242325153115, + "learning_rate": 1.0201384988497152e-06, + "loss": 0.2888, + "step": 30338 + }, + { + "epoch": 1.421230149435518, + "grad_norm": 0.6208790060128867, + "learning_rate": 1.0199856491359777e-06, + "loss": 0.2755, + "step": 30339 + }, + { + "epoch": 1.4212769944254462, + "grad_norm": 0.5744729164548333, + "learning_rate": 1.0198328079393513e-06, + "loss": 0.2646, + "step": 30340 + }, + { + "epoch": 1.4213238394153747, + "grad_norm": 0.6162244040014638, + "learning_rate": 1.0196799752607147e-06, + "loss": 0.2722, + "step": 30341 + }, + { + "epoch": 1.421370684405303, + "grad_norm": 0.6254518776349348, + "learning_rate": 1.019527151100949e-06, + "loss": 0.2839, + "step": 30342 + }, + { + "epoch": 1.4214175293952311, + "grad_norm": 0.6203539749010004, + "learning_rate": 1.0193743354609315e-06, + "loss": 0.2844, + "step": 30343 + }, + { + "epoch": 1.4214643743851596, + "grad_norm": 0.6284356026847122, + "learning_rate": 1.0192215283415424e-06, + "loss": 0.291, + "step": 30344 + }, + { + "epoch": 1.4215112193750878, + "grad_norm": 0.6070842359046082, + "learning_rate": 1.0190687297436622e-06, + "loss": 0.2659, + "step": 30345 + }, + { + "epoch": 1.421558064365016, + "grad_norm": 0.6095945852962699, + "learning_rate": 1.0189159396681684e-06, + "loss": 0.2689, + "step": 30346 + }, + { + "epoch": 1.4216049093549445, + "grad_norm": 0.5831127526068185, + "learning_rate": 1.0187631581159407e-06, + "loss": 0.2715, + "step": 30347 + }, + { + "epoch": 1.4216517543448728, + "grad_norm": 0.582624383692956, + "learning_rate": 1.0186103850878593e-06, + "loss": 0.2592, + "step": 30348 + }, + { + "epoch": 1.421698599334801, + "grad_norm": 0.6047016654870218, + "learning_rate": 1.0184576205848035e-06, + "loss": 0.2864, + "step": 30349 + }, + { + "epoch": 1.4217454443247295, + "grad_norm": 0.6203339462593699, + "learning_rate": 1.0183048646076508e-06, + "loss": 0.2688, + "step": 30350 + }, + { + "epoch": 1.4217922893146577, + "grad_norm": 0.5839317233243809, + "learning_rate": 1.0181521171572812e-06, + "loss": 0.2695, + "step": 30351 + }, + { + "epoch": 1.4218391343045862, + "grad_norm": 0.6099840178642396, + "learning_rate": 1.0179993782345742e-06, + "loss": 0.2874, + "step": 30352 + }, + { + "epoch": 1.4218859792945144, + "grad_norm": 0.5829688770401077, + "learning_rate": 1.017846647840408e-06, + "loss": 0.2559, + "step": 30353 + }, + { + "epoch": 1.421932824284443, + "grad_norm": 0.6286080658467011, + "learning_rate": 1.0176939259756616e-06, + "loss": 0.2741, + "step": 30354 + }, + { + "epoch": 1.4219796692743711, + "grad_norm": 0.6459614010808292, + "learning_rate": 1.017541212641215e-06, + "loss": 0.3027, + "step": 30355 + }, + { + "epoch": 1.4220265142642994, + "grad_norm": 0.5861056184357892, + "learning_rate": 1.0173885078379453e-06, + "loss": 0.2716, + "step": 30356 + }, + { + "epoch": 1.4220733592542278, + "grad_norm": 0.5903062400907535, + "learning_rate": 1.017235811566732e-06, + "loss": 0.2742, + "step": 30357 + }, + { + "epoch": 1.422120204244156, + "grad_norm": 0.5981563297276177, + "learning_rate": 1.017083123828454e-06, + "loss": 0.2872, + "step": 30358 + }, + { + "epoch": 1.4221670492340843, + "grad_norm": 0.609048419609286, + "learning_rate": 1.0169304446239906e-06, + "loss": 0.2684, + "step": 30359 + }, + { + "epoch": 1.4222138942240128, + "grad_norm": 0.6086125364132517, + "learning_rate": 1.016777773954219e-06, + "loss": 0.27, + "step": 30360 + }, + { + "epoch": 1.422260739213941, + "grad_norm": 0.6103852807721243, + "learning_rate": 1.0166251118200192e-06, + "loss": 0.2762, + "step": 30361 + }, + { + "epoch": 1.4223075842038693, + "grad_norm": 0.6373900561400445, + "learning_rate": 1.0164724582222684e-06, + "loss": 0.2908, + "step": 30362 + }, + { + "epoch": 1.4223544291937977, + "grad_norm": 0.5687026468632567, + "learning_rate": 1.0163198131618457e-06, + "loss": 0.2651, + "step": 30363 + }, + { + "epoch": 1.422401274183726, + "grad_norm": 0.6043639580222929, + "learning_rate": 1.0161671766396298e-06, + "loss": 0.2681, + "step": 30364 + }, + { + "epoch": 1.4224481191736544, + "grad_norm": 0.6173957399861121, + "learning_rate": 1.0160145486564994e-06, + "loss": 0.2784, + "step": 30365 + }, + { + "epoch": 1.4224949641635827, + "grad_norm": 0.5729720351131369, + "learning_rate": 1.0158619292133315e-06, + "loss": 0.2676, + "step": 30366 + }, + { + "epoch": 1.4225418091535111, + "grad_norm": 0.6107404614830855, + "learning_rate": 1.015709318311006e-06, + "loss": 0.2724, + "step": 30367 + }, + { + "epoch": 1.4225886541434394, + "grad_norm": 0.5938721203649029, + "learning_rate": 1.0155567159503992e-06, + "loss": 0.2807, + "step": 30368 + }, + { + "epoch": 1.4226354991333676, + "grad_norm": 0.5519301480287532, + "learning_rate": 1.0154041221323908e-06, + "loss": 0.2601, + "step": 30369 + }, + { + "epoch": 1.422682344123296, + "grad_norm": 0.5620589031440105, + "learning_rate": 1.0152515368578582e-06, + "loss": 0.2658, + "step": 30370 + }, + { + "epoch": 1.4227291891132243, + "grad_norm": 0.5765743812441999, + "learning_rate": 1.0150989601276803e-06, + "loss": 0.2548, + "step": 30371 + }, + { + "epoch": 1.4227760341031526, + "grad_norm": 0.5847342880980108, + "learning_rate": 1.0149463919427352e-06, + "loss": 0.2662, + "step": 30372 + }, + { + "epoch": 1.422822879093081, + "grad_norm": 0.5870190079048458, + "learning_rate": 1.0147938323039005e-06, + "loss": 0.2766, + "step": 30373 + }, + { + "epoch": 1.4228697240830093, + "grad_norm": 0.5913767611758668, + "learning_rate": 1.0146412812120531e-06, + "loss": 0.2631, + "step": 30374 + }, + { + "epoch": 1.4229165690729377, + "grad_norm": 0.5913977620517632, + "learning_rate": 1.0144887386680718e-06, + "loss": 0.2679, + "step": 30375 + }, + { + "epoch": 1.422963414062866, + "grad_norm": 0.5599406085276356, + "learning_rate": 1.0143362046728345e-06, + "loss": 0.2588, + "step": 30376 + }, + { + "epoch": 1.4230102590527944, + "grad_norm": 0.563177058783829, + "learning_rate": 1.0141836792272187e-06, + "loss": 0.2471, + "step": 30377 + }, + { + "epoch": 1.4230571040427227, + "grad_norm": 0.5766688712665711, + "learning_rate": 1.0140311623321025e-06, + "loss": 0.2658, + "step": 30378 + }, + { + "epoch": 1.423103949032651, + "grad_norm": 0.590619066300786, + "learning_rate": 1.0138786539883643e-06, + "loss": 0.2786, + "step": 30379 + }, + { + "epoch": 1.4231507940225794, + "grad_norm": 0.5811315108911483, + "learning_rate": 1.0137261541968809e-06, + "loss": 0.2934, + "step": 30380 + }, + { + "epoch": 1.4231976390125076, + "grad_norm": 0.5626289167738068, + "learning_rate": 1.0135736629585295e-06, + "loss": 0.2723, + "step": 30381 + }, + { + "epoch": 1.4232444840024359, + "grad_norm": 0.6329540771637099, + "learning_rate": 1.0134211802741876e-06, + "loss": 0.2693, + "step": 30382 + }, + { + "epoch": 1.4232913289923643, + "grad_norm": 0.6106679624517259, + "learning_rate": 1.013268706144733e-06, + "loss": 0.272, + "step": 30383 + }, + { + "epoch": 1.4233381739822926, + "grad_norm": 0.6065158019423948, + "learning_rate": 1.0131162405710437e-06, + "loss": 0.2794, + "step": 30384 + }, + { + "epoch": 1.4233850189722208, + "grad_norm": 0.6096515488774739, + "learning_rate": 1.0129637835539976e-06, + "loss": 0.2759, + "step": 30385 + }, + { + "epoch": 1.4234318639621493, + "grad_norm": 0.59570002996587, + "learning_rate": 1.0128113350944702e-06, + "loss": 0.2758, + "step": 30386 + }, + { + "epoch": 1.4234787089520775, + "grad_norm": 0.5991178495103805, + "learning_rate": 1.0126588951933405e-06, + "loss": 0.2789, + "step": 30387 + }, + { + "epoch": 1.423525553942006, + "grad_norm": 0.5891238233528824, + "learning_rate": 1.0125064638514844e-06, + "loss": 0.2583, + "step": 30388 + }, + { + "epoch": 1.4235723989319342, + "grad_norm": 0.5845779520672293, + "learning_rate": 1.0123540410697797e-06, + "loss": 0.2616, + "step": 30389 + }, + { + "epoch": 1.4236192439218627, + "grad_norm": 0.6271769969442776, + "learning_rate": 1.0122016268491033e-06, + "loss": 0.2795, + "step": 30390 + }, + { + "epoch": 1.423666088911791, + "grad_norm": 0.5684218958137186, + "learning_rate": 1.0120492211903337e-06, + "loss": 0.2684, + "step": 30391 + }, + { + "epoch": 1.4237129339017192, + "grad_norm": 0.5723195145204153, + "learning_rate": 1.011896824094346e-06, + "loss": 0.2626, + "step": 30392 + }, + { + "epoch": 1.4237597788916476, + "grad_norm": 0.6049139350460271, + "learning_rate": 1.0117444355620181e-06, + "loss": 0.2815, + "step": 30393 + }, + { + "epoch": 1.4238066238815759, + "grad_norm": 0.5914554595596759, + "learning_rate": 1.0115920555942278e-06, + "loss": 0.2619, + "step": 30394 + }, + { + "epoch": 1.423853468871504, + "grad_norm": 0.5900969906348388, + "learning_rate": 1.01143968419185e-06, + "loss": 0.2648, + "step": 30395 + }, + { + "epoch": 1.4239003138614326, + "grad_norm": 0.6232417340639097, + "learning_rate": 1.0112873213557628e-06, + "loss": 0.279, + "step": 30396 + }, + { + "epoch": 1.4239471588513608, + "grad_norm": 0.6356228149424222, + "learning_rate": 1.011134967086844e-06, + "loss": 0.2696, + "step": 30397 + }, + { + "epoch": 1.423994003841289, + "grad_norm": 0.6311950415117781, + "learning_rate": 1.010982621385968e-06, + "loss": 0.2889, + "step": 30398 + }, + { + "epoch": 1.4240408488312175, + "grad_norm": 0.6098392959332715, + "learning_rate": 1.010830284254013e-06, + "loss": 0.273, + "step": 30399 + }, + { + "epoch": 1.4240876938211458, + "grad_norm": 0.5732650575269936, + "learning_rate": 1.0106779556918553e-06, + "loss": 0.2658, + "step": 30400 + }, + { + "epoch": 1.4241345388110742, + "grad_norm": 0.6069719021772753, + "learning_rate": 1.0105256357003726e-06, + "loss": 0.2607, + "step": 30401 + }, + { + "epoch": 1.4241813838010025, + "grad_norm": 0.5576136605236448, + "learning_rate": 1.0103733242804398e-06, + "loss": 0.2464, + "step": 30402 + }, + { + "epoch": 1.424228228790931, + "grad_norm": 0.5879535199654078, + "learning_rate": 1.0102210214329346e-06, + "loss": 0.262, + "step": 30403 + }, + { + "epoch": 1.4242750737808592, + "grad_norm": 0.5730846248192997, + "learning_rate": 1.0100687271587323e-06, + "loss": 0.2601, + "step": 30404 + }, + { + "epoch": 1.4243219187707874, + "grad_norm": 0.607274392553425, + "learning_rate": 1.0099164414587102e-06, + "loss": 0.2687, + "step": 30405 + }, + { + "epoch": 1.4243687637607159, + "grad_norm": 0.6278333856868874, + "learning_rate": 1.0097641643337442e-06, + "loss": 0.2872, + "step": 30406 + }, + { + "epoch": 1.424415608750644, + "grad_norm": 0.6014952400115728, + "learning_rate": 1.009611895784711e-06, + "loss": 0.27, + "step": 30407 + }, + { + "epoch": 1.4244624537405723, + "grad_norm": 0.5595557333823965, + "learning_rate": 1.0094596358124876e-06, + "loss": 0.2691, + "step": 30408 + }, + { + "epoch": 1.4245092987305008, + "grad_norm": 0.6021406815699273, + "learning_rate": 1.009307384417949e-06, + "loss": 0.2703, + "step": 30409 + }, + { + "epoch": 1.424556143720429, + "grad_norm": 0.5643768231872023, + "learning_rate": 1.0091551416019713e-06, + "loss": 0.2632, + "step": 30410 + }, + { + "epoch": 1.4246029887103575, + "grad_norm": 0.6086221506034918, + "learning_rate": 1.0090029073654308e-06, + "loss": 0.2627, + "step": 30411 + }, + { + "epoch": 1.4246498337002858, + "grad_norm": 0.5106085904690385, + "learning_rate": 1.0088506817092043e-06, + "loss": 0.2385, + "step": 30412 + }, + { + "epoch": 1.4246966786902142, + "grad_norm": 0.6157976207946249, + "learning_rate": 1.008698464634167e-06, + "loss": 0.2697, + "step": 30413 + }, + { + "epoch": 1.4247435236801425, + "grad_norm": 0.6006740056667449, + "learning_rate": 1.0085462561411958e-06, + "loss": 0.2817, + "step": 30414 + }, + { + "epoch": 1.4247903686700707, + "grad_norm": 0.5909724189718716, + "learning_rate": 1.0083940562311652e-06, + "loss": 0.271, + "step": 30415 + }, + { + "epoch": 1.4248372136599992, + "grad_norm": 0.5619397801705224, + "learning_rate": 1.0082418649049532e-06, + "loss": 0.2652, + "step": 30416 + }, + { + "epoch": 1.4248840586499274, + "grad_norm": 0.5468787715267139, + "learning_rate": 1.0080896821634333e-06, + "loss": 0.2664, + "step": 30417 + }, + { + "epoch": 1.4249309036398556, + "grad_norm": 0.583744585811549, + "learning_rate": 1.0079375080074825e-06, + "loss": 0.2743, + "step": 30418 + }, + { + "epoch": 1.424977748629784, + "grad_norm": 0.6225012095469555, + "learning_rate": 1.0077853424379758e-06, + "loss": 0.2755, + "step": 30419 + }, + { + "epoch": 1.4250245936197123, + "grad_norm": 0.5999037738141785, + "learning_rate": 1.0076331854557898e-06, + "loss": 0.2752, + "step": 30420 + }, + { + "epoch": 1.4250714386096406, + "grad_norm": 0.6109602774488416, + "learning_rate": 1.0074810370618004e-06, + "loss": 0.2806, + "step": 30421 + }, + { + "epoch": 1.425118283599569, + "grad_norm": 0.5577621681817553, + "learning_rate": 1.0073288972568826e-06, + "loss": 0.2662, + "step": 30422 + }, + { + "epoch": 1.4251651285894973, + "grad_norm": 0.6266872609742443, + "learning_rate": 1.007176766041911e-06, + "loss": 0.2731, + "step": 30423 + }, + { + "epoch": 1.4252119735794258, + "grad_norm": 0.5922869387516914, + "learning_rate": 1.007024643417762e-06, + "loss": 0.2711, + "step": 30424 + }, + { + "epoch": 1.425258818569354, + "grad_norm": 0.6432735053192972, + "learning_rate": 1.0068725293853107e-06, + "loss": 0.2697, + "step": 30425 + }, + { + "epoch": 1.4253056635592825, + "grad_norm": 0.6200915535050776, + "learning_rate": 1.0067204239454329e-06, + "loss": 0.2872, + "step": 30426 + }, + { + "epoch": 1.4253525085492107, + "grad_norm": 0.5802065420709369, + "learning_rate": 1.0065683270990033e-06, + "loss": 0.2701, + "step": 30427 + }, + { + "epoch": 1.425399353539139, + "grad_norm": 0.5942466231346781, + "learning_rate": 1.006416238846899e-06, + "loss": 0.2764, + "step": 30428 + }, + { + "epoch": 1.4254461985290674, + "grad_norm": 0.6051759639920312, + "learning_rate": 1.0062641591899936e-06, + "loss": 0.2703, + "step": 30429 + }, + { + "epoch": 1.4254930435189956, + "grad_norm": 0.5791255812587636, + "learning_rate": 1.0061120881291617e-06, + "loss": 0.2638, + "step": 30430 + }, + { + "epoch": 1.4255398885089239, + "grad_norm": 0.6163769578238276, + "learning_rate": 1.0059600256652793e-06, + "loss": 0.2674, + "step": 30431 + }, + { + "epoch": 1.4255867334988523, + "grad_norm": 0.6119344404670388, + "learning_rate": 1.0058079717992213e-06, + "loss": 0.2724, + "step": 30432 + }, + { + "epoch": 1.4256335784887806, + "grad_norm": 0.5635439791983782, + "learning_rate": 1.0056559265318634e-06, + "loss": 0.2601, + "step": 30433 + }, + { + "epoch": 1.4256804234787088, + "grad_norm": 0.5929216121267883, + "learning_rate": 1.0055038898640803e-06, + "loss": 0.2759, + "step": 30434 + }, + { + "epoch": 1.4257272684686373, + "grad_norm": 0.6011397019257911, + "learning_rate": 1.0053518617967462e-06, + "loss": 0.2816, + "step": 30435 + }, + { + "epoch": 1.4257741134585655, + "grad_norm": 0.5866293633514761, + "learning_rate": 1.0051998423307371e-06, + "loss": 0.2628, + "step": 30436 + }, + { + "epoch": 1.425820958448494, + "grad_norm": 0.6080292025056835, + "learning_rate": 1.0050478314669262e-06, + "loss": 0.268, + "step": 30437 + }, + { + "epoch": 1.4258678034384222, + "grad_norm": 0.5862756055479731, + "learning_rate": 1.0048958292061898e-06, + "loss": 0.2674, + "step": 30438 + }, + { + "epoch": 1.4259146484283507, + "grad_norm": 0.5711044055987586, + "learning_rate": 1.0047438355494016e-06, + "loss": 0.2646, + "step": 30439 + }, + { + "epoch": 1.425961493418279, + "grad_norm": 0.5856754094307878, + "learning_rate": 1.0045918504974378e-06, + "loss": 0.2731, + "step": 30440 + }, + { + "epoch": 1.4260083384082072, + "grad_norm": 0.6111166864863421, + "learning_rate": 1.0044398740511713e-06, + "loss": 0.267, + "step": 30441 + }, + { + "epoch": 1.4260551833981356, + "grad_norm": 0.5935985164254785, + "learning_rate": 1.0042879062114775e-06, + "loss": 0.2739, + "step": 30442 + }, + { + "epoch": 1.4261020283880639, + "grad_norm": 0.6190153580428991, + "learning_rate": 1.0041359469792317e-06, + "loss": 0.2744, + "step": 30443 + }, + { + "epoch": 1.4261488733779921, + "grad_norm": 0.5939679956234081, + "learning_rate": 1.0039839963553066e-06, + "loss": 0.2703, + "step": 30444 + }, + { + "epoch": 1.4261957183679206, + "grad_norm": 0.5833031423774749, + "learning_rate": 1.0038320543405774e-06, + "loss": 0.2637, + "step": 30445 + }, + { + "epoch": 1.4262425633578488, + "grad_norm": 0.6235860777568182, + "learning_rate": 1.0036801209359199e-06, + "loss": 0.2818, + "step": 30446 + }, + { + "epoch": 1.4262894083477773, + "grad_norm": 0.5502729503644694, + "learning_rate": 1.003528196142206e-06, + "loss": 0.2495, + "step": 30447 + }, + { + "epoch": 1.4263362533377055, + "grad_norm": 0.6436925192775247, + "learning_rate": 1.0033762799603118e-06, + "loss": 0.2775, + "step": 30448 + }, + { + "epoch": 1.426383098327634, + "grad_norm": 0.5912579278100484, + "learning_rate": 1.0032243723911104e-06, + "loss": 0.2825, + "step": 30449 + }, + { + "epoch": 1.4264299433175622, + "grad_norm": 0.5594365714143356, + "learning_rate": 1.0030724734354776e-06, + "loss": 0.2576, + "step": 30450 + }, + { + "epoch": 1.4264767883074905, + "grad_norm": 0.6206946410321765, + "learning_rate": 1.0029205830942855e-06, + "loss": 0.2908, + "step": 30451 + }, + { + "epoch": 1.426523633297419, + "grad_norm": 0.6999587779863593, + "learning_rate": 1.00276870136841e-06, + "loss": 0.3015, + "step": 30452 + }, + { + "epoch": 1.4265704782873472, + "grad_norm": 0.5896796088726167, + "learning_rate": 1.0026168282587237e-06, + "loss": 0.2635, + "step": 30453 + }, + { + "epoch": 1.4266173232772754, + "grad_norm": 0.5633445562513099, + "learning_rate": 1.002464963766101e-06, + "loss": 0.2676, + "step": 30454 + }, + { + "epoch": 1.4266641682672039, + "grad_norm": 0.6103256158451147, + "learning_rate": 1.0023131078914161e-06, + "loss": 0.267, + "step": 30455 + }, + { + "epoch": 1.4267110132571321, + "grad_norm": 0.638871025348931, + "learning_rate": 1.0021612606355438e-06, + "loss": 0.2892, + "step": 30456 + }, + { + "epoch": 1.4267578582470604, + "grad_norm": 0.6350383275271353, + "learning_rate": 1.0020094219993558e-06, + "loss": 0.2794, + "step": 30457 + }, + { + "epoch": 1.4268047032369888, + "grad_norm": 0.5910833974534692, + "learning_rate": 1.0018575919837285e-06, + "loss": 0.2777, + "step": 30458 + }, + { + "epoch": 1.426851548226917, + "grad_norm": 0.5706560901818841, + "learning_rate": 1.0017057705895329e-06, + "loss": 0.2782, + "step": 30459 + }, + { + "epoch": 1.4268983932168455, + "grad_norm": 0.6198728647446846, + "learning_rate": 1.001553957817644e-06, + "loss": 0.2659, + "step": 30460 + }, + { + "epoch": 1.4269452382067738, + "grad_norm": 0.6194113051243437, + "learning_rate": 1.0014021536689355e-06, + "loss": 0.2725, + "step": 30461 + }, + { + "epoch": 1.4269920831967022, + "grad_norm": 0.5662598457991832, + "learning_rate": 1.0012503581442808e-06, + "loss": 0.2723, + "step": 30462 + }, + { + "epoch": 1.4270389281866305, + "grad_norm": 0.6219244484672317, + "learning_rate": 1.0010985712445548e-06, + "loss": 0.2882, + "step": 30463 + }, + { + "epoch": 1.4270857731765587, + "grad_norm": 0.6156491422065283, + "learning_rate": 1.0009467929706298e-06, + "loss": 0.2721, + "step": 30464 + }, + { + "epoch": 1.4271326181664872, + "grad_norm": 0.5886679988476874, + "learning_rate": 1.0007950233233782e-06, + "loss": 0.2588, + "step": 30465 + }, + { + "epoch": 1.4271794631564154, + "grad_norm": 0.6099191901874557, + "learning_rate": 1.0006432623036745e-06, + "loss": 0.2683, + "step": 30466 + }, + { + "epoch": 1.4272263081463437, + "grad_norm": 0.6709704060045191, + "learning_rate": 1.000491509912392e-06, + "loss": 0.2932, + "step": 30467 + }, + { + "epoch": 1.4272731531362721, + "grad_norm": 0.5803266216285364, + "learning_rate": 1.0003397661504036e-06, + "loss": 0.2754, + "step": 30468 + }, + { + "epoch": 1.4273199981262004, + "grad_norm": 0.6835055490324794, + "learning_rate": 1.0001880310185833e-06, + "loss": 0.296, + "step": 30469 + }, + { + "epoch": 1.4273668431161286, + "grad_norm": 0.5720743241347369, + "learning_rate": 1.0000363045178047e-06, + "loss": 0.2472, + "step": 30470 + }, + { + "epoch": 1.427413688106057, + "grad_norm": 0.5878304949893641, + "learning_rate": 9.9988458664894e-07, + "loss": 0.2711, + "step": 30471 + }, + { + "epoch": 1.4274605330959853, + "grad_norm": 0.5778950542593744, + "learning_rate": 9.99732877412862e-07, + "loss": 0.2708, + "step": 30472 + }, + { + "epoch": 1.4275073780859138, + "grad_norm": 0.6423886159806608, + "learning_rate": 9.99581176810444e-07, + "loss": 0.2847, + "step": 30473 + }, + { + "epoch": 1.427554223075842, + "grad_norm": 0.5572359427565046, + "learning_rate": 9.99429484842559e-07, + "loss": 0.2555, + "step": 30474 + }, + { + "epoch": 1.4276010680657705, + "grad_norm": 0.5895436190871717, + "learning_rate": 9.992778015100804e-07, + "loss": 0.2755, + "step": 30475 + }, + { + "epoch": 1.4276479130556987, + "grad_norm": 0.5881165939549297, + "learning_rate": 9.991261268138816e-07, + "loss": 0.2714, + "step": 30476 + }, + { + "epoch": 1.427694758045627, + "grad_norm": 0.5771908485333836, + "learning_rate": 9.98974460754834e-07, + "loss": 0.2745, + "step": 30477 + }, + { + "epoch": 1.4277416030355554, + "grad_norm": 0.6408084263710424, + "learning_rate": 9.988228033338118e-07, + "loss": 0.2957, + "step": 30478 + }, + { + "epoch": 1.4277884480254837, + "grad_norm": 0.6189024918655494, + "learning_rate": 9.986711545516864e-07, + "loss": 0.2799, + "step": 30479 + }, + { + "epoch": 1.427835293015412, + "grad_norm": 0.6105172360685119, + "learning_rate": 9.98519514409331e-07, + "loss": 0.2694, + "step": 30480 + }, + { + "epoch": 1.4278821380053404, + "grad_norm": 0.5992379667435422, + "learning_rate": 9.983678829076185e-07, + "loss": 0.2645, + "step": 30481 + }, + { + "epoch": 1.4279289829952686, + "grad_norm": 0.6121806578217638, + "learning_rate": 9.982162600474213e-07, + "loss": 0.2809, + "step": 30482 + }, + { + "epoch": 1.427975827985197, + "grad_norm": 0.62783863876218, + "learning_rate": 9.98064645829613e-07, + "loss": 0.2659, + "step": 30483 + }, + { + "epoch": 1.4280226729751253, + "grad_norm": 0.5648730301941483, + "learning_rate": 9.979130402550644e-07, + "loss": 0.2691, + "step": 30484 + }, + { + "epoch": 1.4280695179650538, + "grad_norm": 0.606331959966857, + "learning_rate": 9.977614433246493e-07, + "loss": 0.2734, + "step": 30485 + }, + { + "epoch": 1.428116362954982, + "grad_norm": 0.580463440629306, + "learning_rate": 9.97609855039239e-07, + "loss": 0.2691, + "step": 30486 + }, + { + "epoch": 1.4281632079449103, + "grad_norm": 0.5602984267348351, + "learning_rate": 9.97458275399706e-07, + "loss": 0.2651, + "step": 30487 + }, + { + "epoch": 1.4282100529348387, + "grad_norm": 0.6249569570675054, + "learning_rate": 9.973067044069235e-07, + "loss": 0.2715, + "step": 30488 + }, + { + "epoch": 1.428256897924767, + "grad_norm": 0.6082511066153325, + "learning_rate": 9.971551420617636e-07, + "loss": 0.2608, + "step": 30489 + }, + { + "epoch": 1.4283037429146952, + "grad_norm": 0.635628516136112, + "learning_rate": 9.970035883650975e-07, + "loss": 0.2667, + "step": 30490 + }, + { + "epoch": 1.4283505879046237, + "grad_norm": 0.6341710903445316, + "learning_rate": 9.96852043317798e-07, + "loss": 0.2863, + "step": 30491 + }, + { + "epoch": 1.428397432894552, + "grad_norm": 0.633771581756624, + "learning_rate": 9.96700506920738e-07, + "loss": 0.2786, + "step": 30492 + }, + { + "epoch": 1.4284442778844801, + "grad_norm": 0.6147970352571395, + "learning_rate": 9.965489791747878e-07, + "loss": 0.284, + "step": 30493 + }, + { + "epoch": 1.4284911228744086, + "grad_norm": 0.5858343246641505, + "learning_rate": 9.963974600808205e-07, + "loss": 0.2586, + "step": 30494 + }, + { + "epoch": 1.4285379678643368, + "grad_norm": 0.6075035200109051, + "learning_rate": 9.962459496397087e-07, + "loss": 0.2677, + "step": 30495 + }, + { + "epoch": 1.4285848128542653, + "grad_norm": 0.5990258632332688, + "learning_rate": 9.960944478523224e-07, + "loss": 0.2711, + "step": 30496 + }, + { + "epoch": 1.4286316578441935, + "grad_norm": 0.6538352693846796, + "learning_rate": 9.959429547195348e-07, + "loss": 0.2934, + "step": 30497 + }, + { + "epoch": 1.428678502834122, + "grad_norm": 0.6717675105394073, + "learning_rate": 9.957914702422176e-07, + "loss": 0.2731, + "step": 30498 + }, + { + "epoch": 1.4287253478240503, + "grad_norm": 0.593559602794528, + "learning_rate": 9.956399944212431e-07, + "loss": 0.2811, + "step": 30499 + }, + { + "epoch": 1.4287721928139785, + "grad_norm": 0.571199775527896, + "learning_rate": 9.954885272574813e-07, + "loss": 0.2598, + "step": 30500 + }, + { + "epoch": 1.428819037803907, + "grad_norm": 0.6009177063531222, + "learning_rate": 9.95337068751806e-07, + "loss": 0.2672, + "step": 30501 + }, + { + "epoch": 1.4288658827938352, + "grad_norm": 0.6049162724474948, + "learning_rate": 9.951856189050868e-07, + "loss": 0.2649, + "step": 30502 + }, + { + "epoch": 1.4289127277837634, + "grad_norm": 0.6156129186364078, + "learning_rate": 9.95034177718196e-07, + "loss": 0.2574, + "step": 30503 + }, + { + "epoch": 1.428959572773692, + "grad_norm": 0.6002156346562919, + "learning_rate": 9.948827451920054e-07, + "loss": 0.2699, + "step": 30504 + }, + { + "epoch": 1.4290064177636201, + "grad_norm": 0.5746716886988975, + "learning_rate": 9.947313213273871e-07, + "loss": 0.2562, + "step": 30505 + }, + { + "epoch": 1.4290532627535484, + "grad_norm": 0.6440824363769858, + "learning_rate": 9.945799061252109e-07, + "loss": 0.2888, + "step": 30506 + }, + { + "epoch": 1.4291001077434768, + "grad_norm": 0.5826150773648546, + "learning_rate": 9.944284995863498e-07, + "loss": 0.2702, + "step": 30507 + }, + { + "epoch": 1.429146952733405, + "grad_norm": 0.5484540977947089, + "learning_rate": 9.942771017116735e-07, + "loss": 0.2473, + "step": 30508 + }, + { + "epoch": 1.4291937977233335, + "grad_norm": 0.5946803189653849, + "learning_rate": 9.94125712502054e-07, + "loss": 0.2732, + "step": 30509 + }, + { + "epoch": 1.4292406427132618, + "grad_norm": 0.6025693364232168, + "learning_rate": 9.939743319583622e-07, + "loss": 0.2723, + "step": 30510 + }, + { + "epoch": 1.4292874877031903, + "grad_norm": 0.5686067212143854, + "learning_rate": 9.9382296008147e-07, + "loss": 0.2676, + "step": 30511 + }, + { + "epoch": 1.4293343326931185, + "grad_norm": 0.6573615534159563, + "learning_rate": 9.936715968722487e-07, + "loss": 0.2842, + "step": 30512 + }, + { + "epoch": 1.4293811776830467, + "grad_norm": 0.5789354320711977, + "learning_rate": 9.93520242331569e-07, + "loss": 0.2697, + "step": 30513 + }, + { + "epoch": 1.4294280226729752, + "grad_norm": 0.6157796406961639, + "learning_rate": 9.93368896460301e-07, + "loss": 0.292, + "step": 30514 + }, + { + "epoch": 1.4294748676629034, + "grad_norm": 0.605872554332781, + "learning_rate": 9.932175592593157e-07, + "loss": 0.2689, + "step": 30515 + }, + { + "epoch": 1.4295217126528317, + "grad_norm": 0.566326401245861, + "learning_rate": 9.93066230729485e-07, + "loss": 0.2685, + "step": 30516 + }, + { + "epoch": 1.4295685576427601, + "grad_norm": 0.585095181946555, + "learning_rate": 9.929149108716792e-07, + "loss": 0.2643, + "step": 30517 + }, + { + "epoch": 1.4296154026326884, + "grad_norm": 0.6013263961502122, + "learning_rate": 9.927635996867697e-07, + "loss": 0.2754, + "step": 30518 + }, + { + "epoch": 1.4296622476226168, + "grad_norm": 0.582757694659945, + "learning_rate": 9.926122971756273e-07, + "loss": 0.2652, + "step": 30519 + }, + { + "epoch": 1.429709092612545, + "grad_norm": 0.5847537782727747, + "learning_rate": 9.924610033391225e-07, + "loss": 0.2665, + "step": 30520 + }, + { + "epoch": 1.4297559376024735, + "grad_norm": 0.6195160830722035, + "learning_rate": 9.923097181781246e-07, + "loss": 0.289, + "step": 30521 + }, + { + "epoch": 1.4298027825924018, + "grad_norm": 0.597337936178448, + "learning_rate": 9.921584416935054e-07, + "loss": 0.2879, + "step": 30522 + }, + { + "epoch": 1.42984962758233, + "grad_norm": 0.57000019527092, + "learning_rate": 9.920071738861357e-07, + "loss": 0.2783, + "step": 30523 + }, + { + "epoch": 1.4298964725722585, + "grad_norm": 0.6395666861938423, + "learning_rate": 9.918559147568855e-07, + "loss": 0.2863, + "step": 30524 + }, + { + "epoch": 1.4299433175621867, + "grad_norm": 0.6025165340128822, + "learning_rate": 9.91704664306626e-07, + "loss": 0.2632, + "step": 30525 + }, + { + "epoch": 1.429990162552115, + "grad_norm": 0.6255612890628661, + "learning_rate": 9.915534225362267e-07, + "loss": 0.2848, + "step": 30526 + }, + { + "epoch": 1.4300370075420434, + "grad_norm": 0.5809744617864725, + "learning_rate": 9.914021894465589e-07, + "loss": 0.2664, + "step": 30527 + }, + { + "epoch": 1.4300838525319717, + "grad_norm": 0.5685250076938764, + "learning_rate": 9.912509650384916e-07, + "loss": 0.2684, + "step": 30528 + }, + { + "epoch": 1.4301306975219, + "grad_norm": 0.6460270208777845, + "learning_rate": 9.910997493128956e-07, + "loss": 0.265, + "step": 30529 + }, + { + "epoch": 1.4301775425118284, + "grad_norm": 0.6118997007218212, + "learning_rate": 9.909485422706414e-07, + "loss": 0.2693, + "step": 30530 + }, + { + "epoch": 1.4302243875017566, + "grad_norm": 0.56267606992978, + "learning_rate": 9.907973439126e-07, + "loss": 0.2653, + "step": 30531 + }, + { + "epoch": 1.430271232491685, + "grad_norm": 0.6030960685012458, + "learning_rate": 9.906461542396398e-07, + "loss": 0.2602, + "step": 30532 + }, + { + "epoch": 1.4303180774816133, + "grad_norm": 0.6096150062032962, + "learning_rate": 9.904949732526313e-07, + "loss": 0.2854, + "step": 30533 + }, + { + "epoch": 1.4303649224715418, + "grad_norm": 0.5986962664717494, + "learning_rate": 9.903438009524457e-07, + "loss": 0.2777, + "step": 30534 + }, + { + "epoch": 1.43041176746147, + "grad_norm": 0.5941385734936717, + "learning_rate": 9.901926373399515e-07, + "loss": 0.2808, + "step": 30535 + }, + { + "epoch": 1.4304586124513983, + "grad_norm": 0.5917537333122797, + "learning_rate": 9.90041482416019e-07, + "loss": 0.2833, + "step": 30536 + }, + { + "epoch": 1.4305054574413267, + "grad_norm": 0.6249757703303718, + "learning_rate": 9.898903361815193e-07, + "loss": 0.271, + "step": 30537 + }, + { + "epoch": 1.430552302431255, + "grad_norm": 0.5899308378941907, + "learning_rate": 9.8973919863732e-07, + "loss": 0.2566, + "step": 30538 + }, + { + "epoch": 1.4305991474211832, + "grad_norm": 0.5887270787075773, + "learning_rate": 9.895880697842924e-07, + "loss": 0.2575, + "step": 30539 + }, + { + "epoch": 1.4306459924111117, + "grad_norm": 0.6275736490465859, + "learning_rate": 9.894369496233056e-07, + "loss": 0.2752, + "step": 30540 + }, + { + "epoch": 1.43069283740104, + "grad_norm": 0.7854109144378447, + "learning_rate": 9.892858381552303e-07, + "loss": 0.2725, + "step": 30541 + }, + { + "epoch": 1.4307396823909682, + "grad_norm": 0.5762837702766036, + "learning_rate": 9.891347353809347e-07, + "loss": 0.2695, + "step": 30542 + }, + { + "epoch": 1.4307865273808966, + "grad_norm": 0.5524691946413507, + "learning_rate": 9.889836413012897e-07, + "loss": 0.2571, + "step": 30543 + }, + { + "epoch": 1.4308333723708249, + "grad_norm": 0.6358615062359321, + "learning_rate": 9.88832555917163e-07, + "loss": 0.2912, + "step": 30544 + }, + { + "epoch": 1.4308802173607533, + "grad_norm": 0.6063647480816949, + "learning_rate": 9.886814792294252e-07, + "loss": 0.2766, + "step": 30545 + }, + { + "epoch": 1.4309270623506816, + "grad_norm": 0.5988708715902828, + "learning_rate": 9.885304112389457e-07, + "loss": 0.2652, + "step": 30546 + }, + { + "epoch": 1.43097390734061, + "grad_norm": 0.6097139894817635, + "learning_rate": 9.883793519465948e-07, + "loss": 0.2979, + "step": 30547 + }, + { + "epoch": 1.4310207523305383, + "grad_norm": 0.560687070042441, + "learning_rate": 9.882283013532398e-07, + "loss": 0.263, + "step": 30548 + }, + { + "epoch": 1.4310675973204665, + "grad_norm": 0.589209415554287, + "learning_rate": 9.880772594597518e-07, + "loss": 0.2752, + "step": 30549 + }, + { + "epoch": 1.431114442310395, + "grad_norm": 0.6564972073646477, + "learning_rate": 9.879262262669983e-07, + "loss": 0.2783, + "step": 30550 + }, + { + "epoch": 1.4311612873003232, + "grad_norm": 0.5849130832511544, + "learning_rate": 9.877752017758494e-07, + "loss": 0.2987, + "step": 30551 + }, + { + "epoch": 1.4312081322902515, + "grad_norm": 0.6176291516067611, + "learning_rate": 9.876241859871737e-07, + "loss": 0.2979, + "step": 30552 + }, + { + "epoch": 1.43125497728018, + "grad_norm": 0.5542713882440843, + "learning_rate": 9.874731789018412e-07, + "loss": 0.2659, + "step": 30553 + }, + { + "epoch": 1.4313018222701082, + "grad_norm": 0.5833868476969515, + "learning_rate": 9.873221805207212e-07, + "loss": 0.2601, + "step": 30554 + }, + { + "epoch": 1.4313486672600366, + "grad_norm": 0.614751939697991, + "learning_rate": 9.871711908446807e-07, + "loss": 0.2648, + "step": 30555 + }, + { + "epoch": 1.4313955122499649, + "grad_norm": 0.6317738404426274, + "learning_rate": 9.87020209874591e-07, + "loss": 0.2682, + "step": 30556 + }, + { + "epoch": 1.4314423572398933, + "grad_norm": 0.6180915180768974, + "learning_rate": 9.868692376113186e-07, + "loss": 0.2836, + "step": 30557 + }, + { + "epoch": 1.4314892022298216, + "grad_norm": 0.633386118973142, + "learning_rate": 9.867182740557335e-07, + "loss": 0.2774, + "step": 30558 + }, + { + "epoch": 1.4315360472197498, + "grad_norm": 0.6163052321360777, + "learning_rate": 9.865673192087044e-07, + "loss": 0.2696, + "step": 30559 + }, + { + "epoch": 1.4315828922096783, + "grad_norm": 0.6513836794953483, + "learning_rate": 9.864163730711e-07, + "loss": 0.2888, + "step": 30560 + }, + { + "epoch": 1.4316297371996065, + "grad_norm": 0.6118897358745455, + "learning_rate": 9.862654356437898e-07, + "loss": 0.2636, + "step": 30561 + }, + { + "epoch": 1.4316765821895348, + "grad_norm": 0.6117330286673504, + "learning_rate": 9.861145069276415e-07, + "loss": 0.2677, + "step": 30562 + }, + { + "epoch": 1.4317234271794632, + "grad_norm": 0.6044898827567422, + "learning_rate": 9.85963586923523e-07, + "loss": 0.2797, + "step": 30563 + }, + { + "epoch": 1.4317702721693915, + "grad_norm": 0.5835027723098157, + "learning_rate": 9.858126756323034e-07, + "loss": 0.2807, + "step": 30564 + }, + { + "epoch": 1.4318171171593197, + "grad_norm": 0.6392866465191948, + "learning_rate": 9.856617730548512e-07, + "loss": 0.2777, + "step": 30565 + }, + { + "epoch": 1.4318639621492482, + "grad_norm": 0.6370067002807593, + "learning_rate": 9.855108791920352e-07, + "loss": 0.2787, + "step": 30566 + }, + { + "epoch": 1.4319108071391764, + "grad_norm": 0.6047065932993064, + "learning_rate": 9.853599940447232e-07, + "loss": 0.2699, + "step": 30567 + }, + { + "epoch": 1.4319576521291049, + "grad_norm": 0.5635163121055373, + "learning_rate": 9.852091176137845e-07, + "loss": 0.2712, + "step": 30568 + }, + { + "epoch": 1.432004497119033, + "grad_norm": 0.590988450444146, + "learning_rate": 9.850582499000866e-07, + "loss": 0.2751, + "step": 30569 + }, + { + "epoch": 1.4320513421089616, + "grad_norm": 0.596281345905545, + "learning_rate": 9.849073909044971e-07, + "loss": 0.2704, + "step": 30570 + }, + { + "epoch": 1.4320981870988898, + "grad_norm": 0.5755542465001015, + "learning_rate": 9.847565406278845e-07, + "loss": 0.2686, + "step": 30571 + }, + { + "epoch": 1.432145032088818, + "grad_norm": 0.610129650271886, + "learning_rate": 9.846056990711172e-07, + "loss": 0.2758, + "step": 30572 + }, + { + "epoch": 1.4321918770787465, + "grad_norm": 0.616860363299615, + "learning_rate": 9.844548662350633e-07, + "loss": 0.2718, + "step": 30573 + }, + { + "epoch": 1.4322387220686748, + "grad_norm": 0.5789968522645915, + "learning_rate": 9.843040421205918e-07, + "loss": 0.2702, + "step": 30574 + }, + { + "epoch": 1.432285567058603, + "grad_norm": 0.5537488290642458, + "learning_rate": 9.841532267285684e-07, + "loss": 0.2507, + "step": 30575 + }, + { + "epoch": 1.4323324120485315, + "grad_norm": 0.5930810993561595, + "learning_rate": 9.840024200598633e-07, + "loss": 0.2859, + "step": 30576 + }, + { + "epoch": 1.4323792570384597, + "grad_norm": 0.6237109328299412, + "learning_rate": 9.83851622115342e-07, + "loss": 0.2832, + "step": 30577 + }, + { + "epoch": 1.432426102028388, + "grad_norm": 0.5927033871292373, + "learning_rate": 9.837008328958738e-07, + "loss": 0.2623, + "step": 30578 + }, + { + "epoch": 1.4324729470183164, + "grad_norm": 0.5448203663090976, + "learning_rate": 9.835500524023263e-07, + "loss": 0.2614, + "step": 30579 + }, + { + "epoch": 1.4325197920082446, + "grad_norm": 0.5783785817931671, + "learning_rate": 9.83399280635568e-07, + "loss": 0.2621, + "step": 30580 + }, + { + "epoch": 1.432566636998173, + "grad_norm": 0.5938430660480108, + "learning_rate": 9.832485175964646e-07, + "loss": 0.2725, + "step": 30581 + }, + { + "epoch": 1.4326134819881013, + "grad_norm": 0.5848785503634458, + "learning_rate": 9.830977632858846e-07, + "loss": 0.2636, + "step": 30582 + }, + { + "epoch": 1.4326603269780298, + "grad_norm": 0.6338062409232432, + "learning_rate": 9.829470177046968e-07, + "loss": 0.2776, + "step": 30583 + }, + { + "epoch": 1.432707171967958, + "grad_norm": 0.5650897559170057, + "learning_rate": 9.827962808537669e-07, + "loss": 0.2627, + "step": 30584 + }, + { + "epoch": 1.4327540169578863, + "grad_norm": 0.5766029385920676, + "learning_rate": 9.82645552733963e-07, + "loss": 0.2745, + "step": 30585 + }, + { + "epoch": 1.4328008619478148, + "grad_norm": 0.5893664720451813, + "learning_rate": 9.82494833346153e-07, + "loss": 0.2788, + "step": 30586 + }, + { + "epoch": 1.432847706937743, + "grad_norm": 0.6343887625342998, + "learning_rate": 9.823441226912033e-07, + "loss": 0.2952, + "step": 30587 + }, + { + "epoch": 1.4328945519276712, + "grad_norm": 0.5908174194237082, + "learning_rate": 9.82193420769982e-07, + "loss": 0.2657, + "step": 30588 + }, + { + "epoch": 1.4329413969175997, + "grad_norm": 0.5910986520027132, + "learning_rate": 9.820427275833558e-07, + "loss": 0.2854, + "step": 30589 + }, + { + "epoch": 1.432988241907528, + "grad_norm": 0.6091030548846357, + "learning_rate": 9.81892043132193e-07, + "loss": 0.2698, + "step": 30590 + }, + { + "epoch": 1.4330350868974562, + "grad_norm": 0.5749353110122246, + "learning_rate": 9.81741367417359e-07, + "loss": 0.2672, + "step": 30591 + }, + { + "epoch": 1.4330819318873846, + "grad_norm": 0.5780090350407927, + "learning_rate": 9.81590700439723e-07, + "loss": 0.2739, + "step": 30592 + }, + { + "epoch": 1.433128776877313, + "grad_norm": 0.5840383818791463, + "learning_rate": 9.814400422001497e-07, + "loss": 0.2796, + "step": 30593 + }, + { + "epoch": 1.4331756218672413, + "grad_norm": 0.5398861098096405, + "learning_rate": 9.812893926995074e-07, + "loss": 0.2464, + "step": 30594 + }, + { + "epoch": 1.4332224668571696, + "grad_norm": 0.6025921170794002, + "learning_rate": 9.811387519386627e-07, + "loss": 0.2822, + "step": 30595 + }, + { + "epoch": 1.433269311847098, + "grad_norm": 0.5990887626368113, + "learning_rate": 9.80988119918484e-07, + "loss": 0.2776, + "step": 30596 + }, + { + "epoch": 1.4333161568370263, + "grad_norm": 0.5832881406338108, + "learning_rate": 9.80837496639836e-07, + "loss": 0.2732, + "step": 30597 + }, + { + "epoch": 1.4333630018269545, + "grad_norm": 0.555067202289595, + "learning_rate": 9.806868821035869e-07, + "loss": 0.2569, + "step": 30598 + }, + { + "epoch": 1.433409846816883, + "grad_norm": 0.6171240664117865, + "learning_rate": 9.805362763106024e-07, + "loss": 0.2796, + "step": 30599 + }, + { + "epoch": 1.4334566918068112, + "grad_norm": 0.6044150190282858, + "learning_rate": 9.803856792617495e-07, + "loss": 0.288, + "step": 30600 + }, + { + "epoch": 1.4335035367967395, + "grad_norm": 0.601656837505605, + "learning_rate": 9.802350909578951e-07, + "loss": 0.2833, + "step": 30601 + }, + { + "epoch": 1.433550381786668, + "grad_norm": 0.5712699903332791, + "learning_rate": 9.800845113999058e-07, + "loss": 0.2532, + "step": 30602 + }, + { + "epoch": 1.4335972267765962, + "grad_norm": 0.6348620286869169, + "learning_rate": 9.79933940588649e-07, + "loss": 0.2707, + "step": 30603 + }, + { + "epoch": 1.4336440717665246, + "grad_norm": 0.5897578666456167, + "learning_rate": 9.797833785249906e-07, + "loss": 0.2581, + "step": 30604 + }, + { + "epoch": 1.4336909167564529, + "grad_norm": 0.5739235861217777, + "learning_rate": 9.796328252097955e-07, + "loss": 0.2649, + "step": 30605 + }, + { + "epoch": 1.4337377617463813, + "grad_norm": 0.5831348546282558, + "learning_rate": 9.794822806439314e-07, + "loss": 0.2634, + "step": 30606 + }, + { + "epoch": 1.4337846067363096, + "grad_norm": 0.6046935657754069, + "learning_rate": 9.793317448282647e-07, + "loss": 0.2723, + "step": 30607 + }, + { + "epoch": 1.4338314517262378, + "grad_norm": 0.5740635408883553, + "learning_rate": 9.791812177636616e-07, + "loss": 0.2766, + "step": 30608 + }, + { + "epoch": 1.4338782967161663, + "grad_norm": 0.5473955014682778, + "learning_rate": 9.790306994509884e-07, + "loss": 0.2514, + "step": 30609 + }, + { + "epoch": 1.4339251417060945, + "grad_norm": 0.6151290989189238, + "learning_rate": 9.788801898911122e-07, + "loss": 0.276, + "step": 30610 + }, + { + "epoch": 1.4339719866960228, + "grad_norm": 0.5580921158422175, + "learning_rate": 9.78729689084898e-07, + "loss": 0.2639, + "step": 30611 + }, + { + "epoch": 1.4340188316859512, + "grad_norm": 0.5811837534020442, + "learning_rate": 9.785791970332112e-07, + "loss": 0.2754, + "step": 30612 + }, + { + "epoch": 1.4340656766758795, + "grad_norm": 0.5762319100546971, + "learning_rate": 9.78428713736919e-07, + "loss": 0.2661, + "step": 30613 + }, + { + "epoch": 1.4341125216658077, + "grad_norm": 0.5884037282563265, + "learning_rate": 9.782782391968868e-07, + "loss": 0.2724, + "step": 30614 + }, + { + "epoch": 1.4341593666557362, + "grad_norm": 0.5538500827241037, + "learning_rate": 9.781277734139806e-07, + "loss": 0.2583, + "step": 30615 + }, + { + "epoch": 1.4342062116456644, + "grad_norm": 0.6263826932690768, + "learning_rate": 9.77977316389068e-07, + "loss": 0.2873, + "step": 30616 + }, + { + "epoch": 1.4342530566355929, + "grad_norm": 0.5597517765492007, + "learning_rate": 9.778268681230125e-07, + "loss": 0.253, + "step": 30617 + }, + { + "epoch": 1.4342999016255211, + "grad_norm": 0.5770297992182067, + "learning_rate": 9.776764286166815e-07, + "loss": 0.2644, + "step": 30618 + }, + { + "epoch": 1.4343467466154496, + "grad_norm": 0.5853780203891575, + "learning_rate": 9.775259978709389e-07, + "loss": 0.2744, + "step": 30619 + }, + { + "epoch": 1.4343935916053778, + "grad_norm": 0.5973636869212818, + "learning_rate": 9.773755758866518e-07, + "loss": 0.2789, + "step": 30620 + }, + { + "epoch": 1.434440436595306, + "grad_norm": 0.5848888423994063, + "learning_rate": 9.772251626646855e-07, + "loss": 0.2706, + "step": 30621 + }, + { + "epoch": 1.4344872815852345, + "grad_norm": 0.5717802747286616, + "learning_rate": 9.77074758205906e-07, + "loss": 0.2764, + "step": 30622 + }, + { + "epoch": 1.4345341265751628, + "grad_norm": 0.6342767690829729, + "learning_rate": 9.769243625111789e-07, + "loss": 0.2947, + "step": 30623 + }, + { + "epoch": 1.434580971565091, + "grad_norm": 0.5976414770965787, + "learning_rate": 9.767739755813685e-07, + "loss": 0.279, + "step": 30624 + }, + { + "epoch": 1.4346278165550195, + "grad_norm": 0.6490208433524667, + "learning_rate": 9.766235974173419e-07, + "loss": 0.2884, + "step": 30625 + }, + { + "epoch": 1.4346746615449477, + "grad_norm": 0.6015336384123212, + "learning_rate": 9.764732280199628e-07, + "loss": 0.2717, + "step": 30626 + }, + { + "epoch": 1.434721506534876, + "grad_norm": 0.5613246033162033, + "learning_rate": 9.763228673900973e-07, + "loss": 0.2648, + "step": 30627 + }, + { + "epoch": 1.4347683515248044, + "grad_norm": 0.5841326197874173, + "learning_rate": 9.761725155286108e-07, + "loss": 0.2819, + "step": 30628 + }, + { + "epoch": 1.4348151965147329, + "grad_norm": 0.6259108135552107, + "learning_rate": 9.760221724363694e-07, + "loss": 0.268, + "step": 30629 + }, + { + "epoch": 1.4348620415046611, + "grad_norm": 0.571750322651283, + "learning_rate": 9.758718381142365e-07, + "loss": 0.2787, + "step": 30630 + }, + { + "epoch": 1.4349088864945894, + "grad_norm": 0.6062973558945188, + "learning_rate": 9.757215125630778e-07, + "loss": 0.2906, + "step": 30631 + }, + { + "epoch": 1.4349557314845178, + "grad_norm": 0.5453500422221975, + "learning_rate": 9.755711957837598e-07, + "loss": 0.254, + "step": 30632 + }, + { + "epoch": 1.435002576474446, + "grad_norm": 0.5512726715347903, + "learning_rate": 9.754208877771456e-07, + "loss": 0.2503, + "step": 30633 + }, + { + "epoch": 1.4350494214643743, + "grad_norm": 0.5744487913176569, + "learning_rate": 9.75270588544101e-07, + "loss": 0.248, + "step": 30634 + }, + { + "epoch": 1.4350962664543028, + "grad_norm": 0.6164942448708853, + "learning_rate": 9.751202980854916e-07, + "loss": 0.2789, + "step": 30635 + }, + { + "epoch": 1.435143111444231, + "grad_norm": 0.5762269728306514, + "learning_rate": 9.749700164021808e-07, + "loss": 0.2481, + "step": 30636 + }, + { + "epoch": 1.4351899564341593, + "grad_norm": 0.6230323939021615, + "learning_rate": 9.748197434950343e-07, + "loss": 0.2817, + "step": 30637 + }, + { + "epoch": 1.4352368014240877, + "grad_norm": 0.5971528774212104, + "learning_rate": 9.74669479364917e-07, + "loss": 0.2733, + "step": 30638 + }, + { + "epoch": 1.435283646414016, + "grad_norm": 0.5938466832904106, + "learning_rate": 9.74519224012694e-07, + "loss": 0.2735, + "step": 30639 + }, + { + "epoch": 1.4353304914039444, + "grad_norm": 0.5707522716686969, + "learning_rate": 9.743689774392289e-07, + "loss": 0.2674, + "step": 30640 + }, + { + "epoch": 1.4353773363938727, + "grad_norm": 0.5895944768235305, + "learning_rate": 9.742187396453879e-07, + "loss": 0.2606, + "step": 30641 + }, + { + "epoch": 1.4354241813838011, + "grad_norm": 0.5543725082421213, + "learning_rate": 9.740685106320334e-07, + "loss": 0.2449, + "step": 30642 + }, + { + "epoch": 1.4354710263737294, + "grad_norm": 0.5404052194663747, + "learning_rate": 9.73918290400031e-07, + "loss": 0.255, + "step": 30643 + }, + { + "epoch": 1.4355178713636576, + "grad_norm": 0.6043205773942548, + "learning_rate": 9.737680789502455e-07, + "loss": 0.2654, + "step": 30644 + }, + { + "epoch": 1.435564716353586, + "grad_norm": 0.5806899743271993, + "learning_rate": 9.736178762835417e-07, + "loss": 0.2777, + "step": 30645 + }, + { + "epoch": 1.4356115613435143, + "grad_norm": 0.5755431794616546, + "learning_rate": 9.734676824007827e-07, + "loss": 0.277, + "step": 30646 + }, + { + "epoch": 1.4356584063334425, + "grad_norm": 0.577013124215784, + "learning_rate": 9.733174973028345e-07, + "loss": 0.2801, + "step": 30647 + }, + { + "epoch": 1.435705251323371, + "grad_norm": 0.6074355862064051, + "learning_rate": 9.731673209905592e-07, + "loss": 0.2777, + "step": 30648 + }, + { + "epoch": 1.4357520963132993, + "grad_norm": 0.5939560821642124, + "learning_rate": 9.730171534648226e-07, + "loss": 0.2738, + "step": 30649 + }, + { + "epoch": 1.4357989413032275, + "grad_norm": 0.6293469865765627, + "learning_rate": 9.72866994726488e-07, + "loss": 0.2868, + "step": 30650 + }, + { + "epoch": 1.435845786293156, + "grad_norm": 0.6170403442697948, + "learning_rate": 9.727168447764204e-07, + "loss": 0.2725, + "step": 30651 + }, + { + "epoch": 1.4358926312830842, + "grad_norm": 0.5613268164000561, + "learning_rate": 9.725667036154842e-07, + "loss": 0.2616, + "step": 30652 + }, + { + "epoch": 1.4359394762730127, + "grad_norm": 0.6091881259697824, + "learning_rate": 9.724165712445427e-07, + "loss": 0.2741, + "step": 30653 + }, + { + "epoch": 1.435986321262941, + "grad_norm": 0.597245474603728, + "learning_rate": 9.722664476644592e-07, + "loss": 0.2657, + "step": 30654 + }, + { + "epoch": 1.4360331662528694, + "grad_norm": 0.5445422130931789, + "learning_rate": 9.721163328760982e-07, + "loss": 0.2591, + "step": 30655 + }, + { + "epoch": 1.4360800112427976, + "grad_norm": 0.6063126211693048, + "learning_rate": 9.719662268803237e-07, + "loss": 0.2813, + "step": 30656 + }, + { + "epoch": 1.4361268562327258, + "grad_norm": 0.5914894037111129, + "learning_rate": 9.718161296779994e-07, + "loss": 0.2753, + "step": 30657 + }, + { + "epoch": 1.4361737012226543, + "grad_norm": 0.5696892885180838, + "learning_rate": 9.716660412699893e-07, + "loss": 0.2619, + "step": 30658 + }, + { + "epoch": 1.4362205462125825, + "grad_norm": 0.6344210799801022, + "learning_rate": 9.71515961657158e-07, + "loss": 0.3088, + "step": 30659 + }, + { + "epoch": 1.4362673912025108, + "grad_norm": 0.6031441309091512, + "learning_rate": 9.713658908403677e-07, + "loss": 0.2672, + "step": 30660 + }, + { + "epoch": 1.4363142361924393, + "grad_norm": 0.5627978042642887, + "learning_rate": 9.71215828820482e-07, + "loss": 0.2686, + "step": 30661 + }, + { + "epoch": 1.4363610811823675, + "grad_norm": 0.580178136023124, + "learning_rate": 9.71065775598365e-07, + "loss": 0.2642, + "step": 30662 + }, + { + "epoch": 1.4364079261722957, + "grad_norm": 0.6541949178169233, + "learning_rate": 9.709157311748798e-07, + "loss": 0.2815, + "step": 30663 + }, + { + "epoch": 1.4364547711622242, + "grad_norm": 0.6254671027030102, + "learning_rate": 9.707656955508906e-07, + "loss": 0.271, + "step": 30664 + }, + { + "epoch": 1.4365016161521527, + "grad_norm": 0.6091073561408992, + "learning_rate": 9.706156687272614e-07, + "loss": 0.263, + "step": 30665 + }, + { + "epoch": 1.436548461142081, + "grad_norm": 0.5767679955093366, + "learning_rate": 9.704656507048536e-07, + "loss": 0.2617, + "step": 30666 + }, + { + "epoch": 1.4365953061320091, + "grad_norm": 0.5612259722391069, + "learning_rate": 9.703156414845322e-07, + "loss": 0.2604, + "step": 30667 + }, + { + "epoch": 1.4366421511219376, + "grad_norm": 0.6074109024736278, + "learning_rate": 9.701656410671592e-07, + "loss": 0.2672, + "step": 30668 + }, + { + "epoch": 1.4366889961118658, + "grad_norm": 0.5916789940454147, + "learning_rate": 9.700156494535984e-07, + "loss": 0.2674, + "step": 30669 + }, + { + "epoch": 1.436735841101794, + "grad_norm": 0.5673410844092189, + "learning_rate": 9.69865666644713e-07, + "loss": 0.2692, + "step": 30670 + }, + { + "epoch": 1.4367826860917225, + "grad_norm": 0.5901337916033245, + "learning_rate": 9.69715692641367e-07, + "loss": 0.2741, + "step": 30671 + }, + { + "epoch": 1.4368295310816508, + "grad_norm": 0.6032897665629451, + "learning_rate": 9.695657274444218e-07, + "loss": 0.2623, + "step": 30672 + }, + { + "epoch": 1.436876376071579, + "grad_norm": 0.5657784888614054, + "learning_rate": 9.69415771054741e-07, + "loss": 0.264, + "step": 30673 + }, + { + "epoch": 1.4369232210615075, + "grad_norm": 0.5824699131965, + "learning_rate": 9.692658234731886e-07, + "loss": 0.2731, + "step": 30674 + }, + { + "epoch": 1.4369700660514357, + "grad_norm": 0.5806284485655073, + "learning_rate": 9.691158847006258e-07, + "loss": 0.2622, + "step": 30675 + }, + { + "epoch": 1.4370169110413642, + "grad_norm": 0.5794534114190774, + "learning_rate": 9.689659547379162e-07, + "loss": 0.27, + "step": 30676 + }, + { + "epoch": 1.4370637560312924, + "grad_norm": 0.5896027793703416, + "learning_rate": 9.688160335859238e-07, + "loss": 0.2657, + "step": 30677 + }, + { + "epoch": 1.437110601021221, + "grad_norm": 0.6059635059596364, + "learning_rate": 9.686661212455093e-07, + "loss": 0.2872, + "step": 30678 + }, + { + "epoch": 1.4371574460111491, + "grad_norm": 0.6032506676310944, + "learning_rate": 9.685162177175364e-07, + "loss": 0.2673, + "step": 30679 + }, + { + "epoch": 1.4372042910010774, + "grad_norm": 0.5887833111075783, + "learning_rate": 9.683663230028679e-07, + "loss": 0.2585, + "step": 30680 + }, + { + "epoch": 1.4372511359910058, + "grad_norm": 0.6096594606798198, + "learning_rate": 9.682164371023666e-07, + "loss": 0.2699, + "step": 30681 + }, + { + "epoch": 1.437297980980934, + "grad_norm": 0.6145359141980873, + "learning_rate": 9.680665600168942e-07, + "loss": 0.2797, + "step": 30682 + }, + { + "epoch": 1.4373448259708623, + "grad_norm": 0.6335722290291524, + "learning_rate": 9.67916691747315e-07, + "loss": 0.2569, + "step": 30683 + }, + { + "epoch": 1.4373916709607908, + "grad_norm": 0.6220218244931185, + "learning_rate": 9.677668322944886e-07, + "loss": 0.285, + "step": 30684 + }, + { + "epoch": 1.437438515950719, + "grad_norm": 0.5886848336695619, + "learning_rate": 9.676169816592793e-07, + "loss": 0.2777, + "step": 30685 + }, + { + "epoch": 1.4374853609406473, + "grad_norm": 0.6065066047205536, + "learning_rate": 9.67467139842549e-07, + "loss": 0.2704, + "step": 30686 + }, + { + "epoch": 1.4375322059305757, + "grad_norm": 0.5884491168833187, + "learning_rate": 9.673173068451611e-07, + "loss": 0.2827, + "step": 30687 + }, + { + "epoch": 1.437579050920504, + "grad_norm": 0.6069773563659725, + "learning_rate": 9.671674826679761e-07, + "loss": 0.2807, + "step": 30688 + }, + { + "epoch": 1.4376258959104324, + "grad_norm": 0.5722410277698272, + "learning_rate": 9.670176673118566e-07, + "loss": 0.2635, + "step": 30689 + }, + { + "epoch": 1.4376727409003607, + "grad_norm": 0.606135610252528, + "learning_rate": 9.668678607776663e-07, + "loss": 0.2753, + "step": 30690 + }, + { + "epoch": 1.4377195858902891, + "grad_norm": 0.5791054322959591, + "learning_rate": 9.667180630662653e-07, + "loss": 0.2746, + "step": 30691 + }, + { + "epoch": 1.4377664308802174, + "grad_norm": 0.5975365218388387, + "learning_rate": 9.665682741785163e-07, + "loss": 0.2781, + "step": 30692 + }, + { + "epoch": 1.4378132758701456, + "grad_norm": 0.5902723873863909, + "learning_rate": 9.664184941152815e-07, + "loss": 0.2617, + "step": 30693 + }, + { + "epoch": 1.437860120860074, + "grad_norm": 0.5765347381899559, + "learning_rate": 9.662687228774236e-07, + "loss": 0.2684, + "step": 30694 + }, + { + "epoch": 1.4379069658500023, + "grad_norm": 0.5971777241912186, + "learning_rate": 9.66118960465803e-07, + "loss": 0.2785, + "step": 30695 + }, + { + "epoch": 1.4379538108399306, + "grad_norm": 0.5902956530138033, + "learning_rate": 9.659692068812832e-07, + "loss": 0.272, + "step": 30696 + }, + { + "epoch": 1.438000655829859, + "grad_norm": 0.6053861254584078, + "learning_rate": 9.658194621247241e-07, + "loss": 0.2692, + "step": 30697 + }, + { + "epoch": 1.4380475008197873, + "grad_norm": 0.636025119245371, + "learning_rate": 9.656697261969883e-07, + "loss": 0.2728, + "step": 30698 + }, + { + "epoch": 1.4380943458097155, + "grad_norm": 0.5671795560469808, + "learning_rate": 9.655199990989376e-07, + "loss": 0.2651, + "step": 30699 + }, + { + "epoch": 1.438141190799644, + "grad_norm": 0.6080392078467228, + "learning_rate": 9.653702808314336e-07, + "loss": 0.2647, + "step": 30700 + }, + { + "epoch": 1.4381880357895724, + "grad_norm": 0.5849759163344816, + "learning_rate": 9.65220571395339e-07, + "loss": 0.2672, + "step": 30701 + }, + { + "epoch": 1.4382348807795007, + "grad_norm": 0.6215683737434694, + "learning_rate": 9.65070870791514e-07, + "loss": 0.266, + "step": 30702 + }, + { + "epoch": 1.438281725769429, + "grad_norm": 0.6144162041392013, + "learning_rate": 9.649211790208196e-07, + "loss": 0.2795, + "step": 30703 + }, + { + "epoch": 1.4383285707593574, + "grad_norm": 0.6189790794228854, + "learning_rate": 9.647714960841182e-07, + "loss": 0.2893, + "step": 30704 + }, + { + "epoch": 1.4383754157492856, + "grad_norm": 0.5842033410841737, + "learning_rate": 9.646218219822706e-07, + "loss": 0.2673, + "step": 30705 + }, + { + "epoch": 1.4384222607392139, + "grad_norm": 0.563028797215941, + "learning_rate": 9.644721567161387e-07, + "loss": 0.2582, + "step": 30706 + }, + { + "epoch": 1.4384691057291423, + "grad_norm": 0.639909165843531, + "learning_rate": 9.643225002865836e-07, + "loss": 0.2802, + "step": 30707 + }, + { + "epoch": 1.4385159507190706, + "grad_norm": 0.602414615705951, + "learning_rate": 9.64172852694467e-07, + "loss": 0.2789, + "step": 30708 + }, + { + "epoch": 1.4385627957089988, + "grad_norm": 0.5738874837393507, + "learning_rate": 9.640232139406502e-07, + "loss": 0.2697, + "step": 30709 + }, + { + "epoch": 1.4386096406989273, + "grad_norm": 0.5442226301791022, + "learning_rate": 9.638735840259928e-07, + "loss": 0.2575, + "step": 30710 + }, + { + "epoch": 1.4386564856888555, + "grad_norm": 0.5853255405528265, + "learning_rate": 9.637239629513568e-07, + "loss": 0.2695, + "step": 30711 + }, + { + "epoch": 1.438703330678784, + "grad_norm": 0.6089366979084546, + "learning_rate": 9.635743507176028e-07, + "loss": 0.2719, + "step": 30712 + }, + { + "epoch": 1.4387501756687122, + "grad_norm": 0.5837176397222457, + "learning_rate": 9.634247473255927e-07, + "loss": 0.2753, + "step": 30713 + }, + { + "epoch": 1.4387970206586407, + "grad_norm": 0.6031127791963177, + "learning_rate": 9.632751527761878e-07, + "loss": 0.2657, + "step": 30714 + }, + { + "epoch": 1.438843865648569, + "grad_norm": 0.5618157054556009, + "learning_rate": 9.631255670702472e-07, + "loss": 0.2621, + "step": 30715 + }, + { + "epoch": 1.4388907106384972, + "grad_norm": 0.556156764337022, + "learning_rate": 9.629759902086336e-07, + "loss": 0.2703, + "step": 30716 + }, + { + "epoch": 1.4389375556284256, + "grad_norm": 0.6346691261898632, + "learning_rate": 9.62826422192206e-07, + "loss": 0.2928, + "step": 30717 + }, + { + "epoch": 1.4389844006183539, + "grad_norm": 0.5455800852531163, + "learning_rate": 9.62676863021826e-07, + "loss": 0.2575, + "step": 30718 + }, + { + "epoch": 1.439031245608282, + "grad_norm": 0.6291539390818176, + "learning_rate": 9.625273126983542e-07, + "loss": 0.2855, + "step": 30719 + }, + { + "epoch": 1.4390780905982106, + "grad_norm": 0.6117035779005501, + "learning_rate": 9.623777712226524e-07, + "loss": 0.2938, + "step": 30720 + }, + { + "epoch": 1.4391249355881388, + "grad_norm": 0.6766551748988493, + "learning_rate": 9.62228238595579e-07, + "loss": 0.2874, + "step": 30721 + }, + { + "epoch": 1.439171780578067, + "grad_norm": 0.5670876517405972, + "learning_rate": 9.620787148179953e-07, + "loss": 0.2666, + "step": 30722 + }, + { + "epoch": 1.4392186255679955, + "grad_norm": 0.6450969380020763, + "learning_rate": 9.619291998907631e-07, + "loss": 0.286, + "step": 30723 + }, + { + "epoch": 1.4392654705579238, + "grad_norm": 0.6098072516017748, + "learning_rate": 9.61779693814741e-07, + "loss": 0.2837, + "step": 30724 + }, + { + "epoch": 1.4393123155478522, + "grad_norm": 0.5387786941838941, + "learning_rate": 9.616301965907901e-07, + "loss": 0.2576, + "step": 30725 + }, + { + "epoch": 1.4393591605377805, + "grad_norm": 0.5916992935368908, + "learning_rate": 9.61480708219772e-07, + "loss": 0.2721, + "step": 30726 + }, + { + "epoch": 1.439406005527709, + "grad_norm": 0.6074827134586848, + "learning_rate": 9.613312287025442e-07, + "loss": 0.271, + "step": 30727 + }, + { + "epoch": 1.4394528505176372, + "grad_norm": 0.6172970146108061, + "learning_rate": 9.611817580399688e-07, + "loss": 0.2938, + "step": 30728 + }, + { + "epoch": 1.4394996955075654, + "grad_norm": 0.5687239656046184, + "learning_rate": 9.610322962329057e-07, + "loss": 0.2705, + "step": 30729 + }, + { + "epoch": 1.4395465404974939, + "grad_norm": 0.5966709895537651, + "learning_rate": 9.608828432822156e-07, + "loss": 0.2605, + "step": 30730 + }, + { + "epoch": 1.439593385487422, + "grad_norm": 0.6702818723289476, + "learning_rate": 9.607333991887572e-07, + "loss": 0.2728, + "step": 30731 + }, + { + "epoch": 1.4396402304773503, + "grad_norm": 0.6506410207610562, + "learning_rate": 9.60583963953392e-07, + "loss": 0.2883, + "step": 30732 + }, + { + "epoch": 1.4396870754672788, + "grad_norm": 0.5752413156973563, + "learning_rate": 9.604345375769783e-07, + "loss": 0.2645, + "step": 30733 + }, + { + "epoch": 1.439733920457207, + "grad_norm": 0.5765999128110324, + "learning_rate": 9.602851200603769e-07, + "loss": 0.2731, + "step": 30734 + }, + { + "epoch": 1.4397807654471353, + "grad_norm": 0.6135736691546864, + "learning_rate": 9.601357114044476e-07, + "loss": 0.2778, + "step": 30735 + }, + { + "epoch": 1.4398276104370638, + "grad_norm": 0.6007102977254005, + "learning_rate": 9.59986311610051e-07, + "loss": 0.2656, + "step": 30736 + }, + { + "epoch": 1.4398744554269922, + "grad_norm": 0.583356360644601, + "learning_rate": 9.598369206780456e-07, + "loss": 0.2657, + "step": 30737 + }, + { + "epoch": 1.4399213004169205, + "grad_norm": 0.5694114763794671, + "learning_rate": 9.596875386092922e-07, + "loss": 0.2596, + "step": 30738 + }, + { + "epoch": 1.4399681454068487, + "grad_norm": 0.60170789334449, + "learning_rate": 9.59538165404649e-07, + "loss": 0.275, + "step": 30739 + }, + { + "epoch": 1.4400149903967772, + "grad_norm": 0.608053162103291, + "learning_rate": 9.593888010649765e-07, + "loss": 0.266, + "step": 30740 + }, + { + "epoch": 1.4400618353867054, + "grad_norm": 0.6526399020188365, + "learning_rate": 9.592394455911345e-07, + "loss": 0.2942, + "step": 30741 + }, + { + "epoch": 1.4401086803766336, + "grad_norm": 0.5607332613915842, + "learning_rate": 9.590900989839818e-07, + "loss": 0.2517, + "step": 30742 + }, + { + "epoch": 1.440155525366562, + "grad_norm": 0.5997432563735288, + "learning_rate": 9.589407612443794e-07, + "loss": 0.2814, + "step": 30743 + }, + { + "epoch": 1.4402023703564903, + "grad_norm": 0.6223430459682586, + "learning_rate": 9.587914323731856e-07, + "loss": 0.2761, + "step": 30744 + }, + { + "epoch": 1.4402492153464186, + "grad_norm": 0.5838528934894024, + "learning_rate": 9.586421123712589e-07, + "loss": 0.2555, + "step": 30745 + }, + { + "epoch": 1.440296060336347, + "grad_norm": 0.5987158480613675, + "learning_rate": 9.584928012394592e-07, + "loss": 0.2729, + "step": 30746 + }, + { + "epoch": 1.4403429053262753, + "grad_norm": 0.5661154432571025, + "learning_rate": 9.58343498978646e-07, + "loss": 0.2586, + "step": 30747 + }, + { + "epoch": 1.4403897503162038, + "grad_norm": 0.5654410502274985, + "learning_rate": 9.581942055896788e-07, + "loss": 0.2792, + "step": 30748 + }, + { + "epoch": 1.440436595306132, + "grad_norm": 0.6009686052546809, + "learning_rate": 9.580449210734163e-07, + "loss": 0.275, + "step": 30749 + }, + { + "epoch": 1.4404834402960605, + "grad_norm": 0.601705929385633, + "learning_rate": 9.578956454307183e-07, + "loss": 0.2718, + "step": 30750 + }, + { + "epoch": 1.4405302852859887, + "grad_norm": 0.6019168916038363, + "learning_rate": 9.577463786624436e-07, + "loss": 0.2884, + "step": 30751 + }, + { + "epoch": 1.440577130275917, + "grad_norm": 0.5848706529602856, + "learning_rate": 9.575971207694501e-07, + "loss": 0.262, + "step": 30752 + }, + { + "epoch": 1.4406239752658454, + "grad_norm": 0.585104756494361, + "learning_rate": 9.574478717525973e-07, + "loss": 0.2595, + "step": 30753 + }, + { + "epoch": 1.4406708202557736, + "grad_norm": 0.6047149285102746, + "learning_rate": 9.572986316127442e-07, + "loss": 0.2666, + "step": 30754 + }, + { + "epoch": 1.4407176652457019, + "grad_norm": 0.6128792637473561, + "learning_rate": 9.571494003507497e-07, + "loss": 0.2848, + "step": 30755 + }, + { + "epoch": 1.4407645102356303, + "grad_norm": 0.5635094298813509, + "learning_rate": 9.570001779674728e-07, + "loss": 0.2557, + "step": 30756 + }, + { + "epoch": 1.4408113552255586, + "grad_norm": 0.6576330504879984, + "learning_rate": 9.56850964463773e-07, + "loss": 0.2853, + "step": 30757 + }, + { + "epoch": 1.4408582002154868, + "grad_norm": 0.5935878339741034, + "learning_rate": 9.567017598405078e-07, + "loss": 0.2676, + "step": 30758 + }, + { + "epoch": 1.4409050452054153, + "grad_norm": 0.6032441066346133, + "learning_rate": 9.565525640985355e-07, + "loss": 0.2835, + "step": 30759 + }, + { + "epoch": 1.4409518901953435, + "grad_norm": 0.5868741127327677, + "learning_rate": 9.56403377238715e-07, + "loss": 0.2722, + "step": 30760 + }, + { + "epoch": 1.440998735185272, + "grad_norm": 0.5872582425453066, + "learning_rate": 9.562541992619052e-07, + "loss": 0.2762, + "step": 30761 + }, + { + "epoch": 1.4410455801752002, + "grad_norm": 0.5549689739965693, + "learning_rate": 9.561050301689645e-07, + "loss": 0.2536, + "step": 30762 + }, + { + "epoch": 1.4410924251651287, + "grad_norm": 0.59908665370992, + "learning_rate": 9.559558699607521e-07, + "loss": 0.2643, + "step": 30763 + }, + { + "epoch": 1.441139270155057, + "grad_norm": 0.5462156880090663, + "learning_rate": 9.558067186381248e-07, + "loss": 0.263, + "step": 30764 + }, + { + "epoch": 1.4411861151449852, + "grad_norm": 0.5889501173380958, + "learning_rate": 9.556575762019427e-07, + "loss": 0.2768, + "step": 30765 + }, + { + "epoch": 1.4412329601349136, + "grad_norm": 0.6236261641095675, + "learning_rate": 9.55508442653062e-07, + "loss": 0.274, + "step": 30766 + }, + { + "epoch": 1.4412798051248419, + "grad_norm": 0.5882005916194825, + "learning_rate": 9.55359317992342e-07, + "loss": 0.27, + "step": 30767 + }, + { + "epoch": 1.4413266501147701, + "grad_norm": 0.5692464501513751, + "learning_rate": 9.55210202220641e-07, + "loss": 0.2648, + "step": 30768 + }, + { + "epoch": 1.4413734951046986, + "grad_norm": 0.5692755129679387, + "learning_rate": 9.550610953388178e-07, + "loss": 0.264, + "step": 30769 + }, + { + "epoch": 1.4414203400946268, + "grad_norm": 0.6292145075583326, + "learning_rate": 9.549119973477288e-07, + "loss": 0.282, + "step": 30770 + }, + { + "epoch": 1.441467185084555, + "grad_norm": 0.5944144452547605, + "learning_rate": 9.547629082482332e-07, + "loss": 0.2787, + "step": 30771 + }, + { + "epoch": 1.4415140300744835, + "grad_norm": 0.6064730583554342, + "learning_rate": 9.546138280411892e-07, + "loss": 0.2734, + "step": 30772 + }, + { + "epoch": 1.441560875064412, + "grad_norm": 0.5960538771185265, + "learning_rate": 9.544647567274537e-07, + "loss": 0.2695, + "step": 30773 + }, + { + "epoch": 1.4416077200543402, + "grad_norm": 0.6277788153549755, + "learning_rate": 9.543156943078847e-07, + "loss": 0.2938, + "step": 30774 + }, + { + "epoch": 1.4416545650442685, + "grad_norm": 0.5593910494911516, + "learning_rate": 9.541666407833414e-07, + "loss": 0.252, + "step": 30775 + }, + { + "epoch": 1.441701410034197, + "grad_norm": 0.6171206592517501, + "learning_rate": 9.540175961546796e-07, + "loss": 0.2788, + "step": 30776 + }, + { + "epoch": 1.4417482550241252, + "grad_norm": 0.6159809525546203, + "learning_rate": 9.538685604227581e-07, + "loss": 0.2923, + "step": 30777 + }, + { + "epoch": 1.4417951000140534, + "grad_norm": 0.5824449898508856, + "learning_rate": 9.537195335884345e-07, + "loss": 0.2638, + "step": 30778 + }, + { + "epoch": 1.4418419450039819, + "grad_norm": 0.6001183783334239, + "learning_rate": 9.53570515652567e-07, + "loss": 0.2668, + "step": 30779 + }, + { + "epoch": 1.4418887899939101, + "grad_norm": 0.5716057623061536, + "learning_rate": 9.534215066160118e-07, + "loss": 0.2746, + "step": 30780 + }, + { + "epoch": 1.4419356349838384, + "grad_norm": 0.5797140505327759, + "learning_rate": 9.53272506479628e-07, + "loss": 0.2672, + "step": 30781 + }, + { + "epoch": 1.4419824799737668, + "grad_norm": 0.5519465625663187, + "learning_rate": 9.531235152442711e-07, + "loss": 0.2615, + "step": 30782 + }, + { + "epoch": 1.442029324963695, + "grad_norm": 0.6411960514103585, + "learning_rate": 9.529745329107998e-07, + "loss": 0.2676, + "step": 30783 + }, + { + "epoch": 1.4420761699536235, + "grad_norm": 0.5598487506239606, + "learning_rate": 9.528255594800711e-07, + "loss": 0.2676, + "step": 30784 + }, + { + "epoch": 1.4421230149435518, + "grad_norm": 0.5905461424298418, + "learning_rate": 9.526765949529432e-07, + "loss": 0.2826, + "step": 30785 + }, + { + "epoch": 1.4421698599334802, + "grad_norm": 0.6249434884427306, + "learning_rate": 9.525276393302721e-07, + "loss": 0.282, + "step": 30786 + }, + { + "epoch": 1.4422167049234085, + "grad_norm": 0.5819539954395728, + "learning_rate": 9.523786926129161e-07, + "loss": 0.2688, + "step": 30787 + }, + { + "epoch": 1.4422635499133367, + "grad_norm": 0.5829406009710429, + "learning_rate": 9.522297548017307e-07, + "loss": 0.2703, + "step": 30788 + }, + { + "epoch": 1.4423103949032652, + "grad_norm": 0.6337506917453731, + "learning_rate": 9.520808258975742e-07, + "loss": 0.2645, + "step": 30789 + }, + { + "epoch": 1.4423572398931934, + "grad_norm": 0.6057159797868485, + "learning_rate": 9.519319059013036e-07, + "loss": 0.2849, + "step": 30790 + }, + { + "epoch": 1.4424040848831217, + "grad_norm": 0.571230805583411, + "learning_rate": 9.517829948137758e-07, + "loss": 0.2729, + "step": 30791 + }, + { + "epoch": 1.4424509298730501, + "grad_norm": 0.5958805085302485, + "learning_rate": 9.516340926358486e-07, + "loss": 0.2638, + "step": 30792 + }, + { + "epoch": 1.4424977748629784, + "grad_norm": 0.6210072855501128, + "learning_rate": 9.514851993683777e-07, + "loss": 0.2597, + "step": 30793 + }, + { + "epoch": 1.4425446198529066, + "grad_norm": 0.6215264620148004, + "learning_rate": 9.513363150122198e-07, + "loss": 0.2847, + "step": 30794 + }, + { + "epoch": 1.442591464842835, + "grad_norm": 0.6227623991096259, + "learning_rate": 9.511874395682319e-07, + "loss": 0.2828, + "step": 30795 + }, + { + "epoch": 1.4426383098327633, + "grad_norm": 0.689594611096841, + "learning_rate": 9.510385730372712e-07, + "loss": 0.299, + "step": 30796 + }, + { + "epoch": 1.4426851548226918, + "grad_norm": 0.6450950057118704, + "learning_rate": 9.50889715420194e-07, + "loss": 0.2678, + "step": 30797 + }, + { + "epoch": 1.44273199981262, + "grad_norm": 0.5643456052440802, + "learning_rate": 9.507408667178575e-07, + "loss": 0.2686, + "step": 30798 + }, + { + "epoch": 1.4427788448025485, + "grad_norm": 0.6082145923504548, + "learning_rate": 9.505920269311184e-07, + "loss": 0.257, + "step": 30799 + }, + { + "epoch": 1.4428256897924767, + "grad_norm": 0.5647204332518254, + "learning_rate": 9.504431960608329e-07, + "loss": 0.2658, + "step": 30800 + }, + { + "epoch": 1.442872534782405, + "grad_norm": 0.5771298015366366, + "learning_rate": 9.502943741078566e-07, + "loss": 0.2742, + "step": 30801 + }, + { + "epoch": 1.4429193797723334, + "grad_norm": 0.6455130786051632, + "learning_rate": 9.501455610730464e-07, + "loss": 0.2849, + "step": 30802 + }, + { + "epoch": 1.4429662247622617, + "grad_norm": 0.5736785994704171, + "learning_rate": 9.49996756957259e-07, + "loss": 0.2752, + "step": 30803 + }, + { + "epoch": 1.44301306975219, + "grad_norm": 0.6159586930468757, + "learning_rate": 9.498479617613509e-07, + "loss": 0.2678, + "step": 30804 + }, + { + "epoch": 1.4430599147421184, + "grad_norm": 0.5855468557382703, + "learning_rate": 9.49699175486179e-07, + "loss": 0.2659, + "step": 30805 + }, + { + "epoch": 1.4431067597320466, + "grad_norm": 0.5764532130588093, + "learning_rate": 9.495503981325977e-07, + "loss": 0.2713, + "step": 30806 + }, + { + "epoch": 1.4431536047219748, + "grad_norm": 0.5910724288590803, + "learning_rate": 9.494016297014649e-07, + "loss": 0.277, + "step": 30807 + }, + { + "epoch": 1.4432004497119033, + "grad_norm": 0.6098064161866245, + "learning_rate": 9.492528701936354e-07, + "loss": 0.283, + "step": 30808 + }, + { + "epoch": 1.4432472947018318, + "grad_norm": 0.5778799636690908, + "learning_rate": 9.491041196099656e-07, + "loss": 0.2659, + "step": 30809 + }, + { + "epoch": 1.44329413969176, + "grad_norm": 0.6054014041305135, + "learning_rate": 9.489553779513122e-07, + "loss": 0.2769, + "step": 30810 + }, + { + "epoch": 1.4433409846816883, + "grad_norm": 0.6056345446465767, + "learning_rate": 9.488066452185313e-07, + "loss": 0.297, + "step": 30811 + }, + { + "epoch": 1.4433878296716167, + "grad_norm": 0.6127172902647235, + "learning_rate": 9.486579214124775e-07, + "loss": 0.2611, + "step": 30812 + }, + { + "epoch": 1.443434674661545, + "grad_norm": 0.5422764669468063, + "learning_rate": 9.485092065340073e-07, + "loss": 0.2532, + "step": 30813 + }, + { + "epoch": 1.4434815196514732, + "grad_norm": 0.6128510183862894, + "learning_rate": 9.48360500583978e-07, + "loss": 0.2832, + "step": 30814 + }, + { + "epoch": 1.4435283646414017, + "grad_norm": 0.5754648646167427, + "learning_rate": 9.482118035632426e-07, + "loss": 0.2673, + "step": 30815 + }, + { + "epoch": 1.44357520963133, + "grad_norm": 0.558285292884149, + "learning_rate": 9.480631154726586e-07, + "loss": 0.2647, + "step": 30816 + }, + { + "epoch": 1.4436220546212581, + "grad_norm": 0.5671544421002637, + "learning_rate": 9.479144363130821e-07, + "loss": 0.2584, + "step": 30817 + }, + { + "epoch": 1.4436688996111866, + "grad_norm": 0.5945758908366824, + "learning_rate": 9.47765766085367e-07, + "loss": 0.271, + "step": 30818 + }, + { + "epoch": 1.4437157446011148, + "grad_norm": 0.5801556908014578, + "learning_rate": 9.476171047903696e-07, + "loss": 0.2691, + "step": 30819 + }, + { + "epoch": 1.4437625895910433, + "grad_norm": 0.5949139005990018, + "learning_rate": 9.47468452428946e-07, + "loss": 0.2919, + "step": 30820 + }, + { + "epoch": 1.4438094345809716, + "grad_norm": 0.5558183677762317, + "learning_rate": 9.473198090019517e-07, + "loss": 0.2505, + "step": 30821 + }, + { + "epoch": 1.4438562795709, + "grad_norm": 0.6562968076878585, + "learning_rate": 9.471711745102411e-07, + "loss": 0.2827, + "step": 30822 + }, + { + "epoch": 1.4439031245608283, + "grad_norm": 0.636655950075526, + "learning_rate": 9.470225489546697e-07, + "loss": 0.2788, + "step": 30823 + }, + { + "epoch": 1.4439499695507565, + "grad_norm": 0.5784842302573929, + "learning_rate": 9.468739323360945e-07, + "loss": 0.26, + "step": 30824 + }, + { + "epoch": 1.443996814540685, + "grad_norm": 0.6127646792561006, + "learning_rate": 9.467253246553684e-07, + "loss": 0.2659, + "step": 30825 + }, + { + "epoch": 1.4440436595306132, + "grad_norm": 0.5593889593202404, + "learning_rate": 9.465767259133476e-07, + "loss": 0.2658, + "step": 30826 + }, + { + "epoch": 1.4440905045205414, + "grad_norm": 0.5672891445375322, + "learning_rate": 9.464281361108882e-07, + "loss": 0.2528, + "step": 30827 + }, + { + "epoch": 1.44413734951047, + "grad_norm": 0.6194830922778285, + "learning_rate": 9.462795552488436e-07, + "loss": 0.2774, + "step": 30828 + }, + { + "epoch": 1.4441841945003981, + "grad_norm": 0.5765570806917136, + "learning_rate": 9.461309833280696e-07, + "loss": 0.2684, + "step": 30829 + }, + { + "epoch": 1.4442310394903264, + "grad_norm": 0.6003357884609323, + "learning_rate": 9.459824203494223e-07, + "loss": 0.281, + "step": 30830 + }, + { + "epoch": 1.4442778844802548, + "grad_norm": 0.6071237025681263, + "learning_rate": 9.458338663137545e-07, + "loss": 0.29, + "step": 30831 + }, + { + "epoch": 1.444324729470183, + "grad_norm": 0.5900275679902912, + "learning_rate": 9.456853212219222e-07, + "loss": 0.2637, + "step": 30832 + }, + { + "epoch": 1.4443715744601116, + "grad_norm": 0.5877471987524995, + "learning_rate": 9.455367850747801e-07, + "loss": 0.2759, + "step": 30833 + }, + { + "epoch": 1.4444184194500398, + "grad_norm": 0.6092902114402701, + "learning_rate": 9.453882578731843e-07, + "loss": 0.2902, + "step": 30834 + }, + { + "epoch": 1.4444652644399683, + "grad_norm": 0.594981864727516, + "learning_rate": 9.452397396179871e-07, + "loss": 0.2619, + "step": 30835 + }, + { + "epoch": 1.4445121094298965, + "grad_norm": 0.6465289632350921, + "learning_rate": 9.450912303100454e-07, + "loss": 0.2828, + "step": 30836 + }, + { + "epoch": 1.4445589544198247, + "grad_norm": 0.5884988294631791, + "learning_rate": 9.449427299502123e-07, + "loss": 0.2593, + "step": 30837 + }, + { + "epoch": 1.4446057994097532, + "grad_norm": 0.6045455020522275, + "learning_rate": 9.447942385393427e-07, + "loss": 0.2702, + "step": 30838 + }, + { + "epoch": 1.4446526443996814, + "grad_norm": 0.595271359841523, + "learning_rate": 9.446457560782912e-07, + "loss": 0.2746, + "step": 30839 + }, + { + "epoch": 1.4446994893896097, + "grad_norm": 0.5982005088150552, + "learning_rate": 9.444972825679127e-07, + "loss": 0.2744, + "step": 30840 + }, + { + "epoch": 1.4447463343795381, + "grad_norm": 0.629771240674017, + "learning_rate": 9.44348818009062e-07, + "loss": 0.2711, + "step": 30841 + }, + { + "epoch": 1.4447931793694664, + "grad_norm": 0.603564553591856, + "learning_rate": 9.44200362402593e-07, + "loss": 0.2729, + "step": 30842 + }, + { + "epoch": 1.4448400243593946, + "grad_norm": 0.5590742203950239, + "learning_rate": 9.440519157493591e-07, + "loss": 0.2535, + "step": 30843 + }, + { + "epoch": 1.444886869349323, + "grad_norm": 0.5783889794392649, + "learning_rate": 9.439034780502154e-07, + "loss": 0.2694, + "step": 30844 + }, + { + "epoch": 1.4449337143392516, + "grad_norm": 0.5402316609050782, + "learning_rate": 9.43755049306016e-07, + "loss": 0.2698, + "step": 30845 + }, + { + "epoch": 1.4449805593291798, + "grad_norm": 0.5933105490501416, + "learning_rate": 9.436066295176149e-07, + "loss": 0.2857, + "step": 30846 + }, + { + "epoch": 1.445027404319108, + "grad_norm": 0.5857169844078295, + "learning_rate": 9.434582186858668e-07, + "loss": 0.2714, + "step": 30847 + }, + { + "epoch": 1.4450742493090365, + "grad_norm": 0.6656711166737479, + "learning_rate": 9.433098168116262e-07, + "loss": 0.2838, + "step": 30848 + }, + { + "epoch": 1.4451210942989647, + "grad_norm": 0.6117705318485575, + "learning_rate": 9.431614238957462e-07, + "loss": 0.2835, + "step": 30849 + }, + { + "epoch": 1.445167939288893, + "grad_norm": 0.5853147207337993, + "learning_rate": 9.430130399390802e-07, + "loss": 0.2716, + "step": 30850 + }, + { + "epoch": 1.4452147842788214, + "grad_norm": 0.5924245532082839, + "learning_rate": 9.428646649424827e-07, + "loss": 0.2711, + "step": 30851 + }, + { + "epoch": 1.4452616292687497, + "grad_norm": 0.6036249990532583, + "learning_rate": 9.427162989068078e-07, + "loss": 0.2901, + "step": 30852 + }, + { + "epoch": 1.445308474258678, + "grad_norm": 0.581179309552143, + "learning_rate": 9.425679418329093e-07, + "loss": 0.2712, + "step": 30853 + }, + { + "epoch": 1.4453553192486064, + "grad_norm": 0.5854014617489353, + "learning_rate": 9.424195937216413e-07, + "loss": 0.278, + "step": 30854 + }, + { + "epoch": 1.4454021642385346, + "grad_norm": 0.6055508479768443, + "learning_rate": 9.422712545738566e-07, + "loss": 0.2695, + "step": 30855 + }, + { + "epoch": 1.445449009228463, + "grad_norm": 0.6010991335393234, + "learning_rate": 9.421229243904099e-07, + "loss": 0.2835, + "step": 30856 + }, + { + "epoch": 1.4454958542183913, + "grad_norm": 0.6136501603812594, + "learning_rate": 9.419746031721535e-07, + "loss": 0.2777, + "step": 30857 + }, + { + "epoch": 1.4455426992083198, + "grad_norm": 0.5627208026339536, + "learning_rate": 9.418262909199416e-07, + "loss": 0.2647, + "step": 30858 + }, + { + "epoch": 1.445589544198248, + "grad_norm": 0.5691066782569404, + "learning_rate": 9.416779876346277e-07, + "loss": 0.2651, + "step": 30859 + }, + { + "epoch": 1.4456363891881763, + "grad_norm": 0.609480185794394, + "learning_rate": 9.415296933170665e-07, + "loss": 0.2673, + "step": 30860 + }, + { + "epoch": 1.4456832341781047, + "grad_norm": 0.597627232313029, + "learning_rate": 9.413814079681091e-07, + "loss": 0.2695, + "step": 30861 + }, + { + "epoch": 1.445730079168033, + "grad_norm": 0.6063341107227659, + "learning_rate": 9.412331315886098e-07, + "loss": 0.2718, + "step": 30862 + }, + { + "epoch": 1.4457769241579612, + "grad_norm": 0.6403039395956852, + "learning_rate": 9.410848641794232e-07, + "loss": 0.277, + "step": 30863 + }, + { + "epoch": 1.4458237691478897, + "grad_norm": 0.5919771243617493, + "learning_rate": 9.409366057414004e-07, + "loss": 0.2802, + "step": 30864 + }, + { + "epoch": 1.445870614137818, + "grad_norm": 0.6053801451672358, + "learning_rate": 9.407883562753956e-07, + "loss": 0.2639, + "step": 30865 + }, + { + "epoch": 1.4459174591277462, + "grad_norm": 0.5781804924005332, + "learning_rate": 9.406401157822625e-07, + "loss": 0.2696, + "step": 30866 + }, + { + "epoch": 1.4459643041176746, + "grad_norm": 0.5560330378318171, + "learning_rate": 9.404918842628533e-07, + "loss": 0.263, + "step": 30867 + }, + { + "epoch": 1.4460111491076029, + "grad_norm": 0.5981441259756644, + "learning_rate": 9.403436617180208e-07, + "loss": 0.2766, + "step": 30868 + }, + { + "epoch": 1.4460579940975313, + "grad_norm": 0.6438992795292513, + "learning_rate": 9.401954481486189e-07, + "loss": 0.2694, + "step": 30869 + }, + { + "epoch": 1.4461048390874596, + "grad_norm": 0.5918008225224222, + "learning_rate": 9.400472435555008e-07, + "loss": 0.2749, + "step": 30870 + }, + { + "epoch": 1.446151684077388, + "grad_norm": 0.6112139690114801, + "learning_rate": 9.398990479395179e-07, + "loss": 0.2845, + "step": 30871 + }, + { + "epoch": 1.4461985290673163, + "grad_norm": 0.5857903360381129, + "learning_rate": 9.397508613015249e-07, + "loss": 0.2679, + "step": 30872 + }, + { + "epoch": 1.4462453740572445, + "grad_norm": 0.5811661544101941, + "learning_rate": 9.396026836423725e-07, + "loss": 0.2642, + "step": 30873 + }, + { + "epoch": 1.446292219047173, + "grad_norm": 0.5841188190398272, + "learning_rate": 9.394545149629145e-07, + "loss": 0.2649, + "step": 30874 + }, + { + "epoch": 1.4463390640371012, + "grad_norm": 0.5757026529216784, + "learning_rate": 9.393063552640036e-07, + "loss": 0.2526, + "step": 30875 + }, + { + "epoch": 1.4463859090270295, + "grad_norm": 0.6196633100619255, + "learning_rate": 9.391582045464931e-07, + "loss": 0.2755, + "step": 30876 + }, + { + "epoch": 1.446432754016958, + "grad_norm": 0.5693335770537765, + "learning_rate": 9.390100628112342e-07, + "loss": 0.2655, + "step": 30877 + }, + { + "epoch": 1.4464795990068862, + "grad_norm": 0.5995681622014911, + "learning_rate": 9.388619300590806e-07, + "loss": 0.2671, + "step": 30878 + }, + { + "epoch": 1.4465264439968144, + "grad_norm": 0.5890814670811647, + "learning_rate": 9.387138062908835e-07, + "loss": 0.2724, + "step": 30879 + }, + { + "epoch": 1.4465732889867429, + "grad_norm": 0.6074428779301466, + "learning_rate": 9.385656915074961e-07, + "loss": 0.2691, + "step": 30880 + }, + { + "epoch": 1.4466201339766713, + "grad_norm": 0.650260757433458, + "learning_rate": 9.384175857097704e-07, + "loss": 0.2734, + "step": 30881 + }, + { + "epoch": 1.4466669789665996, + "grad_norm": 0.6084160629676499, + "learning_rate": 9.382694888985594e-07, + "loss": 0.2615, + "step": 30882 + }, + { + "epoch": 1.4467138239565278, + "grad_norm": 0.5680026464882949, + "learning_rate": 9.381214010747158e-07, + "loss": 0.2775, + "step": 30883 + }, + { + "epoch": 1.4467606689464563, + "grad_norm": 0.5568713124060872, + "learning_rate": 9.379733222390908e-07, + "loss": 0.2666, + "step": 30884 + }, + { + "epoch": 1.4468075139363845, + "grad_norm": 0.6072446326281905, + "learning_rate": 9.378252523925358e-07, + "loss": 0.2729, + "step": 30885 + }, + { + "epoch": 1.4468543589263128, + "grad_norm": 0.6501332400992755, + "learning_rate": 9.37677191535904e-07, + "loss": 0.2819, + "step": 30886 + }, + { + "epoch": 1.4469012039162412, + "grad_norm": 0.5964382368496076, + "learning_rate": 9.375291396700473e-07, + "loss": 0.2694, + "step": 30887 + }, + { + "epoch": 1.4469480489061695, + "grad_norm": 0.6233946777807393, + "learning_rate": 9.373810967958175e-07, + "loss": 0.2844, + "step": 30888 + }, + { + "epoch": 1.4469948938960977, + "grad_norm": 0.6135446754832805, + "learning_rate": 9.372330629140669e-07, + "loss": 0.275, + "step": 30889 + }, + { + "epoch": 1.4470417388860262, + "grad_norm": 0.5946207364962914, + "learning_rate": 9.37085038025648e-07, + "loss": 0.2702, + "step": 30890 + }, + { + "epoch": 1.4470885838759544, + "grad_norm": 0.5722771124414727, + "learning_rate": 9.36937022131412e-07, + "loss": 0.2639, + "step": 30891 + }, + { + "epoch": 1.4471354288658829, + "grad_norm": 0.6016765474195338, + "learning_rate": 9.367890152322096e-07, + "loss": 0.2785, + "step": 30892 + }, + { + "epoch": 1.447182273855811, + "grad_norm": 0.5907214673780917, + "learning_rate": 9.366410173288937e-07, + "loss": 0.2705, + "step": 30893 + }, + { + "epoch": 1.4472291188457396, + "grad_norm": 0.5878714597044629, + "learning_rate": 9.364930284223156e-07, + "loss": 0.2513, + "step": 30894 + }, + { + "epoch": 1.4472759638356678, + "grad_norm": 0.6336385641250444, + "learning_rate": 9.363450485133271e-07, + "loss": 0.2655, + "step": 30895 + }, + { + "epoch": 1.447322808825596, + "grad_norm": 0.6082246637685019, + "learning_rate": 9.361970776027801e-07, + "loss": 0.2755, + "step": 30896 + }, + { + "epoch": 1.4473696538155245, + "grad_norm": 0.5886737479999435, + "learning_rate": 9.360491156915266e-07, + "loss": 0.2688, + "step": 30897 + }, + { + "epoch": 1.4474164988054528, + "grad_norm": 0.604264675133339, + "learning_rate": 9.359011627804174e-07, + "loss": 0.2793, + "step": 30898 + }, + { + "epoch": 1.447463343795381, + "grad_norm": 0.6282285916194879, + "learning_rate": 9.357532188703028e-07, + "loss": 0.278, + "step": 30899 + }, + { + "epoch": 1.4475101887853095, + "grad_norm": 0.5566704462242363, + "learning_rate": 9.356052839620356e-07, + "loss": 0.2666, + "step": 30900 + }, + { + "epoch": 1.4475570337752377, + "grad_norm": 0.5838492339050457, + "learning_rate": 9.354573580564666e-07, + "loss": 0.2755, + "step": 30901 + }, + { + "epoch": 1.447603878765166, + "grad_norm": 0.5897336040290244, + "learning_rate": 9.353094411544473e-07, + "loss": 0.2786, + "step": 30902 + }, + { + "epoch": 1.4476507237550944, + "grad_norm": 0.621012835563174, + "learning_rate": 9.351615332568298e-07, + "loss": 0.2795, + "step": 30903 + }, + { + "epoch": 1.4476975687450226, + "grad_norm": 0.5961355195378625, + "learning_rate": 9.350136343644633e-07, + "loss": 0.261, + "step": 30904 + }, + { + "epoch": 1.447744413734951, + "grad_norm": 0.565830821142737, + "learning_rate": 9.348657444782011e-07, + "loss": 0.2836, + "step": 30905 + }, + { + "epoch": 1.4477912587248793, + "grad_norm": 0.6110331475795356, + "learning_rate": 9.347178635988921e-07, + "loss": 0.2804, + "step": 30906 + }, + { + "epoch": 1.4478381037148078, + "grad_norm": 0.562294431533347, + "learning_rate": 9.345699917273885e-07, + "loss": 0.2549, + "step": 30907 + }, + { + "epoch": 1.447884948704736, + "grad_norm": 0.591942680859737, + "learning_rate": 9.344221288645411e-07, + "loss": 0.2679, + "step": 30908 + }, + { + "epoch": 1.4479317936946643, + "grad_norm": 0.5633893507191655, + "learning_rate": 9.342742750112019e-07, + "loss": 0.2441, + "step": 30909 + }, + { + "epoch": 1.4479786386845928, + "grad_norm": 0.5956702849446655, + "learning_rate": 9.341264301682196e-07, + "loss": 0.2863, + "step": 30910 + }, + { + "epoch": 1.448025483674521, + "grad_norm": 0.6293144032657897, + "learning_rate": 9.339785943364463e-07, + "loss": 0.2837, + "step": 30911 + }, + { + "epoch": 1.4480723286644492, + "grad_norm": 0.5826030520309341, + "learning_rate": 9.338307675167335e-07, + "loss": 0.2567, + "step": 30912 + }, + { + "epoch": 1.4481191736543777, + "grad_norm": 0.5928080708854805, + "learning_rate": 9.336829497099298e-07, + "loss": 0.282, + "step": 30913 + }, + { + "epoch": 1.448166018644306, + "grad_norm": 0.5774350382291941, + "learning_rate": 9.335351409168875e-07, + "loss": 0.2588, + "step": 30914 + }, + { + "epoch": 1.4482128636342342, + "grad_norm": 0.5903643859316883, + "learning_rate": 9.333873411384572e-07, + "loss": 0.273, + "step": 30915 + }, + { + "epoch": 1.4482597086241626, + "grad_norm": 0.5631176138902813, + "learning_rate": 9.332395503754885e-07, + "loss": 0.2687, + "step": 30916 + }, + { + "epoch": 1.448306553614091, + "grad_norm": 0.5814351673868788, + "learning_rate": 9.330917686288321e-07, + "loss": 0.284, + "step": 30917 + }, + { + "epoch": 1.4483533986040193, + "grad_norm": 0.5827236468993691, + "learning_rate": 9.329439958993388e-07, + "loss": 0.2719, + "step": 30918 + }, + { + "epoch": 1.4484002435939476, + "grad_norm": 0.6015976969696403, + "learning_rate": 9.327962321878597e-07, + "loss": 0.2853, + "step": 30919 + }, + { + "epoch": 1.448447088583876, + "grad_norm": 0.622897374091437, + "learning_rate": 9.326484774952438e-07, + "loss": 0.2881, + "step": 30920 + }, + { + "epoch": 1.4484939335738043, + "grad_norm": 0.616318849679053, + "learning_rate": 9.325007318223428e-07, + "loss": 0.2768, + "step": 30921 + }, + { + "epoch": 1.4485407785637325, + "grad_norm": 0.6364949548583905, + "learning_rate": 9.32352995170005e-07, + "loss": 0.2843, + "step": 30922 + }, + { + "epoch": 1.448587623553661, + "grad_norm": 0.5760568435717278, + "learning_rate": 9.322052675390819e-07, + "loss": 0.2614, + "step": 30923 + }, + { + "epoch": 1.4486344685435892, + "grad_norm": 0.5989579369060382, + "learning_rate": 9.320575489304234e-07, + "loss": 0.2682, + "step": 30924 + }, + { + "epoch": 1.4486813135335175, + "grad_norm": 0.6206544665597059, + "learning_rate": 9.319098393448805e-07, + "loss": 0.2835, + "step": 30925 + }, + { + "epoch": 1.448728158523446, + "grad_norm": 0.5845461982552782, + "learning_rate": 9.317621387833014e-07, + "loss": 0.28, + "step": 30926 + }, + { + "epoch": 1.4487750035133742, + "grad_norm": 0.5968272362503381, + "learning_rate": 9.316144472465379e-07, + "loss": 0.2696, + "step": 30927 + }, + { + "epoch": 1.4488218485033026, + "grad_norm": 0.557254740246585, + "learning_rate": 9.314667647354384e-07, + "loss": 0.2703, + "step": 30928 + }, + { + "epoch": 1.4488686934932309, + "grad_norm": 0.6156385868815224, + "learning_rate": 9.313190912508532e-07, + "loss": 0.27, + "step": 30929 + }, + { + "epoch": 1.4489155384831593, + "grad_norm": 0.5792745642965818, + "learning_rate": 9.311714267936323e-07, + "loss": 0.2799, + "step": 30930 + }, + { + "epoch": 1.4489623834730876, + "grad_norm": 0.6083097988168673, + "learning_rate": 9.310237713646259e-07, + "loss": 0.2962, + "step": 30931 + }, + { + "epoch": 1.4490092284630158, + "grad_norm": 0.5546418641559203, + "learning_rate": 9.308761249646839e-07, + "loss": 0.2622, + "step": 30932 + }, + { + "epoch": 1.4490560734529443, + "grad_norm": 0.6207748973366224, + "learning_rate": 9.307284875946554e-07, + "loss": 0.279, + "step": 30933 + }, + { + "epoch": 1.4491029184428725, + "grad_norm": 0.5842176571742598, + "learning_rate": 9.305808592553892e-07, + "loss": 0.2656, + "step": 30934 + }, + { + "epoch": 1.4491497634328008, + "grad_norm": 0.5936000267067235, + "learning_rate": 9.304332399477358e-07, + "loss": 0.2807, + "step": 30935 + }, + { + "epoch": 1.4491966084227292, + "grad_norm": 0.5986518174985896, + "learning_rate": 9.302856296725446e-07, + "loss": 0.2724, + "step": 30936 + }, + { + "epoch": 1.4492434534126575, + "grad_norm": 0.6255793207316298, + "learning_rate": 9.301380284306649e-07, + "loss": 0.2746, + "step": 30937 + }, + { + "epoch": 1.4492902984025857, + "grad_norm": 0.5672161263304413, + "learning_rate": 9.299904362229464e-07, + "loss": 0.2612, + "step": 30938 + }, + { + "epoch": 1.4493371433925142, + "grad_norm": 0.5504166632102651, + "learning_rate": 9.298428530502393e-07, + "loss": 0.2607, + "step": 30939 + }, + { + "epoch": 1.4493839883824424, + "grad_norm": 0.6341695551261389, + "learning_rate": 9.296952789133917e-07, + "loss": 0.2668, + "step": 30940 + }, + { + "epoch": 1.4494308333723709, + "grad_norm": 0.6097164897188643, + "learning_rate": 9.295477138132525e-07, + "loss": 0.2667, + "step": 30941 + }, + { + "epoch": 1.4494776783622991, + "grad_norm": 0.5971514965393794, + "learning_rate": 9.294001577506714e-07, + "loss": 0.2816, + "step": 30942 + }, + { + "epoch": 1.4495245233522276, + "grad_norm": 0.6090043574269847, + "learning_rate": 9.292526107264976e-07, + "loss": 0.283, + "step": 30943 + }, + { + "epoch": 1.4495713683421558, + "grad_norm": 0.5846533797638113, + "learning_rate": 9.291050727415801e-07, + "loss": 0.2622, + "step": 30944 + }, + { + "epoch": 1.449618213332084, + "grad_norm": 0.6164583324192007, + "learning_rate": 9.28957543796769e-07, + "loss": 0.2796, + "step": 30945 + }, + { + "epoch": 1.4496650583220125, + "grad_norm": 0.6258294661655023, + "learning_rate": 9.288100238929118e-07, + "loss": 0.2577, + "step": 30946 + }, + { + "epoch": 1.4497119033119408, + "grad_norm": 0.5903052557324256, + "learning_rate": 9.286625130308585e-07, + "loss": 0.2657, + "step": 30947 + }, + { + "epoch": 1.449758748301869, + "grad_norm": 0.6268031272802214, + "learning_rate": 9.285150112114569e-07, + "loss": 0.2834, + "step": 30948 + }, + { + "epoch": 1.4498055932917975, + "grad_norm": 0.5988615006992261, + "learning_rate": 9.283675184355562e-07, + "loss": 0.2767, + "step": 30949 + }, + { + "epoch": 1.4498524382817257, + "grad_norm": 0.6625816902279507, + "learning_rate": 9.282200347040055e-07, + "loss": 0.2839, + "step": 30950 + }, + { + "epoch": 1.449899283271654, + "grad_norm": 0.6263219172024802, + "learning_rate": 9.280725600176543e-07, + "loss": 0.2736, + "step": 30951 + }, + { + "epoch": 1.4499461282615824, + "grad_norm": 0.5873538036474111, + "learning_rate": 9.279250943773496e-07, + "loss": 0.2761, + "step": 30952 + }, + { + "epoch": 1.4499929732515109, + "grad_norm": 0.589828116239532, + "learning_rate": 9.277776377839409e-07, + "loss": 0.2692, + "step": 30953 + }, + { + "epoch": 1.4500398182414391, + "grad_norm": 0.5818315872797767, + "learning_rate": 9.276301902382775e-07, + "loss": 0.2618, + "step": 30954 + }, + { + "epoch": 1.4500866632313674, + "grad_norm": 0.6476418512004459, + "learning_rate": 9.274827517412064e-07, + "loss": 0.2895, + "step": 30955 + }, + { + "epoch": 1.4501335082212958, + "grad_norm": 0.5897652994767875, + "learning_rate": 9.273353222935769e-07, + "loss": 0.2713, + "step": 30956 + }, + { + "epoch": 1.450180353211224, + "grad_norm": 0.6023477754537008, + "learning_rate": 9.271879018962379e-07, + "loss": 0.2738, + "step": 30957 + }, + { + "epoch": 1.4502271982011523, + "grad_norm": 0.6228807920258239, + "learning_rate": 9.270404905500366e-07, + "loss": 0.2809, + "step": 30958 + }, + { + "epoch": 1.4502740431910808, + "grad_norm": 0.5962784660043022, + "learning_rate": 9.268930882558219e-07, + "loss": 0.2654, + "step": 30959 + }, + { + "epoch": 1.450320888181009, + "grad_norm": 0.6293457229875302, + "learning_rate": 9.267456950144418e-07, + "loss": 0.2622, + "step": 30960 + }, + { + "epoch": 1.4503677331709373, + "grad_norm": 0.5534911924168462, + "learning_rate": 9.265983108267459e-07, + "loss": 0.2501, + "step": 30961 + }, + { + "epoch": 1.4504145781608657, + "grad_norm": 0.5949586800702422, + "learning_rate": 9.264509356935805e-07, + "loss": 0.2706, + "step": 30962 + }, + { + "epoch": 1.450461423150794, + "grad_norm": 0.585387091809621, + "learning_rate": 9.263035696157943e-07, + "loss": 0.2669, + "step": 30963 + }, + { + "epoch": 1.4505082681407224, + "grad_norm": 0.5751578523560879, + "learning_rate": 9.261562125942367e-07, + "loss": 0.2681, + "step": 30964 + }, + { + "epoch": 1.4505551131306507, + "grad_norm": 0.6054012726967538, + "learning_rate": 9.260088646297533e-07, + "loss": 0.269, + "step": 30965 + }, + { + "epoch": 1.4506019581205791, + "grad_norm": 0.5973307746902033, + "learning_rate": 9.258615257231934e-07, + "loss": 0.2592, + "step": 30966 + }, + { + "epoch": 1.4506488031105074, + "grad_norm": 0.5619457045715002, + "learning_rate": 9.257141958754057e-07, + "loss": 0.2564, + "step": 30967 + }, + { + "epoch": 1.4506956481004356, + "grad_norm": 0.551351350032344, + "learning_rate": 9.255668750872362e-07, + "loss": 0.2675, + "step": 30968 + }, + { + "epoch": 1.450742493090364, + "grad_norm": 0.6028853638209019, + "learning_rate": 9.254195633595336e-07, + "loss": 0.2653, + "step": 30969 + }, + { + "epoch": 1.4507893380802923, + "grad_norm": 0.5542804373821877, + "learning_rate": 9.252722606931463e-07, + "loss": 0.267, + "step": 30970 + }, + { + "epoch": 1.4508361830702206, + "grad_norm": 0.6002474654010106, + "learning_rate": 9.251249670889209e-07, + "loss": 0.276, + "step": 30971 + }, + { + "epoch": 1.450883028060149, + "grad_norm": 0.5957867131573739, + "learning_rate": 9.249776825477052e-07, + "loss": 0.2615, + "step": 30972 + }, + { + "epoch": 1.4509298730500773, + "grad_norm": 0.6775192662754173, + "learning_rate": 9.248304070703474e-07, + "loss": 0.298, + "step": 30973 + }, + { + "epoch": 1.4509767180400055, + "grad_norm": 0.5726251094620599, + "learning_rate": 9.246831406576953e-07, + "loss": 0.28, + "step": 30974 + }, + { + "epoch": 1.451023563029934, + "grad_norm": 0.5699569836262387, + "learning_rate": 9.24535883310595e-07, + "loss": 0.2611, + "step": 30975 + }, + { + "epoch": 1.4510704080198622, + "grad_norm": 0.6064300772941027, + "learning_rate": 9.243886350298952e-07, + "loss": 0.2717, + "step": 30976 + }, + { + "epoch": 1.4511172530097907, + "grad_norm": 0.581284275633006, + "learning_rate": 9.242413958164426e-07, + "loss": 0.2628, + "step": 30977 + }, + { + "epoch": 1.451164097999719, + "grad_norm": 0.5735331219194285, + "learning_rate": 9.240941656710842e-07, + "loss": 0.2767, + "step": 30978 + }, + { + "epoch": 1.4512109429896474, + "grad_norm": 0.5885209204233005, + "learning_rate": 9.23946944594668e-07, + "loss": 0.2668, + "step": 30979 + }, + { + "epoch": 1.4512577879795756, + "grad_norm": 0.6032618620518992, + "learning_rate": 9.23799732588041e-07, + "loss": 0.2712, + "step": 30980 + }, + { + "epoch": 1.4513046329695038, + "grad_norm": 0.5737672848048607, + "learning_rate": 9.236525296520513e-07, + "loss": 0.2706, + "step": 30981 + }, + { + "epoch": 1.4513514779594323, + "grad_norm": 0.5837804848826096, + "learning_rate": 9.235053357875448e-07, + "loss": 0.245, + "step": 30982 + }, + { + "epoch": 1.4513983229493606, + "grad_norm": 0.5979994936416292, + "learning_rate": 9.233581509953682e-07, + "loss": 0.2688, + "step": 30983 + }, + { + "epoch": 1.4514451679392888, + "grad_norm": 0.5997043805388952, + "learning_rate": 9.232109752763693e-07, + "loss": 0.2851, + "step": 30984 + }, + { + "epoch": 1.4514920129292173, + "grad_norm": 0.6104962409524493, + "learning_rate": 9.230638086313948e-07, + "loss": 0.275, + "step": 30985 + }, + { + "epoch": 1.4515388579191455, + "grad_norm": 0.5607145993553688, + "learning_rate": 9.229166510612917e-07, + "loss": 0.2582, + "step": 30986 + }, + { + "epoch": 1.4515857029090737, + "grad_norm": 0.5839474447485856, + "learning_rate": 9.227695025669068e-07, + "loss": 0.2757, + "step": 30987 + }, + { + "epoch": 1.4516325478990022, + "grad_norm": 0.607047312667643, + "learning_rate": 9.226223631490879e-07, + "loss": 0.2797, + "step": 30988 + }, + { + "epoch": 1.4516793928889307, + "grad_norm": 0.6280449519007617, + "learning_rate": 9.224752328086811e-07, + "loss": 0.2961, + "step": 30989 + }, + { + "epoch": 1.451726237878859, + "grad_norm": 0.561367878908164, + "learning_rate": 9.223281115465315e-07, + "loss": 0.2544, + "step": 30990 + }, + { + "epoch": 1.4517730828687871, + "grad_norm": 0.5895455311002885, + "learning_rate": 9.221809993634875e-07, + "loss": 0.2668, + "step": 30991 + }, + { + "epoch": 1.4518199278587156, + "grad_norm": 0.5729329833227346, + "learning_rate": 9.220338962603953e-07, + "loss": 0.2652, + "step": 30992 + }, + { + "epoch": 1.4518667728486438, + "grad_norm": 0.7677378635819372, + "learning_rate": 9.218868022381012e-07, + "loss": 0.2883, + "step": 30993 + }, + { + "epoch": 1.451913617838572, + "grad_norm": 0.5905918075797144, + "learning_rate": 9.21739717297453e-07, + "loss": 0.2804, + "step": 30994 + }, + { + "epoch": 1.4519604628285006, + "grad_norm": 0.6548187100219889, + "learning_rate": 9.215926414392948e-07, + "loss": 0.3044, + "step": 30995 + }, + { + "epoch": 1.4520073078184288, + "grad_norm": 0.5942488862922414, + "learning_rate": 9.214455746644755e-07, + "loss": 0.2702, + "step": 30996 + }, + { + "epoch": 1.452054152808357, + "grad_norm": 0.5750824440194265, + "learning_rate": 9.21298516973839e-07, + "loss": 0.2606, + "step": 30997 + }, + { + "epoch": 1.4521009977982855, + "grad_norm": 0.5816398887759834, + "learning_rate": 9.21151468368233e-07, + "loss": 0.2648, + "step": 30998 + }, + { + "epoch": 1.4521478427882137, + "grad_norm": 0.6171018957304255, + "learning_rate": 9.210044288485032e-07, + "loss": 0.2597, + "step": 30999 + }, + { + "epoch": 1.4521946877781422, + "grad_norm": 0.6267087335442737, + "learning_rate": 9.20857398415497e-07, + "loss": 0.2755, + "step": 31000 + }, + { + "epoch": 1.4522415327680704, + "grad_norm": 0.6314255185604526, + "learning_rate": 9.207103770700587e-07, + "loss": 0.27, + "step": 31001 + }, + { + "epoch": 1.452288377757999, + "grad_norm": 0.6039605704648296, + "learning_rate": 9.205633648130352e-07, + "loss": 0.276, + "step": 31002 + }, + { + "epoch": 1.4523352227479271, + "grad_norm": 0.5839126600983225, + "learning_rate": 9.204163616452736e-07, + "loss": 0.2696, + "step": 31003 + }, + { + "epoch": 1.4523820677378554, + "grad_norm": 0.561896200504255, + "learning_rate": 9.202693675676178e-07, + "loss": 0.2649, + "step": 31004 + }, + { + "epoch": 1.4524289127277838, + "grad_norm": 0.5955220062837374, + "learning_rate": 9.20122382580915e-07, + "loss": 0.2696, + "step": 31005 + }, + { + "epoch": 1.452475757717712, + "grad_norm": 0.6214232027913037, + "learning_rate": 9.199754066860115e-07, + "loss": 0.2717, + "step": 31006 + }, + { + "epoch": 1.4525226027076403, + "grad_norm": 0.6229180556378198, + "learning_rate": 9.198284398837515e-07, + "loss": 0.2749, + "step": 31007 + }, + { + "epoch": 1.4525694476975688, + "grad_norm": 0.5661362505299695, + "learning_rate": 9.196814821749817e-07, + "loss": 0.2577, + "step": 31008 + }, + { + "epoch": 1.452616292687497, + "grad_norm": 0.5904789739809971, + "learning_rate": 9.195345335605476e-07, + "loss": 0.2497, + "step": 31009 + }, + { + "epoch": 1.4526631376774253, + "grad_norm": 0.5814239460092488, + "learning_rate": 9.193875940412961e-07, + "loss": 0.2672, + "step": 31010 + }, + { + "epoch": 1.4527099826673537, + "grad_norm": 0.6204192414730029, + "learning_rate": 9.192406636180709e-07, + "loss": 0.2735, + "step": 31011 + }, + { + "epoch": 1.452756827657282, + "grad_norm": 0.5875628768244647, + "learning_rate": 9.19093742291719e-07, + "loss": 0.2724, + "step": 31012 + }, + { + "epoch": 1.4528036726472104, + "grad_norm": 0.5679313228683776, + "learning_rate": 9.189468300630846e-07, + "loss": 0.2621, + "step": 31013 + }, + { + "epoch": 1.4528505176371387, + "grad_norm": 0.6172720881205802, + "learning_rate": 9.187999269330136e-07, + "loss": 0.2879, + "step": 31014 + }, + { + "epoch": 1.4528973626270671, + "grad_norm": 0.5949979079988589, + "learning_rate": 9.186530329023519e-07, + "loss": 0.2742, + "step": 31015 + }, + { + "epoch": 1.4529442076169954, + "grad_norm": 0.6356525523660479, + "learning_rate": 9.185061479719454e-07, + "loss": 0.2724, + "step": 31016 + }, + { + "epoch": 1.4529910526069236, + "grad_norm": 0.5944166512509487, + "learning_rate": 9.183592721426374e-07, + "loss": 0.2649, + "step": 31017 + }, + { + "epoch": 1.453037897596852, + "grad_norm": 0.5466405597866573, + "learning_rate": 9.182124054152752e-07, + "loss": 0.2695, + "step": 31018 + }, + { + "epoch": 1.4530847425867803, + "grad_norm": 0.5734116887375742, + "learning_rate": 9.180655477907022e-07, + "loss": 0.2538, + "step": 31019 + }, + { + "epoch": 1.4531315875767086, + "grad_norm": 0.616473133968278, + "learning_rate": 9.179186992697644e-07, + "loss": 0.2725, + "step": 31020 + }, + { + "epoch": 1.453178432566637, + "grad_norm": 0.5927929280831989, + "learning_rate": 9.177718598533069e-07, + "loss": 0.2793, + "step": 31021 + }, + { + "epoch": 1.4532252775565653, + "grad_norm": 0.5569240579072453, + "learning_rate": 9.176250295421746e-07, + "loss": 0.2633, + "step": 31022 + }, + { + "epoch": 1.4532721225464935, + "grad_norm": 0.584434469274428, + "learning_rate": 9.174782083372133e-07, + "loss": 0.277, + "step": 31023 + }, + { + "epoch": 1.453318967536422, + "grad_norm": 0.5375707555205033, + "learning_rate": 9.173313962392674e-07, + "loss": 0.2475, + "step": 31024 + }, + { + "epoch": 1.4533658125263504, + "grad_norm": 0.5938102305890985, + "learning_rate": 9.171845932491805e-07, + "loss": 0.2775, + "step": 31025 + }, + { + "epoch": 1.4534126575162787, + "grad_norm": 0.5998933246657562, + "learning_rate": 9.170377993677984e-07, + "loss": 0.2686, + "step": 31026 + }, + { + "epoch": 1.453459502506207, + "grad_norm": 0.5850394345900992, + "learning_rate": 9.168910145959659e-07, + "loss": 0.2668, + "step": 31027 + }, + { + "epoch": 1.4535063474961354, + "grad_norm": 0.6054892951942077, + "learning_rate": 9.16744238934528e-07, + "loss": 0.2861, + "step": 31028 + }, + { + "epoch": 1.4535531924860636, + "grad_norm": 0.6053522288955127, + "learning_rate": 9.165974723843288e-07, + "loss": 0.269, + "step": 31029 + }, + { + "epoch": 1.4536000374759919, + "grad_norm": 0.6024192783020733, + "learning_rate": 9.16450714946214e-07, + "loss": 0.2687, + "step": 31030 + }, + { + "epoch": 1.4536468824659203, + "grad_norm": 0.6269375026601631, + "learning_rate": 9.163039666210274e-07, + "loss": 0.2756, + "step": 31031 + }, + { + "epoch": 1.4536937274558486, + "grad_norm": 0.596384452459604, + "learning_rate": 9.161572274096125e-07, + "loss": 0.2675, + "step": 31032 + }, + { + "epoch": 1.4537405724457768, + "grad_norm": 0.6087708849266394, + "learning_rate": 9.160104973128147e-07, + "loss": 0.2822, + "step": 31033 + }, + { + "epoch": 1.4537874174357053, + "grad_norm": 0.5963001288780652, + "learning_rate": 9.158637763314784e-07, + "loss": 0.2574, + "step": 31034 + }, + { + "epoch": 1.4538342624256335, + "grad_norm": 0.6013524262261766, + "learning_rate": 9.157170644664482e-07, + "loss": 0.2573, + "step": 31035 + }, + { + "epoch": 1.453881107415562, + "grad_norm": 0.6091018739478802, + "learning_rate": 9.155703617185677e-07, + "loss": 0.2804, + "step": 31036 + }, + { + "epoch": 1.4539279524054902, + "grad_norm": 0.5578002696654516, + "learning_rate": 9.154236680886825e-07, + "loss": 0.2635, + "step": 31037 + }, + { + "epoch": 1.4539747973954187, + "grad_norm": 0.5437772338307779, + "learning_rate": 9.152769835776357e-07, + "loss": 0.2488, + "step": 31038 + }, + { + "epoch": 1.454021642385347, + "grad_norm": 0.578892822861993, + "learning_rate": 9.151303081862709e-07, + "loss": 0.2713, + "step": 31039 + }, + { + "epoch": 1.4540684873752752, + "grad_norm": 0.5798489631097832, + "learning_rate": 9.149836419154328e-07, + "loss": 0.2714, + "step": 31040 + }, + { + "epoch": 1.4541153323652036, + "grad_norm": 0.6043492321270953, + "learning_rate": 9.148369847659655e-07, + "loss": 0.2569, + "step": 31041 + }, + { + "epoch": 1.4541621773551319, + "grad_norm": 0.5694761480250262, + "learning_rate": 9.146903367387128e-07, + "loss": 0.2638, + "step": 31042 + }, + { + "epoch": 1.45420902234506, + "grad_norm": 0.6296168508285798, + "learning_rate": 9.145436978345196e-07, + "loss": 0.2871, + "step": 31043 + }, + { + "epoch": 1.4542558673349886, + "grad_norm": 0.6228245745249935, + "learning_rate": 9.143970680542283e-07, + "loss": 0.2842, + "step": 31044 + }, + { + "epoch": 1.4543027123249168, + "grad_norm": 0.600386146997337, + "learning_rate": 9.142504473986841e-07, + "loss": 0.2733, + "step": 31045 + }, + { + "epoch": 1.454349557314845, + "grad_norm": 0.5602633237670874, + "learning_rate": 9.141038358687291e-07, + "loss": 0.2597, + "step": 31046 + }, + { + "epoch": 1.4543964023047735, + "grad_norm": 0.5976800860444256, + "learning_rate": 9.139572334652078e-07, + "loss": 0.2675, + "step": 31047 + }, + { + "epoch": 1.4544432472947018, + "grad_norm": 0.6174652432681724, + "learning_rate": 9.138106401889644e-07, + "loss": 0.295, + "step": 31048 + }, + { + "epoch": 1.4544900922846302, + "grad_norm": 0.6179595374309826, + "learning_rate": 9.136640560408424e-07, + "loss": 0.2888, + "step": 31049 + }, + { + "epoch": 1.4545369372745585, + "grad_norm": 0.5505630203051327, + "learning_rate": 9.135174810216846e-07, + "loss": 0.2564, + "step": 31050 + }, + { + "epoch": 1.454583782264487, + "grad_norm": 0.5995262051027769, + "learning_rate": 9.133709151323347e-07, + "loss": 0.2623, + "step": 31051 + }, + { + "epoch": 1.4546306272544152, + "grad_norm": 0.5959904020583799, + "learning_rate": 9.132243583736375e-07, + "loss": 0.2745, + "step": 31052 + }, + { + "epoch": 1.4546774722443434, + "grad_norm": 0.5728576106674275, + "learning_rate": 9.130778107464342e-07, + "loss": 0.2711, + "step": 31053 + }, + { + "epoch": 1.4547243172342719, + "grad_norm": 0.6221450330433285, + "learning_rate": 9.129312722515693e-07, + "loss": 0.2813, + "step": 31054 + }, + { + "epoch": 1.4547711622242, + "grad_norm": 0.5776011605729154, + "learning_rate": 9.12784742889887e-07, + "loss": 0.2647, + "step": 31055 + }, + { + "epoch": 1.4548180072141284, + "grad_norm": 0.5776375439765256, + "learning_rate": 9.126382226622288e-07, + "loss": 0.2727, + "step": 31056 + }, + { + "epoch": 1.4548648522040568, + "grad_norm": 0.5529308689913549, + "learning_rate": 9.124917115694385e-07, + "loss": 0.2748, + "step": 31057 + }, + { + "epoch": 1.454911697193985, + "grad_norm": 0.554145463927487, + "learning_rate": 9.123452096123592e-07, + "loss": 0.259, + "step": 31058 + }, + { + "epoch": 1.4549585421839133, + "grad_norm": 0.5817523733376632, + "learning_rate": 9.121987167918353e-07, + "loss": 0.2732, + "step": 31059 + }, + { + "epoch": 1.4550053871738418, + "grad_norm": 0.5883635439829615, + "learning_rate": 9.120522331087079e-07, + "loss": 0.2709, + "step": 31060 + }, + { + "epoch": 1.4550522321637702, + "grad_norm": 0.6070372356336926, + "learning_rate": 9.119057585638214e-07, + "loss": 0.2649, + "step": 31061 + }, + { + "epoch": 1.4550990771536985, + "grad_norm": 0.6018367981547411, + "learning_rate": 9.117592931580174e-07, + "loss": 0.2728, + "step": 31062 + }, + { + "epoch": 1.4551459221436267, + "grad_norm": 0.5851339074784552, + "learning_rate": 9.116128368921395e-07, + "loss": 0.2801, + "step": 31063 + }, + { + "epoch": 1.4551927671335552, + "grad_norm": 0.5358471215503967, + "learning_rate": 9.114663897670303e-07, + "loss": 0.2481, + "step": 31064 + }, + { + "epoch": 1.4552396121234834, + "grad_norm": 0.6147484218455589, + "learning_rate": 9.113199517835339e-07, + "loss": 0.2562, + "step": 31065 + }, + { + "epoch": 1.4552864571134116, + "grad_norm": 0.5840574221758199, + "learning_rate": 9.111735229424909e-07, + "loss": 0.2787, + "step": 31066 + }, + { + "epoch": 1.45533330210334, + "grad_norm": 0.5930638385702337, + "learning_rate": 9.110271032447459e-07, + "loss": 0.2673, + "step": 31067 + }, + { + "epoch": 1.4553801470932684, + "grad_norm": 0.6255915001763863, + "learning_rate": 9.108806926911396e-07, + "loss": 0.2783, + "step": 31068 + }, + { + "epoch": 1.4554269920831966, + "grad_norm": 0.5801663652743844, + "learning_rate": 9.107342912825154e-07, + "loss": 0.2615, + "step": 31069 + }, + { + "epoch": 1.455473837073125, + "grad_norm": 0.6076894229349017, + "learning_rate": 9.105878990197159e-07, + "loss": 0.2711, + "step": 31070 + }, + { + "epoch": 1.4555206820630533, + "grad_norm": 0.6663740779625272, + "learning_rate": 9.104415159035837e-07, + "loss": 0.3002, + "step": 31071 + }, + { + "epoch": 1.4555675270529818, + "grad_norm": 0.6182401339196744, + "learning_rate": 9.10295141934962e-07, + "loss": 0.275, + "step": 31072 + }, + { + "epoch": 1.45561437204291, + "grad_norm": 0.5895501918308615, + "learning_rate": 9.10148777114692e-07, + "loss": 0.2687, + "step": 31073 + }, + { + "epoch": 1.4556612170328385, + "grad_norm": 0.6063553509573663, + "learning_rate": 9.100024214436154e-07, + "loss": 0.2786, + "step": 31074 + }, + { + "epoch": 1.4557080620227667, + "grad_norm": 0.6432886605455892, + "learning_rate": 9.098560749225751e-07, + "loss": 0.2979, + "step": 31075 + }, + { + "epoch": 1.455754907012695, + "grad_norm": 0.623628999583537, + "learning_rate": 9.097097375524136e-07, + "loss": 0.2788, + "step": 31076 + }, + { + "epoch": 1.4558017520026234, + "grad_norm": 0.6489670485694955, + "learning_rate": 9.095634093339728e-07, + "loss": 0.2995, + "step": 31077 + }, + { + "epoch": 1.4558485969925516, + "grad_norm": 0.6129972935476559, + "learning_rate": 9.094170902680949e-07, + "loss": 0.2771, + "step": 31078 + }, + { + "epoch": 1.4558954419824799, + "grad_norm": 0.6239946506119225, + "learning_rate": 9.092707803556228e-07, + "loss": 0.2744, + "step": 31079 + }, + { + "epoch": 1.4559422869724084, + "grad_norm": 0.6233205341220283, + "learning_rate": 9.091244795973975e-07, + "loss": 0.2739, + "step": 31080 + }, + { + "epoch": 1.4559891319623366, + "grad_norm": 0.6076429127854511, + "learning_rate": 9.089781879942599e-07, + "loss": 0.2826, + "step": 31081 + }, + { + "epoch": 1.4560359769522648, + "grad_norm": 0.5647750422365218, + "learning_rate": 9.088319055470532e-07, + "loss": 0.2791, + "step": 31082 + }, + { + "epoch": 1.4560828219421933, + "grad_norm": 0.5719927038238829, + "learning_rate": 9.086856322566187e-07, + "loss": 0.2677, + "step": 31083 + }, + { + "epoch": 1.4561296669321215, + "grad_norm": 0.6124435531663834, + "learning_rate": 9.085393681237986e-07, + "loss": 0.2639, + "step": 31084 + }, + { + "epoch": 1.45617651192205, + "grad_norm": 0.5911836836780021, + "learning_rate": 9.083931131494353e-07, + "loss": 0.2785, + "step": 31085 + }, + { + "epoch": 1.4562233569119782, + "grad_norm": 0.6445295222814318, + "learning_rate": 9.082468673343686e-07, + "loss": 0.2792, + "step": 31086 + }, + { + "epoch": 1.4562702019019067, + "grad_norm": 0.5917069620713269, + "learning_rate": 9.081006306794421e-07, + "loss": 0.2772, + "step": 31087 + }, + { + "epoch": 1.456317046891835, + "grad_norm": 0.5690309586540072, + "learning_rate": 9.079544031854954e-07, + "loss": 0.2656, + "step": 31088 + }, + { + "epoch": 1.4563638918817632, + "grad_norm": 0.5958573519701633, + "learning_rate": 9.078081848533709e-07, + "loss": 0.2849, + "step": 31089 + }, + { + "epoch": 1.4564107368716916, + "grad_norm": 0.6200191476032771, + "learning_rate": 9.076619756839103e-07, + "loss": 0.2925, + "step": 31090 + }, + { + "epoch": 1.4564575818616199, + "grad_norm": 0.6348103244063106, + "learning_rate": 9.075157756779557e-07, + "loss": 0.294, + "step": 31091 + }, + { + "epoch": 1.4565044268515481, + "grad_norm": 0.6111431477282403, + "learning_rate": 9.073695848363468e-07, + "loss": 0.2785, + "step": 31092 + }, + { + "epoch": 1.4565512718414766, + "grad_norm": 0.618060112272022, + "learning_rate": 9.072234031599253e-07, + "loss": 0.2859, + "step": 31093 + }, + { + "epoch": 1.4565981168314048, + "grad_norm": 0.5948614974590676, + "learning_rate": 9.070772306495337e-07, + "loss": 0.2661, + "step": 31094 + }, + { + "epoch": 1.456644961821333, + "grad_norm": 0.560083886927611, + "learning_rate": 9.069310673060117e-07, + "loss": 0.2662, + "step": 31095 + }, + { + "epoch": 1.4566918068112615, + "grad_norm": 0.6107418565168027, + "learning_rate": 9.067849131302007e-07, + "loss": 0.2815, + "step": 31096 + }, + { + "epoch": 1.45673865180119, + "grad_norm": 0.6049866258422817, + "learning_rate": 9.066387681229421e-07, + "loss": 0.2709, + "step": 31097 + }, + { + "epoch": 1.4567854967911182, + "grad_norm": 0.6129789793192412, + "learning_rate": 9.064926322850781e-07, + "loss": 0.2807, + "step": 31098 + }, + { + "epoch": 1.4568323417810465, + "grad_norm": 0.616489939658819, + "learning_rate": 9.063465056174472e-07, + "loss": 0.2728, + "step": 31099 + }, + { + "epoch": 1.456879186770975, + "grad_norm": 0.6059406905261794, + "learning_rate": 9.06200388120892e-07, + "loss": 0.2683, + "step": 31100 + }, + { + "epoch": 1.4569260317609032, + "grad_norm": 0.6129510043706216, + "learning_rate": 9.060542797962535e-07, + "loss": 0.2759, + "step": 31101 + }, + { + "epoch": 1.4569728767508314, + "grad_norm": 0.6237030633296808, + "learning_rate": 9.059081806443712e-07, + "loss": 0.2836, + "step": 31102 + }, + { + "epoch": 1.4570197217407599, + "grad_norm": 0.6230652442567821, + "learning_rate": 9.057620906660869e-07, + "loss": 0.2643, + "step": 31103 + }, + { + "epoch": 1.4570665667306881, + "grad_norm": 0.6131846182713635, + "learning_rate": 9.056160098622419e-07, + "loss": 0.2723, + "step": 31104 + }, + { + "epoch": 1.4571134117206164, + "grad_norm": 0.5877735904611695, + "learning_rate": 9.054699382336749e-07, + "loss": 0.2726, + "step": 31105 + }, + { + "epoch": 1.4571602567105448, + "grad_norm": 0.5989046553230714, + "learning_rate": 9.053238757812277e-07, + "loss": 0.2622, + "step": 31106 + }, + { + "epoch": 1.457207101700473, + "grad_norm": 0.5620868159411119, + "learning_rate": 9.051778225057417e-07, + "loss": 0.2641, + "step": 31107 + }, + { + "epoch": 1.4572539466904015, + "grad_norm": 0.6204543679169567, + "learning_rate": 9.050317784080559e-07, + "loss": 0.271, + "step": 31108 + }, + { + "epoch": 1.4573007916803298, + "grad_norm": 0.5524897265418472, + "learning_rate": 9.04885743489011e-07, + "loss": 0.262, + "step": 31109 + }, + { + "epoch": 1.4573476366702582, + "grad_norm": 0.6105629662344627, + "learning_rate": 9.047397177494485e-07, + "loss": 0.2808, + "step": 31110 + }, + { + "epoch": 1.4573944816601865, + "grad_norm": 0.6467013519872477, + "learning_rate": 9.045937011902076e-07, + "loss": 0.2811, + "step": 31111 + }, + { + "epoch": 1.4574413266501147, + "grad_norm": 0.6504514050549302, + "learning_rate": 9.044476938121286e-07, + "loss": 0.2809, + "step": 31112 + }, + { + "epoch": 1.4574881716400432, + "grad_norm": 0.6203470452989739, + "learning_rate": 9.043016956160522e-07, + "loss": 0.2569, + "step": 31113 + }, + { + "epoch": 1.4575350166299714, + "grad_norm": 0.6159871883817976, + "learning_rate": 9.041557066028192e-07, + "loss": 0.2741, + "step": 31114 + }, + { + "epoch": 1.4575818616198997, + "grad_norm": 0.6490582215427804, + "learning_rate": 9.040097267732681e-07, + "loss": 0.2701, + "step": 31115 + }, + { + "epoch": 1.4576287066098281, + "grad_norm": 0.58445438162486, + "learning_rate": 9.03863756128241e-07, + "loss": 0.2727, + "step": 31116 + }, + { + "epoch": 1.4576755515997564, + "grad_norm": 0.5816296810533771, + "learning_rate": 9.037177946685757e-07, + "loss": 0.2796, + "step": 31117 + }, + { + "epoch": 1.4577223965896846, + "grad_norm": 0.6024418493502226, + "learning_rate": 9.035718423951134e-07, + "loss": 0.2855, + "step": 31118 + }, + { + "epoch": 1.457769241579613, + "grad_norm": 0.5910210480500654, + "learning_rate": 9.034258993086939e-07, + "loss": 0.2646, + "step": 31119 + }, + { + "epoch": 1.4578160865695413, + "grad_norm": 0.6179739302426759, + "learning_rate": 9.03279965410157e-07, + "loss": 0.2774, + "step": 31120 + }, + { + "epoch": 1.4578629315594698, + "grad_norm": 0.582613291332463, + "learning_rate": 9.031340407003436e-07, + "loss": 0.2598, + "step": 31121 + }, + { + "epoch": 1.457909776549398, + "grad_norm": 0.5844250750489971, + "learning_rate": 9.029881251800923e-07, + "loss": 0.2697, + "step": 31122 + }, + { + "epoch": 1.4579566215393265, + "grad_norm": 0.619489201027917, + "learning_rate": 9.028422188502423e-07, + "loss": 0.2753, + "step": 31123 + }, + { + "epoch": 1.4580034665292547, + "grad_norm": 0.6358445461340383, + "learning_rate": 9.026963217116338e-07, + "loss": 0.2691, + "step": 31124 + }, + { + "epoch": 1.458050311519183, + "grad_norm": 0.5926383784955178, + "learning_rate": 9.025504337651065e-07, + "loss": 0.2656, + "step": 31125 + }, + { + "epoch": 1.4580971565091114, + "grad_norm": 0.6319518444257072, + "learning_rate": 9.024045550114999e-07, + "loss": 0.2871, + "step": 31126 + }, + { + "epoch": 1.4581440014990397, + "grad_norm": 0.5864369789141672, + "learning_rate": 9.022586854516535e-07, + "loss": 0.2776, + "step": 31127 + }, + { + "epoch": 1.458190846488968, + "grad_norm": 0.5942468623117363, + "learning_rate": 9.021128250864075e-07, + "loss": 0.2815, + "step": 31128 + }, + { + "epoch": 1.4582376914788964, + "grad_norm": 0.6277617177764181, + "learning_rate": 9.01966973916601e-07, + "loss": 0.2939, + "step": 31129 + }, + { + "epoch": 1.4582845364688246, + "grad_norm": 0.5940773428823781, + "learning_rate": 9.018211319430716e-07, + "loss": 0.2897, + "step": 31130 + }, + { + "epoch": 1.4583313814587529, + "grad_norm": 0.6290473055386601, + "learning_rate": 9.016752991666602e-07, + "loss": 0.285, + "step": 31131 + }, + { + "epoch": 1.4583782264486813, + "grad_norm": 0.6115371629009017, + "learning_rate": 9.015294755882057e-07, + "loss": 0.2772, + "step": 31132 + }, + { + "epoch": 1.4584250714386098, + "grad_norm": 0.6505019488872443, + "learning_rate": 9.013836612085472e-07, + "loss": 0.2797, + "step": 31133 + }, + { + "epoch": 1.458471916428538, + "grad_norm": 0.5943538885133998, + "learning_rate": 9.012378560285248e-07, + "loss": 0.2674, + "step": 31134 + }, + { + "epoch": 1.4585187614184663, + "grad_norm": 0.6157252238676042, + "learning_rate": 9.010920600489759e-07, + "loss": 0.2643, + "step": 31135 + }, + { + "epoch": 1.4585656064083947, + "grad_norm": 0.6318569935126129, + "learning_rate": 9.009462732707411e-07, + "loss": 0.2836, + "step": 31136 + }, + { + "epoch": 1.458612451398323, + "grad_norm": 0.6168218492644938, + "learning_rate": 9.008004956946579e-07, + "loss": 0.2859, + "step": 31137 + }, + { + "epoch": 1.4586592963882512, + "grad_norm": 0.602857387635998, + "learning_rate": 9.006547273215657e-07, + "loss": 0.2663, + "step": 31138 + }, + { + "epoch": 1.4587061413781797, + "grad_norm": 0.5922788049394755, + "learning_rate": 9.005089681523039e-07, + "loss": 0.2695, + "step": 31139 + }, + { + "epoch": 1.458752986368108, + "grad_norm": 0.6094111177754357, + "learning_rate": 9.003632181877115e-07, + "loss": 0.2671, + "step": 31140 + }, + { + "epoch": 1.4587998313580361, + "grad_norm": 0.5829746951264164, + "learning_rate": 9.002174774286259e-07, + "loss": 0.2594, + "step": 31141 + }, + { + "epoch": 1.4588466763479646, + "grad_norm": 0.5979898974951882, + "learning_rate": 9.000717458758868e-07, + "loss": 0.2552, + "step": 31142 + }, + { + "epoch": 1.4588935213378929, + "grad_norm": 0.5507702357544956, + "learning_rate": 8.999260235303334e-07, + "loss": 0.2478, + "step": 31143 + }, + { + "epoch": 1.4589403663278213, + "grad_norm": 0.5798257463103965, + "learning_rate": 8.997803103928026e-07, + "loss": 0.2673, + "step": 31144 + }, + { + "epoch": 1.4589872113177496, + "grad_norm": 0.5993724418894308, + "learning_rate": 8.996346064641342e-07, + "loss": 0.2678, + "step": 31145 + }, + { + "epoch": 1.459034056307678, + "grad_norm": 0.6478764421441275, + "learning_rate": 8.994889117451672e-07, + "loss": 0.2724, + "step": 31146 + }, + { + "epoch": 1.4590809012976063, + "grad_norm": 0.5744293727916392, + "learning_rate": 8.993432262367383e-07, + "loss": 0.2719, + "step": 31147 + }, + { + "epoch": 1.4591277462875345, + "grad_norm": 0.63327001138489, + "learning_rate": 8.991975499396868e-07, + "loss": 0.2865, + "step": 31148 + }, + { + "epoch": 1.459174591277463, + "grad_norm": 0.5737259758725641, + "learning_rate": 8.990518828548511e-07, + "loss": 0.2585, + "step": 31149 + }, + { + "epoch": 1.4592214362673912, + "grad_norm": 0.6104123935660536, + "learning_rate": 8.989062249830702e-07, + "loss": 0.2788, + "step": 31150 + }, + { + "epoch": 1.4592682812573194, + "grad_norm": 0.6225204767926609, + "learning_rate": 8.987605763251808e-07, + "loss": 0.2812, + "step": 31151 + }, + { + "epoch": 1.459315126247248, + "grad_norm": 0.5653110950097312, + "learning_rate": 8.986149368820226e-07, + "loss": 0.259, + "step": 31152 + }, + { + "epoch": 1.4593619712371761, + "grad_norm": 0.6378965432220931, + "learning_rate": 8.984693066544319e-07, + "loss": 0.2807, + "step": 31153 + }, + { + "epoch": 1.4594088162271044, + "grad_norm": 0.5701865085065021, + "learning_rate": 8.983236856432483e-07, + "loss": 0.2696, + "step": 31154 + }, + { + "epoch": 1.4594556612170329, + "grad_norm": 0.6379441782687884, + "learning_rate": 8.98178073849309e-07, + "loss": 0.2902, + "step": 31155 + }, + { + "epoch": 1.459502506206961, + "grad_norm": 0.6400900912907731, + "learning_rate": 8.980324712734531e-07, + "loss": 0.2833, + "step": 31156 + }, + { + "epoch": 1.4595493511968896, + "grad_norm": 0.5514255571266551, + "learning_rate": 8.97886877916517e-07, + "loss": 0.2498, + "step": 31157 + }, + { + "epoch": 1.4595961961868178, + "grad_norm": 0.5867757008069505, + "learning_rate": 8.977412937793401e-07, + "loss": 0.2504, + "step": 31158 + }, + { + "epoch": 1.4596430411767463, + "grad_norm": 0.5592149882602777, + "learning_rate": 8.975957188627585e-07, + "loss": 0.2684, + "step": 31159 + }, + { + "epoch": 1.4596898861666745, + "grad_norm": 0.6235293931665258, + "learning_rate": 8.974501531676106e-07, + "loss": 0.2808, + "step": 31160 + }, + { + "epoch": 1.4597367311566027, + "grad_norm": 0.5398618385337233, + "learning_rate": 8.973045966947344e-07, + "loss": 0.2441, + "step": 31161 + }, + { + "epoch": 1.4597835761465312, + "grad_norm": 0.6116520467056332, + "learning_rate": 8.971590494449675e-07, + "loss": 0.265, + "step": 31162 + }, + { + "epoch": 1.4598304211364594, + "grad_norm": 0.6156805208397904, + "learning_rate": 8.97013511419148e-07, + "loss": 0.2533, + "step": 31163 + }, + { + "epoch": 1.4598772661263877, + "grad_norm": 0.6107799150532076, + "learning_rate": 8.968679826181123e-07, + "loss": 0.2808, + "step": 31164 + }, + { + "epoch": 1.4599241111163161, + "grad_norm": 0.5820891262251267, + "learning_rate": 8.96722463042699e-07, + "loss": 0.2699, + "step": 31165 + }, + { + "epoch": 1.4599709561062444, + "grad_norm": 0.6169808377692028, + "learning_rate": 8.965769526937446e-07, + "loss": 0.2821, + "step": 31166 + }, + { + "epoch": 1.4600178010961726, + "grad_norm": 0.5991817158757031, + "learning_rate": 8.964314515720862e-07, + "loss": 0.2806, + "step": 31167 + }, + { + "epoch": 1.460064646086101, + "grad_norm": 0.5865529066172068, + "learning_rate": 8.962859596785622e-07, + "loss": 0.261, + "step": 31168 + }, + { + "epoch": 1.4601114910760296, + "grad_norm": 0.5685668825340089, + "learning_rate": 8.961404770140091e-07, + "loss": 0.2733, + "step": 31169 + }, + { + "epoch": 1.4601583360659578, + "grad_norm": 0.5933399812908647, + "learning_rate": 8.959950035792656e-07, + "loss": 0.2806, + "step": 31170 + }, + { + "epoch": 1.460205181055886, + "grad_norm": 0.5875266588845585, + "learning_rate": 8.958495393751676e-07, + "loss": 0.277, + "step": 31171 + }, + { + "epoch": 1.4602520260458145, + "grad_norm": 0.6489797888554435, + "learning_rate": 8.957040844025514e-07, + "loss": 0.2859, + "step": 31172 + }, + { + "epoch": 1.4602988710357427, + "grad_norm": 0.5806213628400767, + "learning_rate": 8.95558638662255e-07, + "loss": 0.2598, + "step": 31173 + }, + { + "epoch": 1.460345716025671, + "grad_norm": 0.5821439838308697, + "learning_rate": 8.954132021551154e-07, + "loss": 0.2661, + "step": 31174 + }, + { + "epoch": 1.4603925610155994, + "grad_norm": 0.5827474556242387, + "learning_rate": 8.952677748819694e-07, + "loss": 0.2676, + "step": 31175 + }, + { + "epoch": 1.4604394060055277, + "grad_norm": 0.5747242600031567, + "learning_rate": 8.951223568436542e-07, + "loss": 0.2596, + "step": 31176 + }, + { + "epoch": 1.460486250995456, + "grad_norm": 0.5787951498817869, + "learning_rate": 8.949769480410072e-07, + "loss": 0.2637, + "step": 31177 + }, + { + "epoch": 1.4605330959853844, + "grad_norm": 0.610015953954258, + "learning_rate": 8.948315484748645e-07, + "loss": 0.2723, + "step": 31178 + }, + { + "epoch": 1.4605799409753126, + "grad_norm": 0.5512857822728576, + "learning_rate": 8.94686158146062e-07, + "loss": 0.2516, + "step": 31179 + }, + { + "epoch": 1.460626785965241, + "grad_norm": 0.6202614710822962, + "learning_rate": 8.945407770554371e-07, + "loss": 0.2716, + "step": 31180 + }, + { + "epoch": 1.4606736309551693, + "grad_norm": 0.6221365559141558, + "learning_rate": 8.943954052038264e-07, + "loss": 0.3061, + "step": 31181 + }, + { + "epoch": 1.4607204759450978, + "grad_norm": 0.6049178598271646, + "learning_rate": 8.942500425920669e-07, + "loss": 0.2924, + "step": 31182 + }, + { + "epoch": 1.460767320935026, + "grad_norm": 0.5889787802523437, + "learning_rate": 8.941046892209954e-07, + "loss": 0.2538, + "step": 31183 + }, + { + "epoch": 1.4608141659249543, + "grad_norm": 0.6120203793010343, + "learning_rate": 8.939593450914469e-07, + "loss": 0.2833, + "step": 31184 + }, + { + "epoch": 1.4608610109148827, + "grad_norm": 0.5637515802327562, + "learning_rate": 8.938140102042597e-07, + "loss": 0.2593, + "step": 31185 + }, + { + "epoch": 1.460907855904811, + "grad_norm": 0.6179498598983172, + "learning_rate": 8.936686845602683e-07, + "loss": 0.283, + "step": 31186 + }, + { + "epoch": 1.4609547008947392, + "grad_norm": 0.6000832051810866, + "learning_rate": 8.9352336816031e-07, + "loss": 0.2709, + "step": 31187 + }, + { + "epoch": 1.4610015458846677, + "grad_norm": 0.5997338255691105, + "learning_rate": 8.933780610052209e-07, + "loss": 0.2619, + "step": 31188 + }, + { + "epoch": 1.461048390874596, + "grad_norm": 0.594122995948063, + "learning_rate": 8.932327630958379e-07, + "loss": 0.2695, + "step": 31189 + }, + { + "epoch": 1.4610952358645242, + "grad_norm": 0.6092255649066254, + "learning_rate": 8.930874744329957e-07, + "loss": 0.2763, + "step": 31190 + }, + { + "epoch": 1.4611420808544526, + "grad_norm": 0.6369469022432476, + "learning_rate": 8.929421950175313e-07, + "loss": 0.2779, + "step": 31191 + }, + { + "epoch": 1.4611889258443809, + "grad_norm": 0.6138800400708562, + "learning_rate": 8.927969248502815e-07, + "loss": 0.2673, + "step": 31192 + }, + { + "epoch": 1.4612357708343093, + "grad_norm": 0.530847395688859, + "learning_rate": 8.926516639320806e-07, + "loss": 0.2409, + "step": 31193 + }, + { + "epoch": 1.4612826158242376, + "grad_norm": 0.6632626216928409, + "learning_rate": 8.925064122637653e-07, + "loss": 0.2834, + "step": 31194 + }, + { + "epoch": 1.461329460814166, + "grad_norm": 0.6162730317723087, + "learning_rate": 8.923611698461726e-07, + "loss": 0.2866, + "step": 31195 + }, + { + "epoch": 1.4613763058040943, + "grad_norm": 0.6118822210653265, + "learning_rate": 8.922159366801361e-07, + "loss": 0.2732, + "step": 31196 + }, + { + "epoch": 1.4614231507940225, + "grad_norm": 0.6044293252891871, + "learning_rate": 8.920707127664932e-07, + "loss": 0.2812, + "step": 31197 + }, + { + "epoch": 1.461469995783951, + "grad_norm": 0.6289861170409735, + "learning_rate": 8.919254981060791e-07, + "loss": 0.2846, + "step": 31198 + }, + { + "epoch": 1.4615168407738792, + "grad_norm": 0.607797114449916, + "learning_rate": 8.917802926997302e-07, + "loss": 0.2795, + "step": 31199 + }, + { + "epoch": 1.4615636857638075, + "grad_norm": 0.6051443127599869, + "learning_rate": 8.916350965482809e-07, + "loss": 0.2842, + "step": 31200 + }, + { + "epoch": 1.461610530753736, + "grad_norm": 0.5580826320710381, + "learning_rate": 8.914899096525681e-07, + "loss": 0.263, + "step": 31201 + }, + { + "epoch": 1.4616573757436642, + "grad_norm": 0.5903534792197014, + "learning_rate": 8.913447320134256e-07, + "loss": 0.2589, + "step": 31202 + }, + { + "epoch": 1.4617042207335924, + "grad_norm": 0.5927130686603822, + "learning_rate": 8.9119956363169e-07, + "loss": 0.2705, + "step": 31203 + }, + { + "epoch": 1.4617510657235209, + "grad_norm": 0.5701327341336201, + "learning_rate": 8.910544045081967e-07, + "loss": 0.2622, + "step": 31204 + }, + { + "epoch": 1.4617979107134493, + "grad_norm": 0.6649539885393091, + "learning_rate": 8.909092546437814e-07, + "loss": 0.2911, + "step": 31205 + }, + { + "epoch": 1.4618447557033776, + "grad_norm": 0.5398998882356774, + "learning_rate": 8.907641140392784e-07, + "loss": 0.2614, + "step": 31206 + }, + { + "epoch": 1.4618916006933058, + "grad_norm": 0.5769434654819803, + "learning_rate": 8.906189826955242e-07, + "loss": 0.2624, + "step": 31207 + }, + { + "epoch": 1.4619384456832343, + "grad_norm": 0.6112503301927757, + "learning_rate": 8.904738606133523e-07, + "loss": 0.2674, + "step": 31208 + }, + { + "epoch": 1.4619852906731625, + "grad_norm": 0.5601136518568797, + "learning_rate": 8.90328747793599e-07, + "loss": 0.269, + "step": 31209 + }, + { + "epoch": 1.4620321356630908, + "grad_norm": 0.6020459125013363, + "learning_rate": 8.90183644237099e-07, + "loss": 0.2749, + "step": 31210 + }, + { + "epoch": 1.4620789806530192, + "grad_norm": 0.5404410182638089, + "learning_rate": 8.900385499446876e-07, + "loss": 0.2612, + "step": 31211 + }, + { + "epoch": 1.4621258256429475, + "grad_norm": 0.5931759786961351, + "learning_rate": 8.898934649172006e-07, + "loss": 0.2755, + "step": 31212 + }, + { + "epoch": 1.4621726706328757, + "grad_norm": 0.6175871640114714, + "learning_rate": 8.897483891554721e-07, + "loss": 0.3046, + "step": 31213 + }, + { + "epoch": 1.4622195156228042, + "grad_norm": 0.6116897831759408, + "learning_rate": 8.896033226603357e-07, + "loss": 0.2827, + "step": 31214 + }, + { + "epoch": 1.4622663606127324, + "grad_norm": 0.635631366907448, + "learning_rate": 8.894582654326275e-07, + "loss": 0.2906, + "step": 31215 + }, + { + "epoch": 1.4623132056026609, + "grad_norm": 0.593016957042912, + "learning_rate": 8.893132174731826e-07, + "loss": 0.278, + "step": 31216 + }, + { + "epoch": 1.4623600505925891, + "grad_norm": 0.6012774610083415, + "learning_rate": 8.891681787828349e-07, + "loss": 0.2721, + "step": 31217 + }, + { + "epoch": 1.4624068955825176, + "grad_norm": 0.592834963923453, + "learning_rate": 8.890231493624197e-07, + "loss": 0.2611, + "step": 31218 + }, + { + "epoch": 1.4624537405724458, + "grad_norm": 0.5862154407177528, + "learning_rate": 8.888781292127719e-07, + "loss": 0.2695, + "step": 31219 + }, + { + "epoch": 1.462500585562374, + "grad_norm": 0.6049098357649758, + "learning_rate": 8.887331183347256e-07, + "loss": 0.2702, + "step": 31220 + }, + { + "epoch": 1.4625474305523025, + "grad_norm": 0.5786009423898173, + "learning_rate": 8.885881167291147e-07, + "loss": 0.2653, + "step": 31221 + }, + { + "epoch": 1.4625942755422308, + "grad_norm": 0.6073938049040206, + "learning_rate": 8.884431243967739e-07, + "loss": 0.2785, + "step": 31222 + }, + { + "epoch": 1.462641120532159, + "grad_norm": 0.6292880155910341, + "learning_rate": 8.882981413385381e-07, + "loss": 0.2754, + "step": 31223 + }, + { + "epoch": 1.4626879655220875, + "grad_norm": 0.596508566702763, + "learning_rate": 8.88153167555241e-07, + "loss": 0.2506, + "step": 31224 + }, + { + "epoch": 1.4627348105120157, + "grad_norm": 0.5787010623497936, + "learning_rate": 8.880082030477186e-07, + "loss": 0.2645, + "step": 31225 + }, + { + "epoch": 1.462781655501944, + "grad_norm": 0.5822756167476985, + "learning_rate": 8.878632478168025e-07, + "loss": 0.2749, + "step": 31226 + }, + { + "epoch": 1.4628285004918724, + "grad_norm": 0.6491254870569622, + "learning_rate": 8.877183018633295e-07, + "loss": 0.2796, + "step": 31227 + }, + { + "epoch": 1.4628753454818006, + "grad_norm": 0.5620565609778215, + "learning_rate": 8.875733651881313e-07, + "loss": 0.2659, + "step": 31228 + }, + { + "epoch": 1.4629221904717291, + "grad_norm": 0.5858910192528666, + "learning_rate": 8.874284377920434e-07, + "loss": 0.2683, + "step": 31229 + }, + { + "epoch": 1.4629690354616574, + "grad_norm": 0.5707343062956428, + "learning_rate": 8.872835196758995e-07, + "loss": 0.2645, + "step": 31230 + }, + { + "epoch": 1.4630158804515858, + "grad_norm": 0.5557055842531198, + "learning_rate": 8.871386108405341e-07, + "loss": 0.258, + "step": 31231 + }, + { + "epoch": 1.463062725441514, + "grad_norm": 0.5876277954329382, + "learning_rate": 8.869937112867801e-07, + "loss": 0.2724, + "step": 31232 + }, + { + "epoch": 1.4631095704314423, + "grad_norm": 0.6090107114385448, + "learning_rate": 8.868488210154719e-07, + "loss": 0.2692, + "step": 31233 + }, + { + "epoch": 1.4631564154213708, + "grad_norm": 0.57721884965505, + "learning_rate": 8.867039400274438e-07, + "loss": 0.2705, + "step": 31234 + }, + { + "epoch": 1.463203260411299, + "grad_norm": 0.6233715250765481, + "learning_rate": 8.865590683235287e-07, + "loss": 0.2822, + "step": 31235 + }, + { + "epoch": 1.4632501054012272, + "grad_norm": 0.5297944824614943, + "learning_rate": 8.864142059045602e-07, + "loss": 0.2461, + "step": 31236 + }, + { + "epoch": 1.4632969503911557, + "grad_norm": 0.624154826854641, + "learning_rate": 8.862693527713726e-07, + "loss": 0.2668, + "step": 31237 + }, + { + "epoch": 1.463343795381084, + "grad_norm": 0.5788014745189122, + "learning_rate": 8.861245089247999e-07, + "loss": 0.2828, + "step": 31238 + }, + { + "epoch": 1.4633906403710122, + "grad_norm": 0.5882417391025994, + "learning_rate": 8.859796743656745e-07, + "loss": 0.2734, + "step": 31239 + }, + { + "epoch": 1.4634374853609406, + "grad_norm": 0.6292330078594514, + "learning_rate": 8.858348490948302e-07, + "loss": 0.2949, + "step": 31240 + }, + { + "epoch": 1.4634843303508691, + "grad_norm": 0.5795896533803345, + "learning_rate": 8.856900331131016e-07, + "loss": 0.2718, + "step": 31241 + }, + { + "epoch": 1.4635311753407974, + "grad_norm": 0.6064559383489152, + "learning_rate": 8.855452264213202e-07, + "loss": 0.276, + "step": 31242 + }, + { + "epoch": 1.4635780203307256, + "grad_norm": 0.6025768354193395, + "learning_rate": 8.854004290203202e-07, + "loss": 0.2788, + "step": 31243 + }, + { + "epoch": 1.463624865320654, + "grad_norm": 0.584802217062749, + "learning_rate": 8.852556409109359e-07, + "loss": 0.2599, + "step": 31244 + }, + { + "epoch": 1.4636717103105823, + "grad_norm": 0.5782254716413422, + "learning_rate": 8.851108620939985e-07, + "loss": 0.2598, + "step": 31245 + }, + { + "epoch": 1.4637185553005105, + "grad_norm": 0.5863658077771369, + "learning_rate": 8.849660925703421e-07, + "loss": 0.2861, + "step": 31246 + }, + { + "epoch": 1.463765400290439, + "grad_norm": 0.6036276681598169, + "learning_rate": 8.848213323408011e-07, + "loss": 0.2736, + "step": 31247 + }, + { + "epoch": 1.4638122452803672, + "grad_norm": 0.5771794123366505, + "learning_rate": 8.846765814062063e-07, + "loss": 0.2813, + "step": 31248 + }, + { + "epoch": 1.4638590902702955, + "grad_norm": 0.5920431853773275, + "learning_rate": 8.845318397673919e-07, + "loss": 0.2696, + "step": 31249 + }, + { + "epoch": 1.463905935260224, + "grad_norm": 0.5869740084114536, + "learning_rate": 8.843871074251914e-07, + "loss": 0.26, + "step": 31250 + }, + { + "epoch": 1.4639527802501522, + "grad_norm": 0.6189310865884718, + "learning_rate": 8.842423843804365e-07, + "loss": 0.2779, + "step": 31251 + }, + { + "epoch": 1.4639996252400806, + "grad_norm": 0.577990621138144, + "learning_rate": 8.840976706339605e-07, + "loss": 0.2729, + "step": 31252 + }, + { + "epoch": 1.464046470230009, + "grad_norm": 0.5652227383543332, + "learning_rate": 8.839529661865958e-07, + "loss": 0.2712, + "step": 31253 + }, + { + "epoch": 1.4640933152199374, + "grad_norm": 0.5929395190931732, + "learning_rate": 8.838082710391771e-07, + "loss": 0.2647, + "step": 31254 + }, + { + "epoch": 1.4641401602098656, + "grad_norm": 0.6272955742086264, + "learning_rate": 8.836635851925343e-07, + "loss": 0.2776, + "step": 31255 + }, + { + "epoch": 1.4641870051997938, + "grad_norm": 0.5857661340577675, + "learning_rate": 8.835189086475023e-07, + "loss": 0.2637, + "step": 31256 + }, + { + "epoch": 1.4642338501897223, + "grad_norm": 0.604999408134777, + "learning_rate": 8.833742414049117e-07, + "loss": 0.292, + "step": 31257 + }, + { + "epoch": 1.4642806951796505, + "grad_norm": 0.576648210877621, + "learning_rate": 8.832295834655962e-07, + "loss": 0.2648, + "step": 31258 + }, + { + "epoch": 1.4643275401695788, + "grad_norm": 0.5625336904317032, + "learning_rate": 8.830849348303883e-07, + "loss": 0.2548, + "step": 31259 + }, + { + "epoch": 1.4643743851595072, + "grad_norm": 0.5762205479278849, + "learning_rate": 8.829402955001198e-07, + "loss": 0.2585, + "step": 31260 + }, + { + "epoch": 1.4644212301494355, + "grad_norm": 0.6133308618931963, + "learning_rate": 8.827956654756246e-07, + "loss": 0.2745, + "step": 31261 + }, + { + "epoch": 1.4644680751393637, + "grad_norm": 0.56797000879785, + "learning_rate": 8.826510447577336e-07, + "loss": 0.2674, + "step": 31262 + }, + { + "epoch": 1.4645149201292922, + "grad_norm": 0.5617795900654258, + "learning_rate": 8.825064333472785e-07, + "loss": 0.2531, + "step": 31263 + }, + { + "epoch": 1.4645617651192204, + "grad_norm": 0.6220171813977414, + "learning_rate": 8.823618312450926e-07, + "loss": 0.2694, + "step": 31264 + }, + { + "epoch": 1.464608610109149, + "grad_norm": 0.5861038486158546, + "learning_rate": 8.822172384520078e-07, + "loss": 0.2743, + "step": 31265 + }, + { + "epoch": 1.4646554550990771, + "grad_norm": 0.6052625364691225, + "learning_rate": 8.82072654968856e-07, + "loss": 0.2813, + "step": 31266 + }, + { + "epoch": 1.4647023000890056, + "grad_norm": 0.5652086415902097, + "learning_rate": 8.819280807964695e-07, + "loss": 0.2504, + "step": 31267 + }, + { + "epoch": 1.4647491450789338, + "grad_norm": 0.6153203818559456, + "learning_rate": 8.817835159356808e-07, + "loss": 0.2706, + "step": 31268 + }, + { + "epoch": 1.464795990068862, + "grad_norm": 0.6085096505690116, + "learning_rate": 8.816389603873215e-07, + "loss": 0.2761, + "step": 31269 + }, + { + "epoch": 1.4648428350587905, + "grad_norm": 0.5693219838062941, + "learning_rate": 8.814944141522222e-07, + "loss": 0.2748, + "step": 31270 + }, + { + "epoch": 1.4648896800487188, + "grad_norm": 0.5951816028685919, + "learning_rate": 8.81349877231216e-07, + "loss": 0.2565, + "step": 31271 + }, + { + "epoch": 1.464936525038647, + "grad_norm": 0.6676404607205695, + "learning_rate": 8.812053496251344e-07, + "loss": 0.2647, + "step": 31272 + }, + { + "epoch": 1.4649833700285755, + "grad_norm": 0.602057980882362, + "learning_rate": 8.810608313348091e-07, + "loss": 0.2733, + "step": 31273 + }, + { + "epoch": 1.4650302150185037, + "grad_norm": 0.5649985664100591, + "learning_rate": 8.809163223610723e-07, + "loss": 0.2554, + "step": 31274 + }, + { + "epoch": 1.465077060008432, + "grad_norm": 0.571921687088824, + "learning_rate": 8.807718227047546e-07, + "loss": 0.2674, + "step": 31275 + }, + { + "epoch": 1.4651239049983604, + "grad_norm": 0.5961722885154586, + "learning_rate": 8.80627332366689e-07, + "loss": 0.2751, + "step": 31276 + }, + { + "epoch": 1.465170749988289, + "grad_norm": 0.65096822653985, + "learning_rate": 8.804828513477051e-07, + "loss": 0.2618, + "step": 31277 + }, + { + "epoch": 1.4652175949782171, + "grad_norm": 0.6278797085125598, + "learning_rate": 8.803383796486353e-07, + "loss": 0.2913, + "step": 31278 + }, + { + "epoch": 1.4652644399681454, + "grad_norm": 0.5723297743116482, + "learning_rate": 8.801939172703111e-07, + "loss": 0.2773, + "step": 31279 + }, + { + "epoch": 1.4653112849580738, + "grad_norm": 0.5832401165037119, + "learning_rate": 8.800494642135649e-07, + "loss": 0.2732, + "step": 31280 + }, + { + "epoch": 1.465358129948002, + "grad_norm": 0.5681136798326336, + "learning_rate": 8.799050204792257e-07, + "loss": 0.266, + "step": 31281 + }, + { + "epoch": 1.4654049749379303, + "grad_norm": 0.5814372591275869, + "learning_rate": 8.79760586068126e-07, + "loss": 0.2745, + "step": 31282 + }, + { + "epoch": 1.4654518199278588, + "grad_norm": 0.5839486360792778, + "learning_rate": 8.796161609810977e-07, + "loss": 0.2804, + "step": 31283 + }, + { + "epoch": 1.465498664917787, + "grad_norm": 0.5757101930857058, + "learning_rate": 8.794717452189702e-07, + "loss": 0.2629, + "step": 31284 + }, + { + "epoch": 1.4655455099077153, + "grad_norm": 0.5651581058890823, + "learning_rate": 8.793273387825757e-07, + "loss": 0.2545, + "step": 31285 + }, + { + "epoch": 1.4655923548976437, + "grad_norm": 0.5569279715168746, + "learning_rate": 8.791829416727457e-07, + "loss": 0.2513, + "step": 31286 + }, + { + "epoch": 1.465639199887572, + "grad_norm": 0.5881117945685973, + "learning_rate": 8.790385538903098e-07, + "loss": 0.2635, + "step": 31287 + }, + { + "epoch": 1.4656860448775004, + "grad_norm": 0.601097339258184, + "learning_rate": 8.788941754360997e-07, + "loss": 0.2805, + "step": 31288 + }, + { + "epoch": 1.4657328898674287, + "grad_norm": 0.6033867340752007, + "learning_rate": 8.787498063109459e-07, + "loss": 0.266, + "step": 31289 + }, + { + "epoch": 1.4657797348573571, + "grad_norm": 0.6514115915360835, + "learning_rate": 8.786054465156804e-07, + "loss": 0.2785, + "step": 31290 + }, + { + "epoch": 1.4658265798472854, + "grad_norm": 0.6032473473038682, + "learning_rate": 8.784610960511323e-07, + "loss": 0.2754, + "step": 31291 + }, + { + "epoch": 1.4658734248372136, + "grad_norm": 0.6194151642636078, + "learning_rate": 8.783167549181338e-07, + "loss": 0.2833, + "step": 31292 + }, + { + "epoch": 1.465920269827142, + "grad_norm": 0.585614599361064, + "learning_rate": 8.781724231175137e-07, + "loss": 0.2785, + "step": 31293 + }, + { + "epoch": 1.4659671148170703, + "grad_norm": 0.6081789412868489, + "learning_rate": 8.78028100650104e-07, + "loss": 0.2745, + "step": 31294 + }, + { + "epoch": 1.4660139598069986, + "grad_norm": 0.580638002713417, + "learning_rate": 8.778837875167348e-07, + "loss": 0.2754, + "step": 31295 + }, + { + "epoch": 1.466060804796927, + "grad_norm": 0.5347049424873204, + "learning_rate": 8.777394837182376e-07, + "loss": 0.2488, + "step": 31296 + }, + { + "epoch": 1.4661076497868553, + "grad_norm": 0.5833775887715849, + "learning_rate": 8.77595189255441e-07, + "loss": 0.2832, + "step": 31297 + }, + { + "epoch": 1.4661544947767835, + "grad_norm": 0.6110191291893531, + "learning_rate": 8.774509041291774e-07, + "loss": 0.268, + "step": 31298 + }, + { + "epoch": 1.466201339766712, + "grad_norm": 0.5813166243896253, + "learning_rate": 8.773066283402748e-07, + "loss": 0.2675, + "step": 31299 + }, + { + "epoch": 1.4662481847566402, + "grad_norm": 0.5923692224506155, + "learning_rate": 8.77162361889565e-07, + "loss": 0.2729, + "step": 31300 + }, + { + "epoch": 1.4662950297465687, + "grad_norm": 0.5520161840667827, + "learning_rate": 8.770181047778778e-07, + "loss": 0.2844, + "step": 31301 + }, + { + "epoch": 1.466341874736497, + "grad_norm": 0.5628260466796918, + "learning_rate": 8.768738570060436e-07, + "loss": 0.2536, + "step": 31302 + }, + { + "epoch": 1.4663887197264254, + "grad_norm": 0.6068350861335721, + "learning_rate": 8.76729618574893e-07, + "loss": 0.2834, + "step": 31303 + }, + { + "epoch": 1.4664355647163536, + "grad_norm": 0.6142414840526071, + "learning_rate": 8.76585389485255e-07, + "loss": 0.2781, + "step": 31304 + }, + { + "epoch": 1.4664824097062819, + "grad_norm": 0.5743072910348933, + "learning_rate": 8.764411697379604e-07, + "loss": 0.2666, + "step": 31305 + }, + { + "epoch": 1.4665292546962103, + "grad_norm": 0.6006924747771917, + "learning_rate": 8.762969593338383e-07, + "loss": 0.2852, + "step": 31306 + }, + { + "epoch": 1.4665760996861386, + "grad_norm": 0.6200564923041503, + "learning_rate": 8.761527582737189e-07, + "loss": 0.264, + "step": 31307 + }, + { + "epoch": 1.4666229446760668, + "grad_norm": 0.6242643815022735, + "learning_rate": 8.760085665584325e-07, + "loss": 0.287, + "step": 31308 + }, + { + "epoch": 1.4666697896659953, + "grad_norm": 0.5787031730466347, + "learning_rate": 8.758643841888084e-07, + "loss": 0.2659, + "step": 31309 + }, + { + "epoch": 1.4667166346559235, + "grad_norm": 0.6055571026096992, + "learning_rate": 8.757202111656776e-07, + "loss": 0.2757, + "step": 31310 + }, + { + "epoch": 1.4667634796458517, + "grad_norm": 0.5749317974199005, + "learning_rate": 8.755760474898687e-07, + "loss": 0.2578, + "step": 31311 + }, + { + "epoch": 1.4668103246357802, + "grad_norm": 0.6116900399694891, + "learning_rate": 8.754318931622102e-07, + "loss": 0.2821, + "step": 31312 + }, + { + "epoch": 1.4668571696257087, + "grad_norm": 0.5897065273706178, + "learning_rate": 8.752877481835331e-07, + "loss": 0.2524, + "step": 31313 + }, + { + "epoch": 1.466904014615637, + "grad_norm": 0.6064288602698624, + "learning_rate": 8.751436125546667e-07, + "loss": 0.2766, + "step": 31314 + }, + { + "epoch": 1.4669508596055651, + "grad_norm": 0.618874129507243, + "learning_rate": 8.749994862764402e-07, + "loss": 0.2685, + "step": 31315 + }, + { + "epoch": 1.4669977045954936, + "grad_norm": 0.5435694342219268, + "learning_rate": 8.748553693496837e-07, + "loss": 0.2613, + "step": 31316 + }, + { + "epoch": 1.4670445495854219, + "grad_norm": 0.558151895841944, + "learning_rate": 8.747112617752262e-07, + "loss": 0.2497, + "step": 31317 + }, + { + "epoch": 1.46709139457535, + "grad_norm": 0.5811868986008044, + "learning_rate": 8.745671635538975e-07, + "loss": 0.2607, + "step": 31318 + }, + { + "epoch": 1.4671382395652786, + "grad_norm": 0.5999479597281429, + "learning_rate": 8.744230746865251e-07, + "loss": 0.2693, + "step": 31319 + }, + { + "epoch": 1.4671850845552068, + "grad_norm": 0.577029592903233, + "learning_rate": 8.742789951739395e-07, + "loss": 0.2585, + "step": 31320 + }, + { + "epoch": 1.467231929545135, + "grad_norm": 0.571677845065441, + "learning_rate": 8.741349250169695e-07, + "loss": 0.2681, + "step": 31321 + }, + { + "epoch": 1.4672787745350635, + "grad_norm": 0.6028215567164245, + "learning_rate": 8.739908642164444e-07, + "loss": 0.28, + "step": 31322 + }, + { + "epoch": 1.4673256195249917, + "grad_norm": 0.5792955946804069, + "learning_rate": 8.738468127731942e-07, + "loss": 0.2679, + "step": 31323 + }, + { + "epoch": 1.4673724645149202, + "grad_norm": 0.6117755297960464, + "learning_rate": 8.737027706880458e-07, + "loss": 0.2686, + "step": 31324 + }, + { + "epoch": 1.4674193095048484, + "grad_norm": 0.5665031735117532, + "learning_rate": 8.735587379618302e-07, + "loss": 0.249, + "step": 31325 + }, + { + "epoch": 1.467466154494777, + "grad_norm": 0.571872444405599, + "learning_rate": 8.734147145953745e-07, + "loss": 0.2596, + "step": 31326 + }, + { + "epoch": 1.4675129994847051, + "grad_norm": 0.5208633765921153, + "learning_rate": 8.73270700589508e-07, + "loss": 0.244, + "step": 31327 + }, + { + "epoch": 1.4675598444746334, + "grad_norm": 0.5535733300881807, + "learning_rate": 8.731266959450599e-07, + "loss": 0.2594, + "step": 31328 + }, + { + "epoch": 1.4676066894645619, + "grad_norm": 0.6265332577644126, + "learning_rate": 8.729827006628597e-07, + "loss": 0.2895, + "step": 31329 + }, + { + "epoch": 1.46765353445449, + "grad_norm": 0.6200325584580766, + "learning_rate": 8.728387147437342e-07, + "loss": 0.2741, + "step": 31330 + }, + { + "epoch": 1.4677003794444183, + "grad_norm": 0.582677410438208, + "learning_rate": 8.726947381885129e-07, + "loss": 0.2734, + "step": 31331 + }, + { + "epoch": 1.4677472244343468, + "grad_norm": 0.5833161948947689, + "learning_rate": 8.725507709980252e-07, + "loss": 0.2642, + "step": 31332 + }, + { + "epoch": 1.467794069424275, + "grad_norm": 0.5972701138916096, + "learning_rate": 8.724068131730981e-07, + "loss": 0.2685, + "step": 31333 + }, + { + "epoch": 1.4678409144142033, + "grad_norm": 0.6145352787913478, + "learning_rate": 8.722628647145607e-07, + "loss": 0.2776, + "step": 31334 + }, + { + "epoch": 1.4678877594041317, + "grad_norm": 0.5969389535619928, + "learning_rate": 8.721189256232421e-07, + "loss": 0.2608, + "step": 31335 + }, + { + "epoch": 1.46793460439406, + "grad_norm": 0.602388149603067, + "learning_rate": 8.71974995899969e-07, + "loss": 0.2757, + "step": 31336 + }, + { + "epoch": 1.4679814493839884, + "grad_norm": 0.6100848284958211, + "learning_rate": 8.718310755455709e-07, + "loss": 0.2877, + "step": 31337 + }, + { + "epoch": 1.4680282943739167, + "grad_norm": 0.5871158106488137, + "learning_rate": 8.716871645608754e-07, + "loss": 0.2872, + "step": 31338 + }, + { + "epoch": 1.4680751393638451, + "grad_norm": 0.6100987094997047, + "learning_rate": 8.715432629467122e-07, + "loss": 0.268, + "step": 31339 + }, + { + "epoch": 1.4681219843537734, + "grad_norm": 0.5981377939222565, + "learning_rate": 8.71399370703907e-07, + "loss": 0.2818, + "step": 31340 + }, + { + "epoch": 1.4681688293437016, + "grad_norm": 0.5681102576906969, + "learning_rate": 8.712554878332902e-07, + "loss": 0.2585, + "step": 31341 + }, + { + "epoch": 1.46821567433363, + "grad_norm": 0.6579719588227452, + "learning_rate": 8.711116143356879e-07, + "loss": 0.2811, + "step": 31342 + }, + { + "epoch": 1.4682625193235583, + "grad_norm": 0.6014922650087962, + "learning_rate": 8.709677502119287e-07, + "loss": 0.2713, + "step": 31343 + }, + { + "epoch": 1.4683093643134866, + "grad_norm": 0.6178283840199053, + "learning_rate": 8.708238954628407e-07, + "loss": 0.2836, + "step": 31344 + }, + { + "epoch": 1.468356209303415, + "grad_norm": 0.5980315833305179, + "learning_rate": 8.706800500892523e-07, + "loss": 0.2718, + "step": 31345 + }, + { + "epoch": 1.4684030542933433, + "grad_norm": 0.6063522352129543, + "learning_rate": 8.705362140919901e-07, + "loss": 0.2859, + "step": 31346 + }, + { + "epoch": 1.4684498992832715, + "grad_norm": 0.5650226585083854, + "learning_rate": 8.703923874718834e-07, + "loss": 0.2558, + "step": 31347 + }, + { + "epoch": 1.4684967442732, + "grad_norm": 0.58444656352567, + "learning_rate": 8.702485702297581e-07, + "loss": 0.2787, + "step": 31348 + }, + { + "epoch": 1.4685435892631284, + "grad_norm": 0.5862090488315452, + "learning_rate": 8.701047623664424e-07, + "loss": 0.2657, + "step": 31349 + }, + { + "epoch": 1.4685904342530567, + "grad_norm": 0.6281793963403517, + "learning_rate": 8.699609638827642e-07, + "loss": 0.2873, + "step": 31350 + }, + { + "epoch": 1.468637279242985, + "grad_norm": 0.6252018499036645, + "learning_rate": 8.698171747795512e-07, + "loss": 0.2872, + "step": 31351 + }, + { + "epoch": 1.4686841242329134, + "grad_norm": 0.5728380240147213, + "learning_rate": 8.696733950576311e-07, + "loss": 0.2613, + "step": 31352 + }, + { + "epoch": 1.4687309692228416, + "grad_norm": 0.5969694338701913, + "learning_rate": 8.695296247178312e-07, + "loss": 0.2725, + "step": 31353 + }, + { + "epoch": 1.4687778142127699, + "grad_norm": 0.5865682877216621, + "learning_rate": 8.693858637609773e-07, + "loss": 0.2715, + "step": 31354 + }, + { + "epoch": 1.4688246592026983, + "grad_norm": 0.5800962259557298, + "learning_rate": 8.69242112187898e-07, + "loss": 0.2647, + "step": 31355 + }, + { + "epoch": 1.4688715041926266, + "grad_norm": 0.6016149711076385, + "learning_rate": 8.690983699994207e-07, + "loss": 0.2819, + "step": 31356 + }, + { + "epoch": 1.4689183491825548, + "grad_norm": 0.6455936131670975, + "learning_rate": 8.689546371963722e-07, + "loss": 0.2811, + "step": 31357 + }, + { + "epoch": 1.4689651941724833, + "grad_norm": 0.6041689947848756, + "learning_rate": 8.688109137795799e-07, + "loss": 0.2728, + "step": 31358 + }, + { + "epoch": 1.4690120391624115, + "grad_norm": 0.6574576226596044, + "learning_rate": 8.686671997498717e-07, + "loss": 0.2893, + "step": 31359 + }, + { + "epoch": 1.46905888415234, + "grad_norm": 0.6142396100322206, + "learning_rate": 8.685234951080734e-07, + "loss": 0.2783, + "step": 31360 + }, + { + "epoch": 1.4691057291422682, + "grad_norm": 0.6256490549628424, + "learning_rate": 8.68379799855012e-07, + "loss": 0.272, + "step": 31361 + }, + { + "epoch": 1.4691525741321967, + "grad_norm": 0.5865603196063578, + "learning_rate": 8.682361139915144e-07, + "loss": 0.2676, + "step": 31362 + }, + { + "epoch": 1.469199419122125, + "grad_norm": 0.5965587339124668, + "learning_rate": 8.68092437518408e-07, + "loss": 0.2734, + "step": 31363 + }, + { + "epoch": 1.4692462641120532, + "grad_norm": 0.6159188295066139, + "learning_rate": 8.679487704365191e-07, + "loss": 0.288, + "step": 31364 + }, + { + "epoch": 1.4692931091019816, + "grad_norm": 0.5789061464728061, + "learning_rate": 8.67805112746676e-07, + "loss": 0.2632, + "step": 31365 + }, + { + "epoch": 1.4693399540919099, + "grad_norm": 0.5678835013525387, + "learning_rate": 8.676614644497034e-07, + "loss": 0.2758, + "step": 31366 + }, + { + "epoch": 1.4693867990818381, + "grad_norm": 0.6402382115123101, + "learning_rate": 8.675178255464295e-07, + "loss": 0.2782, + "step": 31367 + }, + { + "epoch": 1.4694336440717666, + "grad_norm": 0.6124880207839741, + "learning_rate": 8.673741960376797e-07, + "loss": 0.2818, + "step": 31368 + }, + { + "epoch": 1.4694804890616948, + "grad_norm": 0.5920841829816572, + "learning_rate": 8.672305759242807e-07, + "loss": 0.2675, + "step": 31369 + }, + { + "epoch": 1.469527334051623, + "grad_norm": 0.5649672708547321, + "learning_rate": 8.670869652070596e-07, + "loss": 0.2523, + "step": 31370 + }, + { + "epoch": 1.4695741790415515, + "grad_norm": 0.6390581276052387, + "learning_rate": 8.669433638868424e-07, + "loss": 0.3028, + "step": 31371 + }, + { + "epoch": 1.4696210240314798, + "grad_norm": 0.5923080704806634, + "learning_rate": 8.667997719644566e-07, + "loss": 0.2738, + "step": 31372 + }, + { + "epoch": 1.4696678690214082, + "grad_norm": 0.5783563870327653, + "learning_rate": 8.666561894407266e-07, + "loss": 0.2657, + "step": 31373 + }, + { + "epoch": 1.4697147140113365, + "grad_norm": 0.5523125895678564, + "learning_rate": 8.665126163164808e-07, + "loss": 0.2773, + "step": 31374 + }, + { + "epoch": 1.469761559001265, + "grad_norm": 0.5673840353232499, + "learning_rate": 8.663690525925433e-07, + "loss": 0.2466, + "step": 31375 + }, + { + "epoch": 1.4698084039911932, + "grad_norm": 0.5787574302965262, + "learning_rate": 8.662254982697416e-07, + "loss": 0.2634, + "step": 31376 + }, + { + "epoch": 1.4698552489811214, + "grad_norm": 0.5905145903212814, + "learning_rate": 8.660819533489013e-07, + "loss": 0.2744, + "step": 31377 + }, + { + "epoch": 1.4699020939710499, + "grad_norm": 0.6058015140112284, + "learning_rate": 8.659384178308494e-07, + "loss": 0.2706, + "step": 31378 + }, + { + "epoch": 1.4699489389609781, + "grad_norm": 0.5541095342577397, + "learning_rate": 8.657948917164105e-07, + "loss": 0.2559, + "step": 31379 + }, + { + "epoch": 1.4699957839509064, + "grad_norm": 0.6125763601739053, + "learning_rate": 8.656513750064113e-07, + "loss": 0.2741, + "step": 31380 + }, + { + "epoch": 1.4700426289408348, + "grad_norm": 0.6310841226935621, + "learning_rate": 8.655078677016787e-07, + "loss": 0.2865, + "step": 31381 + }, + { + "epoch": 1.470089473930763, + "grad_norm": 0.6262766348472432, + "learning_rate": 8.653643698030365e-07, + "loss": 0.2811, + "step": 31382 + }, + { + "epoch": 1.4701363189206913, + "grad_norm": 0.590516676454502, + "learning_rate": 8.652208813113114e-07, + "loss": 0.2592, + "step": 31383 + }, + { + "epoch": 1.4701831639106198, + "grad_norm": 0.5884962991783182, + "learning_rate": 8.6507740222733e-07, + "loss": 0.2825, + "step": 31384 + }, + { + "epoch": 1.4702300089005482, + "grad_norm": 0.6291282912304064, + "learning_rate": 8.649339325519165e-07, + "loss": 0.2832, + "step": 31385 + }, + { + "epoch": 1.4702768538904765, + "grad_norm": 0.6290907233555566, + "learning_rate": 8.647904722858974e-07, + "loss": 0.2757, + "step": 31386 + }, + { + "epoch": 1.4703236988804047, + "grad_norm": 0.6152502230636488, + "learning_rate": 8.646470214300989e-07, + "loss": 0.2872, + "step": 31387 + }, + { + "epoch": 1.4703705438703332, + "grad_norm": 0.6012240426070339, + "learning_rate": 8.645035799853449e-07, + "loss": 0.2781, + "step": 31388 + }, + { + "epoch": 1.4704173888602614, + "grad_norm": 0.610094193277291, + "learning_rate": 8.643601479524615e-07, + "loss": 0.2879, + "step": 31389 + }, + { + "epoch": 1.4704642338501897, + "grad_norm": 0.6298114851710552, + "learning_rate": 8.642167253322756e-07, + "loss": 0.2816, + "step": 31390 + }, + { + "epoch": 1.4705110788401181, + "grad_norm": 0.6086824036818161, + "learning_rate": 8.640733121256101e-07, + "loss": 0.273, + "step": 31391 + }, + { + "epoch": 1.4705579238300464, + "grad_norm": 0.6193567132143846, + "learning_rate": 8.639299083332917e-07, + "loss": 0.2837, + "step": 31392 + }, + { + "epoch": 1.4706047688199746, + "grad_norm": 0.60363998623435, + "learning_rate": 8.637865139561455e-07, + "loss": 0.278, + "step": 31393 + }, + { + "epoch": 1.470651613809903, + "grad_norm": 0.6292653331406309, + "learning_rate": 8.636431289949973e-07, + "loss": 0.2921, + "step": 31394 + }, + { + "epoch": 1.4706984587998313, + "grad_norm": 0.6121812301043845, + "learning_rate": 8.634997534506709e-07, + "loss": 0.2762, + "step": 31395 + }, + { + "epoch": 1.4707453037897598, + "grad_norm": 0.5600838609463838, + "learning_rate": 8.633563873239931e-07, + "loss": 0.2566, + "step": 31396 + }, + { + "epoch": 1.470792148779688, + "grad_norm": 0.572307858680193, + "learning_rate": 8.632130306157868e-07, + "loss": 0.2683, + "step": 31397 + }, + { + "epoch": 1.4708389937696165, + "grad_norm": 0.5855442420577871, + "learning_rate": 8.630696833268784e-07, + "loss": 0.2711, + "step": 31398 + }, + { + "epoch": 1.4708858387595447, + "grad_norm": 0.6005296010687173, + "learning_rate": 8.629263454580925e-07, + "loss": 0.2615, + "step": 31399 + }, + { + "epoch": 1.470932683749473, + "grad_norm": 0.61737211850507, + "learning_rate": 8.627830170102539e-07, + "loss": 0.2631, + "step": 31400 + }, + { + "epoch": 1.4709795287394014, + "grad_norm": 0.6518982595502752, + "learning_rate": 8.626396979841883e-07, + "loss": 0.2789, + "step": 31401 + }, + { + "epoch": 1.4710263737293297, + "grad_norm": 0.6100396151851503, + "learning_rate": 8.6249638838072e-07, + "loss": 0.2614, + "step": 31402 + }, + { + "epoch": 1.471073218719258, + "grad_norm": 0.5951905459129022, + "learning_rate": 8.623530882006722e-07, + "loss": 0.2785, + "step": 31403 + }, + { + "epoch": 1.4711200637091864, + "grad_norm": 0.6075465225847028, + "learning_rate": 8.62209797444871e-07, + "loss": 0.2719, + "step": 31404 + }, + { + "epoch": 1.4711669086991146, + "grad_norm": 0.5913059403672346, + "learning_rate": 8.620665161141409e-07, + "loss": 0.2602, + "step": 31405 + }, + { + "epoch": 1.4712137536890428, + "grad_norm": 0.670051605888644, + "learning_rate": 8.619232442093059e-07, + "loss": 0.2825, + "step": 31406 + }, + { + "epoch": 1.4712605986789713, + "grad_norm": 0.619496006826161, + "learning_rate": 8.617799817311912e-07, + "loss": 0.3018, + "step": 31407 + }, + { + "epoch": 1.4713074436688995, + "grad_norm": 0.5889564923156428, + "learning_rate": 8.616367286806219e-07, + "loss": 0.2534, + "step": 31408 + }, + { + "epoch": 1.471354288658828, + "grad_norm": 0.5776317305015847, + "learning_rate": 8.614934850584211e-07, + "loss": 0.2698, + "step": 31409 + }, + { + "epoch": 1.4714011336487562, + "grad_norm": 0.588713135130107, + "learning_rate": 8.613502508654129e-07, + "loss": 0.2582, + "step": 31410 + }, + { + "epoch": 1.4714479786386847, + "grad_norm": 0.5825027517758199, + "learning_rate": 8.612070261024221e-07, + "loss": 0.2739, + "step": 31411 + }, + { + "epoch": 1.471494823628613, + "grad_norm": 0.6166780633471949, + "learning_rate": 8.610638107702729e-07, + "loss": 0.275, + "step": 31412 + }, + { + "epoch": 1.4715416686185412, + "grad_norm": 0.6234385767999502, + "learning_rate": 8.609206048697896e-07, + "loss": 0.2774, + "step": 31413 + }, + { + "epoch": 1.4715885136084697, + "grad_norm": 0.5880995534567188, + "learning_rate": 8.607774084017973e-07, + "loss": 0.2668, + "step": 31414 + }, + { + "epoch": 1.471635358598398, + "grad_norm": 0.5930725048969069, + "learning_rate": 8.606342213671179e-07, + "loss": 0.2706, + "step": 31415 + }, + { + "epoch": 1.4716822035883261, + "grad_norm": 0.6517047573625988, + "learning_rate": 8.604910437665773e-07, + "loss": 0.2831, + "step": 31416 + }, + { + "epoch": 1.4717290485782546, + "grad_norm": 0.6790369401409535, + "learning_rate": 8.603478756009981e-07, + "loss": 0.3032, + "step": 31417 + }, + { + "epoch": 1.4717758935681828, + "grad_norm": 0.5834862922336305, + "learning_rate": 8.602047168712044e-07, + "loss": 0.2609, + "step": 31418 + }, + { + "epoch": 1.471822738558111, + "grad_norm": 0.6143456719964175, + "learning_rate": 8.600615675780207e-07, + "loss": 0.2817, + "step": 31419 + }, + { + "epoch": 1.4718695835480395, + "grad_norm": 0.5620053050932514, + "learning_rate": 8.599184277222711e-07, + "loss": 0.2643, + "step": 31420 + }, + { + "epoch": 1.471916428537968, + "grad_norm": 0.5870536316878233, + "learning_rate": 8.597752973047782e-07, + "loss": 0.2585, + "step": 31421 + }, + { + "epoch": 1.4719632735278962, + "grad_norm": 0.6158091571726412, + "learning_rate": 8.59632176326366e-07, + "loss": 0.273, + "step": 31422 + }, + { + "epoch": 1.4720101185178245, + "grad_norm": 0.5676328033808199, + "learning_rate": 8.594890647878592e-07, + "loss": 0.2622, + "step": 31423 + }, + { + "epoch": 1.472056963507753, + "grad_norm": 0.6041795019543658, + "learning_rate": 8.593459626900796e-07, + "loss": 0.2847, + "step": 31424 + }, + { + "epoch": 1.4721038084976812, + "grad_norm": 0.5946798926550794, + "learning_rate": 8.592028700338517e-07, + "loss": 0.2522, + "step": 31425 + }, + { + "epoch": 1.4721506534876094, + "grad_norm": 0.5853701091561027, + "learning_rate": 8.590597868199999e-07, + "loss": 0.2711, + "step": 31426 + }, + { + "epoch": 1.472197498477538, + "grad_norm": 0.5973445851565063, + "learning_rate": 8.589167130493456e-07, + "loss": 0.2663, + "step": 31427 + }, + { + "epoch": 1.4722443434674661, + "grad_norm": 0.6016765752419041, + "learning_rate": 8.58773648722713e-07, + "loss": 0.2659, + "step": 31428 + }, + { + "epoch": 1.4722911884573944, + "grad_norm": 0.5827609014999223, + "learning_rate": 8.586305938409257e-07, + "loss": 0.2629, + "step": 31429 + }, + { + "epoch": 1.4723380334473228, + "grad_norm": 0.6018801700265505, + "learning_rate": 8.584875484048075e-07, + "loss": 0.2739, + "step": 31430 + }, + { + "epoch": 1.472384878437251, + "grad_norm": 0.5428556261354962, + "learning_rate": 8.583445124151801e-07, + "loss": 0.2458, + "step": 31431 + }, + { + "epoch": 1.4724317234271795, + "grad_norm": 0.6345146812617692, + "learning_rate": 8.582014858728685e-07, + "loss": 0.2955, + "step": 31432 + }, + { + "epoch": 1.4724785684171078, + "grad_norm": 0.606261685630893, + "learning_rate": 8.580584687786936e-07, + "loss": 0.2663, + "step": 31433 + }, + { + "epoch": 1.4725254134070362, + "grad_norm": 0.556844597165385, + "learning_rate": 8.579154611334794e-07, + "loss": 0.2508, + "step": 31434 + }, + { + "epoch": 1.4725722583969645, + "grad_norm": 0.5859387184083334, + "learning_rate": 8.577724629380493e-07, + "loss": 0.2806, + "step": 31435 + }, + { + "epoch": 1.4726191033868927, + "grad_norm": 0.6120797347832261, + "learning_rate": 8.576294741932268e-07, + "loss": 0.2791, + "step": 31436 + }, + { + "epoch": 1.4726659483768212, + "grad_norm": 0.5890120133748413, + "learning_rate": 8.574864948998326e-07, + "loss": 0.2793, + "step": 31437 + }, + { + "epoch": 1.4727127933667494, + "grad_norm": 0.6207300451706603, + "learning_rate": 8.573435250586912e-07, + "loss": 0.2972, + "step": 31438 + }, + { + "epoch": 1.4727596383566777, + "grad_norm": 0.5727145059730269, + "learning_rate": 8.572005646706255e-07, + "loss": 0.2667, + "step": 31439 + }, + { + "epoch": 1.4728064833466061, + "grad_norm": 0.5663399435298105, + "learning_rate": 8.570576137364572e-07, + "loss": 0.255, + "step": 31440 + }, + { + "epoch": 1.4728533283365344, + "grad_norm": 0.5945686432657942, + "learning_rate": 8.56914672257009e-07, + "loss": 0.2587, + "step": 31441 + }, + { + "epoch": 1.4729001733264626, + "grad_norm": 0.6055068434937991, + "learning_rate": 8.567717402331041e-07, + "loss": 0.2662, + "step": 31442 + }, + { + "epoch": 1.472947018316391, + "grad_norm": 0.6125901248641076, + "learning_rate": 8.566288176655657e-07, + "loss": 0.2706, + "step": 31443 + }, + { + "epoch": 1.4729938633063193, + "grad_norm": 0.6345693671071212, + "learning_rate": 8.564859045552143e-07, + "loss": 0.2791, + "step": 31444 + }, + { + "epoch": 1.4730407082962478, + "grad_norm": 0.569947858380708, + "learning_rate": 8.563430009028745e-07, + "loss": 0.2482, + "step": 31445 + }, + { + "epoch": 1.473087553286176, + "grad_norm": 0.6152196582401473, + "learning_rate": 8.562001067093667e-07, + "loss": 0.2673, + "step": 31446 + }, + { + "epoch": 1.4731343982761045, + "grad_norm": 0.6114837338903159, + "learning_rate": 8.560572219755142e-07, + "loss": 0.2678, + "step": 31447 + }, + { + "epoch": 1.4731812432660327, + "grad_norm": 0.6091844627786127, + "learning_rate": 8.559143467021392e-07, + "loss": 0.2771, + "step": 31448 + }, + { + "epoch": 1.473228088255961, + "grad_norm": 0.5911137361412792, + "learning_rate": 8.557714808900638e-07, + "loss": 0.2607, + "step": 31449 + }, + { + "epoch": 1.4732749332458894, + "grad_norm": 0.6171438325059966, + "learning_rate": 8.556286245401113e-07, + "loss": 0.2775, + "step": 31450 + }, + { + "epoch": 1.4733217782358177, + "grad_norm": 0.5833406324136545, + "learning_rate": 8.554857776531025e-07, + "loss": 0.2607, + "step": 31451 + }, + { + "epoch": 1.473368623225746, + "grad_norm": 0.5893150632439984, + "learning_rate": 8.553429402298591e-07, + "loss": 0.2649, + "step": 31452 + }, + { + "epoch": 1.4734154682156744, + "grad_norm": 0.5580192490301067, + "learning_rate": 8.552001122712036e-07, + "loss": 0.2641, + "step": 31453 + }, + { + "epoch": 1.4734623132056026, + "grad_norm": 0.6187491211080229, + "learning_rate": 8.55057293777958e-07, + "loss": 0.2722, + "step": 31454 + }, + { + "epoch": 1.4735091581955309, + "grad_norm": 0.5738886982088409, + "learning_rate": 8.549144847509445e-07, + "loss": 0.2717, + "step": 31455 + }, + { + "epoch": 1.4735560031854593, + "grad_norm": 0.6112970543987626, + "learning_rate": 8.547716851909843e-07, + "loss": 0.2914, + "step": 31456 + }, + { + "epoch": 1.4736028481753878, + "grad_norm": 0.5683871807926293, + "learning_rate": 8.546288950989007e-07, + "loss": 0.2641, + "step": 31457 + }, + { + "epoch": 1.473649693165316, + "grad_norm": 0.6138115822330752, + "learning_rate": 8.54486114475514e-07, + "loss": 0.2742, + "step": 31458 + }, + { + "epoch": 1.4736965381552443, + "grad_norm": 0.5546310749790909, + "learning_rate": 8.543433433216455e-07, + "loss": 0.2579, + "step": 31459 + }, + { + "epoch": 1.4737433831451727, + "grad_norm": 0.5987890675909051, + "learning_rate": 8.542005816381175e-07, + "loss": 0.2616, + "step": 31460 + }, + { + "epoch": 1.473790228135101, + "grad_norm": 0.5766214640823403, + "learning_rate": 8.540578294257512e-07, + "loss": 0.2599, + "step": 31461 + }, + { + "epoch": 1.4738370731250292, + "grad_norm": 0.607405521490744, + "learning_rate": 8.539150866853685e-07, + "loss": 0.2857, + "step": 31462 + }, + { + "epoch": 1.4738839181149577, + "grad_norm": 0.5691621421409543, + "learning_rate": 8.537723534177917e-07, + "loss": 0.2641, + "step": 31463 + }, + { + "epoch": 1.473930763104886, + "grad_norm": 0.6090180235627518, + "learning_rate": 8.536296296238403e-07, + "loss": 0.2761, + "step": 31464 + }, + { + "epoch": 1.4739776080948142, + "grad_norm": 0.6011924649238103, + "learning_rate": 8.534869153043374e-07, + "loss": 0.2898, + "step": 31465 + }, + { + "epoch": 1.4740244530847426, + "grad_norm": 0.5716402841129837, + "learning_rate": 8.533442104601028e-07, + "loss": 0.2549, + "step": 31466 + }, + { + "epoch": 1.4740712980746709, + "grad_norm": 0.5817262267389081, + "learning_rate": 8.532015150919582e-07, + "loss": 0.2756, + "step": 31467 + }, + { + "epoch": 1.4741181430645993, + "grad_norm": 0.5944364635011582, + "learning_rate": 8.53058829200725e-07, + "loss": 0.262, + "step": 31468 + }, + { + "epoch": 1.4741649880545276, + "grad_norm": 0.625133986935557, + "learning_rate": 8.529161527872251e-07, + "loss": 0.2838, + "step": 31469 + }, + { + "epoch": 1.474211833044456, + "grad_norm": 0.564625827045539, + "learning_rate": 8.527734858522782e-07, + "loss": 0.2649, + "step": 31470 + }, + { + "epoch": 1.4742586780343843, + "grad_norm": 0.6110435909180584, + "learning_rate": 8.526308283967056e-07, + "loss": 0.2567, + "step": 31471 + }, + { + "epoch": 1.4743055230243125, + "grad_norm": 0.588208357659081, + "learning_rate": 8.524881804213294e-07, + "loss": 0.2662, + "step": 31472 + }, + { + "epoch": 1.474352368014241, + "grad_norm": 0.5976143990138437, + "learning_rate": 8.523455419269688e-07, + "loss": 0.277, + "step": 31473 + }, + { + "epoch": 1.4743992130041692, + "grad_norm": 0.6088051198355886, + "learning_rate": 8.522029129144457e-07, + "loss": 0.2664, + "step": 31474 + }, + { + "epoch": 1.4744460579940974, + "grad_norm": 0.579919438563373, + "learning_rate": 8.520602933845811e-07, + "loss": 0.2669, + "step": 31475 + }, + { + "epoch": 1.474492902984026, + "grad_norm": 0.5942373396627885, + "learning_rate": 8.519176833381948e-07, + "loss": 0.2587, + "step": 31476 + }, + { + "epoch": 1.4745397479739542, + "grad_norm": 0.6193432832090623, + "learning_rate": 8.517750827761079e-07, + "loss": 0.2656, + "step": 31477 + }, + { + "epoch": 1.4745865929638824, + "grad_norm": 0.5700762799956538, + "learning_rate": 8.516324916991411e-07, + "loss": 0.2701, + "step": 31478 + }, + { + "epoch": 1.4746334379538109, + "grad_norm": 0.6307336772735915, + "learning_rate": 8.514899101081162e-07, + "loss": 0.2686, + "step": 31479 + }, + { + "epoch": 1.474680282943739, + "grad_norm": 0.6243244624973935, + "learning_rate": 8.513473380038511e-07, + "loss": 0.2725, + "step": 31480 + }, + { + "epoch": 1.4747271279336676, + "grad_norm": 0.6314373864499874, + "learning_rate": 8.512047753871691e-07, + "loss": 0.2673, + "step": 31481 + }, + { + "epoch": 1.4747739729235958, + "grad_norm": 0.6256523347553797, + "learning_rate": 8.51062222258888e-07, + "loss": 0.2686, + "step": 31482 + }, + { + "epoch": 1.4748208179135243, + "grad_norm": 0.5657422825517902, + "learning_rate": 8.509196786198296e-07, + "loss": 0.2695, + "step": 31483 + }, + { + "epoch": 1.4748676629034525, + "grad_norm": 0.6119082198280488, + "learning_rate": 8.507771444708138e-07, + "loss": 0.2656, + "step": 31484 + }, + { + "epoch": 1.4749145078933807, + "grad_norm": 0.6629805208288146, + "learning_rate": 8.506346198126619e-07, + "loss": 0.2648, + "step": 31485 + }, + { + "epoch": 1.4749613528833092, + "grad_norm": 0.6061725269082509, + "learning_rate": 8.504921046461922e-07, + "loss": 0.2596, + "step": 31486 + }, + { + "epoch": 1.4750081978732374, + "grad_norm": 0.6043823788892714, + "learning_rate": 8.503495989722268e-07, + "loss": 0.2767, + "step": 31487 + }, + { + "epoch": 1.4750550428631657, + "grad_norm": 0.5657387529474089, + "learning_rate": 8.502071027915842e-07, + "loss": 0.2635, + "step": 31488 + }, + { + "epoch": 1.4751018878530942, + "grad_norm": 0.5703762868679481, + "learning_rate": 8.500646161050849e-07, + "loss": 0.2718, + "step": 31489 + }, + { + "epoch": 1.4751487328430224, + "grad_norm": 0.6367110582276185, + "learning_rate": 8.49922138913549e-07, + "loss": 0.2896, + "step": 31490 + }, + { + "epoch": 1.4751955778329506, + "grad_norm": 0.6153148518825275, + "learning_rate": 8.497796712177967e-07, + "loss": 0.2897, + "step": 31491 + }, + { + "epoch": 1.475242422822879, + "grad_norm": 0.6200616868033764, + "learning_rate": 8.496372130186481e-07, + "loss": 0.2742, + "step": 31492 + }, + { + "epoch": 1.4752892678128076, + "grad_norm": 0.606433908959152, + "learning_rate": 8.494947643169227e-07, + "loss": 0.2804, + "step": 31493 + }, + { + "epoch": 1.4753361128027358, + "grad_norm": 0.6147908676040661, + "learning_rate": 8.493523251134391e-07, + "loss": 0.2625, + "step": 31494 + }, + { + "epoch": 1.475382957792664, + "grad_norm": 0.6098404251889221, + "learning_rate": 8.492098954090183e-07, + "loss": 0.2783, + "step": 31495 + }, + { + "epoch": 1.4754298027825925, + "grad_norm": 0.6111445897180785, + "learning_rate": 8.490674752044795e-07, + "loss": 0.2739, + "step": 31496 + }, + { + "epoch": 1.4754766477725207, + "grad_norm": 0.6104353707851873, + "learning_rate": 8.489250645006425e-07, + "loss": 0.2732, + "step": 31497 + }, + { + "epoch": 1.475523492762449, + "grad_norm": 0.6293420073239672, + "learning_rate": 8.487826632983267e-07, + "loss": 0.3018, + "step": 31498 + }, + { + "epoch": 1.4755703377523774, + "grad_norm": 0.6083762413848621, + "learning_rate": 8.486402715983524e-07, + "loss": 0.275, + "step": 31499 + }, + { + "epoch": 1.4756171827423057, + "grad_norm": 0.6154372683357437, + "learning_rate": 8.484978894015386e-07, + "loss": 0.2852, + "step": 31500 + }, + { + "epoch": 1.475664027732234, + "grad_norm": 0.5635632240709921, + "learning_rate": 8.483555167087032e-07, + "loss": 0.2578, + "step": 31501 + }, + { + "epoch": 1.4757108727221624, + "grad_norm": 0.5791201904827404, + "learning_rate": 8.482131535206667e-07, + "loss": 0.2669, + "step": 31502 + }, + { + "epoch": 1.4757577177120906, + "grad_norm": 0.6188519119386137, + "learning_rate": 8.480707998382484e-07, + "loss": 0.277, + "step": 31503 + }, + { + "epoch": 1.475804562702019, + "grad_norm": 0.5965878561801075, + "learning_rate": 8.479284556622675e-07, + "loss": 0.2692, + "step": 31504 + }, + { + "epoch": 1.4758514076919473, + "grad_norm": 0.5561604714374645, + "learning_rate": 8.477861209935431e-07, + "loss": 0.2683, + "step": 31505 + }, + { + "epoch": 1.4758982526818758, + "grad_norm": 0.613261618625438, + "learning_rate": 8.476437958328951e-07, + "loss": 0.276, + "step": 31506 + }, + { + "epoch": 1.475945097671804, + "grad_norm": 0.5464849413117775, + "learning_rate": 8.475014801811418e-07, + "loss": 0.2587, + "step": 31507 + }, + { + "epoch": 1.4759919426617323, + "grad_norm": 0.5878247224675731, + "learning_rate": 8.473591740391013e-07, + "loss": 0.2615, + "step": 31508 + }, + { + "epoch": 1.4760387876516607, + "grad_norm": 0.5527485996846122, + "learning_rate": 8.472168774075934e-07, + "loss": 0.2655, + "step": 31509 + }, + { + "epoch": 1.476085632641589, + "grad_norm": 0.6129791004689982, + "learning_rate": 8.47074590287437e-07, + "loss": 0.3006, + "step": 31510 + }, + { + "epoch": 1.4761324776315172, + "grad_norm": 0.6030321932911218, + "learning_rate": 8.469323126794507e-07, + "loss": 0.2769, + "step": 31511 + }, + { + "epoch": 1.4761793226214457, + "grad_norm": 0.6302495957470223, + "learning_rate": 8.467900445844543e-07, + "loss": 0.2927, + "step": 31512 + }, + { + "epoch": 1.476226167611374, + "grad_norm": 0.5885880304502941, + "learning_rate": 8.466477860032649e-07, + "loss": 0.2677, + "step": 31513 + }, + { + "epoch": 1.4762730126013022, + "grad_norm": 0.6014630792828581, + "learning_rate": 8.465055369367029e-07, + "loss": 0.2802, + "step": 31514 + }, + { + "epoch": 1.4763198575912306, + "grad_norm": 0.5893858446957585, + "learning_rate": 8.463632973855848e-07, + "loss": 0.274, + "step": 31515 + }, + { + "epoch": 1.4763667025811589, + "grad_norm": 0.5634292875024655, + "learning_rate": 8.462210673507306e-07, + "loss": 0.2646, + "step": 31516 + }, + { + "epoch": 1.4764135475710873, + "grad_norm": 0.5752598270938232, + "learning_rate": 8.460788468329584e-07, + "loss": 0.2812, + "step": 31517 + }, + { + "epoch": 1.4764603925610156, + "grad_norm": 0.6302654346010584, + "learning_rate": 8.459366358330875e-07, + "loss": 0.2795, + "step": 31518 + }, + { + "epoch": 1.476507237550944, + "grad_norm": 0.6428877462149655, + "learning_rate": 8.457944343519347e-07, + "loss": 0.2798, + "step": 31519 + }, + { + "epoch": 1.4765540825408723, + "grad_norm": 0.6567726313361181, + "learning_rate": 8.456522423903193e-07, + "loss": 0.2749, + "step": 31520 + }, + { + "epoch": 1.4766009275308005, + "grad_norm": 0.6051168316614863, + "learning_rate": 8.455100599490603e-07, + "loss": 0.2893, + "step": 31521 + }, + { + "epoch": 1.476647772520729, + "grad_norm": 0.5676955348674491, + "learning_rate": 8.453678870289741e-07, + "loss": 0.2685, + "step": 31522 + }, + { + "epoch": 1.4766946175106572, + "grad_norm": 0.5985459845579953, + "learning_rate": 8.4522572363088e-07, + "loss": 0.2724, + "step": 31523 + }, + { + "epoch": 1.4767414625005855, + "grad_norm": 0.6159274057852373, + "learning_rate": 8.450835697555965e-07, + "loss": 0.2629, + "step": 31524 + }, + { + "epoch": 1.476788307490514, + "grad_norm": 0.5676972339538597, + "learning_rate": 8.449414254039407e-07, + "loss": 0.2549, + "step": 31525 + }, + { + "epoch": 1.4768351524804422, + "grad_norm": 0.5746515948914295, + "learning_rate": 8.447992905767305e-07, + "loss": 0.2518, + "step": 31526 + }, + { + "epoch": 1.4768819974703704, + "grad_norm": 0.6278818381405353, + "learning_rate": 8.446571652747854e-07, + "loss": 0.2772, + "step": 31527 + }, + { + "epoch": 1.4769288424602989, + "grad_norm": 0.6473339532767224, + "learning_rate": 8.445150494989216e-07, + "loss": 0.2877, + "step": 31528 + }, + { + "epoch": 1.4769756874502273, + "grad_norm": 0.6207390342337905, + "learning_rate": 8.443729432499573e-07, + "loss": 0.2782, + "step": 31529 + }, + { + "epoch": 1.4770225324401556, + "grad_norm": 0.5611349100252916, + "learning_rate": 8.442308465287116e-07, + "loss": 0.254, + "step": 31530 + }, + { + "epoch": 1.4770693774300838, + "grad_norm": 0.626702509594666, + "learning_rate": 8.440887593360003e-07, + "loss": 0.2632, + "step": 31531 + }, + { + "epoch": 1.4771162224200123, + "grad_norm": 0.6047670740642763, + "learning_rate": 8.439466816726421e-07, + "loss": 0.2649, + "step": 31532 + }, + { + "epoch": 1.4771630674099405, + "grad_norm": 0.601730396854949, + "learning_rate": 8.438046135394545e-07, + "loss": 0.259, + "step": 31533 + }, + { + "epoch": 1.4772099123998688, + "grad_norm": 0.5839329575268914, + "learning_rate": 8.43662554937256e-07, + "loss": 0.2772, + "step": 31534 + }, + { + "epoch": 1.4772567573897972, + "grad_norm": 0.563014031713085, + "learning_rate": 8.435205058668622e-07, + "loss": 0.2531, + "step": 31535 + }, + { + "epoch": 1.4773036023797255, + "grad_norm": 0.5954898997521874, + "learning_rate": 8.433784663290925e-07, + "loss": 0.2681, + "step": 31536 + }, + { + "epoch": 1.4773504473696537, + "grad_norm": 0.5514001530080513, + "learning_rate": 8.432364363247625e-07, + "loss": 0.2684, + "step": 31537 + }, + { + "epoch": 1.4773972923595822, + "grad_norm": 0.6047026227449601, + "learning_rate": 8.430944158546902e-07, + "loss": 0.2832, + "step": 31538 + }, + { + "epoch": 1.4774441373495104, + "grad_norm": 0.6076244774078117, + "learning_rate": 8.429524049196933e-07, + "loss": 0.2633, + "step": 31539 + }, + { + "epoch": 1.4774909823394389, + "grad_norm": 0.6086884220871563, + "learning_rate": 8.428104035205886e-07, + "loss": 0.269, + "step": 31540 + }, + { + "epoch": 1.4775378273293671, + "grad_norm": 0.6006620438764659, + "learning_rate": 8.426684116581945e-07, + "loss": 0.266, + "step": 31541 + }, + { + "epoch": 1.4775846723192956, + "grad_norm": 0.5570864328656965, + "learning_rate": 8.425264293333271e-07, + "loss": 0.265, + "step": 31542 + }, + { + "epoch": 1.4776315173092238, + "grad_norm": 0.6079119671252412, + "learning_rate": 8.423844565468029e-07, + "loss": 0.2786, + "step": 31543 + }, + { + "epoch": 1.477678362299152, + "grad_norm": 0.5932758190133393, + "learning_rate": 8.422424932994391e-07, + "loss": 0.2617, + "step": 31544 + }, + { + "epoch": 1.4777252072890805, + "grad_norm": 0.5941114823928602, + "learning_rate": 8.421005395920534e-07, + "loss": 0.2697, + "step": 31545 + }, + { + "epoch": 1.4777720522790088, + "grad_norm": 0.5683122435870204, + "learning_rate": 8.419585954254625e-07, + "loss": 0.2703, + "step": 31546 + }, + { + "epoch": 1.477818897268937, + "grad_norm": 0.6377838715117644, + "learning_rate": 8.41816660800483e-07, + "loss": 0.264, + "step": 31547 + }, + { + "epoch": 1.4778657422588655, + "grad_norm": 0.6010573121673704, + "learning_rate": 8.416747357179325e-07, + "loss": 0.2775, + "step": 31548 + }, + { + "epoch": 1.4779125872487937, + "grad_norm": 0.592137877915769, + "learning_rate": 8.415328201786271e-07, + "loss": 0.2694, + "step": 31549 + }, + { + "epoch": 1.477959432238722, + "grad_norm": 0.6207090079934413, + "learning_rate": 8.413909141833828e-07, + "loss": 0.2712, + "step": 31550 + }, + { + "epoch": 1.4780062772286504, + "grad_norm": 0.6012483475796164, + "learning_rate": 8.41249017733017e-07, + "loss": 0.2804, + "step": 31551 + }, + { + "epoch": 1.4780531222185787, + "grad_norm": 0.5830185252508879, + "learning_rate": 8.411071308283461e-07, + "loss": 0.2677, + "step": 31552 + }, + { + "epoch": 1.4780999672085071, + "grad_norm": 0.6293735594147403, + "learning_rate": 8.409652534701867e-07, + "loss": 0.265, + "step": 31553 + }, + { + "epoch": 1.4781468121984354, + "grad_norm": 0.5824005984847385, + "learning_rate": 8.408233856593562e-07, + "loss": 0.2684, + "step": 31554 + }, + { + "epoch": 1.4781936571883638, + "grad_norm": 0.5462395744871954, + "learning_rate": 8.406815273966693e-07, + "loss": 0.2653, + "step": 31555 + }, + { + "epoch": 1.478240502178292, + "grad_norm": 0.6060046775581815, + "learning_rate": 8.405396786829437e-07, + "loss": 0.2821, + "step": 31556 + }, + { + "epoch": 1.4782873471682203, + "grad_norm": 0.6259018300136514, + "learning_rate": 8.403978395189946e-07, + "loss": 0.2961, + "step": 31557 + }, + { + "epoch": 1.4783341921581488, + "grad_norm": 0.6121092447655196, + "learning_rate": 8.402560099056386e-07, + "loss": 0.278, + "step": 31558 + }, + { + "epoch": 1.478381037148077, + "grad_norm": 0.5541974428137434, + "learning_rate": 8.401141898436924e-07, + "loss": 0.2717, + "step": 31559 + }, + { + "epoch": 1.4784278821380052, + "grad_norm": 0.6136014498141569, + "learning_rate": 8.399723793339723e-07, + "loss": 0.2832, + "step": 31560 + }, + { + "epoch": 1.4784747271279337, + "grad_norm": 0.6017459273603614, + "learning_rate": 8.398305783772934e-07, + "loss": 0.278, + "step": 31561 + }, + { + "epoch": 1.478521572117862, + "grad_norm": 0.5951024411955793, + "learning_rate": 8.396887869744722e-07, + "loss": 0.2766, + "step": 31562 + }, + { + "epoch": 1.4785684171077902, + "grad_norm": 0.5786534839980887, + "learning_rate": 8.395470051263254e-07, + "loss": 0.273, + "step": 31563 + }, + { + "epoch": 1.4786152620977187, + "grad_norm": 0.6055662510883828, + "learning_rate": 8.394052328336674e-07, + "loss": 0.2722, + "step": 31564 + }, + { + "epoch": 1.478662107087647, + "grad_norm": 0.5711647481678297, + "learning_rate": 8.392634700973151e-07, + "loss": 0.2628, + "step": 31565 + }, + { + "epoch": 1.4787089520775754, + "grad_norm": 0.5620787183200981, + "learning_rate": 8.391217169180849e-07, + "loss": 0.2812, + "step": 31566 + }, + { + "epoch": 1.4787557970675036, + "grad_norm": 0.5811903213075924, + "learning_rate": 8.389799732967909e-07, + "loss": 0.2669, + "step": 31567 + }, + { + "epoch": 1.478802642057432, + "grad_norm": 0.5725044889797206, + "learning_rate": 8.388382392342497e-07, + "loss": 0.2709, + "step": 31568 + }, + { + "epoch": 1.4788494870473603, + "grad_norm": 0.5926320440225377, + "learning_rate": 8.386965147312768e-07, + "loss": 0.2767, + "step": 31569 + }, + { + "epoch": 1.4788963320372885, + "grad_norm": 0.604467455255092, + "learning_rate": 8.385547997886889e-07, + "loss": 0.2954, + "step": 31570 + }, + { + "epoch": 1.478943177027217, + "grad_norm": 0.5994821726767913, + "learning_rate": 8.384130944072997e-07, + "loss": 0.2816, + "step": 31571 + }, + { + "epoch": 1.4789900220171452, + "grad_norm": 0.6327820759298479, + "learning_rate": 8.382713985879262e-07, + "loss": 0.268, + "step": 31572 + }, + { + "epoch": 1.4790368670070735, + "grad_norm": 0.794187412957796, + "learning_rate": 8.381297123313825e-07, + "loss": 0.2889, + "step": 31573 + }, + { + "epoch": 1.479083711997002, + "grad_norm": 0.6079725990304293, + "learning_rate": 8.379880356384845e-07, + "loss": 0.2777, + "step": 31574 + }, + { + "epoch": 1.4791305569869302, + "grad_norm": 0.6016758967876679, + "learning_rate": 8.378463685100477e-07, + "loss": 0.273, + "step": 31575 + }, + { + "epoch": 1.4791774019768587, + "grad_norm": 0.5719032773457708, + "learning_rate": 8.37704710946888e-07, + "loss": 0.2685, + "step": 31576 + }, + { + "epoch": 1.479224246966787, + "grad_norm": 0.5715831027087601, + "learning_rate": 8.375630629498191e-07, + "loss": 0.2679, + "step": 31577 + }, + { + "epoch": 1.4792710919567154, + "grad_norm": 0.5971554521191287, + "learning_rate": 8.374214245196569e-07, + "loss": 0.2738, + "step": 31578 + }, + { + "epoch": 1.4793179369466436, + "grad_norm": 0.5694733737572661, + "learning_rate": 8.372797956572173e-07, + "loss": 0.2642, + "step": 31579 + }, + { + "epoch": 1.4793647819365718, + "grad_norm": 0.6278409508390402, + "learning_rate": 8.371381763633138e-07, + "loss": 0.2543, + "step": 31580 + }, + { + "epoch": 1.4794116269265003, + "grad_norm": 0.5675131391810542, + "learning_rate": 8.369965666387622e-07, + "loss": 0.2652, + "step": 31581 + }, + { + "epoch": 1.4794584719164285, + "grad_norm": 0.6703152748397271, + "learning_rate": 8.368549664843775e-07, + "loss": 0.3024, + "step": 31582 + }, + { + "epoch": 1.4795053169063568, + "grad_norm": 0.5796496011095801, + "learning_rate": 8.367133759009752e-07, + "loss": 0.2724, + "step": 31583 + }, + { + "epoch": 1.4795521618962852, + "grad_norm": 0.5751447141988251, + "learning_rate": 8.365717948893684e-07, + "loss": 0.2718, + "step": 31584 + }, + { + "epoch": 1.4795990068862135, + "grad_norm": 0.5519712403531567, + "learning_rate": 8.364302234503738e-07, + "loss": 0.2627, + "step": 31585 + }, + { + "epoch": 1.4796458518761417, + "grad_norm": 0.581591964550035, + "learning_rate": 8.362886615848042e-07, + "loss": 0.2712, + "step": 31586 + }, + { + "epoch": 1.4796926968660702, + "grad_norm": 0.6118819376445572, + "learning_rate": 8.361471092934753e-07, + "loss": 0.2759, + "step": 31587 + }, + { + "epoch": 1.4797395418559984, + "grad_norm": 0.604732224789574, + "learning_rate": 8.360055665772016e-07, + "loss": 0.2806, + "step": 31588 + }, + { + "epoch": 1.479786386845927, + "grad_norm": 0.5678298700593175, + "learning_rate": 8.358640334367976e-07, + "loss": 0.2611, + "step": 31589 + }, + { + "epoch": 1.4798332318358551, + "grad_norm": 0.583303528240195, + "learning_rate": 8.357225098730784e-07, + "loss": 0.2735, + "step": 31590 + }, + { + "epoch": 1.4798800768257836, + "grad_norm": 0.5870981917391397, + "learning_rate": 8.355809958868583e-07, + "loss": 0.2536, + "step": 31591 + }, + { + "epoch": 1.4799269218157118, + "grad_norm": 0.5859790152450477, + "learning_rate": 8.354394914789501e-07, + "loss": 0.2669, + "step": 31592 + }, + { + "epoch": 1.47997376680564, + "grad_norm": 0.5875870698654255, + "learning_rate": 8.352979966501693e-07, + "loss": 0.275, + "step": 31593 + }, + { + "epoch": 1.4800206117955685, + "grad_norm": 0.602382976587003, + "learning_rate": 8.351565114013302e-07, + "loss": 0.2641, + "step": 31594 + }, + { + "epoch": 1.4800674567854968, + "grad_norm": 0.6111683444000218, + "learning_rate": 8.35015035733247e-07, + "loss": 0.2848, + "step": 31595 + }, + { + "epoch": 1.480114301775425, + "grad_norm": 0.5821947064521787, + "learning_rate": 8.348735696467336e-07, + "loss": 0.2761, + "step": 31596 + }, + { + "epoch": 1.4801611467653535, + "grad_norm": 0.563854203195069, + "learning_rate": 8.347321131426054e-07, + "loss": 0.2677, + "step": 31597 + }, + { + "epoch": 1.4802079917552817, + "grad_norm": 0.6990326338899411, + "learning_rate": 8.345906662216749e-07, + "loss": 0.2886, + "step": 31598 + }, + { + "epoch": 1.48025483674521, + "grad_norm": 0.5571170209218219, + "learning_rate": 8.34449228884756e-07, + "loss": 0.2573, + "step": 31599 + }, + { + "epoch": 1.4803016817351384, + "grad_norm": 0.6259276847619493, + "learning_rate": 8.343078011326633e-07, + "loss": 0.2772, + "step": 31600 + }, + { + "epoch": 1.4803485267250667, + "grad_norm": 0.6070035347916507, + "learning_rate": 8.341663829662103e-07, + "loss": 0.2886, + "step": 31601 + }, + { + "epoch": 1.4803953717149951, + "grad_norm": 0.5578887295495241, + "learning_rate": 8.340249743862111e-07, + "loss": 0.2571, + "step": 31602 + }, + { + "epoch": 1.4804422167049234, + "grad_norm": 0.6219397757664227, + "learning_rate": 8.338835753934804e-07, + "loss": 0.2924, + "step": 31603 + }, + { + "epoch": 1.4804890616948518, + "grad_norm": 0.5850125413848336, + "learning_rate": 8.337421859888301e-07, + "loss": 0.2565, + "step": 31604 + }, + { + "epoch": 1.48053590668478, + "grad_norm": 0.6146365727657469, + "learning_rate": 8.336008061730755e-07, + "loss": 0.2953, + "step": 31605 + }, + { + "epoch": 1.4805827516747083, + "grad_norm": 0.6149754340505844, + "learning_rate": 8.334594359470285e-07, + "loss": 0.2842, + "step": 31606 + }, + { + "epoch": 1.4806295966646368, + "grad_norm": 0.5910933436321358, + "learning_rate": 8.333180753115036e-07, + "loss": 0.2591, + "step": 31607 + }, + { + "epoch": 1.480676441654565, + "grad_norm": 0.6124713715383666, + "learning_rate": 8.331767242673145e-07, + "loss": 0.2587, + "step": 31608 + }, + { + "epoch": 1.4807232866444933, + "grad_norm": 0.5840602992551225, + "learning_rate": 8.330353828152752e-07, + "loss": 0.2691, + "step": 31609 + }, + { + "epoch": 1.4807701316344217, + "grad_norm": 0.6475977159302544, + "learning_rate": 8.328940509561972e-07, + "loss": 0.2735, + "step": 31610 + }, + { + "epoch": 1.48081697662435, + "grad_norm": 0.625904994813213, + "learning_rate": 8.327527286908952e-07, + "loss": 0.2864, + "step": 31611 + }, + { + "epoch": 1.4808638216142784, + "grad_norm": 0.5570348742995429, + "learning_rate": 8.326114160201829e-07, + "loss": 0.2655, + "step": 31612 + }, + { + "epoch": 1.4809106666042067, + "grad_norm": 0.5631215035999256, + "learning_rate": 8.32470112944872e-07, + "loss": 0.262, + "step": 31613 + }, + { + "epoch": 1.4809575115941351, + "grad_norm": 0.5747112318757157, + "learning_rate": 8.323288194657764e-07, + "loss": 0.2635, + "step": 31614 + }, + { + "epoch": 1.4810043565840634, + "grad_norm": 0.5978355362573358, + "learning_rate": 8.321875355837103e-07, + "loss": 0.2799, + "step": 31615 + }, + { + "epoch": 1.4810512015739916, + "grad_norm": 0.6054397395965531, + "learning_rate": 8.320462612994848e-07, + "loss": 0.2844, + "step": 31616 + }, + { + "epoch": 1.48109804656392, + "grad_norm": 0.6038939860761962, + "learning_rate": 8.319049966139136e-07, + "loss": 0.2725, + "step": 31617 + }, + { + "epoch": 1.4811448915538483, + "grad_norm": 0.6221991047549433, + "learning_rate": 8.317637415278105e-07, + "loss": 0.284, + "step": 31618 + }, + { + "epoch": 1.4811917365437766, + "grad_norm": 0.5792268635659045, + "learning_rate": 8.31622496041988e-07, + "loss": 0.2734, + "step": 31619 + }, + { + "epoch": 1.481238581533705, + "grad_norm": 0.6355698652908267, + "learning_rate": 8.314812601572583e-07, + "loss": 0.2703, + "step": 31620 + }, + { + "epoch": 1.4812854265236333, + "grad_norm": 0.6229307283131913, + "learning_rate": 8.313400338744351e-07, + "loss": 0.2828, + "step": 31621 + }, + { + "epoch": 1.4813322715135615, + "grad_norm": 0.5931700326349449, + "learning_rate": 8.3119881719433e-07, + "loss": 0.2894, + "step": 31622 + }, + { + "epoch": 1.48137911650349, + "grad_norm": 0.6216190918437559, + "learning_rate": 8.310576101177562e-07, + "loss": 0.2727, + "step": 31623 + }, + { + "epoch": 1.4814259614934182, + "grad_norm": 0.5958745635623681, + "learning_rate": 8.309164126455263e-07, + "loss": 0.263, + "step": 31624 + }, + { + "epoch": 1.4814728064833467, + "grad_norm": 0.5959422620222204, + "learning_rate": 8.30775224778454e-07, + "loss": 0.2789, + "step": 31625 + }, + { + "epoch": 1.481519651473275, + "grad_norm": 0.6111521850720921, + "learning_rate": 8.306340465173496e-07, + "loss": 0.2826, + "step": 31626 + }, + { + "epoch": 1.4815664964632034, + "grad_norm": 0.587239209763846, + "learning_rate": 8.304928778630275e-07, + "loss": 0.2676, + "step": 31627 + }, + { + "epoch": 1.4816133414531316, + "grad_norm": 0.5935304349703661, + "learning_rate": 8.303517188162988e-07, + "loss": 0.2874, + "step": 31628 + }, + { + "epoch": 1.4816601864430599, + "grad_norm": 0.58812967274777, + "learning_rate": 8.302105693779761e-07, + "loss": 0.2664, + "step": 31629 + }, + { + "epoch": 1.4817070314329883, + "grad_norm": 0.570280368528144, + "learning_rate": 8.300694295488718e-07, + "loss": 0.2635, + "step": 31630 + }, + { + "epoch": 1.4817538764229166, + "grad_norm": 0.6024483877621923, + "learning_rate": 8.299282993297983e-07, + "loss": 0.272, + "step": 31631 + }, + { + "epoch": 1.4818007214128448, + "grad_norm": 0.6211709035413684, + "learning_rate": 8.297871787215686e-07, + "loss": 0.2747, + "step": 31632 + }, + { + "epoch": 1.4818475664027733, + "grad_norm": 0.5986900607868816, + "learning_rate": 8.296460677249937e-07, + "loss": 0.2688, + "step": 31633 + }, + { + "epoch": 1.4818944113927015, + "grad_norm": 0.6139118429159708, + "learning_rate": 8.295049663408852e-07, + "loss": 0.2852, + "step": 31634 + }, + { + "epoch": 1.4819412563826297, + "grad_norm": 0.5880208037118931, + "learning_rate": 8.293638745700555e-07, + "loss": 0.2706, + "step": 31635 + }, + { + "epoch": 1.4819881013725582, + "grad_norm": 0.6603943308064846, + "learning_rate": 8.29222792413317e-07, + "loss": 0.2912, + "step": 31636 + }, + { + "epoch": 1.4820349463624864, + "grad_norm": 0.6004742942624328, + "learning_rate": 8.290817198714815e-07, + "loss": 0.2765, + "step": 31637 + }, + { + "epoch": 1.482081791352415, + "grad_norm": 0.6071681842979981, + "learning_rate": 8.289406569453607e-07, + "loss": 0.2767, + "step": 31638 + }, + { + "epoch": 1.4821286363423432, + "grad_norm": 0.5569652310912153, + "learning_rate": 8.287996036357671e-07, + "loss": 0.2565, + "step": 31639 + }, + { + "epoch": 1.4821754813322716, + "grad_norm": 0.6163669168629585, + "learning_rate": 8.286585599435118e-07, + "loss": 0.2793, + "step": 31640 + }, + { + "epoch": 1.4822223263221999, + "grad_norm": 0.6565021379022673, + "learning_rate": 8.285175258694056e-07, + "loss": 0.3022, + "step": 31641 + }, + { + "epoch": 1.482269171312128, + "grad_norm": 0.6234048634979308, + "learning_rate": 8.283765014142608e-07, + "loss": 0.2673, + "step": 31642 + }, + { + "epoch": 1.4823160163020566, + "grad_norm": 0.5980566788501228, + "learning_rate": 8.282354865788892e-07, + "loss": 0.272, + "step": 31643 + }, + { + "epoch": 1.4823628612919848, + "grad_norm": 0.6204327730975628, + "learning_rate": 8.28094481364102e-07, + "loss": 0.2645, + "step": 31644 + }, + { + "epoch": 1.482409706281913, + "grad_norm": 0.6004436573098167, + "learning_rate": 8.279534857707111e-07, + "loss": 0.2751, + "step": 31645 + }, + { + "epoch": 1.4824565512718415, + "grad_norm": 0.591665507567655, + "learning_rate": 8.278124997995284e-07, + "loss": 0.2412, + "step": 31646 + }, + { + "epoch": 1.4825033962617697, + "grad_norm": 0.6043270529396775, + "learning_rate": 8.276715234513647e-07, + "loss": 0.2778, + "step": 31647 + }, + { + "epoch": 1.4825502412516982, + "grad_norm": 0.6125741816731725, + "learning_rate": 8.275305567270298e-07, + "loss": 0.2748, + "step": 31648 + }, + { + "epoch": 1.4825970862416264, + "grad_norm": 0.587060513819759, + "learning_rate": 8.273895996273365e-07, + "loss": 0.2824, + "step": 31649 + }, + { + "epoch": 1.482643931231555, + "grad_norm": 0.588411885301978, + "learning_rate": 8.272486521530954e-07, + "loss": 0.2682, + "step": 31650 + }, + { + "epoch": 1.4826907762214832, + "grad_norm": 0.6277070256932994, + "learning_rate": 8.271077143051181e-07, + "loss": 0.2701, + "step": 31651 + }, + { + "epoch": 1.4827376212114114, + "grad_norm": 0.63511591973133, + "learning_rate": 8.269667860842162e-07, + "loss": 0.2897, + "step": 31652 + }, + { + "epoch": 1.4827844662013399, + "grad_norm": 0.5943621238076334, + "learning_rate": 8.268258674911992e-07, + "loss": 0.2605, + "step": 31653 + }, + { + "epoch": 1.482831311191268, + "grad_norm": 0.5698762812581848, + "learning_rate": 8.266849585268794e-07, + "loss": 0.2576, + "step": 31654 + }, + { + "epoch": 1.4828781561811963, + "grad_norm": 0.5813982548491896, + "learning_rate": 8.265440591920665e-07, + "loss": 0.2665, + "step": 31655 + }, + { + "epoch": 1.4829250011711248, + "grad_norm": 0.6138491189799088, + "learning_rate": 8.264031694875719e-07, + "loss": 0.2769, + "step": 31656 + }, + { + "epoch": 1.482971846161053, + "grad_norm": 0.5910967045960018, + "learning_rate": 8.262622894142061e-07, + "loss": 0.2808, + "step": 31657 + }, + { + "epoch": 1.4830186911509813, + "grad_norm": 0.588277692211348, + "learning_rate": 8.261214189727812e-07, + "loss": 0.2626, + "step": 31658 + }, + { + "epoch": 1.4830655361409097, + "grad_norm": 0.598417739351534, + "learning_rate": 8.259805581641062e-07, + "loss": 0.264, + "step": 31659 + }, + { + "epoch": 1.483112381130838, + "grad_norm": 0.5587267195723042, + "learning_rate": 8.25839706988992e-07, + "loss": 0.251, + "step": 31660 + }, + { + "epoch": 1.4831592261207664, + "grad_norm": 0.6113649970056463, + "learning_rate": 8.256988654482506e-07, + "loss": 0.2702, + "step": 31661 + }, + { + "epoch": 1.4832060711106947, + "grad_norm": 0.6018877143012596, + "learning_rate": 8.255580335426905e-07, + "loss": 0.264, + "step": 31662 + }, + { + "epoch": 1.4832529161006232, + "grad_norm": 0.6369562560730498, + "learning_rate": 8.25417211273123e-07, + "loss": 0.2633, + "step": 31663 + }, + { + "epoch": 1.4832997610905514, + "grad_norm": 0.6324479219199491, + "learning_rate": 8.252763986403592e-07, + "loss": 0.2742, + "step": 31664 + }, + { + "epoch": 1.4833466060804796, + "grad_norm": 0.6503377909002144, + "learning_rate": 8.251355956452084e-07, + "loss": 0.2867, + "step": 31665 + }, + { + "epoch": 1.483393451070408, + "grad_norm": 0.5603063884378797, + "learning_rate": 8.24994802288481e-07, + "loss": 0.2725, + "step": 31666 + }, + { + "epoch": 1.4834402960603363, + "grad_norm": 0.5587148476079508, + "learning_rate": 8.248540185709883e-07, + "loss": 0.2651, + "step": 31667 + }, + { + "epoch": 1.4834871410502646, + "grad_norm": 0.5914565444540921, + "learning_rate": 8.247132444935391e-07, + "loss": 0.2692, + "step": 31668 + }, + { + "epoch": 1.483533986040193, + "grad_norm": 0.6333283334020211, + "learning_rate": 8.245724800569438e-07, + "loss": 0.2838, + "step": 31669 + }, + { + "epoch": 1.4835808310301213, + "grad_norm": 0.5630530889394352, + "learning_rate": 8.244317252620137e-07, + "loss": 0.2644, + "step": 31670 + }, + { + "epoch": 1.4836276760200495, + "grad_norm": 0.5635221137258427, + "learning_rate": 8.242909801095572e-07, + "loss": 0.2565, + "step": 31671 + }, + { + "epoch": 1.483674521009978, + "grad_norm": 0.5565644331930641, + "learning_rate": 8.241502446003849e-07, + "loss": 0.272, + "step": 31672 + }, + { + "epoch": 1.4837213659999062, + "grad_norm": 0.6077479350794787, + "learning_rate": 8.240095187353067e-07, + "loss": 0.2741, + "step": 31673 + }, + { + "epoch": 1.4837682109898347, + "grad_norm": 0.569394243606017, + "learning_rate": 8.238688025151334e-07, + "loss": 0.2564, + "step": 31674 + }, + { + "epoch": 1.483815055979763, + "grad_norm": 0.5909239507631764, + "learning_rate": 8.23728095940673e-07, + "loss": 0.2664, + "step": 31675 + }, + { + "epoch": 1.4838619009696914, + "grad_norm": 0.5787514382161054, + "learning_rate": 8.235873990127369e-07, + "loss": 0.2769, + "step": 31676 + }, + { + "epoch": 1.4839087459596196, + "grad_norm": 0.6208455741090926, + "learning_rate": 8.234467117321329e-07, + "loss": 0.2675, + "step": 31677 + }, + { + "epoch": 1.4839555909495479, + "grad_norm": 0.6235662702299712, + "learning_rate": 8.23306034099672e-07, + "loss": 0.2874, + "step": 31678 + }, + { + "epoch": 1.4840024359394763, + "grad_norm": 0.5437579683033281, + "learning_rate": 8.231653661161634e-07, + "loss": 0.2634, + "step": 31679 + }, + { + "epoch": 1.4840492809294046, + "grad_norm": 0.6275360949746239, + "learning_rate": 8.230247077824166e-07, + "loss": 0.2897, + "step": 31680 + }, + { + "epoch": 1.4840961259193328, + "grad_norm": 0.5484027086380079, + "learning_rate": 8.228840590992417e-07, + "loss": 0.2616, + "step": 31681 + }, + { + "epoch": 1.4841429709092613, + "grad_norm": 0.6137241879801212, + "learning_rate": 8.227434200674481e-07, + "loss": 0.2853, + "step": 31682 + }, + { + "epoch": 1.4841898158991895, + "grad_norm": 0.5705556004358723, + "learning_rate": 8.226027906878434e-07, + "loss": 0.2664, + "step": 31683 + }, + { + "epoch": 1.484236660889118, + "grad_norm": 0.5668615530945337, + "learning_rate": 8.224621709612379e-07, + "loss": 0.2603, + "step": 31684 + }, + { + "epoch": 1.4842835058790462, + "grad_norm": 0.6621607517803662, + "learning_rate": 8.22321560888441e-07, + "loss": 0.2856, + "step": 31685 + }, + { + "epoch": 1.4843303508689747, + "grad_norm": 0.584437741200186, + "learning_rate": 8.221809604702621e-07, + "loss": 0.2548, + "step": 31686 + }, + { + "epoch": 1.484377195858903, + "grad_norm": 0.58323991837516, + "learning_rate": 8.220403697075099e-07, + "loss": 0.259, + "step": 31687 + }, + { + "epoch": 1.4844240408488312, + "grad_norm": 0.5793758792186933, + "learning_rate": 8.218997886009944e-07, + "loss": 0.2705, + "step": 31688 + }, + { + "epoch": 1.4844708858387596, + "grad_norm": 0.6381468816148348, + "learning_rate": 8.21759217151524e-07, + "loss": 0.2938, + "step": 31689 + }, + { + "epoch": 1.4845177308286879, + "grad_norm": 0.5920148932385799, + "learning_rate": 8.216186553599064e-07, + "loss": 0.266, + "step": 31690 + }, + { + "epoch": 1.4845645758186161, + "grad_norm": 0.5968555787116141, + "learning_rate": 8.21478103226952e-07, + "loss": 0.2644, + "step": 31691 + }, + { + "epoch": 1.4846114208085446, + "grad_norm": 0.5853956433194355, + "learning_rate": 8.21337560753469e-07, + "loss": 0.2647, + "step": 31692 + }, + { + "epoch": 1.4846582657984728, + "grad_norm": 0.6229679559785379, + "learning_rate": 8.211970279402665e-07, + "loss": 0.2753, + "step": 31693 + }, + { + "epoch": 1.484705110788401, + "grad_norm": 0.6001540686011914, + "learning_rate": 8.210565047881538e-07, + "loss": 0.2778, + "step": 31694 + }, + { + "epoch": 1.4847519557783295, + "grad_norm": 0.6132803023223014, + "learning_rate": 8.209159912979384e-07, + "loss": 0.2885, + "step": 31695 + }, + { + "epoch": 1.4847988007682578, + "grad_norm": 0.580648191237294, + "learning_rate": 8.207754874704299e-07, + "loss": 0.264, + "step": 31696 + }, + { + "epoch": 1.4848456457581862, + "grad_norm": 0.6028730307066195, + "learning_rate": 8.206349933064359e-07, + "loss": 0.2634, + "step": 31697 + }, + { + "epoch": 1.4848924907481145, + "grad_norm": 0.6421338868819015, + "learning_rate": 8.204945088067653e-07, + "loss": 0.3027, + "step": 31698 + }, + { + "epoch": 1.484939335738043, + "grad_norm": 0.6429060760440946, + "learning_rate": 8.203540339722266e-07, + "loss": 0.2868, + "step": 31699 + }, + { + "epoch": 1.4849861807279712, + "grad_norm": 0.5752405577275771, + "learning_rate": 8.202135688036292e-07, + "loss": 0.2591, + "step": 31700 + }, + { + "epoch": 1.4850330257178994, + "grad_norm": 0.575012177575682, + "learning_rate": 8.200731133017797e-07, + "loss": 0.2712, + "step": 31701 + }, + { + "epoch": 1.4850798707078279, + "grad_norm": 0.6067096440643469, + "learning_rate": 8.199326674674871e-07, + "loss": 0.2798, + "step": 31702 + }, + { + "epoch": 1.4851267156977561, + "grad_norm": 0.5712169933827086, + "learning_rate": 8.197922313015607e-07, + "loss": 0.2689, + "step": 31703 + }, + { + "epoch": 1.4851735606876844, + "grad_norm": 0.6360949707973016, + "learning_rate": 8.196518048048066e-07, + "loss": 0.2911, + "step": 31704 + }, + { + "epoch": 1.4852204056776128, + "grad_norm": 0.6336518192418337, + "learning_rate": 8.195113879780345e-07, + "loss": 0.2758, + "step": 31705 + }, + { + "epoch": 1.485267250667541, + "grad_norm": 0.582023604706903, + "learning_rate": 8.193709808220524e-07, + "loss": 0.2807, + "step": 31706 + }, + { + "epoch": 1.4853140956574693, + "grad_norm": 0.608053769553214, + "learning_rate": 8.19230583337667e-07, + "loss": 0.282, + "step": 31707 + }, + { + "epoch": 1.4853609406473978, + "grad_norm": 0.5637843633685803, + "learning_rate": 8.190901955256874e-07, + "loss": 0.263, + "step": 31708 + }, + { + "epoch": 1.485407785637326, + "grad_norm": 0.5532031974203202, + "learning_rate": 8.189498173869215e-07, + "loss": 0.2655, + "step": 31709 + }, + { + "epoch": 1.4854546306272545, + "grad_norm": 0.5905549059662852, + "learning_rate": 8.188094489221773e-07, + "loss": 0.2581, + "step": 31710 + }, + { + "epoch": 1.4855014756171827, + "grad_norm": 0.575895715001422, + "learning_rate": 8.186690901322613e-07, + "loss": 0.2641, + "step": 31711 + }, + { + "epoch": 1.4855483206071112, + "grad_norm": 0.6031768052997328, + "learning_rate": 8.185287410179823e-07, + "loss": 0.2601, + "step": 31712 + }, + { + "epoch": 1.4855951655970394, + "grad_norm": 0.6207194117377417, + "learning_rate": 8.183884015801486e-07, + "loss": 0.2873, + "step": 31713 + }, + { + "epoch": 1.4856420105869677, + "grad_norm": 0.6016268710577499, + "learning_rate": 8.182480718195663e-07, + "loss": 0.2595, + "step": 31714 + }, + { + "epoch": 1.4856888555768961, + "grad_norm": 0.614874454918007, + "learning_rate": 8.181077517370434e-07, + "loss": 0.2777, + "step": 31715 + }, + { + "epoch": 1.4857357005668244, + "grad_norm": 0.6393271866632657, + "learning_rate": 8.179674413333886e-07, + "loss": 0.2831, + "step": 31716 + }, + { + "epoch": 1.4857825455567526, + "grad_norm": 0.5661523822243278, + "learning_rate": 8.178271406094074e-07, + "loss": 0.2597, + "step": 31717 + }, + { + "epoch": 1.485829390546681, + "grad_norm": 0.565670020980705, + "learning_rate": 8.176868495659082e-07, + "loss": 0.2691, + "step": 31718 + }, + { + "epoch": 1.4858762355366093, + "grad_norm": 0.6308089933373149, + "learning_rate": 8.175465682036995e-07, + "loss": 0.277, + "step": 31719 + }, + { + "epoch": 1.4859230805265378, + "grad_norm": 0.5881110159902526, + "learning_rate": 8.174062965235863e-07, + "loss": 0.2663, + "step": 31720 + }, + { + "epoch": 1.485969925516466, + "grad_norm": 0.5796084294228487, + "learning_rate": 8.172660345263772e-07, + "loss": 0.2621, + "step": 31721 + }, + { + "epoch": 1.4860167705063945, + "grad_norm": 0.60824675981038, + "learning_rate": 8.171257822128789e-07, + "loss": 0.2868, + "step": 31722 + }, + { + "epoch": 1.4860636154963227, + "grad_norm": 0.6044406686406733, + "learning_rate": 8.169855395838997e-07, + "loss": 0.2717, + "step": 31723 + }, + { + "epoch": 1.486110460486251, + "grad_norm": 0.570875750581077, + "learning_rate": 8.168453066402449e-07, + "loss": 0.2711, + "step": 31724 + }, + { + "epoch": 1.4861573054761794, + "grad_norm": 0.5572630652087998, + "learning_rate": 8.167050833827231e-07, + "loss": 0.2609, + "step": 31725 + }, + { + "epoch": 1.4862041504661077, + "grad_norm": 0.6153662884649476, + "learning_rate": 8.165648698121398e-07, + "loss": 0.2869, + "step": 31726 + }, + { + "epoch": 1.486250995456036, + "grad_norm": 0.5794719165203257, + "learning_rate": 8.164246659293023e-07, + "loss": 0.2706, + "step": 31727 + }, + { + "epoch": 1.4862978404459644, + "grad_norm": 0.6119159052753129, + "learning_rate": 8.162844717350179e-07, + "loss": 0.2743, + "step": 31728 + }, + { + "epoch": 1.4863446854358926, + "grad_norm": 0.5918095059879633, + "learning_rate": 8.161442872300932e-07, + "loss": 0.2747, + "step": 31729 + }, + { + "epoch": 1.4863915304258208, + "grad_norm": 0.5555389250209507, + "learning_rate": 8.160041124153353e-07, + "loss": 0.2545, + "step": 31730 + }, + { + "epoch": 1.4864383754157493, + "grad_norm": 0.6117394506045692, + "learning_rate": 8.158639472915508e-07, + "loss": 0.2615, + "step": 31731 + }, + { + "epoch": 1.4864852204056775, + "grad_norm": 0.5841674915985103, + "learning_rate": 8.157237918595454e-07, + "loss": 0.2637, + "step": 31732 + }, + { + "epoch": 1.486532065395606, + "grad_norm": 0.6509594109367698, + "learning_rate": 8.15583646120126e-07, + "loss": 0.2648, + "step": 31733 + }, + { + "epoch": 1.4865789103855342, + "grad_norm": 0.6137498811720851, + "learning_rate": 8.154435100740993e-07, + "loss": 0.2697, + "step": 31734 + }, + { + "epoch": 1.4866257553754627, + "grad_norm": 0.5627177150381567, + "learning_rate": 8.153033837222718e-07, + "loss": 0.2645, + "step": 31735 + }, + { + "epoch": 1.486672600365391, + "grad_norm": 0.5826984598770562, + "learning_rate": 8.151632670654499e-07, + "loss": 0.2591, + "step": 31736 + }, + { + "epoch": 1.4867194453553192, + "grad_norm": 0.555086552826246, + "learning_rate": 8.150231601044408e-07, + "loss": 0.2715, + "step": 31737 + }, + { + "epoch": 1.4867662903452477, + "grad_norm": 0.6138497584715511, + "learning_rate": 8.1488306284005e-07, + "loss": 0.2812, + "step": 31738 + }, + { + "epoch": 1.486813135335176, + "grad_norm": 0.5454686979913091, + "learning_rate": 8.147429752730828e-07, + "loss": 0.2616, + "step": 31739 + }, + { + "epoch": 1.4868599803251041, + "grad_norm": 0.5609005722914393, + "learning_rate": 8.146028974043462e-07, + "loss": 0.2654, + "step": 31740 + }, + { + "epoch": 1.4869068253150326, + "grad_norm": 0.5661820453582942, + "learning_rate": 8.144628292346463e-07, + "loss": 0.2786, + "step": 31741 + }, + { + "epoch": 1.4869536703049608, + "grad_norm": 0.6146388510049957, + "learning_rate": 8.143227707647891e-07, + "loss": 0.2778, + "step": 31742 + }, + { + "epoch": 1.487000515294889, + "grad_norm": 0.6120488205074638, + "learning_rate": 8.141827219955816e-07, + "loss": 0.2848, + "step": 31743 + }, + { + "epoch": 1.4870473602848175, + "grad_norm": 0.563621008383918, + "learning_rate": 8.140426829278278e-07, + "loss": 0.2611, + "step": 31744 + }, + { + "epoch": 1.4870942052747458, + "grad_norm": 0.5981850872791461, + "learning_rate": 8.139026535623357e-07, + "loss": 0.2861, + "step": 31745 + }, + { + "epoch": 1.4871410502646742, + "grad_norm": 0.5942650784634513, + "learning_rate": 8.137626338999091e-07, + "loss": 0.2618, + "step": 31746 + }, + { + "epoch": 1.4871878952546025, + "grad_norm": 0.6421522539211935, + "learning_rate": 8.136226239413548e-07, + "loss": 0.2833, + "step": 31747 + }, + { + "epoch": 1.487234740244531, + "grad_norm": 0.5948815051935751, + "learning_rate": 8.134826236874782e-07, + "loss": 0.272, + "step": 31748 + }, + { + "epoch": 1.4872815852344592, + "grad_norm": 0.6220788382707033, + "learning_rate": 8.133426331390862e-07, + "loss": 0.2733, + "step": 31749 + }, + { + "epoch": 1.4873284302243874, + "grad_norm": 0.6167557514444576, + "learning_rate": 8.132026522969827e-07, + "loss": 0.2773, + "step": 31750 + }, + { + "epoch": 1.487375275214316, + "grad_norm": 0.6302421441153157, + "learning_rate": 8.130626811619738e-07, + "loss": 0.2958, + "step": 31751 + }, + { + "epoch": 1.4874221202042441, + "grad_norm": 0.5853415535773333, + "learning_rate": 8.12922719734866e-07, + "loss": 0.2673, + "step": 31752 + }, + { + "epoch": 1.4874689651941724, + "grad_norm": 0.5783827651985989, + "learning_rate": 8.127827680164632e-07, + "loss": 0.2634, + "step": 31753 + }, + { + "epoch": 1.4875158101841008, + "grad_norm": 0.5991998892403975, + "learning_rate": 8.126428260075714e-07, + "loss": 0.2821, + "step": 31754 + }, + { + "epoch": 1.487562655174029, + "grad_norm": 0.583256369361063, + "learning_rate": 8.125028937089968e-07, + "loss": 0.2541, + "step": 31755 + }, + { + "epoch": 1.4876095001639575, + "grad_norm": 0.6562603633390286, + "learning_rate": 8.123629711215433e-07, + "loss": 0.2961, + "step": 31756 + }, + { + "epoch": 1.4876563451538858, + "grad_norm": 0.5961682386775001, + "learning_rate": 8.122230582460166e-07, + "loss": 0.2652, + "step": 31757 + }, + { + "epoch": 1.4877031901438142, + "grad_norm": 0.597461089343786, + "learning_rate": 8.12083155083222e-07, + "loss": 0.2775, + "step": 31758 + }, + { + "epoch": 1.4877500351337425, + "grad_norm": 0.6290866866716063, + "learning_rate": 8.119432616339656e-07, + "loss": 0.2849, + "step": 31759 + }, + { + "epoch": 1.4877968801236707, + "grad_norm": 0.5814885423822364, + "learning_rate": 8.118033778990506e-07, + "loss": 0.2658, + "step": 31760 + }, + { + "epoch": 1.4878437251135992, + "grad_norm": 0.5647897121524976, + "learning_rate": 8.116635038792834e-07, + "loss": 0.2601, + "step": 31761 + }, + { + "epoch": 1.4878905701035274, + "grad_norm": 0.552921609579716, + "learning_rate": 8.115236395754678e-07, + "loss": 0.2704, + "step": 31762 + }, + { + "epoch": 1.4879374150934557, + "grad_norm": 0.5988626958016016, + "learning_rate": 8.11383784988409e-07, + "loss": 0.2811, + "step": 31763 + }, + { + "epoch": 1.4879842600833841, + "grad_norm": 0.5903281081270766, + "learning_rate": 8.112439401189123e-07, + "loss": 0.2646, + "step": 31764 + }, + { + "epoch": 1.4880311050733124, + "grad_norm": 0.5776371289410702, + "learning_rate": 8.111041049677831e-07, + "loss": 0.2552, + "step": 31765 + }, + { + "epoch": 1.4880779500632406, + "grad_norm": 0.6203054439871, + "learning_rate": 8.109642795358244e-07, + "loss": 0.2819, + "step": 31766 + }, + { + "epoch": 1.488124795053169, + "grad_norm": 0.5748953936954239, + "learning_rate": 8.108244638238427e-07, + "loss": 0.2569, + "step": 31767 + }, + { + "epoch": 1.4881716400430973, + "grad_norm": 0.5783327030172972, + "learning_rate": 8.106846578326408e-07, + "loss": 0.2664, + "step": 31768 + }, + { + "epoch": 1.4882184850330258, + "grad_norm": 0.6040085249625651, + "learning_rate": 8.105448615630241e-07, + "loss": 0.2547, + "step": 31769 + }, + { + "epoch": 1.488265330022954, + "grad_norm": 0.6160750429664408, + "learning_rate": 8.104050750157968e-07, + "loss": 0.2799, + "step": 31770 + }, + { + "epoch": 1.4883121750128825, + "grad_norm": 0.6166674456252766, + "learning_rate": 8.102652981917641e-07, + "loss": 0.3, + "step": 31771 + }, + { + "epoch": 1.4883590200028107, + "grad_norm": 0.5939240539849372, + "learning_rate": 8.101255310917305e-07, + "loss": 0.2562, + "step": 31772 + }, + { + "epoch": 1.488405864992739, + "grad_norm": 0.6038321182312679, + "learning_rate": 8.099857737164996e-07, + "loss": 0.2808, + "step": 31773 + }, + { + "epoch": 1.4884527099826674, + "grad_norm": 0.5713677190467681, + "learning_rate": 8.098460260668753e-07, + "loss": 0.255, + "step": 31774 + }, + { + "epoch": 1.4884995549725957, + "grad_norm": 0.5963074031606223, + "learning_rate": 8.097062881436621e-07, + "loss": 0.2768, + "step": 31775 + }, + { + "epoch": 1.488546399962524, + "grad_norm": 0.5601053810937675, + "learning_rate": 8.095665599476644e-07, + "loss": 0.2453, + "step": 31776 + }, + { + "epoch": 1.4885932449524524, + "grad_norm": 0.5874705644012317, + "learning_rate": 8.094268414796863e-07, + "loss": 0.2689, + "step": 31777 + }, + { + "epoch": 1.4886400899423806, + "grad_norm": 0.5828928662735658, + "learning_rate": 8.092871327405316e-07, + "loss": 0.2597, + "step": 31778 + }, + { + "epoch": 1.4886869349323089, + "grad_norm": 0.5850193780848221, + "learning_rate": 8.091474337310057e-07, + "loss": 0.2771, + "step": 31779 + }, + { + "epoch": 1.4887337799222373, + "grad_norm": 0.6278171658764196, + "learning_rate": 8.090077444519112e-07, + "loss": 0.2736, + "step": 31780 + }, + { + "epoch": 1.4887806249121656, + "grad_norm": 0.620890907022542, + "learning_rate": 8.088680649040512e-07, + "loss": 0.2875, + "step": 31781 + }, + { + "epoch": 1.488827469902094, + "grad_norm": 0.6005660768823557, + "learning_rate": 8.087283950882305e-07, + "loss": 0.2664, + "step": 31782 + }, + { + "epoch": 1.4888743148920223, + "grad_norm": 0.6274780648602347, + "learning_rate": 8.085887350052526e-07, + "loss": 0.2872, + "step": 31783 + }, + { + "epoch": 1.4889211598819507, + "grad_norm": 0.5869294383648432, + "learning_rate": 8.084490846559218e-07, + "loss": 0.2749, + "step": 31784 + }, + { + "epoch": 1.488968004871879, + "grad_norm": 0.6126926580662276, + "learning_rate": 8.083094440410411e-07, + "loss": 0.2655, + "step": 31785 + }, + { + "epoch": 1.4890148498618072, + "grad_norm": 0.6220370844776573, + "learning_rate": 8.081698131614152e-07, + "loss": 0.2808, + "step": 31786 + }, + { + "epoch": 1.4890616948517357, + "grad_norm": 0.6265882271886687, + "learning_rate": 8.080301920178469e-07, + "loss": 0.2835, + "step": 31787 + }, + { + "epoch": 1.489108539841664, + "grad_norm": 0.6004333106569558, + "learning_rate": 8.078905806111387e-07, + "loss": 0.2667, + "step": 31788 + }, + { + "epoch": 1.4891553848315922, + "grad_norm": 0.5791802001549802, + "learning_rate": 8.077509789420948e-07, + "loss": 0.2548, + "step": 31789 + }, + { + "epoch": 1.4892022298215206, + "grad_norm": 0.5998959198504383, + "learning_rate": 8.07611387011519e-07, + "loss": 0.2822, + "step": 31790 + }, + { + "epoch": 1.4892490748114489, + "grad_norm": 0.5994078555616982, + "learning_rate": 8.074718048202141e-07, + "loss": 0.2814, + "step": 31791 + }, + { + "epoch": 1.4892959198013773, + "grad_norm": 0.6045005032970373, + "learning_rate": 8.073322323689844e-07, + "loss": 0.265, + "step": 31792 + }, + { + "epoch": 1.4893427647913056, + "grad_norm": 0.5808886106921495, + "learning_rate": 8.071926696586313e-07, + "loss": 0.2675, + "step": 31793 + }, + { + "epoch": 1.489389609781234, + "grad_norm": 0.5905699470050159, + "learning_rate": 8.0705311668996e-07, + "loss": 0.2786, + "step": 31794 + }, + { + "epoch": 1.4894364547711623, + "grad_norm": 0.5784401202651308, + "learning_rate": 8.069135734637715e-07, + "loss": 0.2665, + "step": 31795 + }, + { + "epoch": 1.4894832997610905, + "grad_norm": 0.6107034594529235, + "learning_rate": 8.067740399808699e-07, + "loss": 0.2806, + "step": 31796 + }, + { + "epoch": 1.489530144751019, + "grad_norm": 0.6053547495444, + "learning_rate": 8.066345162420583e-07, + "loss": 0.2637, + "step": 31797 + }, + { + "epoch": 1.4895769897409472, + "grad_norm": 0.6450447617752383, + "learning_rate": 8.064950022481399e-07, + "loss": 0.2808, + "step": 31798 + }, + { + "epoch": 1.4896238347308755, + "grad_norm": 0.569457305438104, + "learning_rate": 8.063554979999166e-07, + "loss": 0.2585, + "step": 31799 + }, + { + "epoch": 1.489670679720804, + "grad_norm": 0.5976398941074855, + "learning_rate": 8.062160034981917e-07, + "loss": 0.2678, + "step": 31800 + }, + { + "epoch": 1.4897175247107322, + "grad_norm": 0.5372080672200522, + "learning_rate": 8.060765187437686e-07, + "loss": 0.2506, + "step": 31801 + }, + { + "epoch": 1.4897643697006604, + "grad_norm": 0.5699541861698302, + "learning_rate": 8.059370437374486e-07, + "loss": 0.2639, + "step": 31802 + }, + { + "epoch": 1.4898112146905889, + "grad_norm": 0.5786760308268905, + "learning_rate": 8.057975784800354e-07, + "loss": 0.2797, + "step": 31803 + }, + { + "epoch": 1.489858059680517, + "grad_norm": 0.6112528319996561, + "learning_rate": 8.056581229723318e-07, + "loss": 0.2568, + "step": 31804 + }, + { + "epoch": 1.4899049046704456, + "grad_norm": 0.603592519139789, + "learning_rate": 8.055186772151391e-07, + "loss": 0.2688, + "step": 31805 + }, + { + "epoch": 1.4899517496603738, + "grad_norm": 0.5705068580322815, + "learning_rate": 8.053792412092607e-07, + "loss": 0.2785, + "step": 31806 + }, + { + "epoch": 1.4899985946503023, + "grad_norm": 0.6180673315476262, + "learning_rate": 8.052398149554996e-07, + "loss": 0.2754, + "step": 31807 + }, + { + "epoch": 1.4900454396402305, + "grad_norm": 0.6160333180202132, + "learning_rate": 8.051003984546566e-07, + "loss": 0.2895, + "step": 31808 + }, + { + "epoch": 1.4900922846301587, + "grad_norm": 0.62928100763714, + "learning_rate": 8.049609917075346e-07, + "loss": 0.2772, + "step": 31809 + }, + { + "epoch": 1.4901391296200872, + "grad_norm": 0.6247481972891294, + "learning_rate": 8.048215947149371e-07, + "loss": 0.2759, + "step": 31810 + }, + { + "epoch": 1.4901859746100155, + "grad_norm": 0.588655506857825, + "learning_rate": 8.046822074776645e-07, + "loss": 0.2595, + "step": 31811 + }, + { + "epoch": 1.4902328195999437, + "grad_norm": 0.6278293634513716, + "learning_rate": 8.045428299965194e-07, + "loss": 0.2747, + "step": 31812 + }, + { + "epoch": 1.4902796645898722, + "grad_norm": 0.5864738557256662, + "learning_rate": 8.044034622723043e-07, + "loss": 0.2786, + "step": 31813 + }, + { + "epoch": 1.4903265095798004, + "grad_norm": 0.5824740081737256, + "learning_rate": 8.042641043058219e-07, + "loss": 0.2919, + "step": 31814 + }, + { + "epoch": 1.4903733545697286, + "grad_norm": 0.6264308685774811, + "learning_rate": 8.041247560978726e-07, + "loss": 0.2888, + "step": 31815 + }, + { + "epoch": 1.490420199559657, + "grad_norm": 0.5957585788468929, + "learning_rate": 8.039854176492601e-07, + "loss": 0.2621, + "step": 31816 + }, + { + "epoch": 1.4904670445495853, + "grad_norm": 0.5936710454028126, + "learning_rate": 8.038460889607841e-07, + "loss": 0.2802, + "step": 31817 + }, + { + "epoch": 1.4905138895395138, + "grad_norm": 0.6336374839482624, + "learning_rate": 8.037067700332477e-07, + "loss": 0.2885, + "step": 31818 + }, + { + "epoch": 1.490560734529442, + "grad_norm": 0.6290557600751177, + "learning_rate": 8.035674608674523e-07, + "loss": 0.2782, + "step": 31819 + }, + { + "epoch": 1.4906075795193705, + "grad_norm": 0.5485411880979456, + "learning_rate": 8.034281614642001e-07, + "loss": 0.2617, + "step": 31820 + }, + { + "epoch": 1.4906544245092987, + "grad_norm": 0.5916141074032661, + "learning_rate": 8.032888718242932e-07, + "loss": 0.2619, + "step": 31821 + }, + { + "epoch": 1.490701269499227, + "grad_norm": 0.6146154388957146, + "learning_rate": 8.031495919485322e-07, + "loss": 0.277, + "step": 31822 + }, + { + "epoch": 1.4907481144891555, + "grad_norm": 0.5910985587103226, + "learning_rate": 8.030103218377181e-07, + "loss": 0.2781, + "step": 31823 + }, + { + "epoch": 1.4907949594790837, + "grad_norm": 0.6254282678360852, + "learning_rate": 8.02871061492653e-07, + "loss": 0.2835, + "step": 31824 + }, + { + "epoch": 1.490841804469012, + "grad_norm": 0.5801980265048786, + "learning_rate": 8.027318109141385e-07, + "loss": 0.265, + "step": 31825 + }, + { + "epoch": 1.4908886494589404, + "grad_norm": 0.585348166696875, + "learning_rate": 8.025925701029758e-07, + "loss": 0.2635, + "step": 31826 + }, + { + "epoch": 1.4909354944488686, + "grad_norm": 0.593391548183927, + "learning_rate": 8.024533390599662e-07, + "loss": 0.2755, + "step": 31827 + }, + { + "epoch": 1.490982339438797, + "grad_norm": 0.5973232843547881, + "learning_rate": 8.023141177859117e-07, + "loss": 0.2604, + "step": 31828 + }, + { + "epoch": 1.4910291844287253, + "grad_norm": 0.5667605285611779, + "learning_rate": 8.021749062816128e-07, + "loss": 0.2634, + "step": 31829 + }, + { + "epoch": 1.4910760294186538, + "grad_norm": 0.5894642503312172, + "learning_rate": 8.020357045478699e-07, + "loss": 0.2669, + "step": 31830 + }, + { + "epoch": 1.491122874408582, + "grad_norm": 0.6211021088505294, + "learning_rate": 8.018965125854847e-07, + "loss": 0.2837, + "step": 31831 + }, + { + "epoch": 1.4911697193985103, + "grad_norm": 0.5856034691485905, + "learning_rate": 8.017573303952581e-07, + "loss": 0.2691, + "step": 31832 + }, + { + "epoch": 1.4912165643884387, + "grad_norm": 0.5675933572276863, + "learning_rate": 8.016181579779913e-07, + "loss": 0.2584, + "step": 31833 + }, + { + "epoch": 1.491263409378367, + "grad_norm": 0.6052624485034275, + "learning_rate": 8.014789953344862e-07, + "loss": 0.2503, + "step": 31834 + }, + { + "epoch": 1.4913102543682952, + "grad_norm": 0.6535262469097758, + "learning_rate": 8.013398424655413e-07, + "loss": 0.2781, + "step": 31835 + }, + { + "epoch": 1.4913570993582237, + "grad_norm": 0.6138448870884837, + "learning_rate": 8.012006993719599e-07, + "loss": 0.2888, + "step": 31836 + }, + { + "epoch": 1.491403944348152, + "grad_norm": 0.5686662215881964, + "learning_rate": 8.010615660545404e-07, + "loss": 0.2658, + "step": 31837 + }, + { + "epoch": 1.4914507893380802, + "grad_norm": 0.6025306210611684, + "learning_rate": 8.00922442514085e-07, + "loss": 0.2789, + "step": 31838 + }, + { + "epoch": 1.4914976343280086, + "grad_norm": 0.6187222143450316, + "learning_rate": 8.007833287513933e-07, + "loss": 0.291, + "step": 31839 + }, + { + "epoch": 1.4915444793179369, + "grad_norm": 0.6457416194691186, + "learning_rate": 8.006442247672677e-07, + "loss": 0.2684, + "step": 31840 + }, + { + "epoch": 1.4915913243078653, + "grad_norm": 0.6161334689379999, + "learning_rate": 8.005051305625064e-07, + "loss": 0.2826, + "step": 31841 + }, + { + "epoch": 1.4916381692977936, + "grad_norm": 0.5949423459087212, + "learning_rate": 8.003660461379112e-07, + "loss": 0.2774, + "step": 31842 + }, + { + "epoch": 1.491685014287722, + "grad_norm": 0.5480938649654218, + "learning_rate": 8.002269714942829e-07, + "loss": 0.2514, + "step": 31843 + }, + { + "epoch": 1.4917318592776503, + "grad_norm": 0.5654315068183119, + "learning_rate": 8.000879066324202e-07, + "loss": 0.266, + "step": 31844 + }, + { + "epoch": 1.4917787042675785, + "grad_norm": 0.6421743371384615, + "learning_rate": 7.999488515531245e-07, + "loss": 0.2806, + "step": 31845 + }, + { + "epoch": 1.491825549257507, + "grad_norm": 0.6048612618150953, + "learning_rate": 7.998098062571965e-07, + "loss": 0.2739, + "step": 31846 + }, + { + "epoch": 1.4918723942474352, + "grad_norm": 0.5990381247606886, + "learning_rate": 7.996707707454352e-07, + "loss": 0.2892, + "step": 31847 + }, + { + "epoch": 1.4919192392373635, + "grad_norm": 0.5985826931205873, + "learning_rate": 7.99531745018641e-07, + "loss": 0.2955, + "step": 31848 + }, + { + "epoch": 1.491966084227292, + "grad_norm": 0.6222247476446985, + "learning_rate": 7.993927290776144e-07, + "loss": 0.2991, + "step": 31849 + }, + { + "epoch": 1.4920129292172202, + "grad_norm": 0.6176620311043395, + "learning_rate": 7.992537229231559e-07, + "loss": 0.2796, + "step": 31850 + }, + { + "epoch": 1.4920597742071484, + "grad_norm": 0.6002163979513662, + "learning_rate": 7.99114726556064e-07, + "loss": 0.2617, + "step": 31851 + }, + { + "epoch": 1.4921066191970769, + "grad_norm": 0.5830505748214467, + "learning_rate": 7.989757399771394e-07, + "loss": 0.2609, + "step": 31852 + }, + { + "epoch": 1.4921534641870051, + "grad_norm": 0.5622824775224478, + "learning_rate": 7.988367631871827e-07, + "loss": 0.271, + "step": 31853 + }, + { + "epoch": 1.4922003091769336, + "grad_norm": 0.6180988104125931, + "learning_rate": 7.986977961869919e-07, + "loss": 0.2857, + "step": 31854 + }, + { + "epoch": 1.4922471541668618, + "grad_norm": 0.6236385053097543, + "learning_rate": 7.985588389773676e-07, + "loss": 0.2886, + "step": 31855 + }, + { + "epoch": 1.4922939991567903, + "grad_norm": 0.5706590568820854, + "learning_rate": 7.984198915591107e-07, + "loss": 0.2686, + "step": 31856 + }, + { + "epoch": 1.4923408441467185, + "grad_norm": 0.6135183236280846, + "learning_rate": 7.982809539330185e-07, + "loss": 0.2846, + "step": 31857 + }, + { + "epoch": 1.4923876891366468, + "grad_norm": 0.6222381730105426, + "learning_rate": 7.981420260998921e-07, + "loss": 0.2905, + "step": 31858 + }, + { + "epoch": 1.4924345341265752, + "grad_norm": 0.5816834353276322, + "learning_rate": 7.980031080605311e-07, + "loss": 0.2461, + "step": 31859 + }, + { + "epoch": 1.4924813791165035, + "grad_norm": 0.5994885837644189, + "learning_rate": 7.978641998157336e-07, + "loss": 0.2659, + "step": 31860 + }, + { + "epoch": 1.4925282241064317, + "grad_norm": 0.6631222914435742, + "learning_rate": 7.977253013663e-07, + "loss": 0.2731, + "step": 31861 + }, + { + "epoch": 1.4925750690963602, + "grad_norm": 0.6309518834408202, + "learning_rate": 7.975864127130292e-07, + "loss": 0.2729, + "step": 31862 + }, + { + "epoch": 1.4926219140862884, + "grad_norm": 0.5833160800972335, + "learning_rate": 7.974475338567217e-07, + "loss": 0.2825, + "step": 31863 + }, + { + "epoch": 1.4926687590762169, + "grad_norm": 0.5584892856852777, + "learning_rate": 7.973086647981748e-07, + "loss": 0.2449, + "step": 31864 + }, + { + "epoch": 1.4927156040661451, + "grad_norm": 0.6027953956677367, + "learning_rate": 7.971698055381896e-07, + "loss": 0.2575, + "step": 31865 + }, + { + "epoch": 1.4927624490560736, + "grad_norm": 0.5712775589893979, + "learning_rate": 7.97030956077563e-07, + "loss": 0.263, + "step": 31866 + }, + { + "epoch": 1.4928092940460018, + "grad_norm": 0.6107804568698558, + "learning_rate": 7.968921164170956e-07, + "loss": 0.2752, + "step": 31867 + }, + { + "epoch": 1.49285613903593, + "grad_norm": 0.632170287380662, + "learning_rate": 7.967532865575858e-07, + "loss": 0.2752, + "step": 31868 + }, + { + "epoch": 1.4929029840258585, + "grad_norm": 0.5755780488835422, + "learning_rate": 7.966144664998326e-07, + "loss": 0.2749, + "step": 31869 + }, + { + "epoch": 1.4929498290157868, + "grad_norm": 0.6138860271308841, + "learning_rate": 7.964756562446363e-07, + "loss": 0.2683, + "step": 31870 + }, + { + "epoch": 1.492996674005715, + "grad_norm": 0.6382307490756454, + "learning_rate": 7.963368557927942e-07, + "loss": 0.2827, + "step": 31871 + }, + { + "epoch": 1.4930435189956435, + "grad_norm": 0.5963570760433102, + "learning_rate": 7.961980651451043e-07, + "loss": 0.2793, + "step": 31872 + }, + { + "epoch": 1.4930903639855717, + "grad_norm": 0.53732759240852, + "learning_rate": 7.960592843023665e-07, + "loss": 0.2646, + "step": 31873 + }, + { + "epoch": 1.4931372089755, + "grad_norm": 0.6326892635315735, + "learning_rate": 7.959205132653791e-07, + "loss": 0.2655, + "step": 31874 + }, + { + "epoch": 1.4931840539654284, + "grad_norm": 0.5910503736868028, + "learning_rate": 7.957817520349409e-07, + "loss": 0.2838, + "step": 31875 + }, + { + "epoch": 1.4932308989553567, + "grad_norm": 0.6008267471834053, + "learning_rate": 7.956430006118504e-07, + "loss": 0.2816, + "step": 31876 + }, + { + "epoch": 1.4932777439452851, + "grad_norm": 0.6212152273090533, + "learning_rate": 7.95504258996907e-07, + "loss": 0.2792, + "step": 31877 + }, + { + "epoch": 1.4933245889352134, + "grad_norm": 0.5946993249276913, + "learning_rate": 7.95365527190908e-07, + "loss": 0.2657, + "step": 31878 + }, + { + "epoch": 1.4933714339251418, + "grad_norm": 0.5607217203781211, + "learning_rate": 7.95226805194651e-07, + "loss": 0.2612, + "step": 31879 + }, + { + "epoch": 1.49341827891507, + "grad_norm": 0.5755480113010597, + "learning_rate": 7.950880930089355e-07, + "loss": 0.2961, + "step": 31880 + }, + { + "epoch": 1.4934651239049983, + "grad_norm": 0.6147305259985546, + "learning_rate": 7.949493906345595e-07, + "loss": 0.2825, + "step": 31881 + }, + { + "epoch": 1.4935119688949268, + "grad_norm": 0.6377639030898093, + "learning_rate": 7.948106980723211e-07, + "loss": 0.2884, + "step": 31882 + }, + { + "epoch": 1.493558813884855, + "grad_norm": 0.5459227336399128, + "learning_rate": 7.946720153230194e-07, + "loss": 0.2589, + "step": 31883 + }, + { + "epoch": 1.4936056588747832, + "grad_norm": 0.6617512901109093, + "learning_rate": 7.94533342387451e-07, + "loss": 0.296, + "step": 31884 + }, + { + "epoch": 1.4936525038647117, + "grad_norm": 0.603752883765919, + "learning_rate": 7.943946792664153e-07, + "loss": 0.272, + "step": 31885 + }, + { + "epoch": 1.49369934885464, + "grad_norm": 0.6073859486044498, + "learning_rate": 7.942560259607085e-07, + "loss": 0.2595, + "step": 31886 + }, + { + "epoch": 1.4937461938445682, + "grad_norm": 0.5911284833589739, + "learning_rate": 7.941173824711299e-07, + "loss": 0.2816, + "step": 31887 + }, + { + "epoch": 1.4937930388344967, + "grad_norm": 0.6048624544410192, + "learning_rate": 7.939787487984765e-07, + "loss": 0.286, + "step": 31888 + }, + { + "epoch": 1.493839883824425, + "grad_norm": 0.658747260475096, + "learning_rate": 7.938401249435479e-07, + "loss": 0.2929, + "step": 31889 + }, + { + "epoch": 1.4938867288143534, + "grad_norm": 0.580786606470136, + "learning_rate": 7.937015109071395e-07, + "loss": 0.262, + "step": 31890 + }, + { + "epoch": 1.4939335738042816, + "grad_norm": 0.6194149979531822, + "learning_rate": 7.935629066900502e-07, + "loss": 0.2739, + "step": 31891 + }, + { + "epoch": 1.49398041879421, + "grad_norm": 0.5610039061965846, + "learning_rate": 7.934243122930782e-07, + "loss": 0.2689, + "step": 31892 + }, + { + "epoch": 1.4940272637841383, + "grad_norm": 0.6372128515600085, + "learning_rate": 7.932857277170194e-07, + "loss": 0.2698, + "step": 31893 + }, + { + "epoch": 1.4940741087740665, + "grad_norm": 0.5823856385303874, + "learning_rate": 7.931471529626724e-07, + "loss": 0.2557, + "step": 31894 + }, + { + "epoch": 1.494120953763995, + "grad_norm": 0.6082732170606527, + "learning_rate": 7.930085880308355e-07, + "loss": 0.2599, + "step": 31895 + }, + { + "epoch": 1.4941677987539232, + "grad_norm": 0.6155556344147578, + "learning_rate": 7.928700329223041e-07, + "loss": 0.2954, + "step": 31896 + }, + { + "epoch": 1.4942146437438515, + "grad_norm": 0.5953659300189765, + "learning_rate": 7.927314876378766e-07, + "loss": 0.2776, + "step": 31897 + }, + { + "epoch": 1.49426148873378, + "grad_norm": 0.6074016650754375, + "learning_rate": 7.925929521783504e-07, + "loss": 0.267, + "step": 31898 + }, + { + "epoch": 1.4943083337237082, + "grad_norm": 0.6257495652030844, + "learning_rate": 7.924544265445233e-07, + "loss": 0.2859, + "step": 31899 + }, + { + "epoch": 1.4943551787136367, + "grad_norm": 0.5974008365605433, + "learning_rate": 7.923159107371911e-07, + "loss": 0.2745, + "step": 31900 + }, + { + "epoch": 1.494402023703565, + "grad_norm": 0.5964067755751289, + "learning_rate": 7.921774047571524e-07, + "loss": 0.2866, + "step": 31901 + }, + { + "epoch": 1.4944488686934934, + "grad_norm": 0.6352632330489685, + "learning_rate": 7.920389086052027e-07, + "loss": 0.2695, + "step": 31902 + }, + { + "epoch": 1.4944957136834216, + "grad_norm": 0.5994383655761649, + "learning_rate": 7.919004222821397e-07, + "loss": 0.2784, + "step": 31903 + }, + { + "epoch": 1.4945425586733498, + "grad_norm": 0.5923118550404414, + "learning_rate": 7.917619457887607e-07, + "loss": 0.2724, + "step": 31904 + }, + { + "epoch": 1.4945894036632783, + "grad_norm": 0.6296551650989952, + "learning_rate": 7.916234791258629e-07, + "loss": 0.2786, + "step": 31905 + }, + { + "epoch": 1.4946362486532065, + "grad_norm": 0.5636570822015288, + "learning_rate": 7.914850222942419e-07, + "loss": 0.266, + "step": 31906 + }, + { + "epoch": 1.4946830936431348, + "grad_norm": 0.6125315165267149, + "learning_rate": 7.913465752946958e-07, + "loss": 0.2971, + "step": 31907 + }, + { + "epoch": 1.4947299386330632, + "grad_norm": 0.7628924077659262, + "learning_rate": 7.912081381280202e-07, + "loss": 0.2683, + "step": 31908 + }, + { + "epoch": 1.4947767836229915, + "grad_norm": 0.5602595287581161, + "learning_rate": 7.910697107950122e-07, + "loss": 0.2569, + "step": 31909 + }, + { + "epoch": 1.4948236286129197, + "grad_norm": 0.5780992074064094, + "learning_rate": 7.909312932964682e-07, + "loss": 0.2685, + "step": 31910 + }, + { + "epoch": 1.4948704736028482, + "grad_norm": 0.6043357768900983, + "learning_rate": 7.907928856331853e-07, + "loss": 0.2778, + "step": 31911 + }, + { + "epoch": 1.4949173185927764, + "grad_norm": 0.6397327536602258, + "learning_rate": 7.906544878059607e-07, + "loss": 0.2779, + "step": 31912 + }, + { + "epoch": 1.494964163582705, + "grad_norm": 0.6194403764582398, + "learning_rate": 7.905160998155898e-07, + "loss": 0.2944, + "step": 31913 + }, + { + "epoch": 1.4950110085726331, + "grad_norm": 0.651125195846037, + "learning_rate": 7.903777216628683e-07, + "loss": 0.2972, + "step": 31914 + }, + { + "epoch": 1.4950578535625616, + "grad_norm": 0.5926455627159419, + "learning_rate": 7.90239353348593e-07, + "loss": 0.258, + "step": 31915 + }, + { + "epoch": 1.4951046985524898, + "grad_norm": 0.6310235894625102, + "learning_rate": 7.901009948735608e-07, + "loss": 0.2712, + "step": 31916 + }, + { + "epoch": 1.495151543542418, + "grad_norm": 0.5743297608147481, + "learning_rate": 7.899626462385677e-07, + "loss": 0.2716, + "step": 31917 + }, + { + "epoch": 1.4951983885323465, + "grad_norm": 0.6074830414068382, + "learning_rate": 7.898243074444095e-07, + "loss": 0.2634, + "step": 31918 + }, + { + "epoch": 1.4952452335222748, + "grad_norm": 0.5733059337979673, + "learning_rate": 7.896859784918834e-07, + "loss": 0.2733, + "step": 31919 + }, + { + "epoch": 1.495292078512203, + "grad_norm": 0.5933043613497596, + "learning_rate": 7.895476593817846e-07, + "loss": 0.2737, + "step": 31920 + }, + { + "epoch": 1.4953389235021315, + "grad_norm": 0.5830160807427756, + "learning_rate": 7.894093501149086e-07, + "loss": 0.2713, + "step": 31921 + }, + { + "epoch": 1.4953857684920597, + "grad_norm": 0.617363628987705, + "learning_rate": 7.892710506920515e-07, + "loss": 0.2767, + "step": 31922 + }, + { + "epoch": 1.495432613481988, + "grad_norm": 0.6059387437322762, + "learning_rate": 7.891327611140098e-07, + "loss": 0.2741, + "step": 31923 + }, + { + "epoch": 1.4954794584719164, + "grad_norm": 0.5883998360671775, + "learning_rate": 7.889944813815789e-07, + "loss": 0.2698, + "step": 31924 + }, + { + "epoch": 1.4955263034618447, + "grad_norm": 0.5994718246439865, + "learning_rate": 7.888562114955548e-07, + "loss": 0.2688, + "step": 31925 + }, + { + "epoch": 1.4955731484517731, + "grad_norm": 0.5312740289928031, + "learning_rate": 7.887179514567337e-07, + "loss": 0.2554, + "step": 31926 + }, + { + "epoch": 1.4956199934417014, + "grad_norm": 0.6402248931745592, + "learning_rate": 7.885797012659108e-07, + "loss": 0.2911, + "step": 31927 + }, + { + "epoch": 1.4956668384316298, + "grad_norm": 0.6095204791121573, + "learning_rate": 7.884414609238808e-07, + "loss": 0.2636, + "step": 31928 + }, + { + "epoch": 1.495713683421558, + "grad_norm": 0.5841433795141, + "learning_rate": 7.8830323043144e-07, + "loss": 0.2772, + "step": 31929 + }, + { + "epoch": 1.4957605284114863, + "grad_norm": 0.6157077830435254, + "learning_rate": 7.881650097893839e-07, + "loss": 0.2735, + "step": 31930 + }, + { + "epoch": 1.4958073734014148, + "grad_norm": 0.5799254921517647, + "learning_rate": 7.88026798998508e-07, + "loss": 0.2683, + "step": 31931 + }, + { + "epoch": 1.495854218391343, + "grad_norm": 0.5891152556580579, + "learning_rate": 7.878885980596085e-07, + "loss": 0.2616, + "step": 31932 + }, + { + "epoch": 1.4959010633812713, + "grad_norm": 0.626068776243336, + "learning_rate": 7.877504069734788e-07, + "loss": 0.2894, + "step": 31933 + }, + { + "epoch": 1.4959479083711997, + "grad_norm": 0.6018812505549195, + "learning_rate": 7.876122257409158e-07, + "loss": 0.2694, + "step": 31934 + }, + { + "epoch": 1.495994753361128, + "grad_norm": 0.5953190968322087, + "learning_rate": 7.874740543627135e-07, + "loss": 0.2674, + "step": 31935 + }, + { + "epoch": 1.4960415983510564, + "grad_norm": 0.6061385103159949, + "learning_rate": 7.873358928396674e-07, + "loss": 0.28, + "step": 31936 + }, + { + "epoch": 1.4960884433409847, + "grad_norm": 0.5948009750451927, + "learning_rate": 7.871977411725728e-07, + "loss": 0.2704, + "step": 31937 + }, + { + "epoch": 1.4961352883309131, + "grad_norm": 0.6345299944396511, + "learning_rate": 7.870595993622257e-07, + "loss": 0.2784, + "step": 31938 + }, + { + "epoch": 1.4961821333208414, + "grad_norm": 0.5755904501987288, + "learning_rate": 7.869214674094192e-07, + "loss": 0.2782, + "step": 31939 + }, + { + "epoch": 1.4962289783107696, + "grad_norm": 0.6305503586910807, + "learning_rate": 7.867833453149489e-07, + "loss": 0.288, + "step": 31940 + }, + { + "epoch": 1.496275823300698, + "grad_norm": 0.5614193247226685, + "learning_rate": 7.866452330796109e-07, + "loss": 0.2538, + "step": 31941 + }, + { + "epoch": 1.4963226682906263, + "grad_norm": 0.6110485885683232, + "learning_rate": 7.86507130704198e-07, + "loss": 0.2956, + "step": 31942 + }, + { + "epoch": 1.4963695132805546, + "grad_norm": 0.5793379066472432, + "learning_rate": 7.863690381895059e-07, + "loss": 0.2801, + "step": 31943 + }, + { + "epoch": 1.496416358270483, + "grad_norm": 0.569741521432525, + "learning_rate": 7.862309555363301e-07, + "loss": 0.2536, + "step": 31944 + }, + { + "epoch": 1.4964632032604113, + "grad_norm": 0.575733284929519, + "learning_rate": 7.860928827454636e-07, + "loss": 0.2752, + "step": 31945 + }, + { + "epoch": 1.4965100482503395, + "grad_norm": 0.5659749944648187, + "learning_rate": 7.859548198177017e-07, + "loss": 0.2644, + "step": 31946 + }, + { + "epoch": 1.496556893240268, + "grad_norm": 0.6190317824957159, + "learning_rate": 7.858167667538399e-07, + "loss": 0.2642, + "step": 31947 + }, + { + "epoch": 1.4966037382301962, + "grad_norm": 0.6025206267515157, + "learning_rate": 7.856787235546709e-07, + "loss": 0.2614, + "step": 31948 + }, + { + "epoch": 1.4966505832201247, + "grad_norm": 0.588256888452419, + "learning_rate": 7.855406902209903e-07, + "loss": 0.2719, + "step": 31949 + }, + { + "epoch": 1.496697428210053, + "grad_norm": 0.5973654001009181, + "learning_rate": 7.854026667535925e-07, + "loss": 0.2657, + "step": 31950 + }, + { + "epoch": 1.4967442731999814, + "grad_norm": 0.6194769976115151, + "learning_rate": 7.852646531532706e-07, + "loss": 0.2792, + "step": 31951 + }, + { + "epoch": 1.4967911181899096, + "grad_norm": 0.6130899593726999, + "learning_rate": 7.851266494208198e-07, + "loss": 0.28, + "step": 31952 + }, + { + "epoch": 1.4968379631798379, + "grad_norm": 0.6051761650476207, + "learning_rate": 7.849886555570343e-07, + "loss": 0.2721, + "step": 31953 + }, + { + "epoch": 1.4968848081697663, + "grad_norm": 0.5314491753350781, + "learning_rate": 7.848506715627085e-07, + "loss": 0.2586, + "step": 31954 + }, + { + "epoch": 1.4969316531596946, + "grad_norm": 0.60636191376492, + "learning_rate": 7.847126974386355e-07, + "loss": 0.2672, + "step": 31955 + }, + { + "epoch": 1.4969784981496228, + "grad_norm": 0.5564611507378445, + "learning_rate": 7.845747331856104e-07, + "loss": 0.2639, + "step": 31956 + }, + { + "epoch": 1.4970253431395513, + "grad_norm": 0.5818710497901503, + "learning_rate": 7.844367788044258e-07, + "loss": 0.2639, + "step": 31957 + }, + { + "epoch": 1.4970721881294795, + "grad_norm": 0.6154063928111259, + "learning_rate": 7.842988342958763e-07, + "loss": 0.2789, + "step": 31958 + }, + { + "epoch": 1.4971190331194077, + "grad_norm": 0.6248016382675222, + "learning_rate": 7.841608996607558e-07, + "loss": 0.2737, + "step": 31959 + }, + { + "epoch": 1.4971658781093362, + "grad_norm": 0.6189892346154007, + "learning_rate": 7.840229748998582e-07, + "loss": 0.2698, + "step": 31960 + }, + { + "epoch": 1.4972127230992645, + "grad_norm": 0.5824803978625163, + "learning_rate": 7.838850600139778e-07, + "loss": 0.2574, + "step": 31961 + }, + { + "epoch": 1.497259568089193, + "grad_norm": 0.5617098246904761, + "learning_rate": 7.837471550039074e-07, + "loss": 0.2672, + "step": 31962 + }, + { + "epoch": 1.4973064130791212, + "grad_norm": 0.6001510314499658, + "learning_rate": 7.836092598704401e-07, + "loss": 0.2777, + "step": 31963 + }, + { + "epoch": 1.4973532580690496, + "grad_norm": 0.5841754771875568, + "learning_rate": 7.834713746143702e-07, + "loss": 0.2817, + "step": 31964 + }, + { + "epoch": 1.4974001030589779, + "grad_norm": 0.5661604680794357, + "learning_rate": 7.83333499236491e-07, + "loss": 0.268, + "step": 31965 + }, + { + "epoch": 1.497446948048906, + "grad_norm": 0.6167179888567621, + "learning_rate": 7.83195633737596e-07, + "loss": 0.2681, + "step": 31966 + }, + { + "epoch": 1.4974937930388346, + "grad_norm": 0.6427591531073836, + "learning_rate": 7.830577781184786e-07, + "loss": 0.283, + "step": 31967 + }, + { + "epoch": 1.4975406380287628, + "grad_norm": 0.5921657226443474, + "learning_rate": 7.829199323799331e-07, + "loss": 0.2642, + "step": 31968 + }, + { + "epoch": 1.497587483018691, + "grad_norm": 0.5988977149088431, + "learning_rate": 7.827820965227517e-07, + "loss": 0.2547, + "step": 31969 + }, + { + "epoch": 1.4976343280086195, + "grad_norm": 0.5820172422439234, + "learning_rate": 7.826442705477269e-07, + "loss": 0.2584, + "step": 31970 + }, + { + "epoch": 1.4976811729985477, + "grad_norm": 0.6387325878543268, + "learning_rate": 7.825064544556529e-07, + "loss": 0.2752, + "step": 31971 + }, + { + "epoch": 1.4977280179884762, + "grad_norm": 0.596982659619069, + "learning_rate": 7.823686482473222e-07, + "loss": 0.2598, + "step": 31972 + }, + { + "epoch": 1.4977748629784045, + "grad_norm": 0.5681897623939125, + "learning_rate": 7.822308519235286e-07, + "loss": 0.2701, + "step": 31973 + }, + { + "epoch": 1.497821707968333, + "grad_norm": 0.6054393809678745, + "learning_rate": 7.820930654850653e-07, + "loss": 0.2695, + "step": 31974 + }, + { + "epoch": 1.4978685529582612, + "grad_norm": 0.6578746371860031, + "learning_rate": 7.819552889327239e-07, + "loss": 0.2747, + "step": 31975 + }, + { + "epoch": 1.4979153979481894, + "grad_norm": 0.589331490616593, + "learning_rate": 7.818175222672988e-07, + "loss": 0.2837, + "step": 31976 + }, + { + "epoch": 1.4979622429381179, + "grad_norm": 0.5452992083029973, + "learning_rate": 7.816797654895811e-07, + "loss": 0.2556, + "step": 31977 + }, + { + "epoch": 1.498009087928046, + "grad_norm": 0.5942438493659595, + "learning_rate": 7.815420186003647e-07, + "loss": 0.265, + "step": 31978 + }, + { + "epoch": 1.4980559329179743, + "grad_norm": 0.6273359893411664, + "learning_rate": 7.814042816004419e-07, + "loss": 0.2773, + "step": 31979 + }, + { + "epoch": 1.4981027779079028, + "grad_norm": 0.5722921283575128, + "learning_rate": 7.812665544906062e-07, + "loss": 0.2713, + "step": 31980 + }, + { + "epoch": 1.498149622897831, + "grad_norm": 0.6039153858142221, + "learning_rate": 7.811288372716489e-07, + "loss": 0.2623, + "step": 31981 + }, + { + "epoch": 1.4981964678877593, + "grad_norm": 0.6209794754908696, + "learning_rate": 7.809911299443629e-07, + "loss": 0.29, + "step": 31982 + }, + { + "epoch": 1.4982433128776877, + "grad_norm": 0.5740377525545696, + "learning_rate": 7.808534325095418e-07, + "loss": 0.2625, + "step": 31983 + }, + { + "epoch": 1.498290157867616, + "grad_norm": 0.6341192122181738, + "learning_rate": 7.807157449679764e-07, + "loss": 0.2896, + "step": 31984 + }, + { + "epoch": 1.4983370028575445, + "grad_norm": 0.6521638900239353, + "learning_rate": 7.805780673204596e-07, + "loss": 0.2704, + "step": 31985 + }, + { + "epoch": 1.4983838478474727, + "grad_norm": 0.6030420793209014, + "learning_rate": 7.804403995677843e-07, + "loss": 0.2792, + "step": 31986 + }, + { + "epoch": 1.4984306928374012, + "grad_norm": 0.5902648426932721, + "learning_rate": 7.803027417107425e-07, + "loss": 0.2729, + "step": 31987 + }, + { + "epoch": 1.4984775378273294, + "grad_norm": 0.6043241188926269, + "learning_rate": 7.801650937501257e-07, + "loss": 0.2777, + "step": 31988 + }, + { + "epoch": 1.4985243828172576, + "grad_norm": 0.604430536464273, + "learning_rate": 7.800274556867266e-07, + "loss": 0.273, + "step": 31989 + }, + { + "epoch": 1.498571227807186, + "grad_norm": 0.5812476840577103, + "learning_rate": 7.798898275213376e-07, + "loss": 0.2518, + "step": 31990 + }, + { + "epoch": 1.4986180727971143, + "grad_norm": 0.5428197911441714, + "learning_rate": 7.797522092547499e-07, + "loss": 0.2654, + "step": 31991 + }, + { + "epoch": 1.4986649177870426, + "grad_norm": 0.6261108833016575, + "learning_rate": 7.796146008877559e-07, + "loss": 0.2697, + "step": 31992 + }, + { + "epoch": 1.498711762776971, + "grad_norm": 0.5908524145112365, + "learning_rate": 7.794770024211479e-07, + "loss": 0.2733, + "step": 31993 + }, + { + "epoch": 1.4987586077668993, + "grad_norm": 0.6215710445533711, + "learning_rate": 7.793394138557168e-07, + "loss": 0.2613, + "step": 31994 + }, + { + "epoch": 1.4988054527568275, + "grad_norm": 0.5612056172065315, + "learning_rate": 7.792018351922547e-07, + "loss": 0.2637, + "step": 31995 + }, + { + "epoch": 1.498852297746756, + "grad_norm": 0.5869332517472213, + "learning_rate": 7.790642664315546e-07, + "loss": 0.266, + "step": 31996 + }, + { + "epoch": 1.4988991427366842, + "grad_norm": 0.585305378261924, + "learning_rate": 7.789267075744061e-07, + "loss": 0.2653, + "step": 31997 + }, + { + "epoch": 1.4989459877266127, + "grad_norm": 0.6047099796646057, + "learning_rate": 7.78789158621602e-07, + "loss": 0.2657, + "step": 31998 + }, + { + "epoch": 1.498992832716541, + "grad_norm": 0.5584456940406808, + "learning_rate": 7.786516195739344e-07, + "loss": 0.2615, + "step": 31999 + }, + { + "epoch": 1.4990396777064694, + "grad_norm": 0.5674402348838523, + "learning_rate": 7.785140904321931e-07, + "loss": 0.2597, + "step": 32000 + }, + { + "epoch": 1.4990865226963976, + "grad_norm": 0.6238678224062715, + "learning_rate": 7.783765711971708e-07, + "loss": 0.2905, + "step": 32001 + }, + { + "epoch": 1.4991333676863259, + "grad_norm": 0.606436094074214, + "learning_rate": 7.782390618696584e-07, + "loss": 0.2672, + "step": 32002 + }, + { + "epoch": 1.4991802126762543, + "grad_norm": 0.7013711878690981, + "learning_rate": 7.781015624504485e-07, + "loss": 0.309, + "step": 32003 + }, + { + "epoch": 1.4992270576661826, + "grad_norm": 0.6053378311745937, + "learning_rate": 7.779640729403304e-07, + "loss": 0.2851, + "step": 32004 + }, + { + "epoch": 1.4992739026561108, + "grad_norm": 0.6247947476633164, + "learning_rate": 7.77826593340097e-07, + "loss": 0.2825, + "step": 32005 + }, + { + "epoch": 1.4993207476460393, + "grad_norm": 0.5812758166426761, + "learning_rate": 7.776891236505379e-07, + "loss": 0.2808, + "step": 32006 + }, + { + "epoch": 1.4993675926359675, + "grad_norm": 0.5766948894196953, + "learning_rate": 7.775516638724451e-07, + "loss": 0.2549, + "step": 32007 + }, + { + "epoch": 1.499414437625896, + "grad_norm": 0.6003121660236098, + "learning_rate": 7.774142140066094e-07, + "loss": 0.2607, + "step": 32008 + }, + { + "epoch": 1.4994612826158242, + "grad_norm": 0.6467286982223275, + "learning_rate": 7.77276774053822e-07, + "loss": 0.2803, + "step": 32009 + }, + { + "epoch": 1.4995081276057527, + "grad_norm": 0.6412592162411664, + "learning_rate": 7.771393440148745e-07, + "loss": 0.29, + "step": 32010 + }, + { + "epoch": 1.499554972595681, + "grad_norm": 0.5987731811423378, + "learning_rate": 7.770019238905571e-07, + "loss": 0.2723, + "step": 32011 + }, + { + "epoch": 1.4996018175856092, + "grad_norm": 0.5808525378371154, + "learning_rate": 7.7686451368166e-07, + "loss": 0.2659, + "step": 32012 + }, + { + "epoch": 1.4996486625755376, + "grad_norm": 0.5596467738855919, + "learning_rate": 7.767271133889743e-07, + "loss": 0.2627, + "step": 32013 + }, + { + "epoch": 1.4996955075654659, + "grad_norm": 0.6142350849517965, + "learning_rate": 7.76589723013291e-07, + "loss": 0.2929, + "step": 32014 + }, + { + "epoch": 1.4997423525553941, + "grad_norm": 0.5686781961735756, + "learning_rate": 7.764523425554008e-07, + "loss": 0.2551, + "step": 32015 + }, + { + "epoch": 1.4997891975453226, + "grad_norm": 0.5824034189384361, + "learning_rate": 7.763149720160939e-07, + "loss": 0.2576, + "step": 32016 + }, + { + "epoch": 1.4998360425352508, + "grad_norm": 0.6120373749238809, + "learning_rate": 7.761776113961619e-07, + "loss": 0.2679, + "step": 32017 + }, + { + "epoch": 1.499882887525179, + "grad_norm": 0.6068108314273856, + "learning_rate": 7.760402606963949e-07, + "loss": 0.2762, + "step": 32018 + }, + { + "epoch": 1.4999297325151075, + "grad_norm": 0.617028834246129, + "learning_rate": 7.759029199175816e-07, + "loss": 0.2769, + "step": 32019 + }, + { + "epoch": 1.4999765775050358, + "grad_norm": 0.5850715716631604, + "learning_rate": 7.75765589060514e-07, + "loss": 0.2878, + "step": 32020 + }, + { + "epoch": 1.500023422494964, + "grad_norm": 0.6321080687820548, + "learning_rate": 7.756282681259819e-07, + "loss": 0.2701, + "step": 32021 + }, + { + "epoch": 1.5000702674848925, + "grad_norm": 0.6244995093456438, + "learning_rate": 7.754909571147757e-07, + "loss": 0.2781, + "step": 32022 + }, + { + "epoch": 1.500117112474821, + "grad_norm": 0.6215849473415992, + "learning_rate": 7.753536560276864e-07, + "loss": 0.2628, + "step": 32023 + }, + { + "epoch": 1.5001639574647492, + "grad_norm": 0.6462064734622165, + "learning_rate": 7.752163648655028e-07, + "loss": 0.2701, + "step": 32024 + }, + { + "epoch": 1.5002108024546774, + "grad_norm": 0.5762546105813376, + "learning_rate": 7.750790836290159e-07, + "loss": 0.2659, + "step": 32025 + }, + { + "epoch": 1.5002576474446059, + "grad_norm": 0.6089638222799642, + "learning_rate": 7.74941812319015e-07, + "loss": 0.2628, + "step": 32026 + }, + { + "epoch": 1.5003044924345341, + "grad_norm": 0.6147632861581586, + "learning_rate": 7.748045509362901e-07, + "loss": 0.2849, + "step": 32027 + }, + { + "epoch": 1.5003513374244624, + "grad_norm": 0.5542842591186986, + "learning_rate": 7.746672994816312e-07, + "loss": 0.2795, + "step": 32028 + }, + { + "epoch": 1.5003981824143908, + "grad_norm": 0.6526050705064094, + "learning_rate": 7.745300579558296e-07, + "loss": 0.2856, + "step": 32029 + }, + { + "epoch": 1.500445027404319, + "grad_norm": 0.6392155851787171, + "learning_rate": 7.743928263596728e-07, + "loss": 0.2763, + "step": 32030 + }, + { + "epoch": 1.5004918723942473, + "grad_norm": 0.6758720245289181, + "learning_rate": 7.742556046939515e-07, + "loss": 0.2866, + "step": 32031 + }, + { + "epoch": 1.5005387173841758, + "grad_norm": 0.5886584230999502, + "learning_rate": 7.741183929594565e-07, + "loss": 0.2699, + "step": 32032 + }, + { + "epoch": 1.5005855623741042, + "grad_norm": 0.6075095671363223, + "learning_rate": 7.739811911569753e-07, + "loss": 0.2687, + "step": 32033 + }, + { + "epoch": 1.5006324073640323, + "grad_norm": 0.5968660713631635, + "learning_rate": 7.738439992872984e-07, + "loss": 0.2711, + "step": 32034 + }, + { + "epoch": 1.5006792523539607, + "grad_norm": 0.6029558911562656, + "learning_rate": 7.737068173512167e-07, + "loss": 0.2747, + "step": 32035 + }, + { + "epoch": 1.5007260973438892, + "grad_norm": 0.596826911200086, + "learning_rate": 7.735696453495172e-07, + "loss": 0.2737, + "step": 32036 + }, + { + "epoch": 1.5007729423338174, + "grad_norm": 0.5937780304612392, + "learning_rate": 7.734324832829906e-07, + "loss": 0.2651, + "step": 32037 + }, + { + "epoch": 1.5008197873237457, + "grad_norm": 0.5994692804482543, + "learning_rate": 7.732953311524268e-07, + "loss": 0.2656, + "step": 32038 + }, + { + "epoch": 1.5008666323136741, + "grad_norm": 0.5539560752300364, + "learning_rate": 7.731581889586137e-07, + "loss": 0.2706, + "step": 32039 + }, + { + "epoch": 1.5009134773036024, + "grad_norm": 0.6036799654553499, + "learning_rate": 7.730210567023414e-07, + "loss": 0.2767, + "step": 32040 + }, + { + "epoch": 1.5009603222935306, + "grad_norm": 0.6105170640046357, + "learning_rate": 7.728839343843994e-07, + "loss": 0.2827, + "step": 32041 + }, + { + "epoch": 1.501007167283459, + "grad_norm": 0.5808902433073971, + "learning_rate": 7.727468220055753e-07, + "loss": 0.2541, + "step": 32042 + }, + { + "epoch": 1.5010540122733873, + "grad_norm": 0.6081806218914233, + "learning_rate": 7.726097195666593e-07, + "loss": 0.2763, + "step": 32043 + }, + { + "epoch": 1.5011008572633155, + "grad_norm": 0.5529327865591037, + "learning_rate": 7.724726270684402e-07, + "loss": 0.2436, + "step": 32044 + }, + { + "epoch": 1.501147702253244, + "grad_norm": 0.6149060024065109, + "learning_rate": 7.723355445117078e-07, + "loss": 0.2754, + "step": 32045 + }, + { + "epoch": 1.5011945472431725, + "grad_norm": 0.5715474895085233, + "learning_rate": 7.721984718972491e-07, + "loss": 0.2649, + "step": 32046 + }, + { + "epoch": 1.5012413922331007, + "grad_norm": 0.6384427235154816, + "learning_rate": 7.720614092258552e-07, + "loss": 0.2966, + "step": 32047 + }, + { + "epoch": 1.501288237223029, + "grad_norm": 0.6927796341271057, + "learning_rate": 7.719243564983123e-07, + "loss": 0.2773, + "step": 32048 + }, + { + "epoch": 1.5013350822129574, + "grad_norm": 0.5740573085003172, + "learning_rate": 7.717873137154106e-07, + "loss": 0.2665, + "step": 32049 + }, + { + "epoch": 1.5013819272028857, + "grad_norm": 0.554760738794991, + "learning_rate": 7.716502808779385e-07, + "loss": 0.2704, + "step": 32050 + }, + { + "epoch": 1.501428772192814, + "grad_norm": 0.6199530391937669, + "learning_rate": 7.715132579866846e-07, + "loss": 0.282, + "step": 32051 + }, + { + "epoch": 1.5014756171827424, + "grad_norm": 0.5817292198348658, + "learning_rate": 7.713762450424383e-07, + "loss": 0.2728, + "step": 32052 + }, + { + "epoch": 1.5015224621726706, + "grad_norm": 0.615837394347924, + "learning_rate": 7.712392420459863e-07, + "loss": 0.2706, + "step": 32053 + }, + { + "epoch": 1.5015693071625988, + "grad_norm": 0.5783586157458397, + "learning_rate": 7.711022489981191e-07, + "loss": 0.2637, + "step": 32054 + }, + { + "epoch": 1.5016161521525273, + "grad_norm": 0.6312659199462984, + "learning_rate": 7.709652658996228e-07, + "loss": 0.2708, + "step": 32055 + }, + { + "epoch": 1.5016629971424558, + "grad_norm": 0.5930133596660426, + "learning_rate": 7.708282927512869e-07, + "loss": 0.2687, + "step": 32056 + }, + { + "epoch": 1.5017098421323838, + "grad_norm": 0.5783609353700355, + "learning_rate": 7.706913295538995e-07, + "loss": 0.2577, + "step": 32057 + }, + { + "epoch": 1.5017566871223123, + "grad_norm": 0.5684170539308069, + "learning_rate": 7.70554376308249e-07, + "loss": 0.258, + "step": 32058 + }, + { + "epoch": 1.5018035321122407, + "grad_norm": 0.6385957167141426, + "learning_rate": 7.704174330151243e-07, + "loss": 0.2829, + "step": 32059 + }, + { + "epoch": 1.501850377102169, + "grad_norm": 0.5942952333496537, + "learning_rate": 7.702804996753122e-07, + "loss": 0.2651, + "step": 32060 + }, + { + "epoch": 1.5018972220920972, + "grad_norm": 0.5891599281869185, + "learning_rate": 7.701435762896006e-07, + "loss": 0.2731, + "step": 32061 + }, + { + "epoch": 1.5019440670820257, + "grad_norm": 0.5882733911705014, + "learning_rate": 7.700066628587777e-07, + "loss": 0.2915, + "step": 32062 + }, + { + "epoch": 1.501990912071954, + "grad_norm": 0.5972270526294257, + "learning_rate": 7.698697593836315e-07, + "loss": 0.2569, + "step": 32063 + }, + { + "epoch": 1.5020377570618821, + "grad_norm": 0.6476705794310911, + "learning_rate": 7.697328658649502e-07, + "loss": 0.2666, + "step": 32064 + }, + { + "epoch": 1.5020846020518106, + "grad_norm": 0.6987250477613514, + "learning_rate": 7.695959823035215e-07, + "loss": 0.2438, + "step": 32065 + }, + { + "epoch": 1.5021314470417388, + "grad_norm": 0.5825385179170628, + "learning_rate": 7.694591087001338e-07, + "loss": 0.2718, + "step": 32066 + }, + { + "epoch": 1.502178292031667, + "grad_norm": 0.5896250287834258, + "learning_rate": 7.693222450555737e-07, + "loss": 0.2619, + "step": 32067 + }, + { + "epoch": 1.5022251370215955, + "grad_norm": 0.6847893418300806, + "learning_rate": 7.691853913706285e-07, + "loss": 0.2892, + "step": 32068 + }, + { + "epoch": 1.502271982011524, + "grad_norm": 0.5763259319220676, + "learning_rate": 7.690485476460862e-07, + "loss": 0.2624, + "step": 32069 + }, + { + "epoch": 1.502318827001452, + "grad_norm": 0.6056762521325627, + "learning_rate": 7.689117138827345e-07, + "loss": 0.2757, + "step": 32070 + }, + { + "epoch": 1.5023656719913805, + "grad_norm": 0.5862472266249237, + "learning_rate": 7.68774890081361e-07, + "loss": 0.2747, + "step": 32071 + }, + { + "epoch": 1.502412516981309, + "grad_norm": 0.5997071902425799, + "learning_rate": 7.686380762427537e-07, + "loss": 0.2675, + "step": 32072 + }, + { + "epoch": 1.5024593619712372, + "grad_norm": 0.6020543004983107, + "learning_rate": 7.685012723676979e-07, + "loss": 0.2758, + "step": 32073 + }, + { + "epoch": 1.5025062069611654, + "grad_norm": 0.6835358870266663, + "learning_rate": 7.683644784569833e-07, + "loss": 0.299, + "step": 32074 + }, + { + "epoch": 1.502553051951094, + "grad_norm": 0.6124710076542844, + "learning_rate": 7.682276945113948e-07, + "loss": 0.2739, + "step": 32075 + }, + { + "epoch": 1.5025998969410221, + "grad_norm": 0.6350728200016567, + "learning_rate": 7.680909205317208e-07, + "loss": 0.2728, + "step": 32076 + }, + { + "epoch": 1.5026467419309504, + "grad_norm": 0.5882400251217563, + "learning_rate": 7.679541565187482e-07, + "loss": 0.2635, + "step": 32077 + }, + { + "epoch": 1.5026935869208788, + "grad_norm": 0.5457987831973343, + "learning_rate": 7.678174024732648e-07, + "loss": 0.2522, + "step": 32078 + }, + { + "epoch": 1.502740431910807, + "grad_norm": 0.6099787624902527, + "learning_rate": 7.676806583960564e-07, + "loss": 0.2809, + "step": 32079 + }, + { + "epoch": 1.5027872769007353, + "grad_norm": 0.6288806992320805, + "learning_rate": 7.675439242879101e-07, + "loss": 0.2799, + "step": 32080 + }, + { + "epoch": 1.5028341218906638, + "grad_norm": 0.5939243631773383, + "learning_rate": 7.674072001496139e-07, + "loss": 0.2708, + "step": 32081 + }, + { + "epoch": 1.5028809668805923, + "grad_norm": 0.5841841848551093, + "learning_rate": 7.672704859819529e-07, + "loss": 0.2744, + "step": 32082 + }, + { + "epoch": 1.5029278118705205, + "grad_norm": 0.6180273463748295, + "learning_rate": 7.671337817857152e-07, + "loss": 0.2816, + "step": 32083 + }, + { + "epoch": 1.5029746568604487, + "grad_norm": 0.6133014148751019, + "learning_rate": 7.669970875616875e-07, + "loss": 0.2765, + "step": 32084 + }, + { + "epoch": 1.5030215018503772, + "grad_norm": 0.5738117739914771, + "learning_rate": 7.668604033106552e-07, + "loss": 0.2635, + "step": 32085 + }, + { + "epoch": 1.5030683468403054, + "grad_norm": 0.6239953476465822, + "learning_rate": 7.667237290334056e-07, + "loss": 0.2785, + "step": 32086 + }, + { + "epoch": 1.5031151918302337, + "grad_norm": 0.5429861913215867, + "learning_rate": 7.665870647307264e-07, + "loss": 0.2548, + "step": 32087 + }, + { + "epoch": 1.5031620368201621, + "grad_norm": 0.627671820631575, + "learning_rate": 7.664504104034021e-07, + "loss": 0.2729, + "step": 32088 + }, + { + "epoch": 1.5032088818100904, + "grad_norm": 0.6140630878391743, + "learning_rate": 7.663137660522196e-07, + "loss": 0.2855, + "step": 32089 + }, + { + "epoch": 1.5032557268000186, + "grad_norm": 0.6029277889638701, + "learning_rate": 7.661771316779668e-07, + "loss": 0.2707, + "step": 32090 + }, + { + "epoch": 1.503302571789947, + "grad_norm": 0.5619431870501089, + "learning_rate": 7.660405072814278e-07, + "loss": 0.2623, + "step": 32091 + }, + { + "epoch": 1.5033494167798755, + "grad_norm": 0.6161232188257515, + "learning_rate": 7.6590389286339e-07, + "loss": 0.2619, + "step": 32092 + }, + { + "epoch": 1.5033962617698036, + "grad_norm": 0.5612492941282403, + "learning_rate": 7.657672884246395e-07, + "loss": 0.2577, + "step": 32093 + }, + { + "epoch": 1.503443106759732, + "grad_norm": 0.669330672100782, + "learning_rate": 7.65630693965963e-07, + "loss": 0.2839, + "step": 32094 + }, + { + "epoch": 1.5034899517496605, + "grad_norm": 0.6206989477626017, + "learning_rate": 7.654941094881454e-07, + "loss": 0.278, + "step": 32095 + }, + { + "epoch": 1.5035367967395887, + "grad_norm": 0.553712225630426, + "learning_rate": 7.65357534991974e-07, + "loss": 0.2694, + "step": 32096 + }, + { + "epoch": 1.503583641729517, + "grad_norm": 0.6233154436407575, + "learning_rate": 7.652209704782329e-07, + "loss": 0.2718, + "step": 32097 + }, + { + "epoch": 1.5036304867194454, + "grad_norm": 0.6129354087951671, + "learning_rate": 7.650844159477095e-07, + "loss": 0.2756, + "step": 32098 + }, + { + "epoch": 1.5036773317093737, + "grad_norm": 0.5704125802900702, + "learning_rate": 7.649478714011888e-07, + "loss": 0.2764, + "step": 32099 + }, + { + "epoch": 1.503724176699302, + "grad_norm": 0.6418989877135156, + "learning_rate": 7.648113368394572e-07, + "loss": 0.2856, + "step": 32100 + }, + { + "epoch": 1.5037710216892304, + "grad_norm": 0.6070796011833012, + "learning_rate": 7.64674812263301e-07, + "loss": 0.2695, + "step": 32101 + }, + { + "epoch": 1.5038178666791586, + "grad_norm": 0.6017216024851447, + "learning_rate": 7.645382976735053e-07, + "loss": 0.2905, + "step": 32102 + }, + { + "epoch": 1.5038647116690869, + "grad_norm": 0.6026462179146225, + "learning_rate": 7.644017930708544e-07, + "loss": 0.254, + "step": 32103 + }, + { + "epoch": 1.5039115566590153, + "grad_norm": 0.6075957378104648, + "learning_rate": 7.642652984561352e-07, + "loss": 0.2673, + "step": 32104 + }, + { + "epoch": 1.5039584016489438, + "grad_norm": 0.5913833271378846, + "learning_rate": 7.641288138301328e-07, + "loss": 0.2771, + "step": 32105 + }, + { + "epoch": 1.5040052466388718, + "grad_norm": 0.6009181893568168, + "learning_rate": 7.63992339193633e-07, + "loss": 0.2774, + "step": 32106 + }, + { + "epoch": 1.5040520916288003, + "grad_norm": 0.6247718385095175, + "learning_rate": 7.638558745474209e-07, + "loss": 0.2721, + "step": 32107 + }, + { + "epoch": 1.5040989366187287, + "grad_norm": 0.6173434323038959, + "learning_rate": 7.637194198922826e-07, + "loss": 0.292, + "step": 32108 + }, + { + "epoch": 1.504145781608657, + "grad_norm": 0.6015119736929118, + "learning_rate": 7.635829752290025e-07, + "loss": 0.2641, + "step": 32109 + }, + { + "epoch": 1.5041926265985852, + "grad_norm": 0.6657616200675411, + "learning_rate": 7.634465405583652e-07, + "loss": 0.2906, + "step": 32110 + }, + { + "epoch": 1.5042394715885137, + "grad_norm": 0.6351412888438077, + "learning_rate": 7.633101158811567e-07, + "loss": 0.2698, + "step": 32111 + }, + { + "epoch": 1.504286316578442, + "grad_norm": 0.5951464855712175, + "learning_rate": 7.631737011981618e-07, + "loss": 0.2807, + "step": 32112 + }, + { + "epoch": 1.5043331615683702, + "grad_norm": 0.6194210205972867, + "learning_rate": 7.63037296510166e-07, + "loss": 0.2613, + "step": 32113 + }, + { + "epoch": 1.5043800065582986, + "grad_norm": 0.6241292089367464, + "learning_rate": 7.629009018179548e-07, + "loss": 0.2866, + "step": 32114 + }, + { + "epoch": 1.5044268515482269, + "grad_norm": 0.6079807559990728, + "learning_rate": 7.627645171223114e-07, + "loss": 0.2907, + "step": 32115 + }, + { + "epoch": 1.504473696538155, + "grad_norm": 0.6047560078375686, + "learning_rate": 7.626281424240223e-07, + "loss": 0.2703, + "step": 32116 + }, + { + "epoch": 1.5045205415280836, + "grad_norm": 0.5745958738639386, + "learning_rate": 7.624917777238708e-07, + "loss": 0.2586, + "step": 32117 + }, + { + "epoch": 1.504567386518012, + "grad_norm": 0.5683597760282509, + "learning_rate": 7.623554230226423e-07, + "loss": 0.2507, + "step": 32118 + }, + { + "epoch": 1.5046142315079403, + "grad_norm": 0.5953607864346336, + "learning_rate": 7.622190783211217e-07, + "loss": 0.2701, + "step": 32119 + }, + { + "epoch": 1.5046610764978685, + "grad_norm": 0.5938629740437108, + "learning_rate": 7.620827436200934e-07, + "loss": 0.2758, + "step": 32120 + }, + { + "epoch": 1.504707921487797, + "grad_norm": 0.5925194094330394, + "learning_rate": 7.619464189203432e-07, + "loss": 0.2793, + "step": 32121 + }, + { + "epoch": 1.5047547664777252, + "grad_norm": 0.5478970378797753, + "learning_rate": 7.618101042226536e-07, + "loss": 0.2464, + "step": 32122 + }, + { + "epoch": 1.5048016114676535, + "grad_norm": 0.556584986459807, + "learning_rate": 7.616737995278107e-07, + "loss": 0.2833, + "step": 32123 + }, + { + "epoch": 1.504848456457582, + "grad_norm": 0.6283135108114626, + "learning_rate": 7.615375048365972e-07, + "loss": 0.286, + "step": 32124 + }, + { + "epoch": 1.5048953014475102, + "grad_norm": 0.6433145432186704, + "learning_rate": 7.614012201497986e-07, + "loss": 0.283, + "step": 32125 + }, + { + "epoch": 1.5049421464374384, + "grad_norm": 0.5777287101205087, + "learning_rate": 7.612649454681992e-07, + "loss": 0.2732, + "step": 32126 + }, + { + "epoch": 1.5049889914273669, + "grad_norm": 0.5753657201688115, + "learning_rate": 7.611286807925835e-07, + "loss": 0.2672, + "step": 32127 + }, + { + "epoch": 1.5050358364172953, + "grad_norm": 0.5886926782256641, + "learning_rate": 7.609924261237345e-07, + "loss": 0.2753, + "step": 32128 + }, + { + "epoch": 1.5050826814072233, + "grad_norm": 0.6226806019999259, + "learning_rate": 7.608561814624371e-07, + "loss": 0.2708, + "step": 32129 + }, + { + "epoch": 1.5051295263971518, + "grad_norm": 0.6450031611293263, + "learning_rate": 7.607199468094758e-07, + "loss": 0.2904, + "step": 32130 + }, + { + "epoch": 1.5051763713870803, + "grad_norm": 0.6398416813674475, + "learning_rate": 7.605837221656337e-07, + "loss": 0.2945, + "step": 32131 + }, + { + "epoch": 1.5052232163770085, + "grad_norm": 0.6360186149831446, + "learning_rate": 7.604475075316947e-07, + "loss": 0.28, + "step": 32132 + }, + { + "epoch": 1.5052700613669368, + "grad_norm": 0.6077063905255374, + "learning_rate": 7.603113029084441e-07, + "loss": 0.2775, + "step": 32133 + }, + { + "epoch": 1.5053169063568652, + "grad_norm": 0.538120845385097, + "learning_rate": 7.601751082966638e-07, + "loss": 0.2521, + "step": 32134 + }, + { + "epoch": 1.5053637513467935, + "grad_norm": 0.5915376471202974, + "learning_rate": 7.600389236971384e-07, + "loss": 0.277, + "step": 32135 + }, + { + "epoch": 1.5054105963367217, + "grad_norm": 0.6065873701157005, + "learning_rate": 7.599027491106525e-07, + "loss": 0.291, + "step": 32136 + }, + { + "epoch": 1.5054574413266502, + "grad_norm": 0.5966787283278746, + "learning_rate": 7.597665845379881e-07, + "loss": 0.2575, + "step": 32137 + }, + { + "epoch": 1.5055042863165784, + "grad_norm": 0.5849734881888962, + "learning_rate": 7.596304299799298e-07, + "loss": 0.2719, + "step": 32138 + }, + { + "epoch": 1.5055511313065066, + "grad_norm": 0.5584986778522544, + "learning_rate": 7.594942854372617e-07, + "loss": 0.2599, + "step": 32139 + }, + { + "epoch": 1.505597976296435, + "grad_norm": 0.587794357742277, + "learning_rate": 7.593581509107656e-07, + "loss": 0.2624, + "step": 32140 + }, + { + "epoch": 1.5056448212863636, + "grad_norm": 0.5873359660957648, + "learning_rate": 7.59222026401226e-07, + "loss": 0.2499, + "step": 32141 + }, + { + "epoch": 1.5056916662762916, + "grad_norm": 0.5810390420893706, + "learning_rate": 7.59085911909426e-07, + "loss": 0.2692, + "step": 32142 + }, + { + "epoch": 1.50573851126622, + "grad_norm": 0.5671311996153364, + "learning_rate": 7.589498074361499e-07, + "loss": 0.2547, + "step": 32143 + }, + { + "epoch": 1.5057853562561485, + "grad_norm": 0.595477470532725, + "learning_rate": 7.588137129821791e-07, + "loss": 0.2624, + "step": 32144 + }, + { + "epoch": 1.5058322012460768, + "grad_norm": 0.5668203453612758, + "learning_rate": 7.586776285482986e-07, + "loss": 0.274, + "step": 32145 + }, + { + "epoch": 1.505879046236005, + "grad_norm": 0.6261743285120134, + "learning_rate": 7.5854155413529e-07, + "loss": 0.2809, + "step": 32146 + }, + { + "epoch": 1.5059258912259335, + "grad_norm": 0.597892174608525, + "learning_rate": 7.584054897439369e-07, + "loss": 0.276, + "step": 32147 + }, + { + "epoch": 1.5059727362158617, + "grad_norm": 0.5861579506867921, + "learning_rate": 7.582694353750226e-07, + "loss": 0.2929, + "step": 32148 + }, + { + "epoch": 1.50601958120579, + "grad_norm": 0.5606424747939496, + "learning_rate": 7.581333910293298e-07, + "loss": 0.2517, + "step": 32149 + }, + { + "epoch": 1.5060664261957184, + "grad_norm": 0.6049323138007031, + "learning_rate": 7.579973567076424e-07, + "loss": 0.277, + "step": 32150 + }, + { + "epoch": 1.5061132711856466, + "grad_norm": 0.6212796840710064, + "learning_rate": 7.578613324107426e-07, + "loss": 0.2836, + "step": 32151 + }, + { + "epoch": 1.5061601161755749, + "grad_norm": 0.6246147948921089, + "learning_rate": 7.577253181394118e-07, + "loss": 0.2724, + "step": 32152 + }, + { + "epoch": 1.5062069611655033, + "grad_norm": 0.6044493310384773, + "learning_rate": 7.57589313894434e-07, + "loss": 0.2639, + "step": 32153 + }, + { + "epoch": 1.5062538061554318, + "grad_norm": 0.5272415403954396, + "learning_rate": 7.574533196765918e-07, + "loss": 0.2469, + "step": 32154 + }, + { + "epoch": 1.50630065114536, + "grad_norm": 0.9814159351693912, + "learning_rate": 7.573173354866678e-07, + "loss": 0.292, + "step": 32155 + }, + { + "epoch": 1.5063474961352883, + "grad_norm": 0.6148055136304964, + "learning_rate": 7.571813613254444e-07, + "loss": 0.287, + "step": 32156 + }, + { + "epoch": 1.5063943411252168, + "grad_norm": 0.584124463970157, + "learning_rate": 7.570453971937053e-07, + "loss": 0.2686, + "step": 32157 + }, + { + "epoch": 1.506441186115145, + "grad_norm": 0.5785986746919385, + "learning_rate": 7.569094430922317e-07, + "loss": 0.2583, + "step": 32158 + }, + { + "epoch": 1.5064880311050732, + "grad_norm": 0.5976030569190566, + "learning_rate": 7.567734990218054e-07, + "loss": 0.2658, + "step": 32159 + }, + { + "epoch": 1.5065348760950017, + "grad_norm": 0.6130081746702815, + "learning_rate": 7.566375649832091e-07, + "loss": 0.2783, + "step": 32160 + }, + { + "epoch": 1.50658172108493, + "grad_norm": 0.5726349989137263, + "learning_rate": 7.56501640977226e-07, + "loss": 0.2679, + "step": 32161 + }, + { + "epoch": 1.5066285660748582, + "grad_norm": 0.6488203982877337, + "learning_rate": 7.563657270046373e-07, + "loss": 0.2672, + "step": 32162 + }, + { + "epoch": 1.5066754110647866, + "grad_norm": 0.6413594129225964, + "learning_rate": 7.562298230662268e-07, + "loss": 0.2703, + "step": 32163 + }, + { + "epoch": 1.506722256054715, + "grad_norm": 0.6009699063702678, + "learning_rate": 7.560939291627742e-07, + "loss": 0.2739, + "step": 32164 + }, + { + "epoch": 1.5067691010446431, + "grad_norm": 0.6135015457516342, + "learning_rate": 7.559580452950638e-07, + "loss": 0.2746, + "step": 32165 + }, + { + "epoch": 1.5068159460345716, + "grad_norm": 0.629639997390189, + "learning_rate": 7.558221714638756e-07, + "loss": 0.2926, + "step": 32166 + }, + { + "epoch": 1.5068627910245, + "grad_norm": 0.568131454232803, + "learning_rate": 7.556863076699925e-07, + "loss": 0.2713, + "step": 32167 + }, + { + "epoch": 1.5069096360144283, + "grad_norm": 0.608721404996823, + "learning_rate": 7.555504539141962e-07, + "loss": 0.28, + "step": 32168 + }, + { + "epoch": 1.5069564810043565, + "grad_norm": 0.5960415068150811, + "learning_rate": 7.554146101972693e-07, + "loss": 0.2848, + "step": 32169 + }, + { + "epoch": 1.507003325994285, + "grad_norm": 0.582499521241663, + "learning_rate": 7.55278776519992e-07, + "loss": 0.2701, + "step": 32170 + }, + { + "epoch": 1.5070501709842132, + "grad_norm": 0.6827886112900009, + "learning_rate": 7.55142952883147e-07, + "loss": 0.2799, + "step": 32171 + }, + { + "epoch": 1.5070970159741415, + "grad_norm": 0.6097373765794517, + "learning_rate": 7.550071392875166e-07, + "loss": 0.2658, + "step": 32172 + }, + { + "epoch": 1.50714386096407, + "grad_norm": 0.6276938710933553, + "learning_rate": 7.548713357338805e-07, + "loss": 0.2901, + "step": 32173 + }, + { + "epoch": 1.5071907059539982, + "grad_norm": 0.6083789202924729, + "learning_rate": 7.547355422230216e-07, + "loss": 0.2679, + "step": 32174 + }, + { + "epoch": 1.5072375509439264, + "grad_norm": 0.5852000187528137, + "learning_rate": 7.545997587557213e-07, + "loss": 0.2672, + "step": 32175 + }, + { + "epoch": 1.5072843959338549, + "grad_norm": 0.6094954563714134, + "learning_rate": 7.544639853327601e-07, + "loss": 0.2664, + "step": 32176 + }, + { + "epoch": 1.5073312409237833, + "grad_norm": 0.6052963887081858, + "learning_rate": 7.543282219549203e-07, + "loss": 0.2717, + "step": 32177 + }, + { + "epoch": 1.5073780859137114, + "grad_norm": 0.5712503507336434, + "learning_rate": 7.541924686229832e-07, + "loss": 0.2741, + "step": 32178 + }, + { + "epoch": 1.5074249309036398, + "grad_norm": 0.5894099727120272, + "learning_rate": 7.54056725337729e-07, + "loss": 0.2727, + "step": 32179 + }, + { + "epoch": 1.5074717758935683, + "grad_norm": 0.5975308596313039, + "learning_rate": 7.539209920999393e-07, + "loss": 0.2792, + "step": 32180 + }, + { + "epoch": 1.5075186208834965, + "grad_norm": 0.5806270249923783, + "learning_rate": 7.537852689103966e-07, + "loss": 0.2408, + "step": 32181 + }, + { + "epoch": 1.5075654658734248, + "grad_norm": 0.5943739972826405, + "learning_rate": 7.536495557698797e-07, + "loss": 0.2603, + "step": 32182 + }, + { + "epoch": 1.5076123108633532, + "grad_norm": 0.6101138898340296, + "learning_rate": 7.535138526791707e-07, + "loss": 0.2842, + "step": 32183 + }, + { + "epoch": 1.5076591558532815, + "grad_norm": 0.5576700100993123, + "learning_rate": 7.533781596390507e-07, + "loss": 0.266, + "step": 32184 + }, + { + "epoch": 1.5077060008432097, + "grad_norm": 0.566425741397041, + "learning_rate": 7.53242476650301e-07, + "loss": 0.2601, + "step": 32185 + }, + { + "epoch": 1.5077528458331382, + "grad_norm": 0.6254111864298479, + "learning_rate": 7.531068037137012e-07, + "loss": 0.2908, + "step": 32186 + }, + { + "epoch": 1.5077996908230664, + "grad_norm": 0.5747489851553089, + "learning_rate": 7.529711408300333e-07, + "loss": 0.2779, + "step": 32187 + }, + { + "epoch": 1.5078465358129947, + "grad_norm": 0.6087421213675982, + "learning_rate": 7.528354880000769e-07, + "loss": 0.2667, + "step": 32188 + }, + { + "epoch": 1.5078933808029231, + "grad_norm": 0.6431456418611358, + "learning_rate": 7.526998452246129e-07, + "loss": 0.2885, + "step": 32189 + }, + { + "epoch": 1.5079402257928516, + "grad_norm": 0.5949087212875597, + "learning_rate": 7.525642125044222e-07, + "loss": 0.2581, + "step": 32190 + }, + { + "epoch": 1.5079870707827798, + "grad_norm": 0.6090008738046081, + "learning_rate": 7.52428589840285e-07, + "loss": 0.2882, + "step": 32191 + }, + { + "epoch": 1.508033915772708, + "grad_norm": 0.6252650283158789, + "learning_rate": 7.522929772329832e-07, + "loss": 0.2857, + "step": 32192 + }, + { + "epoch": 1.5080807607626365, + "grad_norm": 0.631391002843271, + "learning_rate": 7.52157374683295e-07, + "loss": 0.2698, + "step": 32193 + }, + { + "epoch": 1.5081276057525648, + "grad_norm": 0.6242964920127096, + "learning_rate": 7.520217821920026e-07, + "loss": 0.2637, + "step": 32194 + }, + { + "epoch": 1.508174450742493, + "grad_norm": 0.6339340834394604, + "learning_rate": 7.518861997598848e-07, + "loss": 0.2755, + "step": 32195 + }, + { + "epoch": 1.5082212957324215, + "grad_norm": 0.6141047091127971, + "learning_rate": 7.517506273877225e-07, + "loss": 0.2894, + "step": 32196 + }, + { + "epoch": 1.5082681407223497, + "grad_norm": 0.5945649027527852, + "learning_rate": 7.51615065076296e-07, + "loss": 0.2628, + "step": 32197 + }, + { + "epoch": 1.508314985712278, + "grad_norm": 0.5973525220217, + "learning_rate": 7.514795128263852e-07, + "loss": 0.2754, + "step": 32198 + }, + { + "epoch": 1.5083618307022064, + "grad_norm": 0.571923223981887, + "learning_rate": 7.513439706387712e-07, + "loss": 0.2609, + "step": 32199 + }, + { + "epoch": 1.5084086756921349, + "grad_norm": 0.5945104279820098, + "learning_rate": 7.512084385142332e-07, + "loss": 0.2845, + "step": 32200 + }, + { + "epoch": 1.508455520682063, + "grad_norm": 0.601224484383382, + "learning_rate": 7.510729164535505e-07, + "loss": 0.2732, + "step": 32201 + }, + { + "epoch": 1.5085023656719914, + "grad_norm": 0.5851338384031728, + "learning_rate": 7.509374044575033e-07, + "loss": 0.2841, + "step": 32202 + }, + { + "epoch": 1.5085492106619198, + "grad_norm": 0.6372433060618368, + "learning_rate": 7.508019025268717e-07, + "loss": 0.2943, + "step": 32203 + }, + { + "epoch": 1.508596055651848, + "grad_norm": 0.5973965770853945, + "learning_rate": 7.506664106624357e-07, + "loss": 0.2628, + "step": 32204 + }, + { + "epoch": 1.5086429006417763, + "grad_norm": 0.5619923632151592, + "learning_rate": 7.505309288649748e-07, + "loss": 0.2552, + "step": 32205 + }, + { + "epoch": 1.5086897456317048, + "grad_norm": 0.572551093119471, + "learning_rate": 7.503954571352692e-07, + "loss": 0.2627, + "step": 32206 + }, + { + "epoch": 1.508736590621633, + "grad_norm": 0.5744306698760695, + "learning_rate": 7.502599954740983e-07, + "loss": 0.2763, + "step": 32207 + }, + { + "epoch": 1.5087834356115613, + "grad_norm": 0.621035583615824, + "learning_rate": 7.501245438822405e-07, + "loss": 0.2783, + "step": 32208 + }, + { + "epoch": 1.5088302806014897, + "grad_norm": 0.5497058860888868, + "learning_rate": 7.49989102360476e-07, + "loss": 0.2509, + "step": 32209 + }, + { + "epoch": 1.508877125591418, + "grad_norm": 0.51871986286339, + "learning_rate": 7.498536709095846e-07, + "loss": 0.2291, + "step": 32210 + }, + { + "epoch": 1.5089239705813462, + "grad_norm": 0.6548331589482892, + "learning_rate": 7.497182495303453e-07, + "loss": 0.2662, + "step": 32211 + }, + { + "epoch": 1.5089708155712747, + "grad_norm": 0.6099963030697598, + "learning_rate": 7.495828382235384e-07, + "loss": 0.2787, + "step": 32212 + }, + { + "epoch": 1.5090176605612031, + "grad_norm": 0.6106648627236162, + "learning_rate": 7.494474369899416e-07, + "loss": 0.27, + "step": 32213 + }, + { + "epoch": 1.5090645055511311, + "grad_norm": 0.598432313012487, + "learning_rate": 7.493120458303354e-07, + "loss": 0.27, + "step": 32214 + }, + { + "epoch": 1.5091113505410596, + "grad_norm": 0.5528678524285235, + "learning_rate": 7.491766647454976e-07, + "loss": 0.2644, + "step": 32215 + }, + { + "epoch": 1.509158195530988, + "grad_norm": 0.5731655639074443, + "learning_rate": 7.490412937362082e-07, + "loss": 0.2599, + "step": 32216 + }, + { + "epoch": 1.5092050405209163, + "grad_norm": 0.5774714426132416, + "learning_rate": 7.489059328032459e-07, + "loss": 0.267, + "step": 32217 + }, + { + "epoch": 1.5092518855108445, + "grad_norm": 0.5992798459990268, + "learning_rate": 7.487705819473906e-07, + "loss": 0.2741, + "step": 32218 + }, + { + "epoch": 1.509298730500773, + "grad_norm": 0.5544807607494899, + "learning_rate": 7.486352411694198e-07, + "loss": 0.2544, + "step": 32219 + }, + { + "epoch": 1.5093455754907013, + "grad_norm": 0.5586340704315965, + "learning_rate": 7.484999104701129e-07, + "loss": 0.2478, + "step": 32220 + }, + { + "epoch": 1.5093924204806295, + "grad_norm": 0.5612445803290915, + "learning_rate": 7.483645898502497e-07, + "loss": 0.2437, + "step": 32221 + }, + { + "epoch": 1.509439265470558, + "grad_norm": 0.5605671854270274, + "learning_rate": 7.482292793106072e-07, + "loss": 0.2653, + "step": 32222 + }, + { + "epoch": 1.5094861104604862, + "grad_norm": 0.599320845434133, + "learning_rate": 7.480939788519647e-07, + "loss": 0.2764, + "step": 32223 + }, + { + "epoch": 1.5095329554504144, + "grad_norm": 0.6120369976910283, + "learning_rate": 7.479586884751022e-07, + "loss": 0.2766, + "step": 32224 + }, + { + "epoch": 1.509579800440343, + "grad_norm": 0.581307658842203, + "learning_rate": 7.478234081807962e-07, + "loss": 0.2708, + "step": 32225 + }, + { + "epoch": 1.5096266454302714, + "grad_norm": 0.6265776162536276, + "learning_rate": 7.476881379698258e-07, + "loss": 0.3038, + "step": 32226 + }, + { + "epoch": 1.5096734904201996, + "grad_norm": 0.5604437515912406, + "learning_rate": 7.475528778429708e-07, + "loss": 0.2577, + "step": 32227 + }, + { + "epoch": 1.5097203354101278, + "grad_norm": 0.6422515177442589, + "learning_rate": 7.474176278010075e-07, + "loss": 0.2937, + "step": 32228 + }, + { + "epoch": 1.5097671804000563, + "grad_norm": 0.621536570847989, + "learning_rate": 7.472823878447156e-07, + "loss": 0.2727, + "step": 32229 + }, + { + "epoch": 1.5098140253899845, + "grad_norm": 0.5788678364964318, + "learning_rate": 7.471471579748735e-07, + "loss": 0.2692, + "step": 32230 + }, + { + "epoch": 1.5098608703799128, + "grad_norm": 0.5987662944287555, + "learning_rate": 7.470119381922583e-07, + "loss": 0.2832, + "step": 32231 + }, + { + "epoch": 1.5099077153698413, + "grad_norm": 0.631620057164692, + "learning_rate": 7.468767284976489e-07, + "loss": 0.2861, + "step": 32232 + }, + { + "epoch": 1.5099545603597695, + "grad_norm": 0.6218936198822897, + "learning_rate": 7.467415288918231e-07, + "loss": 0.2835, + "step": 32233 + }, + { + "epoch": 1.5100014053496977, + "grad_norm": 0.5449022555844517, + "learning_rate": 7.466063393755599e-07, + "loss": 0.2583, + "step": 32234 + }, + { + "epoch": 1.5100482503396262, + "grad_norm": 0.5775145850337167, + "learning_rate": 7.464711599496358e-07, + "loss": 0.2738, + "step": 32235 + }, + { + "epoch": 1.5100950953295547, + "grad_norm": 0.5982183499614084, + "learning_rate": 7.463359906148305e-07, + "loss": 0.2686, + "step": 32236 + }, + { + "epoch": 1.5101419403194827, + "grad_norm": 0.6258137493618577, + "learning_rate": 7.462008313719197e-07, + "loss": 0.2683, + "step": 32237 + }, + { + "epoch": 1.5101887853094111, + "grad_norm": 0.593781033995096, + "learning_rate": 7.460656822216821e-07, + "loss": 0.2592, + "step": 32238 + }, + { + "epoch": 1.5102356302993396, + "grad_norm": 0.5659053278017594, + "learning_rate": 7.459305431648961e-07, + "loss": 0.2467, + "step": 32239 + }, + { + "epoch": 1.5102824752892678, + "grad_norm": 0.556396339940828, + "learning_rate": 7.457954142023388e-07, + "loss": 0.2522, + "step": 32240 + }, + { + "epoch": 1.510329320279196, + "grad_norm": 0.5798700291695192, + "learning_rate": 7.456602953347888e-07, + "loss": 0.273, + "step": 32241 + }, + { + "epoch": 1.5103761652691245, + "grad_norm": 0.604635809124549, + "learning_rate": 7.455251865630228e-07, + "loss": 0.2786, + "step": 32242 + }, + { + "epoch": 1.5104230102590528, + "grad_norm": 0.596962455170375, + "learning_rate": 7.453900878878176e-07, + "loss": 0.2763, + "step": 32243 + }, + { + "epoch": 1.510469855248981, + "grad_norm": 0.5861278966989513, + "learning_rate": 7.452549993099517e-07, + "loss": 0.26, + "step": 32244 + }, + { + "epoch": 1.5105167002389095, + "grad_norm": 0.59198264404077, + "learning_rate": 7.451199208302018e-07, + "loss": 0.2862, + "step": 32245 + }, + { + "epoch": 1.5105635452288377, + "grad_norm": 0.5985710275012428, + "learning_rate": 7.449848524493461e-07, + "loss": 0.2731, + "step": 32246 + }, + { + "epoch": 1.510610390218766, + "grad_norm": 0.6380803330827095, + "learning_rate": 7.448497941681612e-07, + "loss": 0.2801, + "step": 32247 + }, + { + "epoch": 1.5106572352086944, + "grad_norm": 0.6631220453670277, + "learning_rate": 7.447147459874254e-07, + "loss": 0.2847, + "step": 32248 + }, + { + "epoch": 1.510704080198623, + "grad_norm": 0.6333113423212402, + "learning_rate": 7.44579707907915e-07, + "loss": 0.2846, + "step": 32249 + }, + { + "epoch": 1.510750925188551, + "grad_norm": 0.5674405503868216, + "learning_rate": 7.444446799304067e-07, + "loss": 0.252, + "step": 32250 + }, + { + "epoch": 1.5107977701784794, + "grad_norm": 0.6200265147514715, + "learning_rate": 7.443096620556778e-07, + "loss": 0.2801, + "step": 32251 + }, + { + "epoch": 1.5108446151684078, + "grad_norm": 0.6266037161241419, + "learning_rate": 7.441746542845055e-07, + "loss": 0.2673, + "step": 32252 + }, + { + "epoch": 1.510891460158336, + "grad_norm": 0.573350799375114, + "learning_rate": 7.44039656617667e-07, + "loss": 0.2576, + "step": 32253 + }, + { + "epoch": 1.5109383051482643, + "grad_norm": 0.5766706592907807, + "learning_rate": 7.439046690559393e-07, + "loss": 0.2751, + "step": 32254 + }, + { + "epoch": 1.5109851501381928, + "grad_norm": 0.6090144995938679, + "learning_rate": 7.437696916000983e-07, + "loss": 0.2755, + "step": 32255 + }, + { + "epoch": 1.511031995128121, + "grad_norm": 0.59152639921202, + "learning_rate": 7.43634724250922e-07, + "loss": 0.2716, + "step": 32256 + }, + { + "epoch": 1.5110788401180493, + "grad_norm": 0.568192389438318, + "learning_rate": 7.434997670091859e-07, + "loss": 0.2577, + "step": 32257 + }, + { + "epoch": 1.5111256851079777, + "grad_norm": 0.593315064921107, + "learning_rate": 7.433648198756668e-07, + "loss": 0.2713, + "step": 32258 + }, + { + "epoch": 1.511172530097906, + "grad_norm": 0.6066328849874215, + "learning_rate": 7.432298828511419e-07, + "loss": 0.2683, + "step": 32259 + }, + { + "epoch": 1.5112193750878342, + "grad_norm": 0.6220605417874578, + "learning_rate": 7.430949559363876e-07, + "loss": 0.2902, + "step": 32260 + }, + { + "epoch": 1.5112662200777627, + "grad_norm": 0.5624985417678389, + "learning_rate": 7.429600391321807e-07, + "loss": 0.2745, + "step": 32261 + }, + { + "epoch": 1.5113130650676911, + "grad_norm": 0.5792094913764496, + "learning_rate": 7.428251324392965e-07, + "loss": 0.2624, + "step": 32262 + }, + { + "epoch": 1.5113599100576194, + "grad_norm": 0.5735999544743323, + "learning_rate": 7.426902358585128e-07, + "loss": 0.2677, + "step": 32263 + }, + { + "epoch": 1.5114067550475476, + "grad_norm": 0.5980950881002497, + "learning_rate": 7.425553493906043e-07, + "loss": 0.2662, + "step": 32264 + }, + { + "epoch": 1.511453600037476, + "grad_norm": 0.5997762213533744, + "learning_rate": 7.424204730363482e-07, + "loss": 0.2651, + "step": 32265 + }, + { + "epoch": 1.5115004450274043, + "grad_norm": 0.5832413717120554, + "learning_rate": 7.422856067965203e-07, + "loss": 0.2715, + "step": 32266 + }, + { + "epoch": 1.5115472900173326, + "grad_norm": 0.5599156257877503, + "learning_rate": 7.421507506718978e-07, + "loss": 0.2534, + "step": 32267 + }, + { + "epoch": 1.511594135007261, + "grad_norm": 0.6435518038038661, + "learning_rate": 7.420159046632552e-07, + "loss": 0.27, + "step": 32268 + }, + { + "epoch": 1.5116409799971893, + "grad_norm": 0.6100770492451847, + "learning_rate": 7.418810687713693e-07, + "loss": 0.2658, + "step": 32269 + }, + { + "epoch": 1.5116878249871175, + "grad_norm": 0.6096534473518234, + "learning_rate": 7.417462429970165e-07, + "loss": 0.2747, + "step": 32270 + }, + { + "epoch": 1.511734669977046, + "grad_norm": 0.5932954558977636, + "learning_rate": 7.416114273409716e-07, + "loss": 0.273, + "step": 32271 + }, + { + "epoch": 1.5117815149669744, + "grad_norm": 0.5940737695052283, + "learning_rate": 7.414766218040109e-07, + "loss": 0.2728, + "step": 32272 + }, + { + "epoch": 1.5118283599569025, + "grad_norm": 0.5578254830281828, + "learning_rate": 7.41341826386911e-07, + "loss": 0.2516, + "step": 32273 + }, + { + "epoch": 1.511875204946831, + "grad_norm": 0.5974215404754138, + "learning_rate": 7.41207041090446e-07, + "loss": 0.2758, + "step": 32274 + }, + { + "epoch": 1.5119220499367594, + "grad_norm": 0.5934924691179543, + "learning_rate": 7.410722659153927e-07, + "loss": 0.2782, + "step": 32275 + }, + { + "epoch": 1.5119688949266876, + "grad_norm": 0.6061781611480249, + "learning_rate": 7.409375008625269e-07, + "loss": 0.2708, + "step": 32276 + }, + { + "epoch": 1.5120157399166159, + "grad_norm": 0.5938597326709347, + "learning_rate": 7.408027459326231e-07, + "loss": 0.2789, + "step": 32277 + }, + { + "epoch": 1.5120625849065443, + "grad_norm": 0.6068014020305688, + "learning_rate": 7.406680011264572e-07, + "loss": 0.2689, + "step": 32278 + }, + { + "epoch": 1.5121094298964726, + "grad_norm": 0.57485656520986, + "learning_rate": 7.405332664448053e-07, + "loss": 0.2655, + "step": 32279 + }, + { + "epoch": 1.5121562748864008, + "grad_norm": 0.5724723200517428, + "learning_rate": 7.403985418884419e-07, + "loss": 0.2695, + "step": 32280 + }, + { + "epoch": 1.5122031198763293, + "grad_norm": 0.5856390184211671, + "learning_rate": 7.402638274581425e-07, + "loss": 0.2776, + "step": 32281 + }, + { + "epoch": 1.5122499648662575, + "grad_norm": 0.584631105667858, + "learning_rate": 7.401291231546823e-07, + "loss": 0.2659, + "step": 32282 + }, + { + "epoch": 1.5122968098561858, + "grad_norm": 0.5964965055796451, + "learning_rate": 7.399944289788378e-07, + "loss": 0.267, + "step": 32283 + }, + { + "epoch": 1.5123436548461142, + "grad_norm": 0.5740251535452255, + "learning_rate": 7.398597449313818e-07, + "loss": 0.2747, + "step": 32284 + }, + { + "epoch": 1.5123904998360427, + "grad_norm": 0.6378664595985001, + "learning_rate": 7.397250710130915e-07, + "loss": 0.2763, + "step": 32285 + }, + { + "epoch": 1.5124373448259707, + "grad_norm": 0.5815927958585008, + "learning_rate": 7.395904072247403e-07, + "loss": 0.2728, + "step": 32286 + }, + { + "epoch": 1.5124841898158992, + "grad_norm": 0.6510833628713796, + "learning_rate": 7.39455753567104e-07, + "loss": 0.2813, + "step": 32287 + }, + { + "epoch": 1.5125310348058276, + "grad_norm": 0.579730721690065, + "learning_rate": 7.39321110040957e-07, + "loss": 0.2689, + "step": 32288 + }, + { + "epoch": 1.5125778797957559, + "grad_norm": 0.6037357474032717, + "learning_rate": 7.391864766470746e-07, + "loss": 0.2541, + "step": 32289 + }, + { + "epoch": 1.512624724785684, + "grad_norm": 0.6233747274321053, + "learning_rate": 7.390518533862323e-07, + "loss": 0.29, + "step": 32290 + }, + { + "epoch": 1.5126715697756126, + "grad_norm": 0.6171558436815675, + "learning_rate": 7.38917240259204e-07, + "loss": 0.2906, + "step": 32291 + }, + { + "epoch": 1.5127184147655408, + "grad_norm": 0.6534195935522028, + "learning_rate": 7.387826372667636e-07, + "loss": 0.2786, + "step": 32292 + }, + { + "epoch": 1.512765259755469, + "grad_norm": 0.5443961631878677, + "learning_rate": 7.386480444096863e-07, + "loss": 0.2694, + "step": 32293 + }, + { + "epoch": 1.5128121047453975, + "grad_norm": 0.5804076761647381, + "learning_rate": 7.385134616887471e-07, + "loss": 0.272, + "step": 32294 + }, + { + "epoch": 1.5128589497353258, + "grad_norm": 0.645621065950226, + "learning_rate": 7.3837888910472e-07, + "loss": 0.2785, + "step": 32295 + }, + { + "epoch": 1.512905794725254, + "grad_norm": 0.6052757291850127, + "learning_rate": 7.382443266583794e-07, + "loss": 0.2927, + "step": 32296 + }, + { + "epoch": 1.5129526397151825, + "grad_norm": 0.6165971389021776, + "learning_rate": 7.381097743505011e-07, + "loss": 0.2834, + "step": 32297 + }, + { + "epoch": 1.512999484705111, + "grad_norm": 0.5692839072839407, + "learning_rate": 7.379752321818579e-07, + "loss": 0.265, + "step": 32298 + }, + { + "epoch": 1.5130463296950392, + "grad_norm": 0.6092433789747812, + "learning_rate": 7.378407001532237e-07, + "loss": 0.2809, + "step": 32299 + }, + { + "epoch": 1.5130931746849674, + "grad_norm": 0.6254022567110522, + "learning_rate": 7.377061782653733e-07, + "loss": 0.2766, + "step": 32300 + }, + { + "epoch": 1.5131400196748959, + "grad_norm": 0.6229886484348867, + "learning_rate": 7.37571666519081e-07, + "loss": 0.2902, + "step": 32301 + }, + { + "epoch": 1.513186864664824, + "grad_norm": 0.5745087039823137, + "learning_rate": 7.374371649151208e-07, + "loss": 0.2735, + "step": 32302 + }, + { + "epoch": 1.5132337096547523, + "grad_norm": 0.6196026305941101, + "learning_rate": 7.373026734542673e-07, + "loss": 0.2811, + "step": 32303 + }, + { + "epoch": 1.5132805546446808, + "grad_norm": 0.5888746721163808, + "learning_rate": 7.371681921372934e-07, + "loss": 0.2543, + "step": 32304 + }, + { + "epoch": 1.513327399634609, + "grad_norm": 0.6190412845794714, + "learning_rate": 7.370337209649742e-07, + "loss": 0.2696, + "step": 32305 + }, + { + "epoch": 1.5133742446245373, + "grad_norm": 0.618892299686632, + "learning_rate": 7.36899259938082e-07, + "loss": 0.2789, + "step": 32306 + }, + { + "epoch": 1.5134210896144658, + "grad_norm": 0.6012343981023487, + "learning_rate": 7.367648090573915e-07, + "loss": 0.2727, + "step": 32307 + }, + { + "epoch": 1.5134679346043942, + "grad_norm": 0.6794757287442788, + "learning_rate": 7.366303683236761e-07, + "loss": 0.2427, + "step": 32308 + }, + { + "epoch": 1.5135147795943222, + "grad_norm": 0.6532799183220005, + "learning_rate": 7.364959377377107e-07, + "loss": 0.294, + "step": 32309 + }, + { + "epoch": 1.5135616245842507, + "grad_norm": 0.6147239911763157, + "learning_rate": 7.363615173002669e-07, + "loss": 0.2581, + "step": 32310 + }, + { + "epoch": 1.5136084695741792, + "grad_norm": 0.5758977685789117, + "learning_rate": 7.362271070121197e-07, + "loss": 0.2697, + "step": 32311 + }, + { + "epoch": 1.5136553145641074, + "grad_norm": 0.6014791525315244, + "learning_rate": 7.360927068740429e-07, + "loss": 0.274, + "step": 32312 + }, + { + "epoch": 1.5137021595540356, + "grad_norm": 0.5661635422291049, + "learning_rate": 7.359583168868081e-07, + "loss": 0.2613, + "step": 32313 + }, + { + "epoch": 1.513749004543964, + "grad_norm": 0.6114060002848354, + "learning_rate": 7.358239370511902e-07, + "loss": 0.2854, + "step": 32314 + }, + { + "epoch": 1.5137958495338923, + "grad_norm": 0.5472441143477143, + "learning_rate": 7.356895673679626e-07, + "loss": 0.2453, + "step": 32315 + }, + { + "epoch": 1.5138426945238206, + "grad_norm": 0.5769910785911716, + "learning_rate": 7.355552078378975e-07, + "loss": 0.267, + "step": 32316 + }, + { + "epoch": 1.513889539513749, + "grad_norm": 0.601445046367712, + "learning_rate": 7.354208584617686e-07, + "loss": 0.2707, + "step": 32317 + }, + { + "epoch": 1.5139363845036773, + "grad_norm": 0.5708813895697513, + "learning_rate": 7.352865192403499e-07, + "loss": 0.261, + "step": 32318 + }, + { + "epoch": 1.5139832294936055, + "grad_norm": 0.6061922189782193, + "learning_rate": 7.351521901744133e-07, + "loss": 0.2896, + "step": 32319 + }, + { + "epoch": 1.514030074483534, + "grad_norm": 0.5853567667920604, + "learning_rate": 7.350178712647319e-07, + "loss": 0.2609, + "step": 32320 + }, + { + "epoch": 1.5140769194734625, + "grad_norm": 0.5621834066152027, + "learning_rate": 7.3488356251208e-07, + "loss": 0.2648, + "step": 32321 + }, + { + "epoch": 1.5141237644633905, + "grad_norm": 0.5944756180658046, + "learning_rate": 7.347492639172288e-07, + "loss": 0.2623, + "step": 32322 + }, + { + "epoch": 1.514170609453319, + "grad_norm": 0.6108063499805837, + "learning_rate": 7.346149754809517e-07, + "loss": 0.2868, + "step": 32323 + }, + { + "epoch": 1.5142174544432474, + "grad_norm": 0.5934184790027861, + "learning_rate": 7.34480697204022e-07, + "loss": 0.265, + "step": 32324 + }, + { + "epoch": 1.5142642994331756, + "grad_norm": 0.5935106464657726, + "learning_rate": 7.343464290872126e-07, + "loss": 0.2689, + "step": 32325 + }, + { + "epoch": 1.5143111444231039, + "grad_norm": 0.6126048178543709, + "learning_rate": 7.34212171131295e-07, + "loss": 0.2859, + "step": 32326 + }, + { + "epoch": 1.5143579894130323, + "grad_norm": 0.5989932191803868, + "learning_rate": 7.340779233370426e-07, + "loss": 0.2628, + "step": 32327 + }, + { + "epoch": 1.5144048344029606, + "grad_norm": 0.6058032657153035, + "learning_rate": 7.339436857052288e-07, + "loss": 0.2564, + "step": 32328 + }, + { + "epoch": 1.5144516793928888, + "grad_norm": 0.586659558178631, + "learning_rate": 7.338094582366242e-07, + "loss": 0.2713, + "step": 32329 + }, + { + "epoch": 1.5144985243828173, + "grad_norm": 0.586726535529272, + "learning_rate": 7.336752409320025e-07, + "loss": 0.2717, + "step": 32330 + }, + { + "epoch": 1.5145453693727455, + "grad_norm": 0.55560076565589, + "learning_rate": 7.335410337921358e-07, + "loss": 0.27, + "step": 32331 + }, + { + "epoch": 1.5145922143626738, + "grad_norm": 0.6332286412284288, + "learning_rate": 7.334068368177971e-07, + "loss": 0.2809, + "step": 32332 + }, + { + "epoch": 1.5146390593526022, + "grad_norm": 0.6061919217728566, + "learning_rate": 7.332726500097573e-07, + "loss": 0.2726, + "step": 32333 + }, + { + "epoch": 1.5146859043425307, + "grad_norm": 0.6039480468138957, + "learning_rate": 7.331384733687901e-07, + "loss": 0.2656, + "step": 32334 + }, + { + "epoch": 1.514732749332459, + "grad_norm": 0.5841549044473062, + "learning_rate": 7.330043068956664e-07, + "loss": 0.2669, + "step": 32335 + }, + { + "epoch": 1.5147795943223872, + "grad_norm": 0.6269548296172469, + "learning_rate": 7.328701505911584e-07, + "loss": 0.2754, + "step": 32336 + }, + { + "epoch": 1.5148264393123156, + "grad_norm": 0.6217844834673326, + "learning_rate": 7.327360044560389e-07, + "loss": 0.2688, + "step": 32337 + }, + { + "epoch": 1.5148732843022439, + "grad_norm": 0.6452205068968038, + "learning_rate": 7.326018684910793e-07, + "loss": 0.2988, + "step": 32338 + }, + { + "epoch": 1.5149201292921721, + "grad_norm": 0.573448456141488, + "learning_rate": 7.324677426970525e-07, + "loss": 0.2697, + "step": 32339 + }, + { + "epoch": 1.5149669742821006, + "grad_norm": 0.5770128542736073, + "learning_rate": 7.323336270747297e-07, + "loss": 0.2793, + "step": 32340 + }, + { + "epoch": 1.5150138192720288, + "grad_norm": 0.5891635941250771, + "learning_rate": 7.321995216248817e-07, + "loss": 0.255, + "step": 32341 + }, + { + "epoch": 1.515060664261957, + "grad_norm": 0.6223895951390123, + "learning_rate": 7.32065426348281e-07, + "loss": 0.2793, + "step": 32342 + }, + { + "epoch": 1.5151075092518855, + "grad_norm": 0.6183409800010374, + "learning_rate": 7.319313412456996e-07, + "loss": 0.2786, + "step": 32343 + }, + { + "epoch": 1.515154354241814, + "grad_norm": 0.5648244751935562, + "learning_rate": 7.31797266317909e-07, + "loss": 0.2648, + "step": 32344 + }, + { + "epoch": 1.515201199231742, + "grad_norm": 0.5948749500020163, + "learning_rate": 7.316632015656805e-07, + "loss": 0.2807, + "step": 32345 + }, + { + "epoch": 1.5152480442216705, + "grad_norm": 0.5848668821753529, + "learning_rate": 7.315291469897868e-07, + "loss": 0.2697, + "step": 32346 + }, + { + "epoch": 1.515294889211599, + "grad_norm": 0.6119127317196934, + "learning_rate": 7.313951025909982e-07, + "loss": 0.2797, + "step": 32347 + }, + { + "epoch": 1.5153417342015272, + "grad_norm": 0.5761811916389665, + "learning_rate": 7.312610683700857e-07, + "loss": 0.2703, + "step": 32348 + }, + { + "epoch": 1.5153885791914554, + "grad_norm": 0.6105763807435537, + "learning_rate": 7.311270443278213e-07, + "loss": 0.2679, + "step": 32349 + }, + { + "epoch": 1.5154354241813839, + "grad_norm": 0.5764677547772836, + "learning_rate": 7.309930304649757e-07, + "loss": 0.2685, + "step": 32350 + }, + { + "epoch": 1.5154822691713121, + "grad_norm": 0.5662798018790403, + "learning_rate": 7.30859026782321e-07, + "loss": 0.2589, + "step": 32351 + }, + { + "epoch": 1.5155291141612404, + "grad_norm": 0.5973968820040403, + "learning_rate": 7.307250332806285e-07, + "loss": 0.2759, + "step": 32352 + }, + { + "epoch": 1.5155759591511688, + "grad_norm": 0.5954392248165316, + "learning_rate": 7.30591049960668e-07, + "loss": 0.2737, + "step": 32353 + }, + { + "epoch": 1.515622804141097, + "grad_norm": 0.6259040992820606, + "learning_rate": 7.304570768232122e-07, + "loss": 0.2732, + "step": 32354 + }, + { + "epoch": 1.5156696491310253, + "grad_norm": 0.5746141752188803, + "learning_rate": 7.303231138690301e-07, + "loss": 0.2699, + "step": 32355 + }, + { + "epoch": 1.5157164941209538, + "grad_norm": 0.5698261276800005, + "learning_rate": 7.301891610988939e-07, + "loss": 0.2729, + "step": 32356 + }, + { + "epoch": 1.5157633391108822, + "grad_norm": 0.6216644708025757, + "learning_rate": 7.300552185135743e-07, + "loss": 0.2694, + "step": 32357 + }, + { + "epoch": 1.5158101841008103, + "grad_norm": 0.6038161319341208, + "learning_rate": 7.299212861138427e-07, + "loss": 0.2708, + "step": 32358 + }, + { + "epoch": 1.5158570290907387, + "grad_norm": 0.6122971074783234, + "learning_rate": 7.297873639004685e-07, + "loss": 0.2872, + "step": 32359 + }, + { + "epoch": 1.5159038740806672, + "grad_norm": 0.5448067340896683, + "learning_rate": 7.296534518742229e-07, + "loss": 0.2549, + "step": 32360 + }, + { + "epoch": 1.5159507190705954, + "grad_norm": 0.6397847564644494, + "learning_rate": 7.295195500358779e-07, + "loss": 0.2886, + "step": 32361 + }, + { + "epoch": 1.5159975640605237, + "grad_norm": 0.5993883965755035, + "learning_rate": 7.293856583862019e-07, + "loss": 0.2752, + "step": 32362 + }, + { + "epoch": 1.5160444090504521, + "grad_norm": 0.6208219992459303, + "learning_rate": 7.292517769259663e-07, + "loss": 0.2907, + "step": 32363 + }, + { + "epoch": 1.5160912540403804, + "grad_norm": 0.6158959708232619, + "learning_rate": 7.291179056559425e-07, + "loss": 0.2679, + "step": 32364 + }, + { + "epoch": 1.5161380990303086, + "grad_norm": 0.61079138431585, + "learning_rate": 7.289840445768995e-07, + "loss": 0.2786, + "step": 32365 + }, + { + "epoch": 1.516184944020237, + "grad_norm": 0.683851105820028, + "learning_rate": 7.288501936896078e-07, + "loss": 0.2916, + "step": 32366 + }, + { + "epoch": 1.5162317890101653, + "grad_norm": 0.5968410570350604, + "learning_rate": 7.287163529948394e-07, + "loss": 0.2736, + "step": 32367 + }, + { + "epoch": 1.5162786340000936, + "grad_norm": 0.5807813427250401, + "learning_rate": 7.285825224933618e-07, + "loss": 0.261, + "step": 32368 + }, + { + "epoch": 1.516325478990022, + "grad_norm": 0.6001312442298087, + "learning_rate": 7.284487021859469e-07, + "loss": 0.2721, + "step": 32369 + }, + { + "epoch": 1.5163723239799505, + "grad_norm": 0.567962064072008, + "learning_rate": 7.283148920733651e-07, + "loss": 0.2517, + "step": 32370 + }, + { + "epoch": 1.5164191689698787, + "grad_norm": 0.6233777343968938, + "learning_rate": 7.281810921563854e-07, + "loss": 0.2732, + "step": 32371 + }, + { + "epoch": 1.516466013959807, + "grad_norm": 0.6276943216232561, + "learning_rate": 7.280473024357776e-07, + "loss": 0.2892, + "step": 32372 + }, + { + "epoch": 1.5165128589497354, + "grad_norm": 0.5911528477276996, + "learning_rate": 7.279135229123127e-07, + "loss": 0.2724, + "step": 32373 + }, + { + "epoch": 1.5165597039396637, + "grad_norm": 0.6333385628500147, + "learning_rate": 7.277797535867607e-07, + "loss": 0.2829, + "step": 32374 + }, + { + "epoch": 1.516606548929592, + "grad_norm": 0.5918873875263058, + "learning_rate": 7.276459944598899e-07, + "loss": 0.27, + "step": 32375 + }, + { + "epoch": 1.5166533939195204, + "grad_norm": 0.6117745692065684, + "learning_rate": 7.275122455324718e-07, + "loss": 0.2888, + "step": 32376 + }, + { + "epoch": 1.5167002389094486, + "grad_norm": 0.621871279257186, + "learning_rate": 7.273785068052744e-07, + "loss": 0.2903, + "step": 32377 + }, + { + "epoch": 1.5167470838993768, + "grad_norm": 0.5595278423536746, + "learning_rate": 7.272447782790681e-07, + "loss": 0.259, + "step": 32378 + }, + { + "epoch": 1.5167939288893053, + "grad_norm": 0.5884222876321016, + "learning_rate": 7.271110599546227e-07, + "loss": 0.2824, + "step": 32379 + }, + { + "epoch": 1.5168407738792338, + "grad_norm": 0.5478106937143457, + "learning_rate": 7.269773518327075e-07, + "loss": 0.2567, + "step": 32380 + }, + { + "epoch": 1.5168876188691618, + "grad_norm": 0.6153927394223273, + "learning_rate": 7.268436539140927e-07, + "loss": 0.276, + "step": 32381 + }, + { + "epoch": 1.5169344638590903, + "grad_norm": 0.6504060425271849, + "learning_rate": 7.267099661995469e-07, + "loss": 0.2775, + "step": 32382 + }, + { + "epoch": 1.5169813088490187, + "grad_norm": 0.5649216853263053, + "learning_rate": 7.265762886898389e-07, + "loss": 0.2666, + "step": 32383 + }, + { + "epoch": 1.517028153838947, + "grad_norm": 0.5946161986444718, + "learning_rate": 7.264426213857387e-07, + "loss": 0.2731, + "step": 32384 + }, + { + "epoch": 1.5170749988288752, + "grad_norm": 0.5980707222709721, + "learning_rate": 7.263089642880156e-07, + "loss": 0.2669, + "step": 32385 + }, + { + "epoch": 1.5171218438188037, + "grad_norm": 0.6001225072159088, + "learning_rate": 7.261753173974384e-07, + "loss": 0.2704, + "step": 32386 + }, + { + "epoch": 1.517168688808732, + "grad_norm": 0.6026290092906538, + "learning_rate": 7.260416807147763e-07, + "loss": 0.2742, + "step": 32387 + }, + { + "epoch": 1.5172155337986601, + "grad_norm": 0.6309783188223385, + "learning_rate": 7.259080542407995e-07, + "loss": 0.2758, + "step": 32388 + }, + { + "epoch": 1.5172623787885886, + "grad_norm": 0.6173972977977648, + "learning_rate": 7.257744379762757e-07, + "loss": 0.2914, + "step": 32389 + }, + { + "epoch": 1.5173092237785168, + "grad_norm": 0.5802596077992422, + "learning_rate": 7.256408319219735e-07, + "loss": 0.2896, + "step": 32390 + }, + { + "epoch": 1.517356068768445, + "grad_norm": 0.5984168201750777, + "learning_rate": 7.255072360786622e-07, + "loss": 0.2752, + "step": 32391 + }, + { + "epoch": 1.5174029137583736, + "grad_norm": 0.5768000726359688, + "learning_rate": 7.25373650447111e-07, + "loss": 0.2664, + "step": 32392 + }, + { + "epoch": 1.517449758748302, + "grad_norm": 0.61171226836263, + "learning_rate": 7.25240075028088e-07, + "loss": 0.2848, + "step": 32393 + }, + { + "epoch": 1.51749660373823, + "grad_norm": 0.656679759180959, + "learning_rate": 7.251065098223625e-07, + "loss": 0.2877, + "step": 32394 + }, + { + "epoch": 1.5175434487281585, + "grad_norm": 0.5849867522677579, + "learning_rate": 7.249729548307038e-07, + "loss": 0.2696, + "step": 32395 + }, + { + "epoch": 1.517590293718087, + "grad_norm": 0.5609842853399982, + "learning_rate": 7.248394100538794e-07, + "loss": 0.2659, + "step": 32396 + }, + { + "epoch": 1.5176371387080152, + "grad_norm": 0.5989683271045934, + "learning_rate": 7.247058754926575e-07, + "loss": 0.2606, + "step": 32397 + }, + { + "epoch": 1.5176839836979434, + "grad_norm": 0.590663851788473, + "learning_rate": 7.24572351147807e-07, + "loss": 0.278, + "step": 32398 + }, + { + "epoch": 1.517730828687872, + "grad_norm": 0.6285699585558956, + "learning_rate": 7.244388370200963e-07, + "loss": 0.2733, + "step": 32399 + }, + { + "epoch": 1.5177776736778001, + "grad_norm": 0.5804466110879458, + "learning_rate": 7.243053331102939e-07, + "loss": 0.2653, + "step": 32400 + }, + { + "epoch": 1.5178245186677284, + "grad_norm": 0.5911791587784352, + "learning_rate": 7.241718394191688e-07, + "loss": 0.28, + "step": 32401 + }, + { + "epoch": 1.5178713636576568, + "grad_norm": 0.600419778663002, + "learning_rate": 7.240383559474875e-07, + "loss": 0.2886, + "step": 32402 + }, + { + "epoch": 1.517918208647585, + "grad_norm": 0.5801155943230709, + "learning_rate": 7.239048826960199e-07, + "loss": 0.2739, + "step": 32403 + }, + { + "epoch": 1.5179650536375133, + "grad_norm": 0.6106064664780467, + "learning_rate": 7.237714196655327e-07, + "loss": 0.2741, + "step": 32404 + }, + { + "epoch": 1.5180118986274418, + "grad_norm": 0.6548723524554259, + "learning_rate": 7.236379668567944e-07, + "loss": 0.2921, + "step": 32405 + }, + { + "epoch": 1.5180587436173703, + "grad_norm": 0.5652654284686118, + "learning_rate": 7.235045242705732e-07, + "loss": 0.2728, + "step": 32406 + }, + { + "epoch": 1.5181055886072985, + "grad_norm": 0.5681803365611686, + "learning_rate": 7.233710919076375e-07, + "loss": 0.269, + "step": 32407 + }, + { + "epoch": 1.5181524335972267, + "grad_norm": 0.6145040654514258, + "learning_rate": 7.232376697687543e-07, + "loss": 0.2907, + "step": 32408 + }, + { + "epoch": 1.5181992785871552, + "grad_norm": 0.6359666755085366, + "learning_rate": 7.231042578546913e-07, + "loss": 0.2806, + "step": 32409 + }, + { + "epoch": 1.5182461235770834, + "grad_norm": 0.5851894609532874, + "learning_rate": 7.229708561662177e-07, + "loss": 0.2522, + "step": 32410 + }, + { + "epoch": 1.5182929685670117, + "grad_norm": 0.6468902043058622, + "learning_rate": 7.228374647040995e-07, + "loss": 0.2846, + "step": 32411 + }, + { + "epoch": 1.5183398135569401, + "grad_norm": 0.5749907841890388, + "learning_rate": 7.227040834691049e-07, + "loss": 0.2613, + "step": 32412 + }, + { + "epoch": 1.5183866585468684, + "grad_norm": 0.567490733989908, + "learning_rate": 7.225707124620021e-07, + "loss": 0.2678, + "step": 32413 + }, + { + "epoch": 1.5184335035367966, + "grad_norm": 0.5825349746976454, + "learning_rate": 7.224373516835575e-07, + "loss": 0.261, + "step": 32414 + }, + { + "epoch": 1.518480348526725, + "grad_norm": 0.5955150021362656, + "learning_rate": 7.223040011345395e-07, + "loss": 0.2653, + "step": 32415 + }, + { + "epoch": 1.5185271935166536, + "grad_norm": 0.5739338688208916, + "learning_rate": 7.221706608157156e-07, + "loss": 0.2621, + "step": 32416 + }, + { + "epoch": 1.5185740385065816, + "grad_norm": 0.5720344921455078, + "learning_rate": 7.220373307278519e-07, + "loss": 0.2621, + "step": 32417 + }, + { + "epoch": 1.51862088349651, + "grad_norm": 0.6077622200725378, + "learning_rate": 7.219040108717168e-07, + "loss": 0.2782, + "step": 32418 + }, + { + "epoch": 1.5186677284864385, + "grad_norm": 0.5635556445074866, + "learning_rate": 7.217707012480777e-07, + "loss": 0.2517, + "step": 32419 + }, + { + "epoch": 1.5187145734763667, + "grad_norm": 0.667309946771076, + "learning_rate": 7.216374018577005e-07, + "loss": 0.2874, + "step": 32420 + }, + { + "epoch": 1.518761418466295, + "grad_norm": 0.598255852636638, + "learning_rate": 7.215041127013533e-07, + "loss": 0.2799, + "step": 32421 + }, + { + "epoch": 1.5188082634562234, + "grad_norm": 0.6296335064347987, + "learning_rate": 7.213708337798028e-07, + "loss": 0.2542, + "step": 32422 + }, + { + "epoch": 1.5188551084461517, + "grad_norm": 0.6361649324032818, + "learning_rate": 7.212375650938166e-07, + "loss": 0.2897, + "step": 32423 + }, + { + "epoch": 1.51890195343608, + "grad_norm": 0.6282778129603087, + "learning_rate": 7.211043066441608e-07, + "loss": 0.2904, + "step": 32424 + }, + { + "epoch": 1.5189487984260084, + "grad_norm": 0.6204903130090826, + "learning_rate": 7.209710584316032e-07, + "loss": 0.2698, + "step": 32425 + }, + { + "epoch": 1.5189956434159366, + "grad_norm": 0.6303696129813885, + "learning_rate": 7.208378204569092e-07, + "loss": 0.2817, + "step": 32426 + }, + { + "epoch": 1.5190424884058649, + "grad_norm": 0.623121217102233, + "learning_rate": 7.207045927208464e-07, + "loss": 0.2814, + "step": 32427 + }, + { + "epoch": 1.5190893333957933, + "grad_norm": 0.5934672335752155, + "learning_rate": 7.205713752241816e-07, + "loss": 0.2653, + "step": 32428 + }, + { + "epoch": 1.5191361783857218, + "grad_norm": 0.6222872713642636, + "learning_rate": 7.204381679676811e-07, + "loss": 0.2854, + "step": 32429 + }, + { + "epoch": 1.5191830233756498, + "grad_norm": 0.6075763117996695, + "learning_rate": 7.203049709521126e-07, + "loss": 0.2748, + "step": 32430 + }, + { + "epoch": 1.5192298683655783, + "grad_norm": 0.6055111528416303, + "learning_rate": 7.201717841782416e-07, + "loss": 0.2729, + "step": 32431 + }, + { + "epoch": 1.5192767133555067, + "grad_norm": 0.6016847325296756, + "learning_rate": 7.200386076468338e-07, + "loss": 0.2719, + "step": 32432 + }, + { + "epoch": 1.519323558345435, + "grad_norm": 0.5721468805909791, + "learning_rate": 7.199054413586564e-07, + "loss": 0.2584, + "step": 32433 + }, + { + "epoch": 1.5193704033353632, + "grad_norm": 0.6210020597100049, + "learning_rate": 7.197722853144759e-07, + "loss": 0.2557, + "step": 32434 + }, + { + "epoch": 1.5194172483252917, + "grad_norm": 0.6267635215283176, + "learning_rate": 7.196391395150585e-07, + "loss": 0.279, + "step": 32435 + }, + { + "epoch": 1.51946409331522, + "grad_norm": 0.6192976006577116, + "learning_rate": 7.195060039611703e-07, + "loss": 0.2806, + "step": 32436 + }, + { + "epoch": 1.5195109383051482, + "grad_norm": 0.5883759979120271, + "learning_rate": 7.193728786535784e-07, + "loss": 0.2587, + "step": 32437 + }, + { + "epoch": 1.5195577832950766, + "grad_norm": 0.5674522690352779, + "learning_rate": 7.192397635930479e-07, + "loss": 0.2621, + "step": 32438 + }, + { + "epoch": 1.5196046282850049, + "grad_norm": 0.5467139254268293, + "learning_rate": 7.191066587803444e-07, + "loss": 0.2544, + "step": 32439 + }, + { + "epoch": 1.519651473274933, + "grad_norm": 0.5962091056807112, + "learning_rate": 7.189735642162343e-07, + "loss": 0.2762, + "step": 32440 + }, + { + "epoch": 1.5196983182648616, + "grad_norm": 0.585627917659474, + "learning_rate": 7.188404799014836e-07, + "loss": 0.2738, + "step": 32441 + }, + { + "epoch": 1.51974516325479, + "grad_norm": 0.6233781037750626, + "learning_rate": 7.187074058368585e-07, + "loss": 0.27, + "step": 32442 + }, + { + "epoch": 1.5197920082447183, + "grad_norm": 0.6038944781107823, + "learning_rate": 7.185743420231254e-07, + "loss": 0.2656, + "step": 32443 + }, + { + "epoch": 1.5198388532346465, + "grad_norm": 0.5741130429764846, + "learning_rate": 7.184412884610481e-07, + "loss": 0.2741, + "step": 32444 + }, + { + "epoch": 1.519885698224575, + "grad_norm": 0.6565255025229298, + "learning_rate": 7.183082451513942e-07, + "loss": 0.2828, + "step": 32445 + }, + { + "epoch": 1.5199325432145032, + "grad_norm": 0.6391534018412033, + "learning_rate": 7.181752120949281e-07, + "loss": 0.2803, + "step": 32446 + }, + { + "epoch": 1.5199793882044315, + "grad_norm": 0.6094364714154682, + "learning_rate": 7.180421892924156e-07, + "loss": 0.2828, + "step": 32447 + }, + { + "epoch": 1.52002623319436, + "grad_norm": 0.6191392927959811, + "learning_rate": 7.179091767446225e-07, + "loss": 0.286, + "step": 32448 + }, + { + "epoch": 1.5200730781842882, + "grad_norm": 0.6217907891096924, + "learning_rate": 7.177761744523149e-07, + "loss": 0.253, + "step": 32449 + }, + { + "epoch": 1.5201199231742164, + "grad_norm": 0.5909584354054949, + "learning_rate": 7.176431824162566e-07, + "loss": 0.2709, + "step": 32450 + }, + { + "epoch": 1.5201667681641449, + "grad_norm": 0.6174800996544937, + "learning_rate": 7.17510200637214e-07, + "loss": 0.284, + "step": 32451 + }, + { + "epoch": 1.5202136131540733, + "grad_norm": 0.5730881974218152, + "learning_rate": 7.173772291159528e-07, + "loss": 0.2714, + "step": 32452 + }, + { + "epoch": 1.5202604581440013, + "grad_norm": 0.5728898912740452, + "learning_rate": 7.172442678532368e-07, + "loss": 0.2686, + "step": 32453 + }, + { + "epoch": 1.5203073031339298, + "grad_norm": 0.5756044016236228, + "learning_rate": 7.17111316849832e-07, + "loss": 0.2707, + "step": 32454 + }, + { + "epoch": 1.5203541481238583, + "grad_norm": 0.5672287242508808, + "learning_rate": 7.169783761065044e-07, + "loss": 0.2676, + "step": 32455 + }, + { + "epoch": 1.5204009931137865, + "grad_norm": 0.6104769848249875, + "learning_rate": 7.168454456240173e-07, + "loss": 0.2738, + "step": 32456 + }, + { + "epoch": 1.5204478381037148, + "grad_norm": 0.6082403228634828, + "learning_rate": 7.167125254031362e-07, + "loss": 0.2746, + "step": 32457 + }, + { + "epoch": 1.5204946830936432, + "grad_norm": 0.5733451592381043, + "learning_rate": 7.165796154446272e-07, + "loss": 0.2809, + "step": 32458 + }, + { + "epoch": 1.5205415280835715, + "grad_norm": 0.5737289380911984, + "learning_rate": 7.164467157492539e-07, + "loss": 0.281, + "step": 32459 + }, + { + "epoch": 1.5205883730734997, + "grad_norm": 0.6132089148223066, + "learning_rate": 7.16313826317781e-07, + "loss": 0.2692, + "step": 32460 + }, + { + "epoch": 1.5206352180634282, + "grad_norm": 0.6042704024703043, + "learning_rate": 7.161809471509737e-07, + "loss": 0.2852, + "step": 32461 + }, + { + "epoch": 1.5206820630533564, + "grad_norm": 0.5916744908343801, + "learning_rate": 7.160480782495977e-07, + "loss": 0.2578, + "step": 32462 + }, + { + "epoch": 1.5207289080432846, + "grad_norm": 0.618417289357976, + "learning_rate": 7.159152196144159e-07, + "loss": 0.2489, + "step": 32463 + }, + { + "epoch": 1.520775753033213, + "grad_norm": 0.5446176444334467, + "learning_rate": 7.157823712461934e-07, + "loss": 0.2578, + "step": 32464 + }, + { + "epoch": 1.5208225980231416, + "grad_norm": 0.5930456352829169, + "learning_rate": 7.156495331456959e-07, + "loss": 0.2705, + "step": 32465 + }, + { + "epoch": 1.5208694430130696, + "grad_norm": 0.5234090428251025, + "learning_rate": 7.155167053136857e-07, + "loss": 0.2496, + "step": 32466 + }, + { + "epoch": 1.520916288002998, + "grad_norm": 0.5751614596001131, + "learning_rate": 7.153838877509287e-07, + "loss": 0.2695, + "step": 32467 + }, + { + "epoch": 1.5209631329929265, + "grad_norm": 0.583959794312962, + "learning_rate": 7.152510804581894e-07, + "loss": 0.2808, + "step": 32468 + }, + { + "epoch": 1.5210099779828548, + "grad_norm": 0.6440177212326474, + "learning_rate": 7.15118283436231e-07, + "loss": 0.2793, + "step": 32469 + }, + { + "epoch": 1.521056822972783, + "grad_norm": 0.650657670755455, + "learning_rate": 7.149854966858183e-07, + "loss": 0.2822, + "step": 32470 + }, + { + "epoch": 1.5211036679627115, + "grad_norm": 0.5950512919278758, + "learning_rate": 7.148527202077152e-07, + "loss": 0.279, + "step": 32471 + }, + { + "epoch": 1.5211505129526397, + "grad_norm": 0.6052456414722122, + "learning_rate": 7.14719954002687e-07, + "loss": 0.2768, + "step": 32472 + }, + { + "epoch": 1.521197357942568, + "grad_norm": 0.6304823068440454, + "learning_rate": 7.145871980714958e-07, + "loss": 0.2826, + "step": 32473 + }, + { + "epoch": 1.5212442029324964, + "grad_norm": 0.6012815009916306, + "learning_rate": 7.144544524149075e-07, + "loss": 0.2725, + "step": 32474 + }, + { + "epoch": 1.5212910479224246, + "grad_norm": 0.5498281784355576, + "learning_rate": 7.143217170336842e-07, + "loss": 0.2578, + "step": 32475 + }, + { + "epoch": 1.5213378929123529, + "grad_norm": 0.5860333539607073, + "learning_rate": 7.141889919285905e-07, + "loss": 0.2685, + "step": 32476 + }, + { + "epoch": 1.5213847379022813, + "grad_norm": 0.60569845700416, + "learning_rate": 7.140562771003903e-07, + "loss": 0.2786, + "step": 32477 + }, + { + "epoch": 1.5214315828922098, + "grad_norm": 0.6057272123558055, + "learning_rate": 7.139235725498475e-07, + "loss": 0.275, + "step": 32478 + }, + { + "epoch": 1.521478427882138, + "grad_norm": 0.6212812787949333, + "learning_rate": 7.137908782777261e-07, + "loss": 0.2699, + "step": 32479 + }, + { + "epoch": 1.5215252728720663, + "grad_norm": 0.6071407760756761, + "learning_rate": 7.136581942847895e-07, + "loss": 0.2859, + "step": 32480 + }, + { + "epoch": 1.5215721178619948, + "grad_norm": 0.6385869844729365, + "learning_rate": 7.135255205718003e-07, + "loss": 0.2646, + "step": 32481 + }, + { + "epoch": 1.521618962851923, + "grad_norm": 0.5272745506615603, + "learning_rate": 7.133928571395227e-07, + "loss": 0.2534, + "step": 32482 + }, + { + "epoch": 1.5216658078418512, + "grad_norm": 0.6071710526691945, + "learning_rate": 7.132602039887198e-07, + "loss": 0.2619, + "step": 32483 + }, + { + "epoch": 1.5217126528317797, + "grad_norm": 0.5913211072330614, + "learning_rate": 7.131275611201558e-07, + "loss": 0.2784, + "step": 32484 + }, + { + "epoch": 1.521759497821708, + "grad_norm": 0.6097641970216874, + "learning_rate": 7.129949285345933e-07, + "loss": 0.2807, + "step": 32485 + }, + { + "epoch": 1.5218063428116362, + "grad_norm": 0.6227506072671143, + "learning_rate": 7.128623062327966e-07, + "loss": 0.2899, + "step": 32486 + }, + { + "epoch": 1.5218531878015646, + "grad_norm": 0.589764360586272, + "learning_rate": 7.127296942155285e-07, + "loss": 0.2705, + "step": 32487 + }, + { + "epoch": 1.521900032791493, + "grad_norm": 0.5837915856057694, + "learning_rate": 7.125970924835507e-07, + "loss": 0.2783, + "step": 32488 + }, + { + "epoch": 1.5219468777814211, + "grad_norm": 0.5783140041892868, + "learning_rate": 7.124645010376275e-07, + "loss": 0.271, + "step": 32489 + }, + { + "epoch": 1.5219937227713496, + "grad_norm": 0.5649678331834125, + "learning_rate": 7.123319198785217e-07, + "loss": 0.2647, + "step": 32490 + }, + { + "epoch": 1.522040567761278, + "grad_norm": 0.6140579340906493, + "learning_rate": 7.121993490069964e-07, + "loss": 0.2777, + "step": 32491 + }, + { + "epoch": 1.5220874127512063, + "grad_norm": 0.6028752603061588, + "learning_rate": 7.120667884238153e-07, + "loss": 0.2732, + "step": 32492 + }, + { + "epoch": 1.5221342577411345, + "grad_norm": 0.5698332594238973, + "learning_rate": 7.119342381297397e-07, + "loss": 0.2622, + "step": 32493 + }, + { + "epoch": 1.522181102731063, + "grad_norm": 0.6135886732528301, + "learning_rate": 7.118016981255341e-07, + "loss": 0.2676, + "step": 32494 + }, + { + "epoch": 1.5222279477209912, + "grad_norm": 0.616217157594594, + "learning_rate": 7.116691684119592e-07, + "loss": 0.2816, + "step": 32495 + }, + { + "epoch": 1.5222747927109195, + "grad_norm": 0.6211729395127193, + "learning_rate": 7.11536648989779e-07, + "loss": 0.2819, + "step": 32496 + }, + { + "epoch": 1.522321637700848, + "grad_norm": 0.6116874086515396, + "learning_rate": 7.114041398597557e-07, + "loss": 0.2931, + "step": 32497 + }, + { + "epoch": 1.5223684826907762, + "grad_norm": 0.5910923738704281, + "learning_rate": 7.112716410226527e-07, + "loss": 0.2762, + "step": 32498 + }, + { + "epoch": 1.5224153276807044, + "grad_norm": 0.6320178176408214, + "learning_rate": 7.111391524792313e-07, + "loss": 0.2821, + "step": 32499 + }, + { + "epoch": 1.5224621726706329, + "grad_norm": 0.5889101570815793, + "learning_rate": 7.110066742302546e-07, + "loss": 0.2764, + "step": 32500 + }, + { + "epoch": 1.5225090176605613, + "grad_norm": 0.5994026626003677, + "learning_rate": 7.108742062764854e-07, + "loss": 0.2682, + "step": 32501 + }, + { + "epoch": 1.5225558626504894, + "grad_norm": 0.6079517618702366, + "learning_rate": 7.107417486186846e-07, + "loss": 0.2646, + "step": 32502 + }, + { + "epoch": 1.5226027076404178, + "grad_norm": 0.5825902468261231, + "learning_rate": 7.106093012576154e-07, + "loss": 0.2649, + "step": 32503 + }, + { + "epoch": 1.5226495526303463, + "grad_norm": 0.5972096930762362, + "learning_rate": 7.104768641940407e-07, + "loss": 0.2771, + "step": 32504 + }, + { + "epoch": 1.5226963976202745, + "grad_norm": 0.6230200787592551, + "learning_rate": 7.10344437428721e-07, + "loss": 0.2699, + "step": 32505 + }, + { + "epoch": 1.5227432426102028, + "grad_norm": 0.5869167385775468, + "learning_rate": 7.102120209624195e-07, + "loss": 0.2609, + "step": 32506 + }, + { + "epoch": 1.5227900876001312, + "grad_norm": 0.567920070858226, + "learning_rate": 7.100796147958986e-07, + "loss": 0.2703, + "step": 32507 + }, + { + "epoch": 1.5228369325900595, + "grad_norm": 0.6349231632159305, + "learning_rate": 7.099472189299189e-07, + "loss": 0.2739, + "step": 32508 + }, + { + "epoch": 1.5228837775799877, + "grad_norm": 0.598966701730526, + "learning_rate": 7.09814833365243e-07, + "loss": 0.2827, + "step": 32509 + }, + { + "epoch": 1.5229306225699162, + "grad_norm": 0.6571973995336461, + "learning_rate": 7.096824581026335e-07, + "loss": 0.2891, + "step": 32510 + }, + { + "epoch": 1.5229774675598444, + "grad_norm": 0.5668573496013658, + "learning_rate": 7.095500931428509e-07, + "loss": 0.2672, + "step": 32511 + }, + { + "epoch": 1.5230243125497727, + "grad_norm": 0.6192758283067952, + "learning_rate": 7.094177384866574e-07, + "loss": 0.2745, + "step": 32512 + }, + { + "epoch": 1.5230711575397011, + "grad_norm": 0.6168367024846134, + "learning_rate": 7.092853941348146e-07, + "loss": 0.3005, + "step": 32513 + }, + { + "epoch": 1.5231180025296296, + "grad_norm": 0.6020166921405692, + "learning_rate": 7.091530600880853e-07, + "loss": 0.2705, + "step": 32514 + }, + { + "epoch": 1.5231648475195578, + "grad_norm": 0.5776385356208676, + "learning_rate": 7.09020736347229e-07, + "loss": 0.2672, + "step": 32515 + }, + { + "epoch": 1.523211692509486, + "grad_norm": 0.6251394686608663, + "learning_rate": 7.088884229130091e-07, + "loss": 0.2663, + "step": 32516 + }, + { + "epoch": 1.5232585374994145, + "grad_norm": 0.5999898595922137, + "learning_rate": 7.087561197861855e-07, + "loss": 0.2885, + "step": 32517 + }, + { + "epoch": 1.5233053824893428, + "grad_norm": 0.6140663590375136, + "learning_rate": 7.086238269675202e-07, + "loss": 0.2756, + "step": 32518 + }, + { + "epoch": 1.523352227479271, + "grad_norm": 0.5830798346588575, + "learning_rate": 7.084915444577745e-07, + "loss": 0.2786, + "step": 32519 + }, + { + "epoch": 1.5233990724691995, + "grad_norm": 0.5882461625088066, + "learning_rate": 7.083592722577096e-07, + "loss": 0.2824, + "step": 32520 + }, + { + "epoch": 1.5234459174591277, + "grad_norm": 0.6144804265231932, + "learning_rate": 7.082270103680875e-07, + "loss": 0.2856, + "step": 32521 + }, + { + "epoch": 1.523492762449056, + "grad_norm": 0.5653074207587052, + "learning_rate": 7.080947587896686e-07, + "loss": 0.258, + "step": 32522 + }, + { + "epoch": 1.5235396074389844, + "grad_norm": 0.6118689792989651, + "learning_rate": 7.079625175232135e-07, + "loss": 0.2616, + "step": 32523 + }, + { + "epoch": 1.5235864524289129, + "grad_norm": 0.6046979054686741, + "learning_rate": 7.078302865694833e-07, + "loss": 0.2729, + "step": 32524 + }, + { + "epoch": 1.523633297418841, + "grad_norm": 0.5716752471739572, + "learning_rate": 7.076980659292398e-07, + "loss": 0.2689, + "step": 32525 + }, + { + "epoch": 1.5236801424087694, + "grad_norm": 0.6270961150259297, + "learning_rate": 7.07565855603243e-07, + "loss": 0.259, + "step": 32526 + }, + { + "epoch": 1.5237269873986978, + "grad_norm": 0.6256159375836979, + "learning_rate": 7.074336555922545e-07, + "loss": 0.2679, + "step": 32527 + }, + { + "epoch": 1.523773832388626, + "grad_norm": 0.634354395458295, + "learning_rate": 7.073014658970356e-07, + "loss": 0.2934, + "step": 32528 + }, + { + "epoch": 1.5238206773785543, + "grad_norm": 0.5826019117601045, + "learning_rate": 7.07169286518346e-07, + "loss": 0.2635, + "step": 32529 + }, + { + "epoch": 1.5238675223684828, + "grad_norm": 0.5671877544921549, + "learning_rate": 7.070371174569457e-07, + "loss": 0.2734, + "step": 32530 + }, + { + "epoch": 1.523914367358411, + "grad_norm": 0.6116090544430786, + "learning_rate": 7.069049587135962e-07, + "loss": 0.2807, + "step": 32531 + }, + { + "epoch": 1.5239612123483393, + "grad_norm": 0.5927645450086656, + "learning_rate": 7.06772810289058e-07, + "loss": 0.268, + "step": 32532 + }, + { + "epoch": 1.5240080573382677, + "grad_norm": 0.5872501701938249, + "learning_rate": 7.066406721840918e-07, + "loss": 0.269, + "step": 32533 + }, + { + "epoch": 1.524054902328196, + "grad_norm": 0.6080410542858113, + "learning_rate": 7.065085443994577e-07, + "loss": 0.2814, + "step": 32534 + }, + { + "epoch": 1.5241017473181242, + "grad_norm": 0.579507546111324, + "learning_rate": 7.063764269359166e-07, + "loss": 0.2712, + "step": 32535 + }, + { + "epoch": 1.5241485923080527, + "grad_norm": 0.6210209766285257, + "learning_rate": 7.062443197942286e-07, + "loss": 0.2721, + "step": 32536 + }, + { + "epoch": 1.5241954372979811, + "grad_norm": 0.5377122973403332, + "learning_rate": 7.06112222975153e-07, + "loss": 0.2591, + "step": 32537 + }, + { + "epoch": 1.5242422822879091, + "grad_norm": 0.5779503319983668, + "learning_rate": 7.059801364794505e-07, + "loss": 0.2642, + "step": 32538 + }, + { + "epoch": 1.5242891272778376, + "grad_norm": 0.5833580677275021, + "learning_rate": 7.058480603078816e-07, + "loss": 0.2621, + "step": 32539 + }, + { + "epoch": 1.524335972267766, + "grad_norm": 0.6227797916185417, + "learning_rate": 7.05715994461206e-07, + "loss": 0.2735, + "step": 32540 + }, + { + "epoch": 1.5243828172576943, + "grad_norm": 0.6581457666491817, + "learning_rate": 7.055839389401847e-07, + "loss": 0.2772, + "step": 32541 + }, + { + "epoch": 1.5244296622476226, + "grad_norm": 0.6180500854439965, + "learning_rate": 7.054518937455759e-07, + "loss": 0.2766, + "step": 32542 + }, + { + "epoch": 1.524476507237551, + "grad_norm": 0.620017352136649, + "learning_rate": 7.053198588781413e-07, + "loss": 0.2699, + "step": 32543 + }, + { + "epoch": 1.5245233522274793, + "grad_norm": 0.6172399349146444, + "learning_rate": 7.051878343386393e-07, + "loss": 0.2825, + "step": 32544 + }, + { + "epoch": 1.5245701972174075, + "grad_norm": 0.6368182089609957, + "learning_rate": 7.050558201278298e-07, + "loss": 0.29, + "step": 32545 + }, + { + "epoch": 1.524617042207336, + "grad_norm": 0.5626343330548362, + "learning_rate": 7.04923816246473e-07, + "loss": 0.2572, + "step": 32546 + }, + { + "epoch": 1.5246638871972642, + "grad_norm": 0.5636044672043601, + "learning_rate": 7.047918226953295e-07, + "loss": 0.2556, + "step": 32547 + }, + { + "epoch": 1.5247107321871924, + "grad_norm": 0.6042697690697736, + "learning_rate": 7.046598394751569e-07, + "loss": 0.2698, + "step": 32548 + }, + { + "epoch": 1.524757577177121, + "grad_norm": 0.5614354071609123, + "learning_rate": 7.045278665867159e-07, + "loss": 0.2681, + "step": 32549 + }, + { + "epoch": 1.5248044221670494, + "grad_norm": 0.6882345785400961, + "learning_rate": 7.04395904030766e-07, + "loss": 0.287, + "step": 32550 + }, + { + "epoch": 1.5248512671569776, + "grad_norm": 0.6075571611539663, + "learning_rate": 7.04263951808066e-07, + "loss": 0.29, + "step": 32551 + }, + { + "epoch": 1.5248981121469058, + "grad_norm": 0.5991883250322996, + "learning_rate": 7.041320099193757e-07, + "loss": 0.2812, + "step": 32552 + }, + { + "epoch": 1.5249449571368343, + "grad_norm": 0.6301184231049041, + "learning_rate": 7.040000783654549e-07, + "loss": 0.2764, + "step": 32553 + }, + { + "epoch": 1.5249918021267626, + "grad_norm": 0.5841710846304701, + "learning_rate": 7.038681571470615e-07, + "loss": 0.2645, + "step": 32554 + }, + { + "epoch": 1.5250386471166908, + "grad_norm": 0.6152807493211119, + "learning_rate": 7.037362462649552e-07, + "loss": 0.2716, + "step": 32555 + }, + { + "epoch": 1.5250854921066193, + "grad_norm": 0.6247956978885276, + "learning_rate": 7.036043457198963e-07, + "loss": 0.2832, + "step": 32556 + }, + { + "epoch": 1.5251323370965475, + "grad_norm": 0.5657754516937271, + "learning_rate": 7.034724555126421e-07, + "loss": 0.2525, + "step": 32557 + }, + { + "epoch": 1.5251791820864757, + "grad_norm": 0.6377516273718645, + "learning_rate": 7.033405756439527e-07, + "loss": 0.2758, + "step": 32558 + }, + { + "epoch": 1.5252260270764042, + "grad_norm": 0.5597655338306184, + "learning_rate": 7.032087061145871e-07, + "loss": 0.2665, + "step": 32559 + }, + { + "epoch": 1.5252728720663327, + "grad_norm": 0.618290153340702, + "learning_rate": 7.03076846925303e-07, + "loss": 0.2574, + "step": 32560 + }, + { + "epoch": 1.5253197170562607, + "grad_norm": 0.5782070532608022, + "learning_rate": 7.029449980768601e-07, + "loss": 0.268, + "step": 32561 + }, + { + "epoch": 1.5253665620461891, + "grad_norm": 0.6127960991548727, + "learning_rate": 7.028131595700171e-07, + "loss": 0.2717, + "step": 32562 + }, + { + "epoch": 1.5254134070361176, + "grad_norm": 0.5918368593561666, + "learning_rate": 7.026813314055333e-07, + "loss": 0.2725, + "step": 32563 + }, + { + "epoch": 1.5254602520260458, + "grad_norm": 0.6315937423156749, + "learning_rate": 7.025495135841662e-07, + "loss": 0.2933, + "step": 32564 + }, + { + "epoch": 1.525507097015974, + "grad_norm": 0.5672061260410144, + "learning_rate": 7.024177061066753e-07, + "loss": 0.2578, + "step": 32565 + }, + { + "epoch": 1.5255539420059026, + "grad_norm": 0.5725143256063522, + "learning_rate": 7.022859089738182e-07, + "loss": 0.2711, + "step": 32566 + }, + { + "epoch": 1.5256007869958308, + "grad_norm": 0.602778205859593, + "learning_rate": 7.021541221863538e-07, + "loss": 0.2704, + "step": 32567 + }, + { + "epoch": 1.525647631985759, + "grad_norm": 0.6010646146067168, + "learning_rate": 7.020223457450404e-07, + "loss": 0.2704, + "step": 32568 + }, + { + "epoch": 1.5256944769756875, + "grad_norm": 0.5955533014693186, + "learning_rate": 7.018905796506364e-07, + "loss": 0.2788, + "step": 32569 + }, + { + "epoch": 1.5257413219656157, + "grad_norm": 0.6036384313410957, + "learning_rate": 7.017588239039014e-07, + "loss": 0.2789, + "step": 32570 + }, + { + "epoch": 1.525788166955544, + "grad_norm": 0.5792809833579293, + "learning_rate": 7.01627078505592e-07, + "loss": 0.275, + "step": 32571 + }, + { + "epoch": 1.5258350119454724, + "grad_norm": 0.603756327597194, + "learning_rate": 7.014953434564662e-07, + "loss": 0.2747, + "step": 32572 + }, + { + "epoch": 1.525881856935401, + "grad_norm": 0.6067243786477572, + "learning_rate": 7.013636187572825e-07, + "loss": 0.261, + "step": 32573 + }, + { + "epoch": 1.525928701925329, + "grad_norm": 0.6211555436932432, + "learning_rate": 7.012319044087992e-07, + "loss": 0.2746, + "step": 32574 + }, + { + "epoch": 1.5259755469152574, + "grad_norm": 0.6028003124191961, + "learning_rate": 7.011002004117742e-07, + "loss": 0.2666, + "step": 32575 + }, + { + "epoch": 1.5260223919051858, + "grad_norm": 0.6074360903923596, + "learning_rate": 7.009685067669655e-07, + "loss": 0.2658, + "step": 32576 + }, + { + "epoch": 1.526069236895114, + "grad_norm": 0.6032561472392872, + "learning_rate": 7.008368234751315e-07, + "loss": 0.259, + "step": 32577 + }, + { + "epoch": 1.5261160818850423, + "grad_norm": 0.5978124118908886, + "learning_rate": 7.007051505370293e-07, + "loss": 0.2687, + "step": 32578 + }, + { + "epoch": 1.5261629268749708, + "grad_norm": 0.5723688339345298, + "learning_rate": 7.005734879534162e-07, + "loss": 0.2422, + "step": 32579 + }, + { + "epoch": 1.526209771864899, + "grad_norm": 0.5953535899830065, + "learning_rate": 7.004418357250503e-07, + "loss": 0.277, + "step": 32580 + }, + { + "epoch": 1.5262566168548273, + "grad_norm": 0.5960480377681563, + "learning_rate": 7.003101938526893e-07, + "loss": 0.2723, + "step": 32581 + }, + { + "epoch": 1.5263034618447557, + "grad_norm": 0.5858420127803194, + "learning_rate": 7.001785623370908e-07, + "loss": 0.2634, + "step": 32582 + }, + { + "epoch": 1.526350306834684, + "grad_norm": 0.626773822855191, + "learning_rate": 7.000469411790131e-07, + "loss": 0.2691, + "step": 32583 + }, + { + "epoch": 1.5263971518246122, + "grad_norm": 0.573545765116289, + "learning_rate": 6.999153303792122e-07, + "loss": 0.2553, + "step": 32584 + }, + { + "epoch": 1.5264439968145407, + "grad_norm": 0.6421159258298773, + "learning_rate": 6.997837299384467e-07, + "loss": 0.2767, + "step": 32585 + }, + { + "epoch": 1.5264908418044691, + "grad_norm": 0.5739028856706414, + "learning_rate": 6.996521398574727e-07, + "loss": 0.2759, + "step": 32586 + }, + { + "epoch": 1.5265376867943974, + "grad_norm": 0.617173109681711, + "learning_rate": 6.995205601370481e-07, + "loss": 0.2752, + "step": 32587 + }, + { + "epoch": 1.5265845317843256, + "grad_norm": 0.6370412141025557, + "learning_rate": 6.9938899077793e-07, + "loss": 0.3024, + "step": 32588 + }, + { + "epoch": 1.526631376774254, + "grad_norm": 0.6094653051515906, + "learning_rate": 6.992574317808768e-07, + "loss": 0.2817, + "step": 32589 + }, + { + "epoch": 1.5266782217641823, + "grad_norm": 0.5764913320753873, + "learning_rate": 6.991258831466435e-07, + "loss": 0.265, + "step": 32590 + }, + { + "epoch": 1.5267250667541106, + "grad_norm": 0.5897447275305895, + "learning_rate": 6.989943448759882e-07, + "loss": 0.2766, + "step": 32591 + }, + { + "epoch": 1.526771911744039, + "grad_norm": 0.5928438605981595, + "learning_rate": 6.988628169696685e-07, + "loss": 0.2828, + "step": 32592 + }, + { + "epoch": 1.5268187567339673, + "grad_norm": 0.6130433921771231, + "learning_rate": 6.987312994284399e-07, + "loss": 0.2686, + "step": 32593 + }, + { + "epoch": 1.5268656017238955, + "grad_norm": 0.5746102645203743, + "learning_rate": 6.985997922530596e-07, + "loss": 0.2665, + "step": 32594 + }, + { + "epoch": 1.526912446713824, + "grad_norm": 0.6016898318799245, + "learning_rate": 6.984682954442859e-07, + "loss": 0.279, + "step": 32595 + }, + { + "epoch": 1.5269592917037524, + "grad_norm": 0.6117507688408175, + "learning_rate": 6.983368090028733e-07, + "loss": 0.2942, + "step": 32596 + }, + { + "epoch": 1.5270061366936805, + "grad_norm": 0.5549118013050989, + "learning_rate": 6.982053329295796e-07, + "loss": 0.2587, + "step": 32597 + }, + { + "epoch": 1.527052981683609, + "grad_norm": 0.5600354313251218, + "learning_rate": 6.980738672251622e-07, + "loss": 0.2671, + "step": 32598 + }, + { + "epoch": 1.5270998266735374, + "grad_norm": 0.6141700381161648, + "learning_rate": 6.979424118903761e-07, + "loss": 0.2793, + "step": 32599 + }, + { + "epoch": 1.5271466716634656, + "grad_norm": 0.6236249504818592, + "learning_rate": 6.978109669259783e-07, + "loss": 0.2632, + "step": 32600 + }, + { + "epoch": 1.5271935166533939, + "grad_norm": 0.5912971061953325, + "learning_rate": 6.976795323327256e-07, + "loss": 0.2736, + "step": 32601 + }, + { + "epoch": 1.5272403616433223, + "grad_norm": 0.6132965720160657, + "learning_rate": 6.97548108111375e-07, + "loss": 0.2819, + "step": 32602 + }, + { + "epoch": 1.5272872066332506, + "grad_norm": 0.5864319092911263, + "learning_rate": 6.974166942626812e-07, + "loss": 0.2597, + "step": 32603 + }, + { + "epoch": 1.5273340516231788, + "grad_norm": 0.5309379941241394, + "learning_rate": 6.972852907874012e-07, + "loss": 0.2515, + "step": 32604 + }, + { + "epoch": 1.5273808966131073, + "grad_norm": 0.5927960392033982, + "learning_rate": 6.971538976862918e-07, + "loss": 0.2587, + "step": 32605 + }, + { + "epoch": 1.5274277416030355, + "grad_norm": 0.5590536693206016, + "learning_rate": 6.970225149601081e-07, + "loss": 0.2761, + "step": 32606 + }, + { + "epoch": 1.5274745865929638, + "grad_norm": 0.6314786435614531, + "learning_rate": 6.968911426096068e-07, + "loss": 0.3022, + "step": 32607 + }, + { + "epoch": 1.5275214315828922, + "grad_norm": 0.5988605043898182, + "learning_rate": 6.967597806355442e-07, + "loss": 0.2636, + "step": 32608 + }, + { + "epoch": 1.5275682765728207, + "grad_norm": 0.6361953299538148, + "learning_rate": 6.966284290386751e-07, + "loss": 0.2993, + "step": 32609 + }, + { + "epoch": 1.5276151215627487, + "grad_norm": 0.5792762685732624, + "learning_rate": 6.964970878197563e-07, + "loss": 0.2663, + "step": 32610 + }, + { + "epoch": 1.5276619665526772, + "grad_norm": 0.5961835544229028, + "learning_rate": 6.963657569795434e-07, + "loss": 0.2644, + "step": 32611 + }, + { + "epoch": 1.5277088115426056, + "grad_norm": 0.5384369206079024, + "learning_rate": 6.962344365187932e-07, + "loss": 0.2425, + "step": 32612 + }, + { + "epoch": 1.5277556565325339, + "grad_norm": 0.6429655712995614, + "learning_rate": 6.961031264382592e-07, + "loss": 0.2783, + "step": 32613 + }, + { + "epoch": 1.527802501522462, + "grad_norm": 0.6046581360600048, + "learning_rate": 6.959718267386994e-07, + "loss": 0.2757, + "step": 32614 + }, + { + "epoch": 1.5278493465123906, + "grad_norm": 0.6277902911068988, + "learning_rate": 6.958405374208676e-07, + "loss": 0.2799, + "step": 32615 + }, + { + "epoch": 1.5278961915023188, + "grad_norm": 0.5902082632648337, + "learning_rate": 6.957092584855202e-07, + "loss": 0.2611, + "step": 32616 + }, + { + "epoch": 1.527943036492247, + "grad_norm": 0.5579258269544652, + "learning_rate": 6.955779899334123e-07, + "loss": 0.2536, + "step": 32617 + }, + { + "epoch": 1.5279898814821755, + "grad_norm": 0.6038089139795682, + "learning_rate": 6.954467317652994e-07, + "loss": 0.2734, + "step": 32618 + }, + { + "epoch": 1.5280367264721038, + "grad_norm": 0.6449568339553067, + "learning_rate": 6.953154839819379e-07, + "loss": 0.2643, + "step": 32619 + }, + { + "epoch": 1.528083571462032, + "grad_norm": 0.5624178208791902, + "learning_rate": 6.951842465840824e-07, + "loss": 0.2678, + "step": 32620 + }, + { + "epoch": 1.5281304164519605, + "grad_norm": 0.5885323018089464, + "learning_rate": 6.95053019572487e-07, + "loss": 0.2637, + "step": 32621 + }, + { + "epoch": 1.528177261441889, + "grad_norm": 0.5771491746167753, + "learning_rate": 6.949218029479077e-07, + "loss": 0.2887, + "step": 32622 + }, + { + "epoch": 1.5282241064318172, + "grad_norm": 0.5432955166019862, + "learning_rate": 6.947905967110999e-07, + "loss": 0.2638, + "step": 32623 + }, + { + "epoch": 1.5282709514217454, + "grad_norm": 0.5840402155368166, + "learning_rate": 6.946594008628186e-07, + "loss": 0.2528, + "step": 32624 + }, + { + "epoch": 1.5283177964116739, + "grad_norm": 0.6050160969952668, + "learning_rate": 6.945282154038182e-07, + "loss": 0.2621, + "step": 32625 + }, + { + "epoch": 1.528364641401602, + "grad_norm": 0.5794272635554716, + "learning_rate": 6.943970403348555e-07, + "loss": 0.257, + "step": 32626 + }, + { + "epoch": 1.5284114863915303, + "grad_norm": 0.5803455228369295, + "learning_rate": 6.942658756566836e-07, + "loss": 0.2575, + "step": 32627 + }, + { + "epoch": 1.5284583313814588, + "grad_norm": 0.6060036567596621, + "learning_rate": 6.941347213700573e-07, + "loss": 0.2732, + "step": 32628 + }, + { + "epoch": 1.528505176371387, + "grad_norm": 0.6076475535481157, + "learning_rate": 6.940035774757314e-07, + "loss": 0.2665, + "step": 32629 + }, + { + "epoch": 1.5285520213613153, + "grad_norm": 0.5699258619655078, + "learning_rate": 6.938724439744612e-07, + "loss": 0.2556, + "step": 32630 + }, + { + "epoch": 1.5285988663512438, + "grad_norm": 0.6084685900092807, + "learning_rate": 6.937413208670013e-07, + "loss": 0.2572, + "step": 32631 + }, + { + "epoch": 1.5286457113411722, + "grad_norm": 0.6453186926198441, + "learning_rate": 6.936102081541065e-07, + "loss": 0.2817, + "step": 32632 + }, + { + "epoch": 1.5286925563311002, + "grad_norm": 0.5696611198721441, + "learning_rate": 6.934791058365303e-07, + "loss": 0.2519, + "step": 32633 + }, + { + "epoch": 1.5287394013210287, + "grad_norm": 0.5591930500441517, + "learning_rate": 6.933480139150286e-07, + "loss": 0.2581, + "step": 32634 + }, + { + "epoch": 1.5287862463109572, + "grad_norm": 0.6206280704586397, + "learning_rate": 6.932169323903545e-07, + "loss": 0.2861, + "step": 32635 + }, + { + "epoch": 1.5288330913008854, + "grad_norm": 0.6096958017750134, + "learning_rate": 6.930858612632626e-07, + "loss": 0.2746, + "step": 32636 + }, + { + "epoch": 1.5288799362908136, + "grad_norm": 0.5718440754510856, + "learning_rate": 6.929548005345075e-07, + "loss": 0.2618, + "step": 32637 + }, + { + "epoch": 1.528926781280742, + "grad_norm": 0.5934023732377298, + "learning_rate": 6.928237502048437e-07, + "loss": 0.2694, + "step": 32638 + }, + { + "epoch": 1.5289736262706703, + "grad_norm": 0.6004018182134157, + "learning_rate": 6.926927102750247e-07, + "loss": 0.2826, + "step": 32639 + }, + { + "epoch": 1.5290204712605986, + "grad_norm": 0.5945617533574483, + "learning_rate": 6.925616807458046e-07, + "loss": 0.2676, + "step": 32640 + }, + { + "epoch": 1.529067316250527, + "grad_norm": 0.6047435206388759, + "learning_rate": 6.924306616179388e-07, + "loss": 0.2674, + "step": 32641 + }, + { + "epoch": 1.5291141612404553, + "grad_norm": 0.6027646119826976, + "learning_rate": 6.922996528921794e-07, + "loss": 0.2716, + "step": 32642 + }, + { + "epoch": 1.5291610062303835, + "grad_norm": 0.5809795868031744, + "learning_rate": 6.92168654569281e-07, + "loss": 0.2758, + "step": 32643 + }, + { + "epoch": 1.529207851220312, + "grad_norm": 0.5646910792204668, + "learning_rate": 6.920376666499984e-07, + "loss": 0.2523, + "step": 32644 + }, + { + "epoch": 1.5292546962102405, + "grad_norm": 0.5632475946916924, + "learning_rate": 6.91906689135084e-07, + "loss": 0.2585, + "step": 32645 + }, + { + "epoch": 1.5293015412001685, + "grad_norm": 0.6135709215533915, + "learning_rate": 6.917757220252921e-07, + "loss": 0.2917, + "step": 32646 + }, + { + "epoch": 1.529348386190097, + "grad_norm": 0.5851931130473378, + "learning_rate": 6.91644765321377e-07, + "loss": 0.2843, + "step": 32647 + }, + { + "epoch": 1.5293952311800254, + "grad_norm": 0.6414818911829103, + "learning_rate": 6.915138190240913e-07, + "loss": 0.2811, + "step": 32648 + }, + { + "epoch": 1.5294420761699536, + "grad_norm": 0.6072269721936583, + "learning_rate": 6.91382883134189e-07, + "loss": 0.2753, + "step": 32649 + }, + { + "epoch": 1.5294889211598819, + "grad_norm": 0.5867487236409602, + "learning_rate": 6.91251957652424e-07, + "loss": 0.2733, + "step": 32650 + }, + { + "epoch": 1.5295357661498103, + "grad_norm": 0.6483435683038259, + "learning_rate": 6.91121042579549e-07, + "loss": 0.2926, + "step": 32651 + }, + { + "epoch": 1.5295826111397386, + "grad_norm": 0.5910922931688036, + "learning_rate": 6.909901379163178e-07, + "loss": 0.267, + "step": 32652 + }, + { + "epoch": 1.5296294561296668, + "grad_norm": 0.61094841565388, + "learning_rate": 6.908592436634834e-07, + "loss": 0.2732, + "step": 32653 + }, + { + "epoch": 1.5296763011195953, + "grad_norm": 0.6229379775799255, + "learning_rate": 6.907283598218003e-07, + "loss": 0.2689, + "step": 32654 + }, + { + "epoch": 1.5297231461095235, + "grad_norm": 0.6226030068370995, + "learning_rate": 6.905974863920198e-07, + "loss": 0.2762, + "step": 32655 + }, + { + "epoch": 1.5297699910994518, + "grad_norm": 0.6172495565523136, + "learning_rate": 6.904666233748969e-07, + "loss": 0.2835, + "step": 32656 + }, + { + "epoch": 1.5298168360893802, + "grad_norm": 0.6182998561587745, + "learning_rate": 6.90335770771183e-07, + "loss": 0.2737, + "step": 32657 + }, + { + "epoch": 1.5298636810793087, + "grad_norm": 0.5998858578589741, + "learning_rate": 6.902049285816318e-07, + "loss": 0.2514, + "step": 32658 + }, + { + "epoch": 1.529910526069237, + "grad_norm": 0.6401587237916094, + "learning_rate": 6.900740968069966e-07, + "loss": 0.2894, + "step": 32659 + }, + { + "epoch": 1.5299573710591652, + "grad_norm": 0.5831451320926355, + "learning_rate": 6.899432754480298e-07, + "loss": 0.2594, + "step": 32660 + }, + { + "epoch": 1.5300042160490936, + "grad_norm": 0.6007270934404234, + "learning_rate": 6.898124645054855e-07, + "loss": 0.2582, + "step": 32661 + }, + { + "epoch": 1.5300510610390219, + "grad_norm": 0.592494996740161, + "learning_rate": 6.896816639801152e-07, + "loss": 0.2777, + "step": 32662 + }, + { + "epoch": 1.5300979060289501, + "grad_norm": 0.5852403492493506, + "learning_rate": 6.895508738726714e-07, + "loss": 0.2583, + "step": 32663 + }, + { + "epoch": 1.5301447510188786, + "grad_norm": 0.5873675864538688, + "learning_rate": 6.894200941839071e-07, + "loss": 0.2848, + "step": 32664 + }, + { + "epoch": 1.5301915960088068, + "grad_norm": 0.5553566649227336, + "learning_rate": 6.892893249145752e-07, + "loss": 0.2712, + "step": 32665 + }, + { + "epoch": 1.530238440998735, + "grad_norm": 0.6331789900529922, + "learning_rate": 6.891585660654282e-07, + "loss": 0.2785, + "step": 32666 + }, + { + "epoch": 1.5302852859886635, + "grad_norm": 0.6302359764838588, + "learning_rate": 6.890278176372183e-07, + "loss": 0.283, + "step": 32667 + }, + { + "epoch": 1.530332130978592, + "grad_norm": 0.6200317229830218, + "learning_rate": 6.88897079630699e-07, + "loss": 0.2755, + "step": 32668 + }, + { + "epoch": 1.53037897596852, + "grad_norm": 0.5737548978500162, + "learning_rate": 6.88766352046622e-07, + "loss": 0.2505, + "step": 32669 + }, + { + "epoch": 1.5304258209584485, + "grad_norm": 0.617205069726271, + "learning_rate": 6.886356348857384e-07, + "loss": 0.2883, + "step": 32670 + }, + { + "epoch": 1.530472665948377, + "grad_norm": 0.6270630108313339, + "learning_rate": 6.885049281488015e-07, + "loss": 0.28, + "step": 32671 + }, + { + "epoch": 1.5305195109383052, + "grad_norm": 0.6307581015608538, + "learning_rate": 6.883742318365636e-07, + "loss": 0.2707, + "step": 32672 + }, + { + "epoch": 1.5305663559282334, + "grad_norm": 0.5627698511847453, + "learning_rate": 6.882435459497764e-07, + "loss": 0.27, + "step": 32673 + }, + { + "epoch": 1.5306132009181619, + "grad_norm": 0.6216020794894144, + "learning_rate": 6.881128704891924e-07, + "loss": 0.2679, + "step": 32674 + }, + { + "epoch": 1.5306600459080901, + "grad_norm": 0.5916507507883475, + "learning_rate": 6.87982205455564e-07, + "loss": 0.2746, + "step": 32675 + }, + { + "epoch": 1.5307068908980184, + "grad_norm": 0.5950888335028081, + "learning_rate": 6.878515508496428e-07, + "loss": 0.241, + "step": 32676 + }, + { + "epoch": 1.5307537358879468, + "grad_norm": 0.6166940153556744, + "learning_rate": 6.877209066721796e-07, + "loss": 0.287, + "step": 32677 + }, + { + "epoch": 1.530800580877875, + "grad_norm": 0.6343903998796155, + "learning_rate": 6.875902729239273e-07, + "loss": 0.2594, + "step": 32678 + }, + { + "epoch": 1.5308474258678033, + "grad_norm": 0.6109170957196488, + "learning_rate": 6.874596496056371e-07, + "loss": 0.2808, + "step": 32679 + }, + { + "epoch": 1.5308942708577318, + "grad_norm": 0.5774198767430591, + "learning_rate": 6.873290367180613e-07, + "loss": 0.2639, + "step": 32680 + }, + { + "epoch": 1.5309411158476602, + "grad_norm": 0.5960771290985972, + "learning_rate": 6.87198434261952e-07, + "loss": 0.2657, + "step": 32681 + }, + { + "epoch": 1.5309879608375883, + "grad_norm": 0.5791125063108304, + "learning_rate": 6.870678422380591e-07, + "loss": 0.2737, + "step": 32682 + }, + { + "epoch": 1.5310348058275167, + "grad_norm": 0.6402544243394608, + "learning_rate": 6.869372606471361e-07, + "loss": 0.2827, + "step": 32683 + }, + { + "epoch": 1.5310816508174452, + "grad_norm": 0.6178577615662024, + "learning_rate": 6.868066894899328e-07, + "loss": 0.2793, + "step": 32684 + }, + { + "epoch": 1.5311284958073734, + "grad_norm": 0.5494972328108237, + "learning_rate": 6.866761287672011e-07, + "loss": 0.2504, + "step": 32685 + }, + { + "epoch": 1.5311753407973017, + "grad_norm": 0.5554664240107635, + "learning_rate": 6.865455784796923e-07, + "loss": 0.2772, + "step": 32686 + }, + { + "epoch": 1.5312221857872301, + "grad_norm": 0.603214426678631, + "learning_rate": 6.86415038628159e-07, + "loss": 0.2623, + "step": 32687 + }, + { + "epoch": 1.5312690307771584, + "grad_norm": 0.5889220453827322, + "learning_rate": 6.862845092133505e-07, + "loss": 0.2675, + "step": 32688 + }, + { + "epoch": 1.5313158757670866, + "grad_norm": 0.5872648552939335, + "learning_rate": 6.861539902360187e-07, + "loss": 0.2503, + "step": 32689 + }, + { + "epoch": 1.531362720757015, + "grad_norm": 0.578000634940039, + "learning_rate": 6.860234816969155e-07, + "loss": 0.2505, + "step": 32690 + }, + { + "epoch": 1.5314095657469433, + "grad_norm": 0.5634870993287382, + "learning_rate": 6.858929835967906e-07, + "loss": 0.26, + "step": 32691 + }, + { + "epoch": 1.5314564107368716, + "grad_norm": 0.5936108753019805, + "learning_rate": 6.857624959363956e-07, + "loss": 0.2684, + "step": 32692 + }, + { + "epoch": 1.5315032557268, + "grad_norm": 0.6236267789909593, + "learning_rate": 6.856320187164822e-07, + "loss": 0.2634, + "step": 32693 + }, + { + "epoch": 1.5315501007167285, + "grad_norm": 0.6244926496204447, + "learning_rate": 6.855015519378e-07, + "loss": 0.2717, + "step": 32694 + }, + { + "epoch": 1.5315969457066567, + "grad_norm": 0.5513707491207345, + "learning_rate": 6.853710956011e-07, + "loss": 0.255, + "step": 32695 + }, + { + "epoch": 1.531643790696585, + "grad_norm": 0.592574692464672, + "learning_rate": 6.85240649707134e-07, + "loss": 0.2872, + "step": 32696 + }, + { + "epoch": 1.5316906356865134, + "grad_norm": 0.5591605835092979, + "learning_rate": 6.851102142566512e-07, + "loss": 0.2552, + "step": 32697 + }, + { + "epoch": 1.5317374806764417, + "grad_norm": 0.5934333605551797, + "learning_rate": 6.849797892504031e-07, + "loss": 0.2802, + "step": 32698 + }, + { + "epoch": 1.53178432566637, + "grad_norm": 0.5791166326776133, + "learning_rate": 6.848493746891408e-07, + "loss": 0.2657, + "step": 32699 + }, + { + "epoch": 1.5318311706562984, + "grad_norm": 0.5925018797401861, + "learning_rate": 6.847189705736134e-07, + "loss": 0.2647, + "step": 32700 + }, + { + "epoch": 1.5318780156462266, + "grad_norm": 0.5700279482882509, + "learning_rate": 6.84588576904572e-07, + "loss": 0.2628, + "step": 32701 + }, + { + "epoch": 1.5319248606361549, + "grad_norm": 0.6579755802829971, + "learning_rate": 6.844581936827671e-07, + "loss": 0.2868, + "step": 32702 + }, + { + "epoch": 1.5319717056260833, + "grad_norm": 0.6139668334938293, + "learning_rate": 6.843278209089499e-07, + "loss": 0.2735, + "step": 32703 + }, + { + "epoch": 1.5320185506160118, + "grad_norm": 0.6308858280529297, + "learning_rate": 6.841974585838687e-07, + "loss": 0.2746, + "step": 32704 + }, + { + "epoch": 1.5320653956059398, + "grad_norm": 0.5907142845025806, + "learning_rate": 6.840671067082758e-07, + "loss": 0.2829, + "step": 32705 + }, + { + "epoch": 1.5321122405958683, + "grad_norm": 0.5868155682126078, + "learning_rate": 6.83936765282919e-07, + "loss": 0.2626, + "step": 32706 + }, + { + "epoch": 1.5321590855857967, + "grad_norm": 0.6304833925242576, + "learning_rate": 6.838064343085501e-07, + "loss": 0.2778, + "step": 32707 + }, + { + "epoch": 1.532205930575725, + "grad_norm": 0.5877799659786542, + "learning_rate": 6.836761137859185e-07, + "loss": 0.2729, + "step": 32708 + }, + { + "epoch": 1.5322527755656532, + "grad_norm": 0.644073457497551, + "learning_rate": 6.835458037157746e-07, + "loss": 0.302, + "step": 32709 + }, + { + "epoch": 1.5322996205555817, + "grad_norm": 0.6224348927380833, + "learning_rate": 6.834155040988686e-07, + "loss": 0.2888, + "step": 32710 + }, + { + "epoch": 1.53234646554551, + "grad_norm": 0.6163490477069101, + "learning_rate": 6.832852149359498e-07, + "loss": 0.2772, + "step": 32711 + }, + { + "epoch": 1.5323933105354381, + "grad_norm": 0.5923200619476637, + "learning_rate": 6.831549362277673e-07, + "loss": 0.2819, + "step": 32712 + }, + { + "epoch": 1.5324401555253666, + "grad_norm": 0.5943660521480547, + "learning_rate": 6.830246679750716e-07, + "loss": 0.2732, + "step": 32713 + }, + { + "epoch": 1.5324870005152949, + "grad_norm": 0.5593946050438676, + "learning_rate": 6.828944101786119e-07, + "loss": 0.2662, + "step": 32714 + }, + { + "epoch": 1.532533845505223, + "grad_norm": 0.5900248771598774, + "learning_rate": 6.827641628391385e-07, + "loss": 0.2692, + "step": 32715 + }, + { + "epoch": 1.5325806904951516, + "grad_norm": 0.6211600468978652, + "learning_rate": 6.826339259574006e-07, + "loss": 0.2908, + "step": 32716 + }, + { + "epoch": 1.53262753548508, + "grad_norm": 0.5608175732457978, + "learning_rate": 6.82503699534148e-07, + "loss": 0.2606, + "step": 32717 + }, + { + "epoch": 1.532674380475008, + "grad_norm": 0.5890908991466225, + "learning_rate": 6.823734835701301e-07, + "loss": 0.2604, + "step": 32718 + }, + { + "epoch": 1.5327212254649365, + "grad_norm": 0.5223096968071016, + "learning_rate": 6.822432780660953e-07, + "loss": 0.2529, + "step": 32719 + }, + { + "epoch": 1.532768070454865, + "grad_norm": 0.5749976842896882, + "learning_rate": 6.821130830227935e-07, + "loss": 0.2702, + "step": 32720 + }, + { + "epoch": 1.5328149154447932, + "grad_norm": 0.579717628843916, + "learning_rate": 6.81982898440974e-07, + "loss": 0.2498, + "step": 32721 + }, + { + "epoch": 1.5328617604347214, + "grad_norm": 0.5654409427868521, + "learning_rate": 6.81852724321386e-07, + "loss": 0.2557, + "step": 32722 + }, + { + "epoch": 1.53290860542465, + "grad_norm": 0.5502629531679396, + "learning_rate": 6.817225606647793e-07, + "loss": 0.2456, + "step": 32723 + }, + { + "epoch": 1.5329554504145781, + "grad_norm": 0.5749912173947891, + "learning_rate": 6.815924074719013e-07, + "loss": 0.2776, + "step": 32724 + }, + { + "epoch": 1.5330022954045064, + "grad_norm": 0.6003984995647454, + "learning_rate": 6.814622647435029e-07, + "loss": 0.2748, + "step": 32725 + }, + { + "epoch": 1.5330491403944349, + "grad_norm": 0.6120539009447042, + "learning_rate": 6.813321324803316e-07, + "loss": 0.2597, + "step": 32726 + }, + { + "epoch": 1.533095985384363, + "grad_norm": 0.6178434107457708, + "learning_rate": 6.812020106831363e-07, + "loss": 0.2775, + "step": 32727 + }, + { + "epoch": 1.5331428303742913, + "grad_norm": 0.5895477844429023, + "learning_rate": 6.810718993526666e-07, + "loss": 0.2784, + "step": 32728 + }, + { + "epoch": 1.5331896753642198, + "grad_norm": 0.6263900077917877, + "learning_rate": 6.809417984896716e-07, + "loss": 0.2799, + "step": 32729 + }, + { + "epoch": 1.5332365203541483, + "grad_norm": 0.6021904616414292, + "learning_rate": 6.808117080948986e-07, + "loss": 0.2738, + "step": 32730 + }, + { + "epoch": 1.5332833653440765, + "grad_norm": 0.5729006861719117, + "learning_rate": 6.80681628169097e-07, + "loss": 0.2688, + "step": 32731 + }, + { + "epoch": 1.5333302103340047, + "grad_norm": 0.624989113375905, + "learning_rate": 6.80551558713016e-07, + "loss": 0.2754, + "step": 32732 + }, + { + "epoch": 1.5333770553239332, + "grad_norm": 0.6956041339080756, + "learning_rate": 6.804214997274028e-07, + "loss": 0.2982, + "step": 32733 + }, + { + "epoch": 1.5334239003138614, + "grad_norm": 0.5356137104743064, + "learning_rate": 6.802914512130065e-07, + "loss": 0.2365, + "step": 32734 + }, + { + "epoch": 1.5334707453037897, + "grad_norm": 0.5874287299133448, + "learning_rate": 6.801614131705758e-07, + "loss": 0.2764, + "step": 32735 + }, + { + "epoch": 1.5335175902937181, + "grad_norm": 0.6797353208891066, + "learning_rate": 6.800313856008592e-07, + "loss": 0.2902, + "step": 32736 + }, + { + "epoch": 1.5335644352836464, + "grad_norm": 0.5894595019137017, + "learning_rate": 6.799013685046038e-07, + "loss": 0.2688, + "step": 32737 + }, + { + "epoch": 1.5336112802735746, + "grad_norm": 0.602334632364298, + "learning_rate": 6.797713618825596e-07, + "loss": 0.2804, + "step": 32738 + }, + { + "epoch": 1.533658125263503, + "grad_norm": 0.593005419662457, + "learning_rate": 6.796413657354728e-07, + "loss": 0.2615, + "step": 32739 + }, + { + "epoch": 1.5337049702534316, + "grad_norm": 0.5938240868814935, + "learning_rate": 6.795113800640923e-07, + "loss": 0.2753, + "step": 32740 + }, + { + "epoch": 1.5337518152433596, + "grad_norm": 0.5722390945144483, + "learning_rate": 6.793814048691663e-07, + "loss": 0.2596, + "step": 32741 + }, + { + "epoch": 1.533798660233288, + "grad_norm": 0.5696515846338825, + "learning_rate": 6.792514401514436e-07, + "loss": 0.2531, + "step": 32742 + }, + { + "epoch": 1.5338455052232165, + "grad_norm": 0.5720069450808206, + "learning_rate": 6.791214859116705e-07, + "loss": 0.2524, + "step": 32743 + }, + { + "epoch": 1.5338923502131447, + "grad_norm": 0.5786696801696463, + "learning_rate": 6.789915421505952e-07, + "loss": 0.2616, + "step": 32744 + }, + { + "epoch": 1.533939195203073, + "grad_norm": 0.6202672810160473, + "learning_rate": 6.78861608868967e-07, + "loss": 0.2641, + "step": 32745 + }, + { + "epoch": 1.5339860401930014, + "grad_norm": 0.6041414467982045, + "learning_rate": 6.787316860675316e-07, + "loss": 0.2755, + "step": 32746 + }, + { + "epoch": 1.5340328851829297, + "grad_norm": 0.5939113320781899, + "learning_rate": 6.786017737470377e-07, + "loss": 0.2709, + "step": 32747 + }, + { + "epoch": 1.534079730172858, + "grad_norm": 0.5847543346654532, + "learning_rate": 6.784718719082337e-07, + "loss": 0.2736, + "step": 32748 + }, + { + "epoch": 1.5341265751627864, + "grad_norm": 0.5789846585785015, + "learning_rate": 6.783419805518651e-07, + "loss": 0.2587, + "step": 32749 + }, + { + "epoch": 1.5341734201527146, + "grad_norm": 0.5838821046217526, + "learning_rate": 6.782120996786809e-07, + "loss": 0.2652, + "step": 32750 + }, + { + "epoch": 1.5342202651426429, + "grad_norm": 0.5516800284512638, + "learning_rate": 6.780822292894279e-07, + "loss": 0.2567, + "step": 32751 + }, + { + "epoch": 1.5342671101325713, + "grad_norm": 0.5696900903282668, + "learning_rate": 6.779523693848547e-07, + "loss": 0.263, + "step": 32752 + }, + { + "epoch": 1.5343139551224998, + "grad_norm": 0.60378457990448, + "learning_rate": 6.778225199657068e-07, + "loss": 0.2678, + "step": 32753 + }, + { + "epoch": 1.5343608001124278, + "grad_norm": 0.6640594418301755, + "learning_rate": 6.776926810327331e-07, + "loss": 0.2929, + "step": 32754 + }, + { + "epoch": 1.5344076451023563, + "grad_norm": 0.6211458428445582, + "learning_rate": 6.775628525866793e-07, + "loss": 0.2911, + "step": 32755 + }, + { + "epoch": 1.5344544900922847, + "grad_norm": 0.6284975238224422, + "learning_rate": 6.774330346282931e-07, + "loss": 0.2748, + "step": 32756 + }, + { + "epoch": 1.534501335082213, + "grad_norm": 0.5926099299964457, + "learning_rate": 6.773032271583219e-07, + "loss": 0.2672, + "step": 32757 + }, + { + "epoch": 1.5345481800721412, + "grad_norm": 0.5533269896549334, + "learning_rate": 6.771734301775124e-07, + "loss": 0.2562, + "step": 32758 + }, + { + "epoch": 1.5345950250620697, + "grad_norm": 0.5672786276780747, + "learning_rate": 6.770436436866124e-07, + "loss": 0.2629, + "step": 32759 + }, + { + "epoch": 1.534641870051998, + "grad_norm": 0.5917804935089668, + "learning_rate": 6.769138676863682e-07, + "loss": 0.2511, + "step": 32760 + }, + { + "epoch": 1.5346887150419262, + "grad_norm": 0.5604784900284246, + "learning_rate": 6.767841021775254e-07, + "loss": 0.2588, + "step": 32761 + }, + { + "epoch": 1.5347355600318546, + "grad_norm": 0.6093153460185421, + "learning_rate": 6.76654347160832e-07, + "loss": 0.2771, + "step": 32762 + }, + { + "epoch": 1.5347824050217829, + "grad_norm": 0.6308094943169704, + "learning_rate": 6.765246026370345e-07, + "loss": 0.2939, + "step": 32763 + }, + { + "epoch": 1.534829250011711, + "grad_norm": 0.592054909373063, + "learning_rate": 6.763948686068797e-07, + "loss": 0.2617, + "step": 32764 + }, + { + "epoch": 1.5348760950016396, + "grad_norm": 0.5888260812416102, + "learning_rate": 6.762651450711141e-07, + "loss": 0.2605, + "step": 32765 + }, + { + "epoch": 1.534922939991568, + "grad_norm": 0.5886344166320657, + "learning_rate": 6.76135432030485e-07, + "loss": 0.2766, + "step": 32766 + }, + { + "epoch": 1.5349697849814963, + "grad_norm": 0.5626680727072333, + "learning_rate": 6.76005729485738e-07, + "loss": 0.2632, + "step": 32767 + }, + { + "epoch": 1.5350166299714245, + "grad_norm": 0.6341381661907913, + "learning_rate": 6.758760374376189e-07, + "loss": 0.2975, + "step": 32768 + }, + { + "epoch": 1.535063474961353, + "grad_norm": 0.60351811201678, + "learning_rate": 6.757463558868746e-07, + "loss": 0.2718, + "step": 32769 + }, + { + "epoch": 1.5351103199512812, + "grad_norm": 0.6088839481441737, + "learning_rate": 6.756166848342515e-07, + "loss": 0.2598, + "step": 32770 + }, + { + "epoch": 1.5351571649412095, + "grad_norm": 0.5981578849525012, + "learning_rate": 6.754870242804959e-07, + "loss": 0.2716, + "step": 32771 + }, + { + "epoch": 1.535204009931138, + "grad_norm": 0.5788243343376746, + "learning_rate": 6.753573742263547e-07, + "loss": 0.2605, + "step": 32772 + }, + { + "epoch": 1.5352508549210662, + "grad_norm": 0.6061129168459656, + "learning_rate": 6.752277346725725e-07, + "loss": 0.267, + "step": 32773 + }, + { + "epoch": 1.5352976999109944, + "grad_norm": 0.6498422053246261, + "learning_rate": 6.750981056198966e-07, + "loss": 0.2842, + "step": 32774 + }, + { + "epoch": 1.5353445449009229, + "grad_norm": 0.5759360884595951, + "learning_rate": 6.749684870690718e-07, + "loss": 0.2682, + "step": 32775 + }, + { + "epoch": 1.5353913898908513, + "grad_norm": 0.5877643711150641, + "learning_rate": 6.748388790208446e-07, + "loss": 0.2634, + "step": 32776 + }, + { + "epoch": 1.5354382348807794, + "grad_norm": 0.5776360455073769, + "learning_rate": 6.747092814759609e-07, + "loss": 0.2636, + "step": 32777 + }, + { + "epoch": 1.5354850798707078, + "grad_norm": 0.5725554661386528, + "learning_rate": 6.745796944351676e-07, + "loss": 0.2537, + "step": 32778 + }, + { + "epoch": 1.5355319248606363, + "grad_norm": 0.6229055546664493, + "learning_rate": 6.744501178992082e-07, + "loss": 0.2714, + "step": 32779 + }, + { + "epoch": 1.5355787698505645, + "grad_norm": 0.5626356103003719, + "learning_rate": 6.743205518688298e-07, + "loss": 0.2609, + "step": 32780 + }, + { + "epoch": 1.5356256148404928, + "grad_norm": 0.6522347179831957, + "learning_rate": 6.741909963447782e-07, + "loss": 0.3042, + "step": 32781 + }, + { + "epoch": 1.5356724598304212, + "grad_norm": 0.6071200194258187, + "learning_rate": 6.740614513277979e-07, + "loss": 0.2698, + "step": 32782 + }, + { + "epoch": 1.5357193048203495, + "grad_norm": 0.5953260935007036, + "learning_rate": 6.739319168186351e-07, + "loss": 0.2681, + "step": 32783 + }, + { + "epoch": 1.5357661498102777, + "grad_norm": 0.5537305296034585, + "learning_rate": 6.738023928180359e-07, + "loss": 0.2579, + "step": 32784 + }, + { + "epoch": 1.5358129948002062, + "grad_norm": 0.5447631379834474, + "learning_rate": 6.736728793267441e-07, + "loss": 0.2515, + "step": 32785 + }, + { + "epoch": 1.5358598397901344, + "grad_norm": 0.6356795703223935, + "learning_rate": 6.73543376345506e-07, + "loss": 0.2791, + "step": 32786 + }, + { + "epoch": 1.5359066847800626, + "grad_norm": 0.5591012014202336, + "learning_rate": 6.734138838750672e-07, + "loss": 0.2595, + "step": 32787 + }, + { + "epoch": 1.535953529769991, + "grad_norm": 0.5691625612253971, + "learning_rate": 6.732844019161719e-07, + "loss": 0.2606, + "step": 32788 + }, + { + "epoch": 1.5360003747599196, + "grad_norm": 0.6123562499638445, + "learning_rate": 6.731549304695656e-07, + "loss": 0.2645, + "step": 32789 + }, + { + "epoch": 1.5360472197498476, + "grad_norm": 0.5765100470606078, + "learning_rate": 6.730254695359944e-07, + "loss": 0.2717, + "step": 32790 + }, + { + "epoch": 1.536094064739776, + "grad_norm": 0.5859753734347743, + "learning_rate": 6.728960191162015e-07, + "loss": 0.2595, + "step": 32791 + }, + { + "epoch": 1.5361409097297045, + "grad_norm": 0.5676921914915636, + "learning_rate": 6.727665792109331e-07, + "loss": 0.2568, + "step": 32792 + }, + { + "epoch": 1.5361877547196328, + "grad_norm": 0.5652571586951164, + "learning_rate": 6.726371498209333e-07, + "loss": 0.2577, + "step": 32793 + }, + { + "epoch": 1.536234599709561, + "grad_norm": 0.6316194517146225, + "learning_rate": 6.725077309469485e-07, + "loss": 0.2767, + "step": 32794 + }, + { + "epoch": 1.5362814446994895, + "grad_norm": 0.6332126382827613, + "learning_rate": 6.723783225897215e-07, + "loss": 0.2859, + "step": 32795 + }, + { + "epoch": 1.5363282896894177, + "grad_norm": 0.6169526795280235, + "learning_rate": 6.722489247499989e-07, + "loss": 0.2882, + "step": 32796 + }, + { + "epoch": 1.536375134679346, + "grad_norm": 0.6088385832917058, + "learning_rate": 6.721195374285233e-07, + "loss": 0.2706, + "step": 32797 + }, + { + "epoch": 1.5364219796692744, + "grad_norm": 0.5827948264555528, + "learning_rate": 6.719901606260404e-07, + "loss": 0.2659, + "step": 32798 + }, + { + "epoch": 1.5364688246592026, + "grad_norm": 0.630144392592937, + "learning_rate": 6.718607943432948e-07, + "loss": 0.2709, + "step": 32799 + }, + { + "epoch": 1.5365156696491309, + "grad_norm": 0.6279149911487268, + "learning_rate": 6.717314385810306e-07, + "loss": 0.2896, + "step": 32800 + }, + { + "epoch": 1.5365625146390594, + "grad_norm": 0.5728268195386734, + "learning_rate": 6.716020933399933e-07, + "loss": 0.266, + "step": 32801 + }, + { + "epoch": 1.5366093596289878, + "grad_norm": 0.6054602399576668, + "learning_rate": 6.714727586209258e-07, + "loss": 0.2823, + "step": 32802 + }, + { + "epoch": 1.536656204618916, + "grad_norm": 0.5956432568463736, + "learning_rate": 6.713434344245737e-07, + "loss": 0.2788, + "step": 32803 + }, + { + "epoch": 1.5367030496088443, + "grad_norm": 0.5825580381576749, + "learning_rate": 6.712141207516798e-07, + "loss": 0.272, + "step": 32804 + }, + { + "epoch": 1.5367498945987728, + "grad_norm": 0.6077437860406795, + "learning_rate": 6.710848176029888e-07, + "loss": 0.2786, + "step": 32805 + }, + { + "epoch": 1.536796739588701, + "grad_norm": 0.6111437190789247, + "learning_rate": 6.709555249792452e-07, + "loss": 0.2815, + "step": 32806 + }, + { + "epoch": 1.5368435845786292, + "grad_norm": 0.6071209019935251, + "learning_rate": 6.708262428811926e-07, + "loss": 0.2803, + "step": 32807 + }, + { + "epoch": 1.5368904295685577, + "grad_norm": 0.5985676837330638, + "learning_rate": 6.706969713095763e-07, + "loss": 0.2567, + "step": 32808 + }, + { + "epoch": 1.536937274558486, + "grad_norm": 0.5828736798772212, + "learning_rate": 6.70567710265139e-07, + "loss": 0.2576, + "step": 32809 + }, + { + "epoch": 1.5369841195484142, + "grad_norm": 0.5622494793358569, + "learning_rate": 6.704384597486238e-07, + "loss": 0.2554, + "step": 32810 + }, + { + "epoch": 1.5370309645383426, + "grad_norm": 0.6347018506252936, + "learning_rate": 6.703092197607755e-07, + "loss": 0.2943, + "step": 32811 + }, + { + "epoch": 1.537077809528271, + "grad_norm": 0.6280673687629755, + "learning_rate": 6.701799903023379e-07, + "loss": 0.2958, + "step": 32812 + }, + { + "epoch": 1.5371246545181991, + "grad_norm": 0.6134160116938272, + "learning_rate": 6.700507713740545e-07, + "loss": 0.2665, + "step": 32813 + }, + { + "epoch": 1.5371714995081276, + "grad_norm": 0.5870856107964725, + "learning_rate": 6.699215629766689e-07, + "loss": 0.2717, + "step": 32814 + }, + { + "epoch": 1.537218344498056, + "grad_norm": 0.6477617235750207, + "learning_rate": 6.697923651109256e-07, + "loss": 0.2967, + "step": 32815 + }, + { + "epoch": 1.5372651894879843, + "grad_norm": 0.6243982874211496, + "learning_rate": 6.69663177777567e-07, + "loss": 0.2849, + "step": 32816 + }, + { + "epoch": 1.5373120344779125, + "grad_norm": 0.5920586249141014, + "learning_rate": 6.695340009773363e-07, + "loss": 0.2698, + "step": 32817 + }, + { + "epoch": 1.537358879467841, + "grad_norm": 0.5857940331521339, + "learning_rate": 6.694048347109772e-07, + "loss": 0.2698, + "step": 32818 + }, + { + "epoch": 1.5374057244577692, + "grad_norm": 0.6728261720936888, + "learning_rate": 6.692756789792332e-07, + "loss": 0.2846, + "step": 32819 + }, + { + "epoch": 1.5374525694476975, + "grad_norm": 0.5947647704312968, + "learning_rate": 6.691465337828477e-07, + "loss": 0.2637, + "step": 32820 + }, + { + "epoch": 1.537499414437626, + "grad_norm": 0.6108213490705952, + "learning_rate": 6.690173991225641e-07, + "loss": 0.2753, + "step": 32821 + }, + { + "epoch": 1.5375462594275542, + "grad_norm": 0.5696909969141443, + "learning_rate": 6.688882749991246e-07, + "loss": 0.2589, + "step": 32822 + }, + { + "epoch": 1.5375931044174824, + "grad_norm": 0.6072510933190259, + "learning_rate": 6.687591614132738e-07, + "loss": 0.2859, + "step": 32823 + }, + { + "epoch": 1.5376399494074109, + "grad_norm": 0.6335081376763556, + "learning_rate": 6.686300583657527e-07, + "loss": 0.2724, + "step": 32824 + }, + { + "epoch": 1.5376867943973394, + "grad_norm": 0.556552852666859, + "learning_rate": 6.685009658573055e-07, + "loss": 0.2506, + "step": 32825 + }, + { + "epoch": 1.5377336393872674, + "grad_norm": 0.6162897639947972, + "learning_rate": 6.683718838886749e-07, + "loss": 0.2799, + "step": 32826 + }, + { + "epoch": 1.5377804843771958, + "grad_norm": 0.5656808622325099, + "learning_rate": 6.682428124606044e-07, + "loss": 0.2639, + "step": 32827 + }, + { + "epoch": 1.5378273293671243, + "grad_norm": 0.6470025787980691, + "learning_rate": 6.681137515738356e-07, + "loss": 0.296, + "step": 32828 + }, + { + "epoch": 1.5378741743570525, + "grad_norm": 0.5997394467566304, + "learning_rate": 6.679847012291116e-07, + "loss": 0.2713, + "step": 32829 + }, + { + "epoch": 1.5379210193469808, + "grad_norm": 0.6399250662120191, + "learning_rate": 6.678556614271759e-07, + "loss": 0.2905, + "step": 32830 + }, + { + "epoch": 1.5379678643369092, + "grad_norm": 0.617667980968896, + "learning_rate": 6.677266321687694e-07, + "loss": 0.2731, + "step": 32831 + }, + { + "epoch": 1.5380147093268375, + "grad_norm": 0.6393889143760482, + "learning_rate": 6.675976134546358e-07, + "loss": 0.2869, + "step": 32832 + }, + { + "epoch": 1.5380615543167657, + "grad_norm": 0.5858136568179225, + "learning_rate": 6.674686052855184e-07, + "loss": 0.2884, + "step": 32833 + }, + { + "epoch": 1.5381083993066942, + "grad_norm": 0.566681869441911, + "learning_rate": 6.673396076621575e-07, + "loss": 0.2668, + "step": 32834 + }, + { + "epoch": 1.5381552442966224, + "grad_norm": 0.5881545346413599, + "learning_rate": 6.672106205852965e-07, + "loss": 0.2752, + "step": 32835 + }, + { + "epoch": 1.5382020892865507, + "grad_norm": 0.5668427732823627, + "learning_rate": 6.670816440556788e-07, + "loss": 0.2515, + "step": 32836 + }, + { + "epoch": 1.5382489342764791, + "grad_norm": 0.5673265156470502, + "learning_rate": 6.669526780740445e-07, + "loss": 0.2632, + "step": 32837 + }, + { + "epoch": 1.5382957792664076, + "grad_norm": 0.6138177810022828, + "learning_rate": 6.66823722641137e-07, + "loss": 0.2717, + "step": 32838 + }, + { + "epoch": 1.5383426242563358, + "grad_norm": 0.6082939229114794, + "learning_rate": 6.66694777757699e-07, + "loss": 0.2744, + "step": 32839 + }, + { + "epoch": 1.538389469246264, + "grad_norm": 0.5953189773435018, + "learning_rate": 6.66565843424471e-07, + "loss": 0.2704, + "step": 32840 + }, + { + "epoch": 1.5384363142361925, + "grad_norm": 0.6093800476794722, + "learning_rate": 6.664369196421955e-07, + "loss": 0.2771, + "step": 32841 + }, + { + "epoch": 1.5384831592261208, + "grad_norm": 0.5944441076591782, + "learning_rate": 6.663080064116148e-07, + "loss": 0.2929, + "step": 32842 + }, + { + "epoch": 1.538530004216049, + "grad_norm": 0.5482030180430896, + "learning_rate": 6.661791037334716e-07, + "loss": 0.2582, + "step": 32843 + }, + { + "epoch": 1.5385768492059775, + "grad_norm": 0.6043024845002422, + "learning_rate": 6.660502116085057e-07, + "loss": 0.2802, + "step": 32844 + }, + { + "epoch": 1.5386236941959057, + "grad_norm": 0.5770609546972227, + "learning_rate": 6.659213300374609e-07, + "loss": 0.2715, + "step": 32845 + }, + { + "epoch": 1.538670539185834, + "grad_norm": 0.6159627507186012, + "learning_rate": 6.65792459021077e-07, + "loss": 0.2813, + "step": 32846 + }, + { + "epoch": 1.5387173841757624, + "grad_norm": 0.6345306213551977, + "learning_rate": 6.656635985600965e-07, + "loss": 0.2821, + "step": 32847 + }, + { + "epoch": 1.5387642291656909, + "grad_norm": 0.6228677367351566, + "learning_rate": 6.655347486552611e-07, + "loss": 0.2901, + "step": 32848 + }, + { + "epoch": 1.538811074155619, + "grad_norm": 0.5854388307935644, + "learning_rate": 6.654059093073118e-07, + "loss": 0.2704, + "step": 32849 + }, + { + "epoch": 1.5388579191455474, + "grad_norm": 0.5877849844303609, + "learning_rate": 6.652770805169914e-07, + "loss": 0.2665, + "step": 32850 + }, + { + "epoch": 1.5389047641354758, + "grad_norm": 0.6505735149145732, + "learning_rate": 6.6514826228504e-07, + "loss": 0.2973, + "step": 32851 + }, + { + "epoch": 1.538951609125404, + "grad_norm": 0.5804899696758602, + "learning_rate": 6.650194546121988e-07, + "loss": 0.2757, + "step": 32852 + }, + { + "epoch": 1.5389984541153323, + "grad_norm": 0.5560809222229014, + "learning_rate": 6.648906574992092e-07, + "loss": 0.2531, + "step": 32853 + }, + { + "epoch": 1.5390452991052608, + "grad_norm": 0.5949054494120221, + "learning_rate": 6.647618709468126e-07, + "loss": 0.2742, + "step": 32854 + }, + { + "epoch": 1.539092144095189, + "grad_norm": 0.61105377350265, + "learning_rate": 6.646330949557503e-07, + "loss": 0.2694, + "step": 32855 + }, + { + "epoch": 1.5391389890851173, + "grad_norm": 0.58403257258153, + "learning_rate": 6.645043295267631e-07, + "loss": 0.2704, + "step": 32856 + }, + { + "epoch": 1.5391858340750457, + "grad_norm": 0.5822415862799387, + "learning_rate": 6.643755746605929e-07, + "loss": 0.2572, + "step": 32857 + }, + { + "epoch": 1.539232679064974, + "grad_norm": 0.5943355657249083, + "learning_rate": 6.642468303579799e-07, + "loss": 0.2613, + "step": 32858 + }, + { + "epoch": 1.5392795240549022, + "grad_norm": 0.5847028732679583, + "learning_rate": 6.641180966196644e-07, + "loss": 0.2562, + "step": 32859 + }, + { + "epoch": 1.5393263690448307, + "grad_norm": 0.5985762036272165, + "learning_rate": 6.639893734463879e-07, + "loss": 0.2672, + "step": 32860 + }, + { + "epoch": 1.5393732140347591, + "grad_norm": 0.5667946275696731, + "learning_rate": 6.638606608388909e-07, + "loss": 0.2645, + "step": 32861 + }, + { + "epoch": 1.5394200590246871, + "grad_norm": 0.6326322829917425, + "learning_rate": 6.637319587979141e-07, + "loss": 0.3015, + "step": 32862 + }, + { + "epoch": 1.5394669040146156, + "grad_norm": 0.6099057222103913, + "learning_rate": 6.636032673241993e-07, + "loss": 0.2711, + "step": 32863 + }, + { + "epoch": 1.539513749004544, + "grad_norm": 0.5522806016896941, + "learning_rate": 6.634745864184855e-07, + "loss": 0.2627, + "step": 32864 + }, + { + "epoch": 1.5395605939944723, + "grad_norm": 0.6335750720023782, + "learning_rate": 6.633459160815145e-07, + "loss": 0.2926, + "step": 32865 + }, + { + "epoch": 1.5396074389844006, + "grad_norm": 0.6298042797471529, + "learning_rate": 6.632172563140255e-07, + "loss": 0.2841, + "step": 32866 + }, + { + "epoch": 1.539654283974329, + "grad_norm": 0.5774862460031619, + "learning_rate": 6.630886071167594e-07, + "loss": 0.2591, + "step": 32867 + }, + { + "epoch": 1.5397011289642573, + "grad_norm": 0.5886076470588824, + "learning_rate": 6.629599684904566e-07, + "loss": 0.2739, + "step": 32868 + }, + { + "epoch": 1.5397479739541855, + "grad_norm": 0.6316243204879518, + "learning_rate": 6.628313404358583e-07, + "loss": 0.2864, + "step": 32869 + }, + { + "epoch": 1.539794818944114, + "grad_norm": 0.6119337484454997, + "learning_rate": 6.627027229537034e-07, + "loss": 0.2648, + "step": 32870 + }, + { + "epoch": 1.5398416639340422, + "grad_norm": 0.5305370706652114, + "learning_rate": 6.625741160447322e-07, + "loss": 0.2639, + "step": 32871 + }, + { + "epoch": 1.5398885089239704, + "grad_norm": 0.6000423300811482, + "learning_rate": 6.62445519709686e-07, + "loss": 0.2667, + "step": 32872 + }, + { + "epoch": 1.539935353913899, + "grad_norm": 0.6072417644208162, + "learning_rate": 6.623169339493033e-07, + "loss": 0.2701, + "step": 32873 + }, + { + "epoch": 1.5399821989038274, + "grad_norm": 0.5989043339452207, + "learning_rate": 6.621883587643246e-07, + "loss": 0.2757, + "step": 32874 + }, + { + "epoch": 1.5400290438937556, + "grad_norm": 0.5441697451515334, + "learning_rate": 6.620597941554899e-07, + "loss": 0.2754, + "step": 32875 + }, + { + "epoch": 1.5400758888836839, + "grad_norm": 0.5810448115267397, + "learning_rate": 6.619312401235401e-07, + "loss": 0.2711, + "step": 32876 + }, + { + "epoch": 1.5401227338736123, + "grad_norm": 0.6162210178507951, + "learning_rate": 6.618026966692132e-07, + "loss": 0.2914, + "step": 32877 + }, + { + "epoch": 1.5401695788635406, + "grad_norm": 0.5591510976540766, + "learning_rate": 6.616741637932505e-07, + "loss": 0.2582, + "step": 32878 + }, + { + "epoch": 1.5402164238534688, + "grad_norm": 0.5977927805734444, + "learning_rate": 6.615456414963903e-07, + "loss": 0.2841, + "step": 32879 + }, + { + "epoch": 1.5402632688433973, + "grad_norm": 0.6332899563097978, + "learning_rate": 6.614171297793728e-07, + "loss": 0.2672, + "step": 32880 + }, + { + "epoch": 1.5403101138333255, + "grad_norm": 0.6378141216790478, + "learning_rate": 6.612886286429377e-07, + "loss": 0.276, + "step": 32881 + }, + { + "epoch": 1.5403569588232537, + "grad_norm": 0.6274676706241934, + "learning_rate": 6.611601380878249e-07, + "loss": 0.2782, + "step": 32882 + }, + { + "epoch": 1.5404038038131822, + "grad_norm": 0.5859855750234936, + "learning_rate": 6.610316581147727e-07, + "loss": 0.2659, + "step": 32883 + }, + { + "epoch": 1.5404506488031107, + "grad_norm": 0.6120948739975872, + "learning_rate": 6.609031887245213e-07, + "loss": 0.2765, + "step": 32884 + }, + { + "epoch": 1.5404974937930387, + "grad_norm": 0.5938781993186025, + "learning_rate": 6.607747299178102e-07, + "loss": 0.273, + "step": 32885 + }, + { + "epoch": 1.5405443387829671, + "grad_norm": 0.5821793633284437, + "learning_rate": 6.606462816953777e-07, + "loss": 0.2603, + "step": 32886 + }, + { + "epoch": 1.5405911837728956, + "grad_norm": 0.5772471516807296, + "learning_rate": 6.605178440579635e-07, + "loss": 0.2585, + "step": 32887 + }, + { + "epoch": 1.5406380287628239, + "grad_norm": 0.5790903941826532, + "learning_rate": 6.603894170063078e-07, + "loss": 0.2741, + "step": 32888 + }, + { + "epoch": 1.540684873752752, + "grad_norm": 0.583107348229206, + "learning_rate": 6.602610005411475e-07, + "loss": 0.2621, + "step": 32889 + }, + { + "epoch": 1.5407317187426806, + "grad_norm": 0.6397844500676638, + "learning_rate": 6.601325946632228e-07, + "loss": 0.2831, + "step": 32890 + }, + { + "epoch": 1.5407785637326088, + "grad_norm": 0.607629361893147, + "learning_rate": 6.600041993732725e-07, + "loss": 0.268, + "step": 32891 + }, + { + "epoch": 1.540825408722537, + "grad_norm": 0.5737605287816163, + "learning_rate": 6.598758146720366e-07, + "loss": 0.2576, + "step": 32892 + }, + { + "epoch": 1.5408722537124655, + "grad_norm": 0.5689838865086255, + "learning_rate": 6.597474405602522e-07, + "loss": 0.27, + "step": 32893 + }, + { + "epoch": 1.5409190987023937, + "grad_norm": 0.5520302879016833, + "learning_rate": 6.596190770386593e-07, + "loss": 0.2697, + "step": 32894 + }, + { + "epoch": 1.540965943692322, + "grad_norm": 0.6287884078774685, + "learning_rate": 6.594907241079951e-07, + "loss": 0.2858, + "step": 32895 + }, + { + "epoch": 1.5410127886822504, + "grad_norm": 0.5582035777190322, + "learning_rate": 6.593623817689995e-07, + "loss": 0.2662, + "step": 32896 + }, + { + "epoch": 1.541059633672179, + "grad_norm": 0.584515134012431, + "learning_rate": 6.592340500224106e-07, + "loss": 0.2692, + "step": 32897 + }, + { + "epoch": 1.541106478662107, + "grad_norm": 0.5874216917230697, + "learning_rate": 6.591057288689673e-07, + "loss": 0.2677, + "step": 32898 + }, + { + "epoch": 1.5411533236520354, + "grad_norm": 0.6221009158885863, + "learning_rate": 6.589774183094083e-07, + "loss": 0.2823, + "step": 32899 + }, + { + "epoch": 1.5412001686419639, + "grad_norm": 0.5716072733364181, + "learning_rate": 6.588491183444717e-07, + "loss": 0.2536, + "step": 32900 + }, + { + "epoch": 1.541247013631892, + "grad_norm": 0.5941972693516534, + "learning_rate": 6.58720828974895e-07, + "loss": 0.2913, + "step": 32901 + }, + { + "epoch": 1.5412938586218203, + "grad_norm": 0.5664799578244899, + "learning_rate": 6.58592550201417e-07, + "loss": 0.2666, + "step": 32902 + }, + { + "epoch": 1.5413407036117488, + "grad_norm": 0.5752163084966532, + "learning_rate": 6.584642820247761e-07, + "loss": 0.2625, + "step": 32903 + }, + { + "epoch": 1.541387548601677, + "grad_norm": 0.606884354462278, + "learning_rate": 6.583360244457105e-07, + "loss": 0.2675, + "step": 32904 + }, + { + "epoch": 1.5414343935916053, + "grad_norm": 0.5981261094338371, + "learning_rate": 6.582077774649578e-07, + "loss": 0.2871, + "step": 32905 + }, + { + "epoch": 1.5414812385815337, + "grad_norm": 0.5980686552883631, + "learning_rate": 6.580795410832577e-07, + "loss": 0.2699, + "step": 32906 + }, + { + "epoch": 1.541528083571462, + "grad_norm": 0.6376070944984532, + "learning_rate": 6.579513153013467e-07, + "loss": 0.266, + "step": 32907 + }, + { + "epoch": 1.5415749285613902, + "grad_norm": 0.5762121822459247, + "learning_rate": 6.57823100119962e-07, + "loss": 0.2679, + "step": 32908 + }, + { + "epoch": 1.5416217735513187, + "grad_norm": 0.5853814163317608, + "learning_rate": 6.576948955398423e-07, + "loss": 0.2632, + "step": 32909 + }, + { + "epoch": 1.5416686185412471, + "grad_norm": 0.5537378357945402, + "learning_rate": 6.575667015617257e-07, + "loss": 0.2582, + "step": 32910 + }, + { + "epoch": 1.5417154635311754, + "grad_norm": 0.5881052509491496, + "learning_rate": 6.574385181863496e-07, + "loss": 0.2705, + "step": 32911 + }, + { + "epoch": 1.5417623085211036, + "grad_norm": 0.5836268157909119, + "learning_rate": 6.573103454144525e-07, + "loss": 0.2621, + "step": 32912 + }, + { + "epoch": 1.541809153511032, + "grad_norm": 0.6203570053849811, + "learning_rate": 6.571821832467703e-07, + "loss": 0.2669, + "step": 32913 + }, + { + "epoch": 1.5418559985009603, + "grad_norm": 0.651841506684642, + "learning_rate": 6.57054031684042e-07, + "loss": 0.2902, + "step": 32914 + }, + { + "epoch": 1.5419028434908886, + "grad_norm": 0.6042899635346621, + "learning_rate": 6.569258907270043e-07, + "loss": 0.2593, + "step": 32915 + }, + { + "epoch": 1.541949688480817, + "grad_norm": 0.5633422863498322, + "learning_rate": 6.567977603763948e-07, + "loss": 0.2617, + "step": 32916 + }, + { + "epoch": 1.5419965334707453, + "grad_norm": 0.5756201645679431, + "learning_rate": 6.566696406329506e-07, + "loss": 0.263, + "step": 32917 + }, + { + "epoch": 1.5420433784606735, + "grad_norm": 0.6123210682426261, + "learning_rate": 6.565415314974102e-07, + "loss": 0.2904, + "step": 32918 + }, + { + "epoch": 1.542090223450602, + "grad_norm": 0.6024634021783505, + "learning_rate": 6.564134329705091e-07, + "loss": 0.2836, + "step": 32919 + }, + { + "epoch": 1.5421370684405304, + "grad_norm": 0.6399308590691659, + "learning_rate": 6.562853450529855e-07, + "loss": 0.2948, + "step": 32920 + }, + { + "epoch": 1.5421839134304585, + "grad_norm": 0.6185027921528871, + "learning_rate": 6.561572677455771e-07, + "loss": 0.2832, + "step": 32921 + }, + { + "epoch": 1.542230758420387, + "grad_norm": 0.6135714483458496, + "learning_rate": 6.560292010490194e-07, + "loss": 0.2644, + "step": 32922 + }, + { + "epoch": 1.5422776034103154, + "grad_norm": 0.5849725248787956, + "learning_rate": 6.559011449640501e-07, + "loss": 0.2676, + "step": 32923 + }, + { + "epoch": 1.5423244484002436, + "grad_norm": 0.6273185360957055, + "learning_rate": 6.557730994914069e-07, + "loss": 0.2717, + "step": 32924 + }, + { + "epoch": 1.5423712933901719, + "grad_norm": 0.664721872251476, + "learning_rate": 6.556450646318255e-07, + "loss": 0.2867, + "step": 32925 + }, + { + "epoch": 1.5424181383801003, + "grad_norm": 0.6292849585176117, + "learning_rate": 6.555170403860431e-07, + "loss": 0.278, + "step": 32926 + }, + { + "epoch": 1.5424649833700286, + "grad_norm": 0.6552702216997393, + "learning_rate": 6.553890267547971e-07, + "loss": 0.2721, + "step": 32927 + }, + { + "epoch": 1.5425118283599568, + "grad_norm": 0.6387292020413735, + "learning_rate": 6.55261023738823e-07, + "loss": 0.2603, + "step": 32928 + }, + { + "epoch": 1.5425586733498853, + "grad_norm": 0.5979988604316545, + "learning_rate": 6.551330313388582e-07, + "loss": 0.2784, + "step": 32929 + }, + { + "epoch": 1.5426055183398135, + "grad_norm": 0.6504068228930856, + "learning_rate": 6.550050495556395e-07, + "loss": 0.2918, + "step": 32930 + }, + { + "epoch": 1.5426523633297418, + "grad_norm": 0.5894739537933728, + "learning_rate": 6.548770783899025e-07, + "loss": 0.2762, + "step": 32931 + }, + { + "epoch": 1.5426992083196702, + "grad_norm": 0.6134084048847951, + "learning_rate": 6.547491178423843e-07, + "loss": 0.2766, + "step": 32932 + }, + { + "epoch": 1.5427460533095987, + "grad_norm": 0.5734075193798451, + "learning_rate": 6.546211679138209e-07, + "loss": 0.2627, + "step": 32933 + }, + { + "epoch": 1.5427928982995267, + "grad_norm": 0.5745012937558253, + "learning_rate": 6.544932286049496e-07, + "loss": 0.2685, + "step": 32934 + }, + { + "epoch": 1.5428397432894552, + "grad_norm": 0.5676061980978565, + "learning_rate": 6.543652999165054e-07, + "loss": 0.2637, + "step": 32935 + }, + { + "epoch": 1.5428865882793836, + "grad_norm": 0.6056523490622686, + "learning_rate": 6.542373818492256e-07, + "loss": 0.2776, + "step": 32936 + }, + { + "epoch": 1.5429334332693119, + "grad_norm": 0.6039732537479773, + "learning_rate": 6.541094744038451e-07, + "loss": 0.2636, + "step": 32937 + }, + { + "epoch": 1.5429802782592401, + "grad_norm": 0.6257240997914529, + "learning_rate": 6.539815775811007e-07, + "loss": 0.2898, + "step": 32938 + }, + { + "epoch": 1.5430271232491686, + "grad_norm": 0.6243805956622822, + "learning_rate": 6.538536913817281e-07, + "loss": 0.2793, + "step": 32939 + }, + { + "epoch": 1.5430739682390968, + "grad_norm": 0.5449203378804838, + "learning_rate": 6.537258158064636e-07, + "loss": 0.2453, + "step": 32940 + }, + { + "epoch": 1.543120813229025, + "grad_norm": 0.6186720082492756, + "learning_rate": 6.535979508560436e-07, + "loss": 0.2845, + "step": 32941 + }, + { + "epoch": 1.5431676582189535, + "grad_norm": 0.6220056028457934, + "learning_rate": 6.534700965312026e-07, + "loss": 0.2826, + "step": 32942 + }, + { + "epoch": 1.5432145032088818, + "grad_norm": 0.6262640338169093, + "learning_rate": 6.533422528326783e-07, + "loss": 0.2743, + "step": 32943 + }, + { + "epoch": 1.54326134819881, + "grad_norm": 0.5522941754593482, + "learning_rate": 6.532144197612039e-07, + "loss": 0.2583, + "step": 32944 + }, + { + "epoch": 1.5433081931887385, + "grad_norm": 0.627883211592979, + "learning_rate": 6.530865973175166e-07, + "loss": 0.2686, + "step": 32945 + }, + { + "epoch": 1.543355038178667, + "grad_norm": 0.5621330785549021, + "learning_rate": 6.529587855023517e-07, + "loss": 0.2688, + "step": 32946 + }, + { + "epoch": 1.5434018831685952, + "grad_norm": 0.6192734832976377, + "learning_rate": 6.528309843164446e-07, + "loss": 0.2719, + "step": 32947 + }, + { + "epoch": 1.5434487281585234, + "grad_norm": 0.5841038474494797, + "learning_rate": 6.527031937605319e-07, + "loss": 0.2609, + "step": 32948 + }, + { + "epoch": 1.5434955731484519, + "grad_norm": 0.5834551717463152, + "learning_rate": 6.525754138353477e-07, + "loss": 0.2719, + "step": 32949 + }, + { + "epoch": 1.5435424181383801, + "grad_norm": 0.6125384052157476, + "learning_rate": 6.524476445416272e-07, + "loss": 0.2746, + "step": 32950 + }, + { + "epoch": 1.5435892631283084, + "grad_norm": 0.568460050776728, + "learning_rate": 6.523198858801061e-07, + "loss": 0.2549, + "step": 32951 + }, + { + "epoch": 1.5436361081182368, + "grad_norm": 0.5874447756779683, + "learning_rate": 6.521921378515195e-07, + "loss": 0.265, + "step": 32952 + }, + { + "epoch": 1.543682953108165, + "grad_norm": 0.5622133809514658, + "learning_rate": 6.52064400456603e-07, + "loss": 0.2696, + "step": 32953 + }, + { + "epoch": 1.5437297980980933, + "grad_norm": 0.5622824729158353, + "learning_rate": 6.51936673696091e-07, + "loss": 0.265, + "step": 32954 + }, + { + "epoch": 1.5437766430880218, + "grad_norm": 0.5614989030559784, + "learning_rate": 6.518089575707201e-07, + "loss": 0.2548, + "step": 32955 + }, + { + "epoch": 1.5438234880779502, + "grad_norm": 0.5610228850641642, + "learning_rate": 6.516812520812241e-07, + "loss": 0.2702, + "step": 32956 + }, + { + "epoch": 1.5438703330678782, + "grad_norm": 0.5786765277366358, + "learning_rate": 6.515535572283368e-07, + "loss": 0.2748, + "step": 32957 + }, + { + "epoch": 1.5439171780578067, + "grad_norm": 0.6153528763312456, + "learning_rate": 6.514258730127945e-07, + "loss": 0.2499, + "step": 32958 + }, + { + "epoch": 1.5439640230477352, + "grad_norm": 0.5997958643291388, + "learning_rate": 6.512981994353315e-07, + "loss": 0.2743, + "step": 32959 + }, + { + "epoch": 1.5440108680376634, + "grad_norm": 0.6064583113940948, + "learning_rate": 6.51170536496683e-07, + "loss": 0.2839, + "step": 32960 + }, + { + "epoch": 1.5440577130275916, + "grad_norm": 0.5463439769359961, + "learning_rate": 6.510428841975838e-07, + "loss": 0.259, + "step": 32961 + }, + { + "epoch": 1.5441045580175201, + "grad_norm": 0.5462698249821482, + "learning_rate": 6.509152425387677e-07, + "loss": 0.2403, + "step": 32962 + }, + { + "epoch": 1.5441514030074484, + "grad_norm": 0.5848120223733329, + "learning_rate": 6.507876115209702e-07, + "loss": 0.2655, + "step": 32963 + }, + { + "epoch": 1.5441982479973766, + "grad_norm": 0.6046349326503083, + "learning_rate": 6.506599911449244e-07, + "loss": 0.2671, + "step": 32964 + }, + { + "epoch": 1.544245092987305, + "grad_norm": 0.5703919261181859, + "learning_rate": 6.505323814113654e-07, + "loss": 0.2718, + "step": 32965 + }, + { + "epoch": 1.5442919379772333, + "grad_norm": 0.6458689320433502, + "learning_rate": 6.50404782321028e-07, + "loss": 0.2724, + "step": 32966 + }, + { + "epoch": 1.5443387829671615, + "grad_norm": 0.6249110613606021, + "learning_rate": 6.502771938746469e-07, + "loss": 0.2699, + "step": 32967 + }, + { + "epoch": 1.54438562795709, + "grad_norm": 0.5997206429250153, + "learning_rate": 6.501496160729548e-07, + "loss": 0.2614, + "step": 32968 + }, + { + "epoch": 1.5444324729470185, + "grad_norm": 0.5865527855482771, + "learning_rate": 6.500220489166866e-07, + "loss": 0.2511, + "step": 32969 + }, + { + "epoch": 1.5444793179369465, + "grad_norm": 0.5846374646545953, + "learning_rate": 6.498944924065772e-07, + "loss": 0.2582, + "step": 32970 + }, + { + "epoch": 1.544526162926875, + "grad_norm": 0.5845717936662137, + "learning_rate": 6.497669465433595e-07, + "loss": 0.2754, + "step": 32971 + }, + { + "epoch": 1.5445730079168034, + "grad_norm": 0.609451590316304, + "learning_rate": 6.496394113277679e-07, + "loss": 0.2733, + "step": 32972 + }, + { + "epoch": 1.5446198529067316, + "grad_norm": 0.5923639390321273, + "learning_rate": 6.495118867605373e-07, + "loss": 0.2763, + "step": 32973 + }, + { + "epoch": 1.54466669789666, + "grad_norm": 0.6080029477470876, + "learning_rate": 6.493843728423998e-07, + "loss": 0.2838, + "step": 32974 + }, + { + "epoch": 1.5447135428865884, + "grad_norm": 0.5864401390781562, + "learning_rate": 6.492568695740902e-07, + "loss": 0.2688, + "step": 32975 + }, + { + "epoch": 1.5447603878765166, + "grad_norm": 0.6215517080784334, + "learning_rate": 6.491293769563428e-07, + "loss": 0.2745, + "step": 32976 + }, + { + "epoch": 1.5448072328664448, + "grad_norm": 0.594682009482527, + "learning_rate": 6.490018949898899e-07, + "loss": 0.2762, + "step": 32977 + }, + { + "epoch": 1.5448540778563733, + "grad_norm": 0.6431908703029534, + "learning_rate": 6.48874423675466e-07, + "loss": 0.2761, + "step": 32978 + }, + { + "epoch": 1.5449009228463015, + "grad_norm": 0.5776544322778939, + "learning_rate": 6.487469630138052e-07, + "loss": 0.2684, + "step": 32979 + }, + { + "epoch": 1.5449477678362298, + "grad_norm": 0.6216536892318397, + "learning_rate": 6.486195130056394e-07, + "loss": 0.2656, + "step": 32980 + }, + { + "epoch": 1.5449946128261582, + "grad_norm": 0.5738436791519911, + "learning_rate": 6.484920736517031e-07, + "loss": 0.276, + "step": 32981 + }, + { + "epoch": 1.5450414578160867, + "grad_norm": 0.5832810464278829, + "learning_rate": 6.483646449527298e-07, + "loss": 0.2639, + "step": 32982 + }, + { + "epoch": 1.545088302806015, + "grad_norm": 0.5921437573426731, + "learning_rate": 6.482372269094531e-07, + "loss": 0.2724, + "step": 32983 + }, + { + "epoch": 1.5451351477959432, + "grad_norm": 0.5753906693348967, + "learning_rate": 6.481098195226049e-07, + "loss": 0.2804, + "step": 32984 + }, + { + "epoch": 1.5451819927858716, + "grad_norm": 0.6059929352626834, + "learning_rate": 6.479824227929199e-07, + "loss": 0.2615, + "step": 32985 + }, + { + "epoch": 1.5452288377758, + "grad_norm": 0.606362851543813, + "learning_rate": 6.4785503672113e-07, + "loss": 0.2716, + "step": 32986 + }, + { + "epoch": 1.5452756827657281, + "grad_norm": 0.5937030939763074, + "learning_rate": 6.477276613079689e-07, + "loss": 0.2675, + "step": 32987 + }, + { + "epoch": 1.5453225277556566, + "grad_norm": 0.5973128936640003, + "learning_rate": 6.476002965541695e-07, + "loss": 0.2662, + "step": 32988 + }, + { + "epoch": 1.5453693727455848, + "grad_norm": 0.5718723709793163, + "learning_rate": 6.474729424604647e-07, + "loss": 0.266, + "step": 32989 + }, + { + "epoch": 1.545416217735513, + "grad_norm": 0.5608404238087121, + "learning_rate": 6.473455990275885e-07, + "loss": 0.2678, + "step": 32990 + }, + { + "epoch": 1.5454630627254415, + "grad_norm": 0.6537308931697212, + "learning_rate": 6.472182662562726e-07, + "loss": 0.2752, + "step": 32991 + }, + { + "epoch": 1.54550990771537, + "grad_norm": 0.6063366834902404, + "learning_rate": 6.470909441472493e-07, + "loss": 0.275, + "step": 32992 + }, + { + "epoch": 1.545556752705298, + "grad_norm": 0.5634283972354205, + "learning_rate": 6.469636327012516e-07, + "loss": 0.252, + "step": 32993 + }, + { + "epoch": 1.5456035976952265, + "grad_norm": 0.5912621576271166, + "learning_rate": 6.468363319190127e-07, + "loss": 0.2593, + "step": 32994 + }, + { + "epoch": 1.545650442685155, + "grad_norm": 0.6049860626523753, + "learning_rate": 6.467090418012648e-07, + "loss": 0.2629, + "step": 32995 + }, + { + "epoch": 1.5456972876750832, + "grad_norm": 0.5660436543878148, + "learning_rate": 6.465817623487405e-07, + "loss": 0.2638, + "step": 32996 + }, + { + "epoch": 1.5457441326650114, + "grad_norm": 0.6818971058190154, + "learning_rate": 6.464544935621731e-07, + "loss": 0.28, + "step": 32997 + }, + { + "epoch": 1.54579097765494, + "grad_norm": 0.5817343224386681, + "learning_rate": 6.463272354422942e-07, + "loss": 0.275, + "step": 32998 + }, + { + "epoch": 1.5458378226448681, + "grad_norm": 0.6047208887444615, + "learning_rate": 6.461999879898356e-07, + "loss": 0.2789, + "step": 32999 + }, + { + "epoch": 1.5458846676347964, + "grad_norm": 0.5805005063422352, + "learning_rate": 6.460727512055301e-07, + "loss": 0.2606, + "step": 33000 + }, + { + "epoch": 1.5459315126247248, + "grad_norm": 0.6014704690170608, + "learning_rate": 6.459455250901098e-07, + "loss": 0.2781, + "step": 33001 + }, + { + "epoch": 1.545978357614653, + "grad_norm": 0.6241948669072914, + "learning_rate": 6.458183096443071e-07, + "loss": 0.2628, + "step": 33002 + }, + { + "epoch": 1.5460252026045813, + "grad_norm": 0.5539083281819791, + "learning_rate": 6.456911048688547e-07, + "loss": 0.2705, + "step": 33003 + }, + { + "epoch": 1.5460720475945098, + "grad_norm": 0.6072638532232616, + "learning_rate": 6.455639107644832e-07, + "loss": 0.2845, + "step": 33004 + }, + { + "epoch": 1.5461188925844382, + "grad_norm": 0.5891289264080033, + "learning_rate": 6.454367273319259e-07, + "loss": 0.2618, + "step": 33005 + }, + { + "epoch": 1.5461657375743663, + "grad_norm": 0.6484637807671795, + "learning_rate": 6.453095545719135e-07, + "loss": 0.2773, + "step": 33006 + }, + { + "epoch": 1.5462125825642947, + "grad_norm": 0.6183315321084634, + "learning_rate": 6.451823924851785e-07, + "loss": 0.2713, + "step": 33007 + }, + { + "epoch": 1.5462594275542232, + "grad_norm": 0.6034923476726773, + "learning_rate": 6.450552410724526e-07, + "loss": 0.2665, + "step": 33008 + }, + { + "epoch": 1.5463062725441514, + "grad_norm": 0.5658802290063629, + "learning_rate": 6.449281003344676e-07, + "loss": 0.2556, + "step": 33009 + }, + { + "epoch": 1.5463531175340797, + "grad_norm": 0.6011008744630795, + "learning_rate": 6.448009702719557e-07, + "loss": 0.2638, + "step": 33010 + }, + { + "epoch": 1.5463999625240081, + "grad_norm": 0.5482767605916973, + "learning_rate": 6.446738508856473e-07, + "loss": 0.2546, + "step": 33011 + }, + { + "epoch": 1.5464468075139364, + "grad_norm": 0.5363811971424722, + "learning_rate": 6.445467421762755e-07, + "loss": 0.253, + "step": 33012 + }, + { + "epoch": 1.5464936525038646, + "grad_norm": 0.6336115930504036, + "learning_rate": 6.444196441445699e-07, + "loss": 0.2852, + "step": 33013 + }, + { + "epoch": 1.546540497493793, + "grad_norm": 0.6041990923910026, + "learning_rate": 6.442925567912628e-07, + "loss": 0.2784, + "step": 33014 + }, + { + "epoch": 1.5465873424837213, + "grad_norm": 0.5890909593941775, + "learning_rate": 6.441654801170857e-07, + "loss": 0.268, + "step": 33015 + }, + { + "epoch": 1.5466341874736496, + "grad_norm": 0.5841895197959426, + "learning_rate": 6.440384141227707e-07, + "loss": 0.2701, + "step": 33016 + }, + { + "epoch": 1.546681032463578, + "grad_norm": 0.5775415060190535, + "learning_rate": 6.439113588090473e-07, + "loss": 0.2806, + "step": 33017 + }, + { + "epoch": 1.5467278774535065, + "grad_norm": 0.6032409209971262, + "learning_rate": 6.437843141766481e-07, + "loss": 0.2755, + "step": 33018 + }, + { + "epoch": 1.5467747224434347, + "grad_norm": 0.606915406248371, + "learning_rate": 6.436572802263031e-07, + "loss": 0.2627, + "step": 33019 + }, + { + "epoch": 1.546821567433363, + "grad_norm": 0.6046954026783276, + "learning_rate": 6.435302569587437e-07, + "loss": 0.2774, + "step": 33020 + }, + { + "epoch": 1.5468684124232914, + "grad_norm": 0.6464704331834099, + "learning_rate": 6.434032443747013e-07, + "loss": 0.3, + "step": 33021 + }, + { + "epoch": 1.5469152574132197, + "grad_norm": 0.5509920962990322, + "learning_rate": 6.432762424749073e-07, + "loss": 0.2595, + "step": 33022 + }, + { + "epoch": 1.546962102403148, + "grad_norm": 0.5807444103919792, + "learning_rate": 6.43149251260091e-07, + "loss": 0.2656, + "step": 33023 + }, + { + "epoch": 1.5470089473930764, + "grad_norm": 0.6187207534362082, + "learning_rate": 6.430222707309841e-07, + "loss": 0.2744, + "step": 33024 + }, + { + "epoch": 1.5470557923830046, + "grad_norm": 0.5980377803201727, + "learning_rate": 6.428953008883179e-07, + "loss": 0.2801, + "step": 33025 + }, + { + "epoch": 1.5471026373729329, + "grad_norm": 0.5981538393482063, + "learning_rate": 6.427683417328217e-07, + "loss": 0.2633, + "step": 33026 + }, + { + "epoch": 1.5471494823628613, + "grad_norm": 0.5685206179691221, + "learning_rate": 6.426413932652273e-07, + "loss": 0.26, + "step": 33027 + }, + { + "epoch": 1.5471963273527898, + "grad_norm": 0.5865473244961557, + "learning_rate": 6.425144554862653e-07, + "loss": 0.2791, + "step": 33028 + }, + { + "epoch": 1.5472431723427178, + "grad_norm": 0.6068455186076884, + "learning_rate": 6.423875283966652e-07, + "loss": 0.2673, + "step": 33029 + }, + { + "epoch": 1.5472900173326463, + "grad_norm": 0.5799004961702962, + "learning_rate": 6.422606119971578e-07, + "loss": 0.2641, + "step": 33030 + }, + { + "epoch": 1.5473368623225747, + "grad_norm": 0.6013198319303102, + "learning_rate": 6.421337062884736e-07, + "loss": 0.2655, + "step": 33031 + }, + { + "epoch": 1.547383707312503, + "grad_norm": 0.5786161525610178, + "learning_rate": 6.420068112713437e-07, + "loss": 0.2684, + "step": 33032 + }, + { + "epoch": 1.5474305523024312, + "grad_norm": 0.6063215229449926, + "learning_rate": 6.41879926946497e-07, + "loss": 0.2796, + "step": 33033 + }, + { + "epoch": 1.5474773972923597, + "grad_norm": 0.5934479564317129, + "learning_rate": 6.417530533146651e-07, + "loss": 0.2809, + "step": 33034 + }, + { + "epoch": 1.547524242282288, + "grad_norm": 0.5946534188383769, + "learning_rate": 6.416261903765764e-07, + "loss": 0.2663, + "step": 33035 + }, + { + "epoch": 1.5475710872722162, + "grad_norm": 0.6009292796556196, + "learning_rate": 6.414993381329621e-07, + "loss": 0.271, + "step": 33036 + }, + { + "epoch": 1.5476179322621446, + "grad_norm": 0.5471762370163504, + "learning_rate": 6.413724965845516e-07, + "loss": 0.2453, + "step": 33037 + }, + { + "epoch": 1.5476647772520729, + "grad_norm": 0.5984122100906046, + "learning_rate": 6.412456657320756e-07, + "loss": 0.2723, + "step": 33038 + }, + { + "epoch": 1.547711622242001, + "grad_norm": 0.6260077274309914, + "learning_rate": 6.411188455762643e-07, + "loss": 0.2677, + "step": 33039 + }, + { + "epoch": 1.5477584672319296, + "grad_norm": 0.5775376248535098, + "learning_rate": 6.409920361178467e-07, + "loss": 0.2503, + "step": 33040 + }, + { + "epoch": 1.547805312221858, + "grad_norm": 0.56715939935576, + "learning_rate": 6.408652373575522e-07, + "loss": 0.2608, + "step": 33041 + }, + { + "epoch": 1.547852157211786, + "grad_norm": 0.5943555531256344, + "learning_rate": 6.407384492961107e-07, + "loss": 0.2611, + "step": 33042 + }, + { + "epoch": 1.5478990022017145, + "grad_norm": 0.6346189432640842, + "learning_rate": 6.406116719342523e-07, + "loss": 0.2835, + "step": 33043 + }, + { + "epoch": 1.547945847191643, + "grad_norm": 0.6029634327619332, + "learning_rate": 6.404849052727063e-07, + "loss": 0.2839, + "step": 33044 + }, + { + "epoch": 1.5479926921815712, + "grad_norm": 0.6081553876476494, + "learning_rate": 6.403581493122024e-07, + "loss": 0.2642, + "step": 33045 + }, + { + "epoch": 1.5480395371714994, + "grad_norm": 0.5767742986692355, + "learning_rate": 6.402314040534705e-07, + "loss": 0.2583, + "step": 33046 + }, + { + "epoch": 1.548086382161428, + "grad_norm": 0.5584534576736515, + "learning_rate": 6.401046694972396e-07, + "loss": 0.2545, + "step": 33047 + }, + { + "epoch": 1.5481332271513562, + "grad_norm": 0.5865620716391486, + "learning_rate": 6.399779456442379e-07, + "loss": 0.2658, + "step": 33048 + }, + { + "epoch": 1.5481800721412844, + "grad_norm": 0.5549515390470613, + "learning_rate": 6.398512324951958e-07, + "loss": 0.2721, + "step": 33049 + }, + { + "epoch": 1.5482269171312129, + "grad_norm": 0.5833444889767647, + "learning_rate": 6.397245300508422e-07, + "loss": 0.2558, + "step": 33050 + }, + { + "epoch": 1.548273762121141, + "grad_norm": 0.6263622674541616, + "learning_rate": 6.395978383119067e-07, + "loss": 0.2671, + "step": 33051 + }, + { + "epoch": 1.5483206071110693, + "grad_norm": 0.5671918993383941, + "learning_rate": 6.394711572791185e-07, + "loss": 0.2741, + "step": 33052 + }, + { + "epoch": 1.5483674521009978, + "grad_norm": 0.5915453551422353, + "learning_rate": 6.393444869532056e-07, + "loss": 0.2757, + "step": 33053 + }, + { + "epoch": 1.5484142970909263, + "grad_norm": 0.610689208357877, + "learning_rate": 6.392178273348981e-07, + "loss": 0.2944, + "step": 33054 + }, + { + "epoch": 1.5484611420808545, + "grad_norm": 0.5952155852768327, + "learning_rate": 6.390911784249234e-07, + "loss": 0.2681, + "step": 33055 + }, + { + "epoch": 1.5485079870707827, + "grad_norm": 0.6116201736733701, + "learning_rate": 6.389645402240116e-07, + "loss": 0.2624, + "step": 33056 + }, + { + "epoch": 1.5485548320607112, + "grad_norm": 0.5978293158851707, + "learning_rate": 6.388379127328911e-07, + "loss": 0.2718, + "step": 33057 + }, + { + "epoch": 1.5486016770506394, + "grad_norm": 0.6107693356214026, + "learning_rate": 6.387112959522912e-07, + "loss": 0.2659, + "step": 33058 + }, + { + "epoch": 1.5486485220405677, + "grad_norm": 0.6113498500972452, + "learning_rate": 6.385846898829393e-07, + "loss": 0.291, + "step": 33059 + }, + { + "epoch": 1.5486953670304962, + "grad_norm": 0.6352792495673838, + "learning_rate": 6.384580945255647e-07, + "loss": 0.2852, + "step": 33060 + }, + { + "epoch": 1.5487422120204244, + "grad_norm": 0.5823118827282299, + "learning_rate": 6.383315098808965e-07, + "loss": 0.2765, + "step": 33061 + }, + { + "epoch": 1.5487890570103526, + "grad_norm": 0.5556558901760613, + "learning_rate": 6.38204935949662e-07, + "loss": 0.2636, + "step": 33062 + }, + { + "epoch": 1.548835902000281, + "grad_norm": 0.6055157078936936, + "learning_rate": 6.380783727325898e-07, + "loss": 0.2728, + "step": 33063 + }, + { + "epoch": 1.5488827469902096, + "grad_norm": 0.574836101431231, + "learning_rate": 6.379518202304097e-07, + "loss": 0.262, + "step": 33064 + }, + { + "epoch": 1.5489295919801376, + "grad_norm": 0.6360837075592962, + "learning_rate": 6.37825278443848e-07, + "loss": 0.2894, + "step": 33065 + }, + { + "epoch": 1.548976436970066, + "grad_norm": 0.607900116531243, + "learning_rate": 6.376987473736337e-07, + "loss": 0.2978, + "step": 33066 + }, + { + "epoch": 1.5490232819599945, + "grad_norm": 0.5981670797108092, + "learning_rate": 6.37572227020496e-07, + "loss": 0.2818, + "step": 33067 + }, + { + "epoch": 1.5490701269499227, + "grad_norm": 0.5729437943402993, + "learning_rate": 6.374457173851609e-07, + "loss": 0.259, + "step": 33068 + }, + { + "epoch": 1.549116971939851, + "grad_norm": 0.6251704201538577, + "learning_rate": 6.373192184683579e-07, + "loss": 0.2786, + "step": 33069 + }, + { + "epoch": 1.5491638169297794, + "grad_norm": 0.6009481776125337, + "learning_rate": 6.371927302708153e-07, + "loss": 0.2773, + "step": 33070 + }, + { + "epoch": 1.5492106619197077, + "grad_norm": 0.604457084532972, + "learning_rate": 6.370662527932595e-07, + "loss": 0.2722, + "step": 33071 + }, + { + "epoch": 1.549257506909636, + "grad_norm": 0.583844973987084, + "learning_rate": 6.369397860364193e-07, + "loss": 0.2703, + "step": 33072 + }, + { + "epoch": 1.5493043518995644, + "grad_norm": 0.5856304450415007, + "learning_rate": 6.368133300010223e-07, + "loss": 0.2666, + "step": 33073 + }, + { + "epoch": 1.5493511968894926, + "grad_norm": 0.6077980103944903, + "learning_rate": 6.366868846877968e-07, + "loss": 0.2857, + "step": 33074 + }, + { + "epoch": 1.5493980418794209, + "grad_norm": 0.6162758715947053, + "learning_rate": 6.365604500974695e-07, + "loss": 0.2713, + "step": 33075 + }, + { + "epoch": 1.5494448868693493, + "grad_norm": 0.5948464194293059, + "learning_rate": 6.364340262307687e-07, + "loss": 0.2681, + "step": 33076 + }, + { + "epoch": 1.5494917318592778, + "grad_norm": 0.5654225062148404, + "learning_rate": 6.363076130884219e-07, + "loss": 0.2563, + "step": 33077 + }, + { + "epoch": 1.5495385768492058, + "grad_norm": 0.6836338475264495, + "learning_rate": 6.361812106711559e-07, + "loss": 0.2834, + "step": 33078 + }, + { + "epoch": 1.5495854218391343, + "grad_norm": 0.5895623930782998, + "learning_rate": 6.360548189796986e-07, + "loss": 0.2703, + "step": 33079 + }, + { + "epoch": 1.5496322668290627, + "grad_norm": 0.5772611926803372, + "learning_rate": 6.359284380147773e-07, + "loss": 0.2567, + "step": 33080 + }, + { + "epoch": 1.549679111818991, + "grad_norm": 0.5715229295789507, + "learning_rate": 6.358020677771201e-07, + "loss": 0.2485, + "step": 33081 + }, + { + "epoch": 1.5497259568089192, + "grad_norm": 0.5578268687173359, + "learning_rate": 6.356757082674525e-07, + "loss": 0.2473, + "step": 33082 + }, + { + "epoch": 1.5497728017988477, + "grad_norm": 0.659176207198834, + "learning_rate": 6.355493594865036e-07, + "loss": 0.2874, + "step": 33083 + }, + { + "epoch": 1.549819646788776, + "grad_norm": 0.5798601323554768, + "learning_rate": 6.354230214349988e-07, + "loss": 0.2481, + "step": 33084 + }, + { + "epoch": 1.5498664917787042, + "grad_norm": 0.6441976906678452, + "learning_rate": 6.352966941136656e-07, + "loss": 0.2727, + "step": 33085 + }, + { + "epoch": 1.5499133367686326, + "grad_norm": 0.5970207302748805, + "learning_rate": 6.351703775232315e-07, + "loss": 0.2674, + "step": 33086 + }, + { + "epoch": 1.5499601817585609, + "grad_norm": 0.6560029204065353, + "learning_rate": 6.35044071664423e-07, + "loss": 0.2708, + "step": 33087 + }, + { + "epoch": 1.5500070267484891, + "grad_norm": 0.6092729926258481, + "learning_rate": 6.349177765379679e-07, + "loss": 0.2734, + "step": 33088 + }, + { + "epoch": 1.5500538717384176, + "grad_norm": 0.6025575893618473, + "learning_rate": 6.347914921445924e-07, + "loss": 0.2824, + "step": 33089 + }, + { + "epoch": 1.550100716728346, + "grad_norm": 0.6385971392638292, + "learning_rate": 6.346652184850222e-07, + "loss": 0.2839, + "step": 33090 + }, + { + "epoch": 1.5501475617182743, + "grad_norm": 0.6031469673763195, + "learning_rate": 6.345389555599849e-07, + "loss": 0.264, + "step": 33091 + }, + { + "epoch": 1.5501944067082025, + "grad_norm": 0.5808202709202842, + "learning_rate": 6.344127033702069e-07, + "loss": 0.2616, + "step": 33092 + }, + { + "epoch": 1.550241251698131, + "grad_norm": 0.5763528484883549, + "learning_rate": 6.342864619164149e-07, + "loss": 0.2604, + "step": 33093 + }, + { + "epoch": 1.5502880966880592, + "grad_norm": 0.5785375772576946, + "learning_rate": 6.341602311993356e-07, + "loss": 0.2634, + "step": 33094 + }, + { + "epoch": 1.5503349416779875, + "grad_norm": 0.5763761071282754, + "learning_rate": 6.340340112196958e-07, + "loss": 0.2649, + "step": 33095 + }, + { + "epoch": 1.550381786667916, + "grad_norm": 0.6101850403358607, + "learning_rate": 6.339078019782211e-07, + "loss": 0.2596, + "step": 33096 + }, + { + "epoch": 1.5504286316578442, + "grad_norm": 0.6291372634734698, + "learning_rate": 6.337816034756375e-07, + "loss": 0.2904, + "step": 33097 + }, + { + "epoch": 1.5504754766477724, + "grad_norm": 0.5790146683068711, + "learning_rate": 6.336554157126715e-07, + "loss": 0.2672, + "step": 33098 + }, + { + "epoch": 1.5505223216377009, + "grad_norm": 0.5712578023398313, + "learning_rate": 6.335292386900496e-07, + "loss": 0.2612, + "step": 33099 + }, + { + "epoch": 1.5505691666276293, + "grad_norm": 0.585302219951614, + "learning_rate": 6.33403072408498e-07, + "loss": 0.2566, + "step": 33100 + }, + { + "epoch": 1.5506160116175574, + "grad_norm": 0.6048244487601966, + "learning_rate": 6.332769168687431e-07, + "loss": 0.2705, + "step": 33101 + }, + { + "epoch": 1.5506628566074858, + "grad_norm": 0.5970753490723582, + "learning_rate": 6.331507720715097e-07, + "loss": 0.274, + "step": 33102 + }, + { + "epoch": 1.5507097015974143, + "grad_norm": 0.656496199707951, + "learning_rate": 6.33024638017525e-07, + "loss": 0.2771, + "step": 33103 + }, + { + "epoch": 1.5507565465873425, + "grad_norm": 0.5726343430907563, + "learning_rate": 6.328985147075137e-07, + "loss": 0.2702, + "step": 33104 + }, + { + "epoch": 1.5508033915772708, + "grad_norm": 0.5822341507827223, + "learning_rate": 6.327724021422018e-07, + "loss": 0.2578, + "step": 33105 + }, + { + "epoch": 1.5508502365671992, + "grad_norm": 0.5650866187953059, + "learning_rate": 6.326463003223157e-07, + "loss": 0.2555, + "step": 33106 + }, + { + "epoch": 1.5508970815571275, + "grad_norm": 0.6383298117526652, + "learning_rate": 6.325202092485816e-07, + "loss": 0.2745, + "step": 33107 + }, + { + "epoch": 1.5509439265470557, + "grad_norm": 0.6356413468901586, + "learning_rate": 6.323941289217236e-07, + "loss": 0.2836, + "step": 33108 + }, + { + "epoch": 1.5509907715369842, + "grad_norm": 0.5791496378145924, + "learning_rate": 6.32268059342468e-07, + "loss": 0.259, + "step": 33109 + }, + { + "epoch": 1.5510376165269124, + "grad_norm": 0.5910493807973601, + "learning_rate": 6.321420005115408e-07, + "loss": 0.2734, + "step": 33110 + }, + { + "epoch": 1.5510844615168407, + "grad_norm": 0.5512708228004567, + "learning_rate": 6.320159524296662e-07, + "loss": 0.2688, + "step": 33111 + }, + { + "epoch": 1.5511313065067691, + "grad_norm": 0.6289147344342061, + "learning_rate": 6.318899150975705e-07, + "loss": 0.2819, + "step": 33112 + }, + { + "epoch": 1.5511781514966976, + "grad_norm": 0.5881785176354798, + "learning_rate": 6.317638885159793e-07, + "loss": 0.2509, + "step": 33113 + }, + { + "epoch": 1.5512249964866256, + "grad_norm": 0.6112635918360406, + "learning_rate": 6.316378726856168e-07, + "loss": 0.278, + "step": 33114 + }, + { + "epoch": 1.551271841476554, + "grad_norm": 0.5625969981637726, + "learning_rate": 6.315118676072088e-07, + "loss": 0.2679, + "step": 33115 + }, + { + "epoch": 1.5513186864664825, + "grad_norm": 0.6316525716313167, + "learning_rate": 6.313858732814809e-07, + "loss": 0.2863, + "step": 33116 + }, + { + "epoch": 1.5513655314564108, + "grad_norm": 0.5892395421838613, + "learning_rate": 6.312598897091571e-07, + "loss": 0.2739, + "step": 33117 + }, + { + "epoch": 1.551412376446339, + "grad_norm": 0.6312288019073191, + "learning_rate": 6.311339168909628e-07, + "loss": 0.2892, + "step": 33118 + }, + { + "epoch": 1.5514592214362675, + "grad_norm": 0.5544152832049385, + "learning_rate": 6.310079548276241e-07, + "loss": 0.2504, + "step": 33119 + }, + { + "epoch": 1.5515060664261957, + "grad_norm": 0.5662269462867278, + "learning_rate": 6.308820035198637e-07, + "loss": 0.2741, + "step": 33120 + }, + { + "epoch": 1.551552911416124, + "grad_norm": 0.5853350626306014, + "learning_rate": 6.307560629684078e-07, + "loss": 0.257, + "step": 33121 + }, + { + "epoch": 1.5515997564060524, + "grad_norm": 0.5762237166284601, + "learning_rate": 6.306301331739812e-07, + "loss": 0.2614, + "step": 33122 + }, + { + "epoch": 1.5516466013959807, + "grad_norm": 0.633524504930393, + "learning_rate": 6.305042141373086e-07, + "loss": 0.2958, + "step": 33123 + }, + { + "epoch": 1.551693446385909, + "grad_norm": 0.5810924786526499, + "learning_rate": 6.30378305859114e-07, + "loss": 0.2729, + "step": 33124 + }, + { + "epoch": 1.5517402913758374, + "grad_norm": 0.5511932709544156, + "learning_rate": 6.302524083401229e-07, + "loss": 0.2513, + "step": 33125 + }, + { + "epoch": 1.5517871363657658, + "grad_norm": 0.6238667808523294, + "learning_rate": 6.301265215810584e-07, + "loss": 0.2629, + "step": 33126 + }, + { + "epoch": 1.551833981355694, + "grad_norm": 0.559842972140519, + "learning_rate": 6.30000645582646e-07, + "loss": 0.2604, + "step": 33127 + }, + { + "epoch": 1.5518808263456223, + "grad_norm": 0.580884311472612, + "learning_rate": 6.298747803456099e-07, + "loss": 0.267, + "step": 33128 + }, + { + "epoch": 1.5519276713355508, + "grad_norm": 0.6335787069132043, + "learning_rate": 6.297489258706741e-07, + "loss": 0.2935, + "step": 33129 + }, + { + "epoch": 1.551974516325479, + "grad_norm": 0.6536773489124958, + "learning_rate": 6.296230821585642e-07, + "loss": 0.2829, + "step": 33130 + }, + { + "epoch": 1.5520213613154072, + "grad_norm": 0.5949479697386917, + "learning_rate": 6.294972492100032e-07, + "loss": 0.2647, + "step": 33131 + }, + { + "epoch": 1.5520682063053357, + "grad_norm": 0.5731160542965154, + "learning_rate": 6.293714270257148e-07, + "loss": 0.258, + "step": 33132 + }, + { + "epoch": 1.552115051295264, + "grad_norm": 0.6196499636291457, + "learning_rate": 6.292456156064236e-07, + "loss": 0.2739, + "step": 33133 + }, + { + "epoch": 1.5521618962851922, + "grad_norm": 0.6088241031574139, + "learning_rate": 6.291198149528535e-07, + "loss": 0.2615, + "step": 33134 + }, + { + "epoch": 1.5522087412751207, + "grad_norm": 0.6457339707108825, + "learning_rate": 6.28994025065729e-07, + "loss": 0.2876, + "step": 33135 + }, + { + "epoch": 1.5522555862650491, + "grad_norm": 0.6420559659596131, + "learning_rate": 6.288682459457734e-07, + "loss": 0.2811, + "step": 33136 + }, + { + "epoch": 1.5523024312549771, + "grad_norm": 0.6030214228763014, + "learning_rate": 6.287424775937115e-07, + "loss": 0.2678, + "step": 33137 + }, + { + "epoch": 1.5523492762449056, + "grad_norm": 0.5976029295436267, + "learning_rate": 6.286167200102663e-07, + "loss": 0.2778, + "step": 33138 + }, + { + "epoch": 1.552396121234834, + "grad_norm": 0.5671009762383584, + "learning_rate": 6.284909731961608e-07, + "loss": 0.2583, + "step": 33139 + }, + { + "epoch": 1.5524429662247623, + "grad_norm": 0.626319259611591, + "learning_rate": 6.283652371521196e-07, + "loss": 0.2857, + "step": 33140 + }, + { + "epoch": 1.5524898112146905, + "grad_norm": 0.6418079805186216, + "learning_rate": 6.282395118788659e-07, + "loss": 0.2954, + "step": 33141 + }, + { + "epoch": 1.552536656204619, + "grad_norm": 0.6054749053130692, + "learning_rate": 6.281137973771231e-07, + "loss": 0.2639, + "step": 33142 + }, + { + "epoch": 1.5525835011945472, + "grad_norm": 0.6069748124071118, + "learning_rate": 6.279880936476151e-07, + "loss": 0.2692, + "step": 33143 + }, + { + "epoch": 1.5526303461844755, + "grad_norm": 0.6078437178863252, + "learning_rate": 6.278624006910661e-07, + "loss": 0.2643, + "step": 33144 + }, + { + "epoch": 1.552677191174404, + "grad_norm": 0.6129658069633441, + "learning_rate": 6.277367185081981e-07, + "loss": 0.2801, + "step": 33145 + }, + { + "epoch": 1.5527240361643322, + "grad_norm": 0.5428079791750863, + "learning_rate": 6.27611047099734e-07, + "loss": 0.2511, + "step": 33146 + }, + { + "epoch": 1.5527708811542604, + "grad_norm": 0.5870444174809248, + "learning_rate": 6.27485386466398e-07, + "loss": 0.2677, + "step": 33147 + }, + { + "epoch": 1.552817726144189, + "grad_norm": 0.6330427831549978, + "learning_rate": 6.273597366089127e-07, + "loss": 0.3057, + "step": 33148 + }, + { + "epoch": 1.5528645711341174, + "grad_norm": 0.5759953554174944, + "learning_rate": 6.272340975280014e-07, + "loss": 0.2674, + "step": 33149 + }, + { + "epoch": 1.5529114161240454, + "grad_norm": 0.6615349321386628, + "learning_rate": 6.271084692243881e-07, + "loss": 0.2919, + "step": 33150 + }, + { + "epoch": 1.5529582611139738, + "grad_norm": 0.6403299860642325, + "learning_rate": 6.269828516987939e-07, + "loss": 0.282, + "step": 33151 + }, + { + "epoch": 1.5530051061039023, + "grad_norm": 0.5851021157868187, + "learning_rate": 6.268572449519434e-07, + "loss": 0.2876, + "step": 33152 + }, + { + "epoch": 1.5530519510938305, + "grad_norm": 0.5850286125813307, + "learning_rate": 6.26731648984558e-07, + "loss": 0.2709, + "step": 33153 + }, + { + "epoch": 1.5530987960837588, + "grad_norm": 0.5802676526666763, + "learning_rate": 6.266060637973612e-07, + "loss": 0.2597, + "step": 33154 + }, + { + "epoch": 1.5531456410736872, + "grad_norm": 0.5948339596054798, + "learning_rate": 6.264804893910755e-07, + "loss": 0.2662, + "step": 33155 + }, + { + "epoch": 1.5531924860636155, + "grad_norm": 0.5956977982550697, + "learning_rate": 6.263549257664244e-07, + "loss": 0.2601, + "step": 33156 + }, + { + "epoch": 1.5532393310535437, + "grad_norm": 0.588971891386133, + "learning_rate": 6.262293729241292e-07, + "loss": 0.2757, + "step": 33157 + }, + { + "epoch": 1.5532861760434722, + "grad_norm": 0.5942195194223113, + "learning_rate": 6.261038308649137e-07, + "loss": 0.268, + "step": 33158 + }, + { + "epoch": 1.5533330210334004, + "grad_norm": 0.6003906819192106, + "learning_rate": 6.259782995894989e-07, + "loss": 0.261, + "step": 33159 + }, + { + "epoch": 1.5533798660233287, + "grad_norm": 0.5938005082734232, + "learning_rate": 6.25852779098608e-07, + "loss": 0.2624, + "step": 33160 + }, + { + "epoch": 1.5534267110132571, + "grad_norm": 0.6053547142131823, + "learning_rate": 6.257272693929631e-07, + "loss": 0.2857, + "step": 33161 + }, + { + "epoch": 1.5534735560031856, + "grad_norm": 0.59861593031812, + "learning_rate": 6.256017704732876e-07, + "loss": 0.2731, + "step": 33162 + }, + { + "epoch": 1.5535204009931138, + "grad_norm": 0.6320134923023226, + "learning_rate": 6.254762823403021e-07, + "loss": 0.2853, + "step": 33163 + }, + { + "epoch": 1.553567245983042, + "grad_norm": 0.6518444744687036, + "learning_rate": 6.253508049947294e-07, + "loss": 0.2741, + "step": 33164 + }, + { + "epoch": 1.5536140909729705, + "grad_norm": 0.5965582234261405, + "learning_rate": 6.252253384372925e-07, + "loss": 0.274, + "step": 33165 + }, + { + "epoch": 1.5536609359628988, + "grad_norm": 0.595212715129879, + "learning_rate": 6.250998826687116e-07, + "loss": 0.2756, + "step": 33166 + }, + { + "epoch": 1.553707780952827, + "grad_norm": 0.6142405265826711, + "learning_rate": 6.249744376897096e-07, + "loss": 0.2813, + "step": 33167 + }, + { + "epoch": 1.5537546259427555, + "grad_norm": 0.6210119328626837, + "learning_rate": 6.248490035010093e-07, + "loss": 0.2615, + "step": 33168 + }, + { + "epoch": 1.5538014709326837, + "grad_norm": 0.6060782318129591, + "learning_rate": 6.24723580103331e-07, + "loss": 0.2738, + "step": 33169 + }, + { + "epoch": 1.553848315922612, + "grad_norm": 0.6347302821887771, + "learning_rate": 6.245981674973972e-07, + "loss": 0.3035, + "step": 33170 + }, + { + "epoch": 1.5538951609125404, + "grad_norm": 0.6553287400558648, + "learning_rate": 6.244727656839298e-07, + "loss": 0.2752, + "step": 33171 + }, + { + "epoch": 1.553942005902469, + "grad_norm": 0.5979661034667084, + "learning_rate": 6.243473746636505e-07, + "loss": 0.2723, + "step": 33172 + }, + { + "epoch": 1.553988850892397, + "grad_norm": 0.5855046956468047, + "learning_rate": 6.242219944372802e-07, + "loss": 0.2542, + "step": 33173 + }, + { + "epoch": 1.5540356958823254, + "grad_norm": 0.6034172801251505, + "learning_rate": 6.240966250055413e-07, + "loss": 0.2845, + "step": 33174 + }, + { + "epoch": 1.5540825408722538, + "grad_norm": 0.6100740177941276, + "learning_rate": 6.239712663691544e-07, + "loss": 0.2812, + "step": 33175 + }, + { + "epoch": 1.554129385862182, + "grad_norm": 0.6377447207665183, + "learning_rate": 6.238459185288412e-07, + "loss": 0.2766, + "step": 33176 + }, + { + "epoch": 1.5541762308521103, + "grad_norm": 0.6629304644745196, + "learning_rate": 6.237205814853231e-07, + "loss": 0.2794, + "step": 33177 + }, + { + "epoch": 1.5542230758420388, + "grad_norm": 0.6174638432853716, + "learning_rate": 6.235952552393218e-07, + "loss": 0.2793, + "step": 33178 + }, + { + "epoch": 1.554269920831967, + "grad_norm": 0.6221383197054039, + "learning_rate": 6.234699397915586e-07, + "loss": 0.2859, + "step": 33179 + }, + { + "epoch": 1.5543167658218953, + "grad_norm": 0.5810208029573608, + "learning_rate": 6.233446351427544e-07, + "loss": 0.27, + "step": 33180 + }, + { + "epoch": 1.5543636108118237, + "grad_norm": 0.6289803910639198, + "learning_rate": 6.232193412936294e-07, + "loss": 0.2737, + "step": 33181 + }, + { + "epoch": 1.554410455801752, + "grad_norm": 0.5669311654948236, + "learning_rate": 6.230940582449052e-07, + "loss": 0.2568, + "step": 33182 + }, + { + "epoch": 1.5544573007916802, + "grad_norm": 0.5823845725054906, + "learning_rate": 6.22968785997303e-07, + "loss": 0.2682, + "step": 33183 + }, + { + "epoch": 1.5545041457816087, + "grad_norm": 0.581299860338432, + "learning_rate": 6.228435245515438e-07, + "loss": 0.2606, + "step": 33184 + }, + { + "epoch": 1.5545509907715371, + "grad_norm": 0.5734537458085559, + "learning_rate": 6.227182739083485e-07, + "loss": 0.2827, + "step": 33185 + }, + { + "epoch": 1.5545978357614652, + "grad_norm": 0.6160899253009088, + "learning_rate": 6.22593034068438e-07, + "loss": 0.2781, + "step": 33186 + }, + { + "epoch": 1.5546446807513936, + "grad_norm": 0.6370333877231202, + "learning_rate": 6.22467805032533e-07, + "loss": 0.3, + "step": 33187 + }, + { + "epoch": 1.554691525741322, + "grad_norm": 0.560605942541627, + "learning_rate": 6.22342586801353e-07, + "loss": 0.2431, + "step": 33188 + }, + { + "epoch": 1.5547383707312503, + "grad_norm": 0.6187406143480464, + "learning_rate": 6.222173793756195e-07, + "loss": 0.2788, + "step": 33189 + }, + { + "epoch": 1.5547852157211786, + "grad_norm": 0.583669102499441, + "learning_rate": 6.220921827560531e-07, + "loss": 0.2695, + "step": 33190 + }, + { + "epoch": 1.554832060711107, + "grad_norm": 0.6172139104993045, + "learning_rate": 6.219669969433742e-07, + "loss": 0.275, + "step": 33191 + }, + { + "epoch": 1.5548789057010353, + "grad_norm": 0.5920727409521894, + "learning_rate": 6.218418219383038e-07, + "loss": 0.2656, + "step": 33192 + }, + { + "epoch": 1.5549257506909635, + "grad_norm": 0.5929092230057968, + "learning_rate": 6.217166577415612e-07, + "loss": 0.2732, + "step": 33193 + }, + { + "epoch": 1.554972595680892, + "grad_norm": 0.5600887788598742, + "learning_rate": 6.215915043538676e-07, + "loss": 0.2613, + "step": 33194 + }, + { + "epoch": 1.5550194406708202, + "grad_norm": 0.6083517660315441, + "learning_rate": 6.214663617759423e-07, + "loss": 0.2653, + "step": 33195 + }, + { + "epoch": 1.5550662856607484, + "grad_norm": 0.6836803197120714, + "learning_rate": 6.213412300085056e-07, + "loss": 0.2937, + "step": 33196 + }, + { + "epoch": 1.555113130650677, + "grad_norm": 0.6381498781124518, + "learning_rate": 6.212161090522781e-07, + "loss": 0.2645, + "step": 33197 + }, + { + "epoch": 1.5551599756406054, + "grad_norm": 0.5995380638400983, + "learning_rate": 6.210909989079805e-07, + "loss": 0.2822, + "step": 33198 + }, + { + "epoch": 1.5552068206305336, + "grad_norm": 0.5904018258221543, + "learning_rate": 6.209658995763312e-07, + "loss": 0.2731, + "step": 33199 + }, + { + "epoch": 1.5552536656204619, + "grad_norm": 0.605226125482961, + "learning_rate": 6.208408110580507e-07, + "loss": 0.2588, + "step": 33200 + }, + { + "epoch": 1.5553005106103903, + "grad_norm": 0.6206476353250514, + "learning_rate": 6.207157333538599e-07, + "loss": 0.275, + "step": 33201 + }, + { + "epoch": 1.5553473556003186, + "grad_norm": 0.5622556293972569, + "learning_rate": 6.20590666464477e-07, + "loss": 0.2587, + "step": 33202 + }, + { + "epoch": 1.5553942005902468, + "grad_norm": 0.6288406492392826, + "learning_rate": 6.204656103906223e-07, + "loss": 0.2766, + "step": 33203 + }, + { + "epoch": 1.5554410455801753, + "grad_norm": 0.5999037389352171, + "learning_rate": 6.203405651330166e-07, + "loss": 0.2736, + "step": 33204 + }, + { + "epoch": 1.5554878905701035, + "grad_norm": 0.5675181013345935, + "learning_rate": 6.202155306923777e-07, + "loss": 0.2504, + "step": 33205 + }, + { + "epoch": 1.5555347355600317, + "grad_norm": 0.5593452869319026, + "learning_rate": 6.200905070694258e-07, + "loss": 0.2628, + "step": 33206 + }, + { + "epoch": 1.5555815805499602, + "grad_norm": 0.6144455259274566, + "learning_rate": 6.199654942648814e-07, + "loss": 0.2758, + "step": 33207 + }, + { + "epoch": 1.5556284255398887, + "grad_norm": 0.5743819066888279, + "learning_rate": 6.198404922794621e-07, + "loss": 0.2459, + "step": 33208 + }, + { + "epoch": 1.5556752705298167, + "grad_norm": 0.5891944176498713, + "learning_rate": 6.197155011138883e-07, + "loss": 0.2743, + "step": 33209 + }, + { + "epoch": 1.5557221155197452, + "grad_norm": 0.5727378861735304, + "learning_rate": 6.195905207688802e-07, + "loss": 0.2592, + "step": 33210 + }, + { + "epoch": 1.5557689605096736, + "grad_norm": 0.6211427480595088, + "learning_rate": 6.19465551245155e-07, + "loss": 0.2614, + "step": 33211 + }, + { + "epoch": 1.5558158054996019, + "grad_norm": 0.5873743862020405, + "learning_rate": 6.193405925434332e-07, + "loss": 0.2588, + "step": 33212 + }, + { + "epoch": 1.55586265048953, + "grad_norm": 0.6114739035305731, + "learning_rate": 6.192156446644332e-07, + "loss": 0.2842, + "step": 33213 + }, + { + "epoch": 1.5559094954794586, + "grad_norm": 0.6849716684272644, + "learning_rate": 6.190907076088753e-07, + "loss": 0.276, + "step": 33214 + }, + { + "epoch": 1.5559563404693868, + "grad_norm": 0.5791239043892071, + "learning_rate": 6.189657813774771e-07, + "loss": 0.2602, + "step": 33215 + }, + { + "epoch": 1.556003185459315, + "grad_norm": 0.5901904724771793, + "learning_rate": 6.18840865970958e-07, + "loss": 0.2718, + "step": 33216 + }, + { + "epoch": 1.5560500304492435, + "grad_norm": 0.6282361215737253, + "learning_rate": 6.187159613900376e-07, + "loss": 0.2721, + "step": 33217 + }, + { + "epoch": 1.5560968754391717, + "grad_norm": 0.6243954207640261, + "learning_rate": 6.185910676354331e-07, + "loss": 0.2792, + "step": 33218 + }, + { + "epoch": 1.5561437204291, + "grad_norm": 0.6116990625631264, + "learning_rate": 6.184661847078643e-07, + "loss": 0.2703, + "step": 33219 + }, + { + "epoch": 1.5561905654190284, + "grad_norm": 0.6493607536864351, + "learning_rate": 6.183413126080495e-07, + "loss": 0.2772, + "step": 33220 + }, + { + "epoch": 1.556237410408957, + "grad_norm": 0.6078343556549815, + "learning_rate": 6.182164513367086e-07, + "loss": 0.2791, + "step": 33221 + }, + { + "epoch": 1.556284255398885, + "grad_norm": 0.6670261926947838, + "learning_rate": 6.180916008945581e-07, + "loss": 0.2752, + "step": 33222 + }, + { + "epoch": 1.5563311003888134, + "grad_norm": 0.5660176003194414, + "learning_rate": 6.179667612823182e-07, + "loss": 0.2518, + "step": 33223 + }, + { + "epoch": 1.5563779453787419, + "grad_norm": 0.56220917696784, + "learning_rate": 6.178419325007056e-07, + "loss": 0.2725, + "step": 33224 + }, + { + "epoch": 1.55642479036867, + "grad_norm": 0.5947367776081404, + "learning_rate": 6.177171145504399e-07, + "loss": 0.2726, + "step": 33225 + }, + { + "epoch": 1.5564716353585983, + "grad_norm": 0.6212890764727899, + "learning_rate": 6.17592307432239e-07, + "loss": 0.2669, + "step": 33226 + }, + { + "epoch": 1.5565184803485268, + "grad_norm": 0.5712429811152528, + "learning_rate": 6.174675111468214e-07, + "loss": 0.2786, + "step": 33227 + }, + { + "epoch": 1.556565325338455, + "grad_norm": 0.6070184515631056, + "learning_rate": 6.173427256949058e-07, + "loss": 0.2791, + "step": 33228 + }, + { + "epoch": 1.5566121703283833, + "grad_norm": 0.6009410877588298, + "learning_rate": 6.172179510772095e-07, + "loss": 0.2721, + "step": 33229 + }, + { + "epoch": 1.5566590153183117, + "grad_norm": 0.5955741456951844, + "learning_rate": 6.1709318729445e-07, + "loss": 0.2672, + "step": 33230 + }, + { + "epoch": 1.55670586030824, + "grad_norm": 0.647655803596181, + "learning_rate": 6.169684343473461e-07, + "loss": 0.2783, + "step": 33231 + }, + { + "epoch": 1.5567527052981682, + "grad_norm": 0.5848673434961961, + "learning_rate": 6.168436922366153e-07, + "loss": 0.2692, + "step": 33232 + }, + { + "epoch": 1.5567995502880967, + "grad_norm": 0.6314903747339065, + "learning_rate": 6.167189609629759e-07, + "loss": 0.2807, + "step": 33233 + }, + { + "epoch": 1.5568463952780252, + "grad_norm": 0.5631944085687304, + "learning_rate": 6.165942405271455e-07, + "loss": 0.2523, + "step": 33234 + }, + { + "epoch": 1.5568932402679534, + "grad_norm": 0.6110353112936018, + "learning_rate": 6.164695309298426e-07, + "loss": 0.2661, + "step": 33235 + }, + { + "epoch": 1.5569400852578816, + "grad_norm": 0.638924264390061, + "learning_rate": 6.163448321717843e-07, + "loss": 0.2848, + "step": 33236 + }, + { + "epoch": 1.55698693024781, + "grad_norm": 0.567633452346289, + "learning_rate": 6.162201442536871e-07, + "loss": 0.2589, + "step": 33237 + }, + { + "epoch": 1.5570337752377383, + "grad_norm": 0.6788161743069074, + "learning_rate": 6.160954671762696e-07, + "loss": 0.2853, + "step": 33238 + }, + { + "epoch": 1.5570806202276666, + "grad_norm": 0.6069682153753699, + "learning_rate": 6.15970800940249e-07, + "loss": 0.2725, + "step": 33239 + }, + { + "epoch": 1.557127465217595, + "grad_norm": 0.611956510232538, + "learning_rate": 6.158461455463432e-07, + "loss": 0.2727, + "step": 33240 + }, + { + "epoch": 1.5571743102075233, + "grad_norm": 0.6010129814537374, + "learning_rate": 6.157215009952699e-07, + "loss": 0.266, + "step": 33241 + }, + { + "epoch": 1.5572211551974515, + "grad_norm": 0.5895863439145884, + "learning_rate": 6.15596867287745e-07, + "loss": 0.2522, + "step": 33242 + }, + { + "epoch": 1.55726800018738, + "grad_norm": 0.5881850823532562, + "learning_rate": 6.154722444244874e-07, + "loss": 0.2742, + "step": 33243 + }, + { + "epoch": 1.5573148451773084, + "grad_norm": 0.5878661017023886, + "learning_rate": 6.153476324062124e-07, + "loss": 0.259, + "step": 33244 + }, + { + "epoch": 1.5573616901672365, + "grad_norm": 0.6949703486261538, + "learning_rate": 6.15223031233638e-07, + "loss": 0.3045, + "step": 33245 + }, + { + "epoch": 1.557408535157165, + "grad_norm": 0.6126640930344459, + "learning_rate": 6.150984409074818e-07, + "loss": 0.2802, + "step": 33246 + }, + { + "epoch": 1.5574553801470934, + "grad_norm": 0.5824898033418672, + "learning_rate": 6.149738614284606e-07, + "loss": 0.2656, + "step": 33247 + }, + { + "epoch": 1.5575022251370216, + "grad_norm": 0.5785574361254421, + "learning_rate": 6.148492927972904e-07, + "loss": 0.2668, + "step": 33248 + }, + { + "epoch": 1.5575490701269499, + "grad_norm": 0.6479751612136475, + "learning_rate": 6.147247350146887e-07, + "loss": 0.3035, + "step": 33249 + }, + { + "epoch": 1.5575959151168783, + "grad_norm": 0.6107628028645216, + "learning_rate": 6.146001880813731e-07, + "loss": 0.2814, + "step": 33250 + }, + { + "epoch": 1.5576427601068066, + "grad_norm": 0.5704074209402803, + "learning_rate": 6.144756519980588e-07, + "loss": 0.2619, + "step": 33251 + }, + { + "epoch": 1.5576896050967348, + "grad_norm": 0.5729152203332089, + "learning_rate": 6.143511267654634e-07, + "loss": 0.2579, + "step": 33252 + }, + { + "epoch": 1.5577364500866633, + "grad_norm": 0.6257740446669828, + "learning_rate": 6.142266123843038e-07, + "loss": 0.2821, + "step": 33253 + }, + { + "epoch": 1.5577832950765915, + "grad_norm": 0.6178361932875098, + "learning_rate": 6.141021088552953e-07, + "loss": 0.2838, + "step": 33254 + }, + { + "epoch": 1.5578301400665198, + "grad_norm": 0.571377113337084, + "learning_rate": 6.139776161791555e-07, + "loss": 0.2658, + "step": 33255 + }, + { + "epoch": 1.5578769850564482, + "grad_norm": 0.576873915302979, + "learning_rate": 6.13853134356601e-07, + "loss": 0.2552, + "step": 33256 + }, + { + "epoch": 1.5579238300463767, + "grad_norm": 0.6257328423537019, + "learning_rate": 6.137286633883469e-07, + "loss": 0.287, + "step": 33257 + }, + { + "epoch": 1.5579706750363047, + "grad_norm": 0.6065588616866452, + "learning_rate": 6.136042032751102e-07, + "loss": 0.2859, + "step": 33258 + }, + { + "epoch": 1.5580175200262332, + "grad_norm": 0.5712784333735038, + "learning_rate": 6.13479754017608e-07, + "loss": 0.2725, + "step": 33259 + }, + { + "epoch": 1.5580643650161616, + "grad_norm": 0.6247371910638033, + "learning_rate": 6.13355315616555e-07, + "loss": 0.2841, + "step": 33260 + }, + { + "epoch": 1.5581112100060899, + "grad_norm": 0.6090398365375368, + "learning_rate": 6.132308880726678e-07, + "loss": 0.2658, + "step": 33261 + }, + { + "epoch": 1.5581580549960181, + "grad_norm": 0.56270247074045, + "learning_rate": 6.131064713866628e-07, + "loss": 0.259, + "step": 33262 + }, + { + "epoch": 1.5582048999859466, + "grad_norm": 0.5516615133975115, + "learning_rate": 6.129820655592564e-07, + "loss": 0.2551, + "step": 33263 + }, + { + "epoch": 1.5582517449758748, + "grad_norm": 0.5296247775715759, + "learning_rate": 6.128576705911632e-07, + "loss": 0.2592, + "step": 33264 + }, + { + "epoch": 1.558298589965803, + "grad_norm": 0.6183219517850828, + "learning_rate": 6.127332864831004e-07, + "loss": 0.2803, + "step": 33265 + }, + { + "epoch": 1.5583454349557315, + "grad_norm": 0.5700448353762213, + "learning_rate": 6.126089132357826e-07, + "loss": 0.2614, + "step": 33266 + }, + { + "epoch": 1.5583922799456598, + "grad_norm": 0.6502188866979124, + "learning_rate": 6.124845508499261e-07, + "loss": 0.2695, + "step": 33267 + }, + { + "epoch": 1.558439124935588, + "grad_norm": 0.5876635322506318, + "learning_rate": 6.123601993262468e-07, + "loss": 0.2604, + "step": 33268 + }, + { + "epoch": 1.5584859699255165, + "grad_norm": 0.5737185979062346, + "learning_rate": 6.122358586654598e-07, + "loss": 0.2566, + "step": 33269 + }, + { + "epoch": 1.558532814915445, + "grad_norm": 0.6165852193548126, + "learning_rate": 6.121115288682819e-07, + "loss": 0.278, + "step": 33270 + }, + { + "epoch": 1.5585796599053732, + "grad_norm": 0.5915530888527781, + "learning_rate": 6.119872099354276e-07, + "loss": 0.2629, + "step": 33271 + }, + { + "epoch": 1.5586265048953014, + "grad_norm": 0.6102219341484919, + "learning_rate": 6.118629018676117e-07, + "loss": 0.2706, + "step": 33272 + }, + { + "epoch": 1.5586733498852299, + "grad_norm": 0.5459174563277104, + "learning_rate": 6.117386046655502e-07, + "loss": 0.2526, + "step": 33273 + }, + { + "epoch": 1.5587201948751581, + "grad_norm": 0.5538238251685913, + "learning_rate": 6.116143183299584e-07, + "loss": 0.2665, + "step": 33274 + }, + { + "epoch": 1.5587670398650864, + "grad_norm": 0.5768621105316392, + "learning_rate": 6.114900428615514e-07, + "loss": 0.2704, + "step": 33275 + }, + { + "epoch": 1.5588138848550148, + "grad_norm": 0.6193462314915487, + "learning_rate": 6.113657782610447e-07, + "loss": 0.2708, + "step": 33276 + }, + { + "epoch": 1.558860729844943, + "grad_norm": 0.6166298456459033, + "learning_rate": 6.112415245291542e-07, + "loss": 0.2711, + "step": 33277 + }, + { + "epoch": 1.5589075748348713, + "grad_norm": 0.636280280864558, + "learning_rate": 6.111172816665936e-07, + "loss": 0.2759, + "step": 33278 + }, + { + "epoch": 1.5589544198247998, + "grad_norm": 0.6259782167103716, + "learning_rate": 6.109930496740779e-07, + "loss": 0.2699, + "step": 33279 + }, + { + "epoch": 1.5590012648147282, + "grad_norm": 0.5965750713640289, + "learning_rate": 6.108688285523223e-07, + "loss": 0.2701, + "step": 33280 + }, + { + "epoch": 1.5590481098046562, + "grad_norm": 0.6443040792828316, + "learning_rate": 6.107446183020416e-07, + "loss": 0.2731, + "step": 33281 + }, + { + "epoch": 1.5590949547945847, + "grad_norm": 0.6306559588557149, + "learning_rate": 6.106204189239509e-07, + "loss": 0.2963, + "step": 33282 + }, + { + "epoch": 1.5591417997845132, + "grad_norm": 0.6191653603131105, + "learning_rate": 6.10496230418765e-07, + "loss": 0.2794, + "step": 33283 + }, + { + "epoch": 1.5591886447744414, + "grad_norm": 0.5962447700534873, + "learning_rate": 6.103720527871989e-07, + "loss": 0.2642, + "step": 33284 + }, + { + "epoch": 1.5592354897643697, + "grad_norm": 0.6078955246806578, + "learning_rate": 6.102478860299668e-07, + "loss": 0.2922, + "step": 33285 + }, + { + "epoch": 1.5592823347542981, + "grad_norm": 0.604902049121968, + "learning_rate": 6.101237301477823e-07, + "loss": 0.2822, + "step": 33286 + }, + { + "epoch": 1.5593291797442264, + "grad_norm": 0.6513740998609896, + "learning_rate": 6.099995851413607e-07, + "loss": 0.2915, + "step": 33287 + }, + { + "epoch": 1.5593760247341546, + "grad_norm": 0.608042248920122, + "learning_rate": 6.098754510114166e-07, + "loss": 0.2599, + "step": 33288 + }, + { + "epoch": 1.559422869724083, + "grad_norm": 0.6207107337688562, + "learning_rate": 6.097513277586642e-07, + "loss": 0.279, + "step": 33289 + }, + { + "epoch": 1.5594697147140113, + "grad_norm": 0.5666058580321875, + "learning_rate": 6.096272153838184e-07, + "loss": 0.2667, + "step": 33290 + }, + { + "epoch": 1.5595165597039395, + "grad_norm": 0.6074567296538268, + "learning_rate": 6.095031138875923e-07, + "loss": 0.2785, + "step": 33291 + }, + { + "epoch": 1.559563404693868, + "grad_norm": 0.5934876350661696, + "learning_rate": 6.093790232707014e-07, + "loss": 0.2607, + "step": 33292 + }, + { + "epoch": 1.5596102496837965, + "grad_norm": 0.6029126576049191, + "learning_rate": 6.092549435338579e-07, + "loss": 0.2782, + "step": 33293 + }, + { + "epoch": 1.5596570946737245, + "grad_norm": 0.59189233292875, + "learning_rate": 6.091308746777774e-07, + "loss": 0.2694, + "step": 33294 + }, + { + "epoch": 1.559703939663653, + "grad_norm": 0.5950881098830767, + "learning_rate": 6.090068167031735e-07, + "loss": 0.2811, + "step": 33295 + }, + { + "epoch": 1.5597507846535814, + "grad_norm": 0.5949198318150548, + "learning_rate": 6.088827696107605e-07, + "loss": 0.28, + "step": 33296 + }, + { + "epoch": 1.5597976296435097, + "grad_norm": 0.6012740405685779, + "learning_rate": 6.087587334012513e-07, + "loss": 0.2811, + "step": 33297 + }, + { + "epoch": 1.559844474633438, + "grad_norm": 0.6278093828295843, + "learning_rate": 6.086347080753607e-07, + "loss": 0.2747, + "step": 33298 + }, + { + "epoch": 1.5598913196233664, + "grad_norm": 0.6530330860026313, + "learning_rate": 6.085106936338017e-07, + "loss": 0.2848, + "step": 33299 + }, + { + "epoch": 1.5599381646132946, + "grad_norm": 0.6371767056664016, + "learning_rate": 6.08386690077288e-07, + "loss": 0.2732, + "step": 33300 + }, + { + "epoch": 1.5599850096032228, + "grad_norm": 0.5941297268144297, + "learning_rate": 6.082626974065334e-07, + "loss": 0.2694, + "step": 33301 + }, + { + "epoch": 1.5600318545931513, + "grad_norm": 0.593195216872475, + "learning_rate": 6.081387156222523e-07, + "loss": 0.2692, + "step": 33302 + }, + { + "epoch": 1.5600786995830795, + "grad_norm": 0.5977302056216444, + "learning_rate": 6.080147447251566e-07, + "loss": 0.2717, + "step": 33303 + }, + { + "epoch": 1.5601255445730078, + "grad_norm": 0.5875118491143496, + "learning_rate": 6.078907847159607e-07, + "loss": 0.2668, + "step": 33304 + }, + { + "epoch": 1.5601723895629362, + "grad_norm": 0.5479996814098278, + "learning_rate": 6.077668355953784e-07, + "loss": 0.2554, + "step": 33305 + }, + { + "epoch": 1.5602192345528647, + "grad_norm": 0.6372244732080408, + "learning_rate": 6.076428973641216e-07, + "loss": 0.2861, + "step": 33306 + }, + { + "epoch": 1.560266079542793, + "grad_norm": 0.5968955284211686, + "learning_rate": 6.075189700229045e-07, + "loss": 0.2625, + "step": 33307 + }, + { + "epoch": 1.5603129245327212, + "grad_norm": 0.591209129379092, + "learning_rate": 6.073950535724405e-07, + "loss": 0.2748, + "step": 33308 + }, + { + "epoch": 1.5603597695226497, + "grad_norm": 0.619753616340443, + "learning_rate": 6.072711480134416e-07, + "loss": 0.2675, + "step": 33309 + }, + { + "epoch": 1.560406614512578, + "grad_norm": 0.665222354868407, + "learning_rate": 6.071472533466216e-07, + "loss": 0.2922, + "step": 33310 + }, + { + "epoch": 1.5604534595025061, + "grad_norm": 0.5483274766591595, + "learning_rate": 6.070233695726935e-07, + "loss": 0.2659, + "step": 33311 + }, + { + "epoch": 1.5605003044924346, + "grad_norm": 0.6248668362988903, + "learning_rate": 6.068994966923708e-07, + "loss": 0.2896, + "step": 33312 + }, + { + "epoch": 1.5605471494823628, + "grad_norm": 0.5751764138958155, + "learning_rate": 6.06775634706365e-07, + "loss": 0.2493, + "step": 33313 + }, + { + "epoch": 1.560593994472291, + "grad_norm": 0.5725980440312394, + "learning_rate": 6.066517836153901e-07, + "loss": 0.2786, + "step": 33314 + }, + { + "epoch": 1.5606408394622195, + "grad_norm": 0.5719697473488239, + "learning_rate": 6.065279434201576e-07, + "loss": 0.2632, + "step": 33315 + }, + { + "epoch": 1.560687684452148, + "grad_norm": 0.6138273920872083, + "learning_rate": 6.064041141213811e-07, + "loss": 0.2825, + "step": 33316 + }, + { + "epoch": 1.560734529442076, + "grad_norm": 0.6196612945161878, + "learning_rate": 6.062802957197727e-07, + "loss": 0.2831, + "step": 33317 + }, + { + "epoch": 1.5607813744320045, + "grad_norm": 0.6138370858546114, + "learning_rate": 6.061564882160456e-07, + "loss": 0.2728, + "step": 33318 + }, + { + "epoch": 1.560828219421933, + "grad_norm": 0.6122432034895301, + "learning_rate": 6.060326916109125e-07, + "loss": 0.2576, + "step": 33319 + }, + { + "epoch": 1.5608750644118612, + "grad_norm": 0.6041632520231371, + "learning_rate": 6.059089059050852e-07, + "loss": 0.2712, + "step": 33320 + }, + { + "epoch": 1.5609219094017894, + "grad_norm": 0.6038756529019815, + "learning_rate": 6.057851310992752e-07, + "loss": 0.2653, + "step": 33321 + }, + { + "epoch": 1.560968754391718, + "grad_norm": 0.5774062297461304, + "learning_rate": 6.056613671941958e-07, + "loss": 0.2467, + "step": 33322 + }, + { + "epoch": 1.5610155993816461, + "grad_norm": 0.5888782690962624, + "learning_rate": 6.055376141905592e-07, + "loss": 0.2694, + "step": 33323 + }, + { + "epoch": 1.5610624443715744, + "grad_norm": 0.6037992204963042, + "learning_rate": 6.054138720890774e-07, + "loss": 0.2583, + "step": 33324 + }, + { + "epoch": 1.5611092893615028, + "grad_norm": 0.6584064984758109, + "learning_rate": 6.052901408904624e-07, + "loss": 0.2981, + "step": 33325 + }, + { + "epoch": 1.561156134351431, + "grad_norm": 0.5450566485106655, + "learning_rate": 6.051664205954274e-07, + "loss": 0.2574, + "step": 33326 + }, + { + "epoch": 1.5612029793413593, + "grad_norm": 0.6059274059609893, + "learning_rate": 6.050427112046834e-07, + "loss": 0.2612, + "step": 33327 + }, + { + "epoch": 1.5612498243312878, + "grad_norm": 0.6198107819230818, + "learning_rate": 6.049190127189414e-07, + "loss": 0.2718, + "step": 33328 + }, + { + "epoch": 1.5612966693212162, + "grad_norm": 0.5857170618308736, + "learning_rate": 6.047953251389144e-07, + "loss": 0.2722, + "step": 33329 + }, + { + "epoch": 1.5613435143111443, + "grad_norm": 0.5885139914666259, + "learning_rate": 6.046716484653137e-07, + "loss": 0.2627, + "step": 33330 + }, + { + "epoch": 1.5613903593010727, + "grad_norm": 0.5677548546480031, + "learning_rate": 6.045479826988515e-07, + "loss": 0.253, + "step": 33331 + }, + { + "epoch": 1.5614372042910012, + "grad_norm": 0.5739785423474013, + "learning_rate": 6.044243278402398e-07, + "loss": 0.2718, + "step": 33332 + }, + { + "epoch": 1.5614840492809294, + "grad_norm": 0.6126808089108121, + "learning_rate": 6.043006838901891e-07, + "loss": 0.2721, + "step": 33333 + }, + { + "epoch": 1.5615308942708577, + "grad_norm": 0.5388253548189325, + "learning_rate": 6.041770508494121e-07, + "loss": 0.2627, + "step": 33334 + }, + { + "epoch": 1.5615777392607861, + "grad_norm": 0.6130987106932316, + "learning_rate": 6.04053428718619e-07, + "loss": 0.2644, + "step": 33335 + }, + { + "epoch": 1.5616245842507144, + "grad_norm": 0.5866035445732561, + "learning_rate": 6.03929817498522e-07, + "loss": 0.2599, + "step": 33336 + }, + { + "epoch": 1.5616714292406426, + "grad_norm": 0.6214650671924747, + "learning_rate": 6.038062171898323e-07, + "loss": 0.2829, + "step": 33337 + }, + { + "epoch": 1.561718274230571, + "grad_norm": 0.6737443466646023, + "learning_rate": 6.036826277932617e-07, + "loss": 0.2881, + "step": 33338 + }, + { + "epoch": 1.5617651192204993, + "grad_norm": 0.5831851029584261, + "learning_rate": 6.035590493095206e-07, + "loss": 0.2678, + "step": 33339 + }, + { + "epoch": 1.5618119642104276, + "grad_norm": 0.6067335361145105, + "learning_rate": 6.034354817393204e-07, + "loss": 0.2802, + "step": 33340 + }, + { + "epoch": 1.561858809200356, + "grad_norm": 0.5997782718235559, + "learning_rate": 6.03311925083373e-07, + "loss": 0.2542, + "step": 33341 + }, + { + "epoch": 1.5619056541902845, + "grad_norm": 0.6149102870685574, + "learning_rate": 6.031883793423879e-07, + "loss": 0.2839, + "step": 33342 + }, + { + "epoch": 1.5619524991802127, + "grad_norm": 0.5982188219615198, + "learning_rate": 6.030648445170769e-07, + "loss": 0.2771, + "step": 33343 + }, + { + "epoch": 1.561999344170141, + "grad_norm": 0.608709803334902, + "learning_rate": 6.029413206081519e-07, + "loss": 0.2789, + "step": 33344 + }, + { + "epoch": 1.5620461891600694, + "grad_norm": 0.5843054053311857, + "learning_rate": 6.028178076163221e-07, + "loss": 0.2665, + "step": 33345 + }, + { + "epoch": 1.5620930341499977, + "grad_norm": 0.5916222948677812, + "learning_rate": 6.026943055422987e-07, + "loss": 0.2836, + "step": 33346 + }, + { + "epoch": 1.562139879139926, + "grad_norm": 0.5710870033553971, + "learning_rate": 6.025708143867936e-07, + "loss": 0.2648, + "step": 33347 + }, + { + "epoch": 1.5621867241298544, + "grad_norm": 0.5777664324010355, + "learning_rate": 6.024473341505161e-07, + "loss": 0.2755, + "step": 33348 + }, + { + "epoch": 1.5622335691197826, + "grad_norm": 0.5689832705994198, + "learning_rate": 6.02323864834177e-07, + "loss": 0.2694, + "step": 33349 + }, + { + "epoch": 1.5622804141097109, + "grad_norm": 0.5798576990056216, + "learning_rate": 6.022004064384871e-07, + "loss": 0.2567, + "step": 33350 + }, + { + "epoch": 1.5623272590996393, + "grad_norm": 0.5843088882784828, + "learning_rate": 6.020769589641576e-07, + "loss": 0.2701, + "step": 33351 + }, + { + "epoch": 1.5623741040895678, + "grad_norm": 0.6243165853531091, + "learning_rate": 6.019535224118972e-07, + "loss": 0.2663, + "step": 33352 + }, + { + "epoch": 1.5624209490794958, + "grad_norm": 0.6415083180809347, + "learning_rate": 6.018300967824176e-07, + "loss": 0.2853, + "step": 33353 + }, + { + "epoch": 1.5624677940694243, + "grad_norm": 0.5762316599312814, + "learning_rate": 6.017066820764291e-07, + "loss": 0.2611, + "step": 33354 + }, + { + "epoch": 1.5625146390593527, + "grad_norm": 0.6411084802970616, + "learning_rate": 6.015832782946413e-07, + "loss": 0.2783, + "step": 33355 + }, + { + "epoch": 1.562561484049281, + "grad_norm": 0.6312754490945865, + "learning_rate": 6.01459885437764e-07, + "loss": 0.2861, + "step": 33356 + }, + { + "epoch": 1.5626083290392092, + "grad_norm": 0.6114203205101282, + "learning_rate": 6.013365035065089e-07, + "loss": 0.2822, + "step": 33357 + }, + { + "epoch": 1.5626551740291377, + "grad_norm": 0.6006302200961823, + "learning_rate": 6.012131325015844e-07, + "loss": 0.2698, + "step": 33358 + }, + { + "epoch": 1.562702019019066, + "grad_norm": 0.584833580695163, + "learning_rate": 6.010897724237008e-07, + "loss": 0.2685, + "step": 33359 + }, + { + "epoch": 1.5627488640089942, + "grad_norm": 0.581744540043699, + "learning_rate": 6.009664232735685e-07, + "loss": 0.2714, + "step": 33360 + }, + { + "epoch": 1.5627957089989226, + "grad_norm": 0.5938236119643147, + "learning_rate": 6.008430850518979e-07, + "loss": 0.2686, + "step": 33361 + }, + { + "epoch": 1.5628425539888509, + "grad_norm": 0.6596011753505097, + "learning_rate": 6.00719757759397e-07, + "loss": 0.2879, + "step": 33362 + }, + { + "epoch": 1.562889398978779, + "grad_norm": 0.6454564363582743, + "learning_rate": 6.005964413967775e-07, + "loss": 0.2825, + "step": 33363 + }, + { + "epoch": 1.5629362439687076, + "grad_norm": 0.6228178866784051, + "learning_rate": 6.004731359647473e-07, + "loss": 0.2693, + "step": 33364 + }, + { + "epoch": 1.562983088958636, + "grad_norm": 0.6253872011784262, + "learning_rate": 6.003498414640169e-07, + "loss": 0.2929, + "step": 33365 + }, + { + "epoch": 1.563029933948564, + "grad_norm": 0.6269103321846458, + "learning_rate": 6.002265578952954e-07, + "loss": 0.263, + "step": 33366 + }, + { + "epoch": 1.5630767789384925, + "grad_norm": 0.6182685950109476, + "learning_rate": 6.001032852592928e-07, + "loss": 0.2958, + "step": 33367 + }, + { + "epoch": 1.563123623928421, + "grad_norm": 0.6141805556037606, + "learning_rate": 5.999800235567188e-07, + "loss": 0.2664, + "step": 33368 + }, + { + "epoch": 1.5631704689183492, + "grad_norm": 0.5456484915279357, + "learning_rate": 5.998567727882823e-07, + "loss": 0.2512, + "step": 33369 + }, + { + "epoch": 1.5632173139082775, + "grad_norm": 0.6354566138508692, + "learning_rate": 5.997335329546919e-07, + "loss": 0.2586, + "step": 33370 + }, + { + "epoch": 1.563264158898206, + "grad_norm": 0.6324076768351857, + "learning_rate": 5.996103040566572e-07, + "loss": 0.2873, + "step": 33371 + }, + { + "epoch": 1.5633110038881342, + "grad_norm": 0.6196432470143671, + "learning_rate": 5.994870860948879e-07, + "loss": 0.2838, + "step": 33372 + }, + { + "epoch": 1.5633578488780624, + "grad_norm": 0.6125814723867483, + "learning_rate": 5.993638790700923e-07, + "loss": 0.2836, + "step": 33373 + }, + { + "epoch": 1.5634046938679909, + "grad_norm": 0.6005223972279475, + "learning_rate": 5.9924068298298e-07, + "loss": 0.2775, + "step": 33374 + }, + { + "epoch": 1.563451538857919, + "grad_norm": 0.6006412673525336, + "learning_rate": 5.991174978342607e-07, + "loss": 0.2689, + "step": 33375 + }, + { + "epoch": 1.5634983838478473, + "grad_norm": 0.5804876822709741, + "learning_rate": 5.989943236246423e-07, + "loss": 0.2754, + "step": 33376 + }, + { + "epoch": 1.5635452288377758, + "grad_norm": 0.5915118456303163, + "learning_rate": 5.988711603548333e-07, + "loss": 0.2527, + "step": 33377 + }, + { + "epoch": 1.5635920738277043, + "grad_norm": 0.6346888276159216, + "learning_rate": 5.987480080255426e-07, + "loss": 0.2795, + "step": 33378 + }, + { + "epoch": 1.5636389188176325, + "grad_norm": 0.6195425250639003, + "learning_rate": 5.986248666374794e-07, + "loss": 0.2647, + "step": 33379 + }, + { + "epoch": 1.5636857638075607, + "grad_norm": 0.5509078840926463, + "learning_rate": 5.985017361913523e-07, + "loss": 0.2448, + "step": 33380 + }, + { + "epoch": 1.5637326087974892, + "grad_norm": 0.6050358887067679, + "learning_rate": 5.983786166878703e-07, + "loss": 0.2891, + "step": 33381 + }, + { + "epoch": 1.5637794537874175, + "grad_norm": 0.602192087843168, + "learning_rate": 5.982555081277408e-07, + "loss": 0.2834, + "step": 33382 + }, + { + "epoch": 1.5638262987773457, + "grad_norm": 0.6000580988746345, + "learning_rate": 5.981324105116737e-07, + "loss": 0.273, + "step": 33383 + }, + { + "epoch": 1.5638731437672742, + "grad_norm": 0.5863915599213071, + "learning_rate": 5.980093238403756e-07, + "loss": 0.2834, + "step": 33384 + }, + { + "epoch": 1.5639199887572024, + "grad_norm": 0.5556027153984139, + "learning_rate": 5.978862481145558e-07, + "loss": 0.2505, + "step": 33385 + }, + { + "epoch": 1.5639668337471306, + "grad_norm": 0.6011049863550253, + "learning_rate": 5.977631833349228e-07, + "loss": 0.2588, + "step": 33386 + }, + { + "epoch": 1.564013678737059, + "grad_norm": 0.6005948196204195, + "learning_rate": 5.976401295021853e-07, + "loss": 0.2701, + "step": 33387 + }, + { + "epoch": 1.5640605237269876, + "grad_norm": 0.5980812367497931, + "learning_rate": 5.975170866170499e-07, + "loss": 0.2684, + "step": 33388 + }, + { + "epoch": 1.5641073687169156, + "grad_norm": 0.58932814010774, + "learning_rate": 5.973940546802254e-07, + "loss": 0.276, + "step": 33389 + }, + { + "epoch": 1.564154213706844, + "grad_norm": 0.6100561467053219, + "learning_rate": 5.972710336924206e-07, + "loss": 0.2569, + "step": 33390 + }, + { + "epoch": 1.5642010586967725, + "grad_norm": 0.5836021091843506, + "learning_rate": 5.97148023654342e-07, + "loss": 0.2615, + "step": 33391 + }, + { + "epoch": 1.5642479036867007, + "grad_norm": 0.5755057015356888, + "learning_rate": 5.970250245666986e-07, + "loss": 0.2683, + "step": 33392 + }, + { + "epoch": 1.564294748676629, + "grad_norm": 0.582313620289832, + "learning_rate": 5.969020364301983e-07, + "loss": 0.2478, + "step": 33393 + }, + { + "epoch": 1.5643415936665575, + "grad_norm": 0.5737702995265723, + "learning_rate": 5.967790592455478e-07, + "loss": 0.2845, + "step": 33394 + }, + { + "epoch": 1.5643884386564857, + "grad_norm": 0.6337594899310632, + "learning_rate": 5.966560930134554e-07, + "loss": 0.2956, + "step": 33395 + }, + { + "epoch": 1.564435283646414, + "grad_norm": 0.5875517689489962, + "learning_rate": 5.965331377346298e-07, + "loss": 0.2707, + "step": 33396 + }, + { + "epoch": 1.5644821286363424, + "grad_norm": 0.6037152652873508, + "learning_rate": 5.964101934097766e-07, + "loss": 0.2809, + "step": 33397 + }, + { + "epoch": 1.5645289736262706, + "grad_norm": 0.6359030550629385, + "learning_rate": 5.962872600396044e-07, + "loss": 0.2783, + "step": 33398 + }, + { + "epoch": 1.5645758186161989, + "grad_norm": 0.5433716562834864, + "learning_rate": 5.961643376248211e-07, + "loss": 0.2585, + "step": 33399 + }, + { + "epoch": 1.5646226636061273, + "grad_norm": 0.5990575115545164, + "learning_rate": 5.960414261661329e-07, + "loss": 0.2764, + "step": 33400 + }, + { + "epoch": 1.5646695085960558, + "grad_norm": 0.5963264256016535, + "learning_rate": 5.959185256642481e-07, + "loss": 0.2814, + "step": 33401 + }, + { + "epoch": 1.5647163535859838, + "grad_norm": 0.5763415005095015, + "learning_rate": 5.95795636119873e-07, + "loss": 0.2622, + "step": 33402 + }, + { + "epoch": 1.5647631985759123, + "grad_norm": 0.5795757821656683, + "learning_rate": 5.956727575337166e-07, + "loss": 0.2669, + "step": 33403 + }, + { + "epoch": 1.5648100435658407, + "grad_norm": 0.6065545731649149, + "learning_rate": 5.955498899064837e-07, + "loss": 0.2719, + "step": 33404 + }, + { + "epoch": 1.564856888555769, + "grad_norm": 0.6318273873914741, + "learning_rate": 5.954270332388837e-07, + "loss": 0.2839, + "step": 33405 + }, + { + "epoch": 1.5649037335456972, + "grad_norm": 0.5722168223029688, + "learning_rate": 5.953041875316215e-07, + "loss": 0.2677, + "step": 33406 + }, + { + "epoch": 1.5649505785356257, + "grad_norm": 0.58700848531659, + "learning_rate": 5.951813527854048e-07, + "loss": 0.2705, + "step": 33407 + }, + { + "epoch": 1.564997423525554, + "grad_norm": 0.6344326453270136, + "learning_rate": 5.950585290009409e-07, + "loss": 0.2857, + "step": 33408 + }, + { + "epoch": 1.5650442685154822, + "grad_norm": 0.5521266509532717, + "learning_rate": 5.949357161789362e-07, + "loss": 0.2538, + "step": 33409 + }, + { + "epoch": 1.5650911135054106, + "grad_norm": 0.6448896629242473, + "learning_rate": 5.948129143200985e-07, + "loss": 0.2927, + "step": 33410 + }, + { + "epoch": 1.5651379584953389, + "grad_norm": 0.6255049643626639, + "learning_rate": 5.946901234251334e-07, + "loss": 0.2805, + "step": 33411 + }, + { + "epoch": 1.5651848034852671, + "grad_norm": 0.5966088553765562, + "learning_rate": 5.945673434947474e-07, + "loss": 0.2753, + "step": 33412 + }, + { + "epoch": 1.5652316484751956, + "grad_norm": 0.6010160194383174, + "learning_rate": 5.944445745296474e-07, + "loss": 0.2694, + "step": 33413 + }, + { + "epoch": 1.565278493465124, + "grad_norm": 0.5836811355171342, + "learning_rate": 5.943218165305395e-07, + "loss": 0.2589, + "step": 33414 + }, + { + "epoch": 1.5653253384550523, + "grad_norm": 0.6001849413256152, + "learning_rate": 5.941990694981308e-07, + "loss": 0.2545, + "step": 33415 + }, + { + "epoch": 1.5653721834449805, + "grad_norm": 0.6343047282726558, + "learning_rate": 5.940763334331276e-07, + "loss": 0.2778, + "step": 33416 + }, + { + "epoch": 1.565419028434909, + "grad_norm": 0.6539577665933484, + "learning_rate": 5.939536083362365e-07, + "loss": 0.2753, + "step": 33417 + }, + { + "epoch": 1.5654658734248372, + "grad_norm": 0.5797558268942167, + "learning_rate": 5.938308942081636e-07, + "loss": 0.2631, + "step": 33418 + }, + { + "epoch": 1.5655127184147655, + "grad_norm": 0.5704532743810704, + "learning_rate": 5.93708191049614e-07, + "loss": 0.2707, + "step": 33419 + }, + { + "epoch": 1.565559563404694, + "grad_norm": 0.5802703114512285, + "learning_rate": 5.935854988612946e-07, + "loss": 0.2755, + "step": 33420 + }, + { + "epoch": 1.5656064083946222, + "grad_norm": 0.5830109284602373, + "learning_rate": 5.934628176439114e-07, + "loss": 0.273, + "step": 33421 + }, + { + "epoch": 1.5656532533845504, + "grad_norm": 0.6529892145964659, + "learning_rate": 5.933401473981706e-07, + "loss": 0.2859, + "step": 33422 + }, + { + "epoch": 1.5657000983744789, + "grad_norm": 0.6807041071559136, + "learning_rate": 5.932174881247782e-07, + "loss": 0.2802, + "step": 33423 + }, + { + "epoch": 1.5657469433644073, + "grad_norm": 0.5818346749559161, + "learning_rate": 5.930948398244405e-07, + "loss": 0.2692, + "step": 33424 + }, + { + "epoch": 1.5657937883543354, + "grad_norm": 0.6065834373639515, + "learning_rate": 5.929722024978626e-07, + "loss": 0.2787, + "step": 33425 + }, + { + "epoch": 1.5658406333442638, + "grad_norm": 0.5587561534181463, + "learning_rate": 5.928495761457498e-07, + "loss": 0.263, + "step": 33426 + }, + { + "epoch": 1.5658874783341923, + "grad_norm": 0.6363714182593366, + "learning_rate": 5.92726960768808e-07, + "loss": 0.2894, + "step": 33427 + }, + { + "epoch": 1.5659343233241205, + "grad_norm": 0.6174791339557041, + "learning_rate": 5.926043563677436e-07, + "loss": 0.2685, + "step": 33428 + }, + { + "epoch": 1.5659811683140488, + "grad_norm": 0.6169206927264874, + "learning_rate": 5.924817629432614e-07, + "loss": 0.2812, + "step": 33429 + }, + { + "epoch": 1.5660280133039772, + "grad_norm": 0.592827322262281, + "learning_rate": 5.923591804960682e-07, + "loss": 0.2746, + "step": 33430 + }, + { + "epoch": 1.5660748582939055, + "grad_norm": 0.6188489396788502, + "learning_rate": 5.922366090268675e-07, + "loss": 0.2696, + "step": 33431 + }, + { + "epoch": 1.5661217032838337, + "grad_norm": 0.6371853719402925, + "learning_rate": 5.921140485363666e-07, + "loss": 0.3071, + "step": 33432 + }, + { + "epoch": 1.5661685482737622, + "grad_norm": 0.5915510094740828, + "learning_rate": 5.919914990252687e-07, + "loss": 0.2745, + "step": 33433 + }, + { + "epoch": 1.5662153932636904, + "grad_norm": 0.6523694629160863, + "learning_rate": 5.918689604942806e-07, + "loss": 0.2967, + "step": 33434 + }, + { + "epoch": 1.5662622382536187, + "grad_norm": 0.5460918513430935, + "learning_rate": 5.917464329441067e-07, + "loss": 0.2682, + "step": 33435 + }, + { + "epoch": 1.5663090832435471, + "grad_norm": 0.6374075232053136, + "learning_rate": 5.916239163754534e-07, + "loss": 0.2827, + "step": 33436 + }, + { + "epoch": 1.5663559282334756, + "grad_norm": 0.6281436161512056, + "learning_rate": 5.915014107890241e-07, + "loss": 0.2748, + "step": 33437 + }, + { + "epoch": 1.5664027732234036, + "grad_norm": 0.584769382526658, + "learning_rate": 5.913789161855251e-07, + "loss": 0.2669, + "step": 33438 + }, + { + "epoch": 1.566449618213332, + "grad_norm": 0.5689462501316359, + "learning_rate": 5.9125643256566e-07, + "loss": 0.2646, + "step": 33439 + }, + { + "epoch": 1.5664964632032605, + "grad_norm": 0.641434028440374, + "learning_rate": 5.911339599301344e-07, + "loss": 0.2896, + "step": 33440 + }, + { + "epoch": 1.5665433081931888, + "grad_norm": 0.6257265362086807, + "learning_rate": 5.91011498279653e-07, + "loss": 0.2978, + "step": 33441 + }, + { + "epoch": 1.566590153183117, + "grad_norm": 0.6104864898844357, + "learning_rate": 5.908890476149215e-07, + "loss": 0.2672, + "step": 33442 + }, + { + "epoch": 1.5666369981730455, + "grad_norm": 0.5701655435535702, + "learning_rate": 5.907666079366431e-07, + "loss": 0.2505, + "step": 33443 + }, + { + "epoch": 1.5666838431629737, + "grad_norm": 0.5846630326754674, + "learning_rate": 5.906441792455228e-07, + "loss": 0.2603, + "step": 33444 + }, + { + "epoch": 1.566730688152902, + "grad_norm": 0.5708855360725896, + "learning_rate": 5.905217615422659e-07, + "loss": 0.2471, + "step": 33445 + }, + { + "epoch": 1.5667775331428304, + "grad_norm": 0.604261488147762, + "learning_rate": 5.90399354827576e-07, + "loss": 0.2853, + "step": 33446 + }, + { + "epoch": 1.5668243781327587, + "grad_norm": 0.5674709533599822, + "learning_rate": 5.902769591021576e-07, + "loss": 0.2417, + "step": 33447 + }, + { + "epoch": 1.566871223122687, + "grad_norm": 0.6633300370241195, + "learning_rate": 5.901545743667162e-07, + "loss": 0.2837, + "step": 33448 + }, + { + "epoch": 1.5669180681126154, + "grad_norm": 0.5982303364380985, + "learning_rate": 5.900322006219541e-07, + "loss": 0.2735, + "step": 33449 + }, + { + "epoch": 1.5669649131025438, + "grad_norm": 0.6291672982114588, + "learning_rate": 5.899098378685772e-07, + "loss": 0.2798, + "step": 33450 + }, + { + "epoch": 1.567011758092472, + "grad_norm": 0.5666679831789094, + "learning_rate": 5.897874861072886e-07, + "loss": 0.2625, + "step": 33451 + }, + { + "epoch": 1.5670586030824003, + "grad_norm": 0.6134668594565151, + "learning_rate": 5.896651453387938e-07, + "loss": 0.2728, + "step": 33452 + }, + { + "epoch": 1.5671054480723288, + "grad_norm": 0.608082242576667, + "learning_rate": 5.895428155637953e-07, + "loss": 0.276, + "step": 33453 + }, + { + "epoch": 1.567152293062257, + "grad_norm": 0.5812613520548795, + "learning_rate": 5.894204967829984e-07, + "loss": 0.2689, + "step": 33454 + }, + { + "epoch": 1.5671991380521852, + "grad_norm": 0.6073046065978395, + "learning_rate": 5.892981889971056e-07, + "loss": 0.2671, + "step": 33455 + }, + { + "epoch": 1.5672459830421137, + "grad_norm": 0.6029996198265783, + "learning_rate": 5.891758922068216e-07, + "loss": 0.2754, + "step": 33456 + }, + { + "epoch": 1.567292828032042, + "grad_norm": 0.5954708430209666, + "learning_rate": 5.8905360641285e-07, + "loss": 0.2577, + "step": 33457 + }, + { + "epoch": 1.5673396730219702, + "grad_norm": 0.5932572000797095, + "learning_rate": 5.889313316158945e-07, + "loss": 0.2486, + "step": 33458 + }, + { + "epoch": 1.5673865180118987, + "grad_norm": 0.6092187465239118, + "learning_rate": 5.888090678166597e-07, + "loss": 0.2751, + "step": 33459 + }, + { + "epoch": 1.5674333630018271, + "grad_norm": 0.5970560821957357, + "learning_rate": 5.886868150158481e-07, + "loss": 0.271, + "step": 33460 + }, + { + "epoch": 1.5674802079917551, + "grad_norm": 0.6617195847155685, + "learning_rate": 5.885645732141632e-07, + "loss": 0.2917, + "step": 33461 + }, + { + "epoch": 1.5675270529816836, + "grad_norm": 0.5672546294196165, + "learning_rate": 5.884423424123084e-07, + "loss": 0.271, + "step": 33462 + }, + { + "epoch": 1.567573897971612, + "grad_norm": 0.5954771016796717, + "learning_rate": 5.883201226109877e-07, + "loss": 0.2646, + "step": 33463 + }, + { + "epoch": 1.5676207429615403, + "grad_norm": 0.6595049595495482, + "learning_rate": 5.88197913810904e-07, + "loss": 0.288, + "step": 33464 + }, + { + "epoch": 1.5676675879514685, + "grad_norm": 0.5735695569926376, + "learning_rate": 5.880757160127609e-07, + "loss": 0.2697, + "step": 33465 + }, + { + "epoch": 1.567714432941397, + "grad_norm": 0.6739037093167459, + "learning_rate": 5.879535292172623e-07, + "loss": 0.2816, + "step": 33466 + }, + { + "epoch": 1.5677612779313252, + "grad_norm": 0.5699780339052631, + "learning_rate": 5.878313534251104e-07, + "loss": 0.2548, + "step": 33467 + }, + { + "epoch": 1.5678081229212535, + "grad_norm": 0.6208766298228285, + "learning_rate": 5.877091886370078e-07, + "loss": 0.2777, + "step": 33468 + }, + { + "epoch": 1.567854967911182, + "grad_norm": 0.6182949271756933, + "learning_rate": 5.875870348536583e-07, + "loss": 0.2719, + "step": 33469 + }, + { + "epoch": 1.5679018129011102, + "grad_norm": 0.6123048285453712, + "learning_rate": 5.874648920757648e-07, + "loss": 0.2802, + "step": 33470 + }, + { + "epoch": 1.5679486578910384, + "grad_norm": 0.5700462982149901, + "learning_rate": 5.8734276030403e-07, + "loss": 0.2642, + "step": 33471 + }, + { + "epoch": 1.567995502880967, + "grad_norm": 0.6352579885005645, + "learning_rate": 5.872206395391575e-07, + "loss": 0.2665, + "step": 33472 + }, + { + "epoch": 1.5680423478708954, + "grad_norm": 0.6370050451814708, + "learning_rate": 5.870985297818488e-07, + "loss": 0.262, + "step": 33473 + }, + { + "epoch": 1.5680891928608234, + "grad_norm": 0.6209835304733772, + "learning_rate": 5.86976431032808e-07, + "loss": 0.2796, + "step": 33474 + }, + { + "epoch": 1.5681360378507518, + "grad_norm": 0.6239376799127657, + "learning_rate": 5.868543432927365e-07, + "loss": 0.2742, + "step": 33475 + }, + { + "epoch": 1.5681828828406803, + "grad_norm": 0.5641358353692824, + "learning_rate": 5.867322665623371e-07, + "loss": 0.2583, + "step": 33476 + }, + { + "epoch": 1.5682297278306085, + "grad_norm": 0.5997916973995487, + "learning_rate": 5.866102008423127e-07, + "loss": 0.2696, + "step": 33477 + }, + { + "epoch": 1.5682765728205368, + "grad_norm": 0.6548760021257423, + "learning_rate": 5.864881461333666e-07, + "loss": 0.2785, + "step": 33478 + }, + { + "epoch": 1.5683234178104652, + "grad_norm": 0.6001583110730608, + "learning_rate": 5.863661024361994e-07, + "loss": 0.2773, + "step": 33479 + }, + { + "epoch": 1.5683702628003935, + "grad_norm": 0.5912723227539711, + "learning_rate": 5.862440697515143e-07, + "loss": 0.264, + "step": 33480 + }, + { + "epoch": 1.5684171077903217, + "grad_norm": 0.5770206424088226, + "learning_rate": 5.861220480800145e-07, + "loss": 0.2587, + "step": 33481 + }, + { + "epoch": 1.5684639527802502, + "grad_norm": 0.6015708311791642, + "learning_rate": 5.860000374224004e-07, + "loss": 0.2643, + "step": 33482 + }, + { + "epoch": 1.5685107977701784, + "grad_norm": 0.6161789029031101, + "learning_rate": 5.85878037779375e-07, + "loss": 0.2899, + "step": 33483 + }, + { + "epoch": 1.5685576427601067, + "grad_norm": 0.6531220819114579, + "learning_rate": 5.857560491516404e-07, + "loss": 0.2737, + "step": 33484 + }, + { + "epoch": 1.5686044877500351, + "grad_norm": 0.6208934885866594, + "learning_rate": 5.856340715398992e-07, + "loss": 0.2728, + "step": 33485 + }, + { + "epoch": 1.5686513327399636, + "grad_norm": 0.6328264372567203, + "learning_rate": 5.855121049448522e-07, + "loss": 0.3027, + "step": 33486 + }, + { + "epoch": 1.5686981777298918, + "grad_norm": 0.636669312270371, + "learning_rate": 5.853901493672026e-07, + "loss": 0.2846, + "step": 33487 + }, + { + "epoch": 1.56874502271982, + "grad_norm": 0.6548069910507012, + "learning_rate": 5.852682048076508e-07, + "loss": 0.2905, + "step": 33488 + }, + { + "epoch": 1.5687918677097485, + "grad_norm": 0.6281476991370879, + "learning_rate": 5.85146271266899e-07, + "loss": 0.2803, + "step": 33489 + }, + { + "epoch": 1.5688387126996768, + "grad_norm": 0.597181181257627, + "learning_rate": 5.850243487456492e-07, + "loss": 0.2656, + "step": 33490 + }, + { + "epoch": 1.568885557689605, + "grad_norm": 0.6425107602730162, + "learning_rate": 5.849024372446038e-07, + "loss": 0.2882, + "step": 33491 + }, + { + "epoch": 1.5689324026795335, + "grad_norm": 0.6418865069130754, + "learning_rate": 5.847805367644627e-07, + "loss": 0.2721, + "step": 33492 + }, + { + "epoch": 1.5689792476694617, + "grad_norm": 0.5719229075178398, + "learning_rate": 5.846586473059282e-07, + "loss": 0.2663, + "step": 33493 + }, + { + "epoch": 1.56902609265939, + "grad_norm": 0.6231851359099786, + "learning_rate": 5.845367688697027e-07, + "loss": 0.2854, + "step": 33494 + }, + { + "epoch": 1.5690729376493184, + "grad_norm": 0.5742454065618924, + "learning_rate": 5.84414901456486e-07, + "loss": 0.2618, + "step": 33495 + }, + { + "epoch": 1.569119782639247, + "grad_norm": 0.6403664666972535, + "learning_rate": 5.842930450669798e-07, + "loss": 0.2907, + "step": 33496 + }, + { + "epoch": 1.569166627629175, + "grad_norm": 0.6257715862414364, + "learning_rate": 5.841711997018864e-07, + "loss": 0.2626, + "step": 33497 + }, + { + "epoch": 1.5692134726191034, + "grad_norm": 0.5809648176826591, + "learning_rate": 5.840493653619056e-07, + "loss": 0.2725, + "step": 33498 + }, + { + "epoch": 1.5692603176090318, + "grad_norm": 0.5701036500281598, + "learning_rate": 5.839275420477392e-07, + "loss": 0.2619, + "step": 33499 + }, + { + "epoch": 1.56930716259896, + "grad_norm": 0.5943942871677697, + "learning_rate": 5.838057297600879e-07, + "loss": 0.276, + "step": 33500 + }, + { + "epoch": 1.5693540075888883, + "grad_norm": 0.6139533111220272, + "learning_rate": 5.83683928499654e-07, + "loss": 0.2857, + "step": 33501 + }, + { + "epoch": 1.5694008525788168, + "grad_norm": 0.6072829663181271, + "learning_rate": 5.835621382671366e-07, + "loss": 0.2596, + "step": 33502 + }, + { + "epoch": 1.569447697568745, + "grad_norm": 0.6321965539681039, + "learning_rate": 5.834403590632382e-07, + "loss": 0.2818, + "step": 33503 + }, + { + "epoch": 1.5694945425586733, + "grad_norm": 0.6011690682334161, + "learning_rate": 5.833185908886582e-07, + "loss": 0.2819, + "step": 33504 + }, + { + "epoch": 1.5695413875486017, + "grad_norm": 0.6225935609887884, + "learning_rate": 5.831968337440979e-07, + "loss": 0.258, + "step": 33505 + }, + { + "epoch": 1.56958823253853, + "grad_norm": 0.583964314616602, + "learning_rate": 5.83075087630258e-07, + "loss": 0.2776, + "step": 33506 + }, + { + "epoch": 1.5696350775284582, + "grad_norm": 0.5694226836995784, + "learning_rate": 5.829533525478393e-07, + "loss": 0.2517, + "step": 33507 + }, + { + "epoch": 1.5696819225183867, + "grad_norm": 0.678912755119447, + "learning_rate": 5.828316284975427e-07, + "loss": 0.2939, + "step": 33508 + }, + { + "epoch": 1.5697287675083151, + "grad_norm": 0.5667354484913069, + "learning_rate": 5.827099154800683e-07, + "loss": 0.2512, + "step": 33509 + }, + { + "epoch": 1.5697756124982432, + "grad_norm": 0.5802916308392798, + "learning_rate": 5.825882134961158e-07, + "loss": 0.2612, + "step": 33510 + }, + { + "epoch": 1.5698224574881716, + "grad_norm": 0.602700696320932, + "learning_rate": 5.824665225463863e-07, + "loss": 0.2693, + "step": 33511 + }, + { + "epoch": 1.5698693024781, + "grad_norm": 0.6174554132605565, + "learning_rate": 5.823448426315798e-07, + "loss": 0.2639, + "step": 33512 + }, + { + "epoch": 1.5699161474680283, + "grad_norm": 0.5965826404818985, + "learning_rate": 5.82223173752397e-07, + "loss": 0.2831, + "step": 33513 + }, + { + "epoch": 1.5699629924579566, + "grad_norm": 0.5645082014711037, + "learning_rate": 5.821015159095375e-07, + "loss": 0.2625, + "step": 33514 + }, + { + "epoch": 1.570009837447885, + "grad_norm": 0.6386682735877903, + "learning_rate": 5.819798691037026e-07, + "loss": 0.2814, + "step": 33515 + }, + { + "epoch": 1.5700566824378133, + "grad_norm": 0.5885563059581407, + "learning_rate": 5.818582333355916e-07, + "loss": 0.2582, + "step": 33516 + }, + { + "epoch": 1.5701035274277415, + "grad_norm": 0.6170582991796869, + "learning_rate": 5.817366086059034e-07, + "loss": 0.273, + "step": 33517 + }, + { + "epoch": 1.57015037241767, + "grad_norm": 0.5865288297191038, + "learning_rate": 5.816149949153391e-07, + "loss": 0.2644, + "step": 33518 + }, + { + "epoch": 1.5701972174075982, + "grad_norm": 0.5413872256891016, + "learning_rate": 5.814933922645982e-07, + "loss": 0.2499, + "step": 33519 + }, + { + "epoch": 1.5702440623975265, + "grad_norm": 0.5338249840312302, + "learning_rate": 5.813718006543806e-07, + "loss": 0.2517, + "step": 33520 + }, + { + "epoch": 1.570290907387455, + "grad_norm": 0.6192369572182563, + "learning_rate": 5.812502200853871e-07, + "loss": 0.2765, + "step": 33521 + }, + { + "epoch": 1.5703377523773834, + "grad_norm": 0.5830033593970998, + "learning_rate": 5.811286505583152e-07, + "loss": 0.2644, + "step": 33522 + }, + { + "epoch": 1.5703845973673116, + "grad_norm": 0.604566968667846, + "learning_rate": 5.810070920738664e-07, + "loss": 0.2852, + "step": 33523 + }, + { + "epoch": 1.5704314423572399, + "grad_norm": 0.6348228612039112, + "learning_rate": 5.808855446327391e-07, + "loss": 0.2967, + "step": 33524 + }, + { + "epoch": 1.5704782873471683, + "grad_norm": 0.6064579716114147, + "learning_rate": 5.807640082356328e-07, + "loss": 0.2772, + "step": 33525 + }, + { + "epoch": 1.5705251323370966, + "grad_norm": 0.5678386468744304, + "learning_rate": 5.806424828832475e-07, + "loss": 0.2664, + "step": 33526 + }, + { + "epoch": 1.5705719773270248, + "grad_norm": 0.5994655279523466, + "learning_rate": 5.805209685762828e-07, + "loss": 0.2673, + "step": 33527 + }, + { + "epoch": 1.5706188223169533, + "grad_norm": 0.6420889059519567, + "learning_rate": 5.803994653154366e-07, + "loss": 0.2873, + "step": 33528 + }, + { + "epoch": 1.5706656673068815, + "grad_norm": 0.6503204311035072, + "learning_rate": 5.8027797310141e-07, + "loss": 0.2721, + "step": 33529 + }, + { + "epoch": 1.5707125122968097, + "grad_norm": 0.6183224920043138, + "learning_rate": 5.801564919349003e-07, + "loss": 0.2791, + "step": 33530 + }, + { + "epoch": 1.5707593572867382, + "grad_norm": 0.6087017284323799, + "learning_rate": 5.800350218166076e-07, + "loss": 0.2806, + "step": 33531 + }, + { + "epoch": 1.5708062022766667, + "grad_norm": 0.6074751308960238, + "learning_rate": 5.799135627472308e-07, + "loss": 0.2648, + "step": 33532 + }, + { + "epoch": 1.5708530472665947, + "grad_norm": 0.6384219586309253, + "learning_rate": 5.797921147274696e-07, + "loss": 0.2856, + "step": 33533 + }, + { + "epoch": 1.5708998922565232, + "grad_norm": 0.6245149257867115, + "learning_rate": 5.796706777580213e-07, + "loss": 0.2828, + "step": 33534 + }, + { + "epoch": 1.5709467372464516, + "grad_norm": 0.5986160649669445, + "learning_rate": 5.795492518395857e-07, + "loss": 0.2741, + "step": 33535 + }, + { + "epoch": 1.5709935822363799, + "grad_norm": 0.5751588220028366, + "learning_rate": 5.79427836972862e-07, + "loss": 0.261, + "step": 33536 + }, + { + "epoch": 1.571040427226308, + "grad_norm": 0.6084569050615819, + "learning_rate": 5.793064331585477e-07, + "loss": 0.2781, + "step": 33537 + }, + { + "epoch": 1.5710872722162366, + "grad_norm": 0.5988158406738356, + "learning_rate": 5.791850403973422e-07, + "loss": 0.2646, + "step": 33538 + }, + { + "epoch": 1.5711341172061648, + "grad_norm": 0.6134539934944334, + "learning_rate": 5.790636586899448e-07, + "loss": 0.2716, + "step": 33539 + }, + { + "epoch": 1.571180962196093, + "grad_norm": 0.6682855868847752, + "learning_rate": 5.789422880370524e-07, + "loss": 0.2877, + "step": 33540 + }, + { + "epoch": 1.5712278071860215, + "grad_norm": 0.5907542464962157, + "learning_rate": 5.788209284393642e-07, + "loss": 0.2759, + "step": 33541 + }, + { + "epoch": 1.5712746521759497, + "grad_norm": 0.5973471927195353, + "learning_rate": 5.786995798975789e-07, + "loss": 0.2746, + "step": 33542 + }, + { + "epoch": 1.571321497165878, + "grad_norm": 0.5967268151212383, + "learning_rate": 5.785782424123954e-07, + "loss": 0.2542, + "step": 33543 + }, + { + "epoch": 1.5713683421558065, + "grad_norm": 0.5427678547454717, + "learning_rate": 5.784569159845102e-07, + "loss": 0.2556, + "step": 33544 + }, + { + "epoch": 1.571415187145735, + "grad_norm": 0.5944201867905118, + "learning_rate": 5.783356006146234e-07, + "loss": 0.273, + "step": 33545 + }, + { + "epoch": 1.571462032135663, + "grad_norm": 0.5822563865979109, + "learning_rate": 5.782142963034316e-07, + "loss": 0.2611, + "step": 33546 + }, + { + "epoch": 1.5715088771255914, + "grad_norm": 0.628909336568835, + "learning_rate": 5.780930030516333e-07, + "loss": 0.2786, + "step": 33547 + }, + { + "epoch": 1.5715557221155199, + "grad_norm": 0.6132879961799006, + "learning_rate": 5.779717208599267e-07, + "loss": 0.273, + "step": 33548 + }, + { + "epoch": 1.571602567105448, + "grad_norm": 0.5896216458406899, + "learning_rate": 5.778504497290096e-07, + "loss": 0.266, + "step": 33549 + }, + { + "epoch": 1.5716494120953763, + "grad_norm": 0.590960027050907, + "learning_rate": 5.777291896595811e-07, + "loss": 0.2644, + "step": 33550 + }, + { + "epoch": 1.5716962570853048, + "grad_norm": 0.6537677988717736, + "learning_rate": 5.77607940652338e-07, + "loss": 0.2807, + "step": 33551 + }, + { + "epoch": 1.571743102075233, + "grad_norm": 0.5924547467967922, + "learning_rate": 5.774867027079769e-07, + "loss": 0.266, + "step": 33552 + }, + { + "epoch": 1.5717899470651613, + "grad_norm": 0.5940887688187295, + "learning_rate": 5.773654758271971e-07, + "loss": 0.2809, + "step": 33553 + }, + { + "epoch": 1.5718367920550897, + "grad_norm": 0.5878012993118511, + "learning_rate": 5.772442600106954e-07, + "loss": 0.2714, + "step": 33554 + }, + { + "epoch": 1.571883637045018, + "grad_norm": 0.6306744452181355, + "learning_rate": 5.7712305525917e-07, + "loss": 0.2692, + "step": 33555 + }, + { + "epoch": 1.5719304820349462, + "grad_norm": 0.5674407044640128, + "learning_rate": 5.770018615733178e-07, + "loss": 0.2647, + "step": 33556 + }, + { + "epoch": 1.5719773270248747, + "grad_norm": 0.5909095950188361, + "learning_rate": 5.768806789538375e-07, + "loss": 0.2673, + "step": 33557 + }, + { + "epoch": 1.5720241720148032, + "grad_norm": 0.6017575766487966, + "learning_rate": 5.767595074014254e-07, + "loss": 0.265, + "step": 33558 + }, + { + "epoch": 1.5720710170047314, + "grad_norm": 0.6081430637371689, + "learning_rate": 5.766383469167783e-07, + "loss": 0.2842, + "step": 33559 + }, + { + "epoch": 1.5721178619946596, + "grad_norm": 0.5672067420377886, + "learning_rate": 5.765171975005943e-07, + "loss": 0.2678, + "step": 33560 + }, + { + "epoch": 1.572164706984588, + "grad_norm": 0.5671067975759552, + "learning_rate": 5.7639605915357e-07, + "loss": 0.2699, + "step": 33561 + }, + { + "epoch": 1.5722115519745163, + "grad_norm": 0.5625740489548641, + "learning_rate": 5.762749318764033e-07, + "loss": 0.2515, + "step": 33562 + }, + { + "epoch": 1.5722583969644446, + "grad_norm": 0.593315139144813, + "learning_rate": 5.761538156697904e-07, + "loss": 0.2787, + "step": 33563 + }, + { + "epoch": 1.572305241954373, + "grad_norm": 0.6242489876602568, + "learning_rate": 5.760327105344299e-07, + "loss": 0.2722, + "step": 33564 + }, + { + "epoch": 1.5723520869443013, + "grad_norm": 0.5833565531821227, + "learning_rate": 5.759116164710174e-07, + "loss": 0.2676, + "step": 33565 + }, + { + "epoch": 1.5723989319342295, + "grad_norm": 0.6895497439667837, + "learning_rate": 5.757905334802491e-07, + "loss": 0.2868, + "step": 33566 + }, + { + "epoch": 1.572445776924158, + "grad_norm": 0.616794568722109, + "learning_rate": 5.756694615628228e-07, + "loss": 0.2848, + "step": 33567 + }, + { + "epoch": 1.5724926219140865, + "grad_norm": 0.5994324365756121, + "learning_rate": 5.755484007194351e-07, + "loss": 0.2706, + "step": 33568 + }, + { + "epoch": 1.5725394669040145, + "grad_norm": 0.6529821005851304, + "learning_rate": 5.754273509507827e-07, + "loss": 0.2895, + "step": 33569 + }, + { + "epoch": 1.572586311893943, + "grad_norm": 0.6214900967959017, + "learning_rate": 5.753063122575628e-07, + "loss": 0.2791, + "step": 33570 + }, + { + "epoch": 1.5726331568838714, + "grad_norm": 0.6972837686281997, + "learning_rate": 5.751852846404704e-07, + "loss": 0.2656, + "step": 33571 + }, + { + "epoch": 1.5726800018737996, + "grad_norm": 0.6248008394313771, + "learning_rate": 5.750642681002039e-07, + "loss": 0.2685, + "step": 33572 + }, + { + "epoch": 1.5727268468637279, + "grad_norm": 0.5948508678267729, + "learning_rate": 5.74943262637458e-07, + "loss": 0.2643, + "step": 33573 + }, + { + "epoch": 1.5727736918536563, + "grad_norm": 0.6567382114928253, + "learning_rate": 5.748222682529297e-07, + "loss": 0.2667, + "step": 33574 + }, + { + "epoch": 1.5728205368435846, + "grad_norm": 0.6134512730406142, + "learning_rate": 5.747012849473155e-07, + "loss": 0.2676, + "step": 33575 + }, + { + "epoch": 1.5728673818335128, + "grad_norm": 0.577076408877641, + "learning_rate": 5.74580312721312e-07, + "loss": 0.2548, + "step": 33576 + }, + { + "epoch": 1.5729142268234413, + "grad_norm": 0.6069954766549074, + "learning_rate": 5.744593515756142e-07, + "loss": 0.2753, + "step": 33577 + }, + { + "epoch": 1.5729610718133695, + "grad_norm": 0.5727994354323402, + "learning_rate": 5.743384015109196e-07, + "loss": 0.2526, + "step": 33578 + }, + { + "epoch": 1.5730079168032978, + "grad_norm": 0.603847850327296, + "learning_rate": 5.742174625279229e-07, + "loss": 0.2614, + "step": 33579 + }, + { + "epoch": 1.5730547617932262, + "grad_norm": 0.6212828971946335, + "learning_rate": 5.740965346273206e-07, + "loss": 0.2853, + "step": 33580 + }, + { + "epoch": 1.5731016067831547, + "grad_norm": 0.648359868652038, + "learning_rate": 5.739756178098085e-07, + "loss": 0.2835, + "step": 33581 + }, + { + "epoch": 1.5731484517730827, + "grad_norm": 0.5901519487241846, + "learning_rate": 5.738547120760837e-07, + "loss": 0.2577, + "step": 33582 + }, + { + "epoch": 1.5731952967630112, + "grad_norm": 0.6005592443827127, + "learning_rate": 5.7373381742684e-07, + "loss": 0.2661, + "step": 33583 + }, + { + "epoch": 1.5732421417529396, + "grad_norm": 0.6278186206571259, + "learning_rate": 5.736129338627741e-07, + "loss": 0.303, + "step": 33584 + }, + { + "epoch": 1.5732889867428679, + "grad_norm": 0.6212200578679855, + "learning_rate": 5.734920613845821e-07, + "loss": 0.2568, + "step": 33585 + }, + { + "epoch": 1.5733358317327961, + "grad_norm": 0.592748347870144, + "learning_rate": 5.733711999929586e-07, + "loss": 0.2643, + "step": 33586 + }, + { + "epoch": 1.5733826767227246, + "grad_norm": 0.6003629748609097, + "learning_rate": 5.732503496885994e-07, + "loss": 0.2654, + "step": 33587 + }, + { + "epoch": 1.5734295217126528, + "grad_norm": 0.5830305985995494, + "learning_rate": 5.731295104722009e-07, + "loss": 0.25, + "step": 33588 + }, + { + "epoch": 1.573476366702581, + "grad_norm": 0.5658909045619264, + "learning_rate": 5.730086823444572e-07, + "loss": 0.2625, + "step": 33589 + }, + { + "epoch": 1.5735232116925095, + "grad_norm": 0.5824341788450458, + "learning_rate": 5.728878653060643e-07, + "loss": 0.28, + "step": 33590 + }, + { + "epoch": 1.5735700566824378, + "grad_norm": 0.5999900127560646, + "learning_rate": 5.727670593577172e-07, + "loss": 0.2863, + "step": 33591 + }, + { + "epoch": 1.573616901672366, + "grad_norm": 0.5970224746198064, + "learning_rate": 5.726462645001121e-07, + "loss": 0.2682, + "step": 33592 + }, + { + "epoch": 1.5736637466622945, + "grad_norm": 0.6016124379216152, + "learning_rate": 5.725254807339425e-07, + "loss": 0.2785, + "step": 33593 + }, + { + "epoch": 1.573710591652223, + "grad_norm": 0.6122888796073674, + "learning_rate": 5.724047080599052e-07, + "loss": 0.259, + "step": 33594 + }, + { + "epoch": 1.5737574366421512, + "grad_norm": 0.6341175397190838, + "learning_rate": 5.722839464786934e-07, + "loss": 0.272, + "step": 33595 + }, + { + "epoch": 1.5738042816320794, + "grad_norm": 0.6099873711475057, + "learning_rate": 5.721631959910029e-07, + "loss": 0.2784, + "step": 33596 + }, + { + "epoch": 1.5738511266220079, + "grad_norm": 0.6188562093136896, + "learning_rate": 5.720424565975289e-07, + "loss": 0.2821, + "step": 33597 + }, + { + "epoch": 1.5738979716119361, + "grad_norm": 0.5872160387974403, + "learning_rate": 5.719217282989659e-07, + "loss": 0.2753, + "step": 33598 + }, + { + "epoch": 1.5739448166018644, + "grad_norm": 0.5923197147673075, + "learning_rate": 5.718010110960093e-07, + "loss": 0.2826, + "step": 33599 + }, + { + "epoch": 1.5739916615917928, + "grad_norm": 0.6001798286593079, + "learning_rate": 5.716803049893535e-07, + "loss": 0.2576, + "step": 33600 + }, + { + "epoch": 1.574038506581721, + "grad_norm": 0.6166922899712626, + "learning_rate": 5.715596099796922e-07, + "loss": 0.2624, + "step": 33601 + }, + { + "epoch": 1.5740853515716493, + "grad_norm": 0.5629627253428036, + "learning_rate": 5.714389260677203e-07, + "loss": 0.2575, + "step": 33602 + }, + { + "epoch": 1.5741321965615778, + "grad_norm": 0.5969998904190459, + "learning_rate": 5.713182532541328e-07, + "loss": 0.267, + "step": 33603 + }, + { + "epoch": 1.5741790415515062, + "grad_norm": 0.6269916940626116, + "learning_rate": 5.711975915396242e-07, + "loss": 0.2921, + "step": 33604 + }, + { + "epoch": 1.5742258865414342, + "grad_norm": 0.6057076694977324, + "learning_rate": 5.710769409248887e-07, + "loss": 0.2726, + "step": 33605 + }, + { + "epoch": 1.5742727315313627, + "grad_norm": 0.6100077097778925, + "learning_rate": 5.709563014106209e-07, + "loss": 0.2818, + "step": 33606 + }, + { + "epoch": 1.5743195765212912, + "grad_norm": 0.5861444769076568, + "learning_rate": 5.70835672997515e-07, + "loss": 0.2714, + "step": 33607 + }, + { + "epoch": 1.5743664215112194, + "grad_norm": 0.5733366422753381, + "learning_rate": 5.707150556862643e-07, + "loss": 0.2601, + "step": 33608 + }, + { + "epoch": 1.5744132665011477, + "grad_norm": 0.6157038242183903, + "learning_rate": 5.705944494775634e-07, + "loss": 0.2829, + "step": 33609 + }, + { + "epoch": 1.5744601114910761, + "grad_norm": 0.6193428656463345, + "learning_rate": 5.704738543721067e-07, + "loss": 0.2811, + "step": 33610 + }, + { + "epoch": 1.5745069564810044, + "grad_norm": 0.5703644445683106, + "learning_rate": 5.703532703705878e-07, + "loss": 0.262, + "step": 33611 + }, + { + "epoch": 1.5745538014709326, + "grad_norm": 0.564806506874375, + "learning_rate": 5.702326974737016e-07, + "loss": 0.2543, + "step": 33612 + }, + { + "epoch": 1.574600646460861, + "grad_norm": 0.5758282651075801, + "learning_rate": 5.701121356821404e-07, + "loss": 0.267, + "step": 33613 + }, + { + "epoch": 1.5746474914507893, + "grad_norm": 0.5642092129864444, + "learning_rate": 5.699915849965995e-07, + "loss": 0.2666, + "step": 33614 + }, + { + "epoch": 1.5746943364407175, + "grad_norm": 0.6484752055146454, + "learning_rate": 5.698710454177714e-07, + "loss": 0.2817, + "step": 33615 + }, + { + "epoch": 1.574741181430646, + "grad_norm": 0.6078377224333649, + "learning_rate": 5.697505169463502e-07, + "loss": 0.2628, + "step": 33616 + }, + { + "epoch": 1.5747880264205745, + "grad_norm": 0.6220953683288922, + "learning_rate": 5.696299995830296e-07, + "loss": 0.2835, + "step": 33617 + }, + { + "epoch": 1.5748348714105025, + "grad_norm": 0.6545027648719539, + "learning_rate": 5.695094933285039e-07, + "loss": 0.2821, + "step": 33618 + }, + { + "epoch": 1.574881716400431, + "grad_norm": 0.582157800734237, + "learning_rate": 5.693889981834652e-07, + "loss": 0.2493, + "step": 33619 + }, + { + "epoch": 1.5749285613903594, + "grad_norm": 0.6295997311565255, + "learning_rate": 5.692685141486076e-07, + "loss": 0.2884, + "step": 33620 + }, + { + "epoch": 1.5749754063802877, + "grad_norm": 0.6341087171339284, + "learning_rate": 5.691480412246251e-07, + "loss": 0.2727, + "step": 33621 + }, + { + "epoch": 1.575022251370216, + "grad_norm": 0.6049903911284881, + "learning_rate": 5.690275794122096e-07, + "loss": 0.2788, + "step": 33622 + }, + { + "epoch": 1.5750690963601444, + "grad_norm": 0.6128710845124018, + "learning_rate": 5.689071287120552e-07, + "loss": 0.2666, + "step": 33623 + }, + { + "epoch": 1.5751159413500726, + "grad_norm": 0.5375826556362654, + "learning_rate": 5.687866891248547e-07, + "loss": 0.246, + "step": 33624 + }, + { + "epoch": 1.5751627863400008, + "grad_norm": 0.6249191038108208, + "learning_rate": 5.686662606513021e-07, + "loss": 0.2788, + "step": 33625 + }, + { + "epoch": 1.5752096313299293, + "grad_norm": 0.603696556856864, + "learning_rate": 5.685458432920893e-07, + "loss": 0.2762, + "step": 33626 + }, + { + "epoch": 1.5752564763198575, + "grad_norm": 0.5887394162239493, + "learning_rate": 5.684254370479101e-07, + "loss": 0.2699, + "step": 33627 + }, + { + "epoch": 1.5753033213097858, + "grad_norm": 0.6343665072838138, + "learning_rate": 5.683050419194566e-07, + "loss": 0.2817, + "step": 33628 + }, + { + "epoch": 1.5753501662997142, + "grad_norm": 0.6011009024702766, + "learning_rate": 5.681846579074218e-07, + "loss": 0.2624, + "step": 33629 + }, + { + "epoch": 1.5753970112896427, + "grad_norm": 0.5965131085424035, + "learning_rate": 5.68064285012499e-07, + "loss": 0.2678, + "step": 33630 + }, + { + "epoch": 1.575443856279571, + "grad_norm": 0.5997244297855896, + "learning_rate": 5.679439232353811e-07, + "loss": 0.2741, + "step": 33631 + }, + { + "epoch": 1.5754907012694992, + "grad_norm": 0.6079847263585353, + "learning_rate": 5.678235725767597e-07, + "loss": 0.2619, + "step": 33632 + }, + { + "epoch": 1.5755375462594277, + "grad_norm": 0.5970005914650386, + "learning_rate": 5.677032330373283e-07, + "loss": 0.277, + "step": 33633 + }, + { + "epoch": 1.575584391249356, + "grad_norm": 0.5846460991913353, + "learning_rate": 5.675829046177797e-07, + "loss": 0.2678, + "step": 33634 + }, + { + "epoch": 1.5756312362392841, + "grad_norm": 0.5977162862510975, + "learning_rate": 5.67462587318805e-07, + "loss": 0.2772, + "step": 33635 + }, + { + "epoch": 1.5756780812292126, + "grad_norm": 0.571380228406977, + "learning_rate": 5.673422811410973e-07, + "loss": 0.2662, + "step": 33636 + }, + { + "epoch": 1.5757249262191408, + "grad_norm": 0.6133884315164392, + "learning_rate": 5.672219860853498e-07, + "loss": 0.284, + "step": 33637 + }, + { + "epoch": 1.575771771209069, + "grad_norm": 0.5914196995812976, + "learning_rate": 5.671017021522532e-07, + "loss": 0.2731, + "step": 33638 + }, + { + "epoch": 1.5758186161989975, + "grad_norm": 0.5851419720077868, + "learning_rate": 5.669814293425007e-07, + "loss": 0.2627, + "step": 33639 + }, + { + "epoch": 1.575865461188926, + "grad_norm": 0.6439726094830288, + "learning_rate": 5.668611676567839e-07, + "loss": 0.2875, + "step": 33640 + }, + { + "epoch": 1.575912306178854, + "grad_norm": 0.6016410178513755, + "learning_rate": 5.667409170957964e-07, + "loss": 0.2646, + "step": 33641 + }, + { + "epoch": 1.5759591511687825, + "grad_norm": 0.6059925756469626, + "learning_rate": 5.666206776602279e-07, + "loss": 0.2684, + "step": 33642 + }, + { + "epoch": 1.576005996158711, + "grad_norm": 0.6165441790836934, + "learning_rate": 5.665004493507723e-07, + "loss": 0.2675, + "step": 33643 + }, + { + "epoch": 1.5760528411486392, + "grad_norm": 0.5941394885554407, + "learning_rate": 5.6638023216812e-07, + "loss": 0.2665, + "step": 33644 + }, + { + "epoch": 1.5760996861385674, + "grad_norm": 0.6176131737683535, + "learning_rate": 5.662600261129633e-07, + "loss": 0.2842, + "step": 33645 + }, + { + "epoch": 1.576146531128496, + "grad_norm": 0.5684853182061894, + "learning_rate": 5.661398311859942e-07, + "loss": 0.2513, + "step": 33646 + }, + { + "epoch": 1.5761933761184241, + "grad_norm": 0.605170439092976, + "learning_rate": 5.660196473879043e-07, + "loss": 0.2633, + "step": 33647 + }, + { + "epoch": 1.5762402211083524, + "grad_norm": 0.6021466764820325, + "learning_rate": 5.658994747193861e-07, + "loss": 0.282, + "step": 33648 + }, + { + "epoch": 1.5762870660982808, + "grad_norm": 0.5926389719554189, + "learning_rate": 5.657793131811301e-07, + "loss": 0.2765, + "step": 33649 + }, + { + "epoch": 1.576333911088209, + "grad_norm": 0.6291246128686104, + "learning_rate": 5.656591627738275e-07, + "loss": 0.2823, + "step": 33650 + }, + { + "epoch": 1.5763807560781373, + "grad_norm": 0.5516981170385796, + "learning_rate": 5.655390234981701e-07, + "loss": 0.2579, + "step": 33651 + }, + { + "epoch": 1.5764276010680658, + "grad_norm": 0.5831933034790476, + "learning_rate": 5.654188953548495e-07, + "loss": 0.2629, + "step": 33652 + }, + { + "epoch": 1.5764744460579942, + "grad_norm": 0.6504961748898517, + "learning_rate": 5.652987783445568e-07, + "loss": 0.2885, + "step": 33653 + }, + { + "epoch": 1.5765212910479223, + "grad_norm": 0.6271480018724368, + "learning_rate": 5.651786724679834e-07, + "loss": 0.2761, + "step": 33654 + }, + { + "epoch": 1.5765681360378507, + "grad_norm": 0.5685391067778088, + "learning_rate": 5.65058577725821e-07, + "loss": 0.2595, + "step": 33655 + }, + { + "epoch": 1.5766149810277792, + "grad_norm": 0.6225464639308168, + "learning_rate": 5.649384941187605e-07, + "loss": 0.2918, + "step": 33656 + }, + { + "epoch": 1.5766618260177074, + "grad_norm": 0.5806987198012609, + "learning_rate": 5.648184216474917e-07, + "loss": 0.2709, + "step": 33657 + }, + { + "epoch": 1.5767086710076357, + "grad_norm": 0.6091139371247644, + "learning_rate": 5.646983603127065e-07, + "loss": 0.2816, + "step": 33658 + }, + { + "epoch": 1.5767555159975641, + "grad_norm": 0.5543621407822017, + "learning_rate": 5.645783101150958e-07, + "loss": 0.2588, + "step": 33659 + }, + { + "epoch": 1.5768023609874924, + "grad_norm": 0.6152411238019877, + "learning_rate": 5.644582710553506e-07, + "loss": 0.2852, + "step": 33660 + }, + { + "epoch": 1.5768492059774206, + "grad_norm": 0.5866524417984951, + "learning_rate": 5.64338243134162e-07, + "loss": 0.2725, + "step": 33661 + }, + { + "epoch": 1.576896050967349, + "grad_norm": 0.5633674743320208, + "learning_rate": 5.6421822635222e-07, + "loss": 0.2603, + "step": 33662 + }, + { + "epoch": 1.5769428959572773, + "grad_norm": 0.5751493427913377, + "learning_rate": 5.640982207102161e-07, + "loss": 0.2563, + "step": 33663 + }, + { + "epoch": 1.5769897409472056, + "grad_norm": 0.6538509204975778, + "learning_rate": 5.639782262088395e-07, + "loss": 0.2866, + "step": 33664 + }, + { + "epoch": 1.577036585937134, + "grad_norm": 0.6305974790112365, + "learning_rate": 5.638582428487818e-07, + "loss": 0.279, + "step": 33665 + }, + { + "epoch": 1.5770834309270625, + "grad_norm": 0.609810308143948, + "learning_rate": 5.637382706307331e-07, + "loss": 0.2639, + "step": 33666 + }, + { + "epoch": 1.5771302759169907, + "grad_norm": 0.5876209428348916, + "learning_rate": 5.636183095553849e-07, + "loss": 0.26, + "step": 33667 + }, + { + "epoch": 1.577177120906919, + "grad_norm": 0.6061351281606174, + "learning_rate": 5.634983596234258e-07, + "loss": 0.2808, + "step": 33668 + }, + { + "epoch": 1.5772239658968474, + "grad_norm": 0.5416967610463002, + "learning_rate": 5.633784208355478e-07, + "loss": 0.2515, + "step": 33669 + }, + { + "epoch": 1.5772708108867757, + "grad_norm": 0.6276903384208021, + "learning_rate": 5.632584931924393e-07, + "loss": 0.276, + "step": 33670 + }, + { + "epoch": 1.577317655876704, + "grad_norm": 0.6406014318697668, + "learning_rate": 5.631385766947914e-07, + "loss": 0.2825, + "step": 33671 + }, + { + "epoch": 1.5773645008666324, + "grad_norm": 0.5726570188326998, + "learning_rate": 5.630186713432942e-07, + "loss": 0.2717, + "step": 33672 + }, + { + "epoch": 1.5774113458565606, + "grad_norm": 0.5862994528841962, + "learning_rate": 5.628987771386385e-07, + "loss": 0.2617, + "step": 33673 + }, + { + "epoch": 1.5774581908464889, + "grad_norm": 0.6312845281528281, + "learning_rate": 5.627788940815127e-07, + "loss": 0.2691, + "step": 33674 + }, + { + "epoch": 1.5775050358364173, + "grad_norm": 0.6374779678718586, + "learning_rate": 5.626590221726075e-07, + "loss": 0.2796, + "step": 33675 + }, + { + "epoch": 1.5775518808263458, + "grad_norm": 0.623420993709431, + "learning_rate": 5.625391614126136e-07, + "loss": 0.284, + "step": 33676 + }, + { + "epoch": 1.5775987258162738, + "grad_norm": 0.6035249607726036, + "learning_rate": 5.624193118022189e-07, + "loss": 0.2553, + "step": 33677 + }, + { + "epoch": 1.5776455708062023, + "grad_norm": 0.5661094003504846, + "learning_rate": 5.622994733421142e-07, + "loss": 0.2609, + "step": 33678 + }, + { + "epoch": 1.5776924157961307, + "grad_norm": 0.5776944627023212, + "learning_rate": 5.621796460329896e-07, + "loss": 0.2806, + "step": 33679 + }, + { + "epoch": 1.577739260786059, + "grad_norm": 0.5839065592767405, + "learning_rate": 5.620598298755333e-07, + "loss": 0.2606, + "step": 33680 + }, + { + "epoch": 1.5777861057759872, + "grad_norm": 0.604224613606855, + "learning_rate": 5.619400248704357e-07, + "loss": 0.2761, + "step": 33681 + }, + { + "epoch": 1.5778329507659157, + "grad_norm": 0.5853983533700456, + "learning_rate": 5.618202310183862e-07, + "loss": 0.2581, + "step": 33682 + }, + { + "epoch": 1.577879795755844, + "grad_norm": 0.5620648925285363, + "learning_rate": 5.617004483200747e-07, + "loss": 0.2617, + "step": 33683 + }, + { + "epoch": 1.5779266407457722, + "grad_norm": 0.5861772255410498, + "learning_rate": 5.615806767761895e-07, + "loss": 0.2655, + "step": 33684 + }, + { + "epoch": 1.5779734857357006, + "grad_norm": 0.6471323537558051, + "learning_rate": 5.614609163874207e-07, + "loss": 0.2754, + "step": 33685 + }, + { + "epoch": 1.5780203307256289, + "grad_norm": 0.6113221550473757, + "learning_rate": 5.613411671544566e-07, + "loss": 0.2679, + "step": 33686 + }, + { + "epoch": 1.578067175715557, + "grad_norm": 0.5603521789681554, + "learning_rate": 5.612214290779866e-07, + "loss": 0.263, + "step": 33687 + }, + { + "epoch": 1.5781140207054856, + "grad_norm": 0.6510646837462424, + "learning_rate": 5.611017021587e-07, + "loss": 0.2915, + "step": 33688 + }, + { + "epoch": 1.578160865695414, + "grad_norm": 0.5735375760222577, + "learning_rate": 5.60981986397286e-07, + "loss": 0.2777, + "step": 33689 + }, + { + "epoch": 1.578207710685342, + "grad_norm": 0.5550090061393947, + "learning_rate": 5.60862281794434e-07, + "loss": 0.2619, + "step": 33690 + }, + { + "epoch": 1.5782545556752705, + "grad_norm": 0.5990492205732583, + "learning_rate": 5.607425883508313e-07, + "loss": 0.2581, + "step": 33691 + }, + { + "epoch": 1.578301400665199, + "grad_norm": 0.5538618638946936, + "learning_rate": 5.606229060671684e-07, + "loss": 0.2673, + "step": 33692 + }, + { + "epoch": 1.5783482456551272, + "grad_norm": 0.5928509427080673, + "learning_rate": 5.605032349441325e-07, + "loss": 0.2563, + "step": 33693 + }, + { + "epoch": 1.5783950906450555, + "grad_norm": 0.5952080362054801, + "learning_rate": 5.603835749824133e-07, + "loss": 0.2738, + "step": 33694 + }, + { + "epoch": 1.578441935634984, + "grad_norm": 0.5854169352692755, + "learning_rate": 5.602639261826986e-07, + "loss": 0.2626, + "step": 33695 + }, + { + "epoch": 1.5784887806249122, + "grad_norm": 0.5591704246375568, + "learning_rate": 5.601442885456779e-07, + "loss": 0.2609, + "step": 33696 + }, + { + "epoch": 1.5785356256148404, + "grad_norm": 0.6268922601125871, + "learning_rate": 5.600246620720399e-07, + "loss": 0.2826, + "step": 33697 + }, + { + "epoch": 1.5785824706047689, + "grad_norm": 0.6550618980748357, + "learning_rate": 5.59905046762472e-07, + "loss": 0.2718, + "step": 33698 + }, + { + "epoch": 1.578629315594697, + "grad_norm": 0.6109254592578055, + "learning_rate": 5.597854426176624e-07, + "loss": 0.2611, + "step": 33699 + }, + { + "epoch": 1.5786761605846253, + "grad_norm": 0.6030002171203985, + "learning_rate": 5.596658496383001e-07, + "loss": 0.2655, + "step": 33700 + }, + { + "epoch": 1.5787230055745538, + "grad_norm": 0.599402698103007, + "learning_rate": 5.595462678250732e-07, + "loss": 0.2709, + "step": 33701 + }, + { + "epoch": 1.5787698505644823, + "grad_norm": 0.6153075231495497, + "learning_rate": 5.594266971786697e-07, + "loss": 0.2785, + "step": 33702 + }, + { + "epoch": 1.5788166955544105, + "grad_norm": 0.6091803858155481, + "learning_rate": 5.593071376997777e-07, + "loss": 0.2648, + "step": 33703 + }, + { + "epoch": 1.5788635405443388, + "grad_norm": 0.648978921148286, + "learning_rate": 5.59187589389086e-07, + "loss": 0.2752, + "step": 33704 + }, + { + "epoch": 1.5789103855342672, + "grad_norm": 0.6032904369488071, + "learning_rate": 5.590680522472822e-07, + "loss": 0.2679, + "step": 33705 + }, + { + "epoch": 1.5789572305241955, + "grad_norm": 0.597545169198468, + "learning_rate": 5.58948526275053e-07, + "loss": 0.2863, + "step": 33706 + }, + { + "epoch": 1.5790040755141237, + "grad_norm": 0.6477083491697324, + "learning_rate": 5.588290114730874e-07, + "loss": 0.2735, + "step": 33707 + }, + { + "epoch": 1.5790509205040522, + "grad_norm": 0.5765411736456476, + "learning_rate": 5.587095078420726e-07, + "loss": 0.2559, + "step": 33708 + }, + { + "epoch": 1.5790977654939804, + "grad_norm": 0.573884043577552, + "learning_rate": 5.58590015382697e-07, + "loss": 0.2796, + "step": 33709 + }, + { + "epoch": 1.5791446104839086, + "grad_norm": 0.5615133374265914, + "learning_rate": 5.584705340956484e-07, + "loss": 0.2526, + "step": 33710 + }, + { + "epoch": 1.579191455473837, + "grad_norm": 0.5790129266776661, + "learning_rate": 5.583510639816131e-07, + "loss": 0.2642, + "step": 33711 + }, + { + "epoch": 1.5792383004637656, + "grad_norm": 0.5670284948439454, + "learning_rate": 5.582316050412804e-07, + "loss": 0.2639, + "step": 33712 + }, + { + "epoch": 1.5792851454536936, + "grad_norm": 0.602198562097049, + "learning_rate": 5.58112157275336e-07, + "loss": 0.261, + "step": 33713 + }, + { + "epoch": 1.579331990443622, + "grad_norm": 0.5995700882162667, + "learning_rate": 5.579927206844679e-07, + "loss": 0.2667, + "step": 33714 + }, + { + "epoch": 1.5793788354335505, + "grad_norm": 0.6180363458611992, + "learning_rate": 5.578732952693636e-07, + "loss": 0.2725, + "step": 33715 + }, + { + "epoch": 1.5794256804234788, + "grad_norm": 0.5846985479083565, + "learning_rate": 5.577538810307112e-07, + "loss": 0.2643, + "step": 33716 + }, + { + "epoch": 1.579472525413407, + "grad_norm": 0.623159206257284, + "learning_rate": 5.576344779691962e-07, + "loss": 0.2745, + "step": 33717 + }, + { + "epoch": 1.5795193704033355, + "grad_norm": 0.6012909397221166, + "learning_rate": 5.575150860855075e-07, + "loss": 0.2744, + "step": 33718 + }, + { + "epoch": 1.5795662153932637, + "grad_norm": 0.5746747698118603, + "learning_rate": 5.573957053803303e-07, + "loss": 0.269, + "step": 33719 + }, + { + "epoch": 1.579613060383192, + "grad_norm": 0.5602731471913215, + "learning_rate": 5.572763358543525e-07, + "loss": 0.2713, + "step": 33720 + }, + { + "epoch": 1.5796599053731204, + "grad_norm": 0.5601185996124235, + "learning_rate": 5.571569775082613e-07, + "loss": 0.2536, + "step": 33721 + }, + { + "epoch": 1.5797067503630486, + "grad_norm": 0.5724240356510475, + "learning_rate": 5.570376303427441e-07, + "loss": 0.2579, + "step": 33722 + }, + { + "epoch": 1.5797535953529769, + "grad_norm": 0.617225525733464, + "learning_rate": 5.569182943584864e-07, + "loss": 0.2798, + "step": 33723 + }, + { + "epoch": 1.5798004403429053, + "grad_norm": 0.5974514876303444, + "learning_rate": 5.567989695561754e-07, + "loss": 0.2736, + "step": 33724 + }, + { + "epoch": 1.5798472853328338, + "grad_norm": 0.5997326362004732, + "learning_rate": 5.566796559364985e-07, + "loss": 0.2533, + "step": 33725 + }, + { + "epoch": 1.5798941303227618, + "grad_norm": 0.6475534958247188, + "learning_rate": 5.565603535001413e-07, + "loss": 0.275, + "step": 33726 + }, + { + "epoch": 1.5799409753126903, + "grad_norm": 0.5615536701853879, + "learning_rate": 5.564410622477906e-07, + "loss": 0.2514, + "step": 33727 + }, + { + "epoch": 1.5799878203026188, + "grad_norm": 0.638969873059889, + "learning_rate": 5.563217821801336e-07, + "loss": 0.2926, + "step": 33728 + }, + { + "epoch": 1.580034665292547, + "grad_norm": 0.6213182700768717, + "learning_rate": 5.56202513297856e-07, + "loss": 0.2647, + "step": 33729 + }, + { + "epoch": 1.5800815102824752, + "grad_norm": 0.6090609755016113, + "learning_rate": 5.560832556016441e-07, + "loss": 0.2827, + "step": 33730 + }, + { + "epoch": 1.5801283552724037, + "grad_norm": 0.6029551956958212, + "learning_rate": 5.559640090921844e-07, + "loss": 0.2787, + "step": 33731 + }, + { + "epoch": 1.580175200262332, + "grad_norm": 0.5784234442510636, + "learning_rate": 5.558447737701641e-07, + "loss": 0.2775, + "step": 33732 + }, + { + "epoch": 1.5802220452522602, + "grad_norm": 0.5562799837967587, + "learning_rate": 5.557255496362676e-07, + "loss": 0.264, + "step": 33733 + }, + { + "epoch": 1.5802688902421886, + "grad_norm": 0.6028694499575948, + "learning_rate": 5.556063366911829e-07, + "loss": 0.2804, + "step": 33734 + }, + { + "epoch": 1.5803157352321169, + "grad_norm": 0.6302175435334948, + "learning_rate": 5.554871349355939e-07, + "loss": 0.2865, + "step": 33735 + }, + { + "epoch": 1.5803625802220451, + "grad_norm": 0.5981755579518309, + "learning_rate": 5.553679443701881e-07, + "loss": 0.2652, + "step": 33736 + }, + { + "epoch": 1.5804094252119736, + "grad_norm": 0.6408828525218272, + "learning_rate": 5.552487649956509e-07, + "loss": 0.2948, + "step": 33737 + }, + { + "epoch": 1.580456270201902, + "grad_norm": 0.5897451968567128, + "learning_rate": 5.551295968126683e-07, + "loss": 0.2773, + "step": 33738 + }, + { + "epoch": 1.5805031151918303, + "grad_norm": 0.5893289739577107, + "learning_rate": 5.550104398219264e-07, + "loss": 0.2633, + "step": 33739 + }, + { + "epoch": 1.5805499601817585, + "grad_norm": 0.5759546225791723, + "learning_rate": 5.548912940241108e-07, + "loss": 0.2555, + "step": 33740 + }, + { + "epoch": 1.580596805171687, + "grad_norm": 0.5970742992290997, + "learning_rate": 5.547721594199063e-07, + "loss": 0.2674, + "step": 33741 + }, + { + "epoch": 1.5806436501616152, + "grad_norm": 0.6100503688800873, + "learning_rate": 5.546530360099992e-07, + "loss": 0.2852, + "step": 33742 + }, + { + "epoch": 1.5806904951515435, + "grad_norm": 0.568806635212096, + "learning_rate": 5.545339237950747e-07, + "loss": 0.2786, + "step": 33743 + }, + { + "epoch": 1.580737340141472, + "grad_norm": 0.5986658805759404, + "learning_rate": 5.544148227758187e-07, + "loss": 0.2694, + "step": 33744 + }, + { + "epoch": 1.5807841851314002, + "grad_norm": 0.5900138295055416, + "learning_rate": 5.542957329529161e-07, + "loss": 0.2703, + "step": 33745 + }, + { + "epoch": 1.5808310301213284, + "grad_norm": 0.5833195084104946, + "learning_rate": 5.541766543270535e-07, + "loss": 0.2634, + "step": 33746 + }, + { + "epoch": 1.5808778751112569, + "grad_norm": 0.6193628718816062, + "learning_rate": 5.540575868989151e-07, + "loss": 0.2821, + "step": 33747 + }, + { + "epoch": 1.5809247201011853, + "grad_norm": 0.5892327828259993, + "learning_rate": 5.539385306691853e-07, + "loss": 0.2633, + "step": 33748 + }, + { + "epoch": 1.5809715650911134, + "grad_norm": 0.589316158126481, + "learning_rate": 5.538194856385503e-07, + "loss": 0.2881, + "step": 33749 + }, + { + "epoch": 1.5810184100810418, + "grad_norm": 0.608385359003247, + "learning_rate": 5.53700451807695e-07, + "loss": 0.2771, + "step": 33750 + }, + { + "epoch": 1.5810652550709703, + "grad_norm": 0.6166243028889716, + "learning_rate": 5.535814291773045e-07, + "loss": 0.2841, + "step": 33751 + }, + { + "epoch": 1.5811121000608985, + "grad_norm": 0.6182417648180694, + "learning_rate": 5.534624177480644e-07, + "loss": 0.2938, + "step": 33752 + }, + { + "epoch": 1.5811589450508268, + "grad_norm": 0.6090118778675516, + "learning_rate": 5.533434175206578e-07, + "loss": 0.275, + "step": 33753 + }, + { + "epoch": 1.5812057900407552, + "grad_norm": 0.6165885485980742, + "learning_rate": 5.532244284957716e-07, + "loss": 0.2746, + "step": 33754 + }, + { + "epoch": 1.5812526350306835, + "grad_norm": 0.5739166315539019, + "learning_rate": 5.531054506740887e-07, + "loss": 0.2413, + "step": 33755 + }, + { + "epoch": 1.5812994800206117, + "grad_norm": 0.5399162261815607, + "learning_rate": 5.529864840562946e-07, + "loss": 0.2653, + "step": 33756 + }, + { + "epoch": 1.5813463250105402, + "grad_norm": 0.5456934032084378, + "learning_rate": 5.528675286430741e-07, + "loss": 0.2468, + "step": 33757 + }, + { + "epoch": 1.5813931700004684, + "grad_norm": 0.5994769528630882, + "learning_rate": 5.527485844351113e-07, + "loss": 0.2759, + "step": 33758 + }, + { + "epoch": 1.5814400149903967, + "grad_norm": 0.5703158406527914, + "learning_rate": 5.526296514330918e-07, + "loss": 0.2763, + "step": 33759 + }, + { + "epoch": 1.5814868599803251, + "grad_norm": 0.5858484811472315, + "learning_rate": 5.525107296376983e-07, + "loss": 0.272, + "step": 33760 + }, + { + "epoch": 1.5815337049702536, + "grad_norm": 0.5514496836292696, + "learning_rate": 5.523918190496169e-07, + "loss": 0.2555, + "step": 33761 + }, + { + "epoch": 1.5815805499601816, + "grad_norm": 0.6346846031973041, + "learning_rate": 5.522729196695303e-07, + "loss": 0.2654, + "step": 33762 + }, + { + "epoch": 1.58162739495011, + "grad_norm": 0.6453898275422365, + "learning_rate": 5.521540314981239e-07, + "loss": 0.2741, + "step": 33763 + }, + { + "epoch": 1.5816742399400385, + "grad_norm": 0.5593178428806093, + "learning_rate": 5.520351545360811e-07, + "loss": 0.2626, + "step": 33764 + }, + { + "epoch": 1.5817210849299668, + "grad_norm": 0.6110190982368758, + "learning_rate": 5.51916288784087e-07, + "loss": 0.267, + "step": 33765 + }, + { + "epoch": 1.581767929919895, + "grad_norm": 0.5619341707914473, + "learning_rate": 5.517974342428245e-07, + "loss": 0.2509, + "step": 33766 + }, + { + "epoch": 1.5818147749098235, + "grad_norm": 0.5346087455557851, + "learning_rate": 5.51678590912979e-07, + "loss": 0.2533, + "step": 33767 + }, + { + "epoch": 1.5818616198997517, + "grad_norm": 0.5909883175217899, + "learning_rate": 5.515597587952326e-07, + "loss": 0.2816, + "step": 33768 + }, + { + "epoch": 1.58190846488968, + "grad_norm": 0.6042955779137471, + "learning_rate": 5.514409378902699e-07, + "loss": 0.2735, + "step": 33769 + }, + { + "epoch": 1.5819553098796084, + "grad_norm": 0.58396098723861, + "learning_rate": 5.513221281987752e-07, + "loss": 0.2674, + "step": 33770 + }, + { + "epoch": 1.5820021548695367, + "grad_norm": 0.656012163931025, + "learning_rate": 5.512033297214323e-07, + "loss": 0.2784, + "step": 33771 + }, + { + "epoch": 1.582048999859465, + "grad_norm": 0.5934942444323451, + "learning_rate": 5.51084542458924e-07, + "loss": 0.2708, + "step": 33772 + }, + { + "epoch": 1.5820958448493934, + "grad_norm": 0.6535956976524323, + "learning_rate": 5.509657664119342e-07, + "loss": 0.2794, + "step": 33773 + }, + { + "epoch": 1.5821426898393218, + "grad_norm": 0.5848448808063764, + "learning_rate": 5.508470015811471e-07, + "loss": 0.2734, + "step": 33774 + }, + { + "epoch": 1.58218953482925, + "grad_norm": 0.6234994824930321, + "learning_rate": 5.507282479672449e-07, + "loss": 0.2686, + "step": 33775 + }, + { + "epoch": 1.5822363798191783, + "grad_norm": 0.6005962213767729, + "learning_rate": 5.506095055709118e-07, + "loss": 0.2804, + "step": 33776 + }, + { + "epoch": 1.5822832248091068, + "grad_norm": 0.6077845668216044, + "learning_rate": 5.504907743928317e-07, + "loss": 0.274, + "step": 33777 + }, + { + "epoch": 1.582330069799035, + "grad_norm": 0.6163128305845978, + "learning_rate": 5.503720544336866e-07, + "loss": 0.2926, + "step": 33778 + }, + { + "epoch": 1.5823769147889633, + "grad_norm": 0.5960923619741506, + "learning_rate": 5.502533456941602e-07, + "loss": 0.2741, + "step": 33779 + }, + { + "epoch": 1.5824237597788917, + "grad_norm": 0.7161468422568572, + "learning_rate": 5.501346481749359e-07, + "loss": 0.2948, + "step": 33780 + }, + { + "epoch": 1.58247060476882, + "grad_norm": 0.597294849627896, + "learning_rate": 5.50015961876697e-07, + "loss": 0.2667, + "step": 33781 + }, + { + "epoch": 1.5825174497587482, + "grad_norm": 0.6116877127147826, + "learning_rate": 5.498972868001257e-07, + "loss": 0.2614, + "step": 33782 + }, + { + "epoch": 1.5825642947486767, + "grad_norm": 0.6200457935011426, + "learning_rate": 5.497786229459059e-07, + "loss": 0.287, + "step": 33783 + }, + { + "epoch": 1.5826111397386051, + "grad_norm": 0.6224221871886436, + "learning_rate": 5.496599703147196e-07, + "loss": 0.2885, + "step": 33784 + }, + { + "epoch": 1.5826579847285331, + "grad_norm": 0.5848951479144803, + "learning_rate": 5.495413289072496e-07, + "loss": 0.2725, + "step": 33785 + }, + { + "epoch": 1.5827048297184616, + "grad_norm": 0.5808506468337845, + "learning_rate": 5.494226987241791e-07, + "loss": 0.2702, + "step": 33786 + }, + { + "epoch": 1.58275167470839, + "grad_norm": 0.6322337569652625, + "learning_rate": 5.493040797661908e-07, + "loss": 0.2801, + "step": 33787 + }, + { + "epoch": 1.5827985196983183, + "grad_norm": 0.6202116817610143, + "learning_rate": 5.491854720339679e-07, + "loss": 0.2827, + "step": 33788 + }, + { + "epoch": 1.5828453646882465, + "grad_norm": 0.5882974626197516, + "learning_rate": 5.490668755281925e-07, + "loss": 0.2678, + "step": 33789 + }, + { + "epoch": 1.582892209678175, + "grad_norm": 0.59230061948514, + "learning_rate": 5.489482902495457e-07, + "loss": 0.2705, + "step": 33790 + }, + { + "epoch": 1.5829390546681033, + "grad_norm": 0.5666207693501769, + "learning_rate": 5.488297161987117e-07, + "loss": 0.2742, + "step": 33791 + }, + { + "epoch": 1.5829858996580315, + "grad_norm": 0.6053782372250012, + "learning_rate": 5.487111533763717e-07, + "loss": 0.2759, + "step": 33792 + }, + { + "epoch": 1.58303274464796, + "grad_norm": 0.580996431919648, + "learning_rate": 5.485926017832089e-07, + "loss": 0.2736, + "step": 33793 + }, + { + "epoch": 1.5830795896378882, + "grad_norm": 0.5623295848993686, + "learning_rate": 5.484740614199052e-07, + "loss": 0.2531, + "step": 33794 + }, + { + "epoch": 1.5831264346278164, + "grad_norm": 0.5677689736818692, + "learning_rate": 5.483555322871434e-07, + "loss": 0.2616, + "step": 33795 + }, + { + "epoch": 1.583173279617745, + "grad_norm": 0.6572004815496117, + "learning_rate": 5.48237014385605e-07, + "loss": 0.2946, + "step": 33796 + }, + { + "epoch": 1.5832201246076734, + "grad_norm": 0.5799316791966999, + "learning_rate": 5.481185077159712e-07, + "loss": 0.2692, + "step": 33797 + }, + { + "epoch": 1.5832669695976014, + "grad_norm": 0.5742513732642444, + "learning_rate": 5.48000012278925e-07, + "loss": 0.2801, + "step": 33798 + }, + { + "epoch": 1.5833138145875298, + "grad_norm": 0.5961015941768896, + "learning_rate": 5.478815280751481e-07, + "loss": 0.2712, + "step": 33799 + }, + { + "epoch": 1.5833606595774583, + "grad_norm": 0.613989771386309, + "learning_rate": 5.477630551053223e-07, + "loss": 0.2756, + "step": 33800 + }, + { + "epoch": 1.5834075045673865, + "grad_norm": 0.5782949672192014, + "learning_rate": 5.476445933701302e-07, + "loss": 0.2673, + "step": 33801 + }, + { + "epoch": 1.5834543495573148, + "grad_norm": 0.6261488139013767, + "learning_rate": 5.475261428702519e-07, + "loss": 0.2757, + "step": 33802 + }, + { + "epoch": 1.5835011945472433, + "grad_norm": 0.6352849184552292, + "learning_rate": 5.474077036063707e-07, + "loss": 0.2861, + "step": 33803 + }, + { + "epoch": 1.5835480395371715, + "grad_norm": 0.6039280678123016, + "learning_rate": 5.472892755791667e-07, + "loss": 0.2674, + "step": 33804 + }, + { + "epoch": 1.5835948845270997, + "grad_norm": 0.5998795045055879, + "learning_rate": 5.47170858789322e-07, + "loss": 0.2663, + "step": 33805 + }, + { + "epoch": 1.5836417295170282, + "grad_norm": 0.631589088030807, + "learning_rate": 5.470524532375184e-07, + "loss": 0.2721, + "step": 33806 + }, + { + "epoch": 1.5836885745069564, + "grad_norm": 0.6144645194386468, + "learning_rate": 5.469340589244379e-07, + "loss": 0.2694, + "step": 33807 + }, + { + "epoch": 1.5837354194968847, + "grad_norm": 0.5791039346492409, + "learning_rate": 5.468156758507601e-07, + "loss": 0.2716, + "step": 33808 + }, + { + "epoch": 1.5837822644868131, + "grad_norm": 0.5818907809370362, + "learning_rate": 5.466973040171677e-07, + "loss": 0.2707, + "step": 33809 + }, + { + "epoch": 1.5838291094767416, + "grad_norm": 0.5881011844474828, + "learning_rate": 5.46578943424341e-07, + "loss": 0.2607, + "step": 33810 + }, + { + "epoch": 1.5838759544666698, + "grad_norm": 0.5783199856595836, + "learning_rate": 5.464605940729612e-07, + "loss": 0.2686, + "step": 33811 + }, + { + "epoch": 1.583922799456598, + "grad_norm": 0.574153707502817, + "learning_rate": 5.4634225596371e-07, + "loss": 0.2644, + "step": 33812 + }, + { + "epoch": 1.5839696444465265, + "grad_norm": 0.580551102507281, + "learning_rate": 5.462239290972684e-07, + "loss": 0.2655, + "step": 33813 + }, + { + "epoch": 1.5840164894364548, + "grad_norm": 0.5468853813017676, + "learning_rate": 5.461056134743167e-07, + "loss": 0.2624, + "step": 33814 + }, + { + "epoch": 1.584063334426383, + "grad_norm": 0.6909345140860187, + "learning_rate": 5.459873090955359e-07, + "loss": 0.2948, + "step": 33815 + }, + { + "epoch": 1.5841101794163115, + "grad_norm": 0.6237551668614, + "learning_rate": 5.458690159616079e-07, + "loss": 0.2637, + "step": 33816 + }, + { + "epoch": 1.5841570244062397, + "grad_norm": 0.5908439705876698, + "learning_rate": 5.457507340732115e-07, + "loss": 0.27, + "step": 33817 + }, + { + "epoch": 1.584203869396168, + "grad_norm": 0.5577773156199916, + "learning_rate": 5.456324634310286e-07, + "loss": 0.2589, + "step": 33818 + }, + { + "epoch": 1.5842507143860964, + "grad_norm": 0.5774002945818627, + "learning_rate": 5.455142040357405e-07, + "loss": 0.2698, + "step": 33819 + }, + { + "epoch": 1.584297559376025, + "grad_norm": 0.6248854280516937, + "learning_rate": 5.453959558880262e-07, + "loss": 0.2785, + "step": 33820 + }, + { + "epoch": 1.584344404365953, + "grad_norm": 0.5668309136522527, + "learning_rate": 5.452777189885669e-07, + "loss": 0.2657, + "step": 33821 + }, + { + "epoch": 1.5843912493558814, + "grad_norm": 0.5872172538733675, + "learning_rate": 5.451594933380427e-07, + "loss": 0.2621, + "step": 33822 + }, + { + "epoch": 1.5844380943458098, + "grad_norm": 0.572719089303139, + "learning_rate": 5.450412789371353e-07, + "loss": 0.2652, + "step": 33823 + }, + { + "epoch": 1.584484939335738, + "grad_norm": 0.5581927272883005, + "learning_rate": 5.449230757865235e-07, + "loss": 0.2492, + "step": 33824 + }, + { + "epoch": 1.5845317843256663, + "grad_norm": 0.5679458643977457, + "learning_rate": 5.448048838868874e-07, + "loss": 0.2542, + "step": 33825 + }, + { + "epoch": 1.5845786293155948, + "grad_norm": 0.6321438720849217, + "learning_rate": 5.446867032389089e-07, + "loss": 0.2659, + "step": 33826 + }, + { + "epoch": 1.584625474305523, + "grad_norm": 0.5800591203923519, + "learning_rate": 5.445685338432663e-07, + "loss": 0.2488, + "step": 33827 + }, + { + "epoch": 1.5846723192954513, + "grad_norm": 0.6437960330411477, + "learning_rate": 5.444503757006403e-07, + "loss": 0.2852, + "step": 33828 + }, + { + "epoch": 1.5847191642853797, + "grad_norm": 0.6087134438070885, + "learning_rate": 5.443322288117107e-07, + "loss": 0.2655, + "step": 33829 + }, + { + "epoch": 1.584766009275308, + "grad_norm": 0.5610686194606536, + "learning_rate": 5.442140931771583e-07, + "loss": 0.2598, + "step": 33830 + }, + { + "epoch": 1.5848128542652362, + "grad_norm": 0.5798858808092296, + "learning_rate": 5.440959687976616e-07, + "loss": 0.2649, + "step": 33831 + }, + { + "epoch": 1.5848596992551647, + "grad_norm": 0.6131683246903167, + "learning_rate": 5.439778556739017e-07, + "loss": 0.2764, + "step": 33832 + }, + { + "epoch": 1.5849065442450931, + "grad_norm": 0.6164488934555733, + "learning_rate": 5.438597538065568e-07, + "loss": 0.2764, + "step": 33833 + }, + { + "epoch": 1.5849533892350212, + "grad_norm": 0.6031602822358312, + "learning_rate": 5.437416631963074e-07, + "loss": 0.2713, + "step": 33834 + }, + { + "epoch": 1.5850002342249496, + "grad_norm": 0.6178773194895285, + "learning_rate": 5.43623583843833e-07, + "loss": 0.2791, + "step": 33835 + }, + { + "epoch": 1.585047079214878, + "grad_norm": 0.5700049692875618, + "learning_rate": 5.435055157498134e-07, + "loss": 0.246, + "step": 33836 + }, + { + "epoch": 1.5850939242048063, + "grad_norm": 0.5840374221013375, + "learning_rate": 5.433874589149284e-07, + "loss": 0.2461, + "step": 33837 + }, + { + "epoch": 1.5851407691947346, + "grad_norm": 0.5721153004355233, + "learning_rate": 5.432694133398567e-07, + "loss": 0.2609, + "step": 33838 + }, + { + "epoch": 1.585187614184663, + "grad_norm": 0.6340670437647523, + "learning_rate": 5.431513790252771e-07, + "loss": 0.2711, + "step": 33839 + }, + { + "epoch": 1.5852344591745913, + "grad_norm": 0.5935361342706066, + "learning_rate": 5.430333559718695e-07, + "loss": 0.2629, + "step": 33840 + }, + { + "epoch": 1.5852813041645195, + "grad_norm": 0.5907801228321534, + "learning_rate": 5.429153441803131e-07, + "loss": 0.2836, + "step": 33841 + }, + { + "epoch": 1.585328149154448, + "grad_norm": 0.5673492753685243, + "learning_rate": 5.427973436512868e-07, + "loss": 0.2717, + "step": 33842 + }, + { + "epoch": 1.5853749941443762, + "grad_norm": 0.5841453440227208, + "learning_rate": 5.4267935438547e-07, + "loss": 0.2632, + "step": 33843 + }, + { + "epoch": 1.5854218391343045, + "grad_norm": 0.5839144082270276, + "learning_rate": 5.425613763835424e-07, + "loss": 0.2732, + "step": 33844 + }, + { + "epoch": 1.585468684124233, + "grad_norm": 0.625905792267494, + "learning_rate": 5.42443409646182e-07, + "loss": 0.2728, + "step": 33845 + }, + { + "epoch": 1.5855155291141614, + "grad_norm": 0.5636652886974605, + "learning_rate": 5.42325454174067e-07, + "loss": 0.2652, + "step": 33846 + }, + { + "epoch": 1.5855623741040896, + "grad_norm": 0.614852522089311, + "learning_rate": 5.422075099678769e-07, + "loss": 0.283, + "step": 33847 + }, + { + "epoch": 1.5856092190940179, + "grad_norm": 0.6289422958637497, + "learning_rate": 5.420895770282905e-07, + "loss": 0.2944, + "step": 33848 + }, + { + "epoch": 1.5856560640839463, + "grad_norm": 0.5836922673822798, + "learning_rate": 5.419716553559867e-07, + "loss": 0.2675, + "step": 33849 + }, + { + "epoch": 1.5857029090738746, + "grad_norm": 0.5787818254276259, + "learning_rate": 5.418537449516445e-07, + "loss": 0.265, + "step": 33850 + }, + { + "epoch": 1.5857497540638028, + "grad_norm": 0.5920939247192443, + "learning_rate": 5.417358458159411e-07, + "loss": 0.2552, + "step": 33851 + }, + { + "epoch": 1.5857965990537313, + "grad_norm": 0.5654194840075273, + "learning_rate": 5.416179579495562e-07, + "loss": 0.2658, + "step": 33852 + }, + { + "epoch": 1.5858434440436595, + "grad_norm": 0.6441612258686179, + "learning_rate": 5.415000813531673e-07, + "loss": 0.2875, + "step": 33853 + }, + { + "epoch": 1.5858902890335878, + "grad_norm": 0.5987591615956254, + "learning_rate": 5.413822160274532e-07, + "loss": 0.2631, + "step": 33854 + }, + { + "epoch": 1.5859371340235162, + "grad_norm": 0.6069492925765069, + "learning_rate": 5.41264361973092e-07, + "loss": 0.2718, + "step": 33855 + }, + { + "epoch": 1.5859839790134447, + "grad_norm": 0.610181316407393, + "learning_rate": 5.41146519190763e-07, + "loss": 0.2724, + "step": 33856 + }, + { + "epoch": 1.5860308240033727, + "grad_norm": 0.5818624763514081, + "learning_rate": 5.410286876811427e-07, + "loss": 0.2575, + "step": 33857 + }, + { + "epoch": 1.5860776689933012, + "grad_norm": 0.598859594481811, + "learning_rate": 5.409108674449104e-07, + "loss": 0.2672, + "step": 33858 + }, + { + "epoch": 1.5861245139832296, + "grad_norm": 0.6006377378958369, + "learning_rate": 5.407930584827431e-07, + "loss": 0.2786, + "step": 33859 + }, + { + "epoch": 1.5861713589731579, + "grad_norm": 0.6503115855226472, + "learning_rate": 5.406752607953195e-07, + "loss": 0.2951, + "step": 33860 + }, + { + "epoch": 1.586218203963086, + "grad_norm": 0.6051949521656023, + "learning_rate": 5.40557474383317e-07, + "loss": 0.2652, + "step": 33861 + }, + { + "epoch": 1.5862650489530146, + "grad_norm": 0.5628410752187931, + "learning_rate": 5.404396992474145e-07, + "loss": 0.2521, + "step": 33862 + }, + { + "epoch": 1.5863118939429428, + "grad_norm": 0.605786879684072, + "learning_rate": 5.403219353882886e-07, + "loss": 0.2779, + "step": 33863 + }, + { + "epoch": 1.586358738932871, + "grad_norm": 0.6046401716361223, + "learning_rate": 5.40204182806617e-07, + "loss": 0.2652, + "step": 33864 + }, + { + "epoch": 1.5864055839227995, + "grad_norm": 0.628305925807665, + "learning_rate": 5.400864415030788e-07, + "loss": 0.2894, + "step": 33865 + }, + { + "epoch": 1.5864524289127278, + "grad_norm": 0.662527190369839, + "learning_rate": 5.399687114783497e-07, + "loss": 0.3015, + "step": 33866 + }, + { + "epoch": 1.586499273902656, + "grad_norm": 0.6158090439421614, + "learning_rate": 5.398509927331081e-07, + "loss": 0.2757, + "step": 33867 + }, + { + "epoch": 1.5865461188925845, + "grad_norm": 0.6098560425833545, + "learning_rate": 5.397332852680321e-07, + "loss": 0.2817, + "step": 33868 + }, + { + "epoch": 1.586592963882513, + "grad_norm": 0.6359791212179158, + "learning_rate": 5.396155890837976e-07, + "loss": 0.2918, + "step": 33869 + }, + { + "epoch": 1.586639808872441, + "grad_norm": 0.5866690310546473, + "learning_rate": 5.394979041810824e-07, + "loss": 0.2654, + "step": 33870 + }, + { + "epoch": 1.5866866538623694, + "grad_norm": 0.5583474428124928, + "learning_rate": 5.393802305605644e-07, + "loss": 0.256, + "step": 33871 + }, + { + "epoch": 1.5867334988522979, + "grad_norm": 0.577924960413826, + "learning_rate": 5.392625682229208e-07, + "loss": 0.2673, + "step": 33872 + }, + { + "epoch": 1.586780343842226, + "grad_norm": 0.6303126055571168, + "learning_rate": 5.391449171688279e-07, + "loss": 0.2789, + "step": 33873 + }, + { + "epoch": 1.5868271888321543, + "grad_norm": 0.6357283030684469, + "learning_rate": 5.390272773989635e-07, + "loss": 0.2726, + "step": 33874 + }, + { + "epoch": 1.5868740338220828, + "grad_norm": 0.6327732837144477, + "learning_rate": 5.389096489140039e-07, + "loss": 0.2779, + "step": 33875 + }, + { + "epoch": 1.586920878812011, + "grad_norm": 0.658910104839123, + "learning_rate": 5.387920317146262e-07, + "loss": 0.2846, + "step": 33876 + }, + { + "epoch": 1.5869677238019393, + "grad_norm": 0.6410072366431023, + "learning_rate": 5.386744258015073e-07, + "loss": 0.2762, + "step": 33877 + }, + { + "epoch": 1.5870145687918678, + "grad_norm": 0.621729818609902, + "learning_rate": 5.385568311753242e-07, + "loss": 0.2823, + "step": 33878 + }, + { + "epoch": 1.587061413781796, + "grad_norm": 0.602919229690417, + "learning_rate": 5.384392478367542e-07, + "loss": 0.2679, + "step": 33879 + }, + { + "epoch": 1.5871082587717242, + "grad_norm": 0.6192656024237209, + "learning_rate": 5.383216757864734e-07, + "loss": 0.2821, + "step": 33880 + }, + { + "epoch": 1.5871551037616527, + "grad_norm": 0.6269235517730323, + "learning_rate": 5.382041150251577e-07, + "loss": 0.2866, + "step": 33881 + }, + { + "epoch": 1.5872019487515812, + "grad_norm": 0.5758223557141458, + "learning_rate": 5.380865655534842e-07, + "loss": 0.267, + "step": 33882 + }, + { + "epoch": 1.5872487937415094, + "grad_norm": 0.5986835671552292, + "learning_rate": 5.379690273721294e-07, + "loss": 0.2574, + "step": 33883 + }, + { + "epoch": 1.5872956387314376, + "grad_norm": 0.5885044141903986, + "learning_rate": 5.378515004817697e-07, + "loss": 0.2813, + "step": 33884 + }, + { + "epoch": 1.587342483721366, + "grad_norm": 0.5598786611610158, + "learning_rate": 5.377339848830812e-07, + "loss": 0.2537, + "step": 33885 + }, + { + "epoch": 1.5873893287112943, + "grad_norm": 0.5660686896634525, + "learning_rate": 5.376164805767415e-07, + "loss": 0.2603, + "step": 33886 + }, + { + "epoch": 1.5874361737012226, + "grad_norm": 0.580588532821478, + "learning_rate": 5.374989875634254e-07, + "loss": 0.2726, + "step": 33887 + }, + { + "epoch": 1.587483018691151, + "grad_norm": 0.6204770015828722, + "learning_rate": 5.373815058438089e-07, + "loss": 0.2726, + "step": 33888 + }, + { + "epoch": 1.5875298636810793, + "grad_norm": 0.6097497806567361, + "learning_rate": 5.372640354185684e-07, + "loss": 0.2748, + "step": 33889 + }, + { + "epoch": 1.5875767086710075, + "grad_norm": 0.6170454835857274, + "learning_rate": 5.371465762883801e-07, + "loss": 0.2726, + "step": 33890 + }, + { + "epoch": 1.587623553660936, + "grad_norm": 0.6374946461554868, + "learning_rate": 5.3702912845392e-07, + "loss": 0.2923, + "step": 33891 + }, + { + "epoch": 1.5876703986508645, + "grad_norm": 0.5977924658200257, + "learning_rate": 5.369116919158646e-07, + "loss": 0.2713, + "step": 33892 + }, + { + "epoch": 1.5877172436407925, + "grad_norm": 0.5517502080156906, + "learning_rate": 5.367942666748882e-07, + "loss": 0.2615, + "step": 33893 + }, + { + "epoch": 1.587764088630721, + "grad_norm": 0.5865844822736077, + "learning_rate": 5.366768527316682e-07, + "loss": 0.2609, + "step": 33894 + }, + { + "epoch": 1.5878109336206494, + "grad_norm": 0.5889095716104581, + "learning_rate": 5.365594500868787e-07, + "loss": 0.2571, + "step": 33895 + }, + { + "epoch": 1.5878577786105776, + "grad_norm": 0.6054744014756461, + "learning_rate": 5.364420587411959e-07, + "loss": 0.2797, + "step": 33896 + }, + { + "epoch": 1.5879046236005059, + "grad_norm": 0.5704723051067259, + "learning_rate": 5.363246786952958e-07, + "loss": 0.2579, + "step": 33897 + }, + { + "epoch": 1.5879514685904343, + "grad_norm": 0.5796747294143572, + "learning_rate": 5.362073099498533e-07, + "loss": 0.2559, + "step": 33898 + }, + { + "epoch": 1.5879983135803626, + "grad_norm": 0.6280506041546876, + "learning_rate": 5.360899525055452e-07, + "loss": 0.286, + "step": 33899 + }, + { + "epoch": 1.5880451585702908, + "grad_norm": 0.6430892651918191, + "learning_rate": 5.35972606363045e-07, + "loss": 0.2869, + "step": 33900 + }, + { + "epoch": 1.5880920035602193, + "grad_norm": 0.561372444065215, + "learning_rate": 5.358552715230292e-07, + "loss": 0.2505, + "step": 33901 + }, + { + "epoch": 1.5881388485501475, + "grad_norm": 0.5844700338658131, + "learning_rate": 5.357379479861724e-07, + "loss": 0.2798, + "step": 33902 + }, + { + "epoch": 1.5881856935400758, + "grad_norm": 0.6047471419437036, + "learning_rate": 5.356206357531496e-07, + "loss": 0.2773, + "step": 33903 + }, + { + "epoch": 1.5882325385300042, + "grad_norm": 0.6153498272872868, + "learning_rate": 5.355033348246366e-07, + "loss": 0.3038, + "step": 33904 + }, + { + "epoch": 1.5882793835199327, + "grad_norm": 0.6131952692966525, + "learning_rate": 5.353860452013088e-07, + "loss": 0.2768, + "step": 33905 + }, + { + "epoch": 1.5883262285098607, + "grad_norm": 0.6556112175843255, + "learning_rate": 5.352687668838397e-07, + "loss": 0.2787, + "step": 33906 + }, + { + "epoch": 1.5883730734997892, + "grad_norm": 0.5971119454327071, + "learning_rate": 5.35151499872906e-07, + "loss": 0.2589, + "step": 33907 + }, + { + "epoch": 1.5884199184897176, + "grad_norm": 0.6217936184335026, + "learning_rate": 5.350342441691805e-07, + "loss": 0.2674, + "step": 33908 + }, + { + "epoch": 1.5884667634796459, + "grad_norm": 0.6086527989833374, + "learning_rate": 5.349169997733395e-07, + "loss": 0.2804, + "step": 33909 + }, + { + "epoch": 1.5885136084695741, + "grad_norm": 0.5773696399696341, + "learning_rate": 5.347997666860569e-07, + "loss": 0.2759, + "step": 33910 + }, + { + "epoch": 1.5885604534595026, + "grad_norm": 0.6362347633314248, + "learning_rate": 5.346825449080085e-07, + "loss": 0.2793, + "step": 33911 + }, + { + "epoch": 1.5886072984494308, + "grad_norm": 0.618277266925831, + "learning_rate": 5.345653344398675e-07, + "loss": 0.2567, + "step": 33912 + }, + { + "epoch": 1.588654143439359, + "grad_norm": 0.597446834093243, + "learning_rate": 5.344481352823089e-07, + "loss": 0.2679, + "step": 33913 + }, + { + "epoch": 1.5887009884292875, + "grad_norm": 0.5916800687154138, + "learning_rate": 5.343309474360078e-07, + "loss": 0.2852, + "step": 33914 + }, + { + "epoch": 1.5887478334192158, + "grad_norm": 0.5982125871244794, + "learning_rate": 5.342137709016376e-07, + "loss": 0.2797, + "step": 33915 + }, + { + "epoch": 1.588794678409144, + "grad_norm": 0.6254042311878056, + "learning_rate": 5.34096605679873e-07, + "loss": 0.2945, + "step": 33916 + }, + { + "epoch": 1.5888415233990725, + "grad_norm": 0.585091220310545, + "learning_rate": 5.33979451771389e-07, + "loss": 0.2824, + "step": 33917 + }, + { + "epoch": 1.588888368389001, + "grad_norm": 0.6469584799085399, + "learning_rate": 5.338623091768583e-07, + "loss": 0.2828, + "step": 33918 + }, + { + "epoch": 1.5889352133789292, + "grad_norm": 0.5686496424993588, + "learning_rate": 5.337451778969561e-07, + "loss": 0.2747, + "step": 33919 + }, + { + "epoch": 1.5889820583688574, + "grad_norm": 0.5629390255685021, + "learning_rate": 5.33628057932356e-07, + "loss": 0.2644, + "step": 33920 + }, + { + "epoch": 1.5890289033587859, + "grad_norm": 0.5686485030574627, + "learning_rate": 5.335109492837329e-07, + "loss": 0.2618, + "step": 33921 + }, + { + "epoch": 1.5890757483487141, + "grad_norm": 0.6268542120341343, + "learning_rate": 5.333938519517596e-07, + "loss": 0.2721, + "step": 33922 + }, + { + "epoch": 1.5891225933386424, + "grad_norm": 0.6231189247757714, + "learning_rate": 5.332767659371107e-07, + "loss": 0.2868, + "step": 33923 + }, + { + "epoch": 1.5891694383285708, + "grad_norm": 0.6299145390515666, + "learning_rate": 5.331596912404593e-07, + "loss": 0.2799, + "step": 33924 + }, + { + "epoch": 1.589216283318499, + "grad_norm": 0.612233536640737, + "learning_rate": 5.330426278624792e-07, + "loss": 0.2721, + "step": 33925 + }, + { + "epoch": 1.5892631283084273, + "grad_norm": 0.5931353010999811, + "learning_rate": 5.329255758038449e-07, + "loss": 0.2749, + "step": 33926 + }, + { + "epoch": 1.5893099732983558, + "grad_norm": 0.5822327399337835, + "learning_rate": 5.32808535065229e-07, + "loss": 0.2669, + "step": 33927 + }, + { + "epoch": 1.5893568182882842, + "grad_norm": 0.6010846218839779, + "learning_rate": 5.326915056473064e-07, + "loss": 0.2776, + "step": 33928 + }, + { + "epoch": 1.5894036632782123, + "grad_norm": 0.5898503276854788, + "learning_rate": 5.325744875507499e-07, + "loss": 0.2516, + "step": 33929 + }, + { + "epoch": 1.5894505082681407, + "grad_norm": 0.6548640926653478, + "learning_rate": 5.32457480776232e-07, + "loss": 0.2813, + "step": 33930 + }, + { + "epoch": 1.5894973532580692, + "grad_norm": 0.6627069907173199, + "learning_rate": 5.323404853244265e-07, + "loss": 0.2891, + "step": 33931 + }, + { + "epoch": 1.5895441982479974, + "grad_norm": 0.5866561058718851, + "learning_rate": 5.322235011960072e-07, + "loss": 0.2587, + "step": 33932 + }, + { + "epoch": 1.5895910432379257, + "grad_norm": 0.5952189412296088, + "learning_rate": 5.321065283916471e-07, + "loss": 0.2672, + "step": 33933 + }, + { + "epoch": 1.5896378882278541, + "grad_norm": 0.6209489287795386, + "learning_rate": 5.319895669120192e-07, + "loss": 0.2724, + "step": 33934 + }, + { + "epoch": 1.5896847332177824, + "grad_norm": 0.6205201714589136, + "learning_rate": 5.318726167577975e-07, + "loss": 0.2617, + "step": 33935 + }, + { + "epoch": 1.5897315782077106, + "grad_norm": 0.6132709545156032, + "learning_rate": 5.31755677929654e-07, + "loss": 0.2706, + "step": 33936 + }, + { + "epoch": 1.589778423197639, + "grad_norm": 0.6068629770637863, + "learning_rate": 5.316387504282613e-07, + "loss": 0.2745, + "step": 33937 + }, + { + "epoch": 1.5898252681875673, + "grad_norm": 0.5791528116295139, + "learning_rate": 5.31521834254293e-07, + "loss": 0.2689, + "step": 33938 + }, + { + "epoch": 1.5898721131774955, + "grad_norm": 0.6151992731007363, + "learning_rate": 5.314049294084217e-07, + "loss": 0.2726, + "step": 33939 + }, + { + "epoch": 1.589918958167424, + "grad_norm": 0.5840454835179872, + "learning_rate": 5.312880358913203e-07, + "loss": 0.2704, + "step": 33940 + }, + { + "epoch": 1.5899658031573525, + "grad_norm": 0.5659798191667047, + "learning_rate": 5.311711537036618e-07, + "loss": 0.2416, + "step": 33941 + }, + { + "epoch": 1.5900126481472805, + "grad_norm": 0.6179615381670506, + "learning_rate": 5.310542828461182e-07, + "loss": 0.2778, + "step": 33942 + }, + { + "epoch": 1.590059493137209, + "grad_norm": 0.6254484092932081, + "learning_rate": 5.309374233193629e-07, + "loss": 0.2773, + "step": 33943 + }, + { + "epoch": 1.5901063381271374, + "grad_norm": 0.5862250918180657, + "learning_rate": 5.308205751240669e-07, + "loss": 0.2771, + "step": 33944 + }, + { + "epoch": 1.5901531831170657, + "grad_norm": 0.612864418193199, + "learning_rate": 5.307037382609037e-07, + "loss": 0.2685, + "step": 33945 + }, + { + "epoch": 1.590200028106994, + "grad_norm": 0.5802304827656674, + "learning_rate": 5.305869127305455e-07, + "loss": 0.2638, + "step": 33946 + }, + { + "epoch": 1.5902468730969224, + "grad_norm": 0.6375713124989676, + "learning_rate": 5.304700985336656e-07, + "loss": 0.2905, + "step": 33947 + }, + { + "epoch": 1.5902937180868506, + "grad_norm": 0.5788231309415022, + "learning_rate": 5.303532956709343e-07, + "loss": 0.2702, + "step": 33948 + }, + { + "epoch": 1.5903405630767788, + "grad_norm": 0.6416666255301834, + "learning_rate": 5.302365041430254e-07, + "loss": 0.2771, + "step": 33949 + }, + { + "epoch": 1.5903874080667073, + "grad_norm": 0.6118141415537965, + "learning_rate": 5.301197239506095e-07, + "loss": 0.2633, + "step": 33950 + }, + { + "epoch": 1.5904342530566355, + "grad_norm": 0.5840625795526563, + "learning_rate": 5.300029550943597e-07, + "loss": 0.2625, + "step": 33951 + }, + { + "epoch": 1.5904810980465638, + "grad_norm": 0.6527212786834233, + "learning_rate": 5.298861975749478e-07, + "loss": 0.2808, + "step": 33952 + }, + { + "epoch": 1.5905279430364923, + "grad_norm": 0.5938490789480702, + "learning_rate": 5.297694513930463e-07, + "loss": 0.2639, + "step": 33953 + }, + { + "epoch": 1.5905747880264207, + "grad_norm": 0.6127412268431696, + "learning_rate": 5.296527165493256e-07, + "loss": 0.2789, + "step": 33954 + }, + { + "epoch": 1.590621633016349, + "grad_norm": 0.604308303982528, + "learning_rate": 5.295359930444585e-07, + "loss": 0.2559, + "step": 33955 + }, + { + "epoch": 1.5906684780062772, + "grad_norm": 0.6344536883661499, + "learning_rate": 5.294192808791171e-07, + "loss": 0.2845, + "step": 33956 + }, + { + "epoch": 1.5907153229962057, + "grad_norm": 0.6596143303900489, + "learning_rate": 5.293025800539717e-07, + "loss": 0.2887, + "step": 33957 + }, + { + "epoch": 1.590762167986134, + "grad_norm": 0.630268744901826, + "learning_rate": 5.291858905696948e-07, + "loss": 0.2701, + "step": 33958 + }, + { + "epoch": 1.5908090129760621, + "grad_norm": 0.5823561319530716, + "learning_rate": 5.290692124269584e-07, + "loss": 0.2729, + "step": 33959 + }, + { + "epoch": 1.5908558579659906, + "grad_norm": 0.5443461763895516, + "learning_rate": 5.289525456264325e-07, + "loss": 0.2507, + "step": 33960 + }, + { + "epoch": 1.5909027029559188, + "grad_norm": 0.609867399952228, + "learning_rate": 5.288358901687893e-07, + "loss": 0.2829, + "step": 33961 + }, + { + "epoch": 1.590949547945847, + "grad_norm": 0.6018426119347585, + "learning_rate": 5.287192460547e-07, + "loss": 0.2915, + "step": 33962 + }, + { + "epoch": 1.5909963929357755, + "grad_norm": 0.5907639701823517, + "learning_rate": 5.286026132848368e-07, + "loss": 0.2596, + "step": 33963 + }, + { + "epoch": 1.5910432379257038, + "grad_norm": 0.6102743665241894, + "learning_rate": 5.284859918598695e-07, + "loss": 0.285, + "step": 33964 + }, + { + "epoch": 1.591090082915632, + "grad_norm": 0.5715910439122197, + "learning_rate": 5.283693817804697e-07, + "loss": 0.2691, + "step": 33965 + }, + { + "epoch": 1.5911369279055605, + "grad_norm": 0.5887460837669175, + "learning_rate": 5.282527830473091e-07, + "loss": 0.2584, + "step": 33966 + }, + { + "epoch": 1.591183772895489, + "grad_norm": 0.6077927175717488, + "learning_rate": 5.281361956610578e-07, + "loss": 0.2689, + "step": 33967 + }, + { + "epoch": 1.5912306178854172, + "grad_norm": 0.6325926519785705, + "learning_rate": 5.280196196223869e-07, + "loss": 0.2756, + "step": 33968 + }, + { + "epoch": 1.5912774628753454, + "grad_norm": 0.5771752399940657, + "learning_rate": 5.279030549319675e-07, + "loss": 0.2488, + "step": 33969 + }, + { + "epoch": 1.591324307865274, + "grad_norm": 0.6066241507977791, + "learning_rate": 5.277865015904709e-07, + "loss": 0.2928, + "step": 33970 + }, + { + "epoch": 1.5913711528552021, + "grad_norm": 0.6103151520841336, + "learning_rate": 5.276699595985665e-07, + "loss": 0.2591, + "step": 33971 + }, + { + "epoch": 1.5914179978451304, + "grad_norm": 0.6894256328958411, + "learning_rate": 5.275534289569268e-07, + "loss": 0.3008, + "step": 33972 + }, + { + "epoch": 1.5914648428350588, + "grad_norm": 0.5567791406591072, + "learning_rate": 5.274369096662205e-07, + "loss": 0.2646, + "step": 33973 + }, + { + "epoch": 1.591511687824987, + "grad_norm": 0.5853040143533286, + "learning_rate": 5.273204017271188e-07, + "loss": 0.2862, + "step": 33974 + }, + { + "epoch": 1.5915585328149153, + "grad_norm": 0.5949300411603443, + "learning_rate": 5.272039051402928e-07, + "loss": 0.2699, + "step": 33975 + }, + { + "epoch": 1.5916053778048438, + "grad_norm": 0.5728323508789585, + "learning_rate": 5.270874199064122e-07, + "loss": 0.2725, + "step": 33976 + }, + { + "epoch": 1.5916522227947723, + "grad_norm": 0.5749821645099468, + "learning_rate": 5.269709460261483e-07, + "loss": 0.2563, + "step": 33977 + }, + { + "epoch": 1.5916990677847003, + "grad_norm": 0.6112029048575037, + "learning_rate": 5.268544835001707e-07, + "loss": 0.2712, + "step": 33978 + }, + { + "epoch": 1.5917459127746287, + "grad_norm": 0.6384907153195555, + "learning_rate": 5.26738032329149e-07, + "loss": 0.2945, + "step": 33979 + }, + { + "epoch": 1.5917927577645572, + "grad_norm": 0.6225477535714133, + "learning_rate": 5.266215925137541e-07, + "loss": 0.283, + "step": 33980 + }, + { + "epoch": 1.5918396027544854, + "grad_norm": 0.6320118476704174, + "learning_rate": 5.265051640546556e-07, + "loss": 0.2699, + "step": 33981 + }, + { + "epoch": 1.5918864477444137, + "grad_norm": 0.6363917703831127, + "learning_rate": 5.263887469525242e-07, + "loss": 0.2759, + "step": 33982 + }, + { + "epoch": 1.5919332927343421, + "grad_norm": 0.5898004073073888, + "learning_rate": 5.262723412080295e-07, + "loss": 0.2794, + "step": 33983 + }, + { + "epoch": 1.5919801377242704, + "grad_norm": 0.5903028327722446, + "learning_rate": 5.261559468218419e-07, + "loss": 0.265, + "step": 33984 + }, + { + "epoch": 1.5920269827141986, + "grad_norm": 0.5823496994692164, + "learning_rate": 5.260395637946308e-07, + "loss": 0.2585, + "step": 33985 + }, + { + "epoch": 1.592073827704127, + "grad_norm": 0.6150688553628485, + "learning_rate": 5.259231921270652e-07, + "loss": 0.2782, + "step": 33986 + }, + { + "epoch": 1.5921206726940553, + "grad_norm": 0.6571989450461334, + "learning_rate": 5.258068318198154e-07, + "loss": 0.2894, + "step": 33987 + }, + { + "epoch": 1.5921675176839836, + "grad_norm": 0.5991465344709654, + "learning_rate": 5.25690482873551e-07, + "loss": 0.2782, + "step": 33988 + }, + { + "epoch": 1.592214362673912, + "grad_norm": 0.6215193190575186, + "learning_rate": 5.255741452889418e-07, + "loss": 0.2786, + "step": 33989 + }, + { + "epoch": 1.5922612076638405, + "grad_norm": 0.6378818045371872, + "learning_rate": 5.254578190666579e-07, + "loss": 0.2845, + "step": 33990 + }, + { + "epoch": 1.5923080526537685, + "grad_norm": 0.6212961059744645, + "learning_rate": 5.25341504207367e-07, + "loss": 0.2694, + "step": 33991 + }, + { + "epoch": 1.592354897643697, + "grad_norm": 0.6138757094501176, + "learning_rate": 5.252252007117403e-07, + "loss": 0.2888, + "step": 33992 + }, + { + "epoch": 1.5924017426336254, + "grad_norm": 0.5922970274460708, + "learning_rate": 5.251089085804456e-07, + "loss": 0.2672, + "step": 33993 + }, + { + "epoch": 1.5924485876235537, + "grad_norm": 0.6332909274709903, + "learning_rate": 5.249926278141526e-07, + "loss": 0.2864, + "step": 33994 + }, + { + "epoch": 1.592495432613482, + "grad_norm": 0.624586862310536, + "learning_rate": 5.248763584135307e-07, + "loss": 0.2708, + "step": 33995 + }, + { + "epoch": 1.5925422776034104, + "grad_norm": 0.5641107765601542, + "learning_rate": 5.247601003792499e-07, + "loss": 0.2709, + "step": 33996 + }, + { + "epoch": 1.5925891225933386, + "grad_norm": 0.6392422141641574, + "learning_rate": 5.246438537119772e-07, + "loss": 0.2729, + "step": 33997 + }, + { + "epoch": 1.5926359675832669, + "grad_norm": 0.5888405448202453, + "learning_rate": 5.245276184123834e-07, + "loss": 0.2635, + "step": 33998 + }, + { + "epoch": 1.5926828125731953, + "grad_norm": 0.6234057294517595, + "learning_rate": 5.24411394481136e-07, + "loss": 0.2751, + "step": 33999 + }, + { + "epoch": 1.5927296575631236, + "grad_norm": 0.5846268476616998, + "learning_rate": 5.242951819189046e-07, + "loss": 0.2674, + "step": 34000 + }, + { + "epoch": 1.5927765025530518, + "grad_norm": 0.5824418414633399, + "learning_rate": 5.241789807263575e-07, + "loss": 0.2628, + "step": 34001 + }, + { + "epoch": 1.5928233475429803, + "grad_norm": 0.5658023913034589, + "learning_rate": 5.240627909041648e-07, + "loss": 0.2532, + "step": 34002 + }, + { + "epoch": 1.5928701925329087, + "grad_norm": 0.619701769705084, + "learning_rate": 5.239466124529932e-07, + "loss": 0.2673, + "step": 34003 + }, + { + "epoch": 1.592917037522837, + "grad_norm": 0.6265586033647711, + "learning_rate": 5.238304453735124e-07, + "loss": 0.2687, + "step": 34004 + }, + { + "epoch": 1.5929638825127652, + "grad_norm": 0.6292532287058115, + "learning_rate": 5.237142896663913e-07, + "loss": 0.2871, + "step": 34005 + }, + { + "epoch": 1.5930107275026937, + "grad_norm": 0.5910302922109223, + "learning_rate": 5.23598145332297e-07, + "loss": 0.2737, + "step": 34006 + }, + { + "epoch": 1.593057572492622, + "grad_norm": 0.6063714746262845, + "learning_rate": 5.234820123718986e-07, + "loss": 0.2584, + "step": 34007 + }, + { + "epoch": 1.5931044174825502, + "grad_norm": 0.611274612832089, + "learning_rate": 5.233658907858652e-07, + "loss": 0.2697, + "step": 34008 + }, + { + "epoch": 1.5931512624724786, + "grad_norm": 0.602324461564993, + "learning_rate": 5.232497805748634e-07, + "loss": 0.2603, + "step": 34009 + }, + { + "epoch": 1.5931981074624069, + "grad_norm": 0.5934911478049706, + "learning_rate": 5.231336817395627e-07, + "loss": 0.2691, + "step": 34010 + }, + { + "epoch": 1.593244952452335, + "grad_norm": 0.600075695904838, + "learning_rate": 5.230175942806304e-07, + "loss": 0.2712, + "step": 34011 + }, + { + "epoch": 1.5932917974422636, + "grad_norm": 0.6282842313312026, + "learning_rate": 5.229015181987356e-07, + "loss": 0.2659, + "step": 34012 + }, + { + "epoch": 1.593338642432192, + "grad_norm": 0.6501158933953377, + "learning_rate": 5.227854534945453e-07, + "loss": 0.2857, + "step": 34013 + }, + { + "epoch": 1.59338548742212, + "grad_norm": 0.5943390202911971, + "learning_rate": 5.226694001687283e-07, + "loss": 0.264, + "step": 34014 + }, + { + "epoch": 1.5934323324120485, + "grad_norm": 0.6250283742059088, + "learning_rate": 5.225533582219511e-07, + "loss": 0.2733, + "step": 34015 + }, + { + "epoch": 1.593479177401977, + "grad_norm": 0.6154186210540828, + "learning_rate": 5.224373276548825e-07, + "loss": 0.2751, + "step": 34016 + }, + { + "epoch": 1.5935260223919052, + "grad_norm": 0.5820216398635041, + "learning_rate": 5.223213084681899e-07, + "loss": 0.2635, + "step": 34017 + }, + { + "epoch": 1.5935728673818335, + "grad_norm": 0.606176201608408, + "learning_rate": 5.222053006625413e-07, + "loss": 0.2727, + "step": 34018 + }, + { + "epoch": 1.593619712371762, + "grad_norm": 0.6393656723415703, + "learning_rate": 5.220893042386046e-07, + "loss": 0.2698, + "step": 34019 + }, + { + "epoch": 1.5936665573616902, + "grad_norm": 0.5971657731278998, + "learning_rate": 5.219733191970469e-07, + "loss": 0.2776, + "step": 34020 + }, + { + "epoch": 1.5937134023516184, + "grad_norm": 0.6210213064097707, + "learning_rate": 5.218573455385348e-07, + "loss": 0.2858, + "step": 34021 + }, + { + "epoch": 1.5937602473415469, + "grad_norm": 0.6032734263803138, + "learning_rate": 5.217413832637366e-07, + "loss": 0.2654, + "step": 34022 + }, + { + "epoch": 1.593807092331475, + "grad_norm": 0.623046334614662, + "learning_rate": 5.216254323733194e-07, + "loss": 0.2754, + "step": 34023 + }, + { + "epoch": 1.5938539373214033, + "grad_norm": 0.5869121485354771, + "learning_rate": 5.215094928679507e-07, + "loss": 0.2758, + "step": 34024 + }, + { + "epoch": 1.5939007823113318, + "grad_norm": 0.5808045194616174, + "learning_rate": 5.213935647482976e-07, + "loss": 0.2696, + "step": 34025 + }, + { + "epoch": 1.5939476273012603, + "grad_norm": 0.6215691503890661, + "learning_rate": 5.212776480150278e-07, + "loss": 0.2837, + "step": 34026 + }, + { + "epoch": 1.5939944722911883, + "grad_norm": 0.6011108819491869, + "learning_rate": 5.211617426688079e-07, + "loss": 0.2853, + "step": 34027 + }, + { + "epoch": 1.5940413172811168, + "grad_norm": 0.6859621312588737, + "learning_rate": 5.21045848710304e-07, + "loss": 0.2772, + "step": 34028 + }, + { + "epoch": 1.5940881622710452, + "grad_norm": 0.6016152736688379, + "learning_rate": 5.20929966140184e-07, + "loss": 0.2827, + "step": 34029 + }, + { + "epoch": 1.5941350072609735, + "grad_norm": 0.5676719379307712, + "learning_rate": 5.208140949591145e-07, + "loss": 0.2497, + "step": 34030 + }, + { + "epoch": 1.5941818522509017, + "grad_norm": 0.632716813464884, + "learning_rate": 5.206982351677625e-07, + "loss": 0.2781, + "step": 34031 + }, + { + "epoch": 1.5942286972408302, + "grad_norm": 0.61812591138689, + "learning_rate": 5.205823867667948e-07, + "loss": 0.2781, + "step": 34032 + }, + { + "epoch": 1.5942755422307584, + "grad_norm": 0.5786795584087996, + "learning_rate": 5.204665497568787e-07, + "loss": 0.2603, + "step": 34033 + }, + { + "epoch": 1.5943223872206866, + "grad_norm": 0.5799451250281479, + "learning_rate": 5.203507241386796e-07, + "loss": 0.261, + "step": 34034 + }, + { + "epoch": 1.594369232210615, + "grad_norm": 0.5977848287436669, + "learning_rate": 5.202349099128643e-07, + "loss": 0.2671, + "step": 34035 + }, + { + "epoch": 1.5944160772005433, + "grad_norm": 0.5930029641321813, + "learning_rate": 5.201191070800993e-07, + "loss": 0.2652, + "step": 34036 + }, + { + "epoch": 1.5944629221904716, + "grad_norm": 0.5619413882859963, + "learning_rate": 5.200033156410511e-07, + "loss": 0.2493, + "step": 34037 + }, + { + "epoch": 1.5945097671804, + "grad_norm": 0.6024470725208827, + "learning_rate": 5.198875355963865e-07, + "loss": 0.272, + "step": 34038 + }, + { + "epoch": 1.5945566121703285, + "grad_norm": 0.5732289490553566, + "learning_rate": 5.19771766946772e-07, + "loss": 0.2666, + "step": 34039 + }, + { + "epoch": 1.5946034571602568, + "grad_norm": 0.5875880588034713, + "learning_rate": 5.196560096928724e-07, + "loss": 0.2703, + "step": 34040 + }, + { + "epoch": 1.594650302150185, + "grad_norm": 0.605131732702283, + "learning_rate": 5.195402638353555e-07, + "loss": 0.2812, + "step": 34041 + }, + { + "epoch": 1.5946971471401135, + "grad_norm": 0.5846560421493537, + "learning_rate": 5.194245293748859e-07, + "loss": 0.2548, + "step": 34042 + }, + { + "epoch": 1.5947439921300417, + "grad_norm": 0.6021549699931559, + "learning_rate": 5.193088063121304e-07, + "loss": 0.2615, + "step": 34043 + }, + { + "epoch": 1.59479083711997, + "grad_norm": 0.6601653319078495, + "learning_rate": 5.19193094647755e-07, + "loss": 0.278, + "step": 34044 + }, + { + "epoch": 1.5948376821098984, + "grad_norm": 0.5863110070689169, + "learning_rate": 5.190773943824259e-07, + "loss": 0.2609, + "step": 34045 + }, + { + "epoch": 1.5948845270998266, + "grad_norm": 0.5804040375539657, + "learning_rate": 5.189617055168081e-07, + "loss": 0.2524, + "step": 34046 + }, + { + "epoch": 1.5949313720897549, + "grad_norm": 0.6039759304209003, + "learning_rate": 5.188460280515683e-07, + "loss": 0.265, + "step": 34047 + }, + { + "epoch": 1.5949782170796833, + "grad_norm": 0.6110889916909481, + "learning_rate": 5.18730361987371e-07, + "loss": 0.2747, + "step": 34048 + }, + { + "epoch": 1.5950250620696118, + "grad_norm": 0.5825666357490591, + "learning_rate": 5.186147073248826e-07, + "loss": 0.2624, + "step": 34049 + }, + { + "epoch": 1.5950719070595398, + "grad_norm": 0.5645782423527395, + "learning_rate": 5.184990640647683e-07, + "loss": 0.2835, + "step": 34050 + }, + { + "epoch": 1.5951187520494683, + "grad_norm": 0.6132524679023954, + "learning_rate": 5.183834322076947e-07, + "loss": 0.2639, + "step": 34051 + }, + { + "epoch": 1.5951655970393968, + "grad_norm": 0.6136827137338383, + "learning_rate": 5.182678117543258e-07, + "loss": 0.2679, + "step": 34052 + }, + { + "epoch": 1.595212442029325, + "grad_norm": 0.6297370804934815, + "learning_rate": 5.181522027053273e-07, + "loss": 0.2861, + "step": 34053 + }, + { + "epoch": 1.5952592870192532, + "grad_norm": 0.6342686716026, + "learning_rate": 5.180366050613658e-07, + "loss": 0.293, + "step": 34054 + }, + { + "epoch": 1.5953061320091817, + "grad_norm": 0.6116634225156133, + "learning_rate": 5.179210188231045e-07, + "loss": 0.2707, + "step": 34055 + }, + { + "epoch": 1.59535297699911, + "grad_norm": 0.5961999644823891, + "learning_rate": 5.178054439912097e-07, + "loss": 0.2625, + "step": 34056 + }, + { + "epoch": 1.5953998219890382, + "grad_norm": 0.5988737308124271, + "learning_rate": 5.176898805663472e-07, + "loss": 0.2603, + "step": 34057 + }, + { + "epoch": 1.5954466669789666, + "grad_norm": 0.6041359956029397, + "learning_rate": 5.175743285491802e-07, + "loss": 0.2604, + "step": 34058 + }, + { + "epoch": 1.5954935119688949, + "grad_norm": 0.6306771986375111, + "learning_rate": 5.174587879403747e-07, + "loss": 0.282, + "step": 34059 + }, + { + "epoch": 1.5955403569588231, + "grad_norm": 0.5807292065829689, + "learning_rate": 5.173432587405958e-07, + "loss": 0.2719, + "step": 34060 + }, + { + "epoch": 1.5955872019487516, + "grad_norm": 0.6179816367772771, + "learning_rate": 5.172277409505085e-07, + "loss": 0.269, + "step": 34061 + }, + { + "epoch": 1.59563404693868, + "grad_norm": 0.6167566464370771, + "learning_rate": 5.171122345707769e-07, + "loss": 0.2699, + "step": 34062 + }, + { + "epoch": 1.595680891928608, + "grad_norm": 0.5677066052511227, + "learning_rate": 5.169967396020664e-07, + "loss": 0.2578, + "step": 34063 + }, + { + "epoch": 1.5957277369185365, + "grad_norm": 0.601509526984389, + "learning_rate": 5.168812560450406e-07, + "loss": 0.2636, + "step": 34064 + }, + { + "epoch": 1.595774581908465, + "grad_norm": 0.6340755499936742, + "learning_rate": 5.16765783900365e-07, + "loss": 0.2635, + "step": 34065 + }, + { + "epoch": 1.5958214268983932, + "grad_norm": 0.5785479973088604, + "learning_rate": 5.166503231687036e-07, + "loss": 0.2743, + "step": 34066 + }, + { + "epoch": 1.5958682718883215, + "grad_norm": 0.562461538338202, + "learning_rate": 5.165348738507212e-07, + "loss": 0.2577, + "step": 34067 + }, + { + "epoch": 1.59591511687825, + "grad_norm": 0.5881054586194544, + "learning_rate": 5.164194359470825e-07, + "loss": 0.2793, + "step": 34068 + }, + { + "epoch": 1.5959619618681782, + "grad_norm": 0.5618235876928608, + "learning_rate": 5.163040094584518e-07, + "loss": 0.263, + "step": 34069 + }, + { + "epoch": 1.5960088068581064, + "grad_norm": 0.6214244205237861, + "learning_rate": 5.16188594385492e-07, + "loss": 0.2868, + "step": 34070 + }, + { + "epoch": 1.5960556518480349, + "grad_norm": 0.6412781092946401, + "learning_rate": 5.160731907288682e-07, + "loss": 0.2814, + "step": 34071 + }, + { + "epoch": 1.5961024968379631, + "grad_norm": 0.6006224517987054, + "learning_rate": 5.159577984892447e-07, + "loss": 0.2731, + "step": 34072 + }, + { + "epoch": 1.5961493418278914, + "grad_norm": 0.6670478712882852, + "learning_rate": 5.158424176672855e-07, + "loss": 0.2922, + "step": 34073 + }, + { + "epoch": 1.5961961868178198, + "grad_norm": 0.5702074775264568, + "learning_rate": 5.157270482636542e-07, + "loss": 0.2644, + "step": 34074 + }, + { + "epoch": 1.5962430318077483, + "grad_norm": 0.596953359983059, + "learning_rate": 5.156116902790159e-07, + "loss": 0.2645, + "step": 34075 + }, + { + "epoch": 1.5962898767976765, + "grad_norm": 0.6147477478589068, + "learning_rate": 5.154963437140334e-07, + "loss": 0.2849, + "step": 34076 + }, + { + "epoch": 1.5963367217876048, + "grad_norm": 0.5894463379096635, + "learning_rate": 5.1538100856937e-07, + "loss": 0.2681, + "step": 34077 + }, + { + "epoch": 1.5963835667775332, + "grad_norm": 0.5636116733278439, + "learning_rate": 5.152656848456902e-07, + "loss": 0.268, + "step": 34078 + }, + { + "epoch": 1.5964304117674615, + "grad_norm": 0.6187522226125762, + "learning_rate": 5.151503725436574e-07, + "loss": 0.2756, + "step": 34079 + }, + { + "epoch": 1.5964772567573897, + "grad_norm": 0.6002392351964134, + "learning_rate": 5.150350716639354e-07, + "loss": 0.2749, + "step": 34080 + }, + { + "epoch": 1.5965241017473182, + "grad_norm": 0.6128727158386584, + "learning_rate": 5.149197822071883e-07, + "loss": 0.2601, + "step": 34081 + }, + { + "epoch": 1.5965709467372464, + "grad_norm": 0.561844211833628, + "learning_rate": 5.148045041740785e-07, + "loss": 0.2701, + "step": 34082 + }, + { + "epoch": 1.5966177917271747, + "grad_norm": 0.5647405741429901, + "learning_rate": 5.146892375652701e-07, + "loss": 0.2466, + "step": 34083 + }, + { + "epoch": 1.5966646367171031, + "grad_norm": 0.5757640098458237, + "learning_rate": 5.145739823814258e-07, + "loss": 0.2642, + "step": 34084 + }, + { + "epoch": 1.5967114817070316, + "grad_norm": 0.6131516747348973, + "learning_rate": 5.144587386232089e-07, + "loss": 0.2675, + "step": 34085 + }, + { + "epoch": 1.5967583266969596, + "grad_norm": 0.6110022285850939, + "learning_rate": 5.14343506291283e-07, + "loss": 0.2784, + "step": 34086 + }, + { + "epoch": 1.596805171686888, + "grad_norm": 0.6215720016915351, + "learning_rate": 5.14228285386312e-07, + "loss": 0.2785, + "step": 34087 + }, + { + "epoch": 1.5968520166768165, + "grad_norm": 0.5913243384383173, + "learning_rate": 5.141130759089574e-07, + "loss": 0.2706, + "step": 34088 + }, + { + "epoch": 1.5968988616667448, + "grad_norm": 0.557214286273495, + "learning_rate": 5.139978778598834e-07, + "loss": 0.2662, + "step": 34089 + }, + { + "epoch": 1.596945706656673, + "grad_norm": 0.5793156901876624, + "learning_rate": 5.138826912397521e-07, + "loss": 0.2692, + "step": 34090 + }, + { + "epoch": 1.5969925516466015, + "grad_norm": 0.6048502636953249, + "learning_rate": 5.137675160492264e-07, + "loss": 0.2676, + "step": 34091 + }, + { + "epoch": 1.5970393966365297, + "grad_norm": 0.5645627290874411, + "learning_rate": 5.136523522889694e-07, + "loss": 0.2599, + "step": 34092 + }, + { + "epoch": 1.597086241626458, + "grad_norm": 0.6130470355831525, + "learning_rate": 5.135371999596447e-07, + "loss": 0.2706, + "step": 34093 + }, + { + "epoch": 1.5971330866163864, + "grad_norm": 0.6489774126008198, + "learning_rate": 5.134220590619135e-07, + "loss": 0.2883, + "step": 34094 + }, + { + "epoch": 1.5971799316063147, + "grad_norm": 0.5445464558673142, + "learning_rate": 5.133069295964391e-07, + "loss": 0.246, + "step": 34095 + }, + { + "epoch": 1.597226776596243, + "grad_norm": 0.6276257275778745, + "learning_rate": 5.131918115638845e-07, + "loss": 0.2656, + "step": 34096 + }, + { + "epoch": 1.5972736215861714, + "grad_norm": 0.6036642155818192, + "learning_rate": 5.13076704964911e-07, + "loss": 0.2728, + "step": 34097 + }, + { + "epoch": 1.5973204665760998, + "grad_norm": 0.5924681442009881, + "learning_rate": 5.129616098001816e-07, + "loss": 0.2529, + "step": 34098 + }, + { + "epoch": 1.5973673115660278, + "grad_norm": 0.5655199010416935, + "learning_rate": 5.128465260703589e-07, + "loss": 0.2586, + "step": 34099 + }, + { + "epoch": 1.5974141565559563, + "grad_norm": 0.6111507440518298, + "learning_rate": 5.127314537761055e-07, + "loss": 0.2772, + "step": 34100 + }, + { + "epoch": 1.5974610015458848, + "grad_norm": 0.5701481511683643, + "learning_rate": 5.126163929180827e-07, + "loss": 0.2592, + "step": 34101 + }, + { + "epoch": 1.597507846535813, + "grad_norm": 0.6117565088988591, + "learning_rate": 5.125013434969528e-07, + "loss": 0.2749, + "step": 34102 + }, + { + "epoch": 1.5975546915257413, + "grad_norm": 0.5800983041102182, + "learning_rate": 5.123863055133788e-07, + "loss": 0.2573, + "step": 34103 + }, + { + "epoch": 1.5976015365156697, + "grad_norm": 0.5680808507853451, + "learning_rate": 5.122712789680215e-07, + "loss": 0.2546, + "step": 34104 + }, + { + "epoch": 1.597648381505598, + "grad_norm": 0.593208263682426, + "learning_rate": 5.121562638615433e-07, + "loss": 0.2614, + "step": 34105 + }, + { + "epoch": 1.5976952264955262, + "grad_norm": 0.5928111477828579, + "learning_rate": 5.120412601946068e-07, + "loss": 0.2627, + "step": 34106 + }, + { + "epoch": 1.5977420714854547, + "grad_norm": 0.6144754812385017, + "learning_rate": 5.119262679678727e-07, + "loss": 0.264, + "step": 34107 + }, + { + "epoch": 1.597788916475383, + "grad_norm": 0.560469767238487, + "learning_rate": 5.11811287182003e-07, + "loss": 0.2666, + "step": 34108 + }, + { + "epoch": 1.5978357614653111, + "grad_norm": 0.5991020990826106, + "learning_rate": 5.1169631783766e-07, + "loss": 0.2719, + "step": 34109 + }, + { + "epoch": 1.5978826064552396, + "grad_norm": 0.6208482691574164, + "learning_rate": 5.115813599355052e-07, + "loss": 0.2785, + "step": 34110 + }, + { + "epoch": 1.597929451445168, + "grad_norm": 0.62040768234667, + "learning_rate": 5.114664134761993e-07, + "loss": 0.2846, + "step": 34111 + }, + { + "epoch": 1.5979762964350963, + "grad_norm": 0.6064312138301571, + "learning_rate": 5.113514784604054e-07, + "loss": 0.272, + "step": 34112 + }, + { + "epoch": 1.5980231414250246, + "grad_norm": 0.6138943929415186, + "learning_rate": 5.112365548887829e-07, + "loss": 0.2744, + "step": 34113 + }, + { + "epoch": 1.598069986414953, + "grad_norm": 0.6059496264315388, + "learning_rate": 5.111216427619941e-07, + "loss": 0.2702, + "step": 34114 + }, + { + "epoch": 1.5981168314048813, + "grad_norm": 0.6170264324220758, + "learning_rate": 5.110067420807008e-07, + "loss": 0.2837, + "step": 34115 + }, + { + "epoch": 1.5981636763948095, + "grad_norm": 0.5785904914002855, + "learning_rate": 5.108918528455634e-07, + "loss": 0.2645, + "step": 34116 + }, + { + "epoch": 1.598210521384738, + "grad_norm": 0.5580982620673691, + "learning_rate": 5.107769750572445e-07, + "loss": 0.2557, + "step": 34117 + }, + { + "epoch": 1.5982573663746662, + "grad_norm": 0.6162666763102974, + "learning_rate": 5.106621087164038e-07, + "loss": 0.2724, + "step": 34118 + }, + { + "epoch": 1.5983042113645944, + "grad_norm": 0.5964743047601242, + "learning_rate": 5.105472538237019e-07, + "loss": 0.2764, + "step": 34119 + }, + { + "epoch": 1.598351056354523, + "grad_norm": 0.6203094766469108, + "learning_rate": 5.104324103798008e-07, + "loss": 0.2822, + "step": 34120 + }, + { + "epoch": 1.5983979013444514, + "grad_norm": 0.6480611971666201, + "learning_rate": 5.103175783853609e-07, + "loss": 0.2823, + "step": 34121 + }, + { + "epoch": 1.5984447463343794, + "grad_norm": 0.6066499260442193, + "learning_rate": 5.102027578410434e-07, + "loss": 0.2692, + "step": 34122 + }, + { + "epoch": 1.5984915913243078, + "grad_norm": 0.5763825532732839, + "learning_rate": 5.100879487475088e-07, + "loss": 0.2582, + "step": 34123 + }, + { + "epoch": 1.5985384363142363, + "grad_norm": 0.6678630218295439, + "learning_rate": 5.099731511054188e-07, + "loss": 0.2756, + "step": 34124 + }, + { + "epoch": 1.5985852813041646, + "grad_norm": 0.6157890946443826, + "learning_rate": 5.098583649154329e-07, + "loss": 0.2682, + "step": 34125 + }, + { + "epoch": 1.5986321262940928, + "grad_norm": 0.6007592673545581, + "learning_rate": 5.097435901782113e-07, + "loss": 0.2659, + "step": 34126 + }, + { + "epoch": 1.5986789712840213, + "grad_norm": 0.5783094185005792, + "learning_rate": 5.096288268944153e-07, + "loss": 0.2812, + "step": 34127 + }, + { + "epoch": 1.5987258162739495, + "grad_norm": 0.6556103638141078, + "learning_rate": 5.095140750647048e-07, + "loss": 0.2874, + "step": 34128 + }, + { + "epoch": 1.5987726612638777, + "grad_norm": 0.6105139331950619, + "learning_rate": 5.093993346897408e-07, + "loss": 0.2726, + "step": 34129 + }, + { + "epoch": 1.5988195062538062, + "grad_norm": 0.5915684067296401, + "learning_rate": 5.092846057701839e-07, + "loss": 0.2895, + "step": 34130 + }, + { + "epoch": 1.5988663512437344, + "grad_norm": 0.6074042131875835, + "learning_rate": 5.091698883066931e-07, + "loss": 0.2689, + "step": 34131 + }, + { + "epoch": 1.5989131962336627, + "grad_norm": 0.5725424905068209, + "learning_rate": 5.090551822999298e-07, + "loss": 0.2706, + "step": 34132 + }, + { + "epoch": 1.5989600412235911, + "grad_norm": 0.6118791492029255, + "learning_rate": 5.08940487750553e-07, + "loss": 0.2637, + "step": 34133 + }, + { + "epoch": 1.5990068862135196, + "grad_norm": 0.5660584556581425, + "learning_rate": 5.088258046592232e-07, + "loss": 0.2512, + "step": 34134 + }, + { + "epoch": 1.5990537312034476, + "grad_norm": 0.6117555858050562, + "learning_rate": 5.087111330266e-07, + "loss": 0.2702, + "step": 34135 + }, + { + "epoch": 1.599100576193376, + "grad_norm": 0.5925230717996549, + "learning_rate": 5.085964728533449e-07, + "loss": 0.2658, + "step": 34136 + }, + { + "epoch": 1.5991474211833046, + "grad_norm": 0.6052712170529342, + "learning_rate": 5.08481824140116e-07, + "loss": 0.2614, + "step": 34137 + }, + { + "epoch": 1.5991942661732328, + "grad_norm": 0.6163807985509796, + "learning_rate": 5.083671868875739e-07, + "loss": 0.2771, + "step": 34138 + }, + { + "epoch": 1.599241111163161, + "grad_norm": 0.5882054714573242, + "learning_rate": 5.082525610963776e-07, + "loss": 0.2638, + "step": 34139 + }, + { + "epoch": 1.5992879561530895, + "grad_norm": 0.6649231570479505, + "learning_rate": 5.081379467671873e-07, + "loss": 0.2895, + "step": 34140 + }, + { + "epoch": 1.5993348011430177, + "grad_norm": 0.5907652720631265, + "learning_rate": 5.080233439006623e-07, + "loss": 0.2591, + "step": 34141 + }, + { + "epoch": 1.599381646132946, + "grad_norm": 0.6320229347935665, + "learning_rate": 5.079087524974632e-07, + "loss": 0.2879, + "step": 34142 + }, + { + "epoch": 1.5994284911228744, + "grad_norm": 0.6060750983851854, + "learning_rate": 5.077941725582477e-07, + "loss": 0.2797, + "step": 34143 + }, + { + "epoch": 1.5994753361128027, + "grad_norm": 0.5714488526669228, + "learning_rate": 5.07679604083676e-07, + "loss": 0.2624, + "step": 34144 + }, + { + "epoch": 1.599522181102731, + "grad_norm": 0.6353920743830173, + "learning_rate": 5.075650470744081e-07, + "loss": 0.2748, + "step": 34145 + }, + { + "epoch": 1.5995690260926594, + "grad_norm": 0.6533442066192034, + "learning_rate": 5.074505015311021e-07, + "loss": 0.2882, + "step": 34146 + }, + { + "epoch": 1.5996158710825878, + "grad_norm": 0.5923800443368882, + "learning_rate": 5.073359674544173e-07, + "loss": 0.2585, + "step": 34147 + }, + { + "epoch": 1.599662716072516, + "grad_norm": 0.5862813875456494, + "learning_rate": 5.072214448450141e-07, + "loss": 0.2518, + "step": 34148 + }, + { + "epoch": 1.5997095610624443, + "grad_norm": 0.6093745725942012, + "learning_rate": 5.071069337035497e-07, + "loss": 0.2881, + "step": 34149 + }, + { + "epoch": 1.5997564060523728, + "grad_norm": 0.5731482168190369, + "learning_rate": 5.069924340306845e-07, + "loss": 0.2692, + "step": 34150 + }, + { + "epoch": 1.599803251042301, + "grad_norm": 0.5736211683257257, + "learning_rate": 5.068779458270764e-07, + "loss": 0.2531, + "step": 34151 + }, + { + "epoch": 1.5998500960322293, + "grad_norm": 0.5996260925708913, + "learning_rate": 5.067634690933856e-07, + "loss": 0.2609, + "step": 34152 + }, + { + "epoch": 1.5998969410221577, + "grad_norm": 0.6271301034272259, + "learning_rate": 5.066490038302696e-07, + "loss": 0.2808, + "step": 34153 + }, + { + "epoch": 1.599943786012086, + "grad_norm": 0.5625453362582323, + "learning_rate": 5.065345500383881e-07, + "loss": 0.2528, + "step": 34154 + }, + { + "epoch": 1.5999906310020142, + "grad_norm": 0.5806954380872786, + "learning_rate": 5.064201077183983e-07, + "loss": 0.2732, + "step": 34155 + }, + { + "epoch": 1.6000374759919427, + "grad_norm": 0.5667133278144679, + "learning_rate": 5.063056768709601e-07, + "loss": 0.267, + "step": 34156 + }, + { + "epoch": 1.6000843209818711, + "grad_norm": 0.6002328794854981, + "learning_rate": 5.061912574967315e-07, + "loss": 0.2843, + "step": 34157 + }, + { + "epoch": 1.6001311659717992, + "grad_norm": 0.5866247221426889, + "learning_rate": 5.060768495963708e-07, + "loss": 0.2651, + "step": 34158 + }, + { + "epoch": 1.6001780109617276, + "grad_norm": 0.6043868120889481, + "learning_rate": 5.059624531705379e-07, + "loss": 0.2544, + "step": 34159 + }, + { + "epoch": 1.600224855951656, + "grad_norm": 0.5806948992710771, + "learning_rate": 5.058480682198893e-07, + "loss": 0.2614, + "step": 34160 + }, + { + "epoch": 1.6002717009415843, + "grad_norm": 0.5738960238467357, + "learning_rate": 5.057336947450836e-07, + "loss": 0.2614, + "step": 34161 + }, + { + "epoch": 1.6003185459315126, + "grad_norm": 0.6092650707717351, + "learning_rate": 5.056193327467793e-07, + "loss": 0.2511, + "step": 34162 + }, + { + "epoch": 1.600365390921441, + "grad_norm": 0.599742892948744, + "learning_rate": 5.055049822256342e-07, + "loss": 0.2599, + "step": 34163 + }, + { + "epoch": 1.6004122359113693, + "grad_norm": 0.6419887899940314, + "learning_rate": 5.053906431823066e-07, + "loss": 0.2834, + "step": 34164 + }, + { + "epoch": 1.6004590809012975, + "grad_norm": 0.6081128221862497, + "learning_rate": 5.052763156174547e-07, + "loss": 0.2592, + "step": 34165 + }, + { + "epoch": 1.600505925891226, + "grad_norm": 0.568970319215392, + "learning_rate": 5.05161999531737e-07, + "loss": 0.2611, + "step": 34166 + }, + { + "epoch": 1.6005527708811542, + "grad_norm": 0.6586001407402063, + "learning_rate": 5.050476949258104e-07, + "loss": 0.2912, + "step": 34167 + }, + { + "epoch": 1.6005996158710825, + "grad_norm": 0.6416535364316329, + "learning_rate": 5.049334018003324e-07, + "loss": 0.2773, + "step": 34168 + }, + { + "epoch": 1.600646460861011, + "grad_norm": 0.5797268628241153, + "learning_rate": 5.04819120155961e-07, + "loss": 0.2598, + "step": 34169 + }, + { + "epoch": 1.6006933058509394, + "grad_norm": 0.6006532120248316, + "learning_rate": 5.047048499933543e-07, + "loss": 0.2768, + "step": 34170 + }, + { + "epoch": 1.6007401508408674, + "grad_norm": 0.6385445880537148, + "learning_rate": 5.045905913131696e-07, + "loss": 0.2686, + "step": 34171 + }, + { + "epoch": 1.6007869958307959, + "grad_norm": 0.6638640813490906, + "learning_rate": 5.044763441160646e-07, + "loss": 0.2831, + "step": 34172 + }, + { + "epoch": 1.6008338408207243, + "grad_norm": 0.5573358764559159, + "learning_rate": 5.043621084026973e-07, + "loss": 0.2621, + "step": 34173 + }, + { + "epoch": 1.6008806858106526, + "grad_norm": 0.5747509220589184, + "learning_rate": 5.042478841737245e-07, + "loss": 0.2745, + "step": 34174 + }, + { + "epoch": 1.6009275308005808, + "grad_norm": 0.6203684768256286, + "learning_rate": 5.041336714298029e-07, + "loss": 0.281, + "step": 34175 + }, + { + "epoch": 1.6009743757905093, + "grad_norm": 0.6152118544064585, + "learning_rate": 5.040194701715903e-07, + "loss": 0.2945, + "step": 34176 + }, + { + "epoch": 1.6010212207804375, + "grad_norm": 0.61586826408839, + "learning_rate": 5.039052803997441e-07, + "loss": 0.2798, + "step": 34177 + }, + { + "epoch": 1.6010680657703658, + "grad_norm": 0.6341213037389216, + "learning_rate": 5.037911021149212e-07, + "loss": 0.2776, + "step": 34178 + }, + { + "epoch": 1.6011149107602942, + "grad_norm": 0.6256147289052907, + "learning_rate": 5.036769353177793e-07, + "loss": 0.2896, + "step": 34179 + }, + { + "epoch": 1.6011617557502225, + "grad_norm": 0.6012421893659194, + "learning_rate": 5.035627800089743e-07, + "loss": 0.2752, + "step": 34180 + }, + { + "epoch": 1.6012086007401507, + "grad_norm": 0.6000123903736002, + "learning_rate": 5.034486361891644e-07, + "loss": 0.2761, + "step": 34181 + }, + { + "epoch": 1.6012554457300792, + "grad_norm": 0.5903289343534278, + "learning_rate": 5.033345038590054e-07, + "loss": 0.2672, + "step": 34182 + }, + { + "epoch": 1.6013022907200076, + "grad_norm": 0.5670433334800454, + "learning_rate": 5.03220383019154e-07, + "loss": 0.2678, + "step": 34183 + }, + { + "epoch": 1.6013491357099359, + "grad_norm": 0.5973975475482839, + "learning_rate": 5.031062736702677e-07, + "loss": 0.2717, + "step": 34184 + }, + { + "epoch": 1.601395980699864, + "grad_norm": 0.6275254561779214, + "learning_rate": 5.029921758130035e-07, + "loss": 0.2696, + "step": 34185 + }, + { + "epoch": 1.6014428256897926, + "grad_norm": 0.5811499713538623, + "learning_rate": 5.028780894480167e-07, + "loss": 0.2651, + "step": 34186 + }, + { + "epoch": 1.6014896706797208, + "grad_norm": 0.5639567218362619, + "learning_rate": 5.027640145759654e-07, + "loss": 0.2678, + "step": 34187 + }, + { + "epoch": 1.601536515669649, + "grad_norm": 0.5625908094688239, + "learning_rate": 5.026499511975045e-07, + "loss": 0.2602, + "step": 34188 + }, + { + "epoch": 1.6015833606595775, + "grad_norm": 0.6506968680602722, + "learning_rate": 5.025358993132909e-07, + "loss": 0.2897, + "step": 34189 + }, + { + "epoch": 1.6016302056495058, + "grad_norm": 0.590945866543796, + "learning_rate": 5.024218589239813e-07, + "loss": 0.2835, + "step": 34190 + }, + { + "epoch": 1.601677050639434, + "grad_norm": 0.6343294568806735, + "learning_rate": 5.023078300302326e-07, + "loss": 0.2812, + "step": 34191 + }, + { + "epoch": 1.6017238956293625, + "grad_norm": 0.5585570295269423, + "learning_rate": 5.021938126326994e-07, + "loss": 0.2534, + "step": 34192 + }, + { + "epoch": 1.601770740619291, + "grad_norm": 0.603961264728167, + "learning_rate": 5.020798067320387e-07, + "loss": 0.2697, + "step": 34193 + }, + { + "epoch": 1.601817585609219, + "grad_norm": 0.608146031114681, + "learning_rate": 5.019658123289073e-07, + "loss": 0.2643, + "step": 34194 + }, + { + "epoch": 1.6018644305991474, + "grad_norm": 0.521389337646337, + "learning_rate": 5.018518294239597e-07, + "loss": 0.2482, + "step": 34195 + }, + { + "epoch": 1.6019112755890759, + "grad_norm": 0.6091940514866839, + "learning_rate": 5.017378580178528e-07, + "loss": 0.2682, + "step": 34196 + }, + { + "epoch": 1.601958120579004, + "grad_norm": 0.5808981811071997, + "learning_rate": 5.016238981112426e-07, + "loss": 0.2649, + "step": 34197 + }, + { + "epoch": 1.6020049655689323, + "grad_norm": 0.5715174610308539, + "learning_rate": 5.015099497047843e-07, + "loss": 0.2533, + "step": 34198 + }, + { + "epoch": 1.6020518105588608, + "grad_norm": 0.6231601079110448, + "learning_rate": 5.013960127991338e-07, + "loss": 0.2879, + "step": 34199 + }, + { + "epoch": 1.602098655548789, + "grad_norm": 0.5998988822988134, + "learning_rate": 5.012820873949467e-07, + "loss": 0.2835, + "step": 34200 + }, + { + "epoch": 1.6021455005387173, + "grad_norm": 0.5666934194527156, + "learning_rate": 5.011681734928797e-07, + "loss": 0.2562, + "step": 34201 + }, + { + "epoch": 1.6021923455286458, + "grad_norm": 0.6200703789499082, + "learning_rate": 5.010542710935867e-07, + "loss": 0.2736, + "step": 34202 + }, + { + "epoch": 1.602239190518574, + "grad_norm": 0.5637380312628364, + "learning_rate": 5.009403801977247e-07, + "loss": 0.2613, + "step": 34203 + }, + { + "epoch": 1.6022860355085022, + "grad_norm": 0.6058057652647058, + "learning_rate": 5.00826500805948e-07, + "loss": 0.2711, + "step": 34204 + }, + { + "epoch": 1.6023328804984307, + "grad_norm": 0.6418880996084321, + "learning_rate": 5.007126329189119e-07, + "loss": 0.2846, + "step": 34205 + }, + { + "epoch": 1.6023797254883592, + "grad_norm": 0.6423183219151503, + "learning_rate": 5.005987765372722e-07, + "loss": 0.2693, + "step": 34206 + }, + { + "epoch": 1.6024265704782872, + "grad_norm": 0.571799716128048, + "learning_rate": 5.004849316616839e-07, + "loss": 0.2669, + "step": 34207 + }, + { + "epoch": 1.6024734154682156, + "grad_norm": 0.5903416252367187, + "learning_rate": 5.003710982928031e-07, + "loss": 0.2688, + "step": 34208 + }, + { + "epoch": 1.602520260458144, + "grad_norm": 0.6059702408710438, + "learning_rate": 5.002572764312841e-07, + "loss": 0.2542, + "step": 34209 + }, + { + "epoch": 1.6025671054480723, + "grad_norm": 0.6468291092280487, + "learning_rate": 5.001434660777809e-07, + "loss": 0.2832, + "step": 34210 + }, + { + "epoch": 1.6026139504380006, + "grad_norm": 0.592688062147597, + "learning_rate": 5.000296672329496e-07, + "loss": 0.266, + "step": 34211 + }, + { + "epoch": 1.602660795427929, + "grad_norm": 0.6152174010254504, + "learning_rate": 4.999158798974449e-07, + "loss": 0.2847, + "step": 34212 + }, + { + "epoch": 1.6027076404178573, + "grad_norm": 0.5964102192346744, + "learning_rate": 4.998021040719218e-07, + "loss": 0.2677, + "step": 34213 + }, + { + "epoch": 1.6027544854077855, + "grad_norm": 0.5866843755215881, + "learning_rate": 4.996883397570345e-07, + "loss": 0.2557, + "step": 34214 + }, + { + "epoch": 1.602801330397714, + "grad_norm": 0.5516009596109726, + "learning_rate": 4.995745869534388e-07, + "loss": 0.2513, + "step": 34215 + }, + { + "epoch": 1.6028481753876422, + "grad_norm": 0.582809591885351, + "learning_rate": 4.994608456617888e-07, + "loss": 0.2548, + "step": 34216 + }, + { + "epoch": 1.6028950203775705, + "grad_norm": 0.603348947236136, + "learning_rate": 4.993471158827379e-07, + "loss": 0.2772, + "step": 34217 + }, + { + "epoch": 1.602941865367499, + "grad_norm": 0.6121712845013138, + "learning_rate": 4.992333976169417e-07, + "loss": 0.2781, + "step": 34218 + }, + { + "epoch": 1.6029887103574274, + "grad_norm": 0.563711949571632, + "learning_rate": 4.991196908650542e-07, + "loss": 0.2533, + "step": 34219 + }, + { + "epoch": 1.6030355553473556, + "grad_norm": 0.6005214407474546, + "learning_rate": 4.990059956277301e-07, + "loss": 0.2665, + "step": 34220 + }, + { + "epoch": 1.6030824003372839, + "grad_norm": 0.591504247735999, + "learning_rate": 4.988923119056241e-07, + "loss": 0.2738, + "step": 34221 + }, + { + "epoch": 1.6031292453272123, + "grad_norm": 0.6359574656211681, + "learning_rate": 4.987786396993893e-07, + "loss": 0.2746, + "step": 34222 + }, + { + "epoch": 1.6031760903171406, + "grad_norm": 0.5729189490399723, + "learning_rate": 4.986649790096812e-07, + "loss": 0.2672, + "step": 34223 + }, + { + "epoch": 1.6032229353070688, + "grad_norm": 0.5677188201826455, + "learning_rate": 4.985513298371524e-07, + "loss": 0.2589, + "step": 34224 + }, + { + "epoch": 1.6032697802969973, + "grad_norm": 0.5888892721442149, + "learning_rate": 4.984376921824577e-07, + "loss": 0.2742, + "step": 34225 + }, + { + "epoch": 1.6033166252869255, + "grad_norm": 0.638126634603089, + "learning_rate": 4.98324066046251e-07, + "loss": 0.2655, + "step": 34226 + }, + { + "epoch": 1.6033634702768538, + "grad_norm": 0.6058892096694142, + "learning_rate": 4.982104514291869e-07, + "loss": 0.2746, + "step": 34227 + }, + { + "epoch": 1.6034103152667822, + "grad_norm": 0.6371109628625169, + "learning_rate": 4.980968483319176e-07, + "loss": 0.28, + "step": 34228 + }, + { + "epoch": 1.6034571602567107, + "grad_norm": 0.6154947046646077, + "learning_rate": 4.979832567550988e-07, + "loss": 0.2721, + "step": 34229 + }, + { + "epoch": 1.6035040052466387, + "grad_norm": 0.617343790724848, + "learning_rate": 4.978696766993823e-07, + "loss": 0.2666, + "step": 34230 + }, + { + "epoch": 1.6035508502365672, + "grad_norm": 0.6185987763019727, + "learning_rate": 4.977561081654225e-07, + "loss": 0.2697, + "step": 34231 + }, + { + "epoch": 1.6035976952264956, + "grad_norm": 0.5630225373799013, + "learning_rate": 4.976425511538733e-07, + "loss": 0.2514, + "step": 34232 + }, + { + "epoch": 1.6036445402164239, + "grad_norm": 0.6191933475811776, + "learning_rate": 4.975290056653884e-07, + "loss": 0.2837, + "step": 34233 + }, + { + "epoch": 1.6036913852063521, + "grad_norm": 0.6783812681319294, + "learning_rate": 4.974154717006202e-07, + "loss": 0.2846, + "step": 34234 + }, + { + "epoch": 1.6037382301962806, + "grad_norm": 0.595287950503938, + "learning_rate": 4.973019492602227e-07, + "loss": 0.279, + "step": 34235 + }, + { + "epoch": 1.6037850751862088, + "grad_norm": 0.5653680103791048, + "learning_rate": 4.971884383448497e-07, + "loss": 0.2609, + "step": 34236 + }, + { + "epoch": 1.603831920176137, + "grad_norm": 0.5691665790363505, + "learning_rate": 4.970749389551532e-07, + "loss": 0.2704, + "step": 34237 + }, + { + "epoch": 1.6038787651660655, + "grad_norm": 0.6143168441507827, + "learning_rate": 4.969614510917869e-07, + "loss": 0.279, + "step": 34238 + }, + { + "epoch": 1.6039256101559938, + "grad_norm": 0.6376411789796588, + "learning_rate": 4.968479747554042e-07, + "loss": 0.2777, + "step": 34239 + }, + { + "epoch": 1.603972455145922, + "grad_norm": 0.6194229240642736, + "learning_rate": 4.967345099466583e-07, + "loss": 0.2713, + "step": 34240 + }, + { + "epoch": 1.6040193001358505, + "grad_norm": 0.6330496148248963, + "learning_rate": 4.966210566662013e-07, + "loss": 0.2711, + "step": 34241 + }, + { + "epoch": 1.604066145125779, + "grad_norm": 0.5921923450197718, + "learning_rate": 4.965076149146867e-07, + "loss": 0.2638, + "step": 34242 + }, + { + "epoch": 1.604112990115707, + "grad_norm": 0.6651418943096531, + "learning_rate": 4.963941846927678e-07, + "loss": 0.292, + "step": 34243 + }, + { + "epoch": 1.6041598351056354, + "grad_norm": 0.6307104727435598, + "learning_rate": 4.962807660010963e-07, + "loss": 0.2594, + "step": 34244 + }, + { + "epoch": 1.6042066800955639, + "grad_norm": 0.6064602099704826, + "learning_rate": 4.961673588403252e-07, + "loss": 0.2744, + "step": 34245 + }, + { + "epoch": 1.6042535250854921, + "grad_norm": 0.5989951371007431, + "learning_rate": 4.960539632111078e-07, + "loss": 0.2682, + "step": 34246 + }, + { + "epoch": 1.6043003700754204, + "grad_norm": 0.618968013473369, + "learning_rate": 4.959405791140956e-07, + "loss": 0.2624, + "step": 34247 + }, + { + "epoch": 1.6043472150653488, + "grad_norm": 0.6232983905908569, + "learning_rate": 4.958272065499417e-07, + "loss": 0.282, + "step": 34248 + }, + { + "epoch": 1.604394060055277, + "grad_norm": 0.5917519891087761, + "learning_rate": 4.957138455192986e-07, + "loss": 0.2661, + "step": 34249 + }, + { + "epoch": 1.6044409050452053, + "grad_norm": 0.6235586122615348, + "learning_rate": 4.956004960228191e-07, + "loss": 0.2741, + "step": 34250 + }, + { + "epoch": 1.6044877500351338, + "grad_norm": 0.568983031588924, + "learning_rate": 4.954871580611545e-07, + "loss": 0.2538, + "step": 34251 + }, + { + "epoch": 1.604534595025062, + "grad_norm": 0.5675604927446005, + "learning_rate": 4.953738316349579e-07, + "loss": 0.2665, + "step": 34252 + }, + { + "epoch": 1.6045814400149903, + "grad_norm": 0.5860039364265145, + "learning_rate": 4.952605167448806e-07, + "loss": 0.2639, + "step": 34253 + }, + { + "epoch": 1.6046282850049187, + "grad_norm": 0.5974181166395642, + "learning_rate": 4.95147213391575e-07, + "loss": 0.265, + "step": 34254 + }, + { + "epoch": 1.6046751299948472, + "grad_norm": 0.5858937835432976, + "learning_rate": 4.950339215756933e-07, + "loss": 0.279, + "step": 34255 + }, + { + "epoch": 1.6047219749847754, + "grad_norm": 0.5659677479195789, + "learning_rate": 4.949206412978874e-07, + "loss": 0.273, + "step": 34256 + }, + { + "epoch": 1.6047688199747037, + "grad_norm": 0.6494184639239546, + "learning_rate": 4.948073725588102e-07, + "loss": 0.2884, + "step": 34257 + }, + { + "epoch": 1.6048156649646321, + "grad_norm": 0.5753215841714374, + "learning_rate": 4.946941153591123e-07, + "loss": 0.266, + "step": 34258 + }, + { + "epoch": 1.6048625099545604, + "grad_norm": 0.5878251452201215, + "learning_rate": 4.94580869699445e-07, + "loss": 0.2811, + "step": 34259 + }, + { + "epoch": 1.6049093549444886, + "grad_norm": 0.5754231256773146, + "learning_rate": 4.944676355804612e-07, + "loss": 0.2666, + "step": 34260 + }, + { + "epoch": 1.604956199934417, + "grad_norm": 0.5962757404104094, + "learning_rate": 4.943544130028116e-07, + "loss": 0.2789, + "step": 34261 + }, + { + "epoch": 1.6050030449243453, + "grad_norm": 0.5827491554205438, + "learning_rate": 4.942412019671486e-07, + "loss": 0.2727, + "step": 34262 + }, + { + "epoch": 1.6050498899142736, + "grad_norm": 0.6070386306909048, + "learning_rate": 4.941280024741232e-07, + "loss": 0.2749, + "step": 34263 + }, + { + "epoch": 1.605096734904202, + "grad_norm": 0.5749791260217227, + "learning_rate": 4.940148145243875e-07, + "loss": 0.2738, + "step": 34264 + }, + { + "epoch": 1.6051435798941305, + "grad_norm": 0.5841771321229431, + "learning_rate": 4.939016381185926e-07, + "loss": 0.2617, + "step": 34265 + }, + { + "epoch": 1.6051904248840585, + "grad_norm": 0.5845300359032052, + "learning_rate": 4.937884732573889e-07, + "loss": 0.2671, + "step": 34266 + }, + { + "epoch": 1.605237269873987, + "grad_norm": 0.5972258535037342, + "learning_rate": 4.936753199414282e-07, + "loss": 0.277, + "step": 34267 + }, + { + "epoch": 1.6052841148639154, + "grad_norm": 0.6014767236509235, + "learning_rate": 4.935621781713618e-07, + "loss": 0.2662, + "step": 34268 + }, + { + "epoch": 1.6053309598538437, + "grad_norm": 0.6361391711539498, + "learning_rate": 4.934490479478407e-07, + "loss": 0.2802, + "step": 34269 + }, + { + "epoch": 1.605377804843772, + "grad_norm": 0.5462853638883375, + "learning_rate": 4.933359292715167e-07, + "loss": 0.275, + "step": 34270 + }, + { + "epoch": 1.6054246498337004, + "grad_norm": 0.6985048009727951, + "learning_rate": 4.932228221430394e-07, + "loss": 0.2974, + "step": 34271 + }, + { + "epoch": 1.6054714948236286, + "grad_norm": 0.6274076306370571, + "learning_rate": 4.93109726563061e-07, + "loss": 0.2802, + "step": 34272 + }, + { + "epoch": 1.6055183398135568, + "grad_norm": 0.6321541394508898, + "learning_rate": 4.92996642532231e-07, + "loss": 0.2812, + "step": 34273 + }, + { + "epoch": 1.6055651848034853, + "grad_norm": 0.5955448133033231, + "learning_rate": 4.928835700512011e-07, + "loss": 0.2758, + "step": 34274 + }, + { + "epoch": 1.6056120297934136, + "grad_norm": 0.6276116359464711, + "learning_rate": 4.927705091206217e-07, + "loss": 0.2801, + "step": 34275 + }, + { + "epoch": 1.6056588747833418, + "grad_norm": 0.5872268630604252, + "learning_rate": 4.926574597411443e-07, + "loss": 0.2928, + "step": 34276 + }, + { + "epoch": 1.6057057197732703, + "grad_norm": 0.572174051243345, + "learning_rate": 4.925444219134179e-07, + "loss": 0.2636, + "step": 34277 + }, + { + "epoch": 1.6057525647631987, + "grad_norm": 0.6386997384546487, + "learning_rate": 4.924313956380944e-07, + "loss": 0.2967, + "step": 34278 + }, + { + "epoch": 1.6057994097531267, + "grad_norm": 0.6441258350309594, + "learning_rate": 4.92318380915823e-07, + "loss": 0.2862, + "step": 34279 + }, + { + "epoch": 1.6058462547430552, + "grad_norm": 0.6186602929986996, + "learning_rate": 4.922053777472546e-07, + "loss": 0.2687, + "step": 34280 + }, + { + "epoch": 1.6058930997329837, + "grad_norm": 0.6150579781517628, + "learning_rate": 4.920923861330398e-07, + "loss": 0.2915, + "step": 34281 + }, + { + "epoch": 1.605939944722912, + "grad_norm": 0.5542532339340162, + "learning_rate": 4.919794060738292e-07, + "loss": 0.2523, + "step": 34282 + }, + { + "epoch": 1.6059867897128401, + "grad_norm": 0.6294828998590426, + "learning_rate": 4.918664375702717e-07, + "loss": 0.2793, + "step": 34283 + }, + { + "epoch": 1.6060336347027686, + "grad_norm": 0.5799903618230534, + "learning_rate": 4.917534806230184e-07, + "loss": 0.2611, + "step": 34284 + }, + { + "epoch": 1.6060804796926968, + "grad_norm": 0.5801962867992231, + "learning_rate": 4.916405352327194e-07, + "loss": 0.2715, + "step": 34285 + }, + { + "epoch": 1.606127324682625, + "grad_norm": 0.5816980582324489, + "learning_rate": 4.91527601400024e-07, + "loss": 0.2758, + "step": 34286 + }, + { + "epoch": 1.6061741696725536, + "grad_norm": 0.6296049139631759, + "learning_rate": 4.914146791255822e-07, + "loss": 0.2822, + "step": 34287 + }, + { + "epoch": 1.6062210146624818, + "grad_norm": 0.5882940725450155, + "learning_rate": 4.913017684100449e-07, + "loss": 0.2654, + "step": 34288 + }, + { + "epoch": 1.60626785965241, + "grad_norm": 0.6267491592078736, + "learning_rate": 4.911888692540604e-07, + "loss": 0.271, + "step": 34289 + }, + { + "epoch": 1.6063147046423385, + "grad_norm": 0.611029809660125, + "learning_rate": 4.910759816582788e-07, + "loss": 0.2745, + "step": 34290 + }, + { + "epoch": 1.606361549632267, + "grad_norm": 0.5829860173714176, + "learning_rate": 4.909631056233502e-07, + "loss": 0.2754, + "step": 34291 + }, + { + "epoch": 1.6064083946221952, + "grad_norm": 0.5924173369164342, + "learning_rate": 4.908502411499247e-07, + "loss": 0.2641, + "step": 34292 + }, + { + "epoch": 1.6064552396121234, + "grad_norm": 0.5698111379430942, + "learning_rate": 4.907373882386502e-07, + "loss": 0.2693, + "step": 34293 + }, + { + "epoch": 1.606502084602052, + "grad_norm": 0.5518717505709207, + "learning_rate": 4.906245468901776e-07, + "loss": 0.2624, + "step": 34294 + }, + { + "epoch": 1.6065489295919801, + "grad_norm": 0.5705150488922778, + "learning_rate": 4.905117171051555e-07, + "loss": 0.2532, + "step": 34295 + }, + { + "epoch": 1.6065957745819084, + "grad_norm": 0.6392238279849646, + "learning_rate": 4.903988988842332e-07, + "loss": 0.2942, + "step": 34296 + }, + { + "epoch": 1.6066426195718368, + "grad_norm": 0.6046476459698537, + "learning_rate": 4.9028609222806e-07, + "loss": 0.2868, + "step": 34297 + }, + { + "epoch": 1.606689464561765, + "grad_norm": 0.5869049642179527, + "learning_rate": 4.901732971372852e-07, + "loss": 0.2603, + "step": 34298 + }, + { + "epoch": 1.6067363095516933, + "grad_norm": 0.6268984826838131, + "learning_rate": 4.900605136125589e-07, + "loss": 0.278, + "step": 34299 + }, + { + "epoch": 1.6067831545416218, + "grad_norm": 0.5650175165648486, + "learning_rate": 4.89947741654529e-07, + "loss": 0.2479, + "step": 34300 + }, + { + "epoch": 1.6068299995315503, + "grad_norm": 0.6272524997311754, + "learning_rate": 4.898349812638439e-07, + "loss": 0.2629, + "step": 34301 + }, + { + "epoch": 1.6068768445214783, + "grad_norm": 0.6006217066555637, + "learning_rate": 4.897222324411532e-07, + "loss": 0.2803, + "step": 34302 + }, + { + "epoch": 1.6069236895114067, + "grad_norm": 0.5424268720226391, + "learning_rate": 4.896094951871058e-07, + "loss": 0.2472, + "step": 34303 + }, + { + "epoch": 1.6069705345013352, + "grad_norm": 0.6006548204865029, + "learning_rate": 4.894967695023506e-07, + "loss": 0.2619, + "step": 34304 + }, + { + "epoch": 1.6070173794912634, + "grad_norm": 0.5760397345482259, + "learning_rate": 4.893840553875362e-07, + "loss": 0.28, + "step": 34305 + }, + { + "epoch": 1.6070642244811917, + "grad_norm": 0.6071995855142814, + "learning_rate": 4.892713528433116e-07, + "loss": 0.27, + "step": 34306 + }, + { + "epoch": 1.6071110694711201, + "grad_norm": 0.6312160272762769, + "learning_rate": 4.891586618703254e-07, + "loss": 0.2732, + "step": 34307 + }, + { + "epoch": 1.6071579144610484, + "grad_norm": 0.6331529675386592, + "learning_rate": 4.890459824692245e-07, + "loss": 0.2738, + "step": 34308 + }, + { + "epoch": 1.6072047594509766, + "grad_norm": 0.5944036684089542, + "learning_rate": 4.889333146406589e-07, + "loss": 0.2854, + "step": 34309 + }, + { + "epoch": 1.607251604440905, + "grad_norm": 0.6020033541314331, + "learning_rate": 4.888206583852767e-07, + "loss": 0.2729, + "step": 34310 + }, + { + "epoch": 1.6072984494308333, + "grad_norm": 0.5812794557020291, + "learning_rate": 4.887080137037259e-07, + "loss": 0.2675, + "step": 34311 + }, + { + "epoch": 1.6073452944207616, + "grad_norm": 0.6315601358800557, + "learning_rate": 4.885953805966557e-07, + "loss": 0.2844, + "step": 34312 + }, + { + "epoch": 1.60739213941069, + "grad_norm": 0.5894382888428437, + "learning_rate": 4.884827590647129e-07, + "loss": 0.265, + "step": 34313 + }, + { + "epoch": 1.6074389844006185, + "grad_norm": 0.5872666270114727, + "learning_rate": 4.883701491085468e-07, + "loss": 0.2617, + "step": 34314 + }, + { + "epoch": 1.6074858293905465, + "grad_norm": 0.6194031024964152, + "learning_rate": 4.882575507288043e-07, + "loss": 0.2749, + "step": 34315 + }, + { + "epoch": 1.607532674380475, + "grad_norm": 0.5911190481787736, + "learning_rate": 4.88144963926134e-07, + "loss": 0.2576, + "step": 34316 + }, + { + "epoch": 1.6075795193704034, + "grad_norm": 0.6329440001000411, + "learning_rate": 4.880323887011837e-07, + "loss": 0.2771, + "step": 34317 + }, + { + "epoch": 1.6076263643603317, + "grad_norm": 0.5447153329533676, + "learning_rate": 4.879198250546014e-07, + "loss": 0.2656, + "step": 34318 + }, + { + "epoch": 1.60767320935026, + "grad_norm": 0.6252799618965355, + "learning_rate": 4.878072729870353e-07, + "loss": 0.2779, + "step": 34319 + }, + { + "epoch": 1.6077200543401884, + "grad_norm": 0.5604878426225505, + "learning_rate": 4.876947324991321e-07, + "loss": 0.2657, + "step": 34320 + }, + { + "epoch": 1.6077668993301166, + "grad_norm": 0.5628084709980442, + "learning_rate": 4.875822035915406e-07, + "loss": 0.2788, + "step": 34321 + }, + { + "epoch": 1.6078137443200449, + "grad_norm": 0.6349978099559371, + "learning_rate": 4.874696862649069e-07, + "loss": 0.2849, + "step": 34322 + }, + { + "epoch": 1.6078605893099733, + "grad_norm": 0.5945216163131727, + "learning_rate": 4.873571805198793e-07, + "loss": 0.2729, + "step": 34323 + }, + { + "epoch": 1.6079074342999016, + "grad_norm": 0.5921619253823178, + "learning_rate": 4.872446863571054e-07, + "loss": 0.2726, + "step": 34324 + }, + { + "epoch": 1.6079542792898298, + "grad_norm": 0.6278105999604601, + "learning_rate": 4.871322037772333e-07, + "loss": 0.2993, + "step": 34325 + }, + { + "epoch": 1.6080011242797583, + "grad_norm": 0.5518775706257707, + "learning_rate": 4.870197327809084e-07, + "loss": 0.2539, + "step": 34326 + }, + { + "epoch": 1.6080479692696867, + "grad_norm": 0.5892770067662079, + "learning_rate": 4.869072733687799e-07, + "loss": 0.2574, + "step": 34327 + }, + { + "epoch": 1.608094814259615, + "grad_norm": 0.600004444202427, + "learning_rate": 4.867948255414934e-07, + "loss": 0.2804, + "step": 34328 + }, + { + "epoch": 1.6081416592495432, + "grad_norm": 0.5858811701670971, + "learning_rate": 4.866823892996967e-07, + "loss": 0.2693, + "step": 34329 + }, + { + "epoch": 1.6081885042394717, + "grad_norm": 0.585105529189851, + "learning_rate": 4.865699646440367e-07, + "loss": 0.2608, + "step": 34330 + }, + { + "epoch": 1.6082353492294, + "grad_norm": 0.6182062433230459, + "learning_rate": 4.86457551575161e-07, + "loss": 0.2691, + "step": 34331 + }, + { + "epoch": 1.6082821942193282, + "grad_norm": 1.4000693861584639, + "learning_rate": 4.863451500937155e-07, + "loss": 0.2887, + "step": 34332 + }, + { + "epoch": 1.6083290392092566, + "grad_norm": 0.625137729868225, + "learning_rate": 4.862327602003478e-07, + "loss": 0.2699, + "step": 34333 + }, + { + "epoch": 1.6083758841991849, + "grad_norm": 0.5935522733908635, + "learning_rate": 4.861203818957048e-07, + "loss": 0.2662, + "step": 34334 + }, + { + "epoch": 1.608422729189113, + "grad_norm": 0.5924650715953653, + "learning_rate": 4.860080151804323e-07, + "loss": 0.2673, + "step": 34335 + }, + { + "epoch": 1.6084695741790416, + "grad_norm": 0.5860677205028391, + "learning_rate": 4.858956600551773e-07, + "loss": 0.2701, + "step": 34336 + }, + { + "epoch": 1.60851641916897, + "grad_norm": 0.5787908978737527, + "learning_rate": 4.857833165205875e-07, + "loss": 0.2786, + "step": 34337 + }, + { + "epoch": 1.608563264158898, + "grad_norm": 0.5900693374605317, + "learning_rate": 4.856709845773075e-07, + "loss": 0.2757, + "step": 34338 + }, + { + "epoch": 1.6086101091488265, + "grad_norm": 0.5783209476788466, + "learning_rate": 4.855586642259849e-07, + "loss": 0.2497, + "step": 34339 + }, + { + "epoch": 1.608656954138755, + "grad_norm": 0.5920870623323375, + "learning_rate": 4.854463554672659e-07, + "loss": 0.2758, + "step": 34340 + }, + { + "epoch": 1.6087037991286832, + "grad_norm": 0.5767309736780103, + "learning_rate": 4.853340583017973e-07, + "loss": 0.2751, + "step": 34341 + }, + { + "epoch": 1.6087506441186115, + "grad_norm": 0.6196710481630868, + "learning_rate": 4.852217727302242e-07, + "loss": 0.2606, + "step": 34342 + }, + { + "epoch": 1.60879748910854, + "grad_norm": 0.5688612088701083, + "learning_rate": 4.85109498753194e-07, + "loss": 0.2583, + "step": 34343 + }, + { + "epoch": 1.6088443340984682, + "grad_norm": 0.5816509089005575, + "learning_rate": 4.849972363713518e-07, + "loss": 0.26, + "step": 34344 + }, + { + "epoch": 1.6088911790883964, + "grad_norm": 0.5694977188207753, + "learning_rate": 4.848849855853438e-07, + "loss": 0.2581, + "step": 34345 + }, + { + "epoch": 1.6089380240783249, + "grad_norm": 0.5917081840569528, + "learning_rate": 4.847727463958163e-07, + "loss": 0.2661, + "step": 34346 + }, + { + "epoch": 1.608984869068253, + "grad_norm": 0.5942562164192168, + "learning_rate": 4.846605188034151e-07, + "loss": 0.2721, + "step": 34347 + }, + { + "epoch": 1.6090317140581814, + "grad_norm": 0.579711791081355, + "learning_rate": 4.845483028087869e-07, + "loss": 0.2651, + "step": 34348 + }, + { + "epoch": 1.6090785590481098, + "grad_norm": 0.6501733541925429, + "learning_rate": 4.844360984125765e-07, + "loss": 0.2856, + "step": 34349 + }, + { + "epoch": 1.6091254040380383, + "grad_norm": 0.6317577572634611, + "learning_rate": 4.843239056154292e-07, + "loss": 0.2846, + "step": 34350 + }, + { + "epoch": 1.6091722490279663, + "grad_norm": 0.6695638824273066, + "learning_rate": 4.842117244179911e-07, + "loss": 0.2862, + "step": 34351 + }, + { + "epoch": 1.6092190940178948, + "grad_norm": 0.5973561905676267, + "learning_rate": 4.840995548209079e-07, + "loss": 0.2851, + "step": 34352 + }, + { + "epoch": 1.6092659390078232, + "grad_norm": 0.6364745191508252, + "learning_rate": 4.839873968248252e-07, + "loss": 0.3006, + "step": 34353 + }, + { + "epoch": 1.6093127839977515, + "grad_norm": 0.5978918757912179, + "learning_rate": 4.838752504303882e-07, + "loss": 0.2692, + "step": 34354 + }, + { + "epoch": 1.6093596289876797, + "grad_norm": 0.6109001596142682, + "learning_rate": 4.83763115638243e-07, + "loss": 0.2589, + "step": 34355 + }, + { + "epoch": 1.6094064739776082, + "grad_norm": 0.6161714593095746, + "learning_rate": 4.836509924490345e-07, + "loss": 0.2726, + "step": 34356 + }, + { + "epoch": 1.6094533189675364, + "grad_norm": 0.5863775238941543, + "learning_rate": 4.83538880863407e-07, + "loss": 0.2702, + "step": 34357 + }, + { + "epoch": 1.6095001639574646, + "grad_norm": 0.5975056734228066, + "learning_rate": 4.834267808820065e-07, + "loss": 0.2673, + "step": 34358 + }, + { + "epoch": 1.609547008947393, + "grad_norm": 0.5935090273900858, + "learning_rate": 4.83314692505478e-07, + "loss": 0.2624, + "step": 34359 + }, + { + "epoch": 1.6095938539373214, + "grad_norm": 0.5806386506662758, + "learning_rate": 4.832026157344663e-07, + "loss": 0.252, + "step": 34360 + }, + { + "epoch": 1.6096406989272496, + "grad_norm": 0.5980853803140257, + "learning_rate": 4.830905505696176e-07, + "loss": 0.2644, + "step": 34361 + }, + { + "epoch": 1.609687543917178, + "grad_norm": 0.5925978069050335, + "learning_rate": 4.82978497011575e-07, + "loss": 0.2689, + "step": 34362 + }, + { + "epoch": 1.6097343889071065, + "grad_norm": 0.6091868609119849, + "learning_rate": 4.828664550609849e-07, + "loss": 0.272, + "step": 34363 + }, + { + "epoch": 1.6097812338970348, + "grad_norm": 0.603624825743742, + "learning_rate": 4.827544247184909e-07, + "loss": 0.2756, + "step": 34364 + }, + { + "epoch": 1.609828078886963, + "grad_norm": 0.5506047665722916, + "learning_rate": 4.826424059847379e-07, + "loss": 0.2497, + "step": 34365 + }, + { + "epoch": 1.6098749238768915, + "grad_norm": 0.6223513008793194, + "learning_rate": 4.82530398860371e-07, + "loss": 0.279, + "step": 34366 + }, + { + "epoch": 1.6099217688668197, + "grad_norm": 0.6020382739776559, + "learning_rate": 4.824184033460353e-07, + "loss": 0.2695, + "step": 34367 + }, + { + "epoch": 1.609968613856748, + "grad_norm": 0.614208206351333, + "learning_rate": 4.823064194423738e-07, + "loss": 0.2832, + "step": 34368 + }, + { + "epoch": 1.6100154588466764, + "grad_norm": 0.5820593654122734, + "learning_rate": 4.821944471500323e-07, + "loss": 0.2923, + "step": 34369 + }, + { + "epoch": 1.6100623038366046, + "grad_norm": 0.6023027996547282, + "learning_rate": 4.820824864696542e-07, + "loss": 0.2806, + "step": 34370 + }, + { + "epoch": 1.6101091488265329, + "grad_norm": 0.6024700702357846, + "learning_rate": 4.819705374018841e-07, + "loss": 0.2568, + "step": 34371 + }, + { + "epoch": 1.6101559938164614, + "grad_norm": 0.6026175689540297, + "learning_rate": 4.81858599947366e-07, + "loss": 0.2699, + "step": 34372 + }, + { + "epoch": 1.6102028388063898, + "grad_norm": 0.570301957364795, + "learning_rate": 4.817466741067448e-07, + "loss": 0.2643, + "step": 34373 + }, + { + "epoch": 1.6102496837963178, + "grad_norm": 0.5826161444552644, + "learning_rate": 4.816347598806648e-07, + "loss": 0.2673, + "step": 34374 + }, + { + "epoch": 1.6102965287862463, + "grad_norm": 0.568018717644913, + "learning_rate": 4.815228572697689e-07, + "loss": 0.2663, + "step": 34375 + }, + { + "epoch": 1.6103433737761748, + "grad_norm": 0.6042804913941041, + "learning_rate": 4.814109662747021e-07, + "loss": 0.278, + "step": 34376 + }, + { + "epoch": 1.610390218766103, + "grad_norm": 0.6152690441953403, + "learning_rate": 4.812990868961073e-07, + "loss": 0.2917, + "step": 34377 + }, + { + "epoch": 1.6104370637560312, + "grad_norm": 0.5821079463327199, + "learning_rate": 4.811872191346286e-07, + "loss": 0.2697, + "step": 34378 + }, + { + "epoch": 1.6104839087459597, + "grad_norm": 0.5949484456165748, + "learning_rate": 4.810753629909104e-07, + "loss": 0.2786, + "step": 34379 + }, + { + "epoch": 1.610530753735888, + "grad_norm": 0.6144432548448606, + "learning_rate": 4.809635184655967e-07, + "loss": 0.2947, + "step": 34380 + }, + { + "epoch": 1.6105775987258162, + "grad_norm": 0.5820607104054129, + "learning_rate": 4.808516855593295e-07, + "loss": 0.2759, + "step": 34381 + }, + { + "epoch": 1.6106244437157446, + "grad_norm": 0.5895311481503825, + "learning_rate": 4.807398642727537e-07, + "loss": 0.2761, + "step": 34382 + }, + { + "epoch": 1.6106712887056729, + "grad_norm": 0.6285688678487795, + "learning_rate": 4.806280546065129e-07, + "loss": 0.2758, + "step": 34383 + }, + { + "epoch": 1.6107181336956011, + "grad_norm": 0.5727797176726914, + "learning_rate": 4.805162565612495e-07, + "loss": 0.2653, + "step": 34384 + }, + { + "epoch": 1.6107649786855296, + "grad_norm": 0.5997375097410014, + "learning_rate": 4.804044701376076e-07, + "loss": 0.2793, + "step": 34385 + }, + { + "epoch": 1.610811823675458, + "grad_norm": 0.6156496913392958, + "learning_rate": 4.802926953362308e-07, + "loss": 0.2759, + "step": 34386 + }, + { + "epoch": 1.610858668665386, + "grad_norm": 0.5663776085438197, + "learning_rate": 4.801809321577613e-07, + "loss": 0.2619, + "step": 34387 + }, + { + "epoch": 1.6109055136553145, + "grad_norm": 0.6909833110047813, + "learning_rate": 4.800691806028432e-07, + "loss": 0.2843, + "step": 34388 + }, + { + "epoch": 1.610952358645243, + "grad_norm": 0.5709760360562212, + "learning_rate": 4.799574406721189e-07, + "loss": 0.2615, + "step": 34389 + }, + { + "epoch": 1.6109992036351712, + "grad_norm": 0.6361718979220763, + "learning_rate": 4.798457123662325e-07, + "loss": 0.2967, + "step": 34390 + }, + { + "epoch": 1.6110460486250995, + "grad_norm": 0.6406405275011615, + "learning_rate": 4.797339956858258e-07, + "loss": 0.2761, + "step": 34391 + }, + { + "epoch": 1.611092893615028, + "grad_norm": 0.5867808974897898, + "learning_rate": 4.796222906315426e-07, + "loss": 0.2833, + "step": 34392 + }, + { + "epoch": 1.6111397386049562, + "grad_norm": 0.6195740540854706, + "learning_rate": 4.795105972040249e-07, + "loss": 0.2711, + "step": 34393 + }, + { + "epoch": 1.6111865835948844, + "grad_norm": 0.5673611536887786, + "learning_rate": 4.793989154039158e-07, + "loss": 0.2677, + "step": 34394 + }, + { + "epoch": 1.6112334285848129, + "grad_norm": 0.6067275931959133, + "learning_rate": 4.792872452318578e-07, + "loss": 0.2688, + "step": 34395 + }, + { + "epoch": 1.6112802735747411, + "grad_norm": 0.6002358296733116, + "learning_rate": 4.791755866884943e-07, + "loss": 0.267, + "step": 34396 + }, + { + "epoch": 1.6113271185646694, + "grad_norm": 0.5958291755307134, + "learning_rate": 4.790639397744679e-07, + "loss": 0.2608, + "step": 34397 + }, + { + "epoch": 1.6113739635545978, + "grad_norm": 0.5530364767115674, + "learning_rate": 4.789523044904204e-07, + "loss": 0.2594, + "step": 34398 + }, + { + "epoch": 1.6114208085445263, + "grad_norm": 0.6183056451470342, + "learning_rate": 4.788406808369939e-07, + "loss": 0.2841, + "step": 34399 + }, + { + "epoch": 1.6114676535344545, + "grad_norm": 0.619976535864086, + "learning_rate": 4.78729068814831e-07, + "loss": 0.2647, + "step": 34400 + }, + { + "epoch": 1.6115144985243828, + "grad_norm": 0.5972402252819449, + "learning_rate": 4.786174684245745e-07, + "loss": 0.2849, + "step": 34401 + }, + { + "epoch": 1.6115613435143112, + "grad_norm": 0.5666619090311973, + "learning_rate": 4.785058796668665e-07, + "loss": 0.2622, + "step": 34402 + }, + { + "epoch": 1.6116081885042395, + "grad_norm": 0.5988895552410981, + "learning_rate": 4.783943025423491e-07, + "loss": 0.2616, + "step": 34403 + }, + { + "epoch": 1.6116550334941677, + "grad_norm": 0.5927155670181735, + "learning_rate": 4.782827370516649e-07, + "loss": 0.2789, + "step": 34404 + }, + { + "epoch": 1.6117018784840962, + "grad_norm": 0.5663836149531802, + "learning_rate": 4.781711831954555e-07, + "loss": 0.262, + "step": 34405 + }, + { + "epoch": 1.6117487234740244, + "grad_norm": 0.576152066010154, + "learning_rate": 4.780596409743621e-07, + "loss": 0.2664, + "step": 34406 + }, + { + "epoch": 1.6117955684639527, + "grad_norm": 0.592916222401635, + "learning_rate": 4.779481103890271e-07, + "loss": 0.249, + "step": 34407 + }, + { + "epoch": 1.6118424134538811, + "grad_norm": 0.6119458216129009, + "learning_rate": 4.778365914400926e-07, + "loss": 0.2715, + "step": 34408 + }, + { + "epoch": 1.6118892584438096, + "grad_norm": 0.6233167620722365, + "learning_rate": 4.777250841282005e-07, + "loss": 0.2716, + "step": 34409 + }, + { + "epoch": 1.6119361034337376, + "grad_norm": 0.6268346407051417, + "learning_rate": 4.776135884539926e-07, + "loss": 0.2778, + "step": 34410 + }, + { + "epoch": 1.611982948423666, + "grad_norm": 0.6260875342021711, + "learning_rate": 4.775021044181096e-07, + "loss": 0.278, + "step": 34411 + }, + { + "epoch": 1.6120297934135945, + "grad_norm": 0.5554106347880313, + "learning_rate": 4.773906320211943e-07, + "loss": 0.2683, + "step": 34412 + }, + { + "epoch": 1.6120766384035228, + "grad_norm": 0.6195225165157485, + "learning_rate": 4.772791712638872e-07, + "loss": 0.2783, + "step": 34413 + }, + { + "epoch": 1.612123483393451, + "grad_norm": 0.5906975296884368, + "learning_rate": 4.771677221468299e-07, + "loss": 0.2671, + "step": 34414 + }, + { + "epoch": 1.6121703283833795, + "grad_norm": 0.5981950061571991, + "learning_rate": 4.770562846706636e-07, + "loss": 0.2816, + "step": 34415 + }, + { + "epoch": 1.6122171733733077, + "grad_norm": 0.6078637435476121, + "learning_rate": 4.76944858836031e-07, + "loss": 0.2947, + "step": 34416 + }, + { + "epoch": 1.612264018363236, + "grad_norm": 0.6064718715822195, + "learning_rate": 4.7683344464357157e-07, + "loss": 0.2715, + "step": 34417 + }, + { + "epoch": 1.6123108633531644, + "grad_norm": 0.6103462027505027, + "learning_rate": 4.7672204209392753e-07, + "loss": 0.2696, + "step": 34418 + }, + { + "epoch": 1.6123577083430927, + "grad_norm": 0.5592492606631287, + "learning_rate": 4.7661065118773915e-07, + "loss": 0.2575, + "step": 34419 + }, + { + "epoch": 1.612404553333021, + "grad_norm": 0.5471435702435333, + "learning_rate": 4.764992719256481e-07, + "loss": 0.2672, + "step": 34420 + }, + { + "epoch": 1.6124513983229494, + "grad_norm": 0.5819336928048737, + "learning_rate": 4.7638790430829475e-07, + "loss": 0.267, + "step": 34421 + }, + { + "epoch": 1.6124982433128778, + "grad_norm": 0.6218835277086725, + "learning_rate": 4.762765483363213e-07, + "loss": 0.279, + "step": 34422 + }, + { + "epoch": 1.6125450883028059, + "grad_norm": 0.6281119181613123, + "learning_rate": 4.76165204010367e-07, + "loss": 0.2822, + "step": 34423 + }, + { + "epoch": 1.6125919332927343, + "grad_norm": 0.6285878225434012, + "learning_rate": 4.7605387133107335e-07, + "loss": 0.2716, + "step": 34424 + }, + { + "epoch": 1.6126387782826628, + "grad_norm": 0.649329726580894, + "learning_rate": 4.7594255029908135e-07, + "loss": 0.2869, + "step": 34425 + }, + { + "epoch": 1.612685623272591, + "grad_norm": 0.5756914336741605, + "learning_rate": 4.758312409150309e-07, + "loss": 0.2545, + "step": 34426 + }, + { + "epoch": 1.6127324682625193, + "grad_norm": 0.5914641436997872, + "learning_rate": 4.757199431795628e-07, + "loss": 0.2596, + "step": 34427 + }, + { + "epoch": 1.6127793132524477, + "grad_norm": 0.5987988917316873, + "learning_rate": 4.7560865709331826e-07, + "loss": 0.2648, + "step": 34428 + }, + { + "epoch": 1.612826158242376, + "grad_norm": 0.6437826569692822, + "learning_rate": 4.7549738265693644e-07, + "loss": 0.2761, + "step": 34429 + }, + { + "epoch": 1.6128730032323042, + "grad_norm": 0.6302236169618565, + "learning_rate": 4.7538611987105827e-07, + "loss": 0.2728, + "step": 34430 + }, + { + "epoch": 1.6129198482222327, + "grad_norm": 0.6333889965356148, + "learning_rate": 4.752748687363243e-07, + "loss": 0.2822, + "step": 34431 + }, + { + "epoch": 1.612966693212161, + "grad_norm": 0.527381984893805, + "learning_rate": 4.7516362925337483e-07, + "loss": 0.2482, + "step": 34432 + }, + { + "epoch": 1.6130135382020891, + "grad_norm": 0.5622623507871521, + "learning_rate": 4.750524014228494e-07, + "loss": 0.2602, + "step": 34433 + }, + { + "epoch": 1.6130603831920176, + "grad_norm": 0.6078709111432542, + "learning_rate": 4.749411852453892e-07, + "loss": 0.2749, + "step": 34434 + }, + { + "epoch": 1.613107228181946, + "grad_norm": 0.6138590689451918, + "learning_rate": 4.748299807216328e-07, + "loss": 0.2651, + "step": 34435 + }, + { + "epoch": 1.6131540731718743, + "grad_norm": 0.5763112217301519, + "learning_rate": 4.747187878522208e-07, + "loss": 0.2561, + "step": 34436 + }, + { + "epoch": 1.6132009181618026, + "grad_norm": 0.5756676141752978, + "learning_rate": 4.7460760663779303e-07, + "loss": 0.2937, + "step": 34437 + }, + { + "epoch": 1.613247763151731, + "grad_norm": 0.6158408503361087, + "learning_rate": 4.744964370789895e-07, + "loss": 0.2823, + "step": 34438 + }, + { + "epoch": 1.6132946081416593, + "grad_norm": 0.5997279057122579, + "learning_rate": 4.7438527917645054e-07, + "loss": 0.2749, + "step": 34439 + }, + { + "epoch": 1.6133414531315875, + "grad_norm": 0.6360508966222271, + "learning_rate": 4.7427413293081454e-07, + "loss": 0.289, + "step": 34440 + }, + { + "epoch": 1.613388298121516, + "grad_norm": 0.5939838392547914, + "learning_rate": 4.741629983427223e-07, + "loss": 0.262, + "step": 34441 + }, + { + "epoch": 1.6134351431114442, + "grad_norm": 0.5625749263083275, + "learning_rate": 4.740518754128123e-07, + "loss": 0.2686, + "step": 34442 + }, + { + "epoch": 1.6134819881013724, + "grad_norm": 0.598464623610624, + "learning_rate": 4.739407641417246e-07, + "loss": 0.2851, + "step": 34443 + }, + { + "epoch": 1.613528833091301, + "grad_norm": 0.6047288666993756, + "learning_rate": 4.738296645300985e-07, + "loss": 0.2604, + "step": 34444 + }, + { + "epoch": 1.6135756780812294, + "grad_norm": 0.6121169001809319, + "learning_rate": 4.7371857657857343e-07, + "loss": 0.2687, + "step": 34445 + }, + { + "epoch": 1.6136225230711574, + "grad_norm": 0.622774788728896, + "learning_rate": 4.7360750028778934e-07, + "loss": 0.2693, + "step": 34446 + }, + { + "epoch": 1.6136693680610859, + "grad_norm": 0.6170445248583495, + "learning_rate": 4.734964356583846e-07, + "loss": 0.2721, + "step": 34447 + }, + { + "epoch": 1.6137162130510143, + "grad_norm": 0.6015502629304038, + "learning_rate": 4.7338538269099816e-07, + "loss": 0.2603, + "step": 34448 + }, + { + "epoch": 1.6137630580409426, + "grad_norm": 0.6200694808843086, + "learning_rate": 4.7327434138626923e-07, + "loss": 0.2682, + "step": 34449 + }, + { + "epoch": 1.6138099030308708, + "grad_norm": 0.5419296601066351, + "learning_rate": 4.731633117448373e-07, + "loss": 0.2575, + "step": 34450 + }, + { + "epoch": 1.6138567480207993, + "grad_norm": 0.6138509003176558, + "learning_rate": 4.7305229376734067e-07, + "loss": 0.2685, + "step": 34451 + }, + { + "epoch": 1.6139035930107275, + "grad_norm": 0.594870723606923, + "learning_rate": 4.729412874544198e-07, + "loss": 0.2733, + "step": 34452 + }, + { + "epoch": 1.6139504380006557, + "grad_norm": 0.6040860216893288, + "learning_rate": 4.7283029280671135e-07, + "loss": 0.2644, + "step": 34453 + }, + { + "epoch": 1.6139972829905842, + "grad_norm": 0.5744929180550468, + "learning_rate": 4.7271930982485587e-07, + "loss": 0.2631, + "step": 34454 + }, + { + "epoch": 1.6140441279805124, + "grad_norm": 0.593897802519966, + "learning_rate": 4.7260833850949037e-07, + "loss": 0.2766, + "step": 34455 + }, + { + "epoch": 1.6140909729704407, + "grad_norm": 0.5872732710127732, + "learning_rate": 4.724973788612547e-07, + "loss": 0.2688, + "step": 34456 + }, + { + "epoch": 1.6141378179603691, + "grad_norm": 0.5797748064522222, + "learning_rate": 4.7238643088078655e-07, + "loss": 0.2747, + "step": 34457 + }, + { + "epoch": 1.6141846629502976, + "grad_norm": 0.5582156427389982, + "learning_rate": 4.7227549456872524e-07, + "loss": 0.2701, + "step": 34458 + }, + { + "epoch": 1.6142315079402256, + "grad_norm": 0.6056902949636754, + "learning_rate": 4.7216456992570936e-07, + "loss": 0.2869, + "step": 34459 + }, + { + "epoch": 1.614278352930154, + "grad_norm": 0.5867935668223738, + "learning_rate": 4.720536569523759e-07, + "loss": 0.2695, + "step": 34460 + }, + { + "epoch": 1.6143251979200826, + "grad_norm": 0.5845146725259667, + "learning_rate": 4.719427556493647e-07, + "loss": 0.2705, + "step": 34461 + }, + { + "epoch": 1.6143720429100108, + "grad_norm": 0.5895826315739449, + "learning_rate": 4.7183186601731267e-07, + "loss": 0.2575, + "step": 34462 + }, + { + "epoch": 1.614418887899939, + "grad_norm": 0.6577777679742207, + "learning_rate": 4.7172098805685823e-07, + "loss": 0.2966, + "step": 34463 + }, + { + "epoch": 1.6144657328898675, + "grad_norm": 0.5961321512851183, + "learning_rate": 4.7161012176864004e-07, + "loss": 0.2634, + "step": 34464 + }, + { + "epoch": 1.6145125778797957, + "grad_norm": 0.5957685696078245, + "learning_rate": 4.714992671532961e-07, + "loss": 0.274, + "step": 34465 + }, + { + "epoch": 1.614559422869724, + "grad_norm": 0.6276975551744315, + "learning_rate": 4.7138842421146356e-07, + "loss": 0.2782, + "step": 34466 + }, + { + "epoch": 1.6146062678596524, + "grad_norm": 0.5880668420659879, + "learning_rate": 4.712775929437813e-07, + "loss": 0.2696, + "step": 34467 + }, + { + "epoch": 1.6146531128495807, + "grad_norm": 0.6491557284572473, + "learning_rate": 4.7116677335088625e-07, + "loss": 0.2847, + "step": 34468 + }, + { + "epoch": 1.614699957839509, + "grad_norm": 0.5889052840046833, + "learning_rate": 4.7105596543341624e-07, + "loss": 0.2495, + "step": 34469 + }, + { + "epoch": 1.6147468028294374, + "grad_norm": 0.6012213144502088, + "learning_rate": 4.7094516919200915e-07, + "loss": 0.2581, + "step": 34470 + }, + { + "epoch": 1.6147936478193659, + "grad_norm": 0.5813583253909342, + "learning_rate": 4.7083438462730335e-07, + "loss": 0.2652, + "step": 34471 + }, + { + "epoch": 1.614840492809294, + "grad_norm": 0.5963594177264487, + "learning_rate": 4.70723611739935e-07, + "loss": 0.2819, + "step": 34472 + }, + { + "epoch": 1.6148873377992223, + "grad_norm": 0.607962833507493, + "learning_rate": 4.706128505305424e-07, + "loss": 0.2731, + "step": 34473 + }, + { + "epoch": 1.6149341827891508, + "grad_norm": 0.5887079029796812, + "learning_rate": 4.705021009997632e-07, + "loss": 0.2683, + "step": 34474 + }, + { + "epoch": 1.614981027779079, + "grad_norm": 0.5882083303928846, + "learning_rate": 4.7039136314823377e-07, + "loss": 0.2715, + "step": 34475 + }, + { + "epoch": 1.6150278727690073, + "grad_norm": 0.5972056539980328, + "learning_rate": 4.702806369765919e-07, + "loss": 0.2734, + "step": 34476 + }, + { + "epoch": 1.6150747177589357, + "grad_norm": 0.5426953712418779, + "learning_rate": 4.7016992248547526e-07, + "loss": 0.254, + "step": 34477 + }, + { + "epoch": 1.615121562748864, + "grad_norm": 0.5978186998041811, + "learning_rate": 4.700592196755202e-07, + "loss": 0.2932, + "step": 34478 + }, + { + "epoch": 1.6151684077387922, + "grad_norm": 0.6796553234497156, + "learning_rate": 4.69948528547364e-07, + "loss": 0.2765, + "step": 34479 + }, + { + "epoch": 1.6152152527287207, + "grad_norm": 0.5696265219439093, + "learning_rate": 4.6983784910164365e-07, + "loss": 0.2672, + "step": 34480 + }, + { + "epoch": 1.6152620977186491, + "grad_norm": 0.5819477211918869, + "learning_rate": 4.69727181338997e-07, + "loss": 0.2555, + "step": 34481 + }, + { + "epoch": 1.6153089427085772, + "grad_norm": 0.6226021514573699, + "learning_rate": 4.696165252600596e-07, + "loss": 0.2968, + "step": 34482 + }, + { + "epoch": 1.6153557876985056, + "grad_norm": 0.5941817114341006, + "learning_rate": 4.6950588086546926e-07, + "loss": 0.2529, + "step": 34483 + }, + { + "epoch": 1.615402632688434, + "grad_norm": 0.5713502509525592, + "learning_rate": 4.6939524815586166e-07, + "loss": 0.2672, + "step": 34484 + }, + { + "epoch": 1.6154494776783623, + "grad_norm": 0.6248798848555746, + "learning_rate": 4.6928462713187406e-07, + "loss": 0.2762, + "step": 34485 + }, + { + "epoch": 1.6154963226682906, + "grad_norm": 0.6027638177732092, + "learning_rate": 4.6917401779414286e-07, + "loss": 0.2707, + "step": 34486 + }, + { + "epoch": 1.615543167658219, + "grad_norm": 0.614291360210959, + "learning_rate": 4.69063420143305e-07, + "loss": 0.2812, + "step": 34487 + }, + { + "epoch": 1.6155900126481473, + "grad_norm": 0.5805915615474317, + "learning_rate": 4.68952834179997e-07, + "loss": 0.2682, + "step": 34488 + }, + { + "epoch": 1.6156368576380755, + "grad_norm": 0.6184670038595473, + "learning_rate": 4.6884225990485533e-07, + "loss": 0.2783, + "step": 34489 + }, + { + "epoch": 1.615683702628004, + "grad_norm": 0.6335041018253571, + "learning_rate": 4.687316973185152e-07, + "loss": 0.2661, + "step": 34490 + }, + { + "epoch": 1.6157305476179322, + "grad_norm": 0.5031925983746585, + "learning_rate": 4.686211464216134e-07, + "loss": 0.2359, + "step": 34491 + }, + { + "epoch": 1.6157773926078605, + "grad_norm": 0.5874989549386631, + "learning_rate": 4.685106072147866e-07, + "loss": 0.277, + "step": 34492 + }, + { + "epoch": 1.615824237597789, + "grad_norm": 0.5369986344372181, + "learning_rate": 4.684000796986704e-07, + "loss": 0.2523, + "step": 34493 + }, + { + "epoch": 1.6158710825877174, + "grad_norm": 0.6051383958495734, + "learning_rate": 4.682895638739013e-07, + "loss": 0.2767, + "step": 34494 + }, + { + "epoch": 1.6159179275776454, + "grad_norm": 0.6005222394728086, + "learning_rate": 4.6817905974111595e-07, + "loss": 0.2648, + "step": 34495 + }, + { + "epoch": 1.6159647725675739, + "grad_norm": 0.587657758684018, + "learning_rate": 4.6806856730094914e-07, + "loss": 0.2604, + "step": 34496 + }, + { + "epoch": 1.6160116175575023, + "grad_norm": 0.6173838684664725, + "learning_rate": 4.679580865540365e-07, + "loss": 0.2696, + "step": 34497 + }, + { + "epoch": 1.6160584625474306, + "grad_norm": 0.5984000625684084, + "learning_rate": 4.678476175010141e-07, + "loss": 0.2897, + "step": 34498 + }, + { + "epoch": 1.6161053075373588, + "grad_norm": 0.5506142513046561, + "learning_rate": 4.6773716014251816e-07, + "loss": 0.2641, + "step": 34499 + }, + { + "epoch": 1.6161521525272873, + "grad_norm": 0.5942868184195893, + "learning_rate": 4.6762671447918407e-07, + "loss": 0.272, + "step": 34500 + }, + { + "epoch": 1.6161989975172155, + "grad_norm": 0.6029085066062772, + "learning_rate": 4.6751628051164787e-07, + "loss": 0.2695, + "step": 34501 + }, + { + "epoch": 1.6162458425071438, + "grad_norm": 0.5925231444910228, + "learning_rate": 4.674058582405441e-07, + "loss": 0.261, + "step": 34502 + }, + { + "epoch": 1.6162926874970722, + "grad_norm": 0.5857776371745873, + "learning_rate": 4.6729544766650927e-07, + "loss": 0.2711, + "step": 34503 + }, + { + "epoch": 1.6163395324870005, + "grad_norm": 0.5914775705661752, + "learning_rate": 4.6718504879017774e-07, + "loss": 0.269, + "step": 34504 + }, + { + "epoch": 1.6163863774769287, + "grad_norm": 0.530729142677947, + "learning_rate": 4.6707466161218547e-07, + "loss": 0.2452, + "step": 34505 + }, + { + "epoch": 1.6164332224668572, + "grad_norm": 0.5875531117866681, + "learning_rate": 4.669642861331672e-07, + "loss": 0.2645, + "step": 34506 + }, + { + "epoch": 1.6164800674567856, + "grad_norm": 0.5956633095550414, + "learning_rate": 4.668539223537588e-07, + "loss": 0.2654, + "step": 34507 + }, + { + "epoch": 1.6165269124467139, + "grad_norm": 0.6062700305910883, + "learning_rate": 4.6674357027459537e-07, + "loss": 0.2577, + "step": 34508 + }, + { + "epoch": 1.6165737574366421, + "grad_norm": 0.6178185892755513, + "learning_rate": 4.6663322989631195e-07, + "loss": 0.2709, + "step": 34509 + }, + { + "epoch": 1.6166206024265706, + "grad_norm": 0.6624275631037959, + "learning_rate": 4.6652290121954243e-07, + "loss": 0.2962, + "step": 34510 + }, + { + "epoch": 1.6166674474164988, + "grad_norm": 0.5639316050279194, + "learning_rate": 4.664125842449227e-07, + "loss": 0.2762, + "step": 34511 + }, + { + "epoch": 1.616714292406427, + "grad_norm": 0.6118966329123361, + "learning_rate": 4.6630227897308727e-07, + "loss": 0.2752, + "step": 34512 + }, + { + "epoch": 1.6167611373963555, + "grad_norm": 0.5711688952021503, + "learning_rate": 4.661919854046712e-07, + "loss": 0.2702, + "step": 34513 + }, + { + "epoch": 1.6168079823862838, + "grad_norm": 0.5617226692513716, + "learning_rate": 4.660817035403095e-07, + "loss": 0.2626, + "step": 34514 + }, + { + "epoch": 1.616854827376212, + "grad_norm": 0.5658054066246415, + "learning_rate": 4.659714333806359e-07, + "loss": 0.2603, + "step": 34515 + }, + { + "epoch": 1.6169016723661405, + "grad_norm": 0.6336583205875365, + "learning_rate": 4.658611749262862e-07, + "loss": 0.2949, + "step": 34516 + }, + { + "epoch": 1.616948517356069, + "grad_norm": 0.7662724462852789, + "learning_rate": 4.6575092817789325e-07, + "loss": 0.26, + "step": 34517 + }, + { + "epoch": 1.616995362345997, + "grad_norm": 0.5746129154522507, + "learning_rate": 4.656406931360927e-07, + "loss": 0.263, + "step": 34518 + }, + { + "epoch": 1.6170422073359254, + "grad_norm": 0.5609151401032313, + "learning_rate": 4.655304698015184e-07, + "loss": 0.2637, + "step": 34519 + }, + { + "epoch": 1.6170890523258539, + "grad_norm": 0.6427614393387642, + "learning_rate": 4.654202581748057e-07, + "loss": 0.2883, + "step": 34520 + }, + { + "epoch": 1.6171358973157821, + "grad_norm": 0.611637764922181, + "learning_rate": 4.6531005825658727e-07, + "loss": 0.2742, + "step": 34521 + }, + { + "epoch": 1.6171827423057104, + "grad_norm": 0.615171946468297, + "learning_rate": 4.651998700474983e-07, + "loss": 0.2724, + "step": 34522 + }, + { + "epoch": 1.6172295872956388, + "grad_norm": 0.5733245200312255, + "learning_rate": 4.6508969354817303e-07, + "loss": 0.2628, + "step": 34523 + }, + { + "epoch": 1.617276432285567, + "grad_norm": 0.5922188198539394, + "learning_rate": 4.6497952875924455e-07, + "loss": 0.2582, + "step": 34524 + }, + { + "epoch": 1.6173232772754953, + "grad_norm": 0.570140194310772, + "learning_rate": 4.648693756813477e-07, + "loss": 0.2646, + "step": 34525 + }, + { + "epoch": 1.6173701222654238, + "grad_norm": 0.6018500812815667, + "learning_rate": 4.647592343151164e-07, + "loss": 0.281, + "step": 34526 + }, + { + "epoch": 1.617416967255352, + "grad_norm": 0.5835777310133139, + "learning_rate": 4.646491046611834e-07, + "loss": 0.2535, + "step": 34527 + }, + { + "epoch": 1.6174638122452802, + "grad_norm": 0.6169568316243962, + "learning_rate": 4.6453898672018355e-07, + "loss": 0.2638, + "step": 34528 + }, + { + "epoch": 1.6175106572352087, + "grad_norm": 0.5891465651646554, + "learning_rate": 4.644288804927502e-07, + "loss": 0.269, + "step": 34529 + }, + { + "epoch": 1.6175575022251372, + "grad_norm": 0.596612767317248, + "learning_rate": 4.643187859795176e-07, + "loss": 0.2732, + "step": 34530 + }, + { + "epoch": 1.6176043472150652, + "grad_norm": 0.600832387700545, + "learning_rate": 4.6420870318111794e-07, + "loss": 0.2737, + "step": 34531 + }, + { + "epoch": 1.6176511922049936, + "grad_norm": 0.6126626476012973, + "learning_rate": 4.6409863209818636e-07, + "loss": 0.2881, + "step": 34532 + }, + { + "epoch": 1.6176980371949221, + "grad_norm": 0.6314983571195041, + "learning_rate": 4.639885727313548e-07, + "loss": 0.2799, + "step": 34533 + }, + { + "epoch": 1.6177448821848504, + "grad_norm": 0.5626133715331976, + "learning_rate": 4.638785250812572e-07, + "loss": 0.251, + "step": 34534 + }, + { + "epoch": 1.6177917271747786, + "grad_norm": 0.6069512189150668, + "learning_rate": 4.6376848914852696e-07, + "loss": 0.2793, + "step": 34535 + }, + { + "epoch": 1.617838572164707, + "grad_norm": 0.5919605491208455, + "learning_rate": 4.636584649337972e-07, + "loss": 0.2794, + "step": 34536 + }, + { + "epoch": 1.6178854171546353, + "grad_norm": 0.5916343857543302, + "learning_rate": 4.635484524377018e-07, + "loss": 0.2751, + "step": 34537 + }, + { + "epoch": 1.6179322621445635, + "grad_norm": 0.6064538034821297, + "learning_rate": 4.634384516608734e-07, + "loss": 0.2831, + "step": 34538 + }, + { + "epoch": 1.617979107134492, + "grad_norm": 0.6120363459337513, + "learning_rate": 4.6332846260394385e-07, + "loss": 0.2697, + "step": 34539 + }, + { + "epoch": 1.6180259521244202, + "grad_norm": 0.6041988331330296, + "learning_rate": 4.6321848526754753e-07, + "loss": 0.2776, + "step": 34540 + }, + { + "epoch": 1.6180727971143485, + "grad_norm": 0.597832099842173, + "learning_rate": 4.6310851965231664e-07, + "loss": 0.2667, + "step": 34541 + }, + { + "epoch": 1.618119642104277, + "grad_norm": 0.594295888354646, + "learning_rate": 4.629985657588842e-07, + "loss": 0.2799, + "step": 34542 + }, + { + "epoch": 1.6181664870942054, + "grad_norm": 0.5817733260159104, + "learning_rate": 4.6288862358788324e-07, + "loss": 0.2618, + "step": 34543 + }, + { + "epoch": 1.6182133320841336, + "grad_norm": 0.6012256738057332, + "learning_rate": 4.6277869313994667e-07, + "loss": 0.278, + "step": 34544 + }, + { + "epoch": 1.618260177074062, + "grad_norm": 0.5659459872589272, + "learning_rate": 4.6266877441570683e-07, + "loss": 0.2686, + "step": 34545 + }, + { + "epoch": 1.6183070220639904, + "grad_norm": 0.5787822873619274, + "learning_rate": 4.6255886741579545e-07, + "loss": 0.2688, + "step": 34546 + }, + { + "epoch": 1.6183538670539186, + "grad_norm": 0.5701706846105891, + "learning_rate": 4.6244897214084595e-07, + "loss": 0.2644, + "step": 34547 + }, + { + "epoch": 1.6184007120438468, + "grad_norm": 0.6114180509356725, + "learning_rate": 4.623390885914902e-07, + "loss": 0.2638, + "step": 34548 + }, + { + "epoch": 1.6184475570337753, + "grad_norm": 0.6262377938111289, + "learning_rate": 4.622292167683609e-07, + "loss": 0.2721, + "step": 34549 + }, + { + "epoch": 1.6184944020237035, + "grad_norm": 0.5933619757641231, + "learning_rate": 4.621193566720908e-07, + "loss": 0.2647, + "step": 34550 + }, + { + "epoch": 1.6185412470136318, + "grad_norm": 0.5996442109553473, + "learning_rate": 4.62009508303311e-07, + "loss": 0.2795, + "step": 34551 + }, + { + "epoch": 1.6185880920035602, + "grad_norm": 0.6164529653633014, + "learning_rate": 4.6189967166265505e-07, + "loss": 0.2837, + "step": 34552 + }, + { + "epoch": 1.6186349369934887, + "grad_norm": 0.6094790430148684, + "learning_rate": 4.6178984675075366e-07, + "loss": 0.2646, + "step": 34553 + }, + { + "epoch": 1.6186817819834167, + "grad_norm": 0.6289916008750398, + "learning_rate": 4.616800335682392e-07, + "loss": 0.269, + "step": 34554 + }, + { + "epoch": 1.6187286269733452, + "grad_norm": 0.5911738894448605, + "learning_rate": 4.6157023211574395e-07, + "loss": 0.2671, + "step": 34555 + }, + { + "epoch": 1.6187754719632736, + "grad_norm": 0.6034866725170913, + "learning_rate": 4.6146044239390036e-07, + "loss": 0.2766, + "step": 34556 + }, + { + "epoch": 1.618822316953202, + "grad_norm": 0.6862262020235931, + "learning_rate": 4.613506644033389e-07, + "loss": 0.2803, + "step": 34557 + }, + { + "epoch": 1.6188691619431301, + "grad_norm": 0.5767529572470119, + "learning_rate": 4.6124089814469254e-07, + "loss": 0.2747, + "step": 34558 + }, + { + "epoch": 1.6189160069330586, + "grad_norm": 0.5639105586433022, + "learning_rate": 4.6113114361859167e-07, + "loss": 0.2622, + "step": 34559 + }, + { + "epoch": 1.6189628519229868, + "grad_norm": 0.5803003445165739, + "learning_rate": 4.610214008256686e-07, + "loss": 0.2524, + "step": 34560 + }, + { + "epoch": 1.619009696912915, + "grad_norm": 0.6030184712807942, + "learning_rate": 4.60911669766555e-07, + "loss": 0.2699, + "step": 34561 + }, + { + "epoch": 1.6190565419028435, + "grad_norm": 0.5941044525676031, + "learning_rate": 4.608019504418829e-07, + "loss": 0.2602, + "step": 34562 + }, + { + "epoch": 1.6191033868927718, + "grad_norm": 0.5562087628505925, + "learning_rate": 4.6069224285228223e-07, + "loss": 0.2613, + "step": 34563 + }, + { + "epoch": 1.6191502318827, + "grad_norm": 0.5875238447723523, + "learning_rate": 4.6058254699838544e-07, + "loss": 0.271, + "step": 34564 + }, + { + "epoch": 1.6191970768726285, + "grad_norm": 0.6214552368343804, + "learning_rate": 4.604728628808239e-07, + "loss": 0.2746, + "step": 34565 + }, + { + "epoch": 1.619243921862557, + "grad_norm": 0.6315425709056368, + "learning_rate": 4.6036319050022786e-07, + "loss": 0.2871, + "step": 34566 + }, + { + "epoch": 1.619290766852485, + "grad_norm": 0.5505791174150222, + "learning_rate": 4.6025352985722916e-07, + "loss": 0.2644, + "step": 34567 + }, + { + "epoch": 1.6193376118424134, + "grad_norm": 0.5924083268337709, + "learning_rate": 4.6014388095245915e-07, + "loss": 0.275, + "step": 34568 + }, + { + "epoch": 1.619384456832342, + "grad_norm": 0.5665782918685268, + "learning_rate": 4.600342437865479e-07, + "loss": 0.2681, + "step": 34569 + }, + { + "epoch": 1.6194313018222701, + "grad_norm": 0.6141800194702224, + "learning_rate": 4.599246183601266e-07, + "loss": 0.2657, + "step": 34570 + }, + { + "epoch": 1.6194781468121984, + "grad_norm": 0.5609131191072771, + "learning_rate": 4.598150046738267e-07, + "loss": 0.2672, + "step": 34571 + }, + { + "epoch": 1.6195249918021268, + "grad_norm": 0.5872233948572406, + "learning_rate": 4.5970540272827906e-07, + "loss": 0.2645, + "step": 34572 + }, + { + "epoch": 1.619571836792055, + "grad_norm": 0.6263469553080361, + "learning_rate": 4.5959581252411346e-07, + "loss": 0.2568, + "step": 34573 + }, + { + "epoch": 1.6196186817819833, + "grad_norm": 0.6429339854446198, + "learning_rate": 4.594862340619619e-07, + "loss": 0.2849, + "step": 34574 + }, + { + "epoch": 1.6196655267719118, + "grad_norm": 0.6190693405072859, + "learning_rate": 4.593766673424535e-07, + "loss": 0.2813, + "step": 34575 + }, + { + "epoch": 1.61971237176184, + "grad_norm": 0.5723576597648429, + "learning_rate": 4.592671123662193e-07, + "loss": 0.2695, + "step": 34576 + }, + { + "epoch": 1.6197592167517683, + "grad_norm": 0.5875324666264999, + "learning_rate": 4.591575691338901e-07, + "loss": 0.2746, + "step": 34577 + }, + { + "epoch": 1.6198060617416967, + "grad_norm": 0.5885161765252924, + "learning_rate": 4.5904803764609624e-07, + "loss": 0.2716, + "step": 34578 + }, + { + "epoch": 1.6198529067316252, + "grad_norm": 0.5908426677884523, + "learning_rate": 4.5893851790346834e-07, + "loss": 0.264, + "step": 34579 + }, + { + "epoch": 1.6198997517215534, + "grad_norm": 0.6099188516405729, + "learning_rate": 4.588290099066359e-07, + "loss": 0.2772, + "step": 34580 + }, + { + "epoch": 1.6199465967114817, + "grad_norm": 0.591917982270814, + "learning_rate": 4.5871951365623004e-07, + "loss": 0.2956, + "step": 34581 + }, + { + "epoch": 1.6199934417014101, + "grad_norm": 0.6386762100776603, + "learning_rate": 4.5861002915287944e-07, + "loss": 0.2862, + "step": 34582 + }, + { + "epoch": 1.6200402866913384, + "grad_norm": 0.5582243167067755, + "learning_rate": 4.585005563972153e-07, + "loss": 0.2675, + "step": 34583 + }, + { + "epoch": 1.6200871316812666, + "grad_norm": 0.5956364510222444, + "learning_rate": 4.5839109538986737e-07, + "loss": 0.2755, + "step": 34584 + }, + { + "epoch": 1.620133976671195, + "grad_norm": 0.6043302843705961, + "learning_rate": 4.5828164613146543e-07, + "loss": 0.2772, + "step": 34585 + }, + { + "epoch": 1.6201808216611233, + "grad_norm": 0.6209009556340603, + "learning_rate": 4.5817220862264004e-07, + "loss": 0.2678, + "step": 34586 + }, + { + "epoch": 1.6202276666510516, + "grad_norm": 0.6521992300566185, + "learning_rate": 4.580627828640205e-07, + "loss": 0.2862, + "step": 34587 + }, + { + "epoch": 1.62027451164098, + "grad_norm": 0.6071160040883345, + "learning_rate": 4.579533688562357e-07, + "loss": 0.2596, + "step": 34588 + }, + { + "epoch": 1.6203213566309085, + "grad_norm": 0.5694354221785598, + "learning_rate": 4.5784396659991597e-07, + "loss": 0.2731, + "step": 34589 + }, + { + "epoch": 1.6203682016208365, + "grad_norm": 0.6250499663311585, + "learning_rate": 4.577345760956911e-07, + "loss": 0.2622, + "step": 34590 + }, + { + "epoch": 1.620415046610765, + "grad_norm": 0.5901296876819627, + "learning_rate": 4.576251973441903e-07, + "loss": 0.2713, + "step": 34591 + }, + { + "epoch": 1.6204618916006934, + "grad_norm": 0.6205123336883875, + "learning_rate": 4.575158303460439e-07, + "loss": 0.2816, + "step": 34592 + }, + { + "epoch": 1.6205087365906217, + "grad_norm": 0.6716610119654357, + "learning_rate": 4.5740647510187974e-07, + "loss": 0.2876, + "step": 34593 + }, + { + "epoch": 1.62055558158055, + "grad_norm": 0.5621808660395816, + "learning_rate": 4.572971316123287e-07, + "loss": 0.2603, + "step": 34594 + }, + { + "epoch": 1.6206024265704784, + "grad_norm": 0.5578200371240738, + "learning_rate": 4.5718779987801857e-07, + "loss": 0.2521, + "step": 34595 + }, + { + "epoch": 1.6206492715604066, + "grad_norm": 0.5969765247346915, + "learning_rate": 4.570784798995795e-07, + "loss": 0.266, + "step": 34596 + }, + { + "epoch": 1.6206961165503349, + "grad_norm": 0.6270872221637523, + "learning_rate": 4.5696917167764e-07, + "loss": 0.2829, + "step": 34597 + }, + { + "epoch": 1.6207429615402633, + "grad_norm": 0.6147949778457396, + "learning_rate": 4.5685987521282947e-07, + "loss": 0.2899, + "step": 34598 + }, + { + "epoch": 1.6207898065301916, + "grad_norm": 0.5386628506762768, + "learning_rate": 4.5675059050577733e-07, + "loss": 0.2522, + "step": 34599 + }, + { + "epoch": 1.6208366515201198, + "grad_norm": 0.5773040555605224, + "learning_rate": 4.566413175571116e-07, + "loss": 0.2667, + "step": 34600 + }, + { + "epoch": 1.6208834965100483, + "grad_norm": 0.5705514692258925, + "learning_rate": 4.565320563674619e-07, + "loss": 0.2589, + "step": 34601 + }, + { + "epoch": 1.6209303414999767, + "grad_norm": 0.6197327668534336, + "learning_rate": 4.564228069374563e-07, + "loss": 0.2757, + "step": 34602 + }, + { + "epoch": 1.6209771864899047, + "grad_norm": 0.6068858001585725, + "learning_rate": 4.563135692677237e-07, + "loss": 0.2729, + "step": 34603 + }, + { + "epoch": 1.6210240314798332, + "grad_norm": 0.6182361169186751, + "learning_rate": 4.562043433588925e-07, + "loss": 0.2844, + "step": 34604 + }, + { + "epoch": 1.6210708764697617, + "grad_norm": 0.6236319734104022, + "learning_rate": 4.5609512921159256e-07, + "loss": 0.27, + "step": 34605 + }, + { + "epoch": 1.62111772145969, + "grad_norm": 0.6098640096437546, + "learning_rate": 4.559859268264508e-07, + "loss": 0.282, + "step": 34606 + }, + { + "epoch": 1.6211645664496181, + "grad_norm": 0.620039309867317, + "learning_rate": 4.558767362040969e-07, + "loss": 0.2678, + "step": 34607 + }, + { + "epoch": 1.6212114114395466, + "grad_norm": 0.5642448505071943, + "learning_rate": 4.557675573451581e-07, + "loss": 0.2658, + "step": 34608 + }, + { + "epoch": 1.6212582564294749, + "grad_norm": 0.6339173579805805, + "learning_rate": 4.5565839025026285e-07, + "loss": 0.2953, + "step": 34609 + }, + { + "epoch": 1.621305101419403, + "grad_norm": 0.5749688681270468, + "learning_rate": 4.555492349200402e-07, + "loss": 0.273, + "step": 34610 + }, + { + "epoch": 1.6213519464093316, + "grad_norm": 0.6248045254338701, + "learning_rate": 4.5544009135511827e-07, + "loss": 0.2708, + "step": 34611 + }, + { + "epoch": 1.6213987913992598, + "grad_norm": 0.5897231263324401, + "learning_rate": 4.5533095955612395e-07, + "loss": 0.2601, + "step": 34612 + }, + { + "epoch": 1.621445636389188, + "grad_norm": 0.6168454516489231, + "learning_rate": 4.5522183952368624e-07, + "loss": 0.289, + "step": 34613 + }, + { + "epoch": 1.6214924813791165, + "grad_norm": 0.5596694359599529, + "learning_rate": 4.551127312584333e-07, + "loss": 0.2634, + "step": 34614 + }, + { + "epoch": 1.621539326369045, + "grad_norm": 0.6223689722941642, + "learning_rate": 4.5500363476099233e-07, + "loss": 0.2709, + "step": 34615 + }, + { + "epoch": 1.6215861713589732, + "grad_norm": 0.5991580656375238, + "learning_rate": 4.548945500319912e-07, + "loss": 0.262, + "step": 34616 + }, + { + "epoch": 1.6216330163489014, + "grad_norm": 0.6021851936121868, + "learning_rate": 4.547854770720586e-07, + "loss": 0.2935, + "step": 34617 + }, + { + "epoch": 1.62167986133883, + "grad_norm": 0.5902279955704882, + "learning_rate": 4.546764158818209e-07, + "loss": 0.2616, + "step": 34618 + }, + { + "epoch": 1.6217267063287581, + "grad_norm": 0.6145023451067276, + "learning_rate": 4.545673664619066e-07, + "loss": 0.2661, + "step": 34619 + }, + { + "epoch": 1.6217735513186864, + "grad_norm": 0.593187832487185, + "learning_rate": 4.544583288129428e-07, + "loss": 0.2747, + "step": 34620 + }, + { + "epoch": 1.6218203963086149, + "grad_norm": 0.605460387175314, + "learning_rate": 4.543493029355578e-07, + "loss": 0.2773, + "step": 34621 + }, + { + "epoch": 1.621867241298543, + "grad_norm": 0.5933468826032572, + "learning_rate": 4.5424028883037764e-07, + "loss": 0.2607, + "step": 34622 + }, + { + "epoch": 1.6219140862884713, + "grad_norm": 0.6279076755445965, + "learning_rate": 4.541312864980313e-07, + "loss": 0.2623, + "step": 34623 + }, + { + "epoch": 1.6219609312783998, + "grad_norm": 0.6132639960066398, + "learning_rate": 4.5402229593914466e-07, + "loss": 0.2797, + "step": 34624 + }, + { + "epoch": 1.6220077762683283, + "grad_norm": 0.6360040173095842, + "learning_rate": 4.5391331715434527e-07, + "loss": 0.2796, + "step": 34625 + }, + { + "epoch": 1.6220546212582563, + "grad_norm": 0.589398713366526, + "learning_rate": 4.5380435014426046e-07, + "loss": 0.2788, + "step": 34626 + }, + { + "epoch": 1.6221014662481847, + "grad_norm": 0.6204477109165786, + "learning_rate": 4.536953949095174e-07, + "loss": 0.2848, + "step": 34627 + }, + { + "epoch": 1.6221483112381132, + "grad_norm": 0.6073735208351646, + "learning_rate": 4.5358645145074347e-07, + "loss": 0.2554, + "step": 34628 + }, + { + "epoch": 1.6221951562280414, + "grad_norm": 0.5817635557356673, + "learning_rate": 4.5347751976856536e-07, + "loss": 0.2711, + "step": 34629 + }, + { + "epoch": 1.6222420012179697, + "grad_norm": 0.5905265877414051, + "learning_rate": 4.533685998636092e-07, + "loss": 0.2702, + "step": 34630 + }, + { + "epoch": 1.6222888462078981, + "grad_norm": 0.607253102831029, + "learning_rate": 4.5325969173650204e-07, + "loss": 0.2632, + "step": 34631 + }, + { + "epoch": 1.6223356911978264, + "grad_norm": 0.5809603937747668, + "learning_rate": 4.531507953878711e-07, + "loss": 0.2864, + "step": 34632 + }, + { + "epoch": 1.6223825361877546, + "grad_norm": 0.5786024118137554, + "learning_rate": 4.5304191081834286e-07, + "loss": 0.2591, + "step": 34633 + }, + { + "epoch": 1.622429381177683, + "grad_norm": 0.5586837820813811, + "learning_rate": 4.5293303802854403e-07, + "loss": 0.2467, + "step": 34634 + }, + { + "epoch": 1.6224762261676113, + "grad_norm": 0.5744905093318332, + "learning_rate": 4.5282417701910166e-07, + "loss": 0.2783, + "step": 34635 + }, + { + "epoch": 1.6225230711575396, + "grad_norm": 0.6027485902687633, + "learning_rate": 4.5271532779064155e-07, + "loss": 0.2807, + "step": 34636 + }, + { + "epoch": 1.622569916147468, + "grad_norm": 0.5955752502265853, + "learning_rate": 4.526064903437896e-07, + "loss": 0.2738, + "step": 34637 + }, + { + "epoch": 1.6226167611373965, + "grad_norm": 0.6218675749897515, + "learning_rate": 4.524976646791726e-07, + "loss": 0.2791, + "step": 34638 + }, + { + "epoch": 1.6226636061273245, + "grad_norm": 0.6035761680225861, + "learning_rate": 4.5238885079741693e-07, + "loss": 0.2507, + "step": 34639 + }, + { + "epoch": 1.622710451117253, + "grad_norm": 0.5471886062790833, + "learning_rate": 4.522800486991488e-07, + "loss": 0.246, + "step": 34640 + }, + { + "epoch": 1.6227572961071814, + "grad_norm": 0.6042215868413932, + "learning_rate": 4.521712583849952e-07, + "loss": 0.2824, + "step": 34641 + }, + { + "epoch": 1.6228041410971097, + "grad_norm": 0.5622385489147363, + "learning_rate": 4.5206247985558034e-07, + "loss": 0.2639, + "step": 34642 + }, + { + "epoch": 1.622850986087038, + "grad_norm": 0.5974745945952427, + "learning_rate": 4.5195371311153205e-07, + "loss": 0.2738, + "step": 34643 + }, + { + "epoch": 1.6228978310769664, + "grad_norm": 0.5703263543172449, + "learning_rate": 4.518449581534745e-07, + "loss": 0.2565, + "step": 34644 + }, + { + "epoch": 1.6229446760668946, + "grad_norm": 0.6235931245822385, + "learning_rate": 4.517362149820345e-07, + "loss": 0.2899, + "step": 34645 + }, + { + "epoch": 1.6229915210568229, + "grad_norm": 0.6116588436975194, + "learning_rate": 4.516274835978379e-07, + "loss": 0.2863, + "step": 34646 + }, + { + "epoch": 1.6230383660467513, + "grad_norm": 0.6400223908223549, + "learning_rate": 4.5151876400151034e-07, + "loss": 0.2937, + "step": 34647 + }, + { + "epoch": 1.6230852110366796, + "grad_norm": 0.6218638353003056, + "learning_rate": 4.5141005619367766e-07, + "loss": 0.2825, + "step": 34648 + }, + { + "epoch": 1.6231320560266078, + "grad_norm": 0.5973978010893093, + "learning_rate": 4.513013601749655e-07, + "loss": 0.2759, + "step": 34649 + }, + { + "epoch": 1.6231789010165363, + "grad_norm": 0.5995096461076097, + "learning_rate": 4.5119267594599866e-07, + "loss": 0.2738, + "step": 34650 + }, + { + "epoch": 1.6232257460064647, + "grad_norm": 0.5602437533218928, + "learning_rate": 4.5108400350740265e-07, + "loss": 0.2717, + "step": 34651 + }, + { + "epoch": 1.623272590996393, + "grad_norm": 0.5743702719493416, + "learning_rate": 4.509753428598035e-07, + "loss": 0.255, + "step": 34652 + }, + { + "epoch": 1.6233194359863212, + "grad_norm": 0.6148359848420946, + "learning_rate": 4.5086669400382614e-07, + "loss": 0.266, + "step": 34653 + }, + { + "epoch": 1.6233662809762497, + "grad_norm": 0.5544021962144603, + "learning_rate": 4.5075805694009655e-07, + "loss": 0.2393, + "step": 34654 + }, + { + "epoch": 1.623413125966178, + "grad_norm": 0.5669851218524112, + "learning_rate": 4.5064943166923864e-07, + "loss": 0.255, + "step": 34655 + }, + { + "epoch": 1.6234599709561062, + "grad_norm": 0.5808473679938964, + "learning_rate": 4.5054081819187886e-07, + "loss": 0.2584, + "step": 34656 + }, + { + "epoch": 1.6235068159460346, + "grad_norm": 0.6178006030375063, + "learning_rate": 4.5043221650864083e-07, + "loss": 0.2603, + "step": 34657 + }, + { + "epoch": 1.6235536609359629, + "grad_norm": 0.5663356135380355, + "learning_rate": 4.5032362662015025e-07, + "loss": 0.2517, + "step": 34658 + }, + { + "epoch": 1.6236005059258911, + "grad_norm": 0.5811176765453742, + "learning_rate": 4.5021504852703205e-07, + "loss": 0.2741, + "step": 34659 + }, + { + "epoch": 1.6236473509158196, + "grad_norm": 0.5970560400381895, + "learning_rate": 4.5010648222991174e-07, + "loss": 0.2843, + "step": 34660 + }, + { + "epoch": 1.623694195905748, + "grad_norm": 0.6547163983045228, + "learning_rate": 4.499979277294128e-07, + "loss": 0.28, + "step": 34661 + }, + { + "epoch": 1.623741040895676, + "grad_norm": 0.5883099821586586, + "learning_rate": 4.498893850261604e-07, + "loss": 0.2801, + "step": 34662 + }, + { + "epoch": 1.6237878858856045, + "grad_norm": 0.6120463350172247, + "learning_rate": 4.4978085412078014e-07, + "loss": 0.2634, + "step": 34663 + }, + { + "epoch": 1.623834730875533, + "grad_norm": 0.6053738736571144, + "learning_rate": 4.4967233501389486e-07, + "loss": 0.2795, + "step": 34664 + }, + { + "epoch": 1.6238815758654612, + "grad_norm": 0.5892599308043238, + "learning_rate": 4.495638277061301e-07, + "loss": 0.2605, + "step": 34665 + }, + { + "epoch": 1.6239284208553895, + "grad_norm": 0.6293695997067736, + "learning_rate": 4.49455332198111e-07, + "loss": 0.2803, + "step": 34666 + }, + { + "epoch": 1.623975265845318, + "grad_norm": 0.5707173133791175, + "learning_rate": 4.4934684849046007e-07, + "loss": 0.2625, + "step": 34667 + }, + { + "epoch": 1.6240221108352462, + "grad_norm": 0.5918332427127941, + "learning_rate": 4.492383765838029e-07, + "loss": 0.2766, + "step": 34668 + }, + { + "epoch": 1.6240689558251744, + "grad_norm": 0.6096532103435061, + "learning_rate": 4.4912991647876326e-07, + "loss": 0.2824, + "step": 34669 + }, + { + "epoch": 1.6241158008151029, + "grad_norm": 0.581223845643586, + "learning_rate": 4.490214681759664e-07, + "loss": 0.2516, + "step": 34670 + }, + { + "epoch": 1.6241626458050311, + "grad_norm": 0.5667638339557918, + "learning_rate": 4.4891303167603457e-07, + "loss": 0.2586, + "step": 34671 + }, + { + "epoch": 1.6242094907949594, + "grad_norm": 0.5703629589323439, + "learning_rate": 4.488046069795934e-07, + "loss": 0.2715, + "step": 34672 + }, + { + "epoch": 1.6242563357848878, + "grad_norm": 0.6396826195192304, + "learning_rate": 4.4869619408726557e-07, + "loss": 0.2904, + "step": 34673 + }, + { + "epoch": 1.6243031807748163, + "grad_norm": 0.6186928312065739, + "learning_rate": 4.4858779299967544e-07, + "loss": 0.2845, + "step": 34674 + }, + { + "epoch": 1.6243500257647443, + "grad_norm": 0.6157307079109811, + "learning_rate": 4.4847940371744724e-07, + "loss": 0.2892, + "step": 34675 + }, + { + "epoch": 1.6243968707546728, + "grad_norm": 0.5806377316908748, + "learning_rate": 4.4837102624120417e-07, + "loss": 0.2501, + "step": 34676 + }, + { + "epoch": 1.6244437157446012, + "grad_norm": 0.6072176596461807, + "learning_rate": 4.4826266057157093e-07, + "loss": 0.2628, + "step": 34677 + }, + { + "epoch": 1.6244905607345295, + "grad_norm": 0.605766484708152, + "learning_rate": 4.481543067091704e-07, + "loss": 0.2683, + "step": 34678 + }, + { + "epoch": 1.6245374057244577, + "grad_norm": 0.6381911997243481, + "learning_rate": 4.480459646546256e-07, + "loss": 0.2728, + "step": 34679 + }, + { + "epoch": 1.6245842507143862, + "grad_norm": 0.5972033837646946, + "learning_rate": 4.479376344085604e-07, + "loss": 0.2635, + "step": 34680 + }, + { + "epoch": 1.6246310957043144, + "grad_norm": 0.5886093371574006, + "learning_rate": 4.4782931597159826e-07, + "loss": 0.2805, + "step": 34681 + }, + { + "epoch": 1.6246779406942427, + "grad_norm": 0.6263033049124366, + "learning_rate": 4.4772100934436293e-07, + "loss": 0.267, + "step": 34682 + }, + { + "epoch": 1.6247247856841711, + "grad_norm": 0.6064999745718275, + "learning_rate": 4.47612714527477e-07, + "loss": 0.2685, + "step": 34683 + }, + { + "epoch": 1.6247716306740994, + "grad_norm": 0.5846879799477568, + "learning_rate": 4.4750443152156486e-07, + "loss": 0.2833, + "step": 34684 + }, + { + "epoch": 1.6248184756640276, + "grad_norm": 0.5962683443972694, + "learning_rate": 4.473961603272489e-07, + "loss": 0.2741, + "step": 34685 + }, + { + "epoch": 1.624865320653956, + "grad_norm": 0.5503910540816873, + "learning_rate": 4.472879009451514e-07, + "loss": 0.2504, + "step": 34686 + }, + { + "epoch": 1.6249121656438845, + "grad_norm": 0.603393523168875, + "learning_rate": 4.4717965337589625e-07, + "loss": 0.27, + "step": 34687 + }, + { + "epoch": 1.6249590106338128, + "grad_norm": 0.5898486724276115, + "learning_rate": 4.4707141762010604e-07, + "loss": 0.2624, + "step": 34688 + }, + { + "epoch": 1.625005855623741, + "grad_norm": 0.6012245873857459, + "learning_rate": 4.4696319367840384e-07, + "loss": 0.2907, + "step": 34689 + }, + { + "epoch": 1.6250527006136695, + "grad_norm": 0.62799468407177, + "learning_rate": 4.468549815514131e-07, + "loss": 0.2711, + "step": 34690 + }, + { + "epoch": 1.6250995456035977, + "grad_norm": 0.6814127345213005, + "learning_rate": 4.467467812397555e-07, + "loss": 0.2814, + "step": 34691 + }, + { + "epoch": 1.625146390593526, + "grad_norm": 0.614528803337646, + "learning_rate": 4.4663859274405445e-07, + "loss": 0.2797, + "step": 34692 + }, + { + "epoch": 1.6251932355834544, + "grad_norm": 0.6288303787979177, + "learning_rate": 4.465304160649317e-07, + "loss": 0.2751, + "step": 34693 + }, + { + "epoch": 1.6252400805733827, + "grad_norm": 0.6047872380442861, + "learning_rate": 4.464222512030103e-07, + "loss": 0.2571, + "step": 34694 + }, + { + "epoch": 1.625286925563311, + "grad_norm": 0.6210444186967344, + "learning_rate": 4.4631409815891256e-07, + "loss": 0.2738, + "step": 34695 + }, + { + "epoch": 1.6253337705532394, + "grad_norm": 0.629253122099609, + "learning_rate": 4.462059569332619e-07, + "loss": 0.2874, + "step": 34696 + }, + { + "epoch": 1.6253806155431678, + "grad_norm": 0.6302484965359514, + "learning_rate": 4.460978275266789e-07, + "loss": 0.2786, + "step": 34697 + }, + { + "epoch": 1.6254274605330958, + "grad_norm": 0.6153156725888793, + "learning_rate": 4.4598970993978754e-07, + "loss": 0.2719, + "step": 34698 + }, + { + "epoch": 1.6254743055230243, + "grad_norm": 0.5793823931066198, + "learning_rate": 4.4588160417320835e-07, + "loss": 0.2576, + "step": 34699 + }, + { + "epoch": 1.6255211505129528, + "grad_norm": 0.592384019631815, + "learning_rate": 4.4577351022756427e-07, + "loss": 0.2701, + "step": 34700 + }, + { + "epoch": 1.625567995502881, + "grad_norm": 0.6096087025368014, + "learning_rate": 4.4566542810347723e-07, + "loss": 0.2667, + "step": 34701 + }, + { + "epoch": 1.6256148404928092, + "grad_norm": 0.6171242006877777, + "learning_rate": 4.455573578015701e-07, + "loss": 0.2773, + "step": 34702 + }, + { + "epoch": 1.6256616854827377, + "grad_norm": 0.6763893557085754, + "learning_rate": 4.454492993224632e-07, + "loss": 0.2831, + "step": 34703 + }, + { + "epoch": 1.625708530472666, + "grad_norm": 0.5915469229919695, + "learning_rate": 4.453412526667794e-07, + "loss": 0.2571, + "step": 34704 + }, + { + "epoch": 1.6257553754625942, + "grad_norm": 0.5914733291654274, + "learning_rate": 4.4523321783514065e-07, + "loss": 0.2595, + "step": 34705 + }, + { + "epoch": 1.6258022204525227, + "grad_norm": 0.579790487759149, + "learning_rate": 4.451251948281679e-07, + "loss": 0.2655, + "step": 34706 + }, + { + "epoch": 1.625849065442451, + "grad_norm": 0.6006303998861402, + "learning_rate": 4.450171836464831e-07, + "loss": 0.2756, + "step": 34707 + }, + { + "epoch": 1.6258959104323791, + "grad_norm": 0.6495383789960049, + "learning_rate": 4.4490918429070857e-07, + "loss": 0.2903, + "step": 34708 + }, + { + "epoch": 1.6259427554223076, + "grad_norm": 0.5744362518522458, + "learning_rate": 4.448011967614643e-07, + "loss": 0.275, + "step": 34709 + }, + { + "epoch": 1.625989600412236, + "grad_norm": 0.6241410557982673, + "learning_rate": 4.446932210593727e-07, + "loss": 0.2787, + "step": 34710 + }, + { + "epoch": 1.626036445402164, + "grad_norm": 0.5843058532649583, + "learning_rate": 4.445852571850551e-07, + "loss": 0.2781, + "step": 34711 + }, + { + "epoch": 1.6260832903920925, + "grad_norm": 0.603925696572302, + "learning_rate": 4.444773051391335e-07, + "loss": 0.2746, + "step": 34712 + }, + { + "epoch": 1.626130135382021, + "grad_norm": 0.6138913239537916, + "learning_rate": 4.443693649222275e-07, + "loss": 0.2765, + "step": 34713 + }, + { + "epoch": 1.6261769803719492, + "grad_norm": 0.5579941392889347, + "learning_rate": 4.442614365349593e-07, + "loss": 0.2631, + "step": 34714 + }, + { + "epoch": 1.6262238253618775, + "grad_norm": 0.5531359272874006, + "learning_rate": 4.441535199779504e-07, + "loss": 0.2579, + "step": 34715 + }, + { + "epoch": 1.626270670351806, + "grad_norm": 0.6305810116480176, + "learning_rate": 4.4404561525182076e-07, + "loss": 0.2726, + "step": 34716 + }, + { + "epoch": 1.6263175153417342, + "grad_norm": 0.619356324935414, + "learning_rate": 4.4393772235719166e-07, + "loss": 0.2863, + "step": 34717 + }, + { + "epoch": 1.6263643603316624, + "grad_norm": 0.5909792665331277, + "learning_rate": 4.438298412946843e-07, + "loss": 0.2665, + "step": 34718 + }, + { + "epoch": 1.626411205321591, + "grad_norm": 0.5921200836209592, + "learning_rate": 4.4372197206491995e-07, + "loss": 0.2724, + "step": 34719 + }, + { + "epoch": 1.6264580503115191, + "grad_norm": 0.6037800049412552, + "learning_rate": 4.436141146685183e-07, + "loss": 0.2863, + "step": 34720 + }, + { + "epoch": 1.6265048953014474, + "grad_norm": 0.6278431227282805, + "learning_rate": 4.4350626910610125e-07, + "loss": 0.2742, + "step": 34721 + }, + { + "epoch": 1.6265517402913758, + "grad_norm": 0.5770381951885452, + "learning_rate": 4.4339843537828804e-07, + "loss": 0.263, + "step": 34722 + }, + { + "epoch": 1.6265985852813043, + "grad_norm": 0.5697156885045997, + "learning_rate": 4.4329061348569986e-07, + "loss": 0.2656, + "step": 34723 + }, + { + "epoch": 1.6266454302712325, + "grad_norm": 0.5813349688786856, + "learning_rate": 4.431828034289573e-07, + "loss": 0.2623, + "step": 34724 + }, + { + "epoch": 1.6266922752611608, + "grad_norm": 0.6553283328034777, + "learning_rate": 4.43075005208681e-07, + "loss": 0.2738, + "step": 34725 + }, + { + "epoch": 1.6267391202510892, + "grad_norm": 0.5867415489439269, + "learning_rate": 4.4296721882549133e-07, + "loss": 0.2555, + "step": 34726 + }, + { + "epoch": 1.6267859652410175, + "grad_norm": 0.5932408856569128, + "learning_rate": 4.4285944428000853e-07, + "loss": 0.2777, + "step": 34727 + }, + { + "epoch": 1.6268328102309457, + "grad_norm": 0.6149689913866204, + "learning_rate": 4.4275168157285164e-07, + "loss": 0.2786, + "step": 34728 + }, + { + "epoch": 1.6268796552208742, + "grad_norm": 0.6254319640107882, + "learning_rate": 4.4264393070464206e-07, + "loss": 0.2778, + "step": 34729 + }, + { + "epoch": 1.6269265002108024, + "grad_norm": 0.6224634323179007, + "learning_rate": 4.4253619167599957e-07, + "loss": 0.2913, + "step": 34730 + }, + { + "epoch": 1.6269733452007307, + "grad_norm": 0.6080790323691496, + "learning_rate": 4.42428464487544e-07, + "loss": 0.2832, + "step": 34731 + }, + { + "epoch": 1.6270201901906591, + "grad_norm": 0.5734371450571224, + "learning_rate": 4.423207491398962e-07, + "loss": 0.2708, + "step": 34732 + }, + { + "epoch": 1.6270670351805876, + "grad_norm": 0.6207299132231737, + "learning_rate": 4.422130456336746e-07, + "loss": 0.2839, + "step": 34733 + }, + { + "epoch": 1.6271138801705156, + "grad_norm": 0.5985725442883562, + "learning_rate": 4.4210535396950053e-07, + "loss": 0.2673, + "step": 34734 + }, + { + "epoch": 1.627160725160444, + "grad_norm": 0.5993625788846848, + "learning_rate": 4.41997674147992e-07, + "loss": 0.2784, + "step": 34735 + }, + { + "epoch": 1.6272075701503725, + "grad_norm": 0.6156017798936008, + "learning_rate": 4.4189000616976977e-07, + "loss": 0.2789, + "step": 34736 + }, + { + "epoch": 1.6272544151403008, + "grad_norm": 0.5856914999469699, + "learning_rate": 4.417823500354532e-07, + "loss": 0.2729, + "step": 34737 + }, + { + "epoch": 1.627301260130229, + "grad_norm": 0.6220330271552575, + "learning_rate": 4.4167470574566193e-07, + "loss": 0.2766, + "step": 34738 + }, + { + "epoch": 1.6273481051201575, + "grad_norm": 0.5522292911752883, + "learning_rate": 4.4156707330101605e-07, + "loss": 0.25, + "step": 34739 + }, + { + "epoch": 1.6273949501100857, + "grad_norm": 0.607215150581363, + "learning_rate": 4.4145945270213373e-07, + "loss": 0.2766, + "step": 34740 + }, + { + "epoch": 1.627441795100014, + "grad_norm": 0.5684646239437766, + "learning_rate": 4.413518439496353e-07, + "loss": 0.2584, + "step": 34741 + }, + { + "epoch": 1.6274886400899424, + "grad_norm": 0.6280191814465814, + "learning_rate": 4.4124424704413915e-07, + "loss": 0.2824, + "step": 34742 + }, + { + "epoch": 1.6275354850798707, + "grad_norm": 0.6334937037774906, + "learning_rate": 4.4113666198626473e-07, + "loss": 0.2916, + "step": 34743 + }, + { + "epoch": 1.627582330069799, + "grad_norm": 0.6039848792169141, + "learning_rate": 4.410290887766314e-07, + "loss": 0.2805, + "step": 34744 + }, + { + "epoch": 1.6276291750597274, + "grad_norm": 0.6009991762690633, + "learning_rate": 4.40921527415859e-07, + "loss": 0.2842, + "step": 34745 + }, + { + "epoch": 1.6276760200496558, + "grad_norm": 0.5704685883370648, + "learning_rate": 4.4081397790456476e-07, + "loss": 0.2513, + "step": 34746 + }, + { + "epoch": 1.6277228650395839, + "grad_norm": 0.5788953138711717, + "learning_rate": 4.4070644024336917e-07, + "loss": 0.2626, + "step": 34747 + }, + { + "epoch": 1.6277697100295123, + "grad_norm": 0.5912231354808789, + "learning_rate": 4.405989144328901e-07, + "loss": 0.2612, + "step": 34748 + }, + { + "epoch": 1.6278165550194408, + "grad_norm": 0.5851622012458858, + "learning_rate": 4.404914004737465e-07, + "loss": 0.2581, + "step": 34749 + }, + { + "epoch": 1.627863400009369, + "grad_norm": 0.6021902219262237, + "learning_rate": 4.4038389836655704e-07, + "loss": 0.2788, + "step": 34750 + }, + { + "epoch": 1.6279102449992973, + "grad_norm": 0.6001671413497197, + "learning_rate": 4.402764081119415e-07, + "loss": 0.2576, + "step": 34751 + }, + { + "epoch": 1.6279570899892257, + "grad_norm": 0.59055913190122, + "learning_rate": 4.4016892971051663e-07, + "loss": 0.277, + "step": 34752 + }, + { + "epoch": 1.628003934979154, + "grad_norm": 0.6580019519111896, + "learning_rate": 4.4006146316290217e-07, + "loss": 0.2816, + "step": 34753 + }, + { + "epoch": 1.6280507799690822, + "grad_norm": 0.5676930918517152, + "learning_rate": 4.399540084697168e-07, + "loss": 0.2645, + "step": 34754 + }, + { + "epoch": 1.6280976249590107, + "grad_norm": 0.6210653812735039, + "learning_rate": 4.3984656563157756e-07, + "loss": 0.2736, + "step": 34755 + }, + { + "epoch": 1.628144469948939, + "grad_norm": 0.6578336509009336, + "learning_rate": 4.3973913464910366e-07, + "loss": 0.279, + "step": 34756 + }, + { + "epoch": 1.6281913149388672, + "grad_norm": 0.62407980226074, + "learning_rate": 4.396317155229138e-07, + "loss": 0.2803, + "step": 34757 + }, + { + "epoch": 1.6282381599287956, + "grad_norm": 0.571825526476695, + "learning_rate": 4.395243082536249e-07, + "loss": 0.269, + "step": 34758 + }, + { + "epoch": 1.628285004918724, + "grad_norm": 0.5912679049619722, + "learning_rate": 4.3941691284185574e-07, + "loss": 0.2592, + "step": 34759 + }, + { + "epoch": 1.6283318499086523, + "grad_norm": 0.6037103504228033, + "learning_rate": 4.3930952928822437e-07, + "loss": 0.2707, + "step": 34760 + }, + { + "epoch": 1.6283786948985806, + "grad_norm": 0.5624087321844206, + "learning_rate": 4.392021575933492e-07, + "loss": 0.2574, + "step": 34761 + }, + { + "epoch": 1.628425539888509, + "grad_norm": 0.5589960986806402, + "learning_rate": 4.390947977578472e-07, + "loss": 0.258, + "step": 34762 + }, + { + "epoch": 1.6284723848784373, + "grad_norm": 0.6084811463724749, + "learning_rate": 4.389874497823371e-07, + "loss": 0.2792, + "step": 34763 + }, + { + "epoch": 1.6285192298683655, + "grad_norm": 0.6814226689358576, + "learning_rate": 4.3888011366743587e-07, + "loss": 0.288, + "step": 34764 + }, + { + "epoch": 1.628566074858294, + "grad_norm": 0.5919876164878413, + "learning_rate": 4.387727894137614e-07, + "loss": 0.2811, + "step": 34765 + }, + { + "epoch": 1.6286129198482222, + "grad_norm": 0.6075217216532052, + "learning_rate": 4.386654770219315e-07, + "loss": 0.2807, + "step": 34766 + }, + { + "epoch": 1.6286597648381504, + "grad_norm": 0.6248952517112032, + "learning_rate": 4.385581764925634e-07, + "loss": 0.2669, + "step": 34767 + }, + { + "epoch": 1.628706609828079, + "grad_norm": 0.6425313461955562, + "learning_rate": 4.384508878262758e-07, + "loss": 0.2856, + "step": 34768 + }, + { + "epoch": 1.6287534548180074, + "grad_norm": 0.614983417963493, + "learning_rate": 4.3834361102368496e-07, + "loss": 0.2793, + "step": 34769 + }, + { + "epoch": 1.6288002998079354, + "grad_norm": 0.5912586321666785, + "learning_rate": 4.3823634608540804e-07, + "loss": 0.2681, + "step": 34770 + }, + { + "epoch": 1.6288471447978639, + "grad_norm": 0.5874223553970365, + "learning_rate": 4.381290930120627e-07, + "loss": 0.2852, + "step": 34771 + }, + { + "epoch": 1.6288939897877923, + "grad_norm": 0.5603274816751843, + "learning_rate": 4.3802185180426614e-07, + "loss": 0.2589, + "step": 34772 + }, + { + "epoch": 1.6289408347777206, + "grad_norm": 0.5718579512816886, + "learning_rate": 4.379146224626357e-07, + "loss": 0.2679, + "step": 34773 + }, + { + "epoch": 1.6289876797676488, + "grad_norm": 0.6006515452288168, + "learning_rate": 4.378074049877881e-07, + "loss": 0.2909, + "step": 34774 + }, + { + "epoch": 1.6290345247575773, + "grad_norm": 0.6129311040047223, + "learning_rate": 4.3770019938034145e-07, + "loss": 0.2814, + "step": 34775 + }, + { + "epoch": 1.6290813697475055, + "grad_norm": 0.5940784225132727, + "learning_rate": 4.3759300564091165e-07, + "loss": 0.2649, + "step": 34776 + }, + { + "epoch": 1.6291282147374337, + "grad_norm": 0.5839965358628122, + "learning_rate": 4.374858237701152e-07, + "loss": 0.2492, + "step": 34777 + }, + { + "epoch": 1.6291750597273622, + "grad_norm": 0.5981148815297415, + "learning_rate": 4.3737865376856927e-07, + "loss": 0.2692, + "step": 34778 + }, + { + "epoch": 1.6292219047172904, + "grad_norm": 0.596348048629934, + "learning_rate": 4.372714956368909e-07, + "loss": 0.2811, + "step": 34779 + }, + { + "epoch": 1.6292687497072187, + "grad_norm": 0.5571894924416564, + "learning_rate": 4.371643493756966e-07, + "loss": 0.254, + "step": 34780 + }, + { + "epoch": 1.6293155946971472, + "grad_norm": 0.5843466932349816, + "learning_rate": 4.3705721498560365e-07, + "loss": 0.2629, + "step": 34781 + }, + { + "epoch": 1.6293624396870756, + "grad_norm": 0.5858369547420361, + "learning_rate": 4.3695009246722737e-07, + "loss": 0.2796, + "step": 34782 + }, + { + "epoch": 1.6294092846770036, + "grad_norm": 0.5511339853464887, + "learning_rate": 4.3684298182118526e-07, + "loss": 0.2592, + "step": 34783 + }, + { + "epoch": 1.629456129666932, + "grad_norm": 0.6029385140971487, + "learning_rate": 4.3673588304809276e-07, + "loss": 0.27, + "step": 34784 + }, + { + "epoch": 1.6295029746568606, + "grad_norm": 0.6351176088183729, + "learning_rate": 4.3662879614856655e-07, + "loss": 0.2883, + "step": 34785 + }, + { + "epoch": 1.6295498196467888, + "grad_norm": 0.5973624109648208, + "learning_rate": 4.365217211232231e-07, + "loss": 0.2612, + "step": 34786 + }, + { + "epoch": 1.629596664636717, + "grad_norm": 0.5804677946247059, + "learning_rate": 4.364146579726783e-07, + "loss": 0.2786, + "step": 34787 + }, + { + "epoch": 1.6296435096266455, + "grad_norm": 0.601128628780933, + "learning_rate": 4.363076066975494e-07, + "loss": 0.2753, + "step": 34788 + }, + { + "epoch": 1.6296903546165737, + "grad_norm": 0.61593316327986, + "learning_rate": 4.362005672984512e-07, + "loss": 0.2717, + "step": 34789 + }, + { + "epoch": 1.629737199606502, + "grad_norm": 0.5970473613866389, + "learning_rate": 4.3609353977599966e-07, + "loss": 0.2577, + "step": 34790 + }, + { + "epoch": 1.6297840445964304, + "grad_norm": 0.6037182935588778, + "learning_rate": 4.359865241308112e-07, + "loss": 0.2859, + "step": 34791 + }, + { + "epoch": 1.6298308895863587, + "grad_norm": 0.627920116561055, + "learning_rate": 4.358795203635011e-07, + "loss": 0.2935, + "step": 34792 + }, + { + "epoch": 1.629877734576287, + "grad_norm": 0.6363442069492327, + "learning_rate": 4.3577252847468584e-07, + "loss": 0.2815, + "step": 34793 + }, + { + "epoch": 1.6299245795662154, + "grad_norm": 0.6580557826308742, + "learning_rate": 4.356655484649813e-07, + "loss": 0.2912, + "step": 34794 + }, + { + "epoch": 1.6299714245561439, + "grad_norm": 0.5831319895396965, + "learning_rate": 4.355585803350021e-07, + "loss": 0.2558, + "step": 34795 + }, + { + "epoch": 1.630018269546072, + "grad_norm": 0.6105040911985117, + "learning_rate": 4.354516240853651e-07, + "loss": 0.2748, + "step": 34796 + }, + { + "epoch": 1.6300651145360003, + "grad_norm": 0.5838292696261275, + "learning_rate": 4.3534467971668465e-07, + "loss": 0.2711, + "step": 34797 + }, + { + "epoch": 1.6301119595259288, + "grad_norm": 0.5693963546758891, + "learning_rate": 4.3523774722957627e-07, + "loss": 0.2527, + "step": 34798 + }, + { + "epoch": 1.630158804515857, + "grad_norm": 0.659168588805518, + "learning_rate": 4.351308266246559e-07, + "loss": 0.2877, + "step": 34799 + }, + { + "epoch": 1.6302056495057853, + "grad_norm": 0.6539807260891971, + "learning_rate": 4.3502391790253924e-07, + "loss": 0.3067, + "step": 34800 + }, + { + "epoch": 1.6302524944957137, + "grad_norm": 0.576897686646309, + "learning_rate": 4.3491702106384037e-07, + "loss": 0.261, + "step": 34801 + }, + { + "epoch": 1.630299339485642, + "grad_norm": 0.5854402406916255, + "learning_rate": 4.3481013610917474e-07, + "loss": 0.2662, + "step": 34802 + }, + { + "epoch": 1.6303461844755702, + "grad_norm": 0.6000399561422233, + "learning_rate": 4.3470326303915873e-07, + "loss": 0.2634, + "step": 34803 + }, + { + "epoch": 1.6303930294654987, + "grad_norm": 0.6146808501829407, + "learning_rate": 4.345964018544055e-07, + "loss": 0.2694, + "step": 34804 + }, + { + "epoch": 1.6304398744554272, + "grad_norm": 0.6046244280174743, + "learning_rate": 4.3448955255553067e-07, + "loss": 0.267, + "step": 34805 + }, + { + "epoch": 1.6304867194453552, + "grad_norm": 0.6048964181808977, + "learning_rate": 4.343827151431501e-07, + "loss": 0.2753, + "step": 34806 + }, + { + "epoch": 1.6305335644352836, + "grad_norm": 0.5862487106212209, + "learning_rate": 4.3427588961787725e-07, + "loss": 0.2616, + "step": 34807 + }, + { + "epoch": 1.630580409425212, + "grad_norm": 0.5548108949985192, + "learning_rate": 4.341690759803274e-07, + "loss": 0.2569, + "step": 34808 + }, + { + "epoch": 1.6306272544151403, + "grad_norm": 0.5965135066592562, + "learning_rate": 4.3406227423111543e-07, + "loss": 0.2866, + "step": 34809 + }, + { + "epoch": 1.6306740994050686, + "grad_norm": 0.6161655859457469, + "learning_rate": 4.3395548437085626e-07, + "loss": 0.2623, + "step": 34810 + }, + { + "epoch": 1.630720944394997, + "grad_norm": 0.625853671209639, + "learning_rate": 4.3384870640016367e-07, + "loss": 0.2861, + "step": 34811 + }, + { + "epoch": 1.6307677893849253, + "grad_norm": 0.6124028984958522, + "learning_rate": 4.337419403196527e-07, + "loss": 0.2768, + "step": 34812 + }, + { + "epoch": 1.6308146343748535, + "grad_norm": 0.6230393052365434, + "learning_rate": 4.3363518612993704e-07, + "loss": 0.2895, + "step": 34813 + }, + { + "epoch": 1.630861479364782, + "grad_norm": 0.5887965041879772, + "learning_rate": 4.335284438316317e-07, + "loss": 0.2642, + "step": 34814 + }, + { + "epoch": 1.6309083243547102, + "grad_norm": 0.5963775291879704, + "learning_rate": 4.334217134253507e-07, + "loss": 0.2814, + "step": 34815 + }, + { + "epoch": 1.6309551693446385, + "grad_norm": 0.5878245675972794, + "learning_rate": 4.3331499491170827e-07, + "loss": 0.2678, + "step": 34816 + }, + { + "epoch": 1.631002014334567, + "grad_norm": 0.6237384250560435, + "learning_rate": 4.332082882913191e-07, + "loss": 0.2641, + "step": 34817 + }, + { + "epoch": 1.6310488593244954, + "grad_norm": 0.5919491463162455, + "learning_rate": 4.3310159356479695e-07, + "loss": 0.2653, + "step": 34818 + }, + { + "epoch": 1.6310957043144234, + "grad_norm": 0.6207082621914333, + "learning_rate": 4.3299491073275495e-07, + "loss": 0.2711, + "step": 34819 + }, + { + "epoch": 1.6311425493043519, + "grad_norm": 0.6077895358948782, + "learning_rate": 4.3288823979580786e-07, + "loss": 0.2583, + "step": 34820 + }, + { + "epoch": 1.6311893942942803, + "grad_norm": 0.5965117774786594, + "learning_rate": 4.3278158075456903e-07, + "loss": 0.2777, + "step": 34821 + }, + { + "epoch": 1.6312362392842086, + "grad_norm": 0.6389916799511727, + "learning_rate": 4.3267493360965305e-07, + "loss": 0.2688, + "step": 34822 + }, + { + "epoch": 1.6312830842741368, + "grad_norm": 0.6439931066996974, + "learning_rate": 4.3256829836167304e-07, + "loss": 0.2863, + "step": 34823 + }, + { + "epoch": 1.6313299292640653, + "grad_norm": 0.5192541740678318, + "learning_rate": 4.3246167501124345e-07, + "loss": 0.2263, + "step": 34824 + }, + { + "epoch": 1.6313767742539935, + "grad_norm": 0.6018452649640464, + "learning_rate": 4.3235506355897713e-07, + "loss": 0.2831, + "step": 34825 + }, + { + "epoch": 1.6314236192439218, + "grad_norm": 0.5930754355775153, + "learning_rate": 4.322484640054872e-07, + "loss": 0.2811, + "step": 34826 + }, + { + "epoch": 1.6314704642338502, + "grad_norm": 0.5551852244406469, + "learning_rate": 4.321418763513877e-07, + "loss": 0.2513, + "step": 34827 + }, + { + "epoch": 1.6315173092237785, + "grad_norm": 0.5806900755875894, + "learning_rate": 4.3203530059729216e-07, + "loss": 0.2672, + "step": 34828 + }, + { + "epoch": 1.6315641542137067, + "grad_norm": 0.5798809969751572, + "learning_rate": 4.3192873674381325e-07, + "loss": 0.2665, + "step": 34829 + }, + { + "epoch": 1.6316109992036352, + "grad_norm": 0.6135455874655643, + "learning_rate": 4.3182218479156573e-07, + "loss": 0.2798, + "step": 34830 + }, + { + "epoch": 1.6316578441935636, + "grad_norm": 0.6553421074134093, + "learning_rate": 4.317156447411608e-07, + "loss": 0.2841, + "step": 34831 + }, + { + "epoch": 1.6317046891834919, + "grad_norm": 0.659499823184804, + "learning_rate": 4.316091165932132e-07, + "loss": 0.2847, + "step": 34832 + }, + { + "epoch": 1.6317515341734201, + "grad_norm": 0.599311242096169, + "learning_rate": 4.315026003483347e-07, + "loss": 0.281, + "step": 34833 + }, + { + "epoch": 1.6317983791633486, + "grad_norm": 0.5968970177129844, + "learning_rate": 4.3139609600713894e-07, + "loss": 0.278, + "step": 34834 + }, + { + "epoch": 1.6318452241532768, + "grad_norm": 0.5657213913144267, + "learning_rate": 4.3128960357023855e-07, + "loss": 0.264, + "step": 34835 + }, + { + "epoch": 1.631892069143205, + "grad_norm": 0.5744130690488548, + "learning_rate": 4.311831230382471e-07, + "loss": 0.2569, + "step": 34836 + }, + { + "epoch": 1.6319389141331335, + "grad_norm": 0.5940388237170997, + "learning_rate": 4.3107665441177644e-07, + "loss": 0.2665, + "step": 34837 + }, + { + "epoch": 1.6319857591230618, + "grad_norm": 0.5828531658454797, + "learning_rate": 4.3097019769144017e-07, + "loss": 0.2562, + "step": 34838 + }, + { + "epoch": 1.63203260411299, + "grad_norm": 0.6176565433811899, + "learning_rate": 4.308637528778495e-07, + "loss": 0.264, + "step": 34839 + }, + { + "epoch": 1.6320794491029185, + "grad_norm": 0.5537221042362386, + "learning_rate": 4.307573199716181e-07, + "loss": 0.2575, + "step": 34840 + }, + { + "epoch": 1.632126294092847, + "grad_norm": 0.6419887348144999, + "learning_rate": 4.306508989733582e-07, + "loss": 0.2681, + "step": 34841 + }, + { + "epoch": 1.632173139082775, + "grad_norm": 0.5779746180873803, + "learning_rate": 4.3054448988368276e-07, + "loss": 0.2631, + "step": 34842 + }, + { + "epoch": 1.6322199840727034, + "grad_norm": 0.5803728958489681, + "learning_rate": 4.3043809270320315e-07, + "loss": 0.2623, + "step": 34843 + }, + { + "epoch": 1.6322668290626319, + "grad_norm": 0.5819863134447523, + "learning_rate": 4.303317074325322e-07, + "loss": 0.2684, + "step": 34844 + }, + { + "epoch": 1.6323136740525601, + "grad_norm": 0.6600048938841938, + "learning_rate": 4.3022533407228285e-07, + "loss": 0.2834, + "step": 34845 + }, + { + "epoch": 1.6323605190424884, + "grad_norm": 0.5698134849950357, + "learning_rate": 4.301189726230656e-07, + "loss": 0.2589, + "step": 34846 + }, + { + "epoch": 1.6324073640324168, + "grad_norm": 0.602338859176613, + "learning_rate": 4.3001262308549343e-07, + "loss": 0.272, + "step": 34847 + }, + { + "epoch": 1.632454209022345, + "grad_norm": 0.6230622671271316, + "learning_rate": 4.2990628546017906e-07, + "loss": 0.2882, + "step": 34848 + }, + { + "epoch": 1.6325010540122733, + "grad_norm": 0.6162764724977, + "learning_rate": 4.2979995974773285e-07, + "loss": 0.2983, + "step": 34849 + }, + { + "epoch": 1.6325478990022018, + "grad_norm": 0.6399942024372899, + "learning_rate": 4.29693645948768e-07, + "loss": 0.2887, + "step": 34850 + }, + { + "epoch": 1.63259474399213, + "grad_norm": 0.549710830297036, + "learning_rate": 4.2958734406389537e-07, + "loss": 0.2497, + "step": 34851 + }, + { + "epoch": 1.6326415889820582, + "grad_norm": 0.614078292733638, + "learning_rate": 4.294810540937283e-07, + "loss": 0.277, + "step": 34852 + }, + { + "epoch": 1.6326884339719867, + "grad_norm": 0.6044379932081342, + "learning_rate": 4.293747760388764e-07, + "loss": 0.27, + "step": 34853 + }, + { + "epoch": 1.6327352789619152, + "grad_norm": 0.5661972904469269, + "learning_rate": 4.2926850989995247e-07, + "loss": 0.2725, + "step": 34854 + }, + { + "epoch": 1.6327821239518432, + "grad_norm": 0.6043373733642913, + "learning_rate": 4.291622556775682e-07, + "loss": 0.2775, + "step": 34855 + }, + { + "epoch": 1.6328289689417717, + "grad_norm": 0.5893309059176348, + "learning_rate": 4.2905601337233397e-07, + "loss": 0.2716, + "step": 34856 + }, + { + "epoch": 1.6328758139317001, + "grad_norm": 0.6248631279898987, + "learning_rate": 4.2894978298486214e-07, + "loss": 0.2852, + "step": 34857 + }, + { + "epoch": 1.6329226589216284, + "grad_norm": 0.6073939824012957, + "learning_rate": 4.2884356451576374e-07, + "loss": 0.2736, + "step": 34858 + }, + { + "epoch": 1.6329695039115566, + "grad_norm": 0.5813003156725423, + "learning_rate": 4.2873735796565065e-07, + "loss": 0.2675, + "step": 34859 + }, + { + "epoch": 1.633016348901485, + "grad_norm": 0.5469721356313966, + "learning_rate": 4.2863116333513253e-07, + "loss": 0.2635, + "step": 34860 + }, + { + "epoch": 1.6330631938914133, + "grad_norm": 0.6009029370585134, + "learning_rate": 4.2852498062482236e-07, + "loss": 0.2589, + "step": 34861 + }, + { + "epoch": 1.6331100388813415, + "grad_norm": 0.6340348993492372, + "learning_rate": 4.284188098353298e-07, + "loss": 0.2779, + "step": 34862 + }, + { + "epoch": 1.63315688387127, + "grad_norm": 0.596523589616077, + "learning_rate": 4.283126509672661e-07, + "loss": 0.2662, + "step": 34863 + }, + { + "epoch": 1.6332037288611982, + "grad_norm": 0.6052895101064707, + "learning_rate": 4.2820650402124244e-07, + "loss": 0.2752, + "step": 34864 + }, + { + "epoch": 1.6332505738511265, + "grad_norm": 0.5902169896984938, + "learning_rate": 4.2810036899786965e-07, + "loss": 0.2848, + "step": 34865 + }, + { + "epoch": 1.633297418841055, + "grad_norm": 0.610840691231653, + "learning_rate": 4.27994245897759e-07, + "loss": 0.2849, + "step": 34866 + }, + { + "epoch": 1.6333442638309834, + "grad_norm": 0.5754201674893356, + "learning_rate": 4.278881347215208e-07, + "loss": 0.2733, + "step": 34867 + }, + { + "epoch": 1.6333911088209117, + "grad_norm": 0.6264628815502601, + "learning_rate": 4.277820354697651e-07, + "loss": 0.2854, + "step": 34868 + }, + { + "epoch": 1.63343795381084, + "grad_norm": 0.5602982563914692, + "learning_rate": 4.276759481431028e-07, + "loss": 0.2611, + "step": 34869 + }, + { + "epoch": 1.6334847988007684, + "grad_norm": 0.6051228957709164, + "learning_rate": 4.2756987274214456e-07, + "loss": 0.2882, + "step": 34870 + }, + { + "epoch": 1.6335316437906966, + "grad_norm": 0.5926048302408372, + "learning_rate": 4.2746380926750067e-07, + "loss": 0.2611, + "step": 34871 + }, + { + "epoch": 1.6335784887806248, + "grad_norm": 0.6173630066579572, + "learning_rate": 4.273577577197824e-07, + "loss": 0.2572, + "step": 34872 + }, + { + "epoch": 1.6336253337705533, + "grad_norm": 0.5753678985202749, + "learning_rate": 4.272517180995986e-07, + "loss": 0.2647, + "step": 34873 + }, + { + "epoch": 1.6336721787604815, + "grad_norm": 0.614781254563498, + "learning_rate": 4.2714569040756107e-07, + "loss": 0.2827, + "step": 34874 + }, + { + "epoch": 1.6337190237504098, + "grad_norm": 0.6191647849320455, + "learning_rate": 4.2703967464427817e-07, + "loss": 0.2858, + "step": 34875 + }, + { + "epoch": 1.6337658687403382, + "grad_norm": 0.596490075799794, + "learning_rate": 4.269336708103611e-07, + "loss": 0.2688, + "step": 34876 + }, + { + "epoch": 1.6338127137302667, + "grad_norm": 0.685120893454687, + "learning_rate": 4.268276789064196e-07, + "loss": 0.2859, + "step": 34877 + }, + { + "epoch": 1.6338595587201947, + "grad_norm": 0.52501185662966, + "learning_rate": 4.267216989330639e-07, + "loss": 0.2497, + "step": 34878 + }, + { + "epoch": 1.6339064037101232, + "grad_norm": 0.5988533307159017, + "learning_rate": 4.2661573089090413e-07, + "loss": 0.2661, + "step": 34879 + }, + { + "epoch": 1.6339532487000517, + "grad_norm": 0.5932588212596801, + "learning_rate": 4.2650977478054997e-07, + "loss": 0.2615, + "step": 34880 + }, + { + "epoch": 1.63400009368998, + "grad_norm": 0.5786050202252601, + "learning_rate": 4.2640383060260997e-07, + "loss": 0.2575, + "step": 34881 + }, + { + "epoch": 1.6340469386799081, + "grad_norm": 0.6339549013190762, + "learning_rate": 4.2629789835769486e-07, + "loss": 0.2847, + "step": 34882 + }, + { + "epoch": 1.6340937836698366, + "grad_norm": 0.5998449657224946, + "learning_rate": 4.261919780464144e-07, + "loss": 0.2739, + "step": 34883 + }, + { + "epoch": 1.6341406286597648, + "grad_norm": 0.5918126918944162, + "learning_rate": 4.260860696693775e-07, + "loss": 0.2751, + "step": 34884 + }, + { + "epoch": 1.634187473649693, + "grad_norm": 0.5886084889758396, + "learning_rate": 4.2598017322719483e-07, + "loss": 0.272, + "step": 34885 + }, + { + "epoch": 1.6342343186396215, + "grad_norm": 0.5739254192133686, + "learning_rate": 4.258742887204742e-07, + "loss": 0.2667, + "step": 34886 + }, + { + "epoch": 1.6342811636295498, + "grad_norm": 0.5920842531969834, + "learning_rate": 4.257684161498266e-07, + "loss": 0.2665, + "step": 34887 + }, + { + "epoch": 1.634328008619478, + "grad_norm": 0.612386848618441, + "learning_rate": 4.256625555158597e-07, + "loss": 0.266, + "step": 34888 + }, + { + "epoch": 1.6343748536094065, + "grad_norm": 0.5532291563962918, + "learning_rate": 4.255567068191835e-07, + "loss": 0.2544, + "step": 34889 + }, + { + "epoch": 1.634421698599335, + "grad_norm": 0.6015335356491386, + "learning_rate": 4.254508700604068e-07, + "loss": 0.2681, + "step": 34890 + }, + { + "epoch": 1.634468543589263, + "grad_norm": 0.6050201530485528, + "learning_rate": 4.253450452401398e-07, + "loss": 0.2696, + "step": 34891 + }, + { + "epoch": 1.6345153885791914, + "grad_norm": 0.5879369415636656, + "learning_rate": 4.252392323589899e-07, + "loss": 0.2711, + "step": 34892 + }, + { + "epoch": 1.63456223356912, + "grad_norm": 0.6413131163715099, + "learning_rate": 4.2513343141756676e-07, + "loss": 0.2757, + "step": 34893 + }, + { + "epoch": 1.6346090785590481, + "grad_norm": 0.5498389348595532, + "learning_rate": 4.250276424164798e-07, + "loss": 0.2301, + "step": 34894 + }, + { + "epoch": 1.6346559235489764, + "grad_norm": 0.5933967329715238, + "learning_rate": 4.2492186535633666e-07, + "loss": 0.2665, + "step": 34895 + }, + { + "epoch": 1.6347027685389048, + "grad_norm": 0.6476604647368959, + "learning_rate": 4.2481610023774653e-07, + "loss": 0.2873, + "step": 34896 + }, + { + "epoch": 1.634749613528833, + "grad_norm": 0.643520612343823, + "learning_rate": 4.24710347061319e-07, + "loss": 0.2726, + "step": 34897 + }, + { + "epoch": 1.6347964585187613, + "grad_norm": 0.584705705838958, + "learning_rate": 4.2460460582766094e-07, + "loss": 0.266, + "step": 34898 + }, + { + "epoch": 1.6348433035086898, + "grad_norm": 0.5804609409473815, + "learning_rate": 4.2449887653738197e-07, + "loss": 0.2689, + "step": 34899 + }, + { + "epoch": 1.634890148498618, + "grad_norm": 0.6168488403299794, + "learning_rate": 4.243931591910902e-07, + "loss": 0.2736, + "step": 34900 + }, + { + "epoch": 1.6349369934885463, + "grad_norm": 0.5853320257407388, + "learning_rate": 4.2428745378939486e-07, + "loss": 0.2873, + "step": 34901 + }, + { + "epoch": 1.6349838384784747, + "grad_norm": 0.5902136625603476, + "learning_rate": 4.2418176033290264e-07, + "loss": 0.269, + "step": 34902 + }, + { + "epoch": 1.6350306834684032, + "grad_norm": 0.5930197754924441, + "learning_rate": 4.240760788222234e-07, + "loss": 0.2686, + "step": 34903 + }, + { + "epoch": 1.6350775284583314, + "grad_norm": 0.6204287620060409, + "learning_rate": 4.2397040925796384e-07, + "loss": 0.265, + "step": 34904 + }, + { + "epoch": 1.6351243734482597, + "grad_norm": 0.618259032843439, + "learning_rate": 4.2386475164073295e-07, + "loss": 0.2789, + "step": 34905 + }, + { + "epoch": 1.6351712184381881, + "grad_norm": 0.6706124191528932, + "learning_rate": 4.237591059711385e-07, + "loss": 0.2872, + "step": 34906 + }, + { + "epoch": 1.6352180634281164, + "grad_norm": 0.6021944767392234, + "learning_rate": 4.236534722497887e-07, + "loss": 0.2755, + "step": 34907 + }, + { + "epoch": 1.6352649084180446, + "grad_norm": 0.6051130613973715, + "learning_rate": 4.2354785047729194e-07, + "loss": 0.2819, + "step": 34908 + }, + { + "epoch": 1.635311753407973, + "grad_norm": 0.6310270303385743, + "learning_rate": 4.2344224065425514e-07, + "loss": 0.2769, + "step": 34909 + }, + { + "epoch": 1.6353585983979013, + "grad_norm": 0.5946371040137681, + "learning_rate": 4.23336642781286e-07, + "loss": 0.2655, + "step": 34910 + }, + { + "epoch": 1.6354054433878296, + "grad_norm": 0.6671236223148211, + "learning_rate": 4.232310568589923e-07, + "loss": 0.2878, + "step": 34911 + }, + { + "epoch": 1.635452288377758, + "grad_norm": 0.5923309043636328, + "learning_rate": 4.231254828879819e-07, + "loss": 0.267, + "step": 34912 + }, + { + "epoch": 1.6354991333676865, + "grad_norm": 0.6021371495196086, + "learning_rate": 4.230199208688623e-07, + "loss": 0.2851, + "step": 34913 + }, + { + "epoch": 1.6355459783576145, + "grad_norm": 0.6117923316275518, + "learning_rate": 4.2291437080224125e-07, + "loss": 0.267, + "step": 34914 + }, + { + "epoch": 1.635592823347543, + "grad_norm": 0.5825097806613933, + "learning_rate": 4.228088326887264e-07, + "loss": 0.2668, + "step": 34915 + }, + { + "epoch": 1.6356396683374714, + "grad_norm": 0.5482511980360423, + "learning_rate": 4.227033065289249e-07, + "loss": 0.2555, + "step": 34916 + }, + { + "epoch": 1.6356865133273997, + "grad_norm": 0.5954923221869968, + "learning_rate": 4.2259779232344284e-07, + "loss": 0.2744, + "step": 34917 + }, + { + "epoch": 1.635733358317328, + "grad_norm": 0.6071874447324979, + "learning_rate": 4.2249229007288837e-07, + "loss": 0.2848, + "step": 34918 + }, + { + "epoch": 1.6357802033072564, + "grad_norm": 0.5876715122770135, + "learning_rate": 4.2238679977786857e-07, + "loss": 0.2596, + "step": 34919 + }, + { + "epoch": 1.6358270482971846, + "grad_norm": 0.6181037639633101, + "learning_rate": 4.222813214389906e-07, + "loss": 0.2682, + "step": 34920 + }, + { + "epoch": 1.6358738932871129, + "grad_norm": 0.6024442135387718, + "learning_rate": 4.221758550568622e-07, + "loss": 0.2767, + "step": 34921 + }, + { + "epoch": 1.6359207382770413, + "grad_norm": 0.6319023885223256, + "learning_rate": 4.2207040063208885e-07, + "loss": 0.2778, + "step": 34922 + }, + { + "epoch": 1.6359675832669696, + "grad_norm": 0.545009362809217, + "learning_rate": 4.219649581652785e-07, + "loss": 0.2526, + "step": 34923 + }, + { + "epoch": 1.6360144282568978, + "grad_norm": 0.6141852505762085, + "learning_rate": 4.2185952765703697e-07, + "loss": 0.2853, + "step": 34924 + }, + { + "epoch": 1.6360612732468263, + "grad_norm": 0.5909935885832954, + "learning_rate": 4.217541091079716e-07, + "loss": 0.2689, + "step": 34925 + }, + { + "epoch": 1.6361081182367547, + "grad_norm": 0.6210815200949358, + "learning_rate": 4.216487025186891e-07, + "loss": 0.2799, + "step": 34926 + }, + { + "epoch": 1.6361549632266827, + "grad_norm": 0.5712664167374626, + "learning_rate": 4.215433078897957e-07, + "loss": 0.259, + "step": 34927 + }, + { + "epoch": 1.6362018082166112, + "grad_norm": 0.6039456396172631, + "learning_rate": 4.214379252218989e-07, + "loss": 0.2836, + "step": 34928 + }, + { + "epoch": 1.6362486532065397, + "grad_norm": 0.6241823781751266, + "learning_rate": 4.213325545156044e-07, + "loss": 0.2745, + "step": 34929 + }, + { + "epoch": 1.636295498196468, + "grad_norm": 0.620656345317466, + "learning_rate": 4.2122719577151806e-07, + "loss": 0.2644, + "step": 34930 + }, + { + "epoch": 1.6363423431863962, + "grad_norm": 0.623910342738556, + "learning_rate": 4.211218489902466e-07, + "loss": 0.2749, + "step": 34931 + }, + { + "epoch": 1.6363891881763246, + "grad_norm": 0.5643137284782037, + "learning_rate": 4.210165141723965e-07, + "loss": 0.2687, + "step": 34932 + }, + { + "epoch": 1.6364360331662529, + "grad_norm": 0.5874667206350757, + "learning_rate": 4.209111913185737e-07, + "loss": 0.2696, + "step": 34933 + }, + { + "epoch": 1.636482878156181, + "grad_norm": 0.574410046648888, + "learning_rate": 4.208058804293852e-07, + "loss": 0.2616, + "step": 34934 + }, + { + "epoch": 1.6365297231461096, + "grad_norm": 0.587346395334978, + "learning_rate": 4.207005815054357e-07, + "loss": 0.2559, + "step": 34935 + }, + { + "epoch": 1.6365765681360378, + "grad_norm": 0.5881570484698404, + "learning_rate": 4.205952945473321e-07, + "loss": 0.2772, + "step": 34936 + }, + { + "epoch": 1.636623413125966, + "grad_norm": 0.6542290402089334, + "learning_rate": 4.2049001955567965e-07, + "loss": 0.2652, + "step": 34937 + }, + { + "epoch": 1.6366702581158945, + "grad_norm": 0.6274711156100017, + "learning_rate": 4.2038475653108426e-07, + "loss": 0.2843, + "step": 34938 + }, + { + "epoch": 1.636717103105823, + "grad_norm": 0.6361822527959252, + "learning_rate": 4.202795054741518e-07, + "loss": 0.2851, + "step": 34939 + }, + { + "epoch": 1.6367639480957512, + "grad_norm": 0.5937808400089929, + "learning_rate": 4.201742663854888e-07, + "loss": 0.2754, + "step": 34940 + }, + { + "epoch": 1.6368107930856794, + "grad_norm": 0.5910559794675395, + "learning_rate": 4.2006903926569945e-07, + "loss": 0.2736, + "step": 34941 + }, + { + "epoch": 1.636857638075608, + "grad_norm": 0.6228019172039831, + "learning_rate": 4.1996382411539025e-07, + "loss": 0.264, + "step": 34942 + }, + { + "epoch": 1.6369044830655362, + "grad_norm": 0.6208949176502565, + "learning_rate": 4.198586209351668e-07, + "loss": 0.2774, + "step": 34943 + }, + { + "epoch": 1.6369513280554644, + "grad_norm": 0.5917906375071708, + "learning_rate": 4.1975342972563394e-07, + "loss": 0.281, + "step": 34944 + }, + { + "epoch": 1.6369981730453929, + "grad_norm": 0.6480103439689497, + "learning_rate": 4.196482504873969e-07, + "loss": 0.2943, + "step": 34945 + }, + { + "epoch": 1.637045018035321, + "grad_norm": 0.6137452653319041, + "learning_rate": 4.19543083221062e-07, + "loss": 0.2637, + "step": 34946 + }, + { + "epoch": 1.6370918630252493, + "grad_norm": 0.5941083801565419, + "learning_rate": 4.1943792792723315e-07, + "loss": 0.2764, + "step": 34947 + }, + { + "epoch": 1.6371387080151778, + "grad_norm": 0.6337468721927813, + "learning_rate": 4.1933278460651595e-07, + "loss": 0.2832, + "step": 34948 + }, + { + "epoch": 1.6371855530051063, + "grad_norm": 0.5824193129334833, + "learning_rate": 4.1922765325951574e-07, + "loss": 0.2573, + "step": 34949 + }, + { + "epoch": 1.6372323979950343, + "grad_norm": 0.5904069373738093, + "learning_rate": 4.191225338868379e-07, + "loss": 0.2834, + "step": 34950 + }, + { + "epoch": 1.6372792429849627, + "grad_norm": 0.6135408223901793, + "learning_rate": 4.1901742648908644e-07, + "loss": 0.2773, + "step": 34951 + }, + { + "epoch": 1.6373260879748912, + "grad_norm": 0.5619841653229527, + "learning_rate": 4.189123310668672e-07, + "loss": 0.2697, + "step": 34952 + }, + { + "epoch": 1.6373729329648194, + "grad_norm": 0.5642597341494615, + "learning_rate": 4.188072476207838e-07, + "loss": 0.2606, + "step": 34953 + }, + { + "epoch": 1.6374197779547477, + "grad_norm": 0.5963689340280256, + "learning_rate": 4.187021761514418e-07, + "loss": 0.2756, + "step": 34954 + }, + { + "epoch": 1.6374666229446762, + "grad_norm": 0.6151594485448725, + "learning_rate": 4.1859711665944545e-07, + "loss": 0.2613, + "step": 34955 + }, + { + "epoch": 1.6375134679346044, + "grad_norm": 0.545415055660807, + "learning_rate": 4.184920691453995e-07, + "loss": 0.252, + "step": 34956 + }, + { + "epoch": 1.6375603129245326, + "grad_norm": 0.5941311158607352, + "learning_rate": 4.183870336099091e-07, + "loss": 0.2785, + "step": 34957 + }, + { + "epoch": 1.637607157914461, + "grad_norm": 0.620943481779303, + "learning_rate": 4.182820100535784e-07, + "loss": 0.2925, + "step": 34958 + }, + { + "epoch": 1.6376540029043893, + "grad_norm": 0.5824795932928236, + "learning_rate": 4.1817699847701065e-07, + "loss": 0.2704, + "step": 34959 + }, + { + "epoch": 1.6377008478943176, + "grad_norm": 0.6101861397318215, + "learning_rate": 4.180719988808113e-07, + "loss": 0.2791, + "step": 34960 + }, + { + "epoch": 1.637747692884246, + "grad_norm": 0.6406029119100525, + "learning_rate": 4.17967011265584e-07, + "loss": 0.2847, + "step": 34961 + }, + { + "epoch": 1.6377945378741745, + "grad_norm": 0.5964711490284094, + "learning_rate": 4.178620356319335e-07, + "loss": 0.2603, + "step": 34962 + }, + { + "epoch": 1.6378413828641025, + "grad_norm": 0.5841047559459989, + "learning_rate": 4.177570719804636e-07, + "loss": 0.2677, + "step": 34963 + }, + { + "epoch": 1.637888227854031, + "grad_norm": 0.5746477970275099, + "learning_rate": 4.1765212031177896e-07, + "loss": 0.2625, + "step": 34964 + }, + { + "epoch": 1.6379350728439594, + "grad_norm": 0.5601085177884487, + "learning_rate": 4.17547180626483e-07, + "loss": 0.2439, + "step": 34965 + }, + { + "epoch": 1.6379819178338877, + "grad_norm": 0.6008187625285005, + "learning_rate": 4.1744225292517896e-07, + "loss": 0.2668, + "step": 34966 + }, + { + "epoch": 1.638028762823816, + "grad_norm": 0.573690714102055, + "learning_rate": 4.173373372084713e-07, + "loss": 0.2569, + "step": 34967 + }, + { + "epoch": 1.6380756078137444, + "grad_norm": 0.6179565979822854, + "learning_rate": 4.1723243347696396e-07, + "loss": 0.2676, + "step": 34968 + }, + { + "epoch": 1.6381224528036726, + "grad_norm": 0.5764092597263254, + "learning_rate": 4.1712754173126033e-07, + "loss": 0.2677, + "step": 34969 + }, + { + "epoch": 1.6381692977936009, + "grad_norm": 0.6109484960838818, + "learning_rate": 4.17022661971965e-07, + "loss": 0.2723, + "step": 34970 + }, + { + "epoch": 1.6382161427835293, + "grad_norm": 0.6285519755634468, + "learning_rate": 4.169177941996799e-07, + "loss": 0.2885, + "step": 34971 + }, + { + "epoch": 1.6382629877734576, + "grad_norm": 0.6385472880834144, + "learning_rate": 4.1681293841500994e-07, + "loss": 0.2816, + "step": 34972 + }, + { + "epoch": 1.6383098327633858, + "grad_norm": 0.6092568551625427, + "learning_rate": 4.1670809461855724e-07, + "loss": 0.2762, + "step": 34973 + }, + { + "epoch": 1.6383566777533143, + "grad_norm": 0.6043736784655525, + "learning_rate": 4.1660326281092625e-07, + "loss": 0.2754, + "step": 34974 + }, + { + "epoch": 1.6384035227432427, + "grad_norm": 0.5563655160558493, + "learning_rate": 4.1649844299271935e-07, + "loss": 0.2645, + "step": 34975 + }, + { + "epoch": 1.638450367733171, + "grad_norm": 0.6011674547011646, + "learning_rate": 4.1639363516454117e-07, + "loss": 0.2727, + "step": 34976 + }, + { + "epoch": 1.6384972127230992, + "grad_norm": 0.578736336853253, + "learning_rate": 4.1628883932699315e-07, + "loss": 0.2657, + "step": 34977 + }, + { + "epoch": 1.6385440577130277, + "grad_norm": 0.5882685214755647, + "learning_rate": 4.161840554806798e-07, + "loss": 0.2787, + "step": 34978 + }, + { + "epoch": 1.638590902702956, + "grad_norm": 0.6101805841365687, + "learning_rate": 4.160792836262029e-07, + "loss": 0.2797, + "step": 34979 + }, + { + "epoch": 1.6386377476928842, + "grad_norm": 0.5765399464034817, + "learning_rate": 4.15974523764166e-07, + "loss": 0.2468, + "step": 34980 + }, + { + "epoch": 1.6386845926828126, + "grad_norm": 0.5772398524638731, + "learning_rate": 4.1586977589517185e-07, + "loss": 0.2623, + "step": 34981 + }, + { + "epoch": 1.6387314376727409, + "grad_norm": 0.6326355290693786, + "learning_rate": 4.157650400198238e-07, + "loss": 0.3073, + "step": 34982 + }, + { + "epoch": 1.6387782826626691, + "grad_norm": 0.6307750706904862, + "learning_rate": 4.1566031613872376e-07, + "loss": 0.2838, + "step": 34983 + }, + { + "epoch": 1.6388251276525976, + "grad_norm": 0.5832245787137879, + "learning_rate": 4.155556042524744e-07, + "loss": 0.2591, + "step": 34984 + }, + { + "epoch": 1.638871972642526, + "grad_norm": 0.5711723624183004, + "learning_rate": 4.1545090436167944e-07, + "loss": 0.2517, + "step": 34985 + }, + { + "epoch": 1.638918817632454, + "grad_norm": 0.6468905864559021, + "learning_rate": 4.1534621646694e-07, + "loss": 0.2842, + "step": 34986 + }, + { + "epoch": 1.6389656626223825, + "grad_norm": 0.6231880539928931, + "learning_rate": 4.1524154056885894e-07, + "loss": 0.2578, + "step": 34987 + }, + { + "epoch": 1.639012507612311, + "grad_norm": 0.6142624258804046, + "learning_rate": 4.1513687666803916e-07, + "loss": 0.2849, + "step": 34988 + }, + { + "epoch": 1.6390593526022392, + "grad_norm": 0.6088652953971968, + "learning_rate": 4.150322247650829e-07, + "loss": 0.2749, + "step": 34989 + }, + { + "epoch": 1.6391061975921675, + "grad_norm": 0.617714965086987, + "learning_rate": 4.149275848605916e-07, + "loss": 0.2595, + "step": 34990 + }, + { + "epoch": 1.639153042582096, + "grad_norm": 0.610453407392588, + "learning_rate": 4.1482295695516817e-07, + "loss": 0.2658, + "step": 34991 + }, + { + "epoch": 1.6391998875720242, + "grad_norm": 0.6244739420749066, + "learning_rate": 4.147183410494152e-07, + "loss": 0.2719, + "step": 34992 + }, + { + "epoch": 1.6392467325619524, + "grad_norm": 0.6313400995732082, + "learning_rate": 4.146137371439332e-07, + "loss": 0.2831, + "step": 34993 + }, + { + "epoch": 1.6392935775518809, + "grad_norm": 0.5785873286612334, + "learning_rate": 4.145091452393252e-07, + "loss": 0.2488, + "step": 34994 + }, + { + "epoch": 1.6393404225418091, + "grad_norm": 0.6146032739966439, + "learning_rate": 4.1440456533619335e-07, + "loss": 0.2897, + "step": 34995 + }, + { + "epoch": 1.6393872675317374, + "grad_norm": 0.5809395359671501, + "learning_rate": 4.1429999743513833e-07, + "loss": 0.2715, + "step": 34996 + }, + { + "epoch": 1.6394341125216658, + "grad_norm": 0.5871157072512958, + "learning_rate": 4.1419544153676274e-07, + "loss": 0.2703, + "step": 34997 + }, + { + "epoch": 1.6394809575115943, + "grad_norm": 0.5934352641821639, + "learning_rate": 4.1409089764166827e-07, + "loss": 0.2723, + "step": 34998 + }, + { + "epoch": 1.6395278025015223, + "grad_norm": 0.5993943280285128, + "learning_rate": 4.1398636575045696e-07, + "loss": 0.2687, + "step": 34999 + }, + { + "epoch": 1.6395746474914508, + "grad_norm": 0.6023533571162009, + "learning_rate": 4.138818458637292e-07, + "loss": 0.2785, + "step": 35000 + }, + { + "epoch": 1.6396214924813792, + "grad_norm": 0.6014503746187111, + "learning_rate": 4.137773379820875e-07, + "loss": 0.2741, + "step": 35001 + }, + { + "epoch": 1.6396683374713075, + "grad_norm": 0.6119926176106641, + "learning_rate": 4.1367284210613255e-07, + "loss": 0.2785, + "step": 35002 + }, + { + "epoch": 1.6397151824612357, + "grad_norm": 0.594054503498825, + "learning_rate": 4.135683582364658e-07, + "loss": 0.2738, + "step": 35003 + }, + { + "epoch": 1.6397620274511642, + "grad_norm": 0.5735872901263623, + "learning_rate": 4.1346388637368866e-07, + "loss": 0.2607, + "step": 35004 + }, + { + "epoch": 1.6398088724410924, + "grad_norm": 0.6126347052352799, + "learning_rate": 4.1335942651840267e-07, + "loss": 0.2885, + "step": 35005 + }, + { + "epoch": 1.6398557174310207, + "grad_norm": 0.6148515337562656, + "learning_rate": 4.132549786712092e-07, + "loss": 0.2728, + "step": 35006 + }, + { + "epoch": 1.6399025624209491, + "grad_norm": 0.5843469623399534, + "learning_rate": 4.13150542832709e-07, + "loss": 0.2623, + "step": 35007 + }, + { + "epoch": 1.6399494074108774, + "grad_norm": 0.5872433177981327, + "learning_rate": 4.1304611900350207e-07, + "loss": 0.2742, + "step": 35008 + }, + { + "epoch": 1.6399962524008056, + "grad_norm": 0.666372382278738, + "learning_rate": 4.1294170718419043e-07, + "loss": 0.2944, + "step": 35009 + }, + { + "epoch": 1.640043097390734, + "grad_norm": 0.5613612624731467, + "learning_rate": 4.128373073753744e-07, + "loss": 0.2583, + "step": 35010 + }, + { + "epoch": 1.6400899423806625, + "grad_norm": 0.6611549129838924, + "learning_rate": 4.1273291957765526e-07, + "loss": 0.2846, + "step": 35011 + }, + { + "epoch": 1.6401367873705908, + "grad_norm": 0.5951933689871162, + "learning_rate": 4.126285437916344e-07, + "loss": 0.2632, + "step": 35012 + }, + { + "epoch": 1.640183632360519, + "grad_norm": 0.642343042899691, + "learning_rate": 4.125241800179106e-07, + "loss": 0.2874, + "step": 35013 + }, + { + "epoch": 1.6402304773504475, + "grad_norm": 0.5785501648180892, + "learning_rate": 4.1241982825708625e-07, + "loss": 0.2582, + "step": 35014 + }, + { + "epoch": 1.6402773223403757, + "grad_norm": 0.6445931731479434, + "learning_rate": 4.123154885097605e-07, + "loss": 0.2878, + "step": 35015 + }, + { + "epoch": 1.640324167330304, + "grad_norm": 0.5847544964996687, + "learning_rate": 4.1221116077653443e-07, + "loss": 0.2777, + "step": 35016 + }, + { + "epoch": 1.6403710123202324, + "grad_norm": 0.6072514145751593, + "learning_rate": 4.1210684505800846e-07, + "loss": 0.2663, + "step": 35017 + }, + { + "epoch": 1.6404178573101607, + "grad_norm": 0.5924176177324438, + "learning_rate": 4.1200254135478265e-07, + "loss": 0.2756, + "step": 35018 + }, + { + "epoch": 1.640464702300089, + "grad_norm": 0.6097257552949417, + "learning_rate": 4.1189824966745813e-07, + "loss": 0.2677, + "step": 35019 + }, + { + "epoch": 1.6405115472900174, + "grad_norm": 0.6040011893424568, + "learning_rate": 4.1179396999663443e-07, + "loss": 0.2685, + "step": 35020 + }, + { + "epoch": 1.6405583922799458, + "grad_norm": 0.59690306079237, + "learning_rate": 4.116897023429109e-07, + "loss": 0.2703, + "step": 35021 + }, + { + "epoch": 1.6406052372698738, + "grad_norm": 0.6296655174489743, + "learning_rate": 4.1158544670688806e-07, + "loss": 0.2865, + "step": 35022 + }, + { + "epoch": 1.6406520822598023, + "grad_norm": 0.5653623743965266, + "learning_rate": 4.1148120308916626e-07, + "loss": 0.2694, + "step": 35023 + }, + { + "epoch": 1.6406989272497308, + "grad_norm": 0.6128243983768741, + "learning_rate": 4.113769714903451e-07, + "loss": 0.2714, + "step": 35024 + }, + { + "epoch": 1.640745772239659, + "grad_norm": 0.5808645555127884, + "learning_rate": 4.1127275191102515e-07, + "loss": 0.2731, + "step": 35025 + }, + { + "epoch": 1.6407926172295872, + "grad_norm": 0.5533282853529093, + "learning_rate": 4.1116854435180514e-07, + "loss": 0.2588, + "step": 35026 + }, + { + "epoch": 1.6408394622195157, + "grad_norm": 0.6283267997934641, + "learning_rate": 4.1106434881328565e-07, + "loss": 0.2893, + "step": 35027 + }, + { + "epoch": 1.640886307209444, + "grad_norm": 0.6090234400516774, + "learning_rate": 4.1096016529606516e-07, + "loss": 0.291, + "step": 35028 + }, + { + "epoch": 1.6409331521993722, + "grad_norm": 0.6275491771858581, + "learning_rate": 4.1085599380074364e-07, + "loss": 0.2808, + "step": 35029 + }, + { + "epoch": 1.6409799971893007, + "grad_norm": 0.5908801892328683, + "learning_rate": 4.10751834327921e-07, + "loss": 0.2677, + "step": 35030 + }, + { + "epoch": 1.641026842179229, + "grad_norm": 0.6239264758766261, + "learning_rate": 4.1064768687819694e-07, + "loss": 0.2716, + "step": 35031 + }, + { + "epoch": 1.6410736871691571, + "grad_norm": 0.60605046913421, + "learning_rate": 4.1054355145216994e-07, + "loss": 0.2785, + "step": 35032 + }, + { + "epoch": 1.6411205321590856, + "grad_norm": 0.5447733482338712, + "learning_rate": 4.104394280504395e-07, + "loss": 0.2651, + "step": 35033 + }, + { + "epoch": 1.641167377149014, + "grad_norm": 0.6103751724347641, + "learning_rate": 4.1033531667360545e-07, + "loss": 0.2744, + "step": 35034 + }, + { + "epoch": 1.641214222138942, + "grad_norm": 0.5924285315924385, + "learning_rate": 4.102312173222658e-07, + "loss": 0.2642, + "step": 35035 + }, + { + "epoch": 1.6412610671288705, + "grad_norm": 0.5943257597094866, + "learning_rate": 4.1012712999702056e-07, + "loss": 0.2658, + "step": 35036 + }, + { + "epoch": 1.641307912118799, + "grad_norm": 0.5516660338771388, + "learning_rate": 4.1002305469846874e-07, + "loss": 0.2637, + "step": 35037 + }, + { + "epoch": 1.6413547571087272, + "grad_norm": 0.6036872064639541, + "learning_rate": 4.0991899142720837e-07, + "loss": 0.2633, + "step": 35038 + }, + { + "epoch": 1.6414016020986555, + "grad_norm": 0.625496977041188, + "learning_rate": 4.0981494018383915e-07, + "loss": 0.2871, + "step": 35039 + }, + { + "epoch": 1.641448447088584, + "grad_norm": 0.6243396721425538, + "learning_rate": 4.097109009689593e-07, + "loss": 0.2934, + "step": 35040 + }, + { + "epoch": 1.6414952920785122, + "grad_norm": 0.6101939367085799, + "learning_rate": 4.096068737831685e-07, + "loss": 0.2728, + "step": 35041 + }, + { + "epoch": 1.6415421370684404, + "grad_norm": 0.5702248047692529, + "learning_rate": 4.095028586270641e-07, + "loss": 0.2613, + "step": 35042 + }, + { + "epoch": 1.641588982058369, + "grad_norm": 0.6153548432674409, + "learning_rate": 4.0939885550124623e-07, + "loss": 0.2659, + "step": 35043 + }, + { + "epoch": 1.6416358270482971, + "grad_norm": 0.5881135705228461, + "learning_rate": 4.092948644063116e-07, + "loss": 0.2761, + "step": 35044 + }, + { + "epoch": 1.6416826720382254, + "grad_norm": 0.5942063271585477, + "learning_rate": 4.0919088534285946e-07, + "loss": 0.2725, + "step": 35045 + }, + { + "epoch": 1.6417295170281538, + "grad_norm": 0.5765054480997333, + "learning_rate": 4.0908691831148843e-07, + "loss": 0.2654, + "step": 35046 + }, + { + "epoch": 1.6417763620180823, + "grad_norm": 0.598352959560374, + "learning_rate": 4.089829633127967e-07, + "loss": 0.2658, + "step": 35047 + }, + { + "epoch": 1.6418232070080105, + "grad_norm": 0.546577095484401, + "learning_rate": 4.08879020347383e-07, + "loss": 0.257, + "step": 35048 + }, + { + "epoch": 1.6418700519979388, + "grad_norm": 0.606631832003308, + "learning_rate": 4.0877508941584517e-07, + "loss": 0.2724, + "step": 35049 + }, + { + "epoch": 1.6419168969878672, + "grad_norm": 0.6049031659244893, + "learning_rate": 4.0867117051878013e-07, + "loss": 0.2708, + "step": 35050 + }, + { + "epoch": 1.6419637419777955, + "grad_norm": 0.6114199459134122, + "learning_rate": 4.0856726365678695e-07, + "loss": 0.2771, + "step": 35051 + }, + { + "epoch": 1.6420105869677237, + "grad_norm": 0.7611642288990579, + "learning_rate": 4.0846336883046375e-07, + "loss": 0.2837, + "step": 35052 + }, + { + "epoch": 1.6420574319576522, + "grad_norm": 0.5875431573773138, + "learning_rate": 4.083594860404078e-07, + "loss": 0.2693, + "step": 35053 + }, + { + "epoch": 1.6421042769475804, + "grad_norm": 0.6291149994925095, + "learning_rate": 4.0825561528721744e-07, + "loss": 0.2781, + "step": 35054 + }, + { + "epoch": 1.6421511219375087, + "grad_norm": 0.679629468830314, + "learning_rate": 4.08151756571491e-07, + "loss": 0.2909, + "step": 35055 + }, + { + "epoch": 1.6421979669274371, + "grad_norm": 0.6220331873340533, + "learning_rate": 4.080479098938253e-07, + "loss": 0.2781, + "step": 35056 + }, + { + "epoch": 1.6422448119173656, + "grad_norm": 0.6238207134384823, + "learning_rate": 4.079440752548178e-07, + "loss": 0.2724, + "step": 35057 + }, + { + "epoch": 1.6422916569072936, + "grad_norm": 0.5938512531850246, + "learning_rate": 4.0784025265506595e-07, + "loss": 0.2841, + "step": 35058 + }, + { + "epoch": 1.642338501897222, + "grad_norm": 0.5830362642712293, + "learning_rate": 4.077364420951677e-07, + "loss": 0.2575, + "step": 35059 + }, + { + "epoch": 1.6423853468871505, + "grad_norm": 0.6328110132497046, + "learning_rate": 4.076326435757205e-07, + "loss": 0.2752, + "step": 35060 + }, + { + "epoch": 1.6424321918770788, + "grad_norm": 0.6342558387021009, + "learning_rate": 4.0752885709732184e-07, + "loss": 0.281, + "step": 35061 + }, + { + "epoch": 1.642479036867007, + "grad_norm": 0.591929956818403, + "learning_rate": 4.0742508266056827e-07, + "loss": 0.2779, + "step": 35062 + }, + { + "epoch": 1.6425258818569355, + "grad_norm": 0.5985659983725256, + "learning_rate": 4.0732132026605807e-07, + "loss": 0.28, + "step": 35063 + }, + { + "epoch": 1.6425727268468637, + "grad_norm": 0.6396178478945737, + "learning_rate": 4.072175699143871e-07, + "loss": 0.2895, + "step": 35064 + }, + { + "epoch": 1.642619571836792, + "grad_norm": 0.6082684219770338, + "learning_rate": 4.071138316061529e-07, + "loss": 0.2588, + "step": 35065 + }, + { + "epoch": 1.6426664168267204, + "grad_norm": 0.5818669746887959, + "learning_rate": 4.0701010534195255e-07, + "loss": 0.2615, + "step": 35066 + }, + { + "epoch": 1.6427132618166487, + "grad_norm": 0.5451137523483431, + "learning_rate": 4.0690639112238305e-07, + "loss": 0.2501, + "step": 35067 + }, + { + "epoch": 1.642760106806577, + "grad_norm": 0.6001670587892413, + "learning_rate": 4.068026889480417e-07, + "loss": 0.263, + "step": 35068 + }, + { + "epoch": 1.6428069517965054, + "grad_norm": 0.6390776235400903, + "learning_rate": 4.0669899881952494e-07, + "loss": 0.259, + "step": 35069 + }, + { + "epoch": 1.6428537967864338, + "grad_norm": 0.6155264250867877, + "learning_rate": 4.0659532073742875e-07, + "loss": 0.2998, + "step": 35070 + }, + { + "epoch": 1.6429006417763619, + "grad_norm": 0.5995937460483557, + "learning_rate": 4.064916547023501e-07, + "loss": 0.2736, + "step": 35071 + }, + { + "epoch": 1.6429474867662903, + "grad_norm": 0.5975999908158158, + "learning_rate": 4.06388000714886e-07, + "loss": 0.2721, + "step": 35072 + }, + { + "epoch": 1.6429943317562188, + "grad_norm": 0.6156751897277417, + "learning_rate": 4.0628435877563266e-07, + "loss": 0.2745, + "step": 35073 + }, + { + "epoch": 1.643041176746147, + "grad_norm": 0.6091769754049791, + "learning_rate": 4.061807288851871e-07, + "loss": 0.2729, + "step": 35074 + }, + { + "epoch": 1.6430880217360753, + "grad_norm": 0.582469702315252, + "learning_rate": 4.060771110441447e-07, + "loss": 0.2713, + "step": 35075 + }, + { + "epoch": 1.6431348667260037, + "grad_norm": 0.575072484741886, + "learning_rate": 4.0597350525310275e-07, + "loss": 0.2746, + "step": 35076 + }, + { + "epoch": 1.643181711715932, + "grad_norm": 0.6133883874562309, + "learning_rate": 4.058699115126566e-07, + "loss": 0.27, + "step": 35077 + }, + { + "epoch": 1.6432285567058602, + "grad_norm": 0.6047030007497001, + "learning_rate": 4.057663298234024e-07, + "loss": 0.2655, + "step": 35078 + }, + { + "epoch": 1.6432754016957887, + "grad_norm": 0.5679211942680708, + "learning_rate": 4.0566276018593697e-07, + "loss": 0.2571, + "step": 35079 + }, + { + "epoch": 1.643322246685717, + "grad_norm": 0.6030373051363831, + "learning_rate": 4.0555920260085617e-07, + "loss": 0.2636, + "step": 35080 + }, + { + "epoch": 1.6433690916756452, + "grad_norm": 0.5987944790696481, + "learning_rate": 4.054556570687554e-07, + "loss": 0.2586, + "step": 35081 + }, + { + "epoch": 1.6434159366655736, + "grad_norm": 0.6224902392186823, + "learning_rate": 4.0535212359023107e-07, + "loss": 0.2812, + "step": 35082 + }, + { + "epoch": 1.643462781655502, + "grad_norm": 0.5547150794500674, + "learning_rate": 4.0524860216587914e-07, + "loss": 0.2504, + "step": 35083 + }, + { + "epoch": 1.6435096266454303, + "grad_norm": 0.5738649056061728, + "learning_rate": 4.051450927962944e-07, + "loss": 0.2673, + "step": 35084 + }, + { + "epoch": 1.6435564716353586, + "grad_norm": 0.5373903916132827, + "learning_rate": 4.05041595482073e-07, + "loss": 0.2671, + "step": 35085 + }, + { + "epoch": 1.643603316625287, + "grad_norm": 0.6504404824099813, + "learning_rate": 4.0493811022381146e-07, + "loss": 0.2849, + "step": 35086 + }, + { + "epoch": 1.6436501616152153, + "grad_norm": 0.6040894789973696, + "learning_rate": 4.04834637022104e-07, + "loss": 0.2634, + "step": 35087 + }, + { + "epoch": 1.6436970066051435, + "grad_norm": 0.6049374596393854, + "learning_rate": 4.0473117587754654e-07, + "loss": 0.2612, + "step": 35088 + }, + { + "epoch": 1.643743851595072, + "grad_norm": 0.5556846866319561, + "learning_rate": 4.046277267907345e-07, + "loss": 0.252, + "step": 35089 + }, + { + "epoch": 1.6437906965850002, + "grad_norm": 0.6075974895112627, + "learning_rate": 4.045242897622639e-07, + "loss": 0.2723, + "step": 35090 + }, + { + "epoch": 1.6438375415749285, + "grad_norm": 0.6092389658873023, + "learning_rate": 4.0442086479272863e-07, + "loss": 0.2652, + "step": 35091 + }, + { + "epoch": 1.643884386564857, + "grad_norm": 0.6097615597906937, + "learning_rate": 4.043174518827253e-07, + "loss": 0.274, + "step": 35092 + }, + { + "epoch": 1.6439312315547854, + "grad_norm": 0.5955883596072256, + "learning_rate": 4.0421405103284764e-07, + "loss": 0.2762, + "step": 35093 + }, + { + "epoch": 1.6439780765447134, + "grad_norm": 0.5598009742741733, + "learning_rate": 4.0411066224369133e-07, + "loss": 0.2618, + "step": 35094 + }, + { + "epoch": 1.6440249215346419, + "grad_norm": 0.5853985304611654, + "learning_rate": 4.040072855158514e-07, + "loss": 0.2724, + "step": 35095 + }, + { + "epoch": 1.6440717665245703, + "grad_norm": 0.6226628662627874, + "learning_rate": 4.0390392084992264e-07, + "loss": 0.2761, + "step": 35096 + }, + { + "epoch": 1.6441186115144986, + "grad_norm": 0.5692451383333946, + "learning_rate": 4.038005682465007e-07, + "loss": 0.2484, + "step": 35097 + }, + { + "epoch": 1.6441654565044268, + "grad_norm": 0.6460762594005465, + "learning_rate": 4.0369722770617957e-07, + "loss": 0.2722, + "step": 35098 + }, + { + "epoch": 1.6442123014943553, + "grad_norm": 0.5935898563722231, + "learning_rate": 4.035938992295535e-07, + "loss": 0.2727, + "step": 35099 + }, + { + "epoch": 1.6442591464842835, + "grad_norm": 0.6193500202973656, + "learning_rate": 4.0349058281721756e-07, + "loss": 0.2793, + "step": 35100 + }, + { + "epoch": 1.6443059914742117, + "grad_norm": 0.5958129649160098, + "learning_rate": 4.0338727846976647e-07, + "loss": 0.276, + "step": 35101 + }, + { + "epoch": 1.6443528364641402, + "grad_norm": 0.6400601995220135, + "learning_rate": 4.0328398618779435e-07, + "loss": 0.294, + "step": 35102 + }, + { + "epoch": 1.6443996814540685, + "grad_norm": 0.5751532330505503, + "learning_rate": 4.031807059718962e-07, + "loss": 0.2528, + "step": 35103 + }, + { + "epoch": 1.6444465264439967, + "grad_norm": 0.5734243861768666, + "learning_rate": 4.030774378226665e-07, + "loss": 0.2464, + "step": 35104 + }, + { + "epoch": 1.6444933714339252, + "grad_norm": 0.5971560122739047, + "learning_rate": 4.029741817406993e-07, + "loss": 0.273, + "step": 35105 + }, + { + "epoch": 1.6445402164238536, + "grad_norm": 0.564140919241025, + "learning_rate": 4.028709377265877e-07, + "loss": 0.2592, + "step": 35106 + }, + { + "epoch": 1.6445870614137816, + "grad_norm": 0.6117447031113272, + "learning_rate": 4.0276770578092704e-07, + "loss": 0.2597, + "step": 35107 + }, + { + "epoch": 1.64463390640371, + "grad_norm": 0.5864937961523783, + "learning_rate": 4.0266448590431106e-07, + "loss": 0.2703, + "step": 35108 + }, + { + "epoch": 1.6446807513936386, + "grad_norm": 0.6016285237313238, + "learning_rate": 4.0256127809733367e-07, + "loss": 0.2887, + "step": 35109 + }, + { + "epoch": 1.6447275963835668, + "grad_norm": 0.6096205647440002, + "learning_rate": 4.0245808236058976e-07, + "loss": 0.2764, + "step": 35110 + }, + { + "epoch": 1.644774441373495, + "grad_norm": 0.5985504210434108, + "learning_rate": 4.023548986946718e-07, + "loss": 0.2769, + "step": 35111 + }, + { + "epoch": 1.6448212863634235, + "grad_norm": 0.6258075429913887, + "learning_rate": 4.022517271001747e-07, + "loss": 0.2806, + "step": 35112 + }, + { + "epoch": 1.6448681313533517, + "grad_norm": 0.5896593254991096, + "learning_rate": 4.0214856757769095e-07, + "loss": 0.2749, + "step": 35113 + }, + { + "epoch": 1.64491497634328, + "grad_norm": 0.611213792876891, + "learning_rate": 4.020454201278148e-07, + "loss": 0.2858, + "step": 35114 + }, + { + "epoch": 1.6449618213332085, + "grad_norm": 0.6135914887606629, + "learning_rate": 4.0194228475114033e-07, + "loss": 0.2722, + "step": 35115 + }, + { + "epoch": 1.6450086663231367, + "grad_norm": 0.5773171530555194, + "learning_rate": 4.0183916144826114e-07, + "loss": 0.2713, + "step": 35116 + }, + { + "epoch": 1.645055511313065, + "grad_norm": 0.5825441791888647, + "learning_rate": 4.0173605021976954e-07, + "loss": 0.2673, + "step": 35117 + }, + { + "epoch": 1.6451023563029934, + "grad_norm": 0.6231704579424242, + "learning_rate": 4.016329510662603e-07, + "loss": 0.2665, + "step": 35118 + }, + { + "epoch": 1.6451492012929219, + "grad_norm": 0.6045785886513049, + "learning_rate": 4.015298639883253e-07, + "loss": 0.274, + "step": 35119 + }, + { + "epoch": 1.64519604628285, + "grad_norm": 0.6111789925654052, + "learning_rate": 4.014267889865586e-07, + "loss": 0.2793, + "step": 35120 + }, + { + "epoch": 1.6452428912727783, + "grad_norm": 0.5874432855161058, + "learning_rate": 4.013237260615532e-07, + "loss": 0.2845, + "step": 35121 + }, + { + "epoch": 1.6452897362627068, + "grad_norm": 0.6434322058152163, + "learning_rate": 4.0122067521390224e-07, + "loss": 0.259, + "step": 35122 + }, + { + "epoch": 1.645336581252635, + "grad_norm": 0.5611655300479313, + "learning_rate": 4.011176364441996e-07, + "loss": 0.2632, + "step": 35123 + }, + { + "epoch": 1.6453834262425633, + "grad_norm": 0.5898337695098215, + "learning_rate": 4.0101460975303654e-07, + "loss": 0.2673, + "step": 35124 + }, + { + "epoch": 1.6454302712324917, + "grad_norm": 0.6139750235303553, + "learning_rate": 4.009115951410078e-07, + "loss": 0.2823, + "step": 35125 + }, + { + "epoch": 1.64547711622242, + "grad_norm": 0.6594525752906395, + "learning_rate": 4.008085926087044e-07, + "loss": 0.2876, + "step": 35126 + }, + { + "epoch": 1.6455239612123482, + "grad_norm": 0.6294563518456476, + "learning_rate": 4.0070560215671986e-07, + "loss": 0.289, + "step": 35127 + }, + { + "epoch": 1.6455708062022767, + "grad_norm": 0.6037108578707729, + "learning_rate": 4.0060262378564695e-07, + "loss": 0.2637, + "step": 35128 + }, + { + "epoch": 1.6456176511922052, + "grad_norm": 0.6152254599696778, + "learning_rate": 4.0049965749607897e-07, + "loss": 0.2778, + "step": 35129 + }, + { + "epoch": 1.6456644961821332, + "grad_norm": 0.5741564349915753, + "learning_rate": 4.0039670328860717e-07, + "loss": 0.2621, + "step": 35130 + }, + { + "epoch": 1.6457113411720616, + "grad_norm": 0.6259557955908142, + "learning_rate": 4.0029376116382467e-07, + "loss": 0.283, + "step": 35131 + }, + { + "epoch": 1.64575818616199, + "grad_norm": 0.5949944765906269, + "learning_rate": 4.001908311223246e-07, + "loss": 0.2787, + "step": 35132 + }, + { + "epoch": 1.6458050311519183, + "grad_norm": 0.6432492516763866, + "learning_rate": 4.000879131646976e-07, + "loss": 0.2748, + "step": 35133 + }, + { + "epoch": 1.6458518761418466, + "grad_norm": 0.5915677775267761, + "learning_rate": 3.9998500729153706e-07, + "loss": 0.274, + "step": 35134 + }, + { + "epoch": 1.645898721131775, + "grad_norm": 0.544668342651379, + "learning_rate": 3.9988211350343556e-07, + "loss": 0.2523, + "step": 35135 + }, + { + "epoch": 1.6459455661217033, + "grad_norm": 0.5834988162428977, + "learning_rate": 3.997792318009838e-07, + "loss": 0.2638, + "step": 35136 + }, + { + "epoch": 1.6459924111116315, + "grad_norm": 0.5880978790773261, + "learning_rate": 3.996763621847752e-07, + "loss": 0.2695, + "step": 35137 + }, + { + "epoch": 1.64603925610156, + "grad_norm": 0.6069112736464384, + "learning_rate": 3.995735046554008e-07, + "loss": 0.2654, + "step": 35138 + }, + { + "epoch": 1.6460861010914882, + "grad_norm": 0.6390641222578085, + "learning_rate": 3.994706592134537e-07, + "loss": 0.2866, + "step": 35139 + }, + { + "epoch": 1.6461329460814165, + "grad_norm": 0.5563194709600816, + "learning_rate": 3.993678258595243e-07, + "loss": 0.2469, + "step": 35140 + }, + { + "epoch": 1.646179791071345, + "grad_norm": 0.5751789650744146, + "learning_rate": 3.9926500459420593e-07, + "loss": 0.2608, + "step": 35141 + }, + { + "epoch": 1.6462266360612734, + "grad_norm": 0.6071380023197112, + "learning_rate": 3.991621954180888e-07, + "loss": 0.2576, + "step": 35142 + }, + { + "epoch": 1.6462734810512014, + "grad_norm": 0.612006486774168, + "learning_rate": 3.9905939833176503e-07, + "loss": 0.285, + "step": 35143 + }, + { + "epoch": 1.6463203260411299, + "grad_norm": 0.5936308693799544, + "learning_rate": 3.9895661333582634e-07, + "loss": 0.2792, + "step": 35144 + }, + { + "epoch": 1.6463671710310583, + "grad_norm": 0.5974221856351245, + "learning_rate": 3.988538404308645e-07, + "loss": 0.2661, + "step": 35145 + }, + { + "epoch": 1.6464140160209866, + "grad_norm": 0.59683991100089, + "learning_rate": 3.9875107961747123e-07, + "loss": 0.2707, + "step": 35146 + }, + { + "epoch": 1.6464608610109148, + "grad_norm": 0.5944163933715104, + "learning_rate": 3.986483308962374e-07, + "loss": 0.2634, + "step": 35147 + }, + { + "epoch": 1.6465077060008433, + "grad_norm": 0.5633453598980256, + "learning_rate": 3.985455942677535e-07, + "loss": 0.2674, + "step": 35148 + }, + { + "epoch": 1.6465545509907715, + "grad_norm": 0.5369036077606295, + "learning_rate": 3.9844286973261176e-07, + "loss": 0.2565, + "step": 35149 + }, + { + "epoch": 1.6466013959806998, + "grad_norm": 0.5955733864759188, + "learning_rate": 3.983401572914028e-07, + "loss": 0.2627, + "step": 35150 + }, + { + "epoch": 1.6466482409706282, + "grad_norm": 0.5657563976618891, + "learning_rate": 3.982374569447184e-07, + "loss": 0.2699, + "step": 35151 + }, + { + "epoch": 1.6466950859605565, + "grad_norm": 0.6213307476059753, + "learning_rate": 3.981347686931494e-07, + "loss": 0.2859, + "step": 35152 + }, + { + "epoch": 1.6467419309504847, + "grad_norm": 0.6248288042507717, + "learning_rate": 3.98032092537286e-07, + "loss": 0.2509, + "step": 35153 + }, + { + "epoch": 1.6467887759404132, + "grad_norm": 0.5736530396856566, + "learning_rate": 3.979294284777205e-07, + "loss": 0.2715, + "step": 35154 + }, + { + "epoch": 1.6468356209303416, + "grad_norm": 0.59316618733373, + "learning_rate": 3.9782677651504173e-07, + "loss": 0.2827, + "step": 35155 + }, + { + "epoch": 1.6468824659202699, + "grad_norm": 0.6334634578980246, + "learning_rate": 3.977241366498419e-07, + "loss": 0.2824, + "step": 35156 + }, + { + "epoch": 1.6469293109101981, + "grad_norm": 0.5233315878574242, + "learning_rate": 3.9762150888271125e-07, + "loss": 0.2435, + "step": 35157 + }, + { + "epoch": 1.6469761559001266, + "grad_norm": 0.5315979161382178, + "learning_rate": 3.9751889321424013e-07, + "loss": 0.2546, + "step": 35158 + }, + { + "epoch": 1.6470230008900548, + "grad_norm": 0.5813691414240959, + "learning_rate": 3.974162896450201e-07, + "loss": 0.2672, + "step": 35159 + }, + { + "epoch": 1.647069845879983, + "grad_norm": 0.6442082999243389, + "learning_rate": 3.9731369817564093e-07, + "loss": 0.2835, + "step": 35160 + }, + { + "epoch": 1.6471166908699115, + "grad_norm": 0.6023667999168612, + "learning_rate": 3.9721111880669213e-07, + "loss": 0.2907, + "step": 35161 + }, + { + "epoch": 1.6471635358598398, + "grad_norm": 0.5772637534340268, + "learning_rate": 3.9710855153876486e-07, + "loss": 0.2471, + "step": 35162 + }, + { + "epoch": 1.647210380849768, + "grad_norm": 0.5787028444923734, + "learning_rate": 3.9700599637244956e-07, + "loss": 0.2627, + "step": 35163 + }, + { + "epoch": 1.6472572258396965, + "grad_norm": 0.6112172365883185, + "learning_rate": 3.96903453308336e-07, + "loss": 0.2759, + "step": 35164 + }, + { + "epoch": 1.647304070829625, + "grad_norm": 0.6118886357500436, + "learning_rate": 3.968009223470151e-07, + "loss": 0.2856, + "step": 35165 + }, + { + "epoch": 1.647350915819553, + "grad_norm": 0.6189308750231395, + "learning_rate": 3.9669840348907585e-07, + "loss": 0.2714, + "step": 35166 + }, + { + "epoch": 1.6473977608094814, + "grad_norm": 0.5515118622193004, + "learning_rate": 3.9659589673510885e-07, + "loss": 0.258, + "step": 35167 + }, + { + "epoch": 1.6474446057994099, + "grad_norm": 0.6126173089349896, + "learning_rate": 3.964934020857036e-07, + "loss": 0.2821, + "step": 35168 + }, + { + "epoch": 1.6474914507893381, + "grad_norm": 0.5933652687016996, + "learning_rate": 3.9639091954145005e-07, + "loss": 0.2783, + "step": 35169 + }, + { + "epoch": 1.6475382957792664, + "grad_norm": 0.6396958593985702, + "learning_rate": 3.9628844910293785e-07, + "loss": 0.276, + "step": 35170 + }, + { + "epoch": 1.6475851407691948, + "grad_norm": 0.5953983488799417, + "learning_rate": 3.9618599077075743e-07, + "loss": 0.2742, + "step": 35171 + }, + { + "epoch": 1.647631985759123, + "grad_norm": 0.589868706278052, + "learning_rate": 3.9608354454549745e-07, + "loss": 0.2774, + "step": 35172 + }, + { + "epoch": 1.6476788307490513, + "grad_norm": 0.59860746201968, + "learning_rate": 3.959811104277478e-07, + "loss": 0.2856, + "step": 35173 + }, + { + "epoch": 1.6477256757389798, + "grad_norm": 0.580121979612222, + "learning_rate": 3.958786884180987e-07, + "loss": 0.2654, + "step": 35174 + }, + { + "epoch": 1.647772520728908, + "grad_norm": 0.5987361304113276, + "learning_rate": 3.957762785171382e-07, + "loss": 0.2533, + "step": 35175 + }, + { + "epoch": 1.6478193657188362, + "grad_norm": 0.5663574652557596, + "learning_rate": 3.956738807254565e-07, + "loss": 0.2542, + "step": 35176 + }, + { + "epoch": 1.6478662107087647, + "grad_norm": 0.5559956417650069, + "learning_rate": 3.9557149504364317e-07, + "loss": 0.2555, + "step": 35177 + }, + { + "epoch": 1.6479130556986932, + "grad_norm": 0.5938956462395192, + "learning_rate": 3.954691214722864e-07, + "loss": 0.2752, + "step": 35178 + }, + { + "epoch": 1.6479599006886212, + "grad_norm": 0.6237852976151239, + "learning_rate": 3.9536676001197596e-07, + "loss": 0.2789, + "step": 35179 + }, + { + "epoch": 1.6480067456785497, + "grad_norm": 0.5917711714845743, + "learning_rate": 3.952644106633005e-07, + "loss": 0.275, + "step": 35180 + }, + { + "epoch": 1.6480535906684781, + "grad_norm": 0.5687612075407377, + "learning_rate": 3.951620734268502e-07, + "loss": 0.2539, + "step": 35181 + }, + { + "epoch": 1.6481004356584064, + "grad_norm": 0.6230310550266224, + "learning_rate": 3.950597483032126e-07, + "loss": 0.2892, + "step": 35182 + }, + { + "epoch": 1.6481472806483346, + "grad_norm": 0.5919312995623052, + "learning_rate": 3.9495743529297774e-07, + "loss": 0.2605, + "step": 35183 + }, + { + "epoch": 1.648194125638263, + "grad_norm": 0.6216885607342433, + "learning_rate": 3.9485513439673303e-07, + "loss": 0.2721, + "step": 35184 + }, + { + "epoch": 1.6482409706281913, + "grad_norm": 0.6464370796244412, + "learning_rate": 3.947528456150679e-07, + "loss": 0.2832, + "step": 35185 + }, + { + "epoch": 1.6482878156181195, + "grad_norm": 0.5967089630011546, + "learning_rate": 3.946505689485711e-07, + "loss": 0.2648, + "step": 35186 + }, + { + "epoch": 1.648334660608048, + "grad_norm": 0.614093625528369, + "learning_rate": 3.94548304397831e-07, + "loss": 0.2701, + "step": 35187 + }, + { + "epoch": 1.6483815055979762, + "grad_norm": 0.5890298694057343, + "learning_rate": 3.944460519634369e-07, + "loss": 0.2739, + "step": 35188 + }, + { + "epoch": 1.6484283505879045, + "grad_norm": 0.6329865593649872, + "learning_rate": 3.9434381164597637e-07, + "loss": 0.2781, + "step": 35189 + }, + { + "epoch": 1.648475195577833, + "grad_norm": 0.5979546739120963, + "learning_rate": 3.9424158344603753e-07, + "loss": 0.2952, + "step": 35190 + }, + { + "epoch": 1.6485220405677614, + "grad_norm": 0.6055607665086511, + "learning_rate": 3.941393673642091e-07, + "loss": 0.2775, + "step": 35191 + }, + { + "epoch": 1.6485688855576897, + "grad_norm": 0.608518169808415, + "learning_rate": 3.940371634010795e-07, + "loss": 0.2856, + "step": 35192 + }, + { + "epoch": 1.648615730547618, + "grad_norm": 0.5872569819162515, + "learning_rate": 3.9393497155723657e-07, + "loss": 0.2697, + "step": 35193 + }, + { + "epoch": 1.6486625755375464, + "grad_norm": 0.6164837776234589, + "learning_rate": 3.938327918332685e-07, + "loss": 0.2546, + "step": 35194 + }, + { + "epoch": 1.6487094205274746, + "grad_norm": 0.603310113016155, + "learning_rate": 3.9373062422976394e-07, + "loss": 0.2624, + "step": 35195 + }, + { + "epoch": 1.6487562655174028, + "grad_norm": 0.6149567419057769, + "learning_rate": 3.936284687473102e-07, + "loss": 0.2706, + "step": 35196 + }, + { + "epoch": 1.6488031105073313, + "grad_norm": 0.5846375442919699, + "learning_rate": 3.935263253864946e-07, + "loss": 0.2744, + "step": 35197 + }, + { + "epoch": 1.6488499554972595, + "grad_norm": 0.6246156757359593, + "learning_rate": 3.9342419414790583e-07, + "loss": 0.2868, + "step": 35198 + }, + { + "epoch": 1.6488968004871878, + "grad_norm": 0.582610472622331, + "learning_rate": 3.9332207503213123e-07, + "loss": 0.2532, + "step": 35199 + }, + { + "epoch": 1.6489436454771162, + "grad_norm": 0.6118023403482309, + "learning_rate": 3.932199680397583e-07, + "loss": 0.2726, + "step": 35200 + }, + { + "epoch": 1.6489904904670447, + "grad_norm": 0.6116371418131411, + "learning_rate": 3.9311787317137586e-07, + "loss": 0.2691, + "step": 35201 + }, + { + "epoch": 1.6490373354569727, + "grad_norm": 0.6177025230729034, + "learning_rate": 3.930157904275697e-07, + "loss": 0.2795, + "step": 35202 + }, + { + "epoch": 1.6490841804469012, + "grad_norm": 0.608955149407558, + "learning_rate": 3.929137198089289e-07, + "loss": 0.2575, + "step": 35203 + }, + { + "epoch": 1.6491310254368297, + "grad_norm": 0.6006451662527097, + "learning_rate": 3.928116613160393e-07, + "loss": 0.2707, + "step": 35204 + }, + { + "epoch": 1.649177870426758, + "grad_norm": 0.536739326726214, + "learning_rate": 3.927096149494891e-07, + "loss": 0.2563, + "step": 35205 + }, + { + "epoch": 1.6492247154166861, + "grad_norm": 0.6780967788157588, + "learning_rate": 3.926075807098653e-07, + "loss": 0.2881, + "step": 35206 + }, + { + "epoch": 1.6492715604066146, + "grad_norm": 0.5624483248338263, + "learning_rate": 3.925055585977552e-07, + "loss": 0.263, + "step": 35207 + }, + { + "epoch": 1.6493184053965428, + "grad_norm": 0.5855417421199479, + "learning_rate": 3.9240354861374673e-07, + "loss": 0.2805, + "step": 35208 + }, + { + "epoch": 1.649365250386471, + "grad_norm": 0.5731314379568043, + "learning_rate": 3.9230155075842573e-07, + "loss": 0.2608, + "step": 35209 + }, + { + "epoch": 1.6494120953763995, + "grad_norm": 0.5308339112351004, + "learning_rate": 3.9219956503237925e-07, + "loss": 0.2486, + "step": 35210 + }, + { + "epoch": 1.6494589403663278, + "grad_norm": 0.6054899860539213, + "learning_rate": 3.9209759143619435e-07, + "loss": 0.2867, + "step": 35211 + }, + { + "epoch": 1.649505785356256, + "grad_norm": 0.591538686010317, + "learning_rate": 3.91995629970458e-07, + "loss": 0.2738, + "step": 35212 + }, + { + "epoch": 1.6495526303461845, + "grad_norm": 0.5742671024650446, + "learning_rate": 3.918936806357573e-07, + "loss": 0.2617, + "step": 35213 + }, + { + "epoch": 1.649599475336113, + "grad_norm": 0.5769923103511798, + "learning_rate": 3.9179174343267874e-07, + "loss": 0.2705, + "step": 35214 + }, + { + "epoch": 1.649646320326041, + "grad_norm": 0.5565162155134665, + "learning_rate": 3.9168981836180844e-07, + "loss": 0.2651, + "step": 35215 + }, + { + "epoch": 1.6496931653159694, + "grad_norm": 0.6017173233708528, + "learning_rate": 3.91587905423734e-07, + "loss": 0.2586, + "step": 35216 + }, + { + "epoch": 1.649740010305898, + "grad_norm": 0.6499837073890353, + "learning_rate": 3.914860046190405e-07, + "loss": 0.2812, + "step": 35217 + }, + { + "epoch": 1.6497868552958261, + "grad_norm": 0.6346432291444846, + "learning_rate": 3.913841159483153e-07, + "loss": 0.2763, + "step": 35218 + }, + { + "epoch": 1.6498337002857544, + "grad_norm": 0.610823540197546, + "learning_rate": 3.9128223941214433e-07, + "loss": 0.2761, + "step": 35219 + }, + { + "epoch": 1.6498805452756828, + "grad_norm": 0.6042972014286947, + "learning_rate": 3.9118037501111484e-07, + "loss": 0.2747, + "step": 35220 + }, + { + "epoch": 1.649927390265611, + "grad_norm": 0.6084210527890198, + "learning_rate": 3.9107852274581136e-07, + "loss": 0.2864, + "step": 35221 + }, + { + "epoch": 1.6499742352555393, + "grad_norm": 0.5593224853419184, + "learning_rate": 3.9097668261682126e-07, + "loss": 0.2552, + "step": 35222 + }, + { + "epoch": 1.6500210802454678, + "grad_norm": 0.6213260481094378, + "learning_rate": 3.9087485462473067e-07, + "loss": 0.2807, + "step": 35223 + }, + { + "epoch": 1.650067925235396, + "grad_norm": 0.6023233085257377, + "learning_rate": 3.907730387701245e-07, + "loss": 0.2635, + "step": 35224 + }, + { + "epoch": 1.6501147702253243, + "grad_norm": 0.6433869372276926, + "learning_rate": 3.9067123505358963e-07, + "loss": 0.2859, + "step": 35225 + }, + { + "epoch": 1.6501616152152527, + "grad_norm": 0.5885163944903921, + "learning_rate": 3.9056944347571205e-07, + "loss": 0.2752, + "step": 35226 + }, + { + "epoch": 1.6502084602051812, + "grad_norm": 0.6065828293814646, + "learning_rate": 3.9046766403707667e-07, + "loss": 0.2737, + "step": 35227 + }, + { + "epoch": 1.6502553051951094, + "grad_norm": 0.5338534684797269, + "learning_rate": 3.9036589673826953e-07, + "loss": 0.2442, + "step": 35228 + }, + { + "epoch": 1.6503021501850377, + "grad_norm": 0.5558995465996899, + "learning_rate": 3.9026414157987636e-07, + "loss": 0.2512, + "step": 35229 + }, + { + "epoch": 1.6503489951749661, + "grad_norm": 0.5988391068089515, + "learning_rate": 3.901623985624836e-07, + "loss": 0.2724, + "step": 35230 + }, + { + "epoch": 1.6503958401648944, + "grad_norm": 0.5994015027890304, + "learning_rate": 3.900606676866753e-07, + "loss": 0.2804, + "step": 35231 + }, + { + "epoch": 1.6504426851548226, + "grad_norm": 0.6473187824588948, + "learning_rate": 3.899589489530381e-07, + "loss": 0.2792, + "step": 35232 + }, + { + "epoch": 1.650489530144751, + "grad_norm": 0.5834014498967324, + "learning_rate": 3.8985724236215606e-07, + "loss": 0.2813, + "step": 35233 + }, + { + "epoch": 1.6505363751346793, + "grad_norm": 0.6189343201960366, + "learning_rate": 3.8975554791461533e-07, + "loss": 0.2681, + "step": 35234 + }, + { + "epoch": 1.6505832201246076, + "grad_norm": 0.6150858590878143, + "learning_rate": 3.8965386561100107e-07, + "loss": 0.2704, + "step": 35235 + }, + { + "epoch": 1.650630065114536, + "grad_norm": 0.6065063726060205, + "learning_rate": 3.8955219545189855e-07, + "loss": 0.2711, + "step": 35236 + }, + { + "epoch": 1.6506769101044645, + "grad_norm": 0.5447354906769147, + "learning_rate": 3.894505374378932e-07, + "loss": 0.2716, + "step": 35237 + }, + { + "epoch": 1.6507237550943925, + "grad_norm": 0.6057139734349221, + "learning_rate": 3.893488915695695e-07, + "loss": 0.2616, + "step": 35238 + }, + { + "epoch": 1.650770600084321, + "grad_norm": 0.6003570187825313, + "learning_rate": 3.8924725784751176e-07, + "loss": 0.2698, + "step": 35239 + }, + { + "epoch": 1.6508174450742494, + "grad_norm": 0.6086273231507419, + "learning_rate": 3.891456362723056e-07, + "loss": 0.261, + "step": 35240 + }, + { + "epoch": 1.6508642900641777, + "grad_norm": 0.5848541311967709, + "learning_rate": 3.8904402684453583e-07, + "loss": 0.2807, + "step": 35241 + }, + { + "epoch": 1.650911135054106, + "grad_norm": 0.5388385213214932, + "learning_rate": 3.88942429564787e-07, + "loss": 0.245, + "step": 35242 + }, + { + "epoch": 1.6509579800440344, + "grad_norm": 0.6471678633724014, + "learning_rate": 3.888408444336439e-07, + "loss": 0.2796, + "step": 35243 + }, + { + "epoch": 1.6510048250339626, + "grad_norm": 0.6108848964718003, + "learning_rate": 3.887392714516919e-07, + "loss": 0.2855, + "step": 35244 + }, + { + "epoch": 1.6510516700238909, + "grad_norm": 0.577027306076762, + "learning_rate": 3.8863771061951436e-07, + "loss": 0.2761, + "step": 35245 + }, + { + "epoch": 1.6510985150138193, + "grad_norm": 0.5789662028812002, + "learning_rate": 3.885361619376959e-07, + "loss": 0.266, + "step": 35246 + }, + { + "epoch": 1.6511453600037476, + "grad_norm": 0.5990445602266868, + "learning_rate": 3.8843462540682107e-07, + "loss": 0.2602, + "step": 35247 + }, + { + "epoch": 1.6511922049936758, + "grad_norm": 0.5933864677276438, + "learning_rate": 3.8833310102747405e-07, + "loss": 0.2668, + "step": 35248 + }, + { + "epoch": 1.6512390499836043, + "grad_norm": 0.5926679930894225, + "learning_rate": 3.882315888002394e-07, + "loss": 0.2601, + "step": 35249 + }, + { + "epoch": 1.6512858949735327, + "grad_norm": 0.5882384965273888, + "learning_rate": 3.8813008872570196e-07, + "loss": 0.2613, + "step": 35250 + }, + { + "epoch": 1.6513327399634607, + "grad_norm": 0.5956680786474565, + "learning_rate": 3.8802860080444425e-07, + "loss": 0.2806, + "step": 35251 + }, + { + "epoch": 1.6513795849533892, + "grad_norm": 0.5810897742004937, + "learning_rate": 3.879271250370517e-07, + "loss": 0.2735, + "step": 35252 + }, + { + "epoch": 1.6514264299433177, + "grad_norm": 0.5842738760685687, + "learning_rate": 3.8782566142410716e-07, + "loss": 0.2745, + "step": 35253 + }, + { + "epoch": 1.651473274933246, + "grad_norm": 0.5840208302609027, + "learning_rate": 3.8772420996619513e-07, + "loss": 0.2604, + "step": 35254 + }, + { + "epoch": 1.6515201199231742, + "grad_norm": 0.5788115021844319, + "learning_rate": 3.8762277066389936e-07, + "loss": 0.2751, + "step": 35255 + }, + { + "epoch": 1.6515669649131026, + "grad_norm": 0.5953453670725305, + "learning_rate": 3.8752134351780407e-07, + "loss": 0.2733, + "step": 35256 + }, + { + "epoch": 1.6516138099030309, + "grad_norm": 0.6414437277408618, + "learning_rate": 3.8741992852849214e-07, + "loss": 0.2844, + "step": 35257 + }, + { + "epoch": 1.651660654892959, + "grad_norm": 0.6366267851560699, + "learning_rate": 3.873185256965481e-07, + "loss": 0.2776, + "step": 35258 + }, + { + "epoch": 1.6517074998828876, + "grad_norm": 0.6136486810449329, + "learning_rate": 3.8721713502255424e-07, + "loss": 0.2708, + "step": 35259 + }, + { + "epoch": 1.6517543448728158, + "grad_norm": 0.5888661725886717, + "learning_rate": 3.871157565070946e-07, + "loss": 0.2769, + "step": 35260 + }, + { + "epoch": 1.651801189862744, + "grad_norm": 0.6045950099929592, + "learning_rate": 3.870143901507528e-07, + "loss": 0.2763, + "step": 35261 + }, + { + "epoch": 1.6518480348526725, + "grad_norm": 0.5990074615809187, + "learning_rate": 3.869130359541123e-07, + "loss": 0.257, + "step": 35262 + }, + { + "epoch": 1.651894879842601, + "grad_norm": 0.6536316795808497, + "learning_rate": 3.868116939177566e-07, + "loss": 0.2793, + "step": 35263 + }, + { + "epoch": 1.6519417248325292, + "grad_norm": 0.59261796347389, + "learning_rate": 3.8671036404226816e-07, + "loss": 0.2707, + "step": 35264 + }, + { + "epoch": 1.6519885698224575, + "grad_norm": 0.639994786983041, + "learning_rate": 3.866090463282307e-07, + "loss": 0.2638, + "step": 35265 + }, + { + "epoch": 1.652035414812386, + "grad_norm": 0.5621879390661159, + "learning_rate": 3.8650774077622633e-07, + "loss": 0.2503, + "step": 35266 + }, + { + "epoch": 1.6520822598023142, + "grad_norm": 0.6042797792619743, + "learning_rate": 3.8640644738683895e-07, + "loss": 0.2628, + "step": 35267 + }, + { + "epoch": 1.6521291047922424, + "grad_norm": 0.5607910858809996, + "learning_rate": 3.8630516616065124e-07, + "loss": 0.2584, + "step": 35268 + }, + { + "epoch": 1.6521759497821709, + "grad_norm": 0.5932960567658377, + "learning_rate": 3.862038970982465e-07, + "loss": 0.2698, + "step": 35269 + }, + { + "epoch": 1.652222794772099, + "grad_norm": 0.556279506411386, + "learning_rate": 3.8610264020020634e-07, + "loss": 0.2535, + "step": 35270 + }, + { + "epoch": 1.6522696397620273, + "grad_norm": 0.5798030201044985, + "learning_rate": 3.8600139546711434e-07, + "loss": 0.2639, + "step": 35271 + }, + { + "epoch": 1.6523164847519558, + "grad_norm": 0.598725777843934, + "learning_rate": 3.8590016289955374e-07, + "loss": 0.2751, + "step": 35272 + }, + { + "epoch": 1.6523633297418843, + "grad_norm": 0.601599649069551, + "learning_rate": 3.857989424981054e-07, + "loss": 0.2675, + "step": 35273 + }, + { + "epoch": 1.6524101747318123, + "grad_norm": 0.6460245348654448, + "learning_rate": 3.8569773426335303e-07, + "loss": 0.2759, + "step": 35274 + }, + { + "epoch": 1.6524570197217407, + "grad_norm": 0.5778989934448887, + "learning_rate": 3.855965381958793e-07, + "loss": 0.2728, + "step": 35275 + }, + { + "epoch": 1.6525038647116692, + "grad_norm": 0.5813110249695671, + "learning_rate": 3.8549535429626553e-07, + "loss": 0.257, + "step": 35276 + }, + { + "epoch": 1.6525507097015975, + "grad_norm": 0.6015458383853186, + "learning_rate": 3.853941825650945e-07, + "loss": 0.2801, + "step": 35277 + }, + { + "epoch": 1.6525975546915257, + "grad_norm": 0.5478424278059206, + "learning_rate": 3.8529302300294835e-07, + "loss": 0.255, + "step": 35278 + }, + { + "epoch": 1.6526443996814542, + "grad_norm": 0.5453395837118368, + "learning_rate": 3.8519187561041005e-07, + "loss": 0.2668, + "step": 35279 + }, + { + "epoch": 1.6526912446713824, + "grad_norm": 0.6038418108664348, + "learning_rate": 3.850907403880602e-07, + "loss": 0.2751, + "step": 35280 + }, + { + "epoch": 1.6527380896613106, + "grad_norm": 0.5909914427718496, + "learning_rate": 3.8498961733648225e-07, + "loss": 0.2675, + "step": 35281 + }, + { + "epoch": 1.652784934651239, + "grad_norm": 0.5800988181991861, + "learning_rate": 3.8488850645625714e-07, + "loss": 0.285, + "step": 35282 + }, + { + "epoch": 1.6528317796411673, + "grad_norm": 0.6180270868779211, + "learning_rate": 3.847874077479666e-07, + "loss": 0.2736, + "step": 35283 + }, + { + "epoch": 1.6528786246310956, + "grad_norm": 0.5761399309835601, + "learning_rate": 3.8468632121219317e-07, + "loss": 0.2746, + "step": 35284 + }, + { + "epoch": 1.652925469621024, + "grad_norm": 0.5914675888081313, + "learning_rate": 3.845852468495184e-07, + "loss": 0.2739, + "step": 35285 + }, + { + "epoch": 1.6529723146109525, + "grad_norm": 0.6095500246468408, + "learning_rate": 3.8448418466052434e-07, + "loss": 0.2713, + "step": 35286 + }, + { + "epoch": 1.6530191596008805, + "grad_norm": 0.5861629681190559, + "learning_rate": 3.8438313464579183e-07, + "loss": 0.2733, + "step": 35287 + }, + { + "epoch": 1.653066004590809, + "grad_norm": 0.5915622451541247, + "learning_rate": 3.8428209680590243e-07, + "loss": 0.2813, + "step": 35288 + }, + { + "epoch": 1.6531128495807375, + "grad_norm": 0.6004380877502568, + "learning_rate": 3.841810711414376e-07, + "loss": 0.2728, + "step": 35289 + }, + { + "epoch": 1.6531596945706657, + "grad_norm": 0.5878623009328666, + "learning_rate": 3.840800576529788e-07, + "loss": 0.2645, + "step": 35290 + }, + { + "epoch": 1.653206539560594, + "grad_norm": 0.574317040427641, + "learning_rate": 3.839790563411078e-07, + "loss": 0.2732, + "step": 35291 + }, + { + "epoch": 1.6532533845505224, + "grad_norm": 0.5881059216496118, + "learning_rate": 3.838780672064057e-07, + "loss": 0.274, + "step": 35292 + }, + { + "epoch": 1.6533002295404506, + "grad_norm": 0.6168918198678267, + "learning_rate": 3.837770902494531e-07, + "loss": 0.2838, + "step": 35293 + }, + { + "epoch": 1.6533470745303789, + "grad_norm": 0.6131827887927851, + "learning_rate": 3.836761254708321e-07, + "loss": 0.2862, + "step": 35294 + }, + { + "epoch": 1.6533939195203073, + "grad_norm": 0.6189270888676794, + "learning_rate": 3.8357517287112236e-07, + "loss": 0.2864, + "step": 35295 + }, + { + "epoch": 1.6534407645102356, + "grad_norm": 0.5955689336467819, + "learning_rate": 3.8347423245090553e-07, + "loss": 0.2788, + "step": 35296 + }, + { + "epoch": 1.6534876095001638, + "grad_norm": 0.5602891250154121, + "learning_rate": 3.8337330421076233e-07, + "loss": 0.253, + "step": 35297 + }, + { + "epoch": 1.6535344544900923, + "grad_norm": 0.6087279285343378, + "learning_rate": 3.8327238815127426e-07, + "loss": 0.2728, + "step": 35298 + }, + { + "epoch": 1.6535812994800207, + "grad_norm": 0.6035502293309037, + "learning_rate": 3.831714842730219e-07, + "loss": 0.2759, + "step": 35299 + }, + { + "epoch": 1.653628144469949, + "grad_norm": 0.5718004097394632, + "learning_rate": 3.830705925765854e-07, + "loss": 0.2661, + "step": 35300 + }, + { + "epoch": 1.6536749894598772, + "grad_norm": 0.6076069831852152, + "learning_rate": 3.829697130625454e-07, + "loss": 0.2733, + "step": 35301 + }, + { + "epoch": 1.6537218344498057, + "grad_norm": 0.6102620485482947, + "learning_rate": 3.828688457314822e-07, + "loss": 0.2594, + "step": 35302 + }, + { + "epoch": 1.653768679439734, + "grad_norm": 0.6119281659534153, + "learning_rate": 3.8276799058397707e-07, + "loss": 0.283, + "step": 35303 + }, + { + "epoch": 1.6538155244296622, + "grad_norm": 0.6108555456884401, + "learning_rate": 3.826671476206098e-07, + "loss": 0.2723, + "step": 35304 + }, + { + "epoch": 1.6538623694195906, + "grad_norm": 0.6003773861813478, + "learning_rate": 3.8256631684196133e-07, + "loss": 0.263, + "step": 35305 + }, + { + "epoch": 1.6539092144095189, + "grad_norm": 0.6063742163748955, + "learning_rate": 3.824654982486112e-07, + "loss": 0.2976, + "step": 35306 + }, + { + "epoch": 1.6539560593994471, + "grad_norm": 0.6283403550989353, + "learning_rate": 3.8236469184114026e-07, + "loss": 0.2829, + "step": 35307 + }, + { + "epoch": 1.6540029043893756, + "grad_norm": 0.6435282807565601, + "learning_rate": 3.822638976201279e-07, + "loss": 0.2776, + "step": 35308 + }, + { + "epoch": 1.654049749379304, + "grad_norm": 0.5893699165658279, + "learning_rate": 3.8216311558615463e-07, + "loss": 0.2654, + "step": 35309 + }, + { + "epoch": 1.654096594369232, + "grad_norm": 0.6398192962503414, + "learning_rate": 3.820623457398001e-07, + "loss": 0.2912, + "step": 35310 + }, + { + "epoch": 1.6541434393591605, + "grad_norm": 0.6031515239036507, + "learning_rate": 3.819615880816452e-07, + "loss": 0.2696, + "step": 35311 + }, + { + "epoch": 1.654190284349089, + "grad_norm": 0.6443098183457545, + "learning_rate": 3.818608426122683e-07, + "loss": 0.2776, + "step": 35312 + }, + { + "epoch": 1.6542371293390172, + "grad_norm": 0.594277889251924, + "learning_rate": 3.817601093322498e-07, + "loss": 0.2701, + "step": 35313 + }, + { + "epoch": 1.6542839743289455, + "grad_norm": 0.5781306993752917, + "learning_rate": 3.8165938824217016e-07, + "loss": 0.2642, + "step": 35314 + }, + { + "epoch": 1.654330819318874, + "grad_norm": 0.638436826302175, + "learning_rate": 3.8155867934260763e-07, + "loss": 0.2889, + "step": 35315 + }, + { + "epoch": 1.6543776643088022, + "grad_norm": 0.6675283514787749, + "learning_rate": 3.814579826341427e-07, + "loss": 0.2934, + "step": 35316 + }, + { + "epoch": 1.6544245092987304, + "grad_norm": 0.538627580588784, + "learning_rate": 3.8135729811735495e-07, + "loss": 0.2577, + "step": 35317 + }, + { + "epoch": 1.6544713542886589, + "grad_norm": 0.5595844319465966, + "learning_rate": 3.8125662579282294e-07, + "loss": 0.2644, + "step": 35318 + }, + { + "epoch": 1.6545181992785871, + "grad_norm": 0.60435890890688, + "learning_rate": 3.811559656611266e-07, + "loss": 0.2826, + "step": 35319 + }, + { + "epoch": 1.6545650442685154, + "grad_norm": 0.5937434298999018, + "learning_rate": 3.810553177228449e-07, + "loss": 0.2781, + "step": 35320 + }, + { + "epoch": 1.6546118892584438, + "grad_norm": 0.5965478158963772, + "learning_rate": 3.809546819785581e-07, + "loss": 0.277, + "step": 35321 + }, + { + "epoch": 1.6546587342483723, + "grad_norm": 0.6242765126937757, + "learning_rate": 3.808540584288439e-07, + "loss": 0.2783, + "step": 35322 + }, + { + "epoch": 1.6547055792383003, + "grad_norm": 0.6074789889241914, + "learning_rate": 3.8075344707428244e-07, + "loss": 0.2636, + "step": 35323 + }, + { + "epoch": 1.6547524242282288, + "grad_norm": 0.5556140281610336, + "learning_rate": 3.8065284791545173e-07, + "loss": 0.2699, + "step": 35324 + }, + { + "epoch": 1.6547992692181572, + "grad_norm": 0.6380121572443975, + "learning_rate": 3.8055226095293107e-07, + "loss": 0.29, + "step": 35325 + }, + { + "epoch": 1.6548461142080855, + "grad_norm": 0.6005359203705086, + "learning_rate": 3.804516861872995e-07, + "loss": 0.2736, + "step": 35326 + }, + { + "epoch": 1.6548929591980137, + "grad_norm": 0.6135227084010331, + "learning_rate": 3.8035112361913597e-07, + "loss": 0.2748, + "step": 35327 + }, + { + "epoch": 1.6549398041879422, + "grad_norm": 0.612543475151393, + "learning_rate": 3.8025057324901916e-07, + "loss": 0.2737, + "step": 35328 + }, + { + "epoch": 1.6549866491778704, + "grad_norm": 0.6060682641300789, + "learning_rate": 3.801500350775272e-07, + "loss": 0.2881, + "step": 35329 + }, + { + "epoch": 1.6550334941677987, + "grad_norm": 0.5665462644960871, + "learning_rate": 3.800495091052395e-07, + "loss": 0.2515, + "step": 35330 + }, + { + "epoch": 1.6550803391577271, + "grad_norm": 0.5879089205775994, + "learning_rate": 3.799489953327337e-07, + "loss": 0.2707, + "step": 35331 + }, + { + "epoch": 1.6551271841476554, + "grad_norm": 0.6157469643292612, + "learning_rate": 3.7984849376058814e-07, + "loss": 0.2729, + "step": 35332 + }, + { + "epoch": 1.6551740291375836, + "grad_norm": 0.6135507675871563, + "learning_rate": 3.7974800438938195e-07, + "loss": 0.2867, + "step": 35333 + }, + { + "epoch": 1.655220874127512, + "grad_norm": 0.5770994206986543, + "learning_rate": 3.7964752721969284e-07, + "loss": 0.2565, + "step": 35334 + }, + { + "epoch": 1.6552677191174405, + "grad_norm": 0.6015449509815435, + "learning_rate": 3.7954706225210026e-07, + "loss": 0.2649, + "step": 35335 + }, + { + "epoch": 1.6553145641073688, + "grad_norm": 0.6214042312417736, + "learning_rate": 3.7944660948718096e-07, + "loss": 0.2753, + "step": 35336 + }, + { + "epoch": 1.655361409097297, + "grad_norm": 0.6116222688404593, + "learning_rate": 3.793461689255132e-07, + "loss": 0.2709, + "step": 35337 + }, + { + "epoch": 1.6554082540872255, + "grad_norm": 0.6101224983415472, + "learning_rate": 3.79245740567675e-07, + "loss": 0.2827, + "step": 35338 + }, + { + "epoch": 1.6554550990771537, + "grad_norm": 0.597148766883964, + "learning_rate": 3.7914532441424457e-07, + "loss": 0.2669, + "step": 35339 + }, + { + "epoch": 1.655501944067082, + "grad_norm": 0.6177628559146043, + "learning_rate": 3.7904492046579977e-07, + "loss": 0.2656, + "step": 35340 + }, + { + "epoch": 1.6555487890570104, + "grad_norm": 0.5902956210527688, + "learning_rate": 3.7894452872291874e-07, + "loss": 0.2701, + "step": 35341 + }, + { + "epoch": 1.6555956340469387, + "grad_norm": 0.5921103372739958, + "learning_rate": 3.7884414918617857e-07, + "loss": 0.2793, + "step": 35342 + }, + { + "epoch": 1.655642479036867, + "grad_norm": 0.602126131053842, + "learning_rate": 3.7874378185615763e-07, + "loss": 0.2866, + "step": 35343 + }, + { + "epoch": 1.6556893240267954, + "grad_norm": 0.5752025992756606, + "learning_rate": 3.7864342673343243e-07, + "loss": 0.2562, + "step": 35344 + }, + { + "epoch": 1.6557361690167238, + "grad_norm": 0.5808013972862334, + "learning_rate": 3.7854308381858134e-07, + "loss": 0.2646, + "step": 35345 + }, + { + "epoch": 1.6557830140066518, + "grad_norm": 0.5675834424593883, + "learning_rate": 3.784427531121815e-07, + "loss": 0.2445, + "step": 35346 + }, + { + "epoch": 1.6558298589965803, + "grad_norm": 0.5879348140543732, + "learning_rate": 3.783424346148104e-07, + "loss": 0.2631, + "step": 35347 + }, + { + "epoch": 1.6558767039865088, + "grad_norm": 0.6122385034358688, + "learning_rate": 3.782421283270457e-07, + "loss": 0.2849, + "step": 35348 + }, + { + "epoch": 1.655923548976437, + "grad_norm": 0.602241479336808, + "learning_rate": 3.7814183424946444e-07, + "loss": 0.27, + "step": 35349 + }, + { + "epoch": 1.6559703939663653, + "grad_norm": 0.5874311385674379, + "learning_rate": 3.7804155238264304e-07, + "loss": 0.2692, + "step": 35350 + }, + { + "epoch": 1.6560172389562937, + "grad_norm": 0.5878130297073113, + "learning_rate": 3.779412827271592e-07, + "loss": 0.2608, + "step": 35351 + }, + { + "epoch": 1.656064083946222, + "grad_norm": 0.551265361818106, + "learning_rate": 3.778410252835898e-07, + "loss": 0.2655, + "step": 35352 + }, + { + "epoch": 1.6561109289361502, + "grad_norm": 0.6037240150500821, + "learning_rate": 3.777407800525121e-07, + "loss": 0.2786, + "step": 35353 + }, + { + "epoch": 1.6561577739260787, + "grad_norm": 0.5855702716754769, + "learning_rate": 3.776405470345032e-07, + "loss": 0.2661, + "step": 35354 + }, + { + "epoch": 1.656204618916007, + "grad_norm": 0.612155334161344, + "learning_rate": 3.7754032623013885e-07, + "loss": 0.2813, + "step": 35355 + }, + { + "epoch": 1.6562514639059351, + "grad_norm": 0.5853752118748808, + "learning_rate": 3.774401176399972e-07, + "loss": 0.2629, + "step": 35356 + }, + { + "epoch": 1.6562983088958636, + "grad_norm": 0.6422391588365648, + "learning_rate": 3.7733992126465364e-07, + "loss": 0.2891, + "step": 35357 + }, + { + "epoch": 1.656345153885792, + "grad_norm": 0.6042589864170806, + "learning_rate": 3.7723973710468544e-07, + "loss": 0.2658, + "step": 35358 + }, + { + "epoch": 1.65639199887572, + "grad_norm": 0.6337173302088391, + "learning_rate": 3.771395651606688e-07, + "loss": 0.2787, + "step": 35359 + }, + { + "epoch": 1.6564388438656485, + "grad_norm": 0.6005182965716424, + "learning_rate": 3.7703940543318107e-07, + "loss": 0.2503, + "step": 35360 + }, + { + "epoch": 1.656485688855577, + "grad_norm": 0.6158414757637253, + "learning_rate": 3.7693925792279734e-07, + "loss": 0.279, + "step": 35361 + }, + { + "epoch": 1.6565325338455053, + "grad_norm": 0.5998243132973963, + "learning_rate": 3.7683912263009465e-07, + "loss": 0.2678, + "step": 35362 + }, + { + "epoch": 1.6565793788354335, + "grad_norm": 0.5499980497079369, + "learning_rate": 3.7673899955564974e-07, + "loss": 0.2684, + "step": 35363 + }, + { + "epoch": 1.656626223825362, + "grad_norm": 0.5975654628801501, + "learning_rate": 3.766388887000375e-07, + "loss": 0.2734, + "step": 35364 + }, + { + "epoch": 1.6566730688152902, + "grad_norm": 0.5950853986432955, + "learning_rate": 3.765387900638348e-07, + "loss": 0.2552, + "step": 35365 + }, + { + "epoch": 1.6567199138052184, + "grad_norm": 0.5999533925742696, + "learning_rate": 3.764387036476183e-07, + "loss": 0.2556, + "step": 35366 + }, + { + "epoch": 1.656766758795147, + "grad_norm": 0.6049705820082292, + "learning_rate": 3.7633862945196275e-07, + "loss": 0.2702, + "step": 35367 + }, + { + "epoch": 1.6568136037850751, + "grad_norm": 0.5841314777706886, + "learning_rate": 3.7623856747744466e-07, + "loss": 0.2764, + "step": 35368 + }, + { + "epoch": 1.6568604487750034, + "grad_norm": 0.632314305199429, + "learning_rate": 3.7613851772463986e-07, + "loss": 0.2861, + "step": 35369 + }, + { + "epoch": 1.6569072937649318, + "grad_norm": 0.6050370431688548, + "learning_rate": 3.760384801941247e-07, + "loss": 0.265, + "step": 35370 + }, + { + "epoch": 1.6569541387548603, + "grad_norm": 0.5985043141907935, + "learning_rate": 3.7593845488647336e-07, + "loss": 0.267, + "step": 35371 + }, + { + "epoch": 1.6570009837447885, + "grad_norm": 0.6306618253899986, + "learning_rate": 3.7583844180226315e-07, + "loss": 0.2715, + "step": 35372 + }, + { + "epoch": 1.6570478287347168, + "grad_norm": 0.5925149935429838, + "learning_rate": 3.7573844094206836e-07, + "loss": 0.2675, + "step": 35373 + }, + { + "epoch": 1.6570946737246453, + "grad_norm": 0.5785690167838158, + "learning_rate": 3.756384523064646e-07, + "loss": 0.2706, + "step": 35374 + }, + { + "epoch": 1.6571415187145735, + "grad_norm": 0.5847925479855095, + "learning_rate": 3.755384758960279e-07, + "loss": 0.2773, + "step": 35375 + }, + { + "epoch": 1.6571883637045017, + "grad_norm": 0.5901187595756675, + "learning_rate": 3.754385117113332e-07, + "loss": 0.2636, + "step": 35376 + }, + { + "epoch": 1.6572352086944302, + "grad_norm": 0.5851657637480331, + "learning_rate": 3.753385597529566e-07, + "loss": 0.2685, + "step": 35377 + }, + { + "epoch": 1.6572820536843584, + "grad_norm": 0.5776469019114154, + "learning_rate": 3.752386200214722e-07, + "loss": 0.254, + "step": 35378 + }, + { + "epoch": 1.6573288986742867, + "grad_norm": 0.5893033149464038, + "learning_rate": 3.751386925174552e-07, + "loss": 0.2563, + "step": 35379 + }, + { + "epoch": 1.6573757436642151, + "grad_norm": 0.6065785872568645, + "learning_rate": 3.7503877724148095e-07, + "loss": 0.2518, + "step": 35380 + }, + { + "epoch": 1.6574225886541436, + "grad_norm": 0.6003576289838717, + "learning_rate": 3.7493887419412453e-07, + "loss": 0.2749, + "step": 35381 + }, + { + "epoch": 1.6574694336440716, + "grad_norm": 0.5884550044643918, + "learning_rate": 3.7483898337596055e-07, + "loss": 0.2621, + "step": 35382 + }, + { + "epoch": 1.657516278634, + "grad_norm": 0.6356556725220135, + "learning_rate": 3.7473910478756426e-07, + "loss": 0.2905, + "step": 35383 + }, + { + "epoch": 1.6575631236239285, + "grad_norm": 0.5516322154831023, + "learning_rate": 3.7463923842951083e-07, + "loss": 0.2384, + "step": 35384 + }, + { + "epoch": 1.6576099686138568, + "grad_norm": 0.5810296231341919, + "learning_rate": 3.7453938430237425e-07, + "loss": 0.2754, + "step": 35385 + }, + { + "epoch": 1.657656813603785, + "grad_norm": 0.561142671242529, + "learning_rate": 3.7443954240672875e-07, + "loss": 0.252, + "step": 35386 + }, + { + "epoch": 1.6577036585937135, + "grad_norm": 0.5572141089384605, + "learning_rate": 3.7433971274314945e-07, + "loss": 0.2677, + "step": 35387 + }, + { + "epoch": 1.6577505035836417, + "grad_norm": 0.5854636950886081, + "learning_rate": 3.742398953122109e-07, + "loss": 0.268, + "step": 35388 + }, + { + "epoch": 1.65779734857357, + "grad_norm": 0.5953810696858435, + "learning_rate": 3.741400901144876e-07, + "loss": 0.2711, + "step": 35389 + }, + { + "epoch": 1.6578441935634984, + "grad_norm": 0.610473002361994, + "learning_rate": 3.740402971505541e-07, + "loss": 0.2709, + "step": 35390 + }, + { + "epoch": 1.6578910385534267, + "grad_norm": 0.5725535053244489, + "learning_rate": 3.739405164209839e-07, + "loss": 0.2729, + "step": 35391 + }, + { + "epoch": 1.657937883543355, + "grad_norm": 0.6421247132750499, + "learning_rate": 3.7384074792635226e-07, + "loss": 0.2799, + "step": 35392 + }, + { + "epoch": 1.6579847285332834, + "grad_norm": 0.6212452377256118, + "learning_rate": 3.7374099166723216e-07, + "loss": 0.2907, + "step": 35393 + }, + { + "epoch": 1.6580315735232118, + "grad_norm": 0.5752147081493559, + "learning_rate": 3.7364124764419807e-07, + "loss": 0.2682, + "step": 35394 + }, + { + "epoch": 1.6580784185131399, + "grad_norm": 0.6095236775617684, + "learning_rate": 3.735415158578243e-07, + "loss": 0.266, + "step": 35395 + }, + { + "epoch": 1.6581252635030683, + "grad_norm": 0.5731388965236213, + "learning_rate": 3.7344179630868447e-07, + "loss": 0.2702, + "step": 35396 + }, + { + "epoch": 1.6581721084929968, + "grad_norm": 0.5681794208221917, + "learning_rate": 3.7334208899735354e-07, + "loss": 0.2651, + "step": 35397 + }, + { + "epoch": 1.658218953482925, + "grad_norm": 0.6047524626998041, + "learning_rate": 3.73242393924404e-07, + "loss": 0.2721, + "step": 35398 + }, + { + "epoch": 1.6582657984728533, + "grad_norm": 0.5673442045404516, + "learning_rate": 3.7314271109040955e-07, + "loss": 0.2652, + "step": 35399 + }, + { + "epoch": 1.6583126434627817, + "grad_norm": 0.5750575537514911, + "learning_rate": 3.730430404959443e-07, + "loss": 0.2555, + "step": 35400 + }, + { + "epoch": 1.65835948845271, + "grad_norm": 0.5849959362700068, + "learning_rate": 3.729433821415815e-07, + "loss": 0.2675, + "step": 35401 + }, + { + "epoch": 1.6584063334426382, + "grad_norm": 0.6116063715410976, + "learning_rate": 3.728437360278953e-07, + "loss": 0.2633, + "step": 35402 + }, + { + "epoch": 1.6584531784325667, + "grad_norm": 0.6043388641000841, + "learning_rate": 3.7274410215545897e-07, + "loss": 0.2576, + "step": 35403 + }, + { + "epoch": 1.658500023422495, + "grad_norm": 0.5740440271214028, + "learning_rate": 3.7264448052484526e-07, + "loss": 0.2724, + "step": 35404 + }, + { + "epoch": 1.6585468684124232, + "grad_norm": 0.5886274828175168, + "learning_rate": 3.7254487113662833e-07, + "loss": 0.2581, + "step": 35405 + }, + { + "epoch": 1.6585937134023516, + "grad_norm": 0.6454116349848783, + "learning_rate": 3.724452739913806e-07, + "loss": 0.2852, + "step": 35406 + }, + { + "epoch": 1.65864055839228, + "grad_norm": 0.6119375917401794, + "learning_rate": 3.723456890896754e-07, + "loss": 0.2845, + "step": 35407 + }, + { + "epoch": 1.6586874033822083, + "grad_norm": 0.5881112808283432, + "learning_rate": 3.722461164320859e-07, + "loss": 0.2581, + "step": 35408 + }, + { + "epoch": 1.6587342483721366, + "grad_norm": 0.5646594380065953, + "learning_rate": 3.7214655601918615e-07, + "loss": 0.2508, + "step": 35409 + }, + { + "epoch": 1.658781093362065, + "grad_norm": 0.6702483268205062, + "learning_rate": 3.720470078515473e-07, + "loss": 0.2907, + "step": 35410 + }, + { + "epoch": 1.6588279383519933, + "grad_norm": 0.6455884143446835, + "learning_rate": 3.7194747192974304e-07, + "loss": 0.2923, + "step": 35411 + }, + { + "epoch": 1.6588747833419215, + "grad_norm": 0.6559734797510555, + "learning_rate": 3.718479482543469e-07, + "loss": 0.2817, + "step": 35412 + }, + { + "epoch": 1.65892162833185, + "grad_norm": 0.5689824010647243, + "learning_rate": 3.717484368259305e-07, + "loss": 0.2482, + "step": 35413 + }, + { + "epoch": 1.6589684733217782, + "grad_norm": 0.6022502219032726, + "learning_rate": 3.716489376450666e-07, + "loss": 0.2678, + "step": 35414 + }, + { + "epoch": 1.6590153183117065, + "grad_norm": 0.6241100027130968, + "learning_rate": 3.7154945071232906e-07, + "loss": 0.2713, + "step": 35415 + }, + { + "epoch": 1.659062163301635, + "grad_norm": 0.622636169637902, + "learning_rate": 3.714499760282886e-07, + "loss": 0.2851, + "step": 35416 + }, + { + "epoch": 1.6591090082915634, + "grad_norm": 0.5939755083473279, + "learning_rate": 3.713505135935186e-07, + "loss": 0.2704, + "step": 35417 + }, + { + "epoch": 1.6591558532814914, + "grad_norm": 0.615149054112751, + "learning_rate": 3.712510634085914e-07, + "loss": 0.2702, + "step": 35418 + }, + { + "epoch": 1.6592026982714199, + "grad_norm": 0.5959522652731278, + "learning_rate": 3.711516254740799e-07, + "loss": 0.2723, + "step": 35419 + }, + { + "epoch": 1.6592495432613483, + "grad_norm": 0.5769301458117054, + "learning_rate": 3.7105219979055503e-07, + "loss": 0.2737, + "step": 35420 + }, + { + "epoch": 1.6592963882512766, + "grad_norm": 0.5816595018966849, + "learning_rate": 3.7095278635859046e-07, + "loss": 0.2692, + "step": 35421 + }, + { + "epoch": 1.6593432332412048, + "grad_norm": 0.585039069040915, + "learning_rate": 3.7085338517875684e-07, + "loss": 0.254, + "step": 35422 + }, + { + "epoch": 1.6593900782311333, + "grad_norm": 0.6048751208517334, + "learning_rate": 3.7075399625162686e-07, + "loss": 0.2818, + "step": 35423 + }, + { + "epoch": 1.6594369232210615, + "grad_norm": 0.5889575427439512, + "learning_rate": 3.7065461957777216e-07, + "loss": 0.2741, + "step": 35424 + }, + { + "epoch": 1.6594837682109898, + "grad_norm": 0.5535435692085082, + "learning_rate": 3.705552551577654e-07, + "loss": 0.2597, + "step": 35425 + }, + { + "epoch": 1.6595306132009182, + "grad_norm": 0.6805685694932654, + "learning_rate": 3.704559029921781e-07, + "loss": 0.2906, + "step": 35426 + }, + { + "epoch": 1.6595774581908465, + "grad_norm": 0.6207733583645243, + "learning_rate": 3.7035656308158194e-07, + "loss": 0.2811, + "step": 35427 + }, + { + "epoch": 1.6596243031807747, + "grad_norm": 0.5684855165259903, + "learning_rate": 3.702572354265482e-07, + "loss": 0.2766, + "step": 35428 + }, + { + "epoch": 1.6596711481707032, + "grad_norm": 0.5537921953749284, + "learning_rate": 3.7015792002764837e-07, + "loss": 0.2869, + "step": 35429 + }, + { + "epoch": 1.6597179931606316, + "grad_norm": 0.6072242599336771, + "learning_rate": 3.7005861688545476e-07, + "loss": 0.2704, + "step": 35430 + }, + { + "epoch": 1.6597648381505596, + "grad_norm": 0.5997446773820836, + "learning_rate": 3.6995932600053824e-07, + "loss": 0.2677, + "step": 35431 + }, + { + "epoch": 1.659811683140488, + "grad_norm": 0.5837761064031987, + "learning_rate": 3.698600473734712e-07, + "loss": 0.2687, + "step": 35432 + }, + { + "epoch": 1.6598585281304166, + "grad_norm": 0.5813049954893571, + "learning_rate": 3.6976078100482345e-07, + "loss": 0.2691, + "step": 35433 + }, + { + "epoch": 1.6599053731203448, + "grad_norm": 0.5635128293697305, + "learning_rate": 3.696615268951675e-07, + "loss": 0.2656, + "step": 35434 + }, + { + "epoch": 1.659952218110273, + "grad_norm": 0.6148232137362896, + "learning_rate": 3.6956228504507365e-07, + "loss": 0.277, + "step": 35435 + }, + { + "epoch": 1.6599990631002015, + "grad_norm": 0.5388318479492977, + "learning_rate": 3.694630554551132e-07, + "loss": 0.2533, + "step": 35436 + }, + { + "epoch": 1.6600459080901298, + "grad_norm": 0.594335487537456, + "learning_rate": 3.693638381258577e-07, + "loss": 0.2771, + "step": 35437 + }, + { + "epoch": 1.660092753080058, + "grad_norm": 0.655056182949989, + "learning_rate": 3.6926463305787753e-07, + "loss": 0.2765, + "step": 35438 + }, + { + "epoch": 1.6601395980699865, + "grad_norm": 0.5930461817467947, + "learning_rate": 3.6916544025174443e-07, + "loss": 0.2653, + "step": 35439 + }, + { + "epoch": 1.6601864430599147, + "grad_norm": 0.5814737873790109, + "learning_rate": 3.690662597080286e-07, + "loss": 0.2697, + "step": 35440 + }, + { + "epoch": 1.660233288049843, + "grad_norm": 0.6024600192035857, + "learning_rate": 3.689670914273005e-07, + "loss": 0.2725, + "step": 35441 + }, + { + "epoch": 1.6602801330397714, + "grad_norm": 0.5733918848528521, + "learning_rate": 3.68867935410131e-07, + "loss": 0.2584, + "step": 35442 + }, + { + "epoch": 1.6603269780296999, + "grad_norm": 0.5721297084175097, + "learning_rate": 3.687687916570909e-07, + "loss": 0.269, + "step": 35443 + }, + { + "epoch": 1.660373823019628, + "grad_norm": 0.5538627368043814, + "learning_rate": 3.686696601687509e-07, + "loss": 0.2455, + "step": 35444 + }, + { + "epoch": 1.6604206680095563, + "grad_norm": 0.583115788954521, + "learning_rate": 3.685705409456816e-07, + "loss": 0.275, + "step": 35445 + }, + { + "epoch": 1.6604675129994848, + "grad_norm": 0.6669324108687696, + "learning_rate": 3.684714339884529e-07, + "loss": 0.2653, + "step": 35446 + }, + { + "epoch": 1.660514357989413, + "grad_norm": 0.602568366922815, + "learning_rate": 3.683723392976357e-07, + "loss": 0.2678, + "step": 35447 + }, + { + "epoch": 1.6605612029793413, + "grad_norm": 0.5955827027203164, + "learning_rate": 3.682732568737993e-07, + "loss": 0.2576, + "step": 35448 + }, + { + "epoch": 1.6606080479692698, + "grad_norm": 0.6083345196573404, + "learning_rate": 3.681741867175148e-07, + "loss": 0.2564, + "step": 35449 + }, + { + "epoch": 1.660654892959198, + "grad_norm": 0.6048582793062767, + "learning_rate": 3.680751288293519e-07, + "loss": 0.2797, + "step": 35450 + }, + { + "epoch": 1.6607017379491262, + "grad_norm": 0.5895239198628535, + "learning_rate": 3.679760832098811e-07, + "loss": 0.2705, + "step": 35451 + }, + { + "epoch": 1.6607485829390547, + "grad_norm": 0.6277443863514989, + "learning_rate": 3.6787704985967176e-07, + "loss": 0.2749, + "step": 35452 + }, + { + "epoch": 1.6607954279289832, + "grad_norm": 0.5672564634384558, + "learning_rate": 3.6777802877929426e-07, + "loss": 0.26, + "step": 35453 + }, + { + "epoch": 1.6608422729189112, + "grad_norm": 0.5843076441714179, + "learning_rate": 3.6767901996931863e-07, + "loss": 0.2725, + "step": 35454 + }, + { + "epoch": 1.6608891179088396, + "grad_norm": 0.5931583410779535, + "learning_rate": 3.675800234303137e-07, + "loss": 0.2703, + "step": 35455 + }, + { + "epoch": 1.660935962898768, + "grad_norm": 0.576693734074921, + "learning_rate": 3.674810391628497e-07, + "loss": 0.2718, + "step": 35456 + }, + { + "epoch": 1.6609828078886963, + "grad_norm": 0.6732200630100033, + "learning_rate": 3.6738206716749714e-07, + "loss": 0.2891, + "step": 35457 + }, + { + "epoch": 1.6610296528786246, + "grad_norm": 0.5599866925266577, + "learning_rate": 3.672831074448241e-07, + "loss": 0.2756, + "step": 35458 + }, + { + "epoch": 1.661076497868553, + "grad_norm": 0.5851306913345322, + "learning_rate": 3.671841599954007e-07, + "loss": 0.2615, + "step": 35459 + }, + { + "epoch": 1.6611233428584813, + "grad_norm": 0.5813578025738033, + "learning_rate": 3.6708522481979647e-07, + "loss": 0.271, + "step": 35460 + }, + { + "epoch": 1.6611701878484095, + "grad_norm": 0.5947238772578736, + "learning_rate": 3.6698630191858125e-07, + "loss": 0.2674, + "step": 35461 + }, + { + "epoch": 1.661217032838338, + "grad_norm": 0.6240782728859853, + "learning_rate": 3.668873912923232e-07, + "loss": 0.2728, + "step": 35462 + }, + { + "epoch": 1.6612638778282662, + "grad_norm": 0.5677675022260164, + "learning_rate": 3.6678849294159216e-07, + "loss": 0.2546, + "step": 35463 + }, + { + "epoch": 1.6613107228181945, + "grad_norm": 0.621174832850324, + "learning_rate": 3.666896068669576e-07, + "loss": 0.2753, + "step": 35464 + }, + { + "epoch": 1.661357567808123, + "grad_norm": 0.5897881024829856, + "learning_rate": 3.6659073306898746e-07, + "loss": 0.2737, + "step": 35465 + }, + { + "epoch": 1.6614044127980514, + "grad_norm": 0.5750133928906984, + "learning_rate": 3.664918715482518e-07, + "loss": 0.2534, + "step": 35466 + }, + { + "epoch": 1.6614512577879794, + "grad_norm": 0.5783883717945945, + "learning_rate": 3.663930223053189e-07, + "loss": 0.2664, + "step": 35467 + }, + { + "epoch": 1.6614981027779079, + "grad_norm": 0.602303384813075, + "learning_rate": 3.662941853407587e-07, + "loss": 0.2733, + "step": 35468 + }, + { + "epoch": 1.6615449477678363, + "grad_norm": 0.6386473577991957, + "learning_rate": 3.6619536065513826e-07, + "loss": 0.2649, + "step": 35469 + }, + { + "epoch": 1.6615917927577646, + "grad_norm": 0.6197791459466634, + "learning_rate": 3.6609654824902806e-07, + "loss": 0.2698, + "step": 35470 + }, + { + "epoch": 1.6616386377476928, + "grad_norm": 0.5786258582486845, + "learning_rate": 3.659977481229951e-07, + "loss": 0.2581, + "step": 35471 + }, + { + "epoch": 1.6616854827376213, + "grad_norm": 0.5774988474897362, + "learning_rate": 3.658989602776089e-07, + "loss": 0.261, + "step": 35472 + }, + { + "epoch": 1.6617323277275495, + "grad_norm": 0.6170567591468161, + "learning_rate": 3.6580018471343794e-07, + "loss": 0.2903, + "step": 35473 + }, + { + "epoch": 1.6617791727174778, + "grad_norm": 0.5901350859086685, + "learning_rate": 3.657014214310503e-07, + "loss": 0.273, + "step": 35474 + }, + { + "epoch": 1.6618260177074062, + "grad_norm": 0.6310979750834479, + "learning_rate": 3.6560267043101503e-07, + "loss": 0.2724, + "step": 35475 + }, + { + "epoch": 1.6618728626973345, + "grad_norm": 0.5721577045024935, + "learning_rate": 3.655039317139e-07, + "loss": 0.2602, + "step": 35476 + }, + { + "epoch": 1.6619197076872627, + "grad_norm": 0.6272848779503012, + "learning_rate": 3.6540520528027284e-07, + "loss": 0.2827, + "step": 35477 + }, + { + "epoch": 1.6619665526771912, + "grad_norm": 0.57798747541122, + "learning_rate": 3.653064911307022e-07, + "loss": 0.2565, + "step": 35478 + }, + { + "epoch": 1.6620133976671196, + "grad_norm": 0.5870890010135014, + "learning_rate": 3.6520778926575627e-07, + "loss": 0.2694, + "step": 35479 + }, + { + "epoch": 1.6620602426570479, + "grad_norm": 0.6297605772003152, + "learning_rate": 3.6510909968600296e-07, + "loss": 0.28, + "step": 35480 + }, + { + "epoch": 1.6621070876469761, + "grad_norm": 0.6081269857430482, + "learning_rate": 3.650104223920106e-07, + "loss": 0.291, + "step": 35481 + }, + { + "epoch": 1.6621539326369046, + "grad_norm": 0.6013768106944674, + "learning_rate": 3.649117573843461e-07, + "loss": 0.2727, + "step": 35482 + }, + { + "epoch": 1.6622007776268328, + "grad_norm": 0.593002136400352, + "learning_rate": 3.6481310466357835e-07, + "loss": 0.2651, + "step": 35483 + }, + { + "epoch": 1.662247622616761, + "grad_norm": 0.5936107625756675, + "learning_rate": 3.6471446423027415e-07, + "loss": 0.2696, + "step": 35484 + }, + { + "epoch": 1.6622944676066895, + "grad_norm": 0.6232579641346095, + "learning_rate": 3.646158360850013e-07, + "loss": 0.2781, + "step": 35485 + }, + { + "epoch": 1.6623413125966178, + "grad_norm": 0.604738546969848, + "learning_rate": 3.6451722022832785e-07, + "loss": 0.2668, + "step": 35486 + }, + { + "epoch": 1.662388157586546, + "grad_norm": 0.5972540253259856, + "learning_rate": 3.6441861666082073e-07, + "loss": 0.2687, + "step": 35487 + }, + { + "epoch": 1.6624350025764745, + "grad_norm": 0.5805003595930414, + "learning_rate": 3.643200253830487e-07, + "loss": 0.2655, + "step": 35488 + }, + { + "epoch": 1.662481847566403, + "grad_norm": 0.5383566864029278, + "learning_rate": 3.642214463955779e-07, + "loss": 0.2509, + "step": 35489 + }, + { + "epoch": 1.662528692556331, + "grad_norm": 0.5720819392250696, + "learning_rate": 3.6412287969897516e-07, + "loss": 0.268, + "step": 35490 + }, + { + "epoch": 1.6625755375462594, + "grad_norm": 0.5996922813638239, + "learning_rate": 3.640243252938086e-07, + "loss": 0.2812, + "step": 35491 + }, + { + "epoch": 1.6626223825361879, + "grad_norm": 0.6393996194103279, + "learning_rate": 3.6392578318064503e-07, + "loss": 0.2784, + "step": 35492 + }, + { + "epoch": 1.6626692275261161, + "grad_norm": 0.5979866309306485, + "learning_rate": 3.6382725336005204e-07, + "loss": 0.2774, + "step": 35493 + }, + { + "epoch": 1.6627160725160444, + "grad_norm": 0.5574466263154332, + "learning_rate": 3.6372873583259665e-07, + "loss": 0.2593, + "step": 35494 + }, + { + "epoch": 1.6627629175059728, + "grad_norm": 0.5686590550262325, + "learning_rate": 3.6363023059884485e-07, + "loss": 0.2734, + "step": 35495 + }, + { + "epoch": 1.662809762495901, + "grad_norm": 0.6156781967460835, + "learning_rate": 3.635317376593647e-07, + "loss": 0.269, + "step": 35496 + }, + { + "epoch": 1.6628566074858293, + "grad_norm": 0.5739050074921811, + "learning_rate": 3.634332570147217e-07, + "loss": 0.2598, + "step": 35497 + }, + { + "epoch": 1.6629034524757578, + "grad_norm": 0.594320470918168, + "learning_rate": 3.633347886654837e-07, + "loss": 0.2782, + "step": 35498 + }, + { + "epoch": 1.662950297465686, + "grad_norm": 0.6391056928819617, + "learning_rate": 3.632363326122165e-07, + "loss": 0.2799, + "step": 35499 + }, + { + "epoch": 1.6629971424556143, + "grad_norm": 0.5701091120625847, + "learning_rate": 3.6313788885548816e-07, + "loss": 0.2533, + "step": 35500 + }, + { + "epoch": 1.6630439874455427, + "grad_norm": 0.592041070233731, + "learning_rate": 3.6303945739586343e-07, + "loss": 0.2725, + "step": 35501 + }, + { + "epoch": 1.6630908324354712, + "grad_norm": 0.605253763589187, + "learning_rate": 3.6294103823390937e-07, + "loss": 0.2649, + "step": 35502 + }, + { + "epoch": 1.6631376774253992, + "grad_norm": 0.6456588895495993, + "learning_rate": 3.6284263137019326e-07, + "loss": 0.2829, + "step": 35503 + }, + { + "epoch": 1.6631845224153277, + "grad_norm": 0.6038612236154751, + "learning_rate": 3.6274423680528e-07, + "loss": 0.2842, + "step": 35504 + }, + { + "epoch": 1.6632313674052561, + "grad_norm": 0.588885815464342, + "learning_rate": 3.6264585453973657e-07, + "loss": 0.2616, + "step": 35505 + }, + { + "epoch": 1.6632782123951844, + "grad_norm": 0.6005091741818319, + "learning_rate": 3.6254748457412947e-07, + "loss": 0.287, + "step": 35506 + }, + { + "epoch": 1.6633250573851126, + "grad_norm": 0.5765593663987593, + "learning_rate": 3.624491269090238e-07, + "loss": 0.2623, + "step": 35507 + }, + { + "epoch": 1.663371902375041, + "grad_norm": 0.6246220510383524, + "learning_rate": 3.6235078154498644e-07, + "loss": 0.262, + "step": 35508 + }, + { + "epoch": 1.6634187473649693, + "grad_norm": 0.6093166359880178, + "learning_rate": 3.622524484825826e-07, + "loss": 0.2751, + "step": 35509 + }, + { + "epoch": 1.6634655923548975, + "grad_norm": 0.5823421405005209, + "learning_rate": 3.6215412772237944e-07, + "loss": 0.2641, + "step": 35510 + }, + { + "epoch": 1.663512437344826, + "grad_norm": 0.6536577532932503, + "learning_rate": 3.620558192649415e-07, + "loss": 0.2789, + "step": 35511 + }, + { + "epoch": 1.6635592823347543, + "grad_norm": 0.5831409969535462, + "learning_rate": 3.619575231108355e-07, + "loss": 0.2716, + "step": 35512 + }, + { + "epoch": 1.6636061273246825, + "grad_norm": 0.571926798407001, + "learning_rate": 3.618592392606257e-07, + "loss": 0.26, + "step": 35513 + }, + { + "epoch": 1.663652972314611, + "grad_norm": 0.5808904427947077, + "learning_rate": 3.6176096771487896e-07, + "loss": 0.265, + "step": 35514 + }, + { + "epoch": 1.6636998173045394, + "grad_norm": 0.5694531429913187, + "learning_rate": 3.6166270847416004e-07, + "loss": 0.2732, + "step": 35515 + }, + { + "epoch": 1.6637466622944677, + "grad_norm": 0.5945467388333644, + "learning_rate": 3.6156446153903514e-07, + "loss": 0.2756, + "step": 35516 + }, + { + "epoch": 1.663793507284396, + "grad_norm": 0.6241578467557992, + "learning_rate": 3.6146622691006966e-07, + "loss": 0.2665, + "step": 35517 + }, + { + "epoch": 1.6638403522743244, + "grad_norm": 0.6107439007177471, + "learning_rate": 3.613680045878287e-07, + "loss": 0.271, + "step": 35518 + }, + { + "epoch": 1.6638871972642526, + "grad_norm": 0.6526968825148783, + "learning_rate": 3.612697945728766e-07, + "loss": 0.2881, + "step": 35519 + }, + { + "epoch": 1.6639340422541808, + "grad_norm": 0.6147698846284263, + "learning_rate": 3.6117159686577944e-07, + "loss": 0.2833, + "step": 35520 + }, + { + "epoch": 1.6639808872441093, + "grad_norm": 0.5779057318033659, + "learning_rate": 3.610734114671019e-07, + "loss": 0.256, + "step": 35521 + }, + { + "epoch": 1.6640277322340375, + "grad_norm": 0.6532729881936319, + "learning_rate": 3.6097523837740954e-07, + "loss": 0.2883, + "step": 35522 + }, + { + "epoch": 1.6640745772239658, + "grad_norm": 0.6261490767541463, + "learning_rate": 3.60877077597267e-07, + "loss": 0.2792, + "step": 35523 + }, + { + "epoch": 1.6641214222138943, + "grad_norm": 0.5892313995290297, + "learning_rate": 3.607789291272398e-07, + "loss": 0.2588, + "step": 35524 + }, + { + "epoch": 1.6641682672038227, + "grad_norm": 0.5972102355982861, + "learning_rate": 3.606807929678921e-07, + "loss": 0.2703, + "step": 35525 + }, + { + "epoch": 1.6642151121937507, + "grad_norm": 0.6025146426550042, + "learning_rate": 3.6058266911978836e-07, + "loss": 0.2698, + "step": 35526 + }, + { + "epoch": 1.6642619571836792, + "grad_norm": 0.6289898159849303, + "learning_rate": 3.604845575834934e-07, + "loss": 0.2759, + "step": 35527 + }, + { + "epoch": 1.6643088021736077, + "grad_norm": 0.5482012584990557, + "learning_rate": 3.603864583595723e-07, + "loss": 0.2526, + "step": 35528 + }, + { + "epoch": 1.664355647163536, + "grad_norm": 0.615035152143419, + "learning_rate": 3.6028837144858915e-07, + "loss": 0.2776, + "step": 35529 + }, + { + "epoch": 1.6644024921534641, + "grad_norm": 0.5882094866284258, + "learning_rate": 3.6019029685110925e-07, + "loss": 0.2858, + "step": 35530 + }, + { + "epoch": 1.6644493371433926, + "grad_norm": 0.5761083362598147, + "learning_rate": 3.6009223456769577e-07, + "loss": 0.2751, + "step": 35531 + }, + { + "epoch": 1.6644961821333208, + "grad_norm": 0.6097334596010258, + "learning_rate": 3.599941845989144e-07, + "loss": 0.2715, + "step": 35532 + }, + { + "epoch": 1.664543027123249, + "grad_norm": 0.569031426985242, + "learning_rate": 3.59896146945328e-07, + "loss": 0.2624, + "step": 35533 + }, + { + "epoch": 1.6645898721131775, + "grad_norm": 0.6402864360157121, + "learning_rate": 3.5979812160750113e-07, + "loss": 0.2796, + "step": 35534 + }, + { + "epoch": 1.6646367171031058, + "grad_norm": 0.6077397488443866, + "learning_rate": 3.597001085859983e-07, + "loss": 0.2733, + "step": 35535 + }, + { + "epoch": 1.664683562093034, + "grad_norm": 0.6325208396171202, + "learning_rate": 3.5960210788138356e-07, + "loss": 0.2745, + "step": 35536 + }, + { + "epoch": 1.6647304070829625, + "grad_norm": 0.6230540426191531, + "learning_rate": 3.5950411949422114e-07, + "loss": 0.2836, + "step": 35537 + }, + { + "epoch": 1.664777252072891, + "grad_norm": 0.5752441458685343, + "learning_rate": 3.5940614342507454e-07, + "loss": 0.2617, + "step": 35538 + }, + { + "epoch": 1.664824097062819, + "grad_norm": 0.6149025235362475, + "learning_rate": 3.5930817967450707e-07, + "loss": 0.2882, + "step": 35539 + }, + { + "epoch": 1.6648709420527474, + "grad_norm": 0.576177678626939, + "learning_rate": 3.5921022824308287e-07, + "loss": 0.2564, + "step": 35540 + }, + { + "epoch": 1.664917787042676, + "grad_norm": 0.6215393176943067, + "learning_rate": 3.591122891313656e-07, + "loss": 0.276, + "step": 35541 + }, + { + "epoch": 1.6649646320326041, + "grad_norm": 0.6151818749418081, + "learning_rate": 3.590143623399192e-07, + "loss": 0.2749, + "step": 35542 + }, + { + "epoch": 1.6650114770225324, + "grad_norm": 0.5695700502130384, + "learning_rate": 3.5891644786930776e-07, + "loss": 0.2443, + "step": 35543 + }, + { + "epoch": 1.6650583220124608, + "grad_norm": 0.5920416792153337, + "learning_rate": 3.5881854572009325e-07, + "loss": 0.2658, + "step": 35544 + }, + { + "epoch": 1.665105167002389, + "grad_norm": 0.5779524772654353, + "learning_rate": 3.587206558928405e-07, + "loss": 0.2743, + "step": 35545 + }, + { + "epoch": 1.6651520119923173, + "grad_norm": 0.5694686856676926, + "learning_rate": 3.586227783881116e-07, + "loss": 0.2619, + "step": 35546 + }, + { + "epoch": 1.6651988569822458, + "grad_norm": 0.662345941327054, + "learning_rate": 3.5852491320647025e-07, + "loss": 0.2845, + "step": 35547 + }, + { + "epoch": 1.665245701972174, + "grad_norm": 0.5547849538848308, + "learning_rate": 3.584270603484799e-07, + "loss": 0.2712, + "step": 35548 + }, + { + "epoch": 1.6652925469621023, + "grad_norm": 0.5750173505378431, + "learning_rate": 3.5832921981470396e-07, + "loss": 0.2632, + "step": 35549 + }, + { + "epoch": 1.6653393919520307, + "grad_norm": 0.6017074141919346, + "learning_rate": 3.582313916057048e-07, + "loss": 0.2593, + "step": 35550 + }, + { + "epoch": 1.6653862369419592, + "grad_norm": 0.593118531642266, + "learning_rate": 3.5813357572204555e-07, + "loss": 0.2677, + "step": 35551 + }, + { + "epoch": 1.6654330819318874, + "grad_norm": 0.6071277910470746, + "learning_rate": 3.580357721642896e-07, + "loss": 0.2583, + "step": 35552 + }, + { + "epoch": 1.6654799269218157, + "grad_norm": 0.6183211742168895, + "learning_rate": 3.5793798093299913e-07, + "loss": 0.2804, + "step": 35553 + }, + { + "epoch": 1.6655267719117441, + "grad_norm": 0.6333700480869578, + "learning_rate": 3.578402020287372e-07, + "loss": 0.2904, + "step": 35554 + }, + { + "epoch": 1.6655736169016724, + "grad_norm": 0.5741476542889771, + "learning_rate": 3.57742435452067e-07, + "loss": 0.2542, + "step": 35555 + }, + { + "epoch": 1.6656204618916006, + "grad_norm": 0.6183481214286597, + "learning_rate": 3.5764468120355e-07, + "loss": 0.2911, + "step": 35556 + }, + { + "epoch": 1.665667306881529, + "grad_norm": 0.5524708509835917, + "learning_rate": 3.575469392837494e-07, + "loss": 0.2509, + "step": 35557 + }, + { + "epoch": 1.6657141518714573, + "grad_norm": 0.5564341151753407, + "learning_rate": 3.5744920969322776e-07, + "loss": 0.2491, + "step": 35558 + }, + { + "epoch": 1.6657609968613856, + "grad_norm": 0.6459528451568556, + "learning_rate": 3.573514924325477e-07, + "loss": 0.2734, + "step": 35559 + }, + { + "epoch": 1.665807841851314, + "grad_norm": 0.6168566176173238, + "learning_rate": 3.572537875022711e-07, + "loss": 0.2653, + "step": 35560 + }, + { + "epoch": 1.6658546868412425, + "grad_norm": 0.590496885003506, + "learning_rate": 3.5715609490296065e-07, + "loss": 0.2781, + "step": 35561 + }, + { + "epoch": 1.6659015318311705, + "grad_norm": 0.6053208851668135, + "learning_rate": 3.57058414635178e-07, + "loss": 0.2683, + "step": 35562 + }, + { + "epoch": 1.665948376821099, + "grad_norm": 0.584625124445643, + "learning_rate": 3.5696074669948536e-07, + "loss": 0.2474, + "step": 35563 + }, + { + "epoch": 1.6659952218110274, + "grad_norm": 0.6049651399131494, + "learning_rate": 3.568630910964452e-07, + "loss": 0.2761, + "step": 35564 + }, + { + "epoch": 1.6660420668009557, + "grad_norm": 0.6192502886453387, + "learning_rate": 3.5676544782661886e-07, + "loss": 0.2888, + "step": 35565 + }, + { + "epoch": 1.666088911790884, + "grad_norm": 0.6130831489929085, + "learning_rate": 3.5666781689056964e-07, + "loss": 0.2627, + "step": 35566 + }, + { + "epoch": 1.6661357567808124, + "grad_norm": 0.5557487880897422, + "learning_rate": 3.5657019828885835e-07, + "loss": 0.2582, + "step": 35567 + }, + { + "epoch": 1.6661826017707406, + "grad_norm": 0.5907969961583835, + "learning_rate": 3.564725920220463e-07, + "loss": 0.2683, + "step": 35568 + }, + { + "epoch": 1.6662294467606689, + "grad_norm": 0.6154918221061838, + "learning_rate": 3.5637499809069556e-07, + "loss": 0.2699, + "step": 35569 + }, + { + "epoch": 1.6662762917505973, + "grad_norm": 0.6430568002702106, + "learning_rate": 3.5627741649536806e-07, + "loss": 0.2785, + "step": 35570 + }, + { + "epoch": 1.6663231367405256, + "grad_norm": 0.5803608167082216, + "learning_rate": 3.561798472366254e-07, + "loss": 0.2702, + "step": 35571 + }, + { + "epoch": 1.6663699817304538, + "grad_norm": 0.6120121384473404, + "learning_rate": 3.5608229031502907e-07, + "loss": 0.2747, + "step": 35572 + }, + { + "epoch": 1.6664168267203823, + "grad_norm": 0.6243331532970782, + "learning_rate": 3.5598474573113994e-07, + "loss": 0.2725, + "step": 35573 + }, + { + "epoch": 1.6664636717103107, + "grad_norm": 0.5814930184961047, + "learning_rate": 3.558872134855204e-07, + "loss": 0.2713, + "step": 35574 + }, + { + "epoch": 1.6665105167002388, + "grad_norm": 0.6115672532641627, + "learning_rate": 3.5578969357873027e-07, + "loss": 0.2783, + "step": 35575 + }, + { + "epoch": 1.6665573616901672, + "grad_norm": 0.6193786834431213, + "learning_rate": 3.5569218601133157e-07, + "loss": 0.2835, + "step": 35576 + }, + { + "epoch": 1.6666042066800957, + "grad_norm": 0.5969522926425674, + "learning_rate": 3.5559469078388557e-07, + "loss": 0.2771, + "step": 35577 + }, + { + "epoch": 1.666651051670024, + "grad_norm": 0.6179966886459293, + "learning_rate": 3.554972078969529e-07, + "loss": 0.2646, + "step": 35578 + }, + { + "epoch": 1.6666978966599522, + "grad_norm": 0.5987497042517627, + "learning_rate": 3.553997373510956e-07, + "loss": 0.27, + "step": 35579 + }, + { + "epoch": 1.6667447416498806, + "grad_norm": 0.6053946515312013, + "learning_rate": 3.553022791468735e-07, + "loss": 0.2715, + "step": 35580 + }, + { + "epoch": 1.6667915866398089, + "grad_norm": 0.5913719136138087, + "learning_rate": 3.5520483328484735e-07, + "loss": 0.2684, + "step": 35581 + }, + { + "epoch": 1.666838431629737, + "grad_norm": 0.6194416633257899, + "learning_rate": 3.5510739976557823e-07, + "loss": 0.2723, + "step": 35582 + }, + { + "epoch": 1.6668852766196656, + "grad_norm": 0.5831187039839729, + "learning_rate": 3.550099785896272e-07, + "loss": 0.2677, + "step": 35583 + }, + { + "epoch": 1.6669321216095938, + "grad_norm": 0.6286629747154524, + "learning_rate": 3.5491256975755423e-07, + "loss": 0.2748, + "step": 35584 + }, + { + "epoch": 1.666978966599522, + "grad_norm": 0.6040580998873841, + "learning_rate": 3.548151732699212e-07, + "loss": 0.2727, + "step": 35585 + }, + { + "epoch": 1.6670258115894505, + "grad_norm": 0.6683243226955334, + "learning_rate": 3.5471778912728683e-07, + "loss": 0.2917, + "step": 35586 + }, + { + "epoch": 1.667072656579379, + "grad_norm": 0.5934094118077973, + "learning_rate": 3.546204173302131e-07, + "loss": 0.2664, + "step": 35587 + }, + { + "epoch": 1.6671195015693072, + "grad_norm": 0.6077953196299646, + "learning_rate": 3.545230578792591e-07, + "loss": 0.2653, + "step": 35588 + }, + { + "epoch": 1.6671663465592355, + "grad_norm": 0.5801655779185663, + "learning_rate": 3.544257107749854e-07, + "loss": 0.2681, + "step": 35589 + }, + { + "epoch": 1.667213191549164, + "grad_norm": 0.6027152038514262, + "learning_rate": 3.5432837601795277e-07, + "loss": 0.2614, + "step": 35590 + }, + { + "epoch": 1.6672600365390922, + "grad_norm": 0.5733722971729518, + "learning_rate": 3.542310536087215e-07, + "loss": 0.2617, + "step": 35591 + }, + { + "epoch": 1.6673068815290204, + "grad_norm": 0.6034007149589833, + "learning_rate": 3.541337435478506e-07, + "loss": 0.2767, + "step": 35592 + }, + { + "epoch": 1.6673537265189489, + "grad_norm": 0.6502134595451922, + "learning_rate": 3.5403644583590053e-07, + "loss": 0.2868, + "step": 35593 + }, + { + "epoch": 1.667400571508877, + "grad_norm": 0.5818940271326754, + "learning_rate": 3.5393916047343215e-07, + "loss": 0.2579, + "step": 35594 + }, + { + "epoch": 1.6674474164988053, + "grad_norm": 0.5982923312697369, + "learning_rate": 3.538418874610039e-07, + "loss": 0.279, + "step": 35595 + }, + { + "epoch": 1.6674942614887338, + "grad_norm": 0.5842969331909175, + "learning_rate": 3.537446267991762e-07, + "loss": 0.28, + "step": 35596 + }, + { + "epoch": 1.6675411064786623, + "grad_norm": 0.5911047768356972, + "learning_rate": 3.5364737848850916e-07, + "loss": 0.2934, + "step": 35597 + }, + { + "epoch": 1.6675879514685903, + "grad_norm": 0.6071187186092494, + "learning_rate": 3.535501425295615e-07, + "loss": 0.2784, + "step": 35598 + }, + { + "epoch": 1.6676347964585188, + "grad_norm": 0.567493217771707, + "learning_rate": 3.534529189228933e-07, + "loss": 0.2487, + "step": 35599 + }, + { + "epoch": 1.6676816414484472, + "grad_norm": 0.6452635453089655, + "learning_rate": 3.5335570766906383e-07, + "loss": 0.2813, + "step": 35600 + }, + { + "epoch": 1.6677284864383755, + "grad_norm": 0.5862532212116226, + "learning_rate": 3.5325850876863355e-07, + "loss": 0.2682, + "step": 35601 + }, + { + "epoch": 1.6677753314283037, + "grad_norm": 0.620691849923157, + "learning_rate": 3.5316132222216054e-07, + "loss": 0.2766, + "step": 35602 + }, + { + "epoch": 1.6678221764182322, + "grad_norm": 0.6248193787224321, + "learning_rate": 3.530641480302041e-07, + "loss": 0.2865, + "step": 35603 + }, + { + "epoch": 1.6678690214081604, + "grad_norm": 0.6450734533769444, + "learning_rate": 3.5296698619332494e-07, + "loss": 0.2777, + "step": 35604 + }, + { + "epoch": 1.6679158663980886, + "grad_norm": 0.5775899178812035, + "learning_rate": 3.5286983671208e-07, + "loss": 0.2657, + "step": 35605 + }, + { + "epoch": 1.667962711388017, + "grad_norm": 0.6492914951026615, + "learning_rate": 3.527726995870301e-07, + "loss": 0.2674, + "step": 35606 + }, + { + "epoch": 1.6680095563779453, + "grad_norm": 0.6105077508936726, + "learning_rate": 3.5267557481873325e-07, + "loss": 0.2703, + "step": 35607 + }, + { + "epoch": 1.6680564013678736, + "grad_norm": 0.6394632184920849, + "learning_rate": 3.525784624077494e-07, + "loss": 0.3031, + "step": 35608 + }, + { + "epoch": 1.668103246357802, + "grad_norm": 0.593059519772336, + "learning_rate": 3.524813623546361e-07, + "loss": 0.2634, + "step": 35609 + }, + { + "epoch": 1.6681500913477305, + "grad_norm": 0.5987752611030298, + "learning_rate": 3.5238427465995347e-07, + "loss": 0.2631, + "step": 35610 + }, + { + "epoch": 1.6681969363376585, + "grad_norm": 0.5888450197119888, + "learning_rate": 3.522871993242591e-07, + "loss": 0.259, + "step": 35611 + }, + { + "epoch": 1.668243781327587, + "grad_norm": 0.6009524787621175, + "learning_rate": 3.52190136348112e-07, + "loss": 0.2755, + "step": 35612 + }, + { + "epoch": 1.6682906263175155, + "grad_norm": 0.6244530418840355, + "learning_rate": 3.520930857320706e-07, + "loss": 0.2859, + "step": 35613 + }, + { + "epoch": 1.6683374713074437, + "grad_norm": 0.6077476830348257, + "learning_rate": 3.51996047476694e-07, + "loss": 0.2768, + "step": 35614 + }, + { + "epoch": 1.668384316297372, + "grad_norm": 0.5604192946897972, + "learning_rate": 3.5189902158254073e-07, + "loss": 0.2496, + "step": 35615 + }, + { + "epoch": 1.6684311612873004, + "grad_norm": 0.5934329854206547, + "learning_rate": 3.518020080501686e-07, + "loss": 0.2526, + "step": 35616 + }, + { + "epoch": 1.6684780062772286, + "grad_norm": 0.6127478175588523, + "learning_rate": 3.517050068801353e-07, + "loss": 0.2654, + "step": 35617 + }, + { + "epoch": 1.6685248512671569, + "grad_norm": 0.6114975434652963, + "learning_rate": 3.5160801807299997e-07, + "loss": 0.2706, + "step": 35618 + }, + { + "epoch": 1.6685716962570853, + "grad_norm": 0.6086100958333406, + "learning_rate": 3.5151104162932046e-07, + "loss": 0.2804, + "step": 35619 + }, + { + "epoch": 1.6686185412470136, + "grad_norm": 0.5763542384742505, + "learning_rate": 3.5141407754965494e-07, + "loss": 0.283, + "step": 35620 + }, + { + "epoch": 1.6686653862369418, + "grad_norm": 0.5887874768130659, + "learning_rate": 3.5131712583456187e-07, + "loss": 0.2678, + "step": 35621 + }, + { + "epoch": 1.6687122312268703, + "grad_norm": 0.574949132902316, + "learning_rate": 3.5122018648459796e-07, + "loss": 0.2619, + "step": 35622 + }, + { + "epoch": 1.6687590762167988, + "grad_norm": 0.598600293410417, + "learning_rate": 3.511232595003228e-07, + "loss": 0.2784, + "step": 35623 + }, + { + "epoch": 1.668805921206727, + "grad_norm": 0.5431976930676137, + "learning_rate": 3.510263448822923e-07, + "loss": 0.2546, + "step": 35624 + }, + { + "epoch": 1.6688527661966552, + "grad_norm": 0.5590591732307628, + "learning_rate": 3.50929442631065e-07, + "loss": 0.2632, + "step": 35625 + }, + { + "epoch": 1.6688996111865837, + "grad_norm": 0.6039594389001618, + "learning_rate": 3.508325527471987e-07, + "loss": 0.2886, + "step": 35626 + }, + { + "epoch": 1.668946456176512, + "grad_norm": 0.5645399350749923, + "learning_rate": 3.50735675231251e-07, + "loss": 0.237, + "step": 35627 + }, + { + "epoch": 1.6689933011664402, + "grad_norm": 0.5912076154221936, + "learning_rate": 3.506388100837796e-07, + "loss": 0.2743, + "step": 35628 + }, + { + "epoch": 1.6690401461563686, + "grad_norm": 0.5852152003583491, + "learning_rate": 3.50541957305342e-07, + "loss": 0.2567, + "step": 35629 + }, + { + "epoch": 1.6690869911462969, + "grad_norm": 0.5730988872819339, + "learning_rate": 3.5044511689649425e-07, + "loss": 0.2815, + "step": 35630 + }, + { + "epoch": 1.6691338361362251, + "grad_norm": 0.5929924657097914, + "learning_rate": 3.503482888577947e-07, + "loss": 0.2578, + "step": 35631 + }, + { + "epoch": 1.6691806811261536, + "grad_norm": 0.6093088067473852, + "learning_rate": 3.5025147318980076e-07, + "loss": 0.2683, + "step": 35632 + }, + { + "epoch": 1.669227526116082, + "grad_norm": 0.6281743114553553, + "learning_rate": 3.5015466989306886e-07, + "loss": 0.2919, + "step": 35633 + }, + { + "epoch": 1.66927437110601, + "grad_norm": 0.5690963224008279, + "learning_rate": 3.500578789681572e-07, + "loss": 0.2594, + "step": 35634 + }, + { + "epoch": 1.6693212160959385, + "grad_norm": 0.6499467474386526, + "learning_rate": 3.499611004156217e-07, + "loss": 0.2646, + "step": 35635 + }, + { + "epoch": 1.669368061085867, + "grad_norm": 0.6269518032950075, + "learning_rate": 3.4986433423602e-07, + "loss": 0.2734, + "step": 35636 + }, + { + "epoch": 1.6694149060757952, + "grad_norm": 0.582277108655193, + "learning_rate": 3.49767580429908e-07, + "loss": 0.2558, + "step": 35637 + }, + { + "epoch": 1.6694617510657235, + "grad_norm": 0.6228748555485407, + "learning_rate": 3.496708389978434e-07, + "loss": 0.2738, + "step": 35638 + }, + { + "epoch": 1.669508596055652, + "grad_norm": 0.653140168948152, + "learning_rate": 3.495741099403824e-07, + "loss": 0.2929, + "step": 35639 + }, + { + "epoch": 1.6695554410455802, + "grad_norm": 0.6247042979258073, + "learning_rate": 3.494773932580828e-07, + "loss": 0.2959, + "step": 35640 + }, + { + "epoch": 1.6696022860355084, + "grad_norm": 0.5880851244944942, + "learning_rate": 3.4938068895149946e-07, + "loss": 0.267, + "step": 35641 + }, + { + "epoch": 1.6696491310254369, + "grad_norm": 0.6698018624342081, + "learning_rate": 3.4928399702118976e-07, + "loss": 0.3005, + "step": 35642 + }, + { + "epoch": 1.6696959760153651, + "grad_norm": 0.6319796912623373, + "learning_rate": 3.491873174677107e-07, + "loss": 0.2859, + "step": 35643 + }, + { + "epoch": 1.6697428210052934, + "grad_norm": 0.6189152188111471, + "learning_rate": 3.490906502916175e-07, + "loss": 0.2822, + "step": 35644 + }, + { + "epoch": 1.6697896659952218, + "grad_norm": 0.546419541385114, + "learning_rate": 3.489939954934668e-07, + "loss": 0.2583, + "step": 35645 + }, + { + "epoch": 1.6698365109851503, + "grad_norm": 0.6356183768769671, + "learning_rate": 3.48897353073816e-07, + "loss": 0.2825, + "step": 35646 + }, + { + "epoch": 1.6698833559750783, + "grad_norm": 0.6201564976022025, + "learning_rate": 3.488007230332194e-07, + "loss": 0.2857, + "step": 35647 + }, + { + "epoch": 1.6699302009650068, + "grad_norm": 0.5772474308591246, + "learning_rate": 3.48704105372234e-07, + "loss": 0.2645, + "step": 35648 + }, + { + "epoch": 1.6699770459549352, + "grad_norm": 0.5949397752783853, + "learning_rate": 3.4860750009141575e-07, + "loss": 0.2814, + "step": 35649 + }, + { + "epoch": 1.6700238909448635, + "grad_norm": 0.5751154474152379, + "learning_rate": 3.485109071913212e-07, + "loss": 0.2644, + "step": 35650 + }, + { + "epoch": 1.6700707359347917, + "grad_norm": 0.6122167568994964, + "learning_rate": 3.4841432667250515e-07, + "loss": 0.2627, + "step": 35651 + }, + { + "epoch": 1.6701175809247202, + "grad_norm": 0.5741952054929004, + "learning_rate": 3.483177585355241e-07, + "loss": 0.2625, + "step": 35652 + }, + { + "epoch": 1.6701644259146484, + "grad_norm": 0.6422589364082355, + "learning_rate": 3.482212027809334e-07, + "loss": 0.2895, + "step": 35653 + }, + { + "epoch": 1.6702112709045767, + "grad_norm": 0.6231329514347103, + "learning_rate": 3.481246594092885e-07, + "loss": 0.2741, + "step": 35654 + }, + { + "epoch": 1.6702581158945051, + "grad_norm": 0.6118272743297252, + "learning_rate": 3.4802812842114564e-07, + "loss": 0.2673, + "step": 35655 + }, + { + "epoch": 1.6703049608844334, + "grad_norm": 0.5840389000740692, + "learning_rate": 3.4793160981705987e-07, + "loss": 0.2717, + "step": 35656 + }, + { + "epoch": 1.6703518058743616, + "grad_norm": 0.6003923213930458, + "learning_rate": 3.4783510359758746e-07, + "loss": 0.2652, + "step": 35657 + }, + { + "epoch": 1.67039865086429, + "grad_norm": 0.6476832505060073, + "learning_rate": 3.477386097632829e-07, + "loss": 0.285, + "step": 35658 + }, + { + "epoch": 1.6704454958542185, + "grad_norm": 0.6257744592984854, + "learning_rate": 3.4764212831470106e-07, + "loss": 0.2704, + "step": 35659 + }, + { + "epoch": 1.6704923408441468, + "grad_norm": 0.577064398799492, + "learning_rate": 3.475456592523982e-07, + "loss": 0.2671, + "step": 35660 + }, + { + "epoch": 1.670539185834075, + "grad_norm": 0.6033815121515899, + "learning_rate": 3.474492025769288e-07, + "loss": 0.2762, + "step": 35661 + }, + { + "epoch": 1.6705860308240035, + "grad_norm": 0.6071814755630202, + "learning_rate": 3.473527582888481e-07, + "loss": 0.2593, + "step": 35662 + }, + { + "epoch": 1.6706328758139317, + "grad_norm": 0.5807766792520492, + "learning_rate": 3.472563263887116e-07, + "loss": 0.2681, + "step": 35663 + }, + { + "epoch": 1.67067972080386, + "grad_norm": 0.5829718595443307, + "learning_rate": 3.4715990687707395e-07, + "loss": 0.2601, + "step": 35664 + }, + { + "epoch": 1.6707265657937884, + "grad_norm": 0.5737814365499159, + "learning_rate": 3.470634997544903e-07, + "loss": 0.2701, + "step": 35665 + }, + { + "epoch": 1.6707734107837167, + "grad_norm": 0.612332184637012, + "learning_rate": 3.4696710502151455e-07, + "loss": 0.2713, + "step": 35666 + }, + { + "epoch": 1.670820255773645, + "grad_norm": 0.5772655257449429, + "learning_rate": 3.468707226787019e-07, + "loss": 0.266, + "step": 35667 + }, + { + "epoch": 1.6708671007635734, + "grad_norm": 0.6288051502416407, + "learning_rate": 3.467743527266068e-07, + "loss": 0.2845, + "step": 35668 + }, + { + "epoch": 1.6709139457535018, + "grad_norm": 0.6000921391963773, + "learning_rate": 3.466779951657845e-07, + "loss": 0.2575, + "step": 35669 + }, + { + "epoch": 1.6709607907434298, + "grad_norm": 0.6549574611709786, + "learning_rate": 3.4658164999678945e-07, + "loss": 0.2661, + "step": 35670 + }, + { + "epoch": 1.6710076357333583, + "grad_norm": 0.6037902097314463, + "learning_rate": 3.4648531722017513e-07, + "loss": 0.2669, + "step": 35671 + }, + { + "epoch": 1.6710544807232868, + "grad_norm": 0.6279345450912326, + "learning_rate": 3.463889968364972e-07, + "loss": 0.2738, + "step": 35672 + }, + { + "epoch": 1.671101325713215, + "grad_norm": 0.5645826577327413, + "learning_rate": 3.462926888463089e-07, + "loss": 0.2539, + "step": 35673 + }, + { + "epoch": 1.6711481707031433, + "grad_norm": 0.6121896894432639, + "learning_rate": 3.461963932501647e-07, + "loss": 0.2695, + "step": 35674 + }, + { + "epoch": 1.6711950156930717, + "grad_norm": 0.5883550998704712, + "learning_rate": 3.461001100486189e-07, + "loss": 0.2682, + "step": 35675 + }, + { + "epoch": 1.671241860683, + "grad_norm": 0.6188815226341997, + "learning_rate": 3.4600383924222575e-07, + "loss": 0.2837, + "step": 35676 + }, + { + "epoch": 1.6712887056729282, + "grad_norm": 0.5891161820362327, + "learning_rate": 3.4590758083153956e-07, + "loss": 0.2621, + "step": 35677 + }, + { + "epoch": 1.6713355506628567, + "grad_norm": 0.6284751788049957, + "learning_rate": 3.458113348171138e-07, + "loss": 0.2764, + "step": 35678 + }, + { + "epoch": 1.671382395652785, + "grad_norm": 0.6300502923469673, + "learning_rate": 3.457151011995019e-07, + "loss": 0.27, + "step": 35679 + }, + { + "epoch": 1.6714292406427131, + "grad_norm": 0.6484530591373916, + "learning_rate": 3.4561887997925813e-07, + "loss": 0.2664, + "step": 35680 + }, + { + "epoch": 1.6714760856326416, + "grad_norm": 0.5421606444281526, + "learning_rate": 3.4552267115693624e-07, + "loss": 0.2711, + "step": 35681 + }, + { + "epoch": 1.67152293062257, + "grad_norm": 0.5551744119264831, + "learning_rate": 3.454264747330899e-07, + "loss": 0.2508, + "step": 35682 + }, + { + "epoch": 1.671569775612498, + "grad_norm": 0.5863087310268648, + "learning_rate": 3.453302907082731e-07, + "loss": 0.255, + "step": 35683 + }, + { + "epoch": 1.6716166206024266, + "grad_norm": 0.6107595015811206, + "learning_rate": 3.4523411908303855e-07, + "loss": 0.2763, + "step": 35684 + }, + { + "epoch": 1.671663465592355, + "grad_norm": 0.5953096079963826, + "learning_rate": 3.4513795985794044e-07, + "loss": 0.2601, + "step": 35685 + }, + { + "epoch": 1.6717103105822833, + "grad_norm": 0.6142720661524396, + "learning_rate": 3.4504181303353116e-07, + "loss": 0.2485, + "step": 35686 + }, + { + "epoch": 1.6717571555722115, + "grad_norm": 0.5905953653918071, + "learning_rate": 3.449456786103647e-07, + "loss": 0.2723, + "step": 35687 + }, + { + "epoch": 1.67180400056214, + "grad_norm": 0.6863218594872752, + "learning_rate": 3.448495565889942e-07, + "loss": 0.3056, + "step": 35688 + }, + { + "epoch": 1.6718508455520682, + "grad_norm": 0.591875428541445, + "learning_rate": 3.4475344696997345e-07, + "loss": 0.2781, + "step": 35689 + }, + { + "epoch": 1.6718976905419964, + "grad_norm": 0.6267363706594433, + "learning_rate": 3.446573497538544e-07, + "loss": 0.2893, + "step": 35690 + }, + { + "epoch": 1.671944535531925, + "grad_norm": 0.579333393383056, + "learning_rate": 3.445612649411903e-07, + "loss": 0.2672, + "step": 35691 + }, + { + "epoch": 1.6719913805218531, + "grad_norm": 0.6133682937682001, + "learning_rate": 3.444651925325351e-07, + "loss": 0.2807, + "step": 35692 + }, + { + "epoch": 1.6720382255117814, + "grad_norm": 0.5799465701121046, + "learning_rate": 3.4436913252844065e-07, + "loss": 0.2724, + "step": 35693 + }, + { + "epoch": 1.6720850705017098, + "grad_norm": 0.5798036243903487, + "learning_rate": 3.442730849294598e-07, + "loss": 0.2631, + "step": 35694 + }, + { + "epoch": 1.6721319154916383, + "grad_norm": 0.6072788812072382, + "learning_rate": 3.44177049736146e-07, + "loss": 0.2765, + "step": 35695 + }, + { + "epoch": 1.6721787604815666, + "grad_norm": 0.5530062523774661, + "learning_rate": 3.4408102694905104e-07, + "loss": 0.2579, + "step": 35696 + }, + { + "epoch": 1.6722256054714948, + "grad_norm": 0.6149271361892575, + "learning_rate": 3.439850165687278e-07, + "loss": 0.2599, + "step": 35697 + }, + { + "epoch": 1.6722724504614233, + "grad_norm": 0.6198464235637006, + "learning_rate": 3.438890185957289e-07, + "loss": 0.2738, + "step": 35698 + }, + { + "epoch": 1.6723192954513515, + "grad_norm": 0.6092007896816923, + "learning_rate": 3.437930330306072e-07, + "loss": 0.2639, + "step": 35699 + }, + { + "epoch": 1.6723661404412797, + "grad_norm": 0.5545172280323474, + "learning_rate": 3.4369705987391424e-07, + "loss": 0.2532, + "step": 35700 + }, + { + "epoch": 1.6724129854312082, + "grad_norm": 0.5899490501401125, + "learning_rate": 3.436010991262034e-07, + "loss": 0.2702, + "step": 35701 + }, + { + "epoch": 1.6724598304211364, + "grad_norm": 0.6235290520116304, + "learning_rate": 3.435051507880255e-07, + "loss": 0.2767, + "step": 35702 + }, + { + "epoch": 1.6725066754110647, + "grad_norm": 0.5625047231867462, + "learning_rate": 3.4340921485993356e-07, + "loss": 0.2665, + "step": 35703 + }, + { + "epoch": 1.6725535204009931, + "grad_norm": 0.5727390225562584, + "learning_rate": 3.4331329134247945e-07, + "loss": 0.2702, + "step": 35704 + }, + { + "epoch": 1.6726003653909216, + "grad_norm": 0.6088734124359265, + "learning_rate": 3.432173802362154e-07, + "loss": 0.265, + "step": 35705 + }, + { + "epoch": 1.6726472103808496, + "grad_norm": 0.5771054741530918, + "learning_rate": 3.4312148154169387e-07, + "loss": 0.2581, + "step": 35706 + }, + { + "epoch": 1.672694055370778, + "grad_norm": 0.5644638884316355, + "learning_rate": 3.4302559525946576e-07, + "loss": 0.2594, + "step": 35707 + }, + { + "epoch": 1.6727409003607066, + "grad_norm": 0.5731174152147265, + "learning_rate": 3.4292972139008314e-07, + "loss": 0.249, + "step": 35708 + }, + { + "epoch": 1.6727877453506348, + "grad_norm": 0.6186733637570404, + "learning_rate": 3.428338599340975e-07, + "loss": 0.2983, + "step": 35709 + }, + { + "epoch": 1.672834590340563, + "grad_norm": 0.5899464551342734, + "learning_rate": 3.4273801089206084e-07, + "loss": 0.2531, + "step": 35710 + }, + { + "epoch": 1.6728814353304915, + "grad_norm": 0.6253228138899192, + "learning_rate": 3.4264217426452474e-07, + "loss": 0.2721, + "step": 35711 + }, + { + "epoch": 1.6729282803204197, + "grad_norm": 0.6154393582997921, + "learning_rate": 3.4254635005204123e-07, + "loss": 0.28, + "step": 35712 + }, + { + "epoch": 1.672975125310348, + "grad_norm": 0.6097859504188367, + "learning_rate": 3.4245053825516073e-07, + "loss": 0.2709, + "step": 35713 + }, + { + "epoch": 1.6730219703002764, + "grad_norm": 0.6400459691407446, + "learning_rate": 3.4235473887443555e-07, + "loss": 0.2902, + "step": 35714 + }, + { + "epoch": 1.6730688152902047, + "grad_norm": 0.6106426638489074, + "learning_rate": 3.422589519104161e-07, + "loss": 0.2714, + "step": 35715 + }, + { + "epoch": 1.673115660280133, + "grad_norm": 0.6098239741667411, + "learning_rate": 3.421631773636538e-07, + "loss": 0.29, + "step": 35716 + }, + { + "epoch": 1.6731625052700614, + "grad_norm": 0.6407413645464073, + "learning_rate": 3.420674152347003e-07, + "loss": 0.2784, + "step": 35717 + }, + { + "epoch": 1.6732093502599898, + "grad_norm": 0.5786087584471835, + "learning_rate": 3.419716655241065e-07, + "loss": 0.2751, + "step": 35718 + }, + { + "epoch": 1.6732561952499179, + "grad_norm": 0.6125977230157602, + "learning_rate": 3.418759282324238e-07, + "loss": 0.2899, + "step": 35719 + }, + { + "epoch": 1.6733030402398463, + "grad_norm": 0.6122687116676322, + "learning_rate": 3.4178020336020275e-07, + "loss": 0.2636, + "step": 35720 + }, + { + "epoch": 1.6733498852297748, + "grad_norm": 0.5910696965082557, + "learning_rate": 3.416844909079936e-07, + "loss": 0.2599, + "step": 35721 + }, + { + "epoch": 1.673396730219703, + "grad_norm": 0.6102526950921063, + "learning_rate": 3.4158879087634795e-07, + "loss": 0.2791, + "step": 35722 + }, + { + "epoch": 1.6734435752096313, + "grad_norm": 0.6118004264019318, + "learning_rate": 3.4149310326581615e-07, + "loss": 0.269, + "step": 35723 + }, + { + "epoch": 1.6734904201995597, + "grad_norm": 0.654509085167267, + "learning_rate": 3.413974280769489e-07, + "loss": 0.2915, + "step": 35724 + }, + { + "epoch": 1.673537265189488, + "grad_norm": 0.6000731077181312, + "learning_rate": 3.413017653102976e-07, + "loss": 0.2734, + "step": 35725 + }, + { + "epoch": 1.6735841101794162, + "grad_norm": 0.5793034687721771, + "learning_rate": 3.4120611496641145e-07, + "loss": 0.2665, + "step": 35726 + }, + { + "epoch": 1.6736309551693447, + "grad_norm": 0.5643911415296644, + "learning_rate": 3.4111047704584206e-07, + "loss": 0.2584, + "step": 35727 + }, + { + "epoch": 1.673677800159273, + "grad_norm": 0.5753377805296531, + "learning_rate": 3.410148515491388e-07, + "loss": 0.2708, + "step": 35728 + }, + { + "epoch": 1.6737246451492012, + "grad_norm": 0.5364938789702841, + "learning_rate": 3.409192384768523e-07, + "loss": 0.255, + "step": 35729 + }, + { + "epoch": 1.6737714901391296, + "grad_norm": 0.6332830129763374, + "learning_rate": 3.4082363782953294e-07, + "loss": 0.2769, + "step": 35730 + }, + { + "epoch": 1.673818335129058, + "grad_norm": 0.5935019515000475, + "learning_rate": 3.4072804960773145e-07, + "loss": 0.2597, + "step": 35731 + }, + { + "epoch": 1.6738651801189863, + "grad_norm": 0.5472247969968035, + "learning_rate": 3.406324738119968e-07, + "loss": 0.2608, + "step": 35732 + }, + { + "epoch": 1.6739120251089146, + "grad_norm": 0.5838819490326731, + "learning_rate": 3.405369104428796e-07, + "loss": 0.2502, + "step": 35733 + }, + { + "epoch": 1.673958870098843, + "grad_norm": 0.6132039602464152, + "learning_rate": 3.404413595009301e-07, + "loss": 0.2871, + "step": 35734 + }, + { + "epoch": 1.6740057150887713, + "grad_norm": 0.5752660813298124, + "learning_rate": 3.403458209866972e-07, + "loss": 0.2693, + "step": 35735 + }, + { + "epoch": 1.6740525600786995, + "grad_norm": 0.6270060992401272, + "learning_rate": 3.4025029490073166e-07, + "loss": 0.2869, + "step": 35736 + }, + { + "epoch": 1.674099405068628, + "grad_norm": 0.6024650088297397, + "learning_rate": 3.4015478124358234e-07, + "loss": 0.271, + "step": 35737 + }, + { + "epoch": 1.6741462500585562, + "grad_norm": 0.5686390331702175, + "learning_rate": 3.4005928001580037e-07, + "loss": 0.2619, + "step": 35738 + }, + { + "epoch": 1.6741930950484845, + "grad_norm": 0.6534811112694657, + "learning_rate": 3.399637912179335e-07, + "loss": 0.2789, + "step": 35739 + }, + { + "epoch": 1.674239940038413, + "grad_norm": 0.6901701870262923, + "learning_rate": 3.3986831485053216e-07, + "loss": 0.2845, + "step": 35740 + }, + { + "epoch": 1.6742867850283414, + "grad_norm": 0.5573348502952183, + "learning_rate": 3.3977285091414654e-07, + "loss": 0.2478, + "step": 35741 + }, + { + "epoch": 1.6743336300182694, + "grad_norm": 0.6060769325490851, + "learning_rate": 3.396773994093244e-07, + "loss": 0.2562, + "step": 35742 + }, + { + "epoch": 1.6743804750081979, + "grad_norm": 0.583909884637559, + "learning_rate": 3.39581960336616e-07, + "loss": 0.2689, + "step": 35743 + }, + { + "epoch": 1.6744273199981263, + "grad_norm": 0.6391180694146172, + "learning_rate": 3.3948653369657083e-07, + "loss": 0.2826, + "step": 35744 + }, + { + "epoch": 1.6744741649880546, + "grad_norm": 0.5757771975370369, + "learning_rate": 3.393911194897373e-07, + "loss": 0.2537, + "step": 35745 + }, + { + "epoch": 1.6745210099779828, + "grad_norm": 0.6018846874564097, + "learning_rate": 3.3929571771666447e-07, + "loss": 0.258, + "step": 35746 + }, + { + "epoch": 1.6745678549679113, + "grad_norm": 0.6481394802661745, + "learning_rate": 3.3920032837790215e-07, + "loss": 0.286, + "step": 35747 + }, + { + "epoch": 1.6746146999578395, + "grad_norm": 0.6049923121946738, + "learning_rate": 3.3910495147399913e-07, + "loss": 0.2696, + "step": 35748 + }, + { + "epoch": 1.6746615449477678, + "grad_norm": 0.5882683407313881, + "learning_rate": 3.390095870055035e-07, + "loss": 0.2678, + "step": 35749 + }, + { + "epoch": 1.6747083899376962, + "grad_norm": 0.6147778488685886, + "learning_rate": 3.3891423497296516e-07, + "loss": 0.2717, + "step": 35750 + }, + { + "epoch": 1.6747552349276245, + "grad_norm": 0.6033413257213206, + "learning_rate": 3.3881889537693144e-07, + "loss": 0.2789, + "step": 35751 + }, + { + "epoch": 1.6748020799175527, + "grad_norm": 0.6106090133389213, + "learning_rate": 3.387235682179521e-07, + "loss": 0.2828, + "step": 35752 + }, + { + "epoch": 1.6748489249074812, + "grad_norm": 0.60009336863994, + "learning_rate": 3.3862825349657515e-07, + "loss": 0.2734, + "step": 35753 + }, + { + "epoch": 1.6748957698974096, + "grad_norm": 0.6044168864603985, + "learning_rate": 3.385329512133495e-07, + "loss": 0.2745, + "step": 35754 + }, + { + "epoch": 1.6749426148873376, + "grad_norm": 0.602989764802025, + "learning_rate": 3.3843766136882397e-07, + "loss": 0.2707, + "step": 35755 + }, + { + "epoch": 1.674989459877266, + "grad_norm": 0.5452230030383496, + "learning_rate": 3.3834238396354637e-07, + "loss": 0.2624, + "step": 35756 + }, + { + "epoch": 1.6750363048671946, + "grad_norm": 0.6171347895487299, + "learning_rate": 3.382471189980646e-07, + "loss": 0.2717, + "step": 35757 + }, + { + "epoch": 1.6750831498571228, + "grad_norm": 0.6244806583895877, + "learning_rate": 3.381518664729272e-07, + "loss": 0.2559, + "step": 35758 + }, + { + "epoch": 1.675129994847051, + "grad_norm": 0.5966463724291075, + "learning_rate": 3.3805662638868255e-07, + "loss": 0.2752, + "step": 35759 + }, + { + "epoch": 1.6751768398369795, + "grad_norm": 0.6055841283238376, + "learning_rate": 3.379613987458785e-07, + "loss": 0.2679, + "step": 35760 + }, + { + "epoch": 1.6752236848269078, + "grad_norm": 0.6067652776652652, + "learning_rate": 3.378661835450639e-07, + "loss": 0.2647, + "step": 35761 + }, + { + "epoch": 1.675270529816836, + "grad_norm": 0.6188467331244998, + "learning_rate": 3.3777098078678545e-07, + "loss": 0.26, + "step": 35762 + }, + { + "epoch": 1.6753173748067645, + "grad_norm": 0.5946177379278123, + "learning_rate": 3.3767579047159186e-07, + "loss": 0.2783, + "step": 35763 + }, + { + "epoch": 1.6753642197966927, + "grad_norm": 0.5868277998976864, + "learning_rate": 3.375806126000303e-07, + "loss": 0.2771, + "step": 35764 + }, + { + "epoch": 1.675411064786621, + "grad_norm": 0.5809851266744273, + "learning_rate": 3.374854471726488e-07, + "loss": 0.2596, + "step": 35765 + }, + { + "epoch": 1.6754579097765494, + "grad_norm": 0.6051154689088721, + "learning_rate": 3.373902941899948e-07, + "loss": 0.2564, + "step": 35766 + }, + { + "epoch": 1.6755047547664779, + "grad_norm": 0.5729833073942245, + "learning_rate": 3.3729515365261615e-07, + "loss": 0.2654, + "step": 35767 + }, + { + "epoch": 1.675551599756406, + "grad_norm": 0.5545448013424775, + "learning_rate": 3.372000255610611e-07, + "loss": 0.2503, + "step": 35768 + }, + { + "epoch": 1.6755984447463343, + "grad_norm": 0.6535831724654111, + "learning_rate": 3.371049099158763e-07, + "loss": 0.2784, + "step": 35769 + }, + { + "epoch": 1.6756452897362628, + "grad_norm": 0.5917051221078341, + "learning_rate": 3.3700980671760833e-07, + "loss": 0.2697, + "step": 35770 + }, + { + "epoch": 1.675692134726191, + "grad_norm": 0.6125720516976787, + "learning_rate": 3.3691471596680544e-07, + "loss": 0.2718, + "step": 35771 + }, + { + "epoch": 1.6757389797161193, + "grad_norm": 0.5748425557525203, + "learning_rate": 3.3681963766401457e-07, + "loss": 0.2574, + "step": 35772 + }, + { + "epoch": 1.6757858247060478, + "grad_norm": 0.5943631351339093, + "learning_rate": 3.3672457180978306e-07, + "loss": 0.2653, + "step": 35773 + }, + { + "epoch": 1.675832669695976, + "grad_norm": 0.6031687174537642, + "learning_rate": 3.366295184046586e-07, + "loss": 0.2589, + "step": 35774 + }, + { + "epoch": 1.6758795146859042, + "grad_norm": 0.6180124176201367, + "learning_rate": 3.3653447744918686e-07, + "loss": 0.2828, + "step": 35775 + }, + { + "epoch": 1.6759263596758327, + "grad_norm": 0.578091726965997, + "learning_rate": 3.364394489439157e-07, + "loss": 0.2632, + "step": 35776 + }, + { + "epoch": 1.6759732046657612, + "grad_norm": 0.6042152823667967, + "learning_rate": 3.363444328893917e-07, + "loss": 0.2627, + "step": 35777 + }, + { + "epoch": 1.6760200496556892, + "grad_norm": 0.6165508265333729, + "learning_rate": 3.3624942928616124e-07, + "loss": 0.2753, + "step": 35778 + }, + { + "epoch": 1.6760668946456176, + "grad_norm": 0.5992289361036417, + "learning_rate": 3.3615443813477174e-07, + "loss": 0.2757, + "step": 35779 + }, + { + "epoch": 1.676113739635546, + "grad_norm": 0.6015449431726446, + "learning_rate": 3.3605945943576997e-07, + "loss": 0.2525, + "step": 35780 + }, + { + "epoch": 1.6761605846254743, + "grad_norm": 0.5913808456282721, + "learning_rate": 3.359644931897016e-07, + "loss": 0.2402, + "step": 35781 + }, + { + "epoch": 1.6762074296154026, + "grad_norm": 0.6194601967563087, + "learning_rate": 3.358695393971137e-07, + "loss": 0.2807, + "step": 35782 + }, + { + "epoch": 1.676254274605331, + "grad_norm": 0.6381237166178421, + "learning_rate": 3.357745980585533e-07, + "loss": 0.2866, + "step": 35783 + }, + { + "epoch": 1.6763011195952593, + "grad_norm": 0.6411413808756025, + "learning_rate": 3.356796691745656e-07, + "loss": 0.2764, + "step": 35784 + }, + { + "epoch": 1.6763479645851875, + "grad_norm": 0.5740965480707336, + "learning_rate": 3.3558475274569734e-07, + "loss": 0.258, + "step": 35785 + }, + { + "epoch": 1.676394809575116, + "grad_norm": 0.6313668174701393, + "learning_rate": 3.354898487724953e-07, + "loss": 0.286, + "step": 35786 + }, + { + "epoch": 1.6764416545650442, + "grad_norm": 0.5666386314231027, + "learning_rate": 3.353949572555046e-07, + "loss": 0.2565, + "step": 35787 + }, + { + "epoch": 1.6764884995549725, + "grad_norm": 0.5754722267994652, + "learning_rate": 3.3530007819527197e-07, + "loss": 0.2584, + "step": 35788 + }, + { + "epoch": 1.676535344544901, + "grad_norm": 0.6259344915329595, + "learning_rate": 3.352052115923432e-07, + "loss": 0.2699, + "step": 35789 + }, + { + "epoch": 1.6765821895348294, + "grad_norm": 0.5992322213515257, + "learning_rate": 3.35110357447265e-07, + "loss": 0.2645, + "step": 35790 + }, + { + "epoch": 1.6766290345247574, + "grad_norm": 0.5960780826931561, + "learning_rate": 3.3501551576058193e-07, + "loss": 0.2706, + "step": 35791 + }, + { + "epoch": 1.6766758795146859, + "grad_norm": 0.6397624497365261, + "learning_rate": 3.349206865328411e-07, + "loss": 0.2927, + "step": 35792 + }, + { + "epoch": 1.6767227245046143, + "grad_norm": 0.6212904127536272, + "learning_rate": 3.3482586976458703e-07, + "loss": 0.2912, + "step": 35793 + }, + { + "epoch": 1.6767695694945426, + "grad_norm": 0.6122201519893079, + "learning_rate": 3.347310654563657e-07, + "loss": 0.2887, + "step": 35794 + }, + { + "epoch": 1.6768164144844708, + "grad_norm": 0.5618628081656779, + "learning_rate": 3.3463627360872273e-07, + "loss": 0.2637, + "step": 35795 + }, + { + "epoch": 1.6768632594743993, + "grad_norm": 0.5777601213945647, + "learning_rate": 3.3454149422220416e-07, + "loss": 0.2425, + "step": 35796 + }, + { + "epoch": 1.6769101044643275, + "grad_norm": 0.6483806360087441, + "learning_rate": 3.3444672729735533e-07, + "loss": 0.2646, + "step": 35797 + }, + { + "epoch": 1.6769569494542558, + "grad_norm": 0.6016740302348412, + "learning_rate": 3.3435197283472136e-07, + "loss": 0.267, + "step": 35798 + }, + { + "epoch": 1.6770037944441842, + "grad_norm": 0.5974830391602239, + "learning_rate": 3.3425723083484683e-07, + "loss": 0.2599, + "step": 35799 + }, + { + "epoch": 1.6770506394341125, + "grad_norm": 0.6096030023449638, + "learning_rate": 3.341625012982777e-07, + "loss": 0.2967, + "step": 35800 + }, + { + "epoch": 1.6770974844240407, + "grad_norm": 0.5624057629264291, + "learning_rate": 3.340677842255591e-07, + "loss": 0.2581, + "step": 35801 + }, + { + "epoch": 1.6771443294139692, + "grad_norm": 0.6084648795154064, + "learning_rate": 3.339730796172358e-07, + "loss": 0.2837, + "step": 35802 + }, + { + "epoch": 1.6771911744038976, + "grad_norm": 0.5910632537388149, + "learning_rate": 3.3387838747385384e-07, + "loss": 0.2574, + "step": 35803 + }, + { + "epoch": 1.6772380193938259, + "grad_norm": 0.5893809326083036, + "learning_rate": 3.337837077959566e-07, + "loss": 0.2641, + "step": 35804 + }, + { + "epoch": 1.6772848643837541, + "grad_norm": 0.5913378091234655, + "learning_rate": 3.336890405840901e-07, + "loss": 0.2567, + "step": 35805 + }, + { + "epoch": 1.6773317093736826, + "grad_norm": 0.5974913534253204, + "learning_rate": 3.335943858387983e-07, + "loss": 0.2714, + "step": 35806 + }, + { + "epoch": 1.6773785543636108, + "grad_norm": 0.5728053301440443, + "learning_rate": 3.334997435606263e-07, + "loss": 0.2605, + "step": 35807 + }, + { + "epoch": 1.677425399353539, + "grad_norm": 0.6142592913547684, + "learning_rate": 3.334051137501185e-07, + "loss": 0.2819, + "step": 35808 + }, + { + "epoch": 1.6774722443434675, + "grad_norm": 0.5837918866367384, + "learning_rate": 3.3331049640781985e-07, + "loss": 0.2656, + "step": 35809 + }, + { + "epoch": 1.6775190893333958, + "grad_norm": 0.5708171718470014, + "learning_rate": 3.3321589153427565e-07, + "loss": 0.262, + "step": 35810 + }, + { + "epoch": 1.677565934323324, + "grad_norm": 0.5903500937617597, + "learning_rate": 3.331212991300284e-07, + "loss": 0.2634, + "step": 35811 + }, + { + "epoch": 1.6776127793132525, + "grad_norm": 0.5833781027625116, + "learning_rate": 3.330267191956241e-07, + "loss": 0.2484, + "step": 35812 + }, + { + "epoch": 1.677659624303181, + "grad_norm": 0.5889886035178199, + "learning_rate": 3.329321517316059e-07, + "loss": 0.2568, + "step": 35813 + }, + { + "epoch": 1.677706469293109, + "grad_norm": 0.6265024608644801, + "learning_rate": 3.3283759673851836e-07, + "loss": 0.27, + "step": 35814 + }, + { + "epoch": 1.6777533142830374, + "grad_norm": 0.6105258974402435, + "learning_rate": 3.3274305421690604e-07, + "loss": 0.2872, + "step": 35815 + }, + { + "epoch": 1.6778001592729659, + "grad_norm": 0.5751169124457781, + "learning_rate": 3.3264852416731246e-07, + "loss": 0.2688, + "step": 35816 + }, + { + "epoch": 1.6778470042628941, + "grad_norm": 0.5834479898648073, + "learning_rate": 3.325540065902824e-07, + "loss": 0.2709, + "step": 35817 + }, + { + "epoch": 1.6778938492528224, + "grad_norm": 0.5959857846610442, + "learning_rate": 3.324595014863594e-07, + "loss": 0.2767, + "step": 35818 + }, + { + "epoch": 1.6779406942427508, + "grad_norm": 0.5773396898943672, + "learning_rate": 3.323650088560867e-07, + "loss": 0.274, + "step": 35819 + }, + { + "epoch": 1.677987539232679, + "grad_norm": 0.5537057360398047, + "learning_rate": 3.322705287000083e-07, + "loss": 0.2513, + "step": 35820 + }, + { + "epoch": 1.6780343842226073, + "grad_norm": 0.5922672572968299, + "learning_rate": 3.3217606101866835e-07, + "loss": 0.2666, + "step": 35821 + }, + { + "epoch": 1.6780812292125358, + "grad_norm": 0.5474641674269911, + "learning_rate": 3.3208160581261026e-07, + "loss": 0.2551, + "step": 35822 + }, + { + "epoch": 1.678128074202464, + "grad_norm": 0.6301374647460786, + "learning_rate": 3.3198716308237843e-07, + "loss": 0.28, + "step": 35823 + }, + { + "epoch": 1.6781749191923923, + "grad_norm": 0.5803144048725101, + "learning_rate": 3.318927328285146e-07, + "loss": 0.2612, + "step": 35824 + }, + { + "epoch": 1.6782217641823207, + "grad_norm": 0.5899470118450441, + "learning_rate": 3.317983150515641e-07, + "loss": 0.2464, + "step": 35825 + }, + { + "epoch": 1.6782686091722492, + "grad_norm": 0.616798349425378, + "learning_rate": 3.3170390975206857e-07, + "loss": 0.2726, + "step": 35826 + }, + { + "epoch": 1.6783154541621772, + "grad_norm": 0.5825081335858777, + "learning_rate": 3.316095169305722e-07, + "loss": 0.2589, + "step": 35827 + }, + { + "epoch": 1.6783622991521057, + "grad_norm": 0.6484750432656133, + "learning_rate": 3.315151365876179e-07, + "loss": 0.2644, + "step": 35828 + }, + { + "epoch": 1.6784091441420341, + "grad_norm": 0.6161823861481901, + "learning_rate": 3.3142076872374944e-07, + "loss": 0.2702, + "step": 35829 + }, + { + "epoch": 1.6784559891319624, + "grad_norm": 0.5909557029106952, + "learning_rate": 3.3132641333950886e-07, + "loss": 0.2727, + "step": 35830 + }, + { + "epoch": 1.6785028341218906, + "grad_norm": 0.6287794067252648, + "learning_rate": 3.312320704354399e-07, + "loss": 0.28, + "step": 35831 + }, + { + "epoch": 1.678549679111819, + "grad_norm": 0.5598975287015616, + "learning_rate": 3.311377400120858e-07, + "loss": 0.2676, + "step": 35832 + }, + { + "epoch": 1.6785965241017473, + "grad_norm": 0.578242119823152, + "learning_rate": 3.3104342206998817e-07, + "loss": 0.2582, + "step": 35833 + }, + { + "epoch": 1.6786433690916756, + "grad_norm": 0.5888644388187743, + "learning_rate": 3.3094911660969067e-07, + "loss": 0.2671, + "step": 35834 + }, + { + "epoch": 1.678690214081604, + "grad_norm": 0.6219121697900626, + "learning_rate": 3.3085482363173634e-07, + "loss": 0.2825, + "step": 35835 + }, + { + "epoch": 1.6787370590715323, + "grad_norm": 0.6100134896317299, + "learning_rate": 3.3076054313666674e-07, + "loss": 0.2822, + "step": 35836 + }, + { + "epoch": 1.6787839040614605, + "grad_norm": 0.5606577700703089, + "learning_rate": 3.3066627512502505e-07, + "loss": 0.2578, + "step": 35837 + }, + { + "epoch": 1.678830749051389, + "grad_norm": 0.557387508290154, + "learning_rate": 3.3057201959735357e-07, + "loss": 0.2616, + "step": 35838 + }, + { + "epoch": 1.6788775940413174, + "grad_norm": 0.6257776257405498, + "learning_rate": 3.3047777655419556e-07, + "loss": 0.2879, + "step": 35839 + }, + { + "epoch": 1.6789244390312457, + "grad_norm": 0.6390360195050807, + "learning_rate": 3.303835459960919e-07, + "loss": 0.2648, + "step": 35840 + }, + { + "epoch": 1.678971284021174, + "grad_norm": 0.6046015534849687, + "learning_rate": 3.302893279235864e-07, + "loss": 0.2737, + "step": 35841 + }, + { + "epoch": 1.6790181290111024, + "grad_norm": 0.604207467959209, + "learning_rate": 3.301951223372196e-07, + "loss": 0.2661, + "step": 35842 + }, + { + "epoch": 1.6790649740010306, + "grad_norm": 0.5712851015837351, + "learning_rate": 3.3010092923753483e-07, + "loss": 0.2717, + "step": 35843 + }, + { + "epoch": 1.6791118189909588, + "grad_norm": 0.5968172912282569, + "learning_rate": 3.300067486250738e-07, + "loss": 0.2677, + "step": 35844 + }, + { + "epoch": 1.6791586639808873, + "grad_norm": 0.5970308134888441, + "learning_rate": 3.299125805003783e-07, + "loss": 0.2558, + "step": 35845 + }, + { + "epoch": 1.6792055089708156, + "grad_norm": 0.6134229832325437, + "learning_rate": 3.298184248639913e-07, + "loss": 0.2895, + "step": 35846 + }, + { + "epoch": 1.6792523539607438, + "grad_norm": 0.57527207349523, + "learning_rate": 3.2972428171645364e-07, + "loss": 0.2615, + "step": 35847 + }, + { + "epoch": 1.6792991989506723, + "grad_norm": 0.6305532327214863, + "learning_rate": 3.2963015105830666e-07, + "loss": 0.2653, + "step": 35848 + }, + { + "epoch": 1.6793460439406007, + "grad_norm": 0.6343603025727252, + "learning_rate": 3.295360328900929e-07, + "loss": 0.2759, + "step": 35849 + }, + { + "epoch": 1.6793928889305287, + "grad_norm": 0.5873097714851746, + "learning_rate": 3.294419272123536e-07, + "loss": 0.2647, + "step": 35850 + }, + { + "epoch": 1.6794397339204572, + "grad_norm": 0.5926145798436172, + "learning_rate": 3.2934783402563037e-07, + "loss": 0.2597, + "step": 35851 + }, + { + "epoch": 1.6794865789103857, + "grad_norm": 0.604599249290606, + "learning_rate": 3.292537533304654e-07, + "loss": 0.2653, + "step": 35852 + }, + { + "epoch": 1.679533423900314, + "grad_norm": 0.5868868798906507, + "learning_rate": 3.29159685127399e-07, + "loss": 0.2549, + "step": 35853 + }, + { + "epoch": 1.6795802688902421, + "grad_norm": 0.6239775633647591, + "learning_rate": 3.2906562941697335e-07, + "loss": 0.2682, + "step": 35854 + }, + { + "epoch": 1.6796271138801706, + "grad_norm": 0.6471804693049406, + "learning_rate": 3.289715861997292e-07, + "loss": 0.285, + "step": 35855 + }, + { + "epoch": 1.6796739588700988, + "grad_norm": 0.5768529594935351, + "learning_rate": 3.2887755547620747e-07, + "loss": 0.2596, + "step": 35856 + }, + { + "epoch": 1.679720803860027, + "grad_norm": 0.5856527165470137, + "learning_rate": 3.2878353724695e-07, + "loss": 0.2577, + "step": 35857 + }, + { + "epoch": 1.6797676488499556, + "grad_norm": 0.6100104960211682, + "learning_rate": 3.2868953151249745e-07, + "loss": 0.2627, + "step": 35858 + }, + { + "epoch": 1.6798144938398838, + "grad_norm": 0.6484948114928301, + "learning_rate": 3.2859553827339156e-07, + "loss": 0.2964, + "step": 35859 + }, + { + "epoch": 1.679861338829812, + "grad_norm": 0.5663065289030454, + "learning_rate": 3.2850155753017226e-07, + "loss": 0.2758, + "step": 35860 + }, + { + "epoch": 1.6799081838197405, + "grad_norm": 0.5895111973731426, + "learning_rate": 3.2840758928338043e-07, + "loss": 0.2689, + "step": 35861 + }, + { + "epoch": 1.679955028809669, + "grad_norm": 0.6196250766692356, + "learning_rate": 3.283136335335571e-07, + "loss": 0.2708, + "step": 35862 + }, + { + "epoch": 1.680001873799597, + "grad_norm": 0.6269861142783736, + "learning_rate": 3.282196902812429e-07, + "loss": 0.2709, + "step": 35863 + }, + { + "epoch": 1.6800487187895254, + "grad_norm": 0.5938464146761392, + "learning_rate": 3.281257595269788e-07, + "loss": 0.2574, + "step": 35864 + }, + { + "epoch": 1.680095563779454, + "grad_norm": 0.6121687760387362, + "learning_rate": 3.2803184127130546e-07, + "loss": 0.2605, + "step": 35865 + }, + { + "epoch": 1.6801424087693821, + "grad_norm": 0.5938754086958614, + "learning_rate": 3.279379355147622e-07, + "loss": 0.2636, + "step": 35866 + }, + { + "epoch": 1.6801892537593104, + "grad_norm": 0.6024978724516857, + "learning_rate": 3.2784404225789104e-07, + "loss": 0.2808, + "step": 35867 + }, + { + "epoch": 1.6802360987492388, + "grad_norm": 0.6155480541762519, + "learning_rate": 3.2775016150123075e-07, + "loss": 0.2906, + "step": 35868 + }, + { + "epoch": 1.680282943739167, + "grad_norm": 0.5929953194724911, + "learning_rate": 3.276562932453223e-07, + "loss": 0.2576, + "step": 35869 + }, + { + "epoch": 1.6803297887290953, + "grad_norm": 0.5447790495829894, + "learning_rate": 3.275624374907058e-07, + "loss": 0.2541, + "step": 35870 + }, + { + "epoch": 1.6803766337190238, + "grad_norm": 0.5917922759890056, + "learning_rate": 3.2746859423792217e-07, + "loss": 0.2854, + "step": 35871 + }, + { + "epoch": 1.680423478708952, + "grad_norm": 0.5856056480496803, + "learning_rate": 3.273747634875102e-07, + "loss": 0.2852, + "step": 35872 + }, + { + "epoch": 1.6804703236988803, + "grad_norm": 0.6599251457169649, + "learning_rate": 3.2728094524001027e-07, + "loss": 0.2972, + "step": 35873 + }, + { + "epoch": 1.6805171686888087, + "grad_norm": 0.6330762551279352, + "learning_rate": 3.2718713949596305e-07, + "loss": 0.2723, + "step": 35874 + }, + { + "epoch": 1.6805640136787372, + "grad_norm": 0.5815349858925246, + "learning_rate": 3.2709334625590725e-07, + "loss": 0.2629, + "step": 35875 + }, + { + "epoch": 1.6806108586686654, + "grad_norm": 0.6040994768074779, + "learning_rate": 3.269995655203828e-07, + "loss": 0.2626, + "step": 35876 + }, + { + "epoch": 1.6806577036585937, + "grad_norm": 0.578702306885852, + "learning_rate": 3.2690579728993007e-07, + "loss": 0.2711, + "step": 35877 + }, + { + "epoch": 1.6807045486485221, + "grad_norm": 0.5667560933009507, + "learning_rate": 3.2681204156508855e-07, + "loss": 0.2562, + "step": 35878 + }, + { + "epoch": 1.6807513936384504, + "grad_norm": 0.6010738632291257, + "learning_rate": 3.2671829834639706e-07, + "loss": 0.2806, + "step": 35879 + }, + { + "epoch": 1.6807982386283786, + "grad_norm": 0.5979881418366332, + "learning_rate": 3.266245676343957e-07, + "loss": 0.2598, + "step": 35880 + }, + { + "epoch": 1.680845083618307, + "grad_norm": 0.6144740833692504, + "learning_rate": 3.2653084942962407e-07, + "loss": 0.2717, + "step": 35881 + }, + { + "epoch": 1.6808919286082353, + "grad_norm": 0.6089710611829091, + "learning_rate": 3.264371437326205e-07, + "loss": 0.2763, + "step": 35882 + }, + { + "epoch": 1.6809387735981636, + "grad_norm": 0.5971108490800137, + "learning_rate": 3.2634345054392503e-07, + "loss": 0.2784, + "step": 35883 + }, + { + "epoch": 1.680985618588092, + "grad_norm": 0.5567010146317445, + "learning_rate": 3.2624976986407706e-07, + "loss": 0.2507, + "step": 35884 + }, + { + "epoch": 1.6810324635780205, + "grad_norm": 0.5964408111298316, + "learning_rate": 3.261561016936149e-07, + "loss": 0.2714, + "step": 35885 + }, + { + "epoch": 1.6810793085679485, + "grad_norm": 0.5894940198749796, + "learning_rate": 3.2606244603307804e-07, + "loss": 0.2681, + "step": 35886 + }, + { + "epoch": 1.681126153557877, + "grad_norm": 0.5964075943803426, + "learning_rate": 3.259688028830052e-07, + "loss": 0.2599, + "step": 35887 + }, + { + "epoch": 1.6811729985478054, + "grad_norm": 0.6132941989927202, + "learning_rate": 3.258751722439363e-07, + "loss": 0.2914, + "step": 35888 + }, + { + "epoch": 1.6812198435377337, + "grad_norm": 0.6157572553383905, + "learning_rate": 3.257815541164086e-07, + "loss": 0.2807, + "step": 35889 + }, + { + "epoch": 1.681266688527662, + "grad_norm": 0.5729925326647649, + "learning_rate": 3.2568794850096204e-07, + "loss": 0.2535, + "step": 35890 + }, + { + "epoch": 1.6813135335175904, + "grad_norm": 0.6260042064472231, + "learning_rate": 3.2559435539813446e-07, + "loss": 0.2865, + "step": 35891 + }, + { + "epoch": 1.6813603785075186, + "grad_norm": 0.6188446496942646, + "learning_rate": 3.255007748084649e-07, + "loss": 0.2654, + "step": 35892 + }, + { + "epoch": 1.6814072234974469, + "grad_norm": 0.591981132307, + "learning_rate": 3.254072067324918e-07, + "loss": 0.2774, + "step": 35893 + }, + { + "epoch": 1.6814540684873753, + "grad_norm": 0.6206560168188957, + "learning_rate": 3.253136511707536e-07, + "loss": 0.2565, + "step": 35894 + }, + { + "epoch": 1.6815009134773036, + "grad_norm": 0.625702723974957, + "learning_rate": 3.2522010812378933e-07, + "loss": 0.2725, + "step": 35895 + }, + { + "epoch": 1.6815477584672318, + "grad_norm": 0.6011376202511136, + "learning_rate": 3.2512657759213667e-07, + "loss": 0.2741, + "step": 35896 + }, + { + "epoch": 1.6815946034571603, + "grad_norm": 0.5767047192327822, + "learning_rate": 3.2503305957633347e-07, + "loss": 0.2485, + "step": 35897 + }, + { + "epoch": 1.6816414484470887, + "grad_norm": 0.60233066108361, + "learning_rate": 3.249395540769182e-07, + "loss": 0.2611, + "step": 35898 + }, + { + "epoch": 1.6816882934370168, + "grad_norm": 0.57543961765228, + "learning_rate": 3.248460610944293e-07, + "loss": 0.2501, + "step": 35899 + }, + { + "epoch": 1.6817351384269452, + "grad_norm": 0.6523101562352089, + "learning_rate": 3.247525806294044e-07, + "loss": 0.2796, + "step": 35900 + }, + { + "epoch": 1.6817819834168737, + "grad_norm": 0.6130811397737593, + "learning_rate": 3.2465911268238256e-07, + "loss": 0.2631, + "step": 35901 + }, + { + "epoch": 1.681828828406802, + "grad_norm": 0.6079955015824217, + "learning_rate": 3.2456565725389995e-07, + "loss": 0.2535, + "step": 35902 + }, + { + "epoch": 1.6818756733967302, + "grad_norm": 0.5690526478363842, + "learning_rate": 3.244722143444959e-07, + "loss": 0.2583, + "step": 35903 + }, + { + "epoch": 1.6819225183866586, + "grad_norm": 0.6208589088424716, + "learning_rate": 3.2437878395470664e-07, + "loss": 0.2821, + "step": 35904 + }, + { + "epoch": 1.6819693633765869, + "grad_norm": 0.6446140875042997, + "learning_rate": 3.242853660850709e-07, + "loss": 0.2773, + "step": 35905 + }, + { + "epoch": 1.682016208366515, + "grad_norm": 0.6088814618842872, + "learning_rate": 3.24191960736126e-07, + "loss": 0.2682, + "step": 35906 + }, + { + "epoch": 1.6820630533564436, + "grad_norm": 0.5910083537803821, + "learning_rate": 3.2409856790840937e-07, + "loss": 0.254, + "step": 35907 + }, + { + "epoch": 1.6821098983463718, + "grad_norm": 0.5743278057437153, + "learning_rate": 3.240051876024594e-07, + "loss": 0.2636, + "step": 35908 + }, + { + "epoch": 1.6821567433363, + "grad_norm": 0.5712176086231323, + "learning_rate": 3.239118198188124e-07, + "loss": 0.252, + "step": 35909 + }, + { + "epoch": 1.6822035883262285, + "grad_norm": 0.5973633507815772, + "learning_rate": 3.238184645580056e-07, + "loss": 0.2633, + "step": 35910 + }, + { + "epoch": 1.682250433316157, + "grad_norm": 0.6095440257885844, + "learning_rate": 3.2372512182057644e-07, + "loss": 0.267, + "step": 35911 + }, + { + "epoch": 1.682297278306085, + "grad_norm": 0.6161979218603619, + "learning_rate": 3.236317916070622e-07, + "loss": 0.282, + "step": 35912 + }, + { + "epoch": 1.6823441232960135, + "grad_norm": 0.6557194839206153, + "learning_rate": 3.2353847391800003e-07, + "loss": 0.297, + "step": 35913 + }, + { + "epoch": 1.682390968285942, + "grad_norm": 0.6033838115236838, + "learning_rate": 3.234451687539275e-07, + "loss": 0.2832, + "step": 35914 + }, + { + "epoch": 1.6824378132758702, + "grad_norm": 0.6193711729861012, + "learning_rate": 3.233518761153803e-07, + "loss": 0.2872, + "step": 35915 + }, + { + "epoch": 1.6824846582657984, + "grad_norm": 0.6148617868698505, + "learning_rate": 3.232585960028967e-07, + "loss": 0.2661, + "step": 35916 + }, + { + "epoch": 1.6825315032557269, + "grad_norm": 0.5945373765634285, + "learning_rate": 3.231653284170122e-07, + "loss": 0.2704, + "step": 35917 + }, + { + "epoch": 1.682578348245655, + "grad_norm": 0.5810717474849645, + "learning_rate": 3.23072073358264e-07, + "loss": 0.2629, + "step": 35918 + }, + { + "epoch": 1.6826251932355833, + "grad_norm": 0.6043405225804621, + "learning_rate": 3.229788308271889e-07, + "loss": 0.2664, + "step": 35919 + }, + { + "epoch": 1.6826720382255118, + "grad_norm": 0.6010736586666487, + "learning_rate": 3.228856008243242e-07, + "loss": 0.2512, + "step": 35920 + }, + { + "epoch": 1.68271888321544, + "grad_norm": 0.5677525414078974, + "learning_rate": 3.227923833502047e-07, + "loss": 0.2529, + "step": 35921 + }, + { + "epoch": 1.6827657282053683, + "grad_norm": 0.6496174012287136, + "learning_rate": 3.226991784053682e-07, + "loss": 0.2624, + "step": 35922 + }, + { + "epoch": 1.6828125731952968, + "grad_norm": 0.6041662374898815, + "learning_rate": 3.22605985990351e-07, + "loss": 0.2661, + "step": 35923 + }, + { + "epoch": 1.6828594181852252, + "grad_norm": 0.6404980041674481, + "learning_rate": 3.225128061056887e-07, + "loss": 0.2861, + "step": 35924 + }, + { + "epoch": 1.6829062631751535, + "grad_norm": 0.6563639392157763, + "learning_rate": 3.2241963875191767e-07, + "loss": 0.2846, + "step": 35925 + }, + { + "epoch": 1.6829531081650817, + "grad_norm": 0.5407104502157145, + "learning_rate": 3.2232648392957504e-07, + "loss": 0.2626, + "step": 35926 + }, + { + "epoch": 1.6829999531550102, + "grad_norm": 0.600141711170765, + "learning_rate": 3.2223334163919567e-07, + "loss": 0.2644, + "step": 35927 + }, + { + "epoch": 1.6830467981449384, + "grad_norm": 0.5839077542019249, + "learning_rate": 3.2214021188131605e-07, + "loss": 0.2626, + "step": 35928 + }, + { + "epoch": 1.6830936431348666, + "grad_norm": 0.6125237722029477, + "learning_rate": 3.2204709465647186e-07, + "loss": 0.2616, + "step": 35929 + }, + { + "epoch": 1.683140488124795, + "grad_norm": 0.5739506816252452, + "learning_rate": 3.2195398996519993e-07, + "loss": 0.273, + "step": 35930 + }, + { + "epoch": 1.6831873331147233, + "grad_norm": 0.5503009868952877, + "learning_rate": 3.218608978080351e-07, + "loss": 0.2568, + "step": 35931 + }, + { + "epoch": 1.6832341781046516, + "grad_norm": 0.6155479617755557, + "learning_rate": 3.217678181855136e-07, + "loss": 0.2814, + "step": 35932 + }, + { + "epoch": 1.68328102309458, + "grad_norm": 0.5937264202694159, + "learning_rate": 3.216747510981702e-07, + "loss": 0.2686, + "step": 35933 + }, + { + "epoch": 1.6833278680845085, + "grad_norm": 0.5853871696704099, + "learning_rate": 3.2158169654654124e-07, + "loss": 0.2559, + "step": 35934 + }, + { + "epoch": 1.6833747130744365, + "grad_norm": 0.5793236439316642, + "learning_rate": 3.214886545311621e-07, + "loss": 0.2789, + "step": 35935 + }, + { + "epoch": 1.683421558064365, + "grad_norm": 0.6189766255364779, + "learning_rate": 3.213956250525682e-07, + "loss": 0.2841, + "step": 35936 + }, + { + "epoch": 1.6834684030542935, + "grad_norm": 0.586062589716001, + "learning_rate": 3.213026081112955e-07, + "loss": 0.2739, + "step": 35937 + }, + { + "epoch": 1.6835152480442217, + "grad_norm": 0.6028587518758934, + "learning_rate": 3.212096037078785e-07, + "loss": 0.2749, + "step": 35938 + }, + { + "epoch": 1.68356209303415, + "grad_norm": 0.5840592981703076, + "learning_rate": 3.2111661184285215e-07, + "loss": 0.2642, + "step": 35939 + }, + { + "epoch": 1.6836089380240784, + "grad_norm": 0.6221928750818517, + "learning_rate": 3.210236325167521e-07, + "loss": 0.2654, + "step": 35940 + }, + { + "epoch": 1.6836557830140066, + "grad_norm": 0.7117074925874012, + "learning_rate": 3.2093066573011337e-07, + "loss": 0.2965, + "step": 35941 + }, + { + "epoch": 1.6837026280039349, + "grad_norm": 0.6006135573847889, + "learning_rate": 3.2083771148347096e-07, + "loss": 0.277, + "step": 35942 + }, + { + "epoch": 1.6837494729938633, + "grad_norm": 0.6312035472093355, + "learning_rate": 3.207447697773605e-07, + "loss": 0.2843, + "step": 35943 + }, + { + "epoch": 1.6837963179837916, + "grad_norm": 0.6009572655862002, + "learning_rate": 3.2065184061231547e-07, + "loss": 0.2697, + "step": 35944 + }, + { + "epoch": 1.6838431629737198, + "grad_norm": 0.5832309785036659, + "learning_rate": 3.205589239888718e-07, + "loss": 0.2721, + "step": 35945 + }, + { + "epoch": 1.6838900079636483, + "grad_norm": 0.5983478091070149, + "learning_rate": 3.204660199075635e-07, + "loss": 0.2558, + "step": 35946 + }, + { + "epoch": 1.6839368529535768, + "grad_norm": 0.6081393406517043, + "learning_rate": 3.2037312836892517e-07, + "loss": 0.2925, + "step": 35947 + }, + { + "epoch": 1.6839836979435048, + "grad_norm": 0.5899759408291451, + "learning_rate": 3.202802493734916e-07, + "loss": 0.2722, + "step": 35948 + }, + { + "epoch": 1.6840305429334332, + "grad_norm": 0.6558553581621781, + "learning_rate": 3.2018738292179746e-07, + "loss": 0.2796, + "step": 35949 + }, + { + "epoch": 1.6840773879233617, + "grad_norm": 0.5450702517359414, + "learning_rate": 3.200945290143778e-07, + "loss": 0.2664, + "step": 35950 + }, + { + "epoch": 1.68412423291329, + "grad_norm": 0.5888382365315191, + "learning_rate": 3.200016876517656e-07, + "loss": 0.2629, + "step": 35951 + }, + { + "epoch": 1.6841710779032182, + "grad_norm": 0.5975612349761815, + "learning_rate": 3.1990885883449617e-07, + "loss": 0.2864, + "step": 35952 + }, + { + "epoch": 1.6842179228931466, + "grad_norm": 0.6470811960252982, + "learning_rate": 3.198160425631028e-07, + "loss": 0.2847, + "step": 35953 + }, + { + "epoch": 1.6842647678830749, + "grad_norm": 0.6392703047871693, + "learning_rate": 3.1972323883811994e-07, + "loss": 0.2795, + "step": 35954 + }, + { + "epoch": 1.6843116128730031, + "grad_norm": 0.5551986555325001, + "learning_rate": 3.196304476600823e-07, + "loss": 0.259, + "step": 35955 + }, + { + "epoch": 1.6843584578629316, + "grad_norm": 0.6513977290332166, + "learning_rate": 3.1953766902952294e-07, + "loss": 0.2769, + "step": 35956 + }, + { + "epoch": 1.6844053028528598, + "grad_norm": 0.616827216822831, + "learning_rate": 3.1944490294697707e-07, + "loss": 0.2752, + "step": 35957 + }, + { + "epoch": 1.684452147842788, + "grad_norm": 0.6056960252565894, + "learning_rate": 3.193521494129778e-07, + "loss": 0.2655, + "step": 35958 + }, + { + "epoch": 1.6844989928327165, + "grad_norm": 0.5931768071809443, + "learning_rate": 3.1925940842805814e-07, + "loss": 0.2698, + "step": 35959 + }, + { + "epoch": 1.684545837822645, + "grad_norm": 0.601193365955295, + "learning_rate": 3.1916667999275257e-07, + "loss": 0.283, + "step": 35960 + }, + { + "epoch": 1.6845926828125732, + "grad_norm": 0.6207819583968285, + "learning_rate": 3.1907396410759435e-07, + "loss": 0.2577, + "step": 35961 + }, + { + "epoch": 1.6846395278025015, + "grad_norm": 0.5991677484804117, + "learning_rate": 3.189812607731177e-07, + "loss": 0.2685, + "step": 35962 + }, + { + "epoch": 1.68468637279243, + "grad_norm": 0.6017311013413247, + "learning_rate": 3.1888856998985585e-07, + "loss": 0.2725, + "step": 35963 + }, + { + "epoch": 1.6847332177823582, + "grad_norm": 0.6105500274452403, + "learning_rate": 3.1879589175834197e-07, + "loss": 0.267, + "step": 35964 + }, + { + "epoch": 1.6847800627722864, + "grad_norm": 0.5944037089947802, + "learning_rate": 3.187032260791098e-07, + "loss": 0.2586, + "step": 35965 + }, + { + "epoch": 1.6848269077622149, + "grad_norm": 0.6019220935197992, + "learning_rate": 3.186105729526917e-07, + "loss": 0.2708, + "step": 35966 + }, + { + "epoch": 1.6848737527521431, + "grad_norm": 0.5870345968596073, + "learning_rate": 3.185179323796217e-07, + "loss": 0.2667, + "step": 35967 + }, + { + "epoch": 1.6849205977420714, + "grad_norm": 0.5949845545095069, + "learning_rate": 3.184253043604327e-07, + "loss": 0.2871, + "step": 35968 + }, + { + "epoch": 1.6849674427319998, + "grad_norm": 0.5614127184491621, + "learning_rate": 3.183326888956581e-07, + "loss": 0.2683, + "step": 35969 + }, + { + "epoch": 1.6850142877219283, + "grad_norm": 0.6122483999070097, + "learning_rate": 3.1824008598583006e-07, + "loss": 0.2733, + "step": 35970 + }, + { + "epoch": 1.6850611327118563, + "grad_norm": 0.623830684646676, + "learning_rate": 3.18147495631482e-07, + "loss": 0.2835, + "step": 35971 + }, + { + "epoch": 1.6851079777017848, + "grad_norm": 0.5665204777830747, + "learning_rate": 3.1805491783314713e-07, + "loss": 0.2636, + "step": 35972 + }, + { + "epoch": 1.6851548226917132, + "grad_norm": 0.5865284777405068, + "learning_rate": 3.179623525913575e-07, + "loss": 0.2652, + "step": 35973 + }, + { + "epoch": 1.6852016676816415, + "grad_norm": 0.6414401793123906, + "learning_rate": 3.17869799906646e-07, + "loss": 0.2749, + "step": 35974 + }, + { + "epoch": 1.6852485126715697, + "grad_norm": 0.587491294772635, + "learning_rate": 3.177772597795459e-07, + "loss": 0.2811, + "step": 35975 + }, + { + "epoch": 1.6852953576614982, + "grad_norm": 0.5793290054519918, + "learning_rate": 3.1768473221058857e-07, + "loss": 0.2642, + "step": 35976 + }, + { + "epoch": 1.6853422026514264, + "grad_norm": 0.5976177028529561, + "learning_rate": 3.1759221720030706e-07, + "loss": 0.2676, + "step": 35977 + }, + { + "epoch": 1.6853890476413547, + "grad_norm": 0.5449129082464897, + "learning_rate": 3.17499714749234e-07, + "loss": 0.251, + "step": 35978 + }, + { + "epoch": 1.6854358926312831, + "grad_norm": 0.6234481474787898, + "learning_rate": 3.174072248579019e-07, + "loss": 0.2726, + "step": 35979 + }, + { + "epoch": 1.6854827376212114, + "grad_norm": 0.620135371932485, + "learning_rate": 3.1731474752684204e-07, + "loss": 0.274, + "step": 35980 + }, + { + "epoch": 1.6855295826111396, + "grad_norm": 0.5987167547951934, + "learning_rate": 3.1722228275658793e-07, + "loss": 0.2692, + "step": 35981 + }, + { + "epoch": 1.685576427601068, + "grad_norm": 0.6091692551759282, + "learning_rate": 3.171298305476703e-07, + "loss": 0.2676, + "step": 35982 + }, + { + "epoch": 1.6856232725909965, + "grad_norm": 0.5609940496104672, + "learning_rate": 3.170373909006219e-07, + "loss": 0.2603, + "step": 35983 + }, + { + "epoch": 1.6856701175809246, + "grad_norm": 0.6283324913238619, + "learning_rate": 3.1694496381597467e-07, + "loss": 0.2855, + "step": 35984 + }, + { + "epoch": 1.685716962570853, + "grad_norm": 0.6237951451001208, + "learning_rate": 3.1685254929426064e-07, + "loss": 0.2696, + "step": 35985 + }, + { + "epoch": 1.6857638075607815, + "grad_norm": 0.5827233087433435, + "learning_rate": 3.1676014733601187e-07, + "loss": 0.2649, + "step": 35986 + }, + { + "epoch": 1.6858106525507097, + "grad_norm": 0.5894365452111274, + "learning_rate": 3.1666775794175956e-07, + "loss": 0.2629, + "step": 35987 + }, + { + "epoch": 1.685857497540638, + "grad_norm": 0.6200958160038503, + "learning_rate": 3.1657538111203506e-07, + "loss": 0.268, + "step": 35988 + }, + { + "epoch": 1.6859043425305664, + "grad_norm": 0.6298534847244517, + "learning_rate": 3.1648301684737067e-07, + "loss": 0.2916, + "step": 35989 + }, + { + "epoch": 1.6859511875204947, + "grad_norm": 0.569340308005531, + "learning_rate": 3.163906651482976e-07, + "loss": 0.27, + "step": 35990 + }, + { + "epoch": 1.685998032510423, + "grad_norm": 0.5918381325874889, + "learning_rate": 3.162983260153474e-07, + "loss": 0.2738, + "step": 35991 + }, + { + "epoch": 1.6860448775003514, + "grad_norm": 0.593434316150968, + "learning_rate": 3.1620599944905186e-07, + "loss": 0.2718, + "step": 35992 + }, + { + "epoch": 1.6860917224902796, + "grad_norm": 0.6313871101468876, + "learning_rate": 3.1611368544994164e-07, + "loss": 0.2897, + "step": 35993 + }, + { + "epoch": 1.6861385674802079, + "grad_norm": 0.5635229843543673, + "learning_rate": 3.160213840185486e-07, + "loss": 0.2645, + "step": 35994 + }, + { + "epoch": 1.6861854124701363, + "grad_norm": 0.5774376219517979, + "learning_rate": 3.159290951554034e-07, + "loss": 0.2798, + "step": 35995 + }, + { + "epoch": 1.6862322574600648, + "grad_norm": 0.5574824873849268, + "learning_rate": 3.1583681886103694e-07, + "loss": 0.2537, + "step": 35996 + }, + { + "epoch": 1.686279102449993, + "grad_norm": 0.5969364987829181, + "learning_rate": 3.1574455513598055e-07, + "loss": 0.2675, + "step": 35997 + }, + { + "epoch": 1.6863259474399213, + "grad_norm": 0.5798947194535179, + "learning_rate": 3.1565230398076546e-07, + "loss": 0.2712, + "step": 35998 + }, + { + "epoch": 1.6863727924298497, + "grad_norm": 0.5647243040376749, + "learning_rate": 3.1556006539592287e-07, + "loss": 0.2678, + "step": 35999 + }, + { + "epoch": 1.686419637419778, + "grad_norm": 0.6092033986759917, + "learning_rate": 3.154678393819832e-07, + "loss": 0.2876, + "step": 36000 + }, + { + "epoch": 1.6864664824097062, + "grad_norm": 0.6257668801087582, + "learning_rate": 3.1537562593947634e-07, + "loss": 0.2686, + "step": 36001 + }, + { + "epoch": 1.6865133273996347, + "grad_norm": 0.631744243769355, + "learning_rate": 3.152834250689338e-07, + "loss": 0.2961, + "step": 36002 + }, + { + "epoch": 1.686560172389563, + "grad_norm": 0.5737617389029162, + "learning_rate": 3.1519123677088563e-07, + "loss": 0.2589, + "step": 36003 + }, + { + "epoch": 1.6866070173794911, + "grad_norm": 0.6275481400317763, + "learning_rate": 3.1509906104586317e-07, + "loss": 0.2622, + "step": 36004 + }, + { + "epoch": 1.6866538623694196, + "grad_norm": 0.6134332092730189, + "learning_rate": 3.150068978943968e-07, + "loss": 0.2779, + "step": 36005 + }, + { + "epoch": 1.686700707359348, + "grad_norm": 0.6020503364508487, + "learning_rate": 3.149147473170164e-07, + "loss": 0.2854, + "step": 36006 + }, + { + "epoch": 1.686747552349276, + "grad_norm": 0.641811293093624, + "learning_rate": 3.1482260931425256e-07, + "loss": 0.277, + "step": 36007 + }, + { + "epoch": 1.6867943973392046, + "grad_norm": 0.5522351601399763, + "learning_rate": 3.14730483886635e-07, + "loss": 0.2412, + "step": 36008 + }, + { + "epoch": 1.686841242329133, + "grad_norm": 0.6781873286528539, + "learning_rate": 3.1463837103469424e-07, + "loss": 0.2937, + "step": 36009 + }, + { + "epoch": 1.6868880873190613, + "grad_norm": 0.616899539631724, + "learning_rate": 3.1454627075896025e-07, + "loss": 0.2717, + "step": 36010 + }, + { + "epoch": 1.6869349323089895, + "grad_norm": 0.6301872237441718, + "learning_rate": 3.1445418305996315e-07, + "loss": 0.2708, + "step": 36011 + }, + { + "epoch": 1.686981777298918, + "grad_norm": 0.5926620571397971, + "learning_rate": 3.143621079382336e-07, + "loss": 0.2752, + "step": 36012 + }, + { + "epoch": 1.6870286222888462, + "grad_norm": 0.5996395803571168, + "learning_rate": 3.142700453943004e-07, + "loss": 0.2724, + "step": 36013 + }, + { + "epoch": 1.6870754672787744, + "grad_norm": 0.5934381920624207, + "learning_rate": 3.141779954286941e-07, + "loss": 0.269, + "step": 36014 + }, + { + "epoch": 1.687122312268703, + "grad_norm": 0.5842459921297976, + "learning_rate": 3.1408595804194336e-07, + "loss": 0.2601, + "step": 36015 + }, + { + "epoch": 1.6871691572586311, + "grad_norm": 0.6072265470075688, + "learning_rate": 3.1399393323457873e-07, + "loss": 0.2716, + "step": 36016 + }, + { + "epoch": 1.6872160022485594, + "grad_norm": 0.6484605398513028, + "learning_rate": 3.1390192100712953e-07, + "loss": 0.2995, + "step": 36017 + }, + { + "epoch": 1.6872628472384879, + "grad_norm": 0.5730322473334997, + "learning_rate": 3.138099213601259e-07, + "loss": 0.2672, + "step": 36018 + }, + { + "epoch": 1.6873096922284163, + "grad_norm": 0.6199027509843195, + "learning_rate": 3.1371793429409604e-07, + "loss": 0.277, + "step": 36019 + }, + { + "epoch": 1.6873565372183443, + "grad_norm": 0.619518881729746, + "learning_rate": 3.1362595980957005e-07, + "loss": 0.2762, + "step": 36020 + }, + { + "epoch": 1.6874033822082728, + "grad_norm": 0.5986690368969806, + "learning_rate": 3.1353399790707755e-07, + "loss": 0.2783, + "step": 36021 + }, + { + "epoch": 1.6874502271982013, + "grad_norm": 0.587210891584412, + "learning_rate": 3.13442048587147e-07, + "loss": 0.2656, + "step": 36022 + }, + { + "epoch": 1.6874970721881295, + "grad_norm": 0.5888745144321731, + "learning_rate": 3.1335011185030796e-07, + "loss": 0.2698, + "step": 36023 + }, + { + "epoch": 1.6875439171780577, + "grad_norm": 0.6212186545983929, + "learning_rate": 3.132581876970897e-07, + "loss": 0.2781, + "step": 36024 + }, + { + "epoch": 1.6875907621679862, + "grad_norm": 0.6307143262888301, + "learning_rate": 3.131662761280205e-07, + "loss": 0.2943, + "step": 36025 + }, + { + "epoch": 1.6876376071579144, + "grad_norm": 0.6067439768372443, + "learning_rate": 3.130743771436298e-07, + "loss": 0.2668, + "step": 36026 + }, + { + "epoch": 1.6876844521478427, + "grad_norm": 0.6015170020132075, + "learning_rate": 3.129824907444462e-07, + "loss": 0.2731, + "step": 36027 + }, + { + "epoch": 1.6877312971377711, + "grad_norm": 0.6378505871473541, + "learning_rate": 3.128906169309995e-07, + "loss": 0.2843, + "step": 36028 + }, + { + "epoch": 1.6877781421276994, + "grad_norm": 0.5807166865910475, + "learning_rate": 3.12798755703817e-07, + "loss": 0.2626, + "step": 36029 + }, + { + "epoch": 1.6878249871176276, + "grad_norm": 0.5851012073333194, + "learning_rate": 3.1270690706342837e-07, + "loss": 0.2409, + "step": 36030 + }, + { + "epoch": 1.687871832107556, + "grad_norm": 0.6152407352908393, + "learning_rate": 3.126150710103612e-07, + "loss": 0.2824, + "step": 36031 + }, + { + "epoch": 1.6879186770974846, + "grad_norm": 0.5906046641647504, + "learning_rate": 3.125232475451445e-07, + "loss": 0.267, + "step": 36032 + }, + { + "epoch": 1.6879655220874128, + "grad_norm": 0.5832369081473615, + "learning_rate": 3.124314366683068e-07, + "loss": 0.2712, + "step": 36033 + }, + { + "epoch": 1.688012367077341, + "grad_norm": 0.5796134496983459, + "learning_rate": 3.123396383803762e-07, + "loss": 0.268, + "step": 36034 + }, + { + "epoch": 1.6880592120672695, + "grad_norm": 0.5915807595839943, + "learning_rate": 3.12247852681882e-07, + "loss": 0.2715, + "step": 36035 + }, + { + "epoch": 1.6881060570571977, + "grad_norm": 0.6477955088155566, + "learning_rate": 3.1215607957335107e-07, + "loss": 0.2797, + "step": 36036 + }, + { + "epoch": 1.688152902047126, + "grad_norm": 0.5911170282855467, + "learning_rate": 3.120643190553119e-07, + "loss": 0.2832, + "step": 36037 + }, + { + "epoch": 1.6881997470370544, + "grad_norm": 0.5823402871695356, + "learning_rate": 3.119725711282925e-07, + "loss": 0.2667, + "step": 36038 + }, + { + "epoch": 1.6882465920269827, + "grad_norm": 0.6191667986786913, + "learning_rate": 3.11880835792821e-07, + "loss": 0.2774, + "step": 36039 + }, + { + "epoch": 1.688293437016911, + "grad_norm": 0.5668236102843156, + "learning_rate": 3.117891130494252e-07, + "loss": 0.26, + "step": 36040 + }, + { + "epoch": 1.6883402820068394, + "grad_norm": 0.6302484430550445, + "learning_rate": 3.116974028986339e-07, + "loss": 0.2793, + "step": 36041 + }, + { + "epoch": 1.6883871269967679, + "grad_norm": 0.5503513840629137, + "learning_rate": 3.116057053409732e-07, + "loss": 0.27, + "step": 36042 + }, + { + "epoch": 1.6884339719866959, + "grad_norm": 0.5769341671549914, + "learning_rate": 3.115140203769723e-07, + "loss": 0.2619, + "step": 36043 + }, + { + "epoch": 1.6884808169766243, + "grad_norm": 0.5720188441834549, + "learning_rate": 3.1142234800715765e-07, + "loss": 0.2622, + "step": 36044 + }, + { + "epoch": 1.6885276619665528, + "grad_norm": 0.6623359936282242, + "learning_rate": 3.1133068823205715e-07, + "loss": 0.2884, + "step": 36045 + }, + { + "epoch": 1.688574506956481, + "grad_norm": 0.6095996610166468, + "learning_rate": 3.112390410521987e-07, + "loss": 0.2863, + "step": 36046 + }, + { + "epoch": 1.6886213519464093, + "grad_norm": 0.5855207574334016, + "learning_rate": 3.1114740646810915e-07, + "loss": 0.2607, + "step": 36047 + }, + { + "epoch": 1.6886681969363377, + "grad_norm": 0.5801378733084978, + "learning_rate": 3.110557844803169e-07, + "loss": 0.2469, + "step": 36048 + }, + { + "epoch": 1.688715041926266, + "grad_norm": 0.5552995614167837, + "learning_rate": 3.109641750893483e-07, + "loss": 0.2578, + "step": 36049 + }, + { + "epoch": 1.6887618869161942, + "grad_norm": 0.6551859770544829, + "learning_rate": 3.108725782957303e-07, + "loss": 0.2862, + "step": 36050 + }, + { + "epoch": 1.6888087319061227, + "grad_norm": 0.6114713033794592, + "learning_rate": 3.1078099409999037e-07, + "loss": 0.2817, + "step": 36051 + }, + { + "epoch": 1.688855576896051, + "grad_norm": 0.6416774204613898, + "learning_rate": 3.106894225026555e-07, + "loss": 0.2802, + "step": 36052 + }, + { + "epoch": 1.6889024218859792, + "grad_norm": 0.6638878968789796, + "learning_rate": 3.1059786350425255e-07, + "loss": 0.2856, + "step": 36053 + }, + { + "epoch": 1.6889492668759076, + "grad_norm": 0.5966658147131552, + "learning_rate": 3.1050631710530936e-07, + "loss": 0.2738, + "step": 36054 + }, + { + "epoch": 1.688996111865836, + "grad_norm": 0.6161865499810032, + "learning_rate": 3.1041478330635146e-07, + "loss": 0.2414, + "step": 36055 + }, + { + "epoch": 1.689042956855764, + "grad_norm": 0.5839951885971055, + "learning_rate": 3.1032326210790667e-07, + "loss": 0.264, + "step": 36056 + }, + { + "epoch": 1.6890898018456926, + "grad_norm": 0.6011574040533554, + "learning_rate": 3.1023175351050067e-07, + "loss": 0.2729, + "step": 36057 + }, + { + "epoch": 1.689136646835621, + "grad_norm": 0.5930608499033382, + "learning_rate": 3.101402575146606e-07, + "loss": 0.2721, + "step": 36058 + }, + { + "epoch": 1.6891834918255493, + "grad_norm": 0.6020467392983465, + "learning_rate": 3.100487741209127e-07, + "loss": 0.2589, + "step": 36059 + }, + { + "epoch": 1.6892303368154775, + "grad_norm": 0.5796845045228342, + "learning_rate": 3.099573033297845e-07, + "loss": 0.2623, + "step": 36060 + }, + { + "epoch": 1.689277181805406, + "grad_norm": 0.5956651533695484, + "learning_rate": 3.0986584514180075e-07, + "loss": 0.2766, + "step": 36061 + }, + { + "epoch": 1.6893240267953342, + "grad_norm": 0.5875975633537666, + "learning_rate": 3.0977439955748895e-07, + "loss": 0.2675, + "step": 36062 + }, + { + "epoch": 1.6893708717852625, + "grad_norm": 0.6252163769568363, + "learning_rate": 3.096829665773751e-07, + "loss": 0.2768, + "step": 36063 + }, + { + "epoch": 1.689417716775191, + "grad_norm": 0.5801431476930723, + "learning_rate": 3.0959154620198496e-07, + "loss": 0.2729, + "step": 36064 + }, + { + "epoch": 1.6894645617651192, + "grad_norm": 0.6251743765245839, + "learning_rate": 3.0950013843184496e-07, + "loss": 0.2864, + "step": 36065 + }, + { + "epoch": 1.6895114067550474, + "grad_norm": 0.574640232524361, + "learning_rate": 3.094087432674817e-07, + "loss": 0.2669, + "step": 36066 + }, + { + "epoch": 1.6895582517449759, + "grad_norm": 0.5712275147348178, + "learning_rate": 3.093173607094199e-07, + "loss": 0.256, + "step": 36067 + }, + { + "epoch": 1.6896050967349043, + "grad_norm": 0.6280139747532463, + "learning_rate": 3.09225990758186e-07, + "loss": 0.2713, + "step": 36068 + }, + { + "epoch": 1.6896519417248326, + "grad_norm": 0.5882285180611888, + "learning_rate": 3.091346334143061e-07, + "loss": 0.2783, + "step": 36069 + }, + { + "epoch": 1.6896987867147608, + "grad_norm": 0.6544403092599872, + "learning_rate": 3.09043288678306e-07, + "loss": 0.2828, + "step": 36070 + }, + { + "epoch": 1.6897456317046893, + "grad_norm": 0.6040407591731144, + "learning_rate": 3.0895195655071067e-07, + "loss": 0.2738, + "step": 36071 + }, + { + "epoch": 1.6897924766946175, + "grad_norm": 0.6010175403002527, + "learning_rate": 3.088606370320468e-07, + "loss": 0.2696, + "step": 36072 + }, + { + "epoch": 1.6898393216845458, + "grad_norm": 0.6727896912193604, + "learning_rate": 3.0876933012283866e-07, + "loss": 0.2891, + "step": 36073 + }, + { + "epoch": 1.6898861666744742, + "grad_norm": 0.5976035025315901, + "learning_rate": 3.0867803582361217e-07, + "loss": 0.2594, + "step": 36074 + }, + { + "epoch": 1.6899330116644025, + "grad_norm": 0.6029320317630479, + "learning_rate": 3.0858675413489303e-07, + "loss": 0.2574, + "step": 36075 + }, + { + "epoch": 1.6899798566543307, + "grad_norm": 0.6023827912875942, + "learning_rate": 3.084954850572061e-07, + "loss": 0.2815, + "step": 36076 + }, + { + "epoch": 1.6900267016442592, + "grad_norm": 0.5798233630781647, + "learning_rate": 3.084042285910774e-07, + "loss": 0.2713, + "step": 36077 + }, + { + "epoch": 1.6900735466341876, + "grad_norm": 0.5783881914021547, + "learning_rate": 3.083129847370311e-07, + "loss": 0.2653, + "step": 36078 + }, + { + "epoch": 1.6901203916241156, + "grad_norm": 0.6193468243561269, + "learning_rate": 3.082217534955931e-07, + "loss": 0.268, + "step": 36079 + }, + { + "epoch": 1.690167236614044, + "grad_norm": 0.6219334169800773, + "learning_rate": 3.081305348672875e-07, + "loss": 0.2922, + "step": 36080 + }, + { + "epoch": 1.6902140816039726, + "grad_norm": 0.5850341922592103, + "learning_rate": 3.080393288526401e-07, + "loss": 0.2735, + "step": 36081 + }, + { + "epoch": 1.6902609265939008, + "grad_norm": 0.6368935411623627, + "learning_rate": 3.0794813545217516e-07, + "loss": 0.2705, + "step": 36082 + }, + { + "epoch": 1.690307771583829, + "grad_norm": 0.6165453918831041, + "learning_rate": 3.078569546664181e-07, + "loss": 0.2703, + "step": 36083 + }, + { + "epoch": 1.6903546165737575, + "grad_norm": 0.6482173497075102, + "learning_rate": 3.07765786495893e-07, + "loss": 0.2732, + "step": 36084 + }, + { + "epoch": 1.6904014615636858, + "grad_norm": 0.6386813425463279, + "learning_rate": 3.076746309411252e-07, + "loss": 0.2872, + "step": 36085 + }, + { + "epoch": 1.690448306553614, + "grad_norm": 0.5624859368662466, + "learning_rate": 3.0758348800263846e-07, + "loss": 0.2395, + "step": 36086 + }, + { + "epoch": 1.6904951515435425, + "grad_norm": 0.6010499909040099, + "learning_rate": 3.074923576809577e-07, + "loss": 0.2811, + "step": 36087 + }, + { + "epoch": 1.6905419965334707, + "grad_norm": 0.5894990595201652, + "learning_rate": 3.0740123997660745e-07, + "loss": 0.2644, + "step": 36088 + }, + { + "epoch": 1.690588841523399, + "grad_norm": 0.5983256993687674, + "learning_rate": 3.0731013489011196e-07, + "loss": 0.2796, + "step": 36089 + }, + { + "epoch": 1.6906356865133274, + "grad_norm": 0.590973228096219, + "learning_rate": 3.0721904242199593e-07, + "loss": 0.2695, + "step": 36090 + }, + { + "epoch": 1.6906825315032559, + "grad_norm": 0.6102028822183283, + "learning_rate": 3.0712796257278273e-07, + "loss": 0.2636, + "step": 36091 + }, + { + "epoch": 1.6907293764931839, + "grad_norm": 0.5897243791653661, + "learning_rate": 3.0703689534299725e-07, + "loss": 0.2854, + "step": 36092 + }, + { + "epoch": 1.6907762214831124, + "grad_norm": 0.5880206686817223, + "learning_rate": 3.0694584073316295e-07, + "loss": 0.2758, + "step": 36093 + }, + { + "epoch": 1.6908230664730408, + "grad_norm": 0.5863631477320526, + "learning_rate": 3.068547987438042e-07, + "loss": 0.2631, + "step": 36094 + }, + { + "epoch": 1.690869911462969, + "grad_norm": 0.5884320972346324, + "learning_rate": 3.0676376937544464e-07, + "loss": 0.263, + "step": 36095 + }, + { + "epoch": 1.6909167564528973, + "grad_norm": 0.6199741354039955, + "learning_rate": 3.066727526286084e-07, + "loss": 0.2867, + "step": 36096 + }, + { + "epoch": 1.6909636014428258, + "grad_norm": 0.5669273589203375, + "learning_rate": 3.065817485038197e-07, + "loss": 0.2653, + "step": 36097 + }, + { + "epoch": 1.691010446432754, + "grad_norm": 0.6035218702969097, + "learning_rate": 3.064907570016021e-07, + "loss": 0.265, + "step": 36098 + }, + { + "epoch": 1.6910572914226822, + "grad_norm": 0.575114587777447, + "learning_rate": 3.063997781224778e-07, + "loss": 0.2705, + "step": 36099 + }, + { + "epoch": 1.6911041364126107, + "grad_norm": 0.6221578284583227, + "learning_rate": 3.0630881186697186e-07, + "loss": 0.269, + "step": 36100 + }, + { + "epoch": 1.691150981402539, + "grad_norm": 0.562964855168202, + "learning_rate": 3.0621785823560704e-07, + "loss": 0.2595, + "step": 36101 + }, + { + "epoch": 1.6911978263924672, + "grad_norm": 0.6123758162955502, + "learning_rate": 3.061269172289072e-07, + "loss": 0.2788, + "step": 36102 + }, + { + "epoch": 1.6912446713823956, + "grad_norm": 0.555322417883215, + "learning_rate": 3.060359888473963e-07, + "loss": 0.2617, + "step": 36103 + }, + { + "epoch": 1.691291516372324, + "grad_norm": 0.5818000451635498, + "learning_rate": 3.059450730915961e-07, + "loss": 0.2592, + "step": 36104 + }, + { + "epoch": 1.6913383613622524, + "grad_norm": 0.57209631332979, + "learning_rate": 3.05854169962031e-07, + "loss": 0.265, + "step": 36105 + }, + { + "epoch": 1.6913852063521806, + "grad_norm": 0.5855477158349985, + "learning_rate": 3.057632794592233e-07, + "loss": 0.2761, + "step": 36106 + }, + { + "epoch": 1.691432051342109, + "grad_norm": 0.5712945890881309, + "learning_rate": 3.056724015836962e-07, + "loss": 0.2705, + "step": 36107 + }, + { + "epoch": 1.6914788963320373, + "grad_norm": 0.5652897686198978, + "learning_rate": 3.0558153633597295e-07, + "loss": 0.2451, + "step": 36108 + }, + { + "epoch": 1.6915257413219655, + "grad_norm": 0.5881778598396842, + "learning_rate": 3.054906837165772e-07, + "loss": 0.2592, + "step": 36109 + }, + { + "epoch": 1.691572586311894, + "grad_norm": 0.6261292022998545, + "learning_rate": 3.0539984372603025e-07, + "loss": 0.2806, + "step": 36110 + }, + { + "epoch": 1.6916194313018222, + "grad_norm": 0.6017156789168987, + "learning_rate": 3.053090163648556e-07, + "loss": 0.2798, + "step": 36111 + }, + { + "epoch": 1.6916662762917505, + "grad_norm": 0.6643310735079101, + "learning_rate": 3.052182016335767e-07, + "loss": 0.2811, + "step": 36112 + }, + { + "epoch": 1.691713121281679, + "grad_norm": 0.595307544534785, + "learning_rate": 3.0512739953271476e-07, + "loss": 0.2621, + "step": 36113 + }, + { + "epoch": 1.6917599662716074, + "grad_norm": 0.6570304346881615, + "learning_rate": 3.0503661006279274e-07, + "loss": 0.3022, + "step": 36114 + }, + { + "epoch": 1.6918068112615354, + "grad_norm": 0.5955264362687674, + "learning_rate": 3.049458332243341e-07, + "loss": 0.262, + "step": 36115 + }, + { + "epoch": 1.6918536562514639, + "grad_norm": 0.6117254963827429, + "learning_rate": 3.0485506901786006e-07, + "loss": 0.2799, + "step": 36116 + }, + { + "epoch": 1.6919005012413924, + "grad_norm": 0.6191522974534369, + "learning_rate": 3.04764317443893e-07, + "loss": 0.2821, + "step": 36117 + }, + { + "epoch": 1.6919473462313206, + "grad_norm": 0.6100704528338666, + "learning_rate": 3.046735785029559e-07, + "loss": 0.2641, + "step": 36118 + }, + { + "epoch": 1.6919941912212488, + "grad_norm": 0.5996731616468541, + "learning_rate": 3.04582852195571e-07, + "loss": 0.27, + "step": 36119 + }, + { + "epoch": 1.6920410362111773, + "grad_norm": 0.6052872676809328, + "learning_rate": 3.0449213852225936e-07, + "loss": 0.2663, + "step": 36120 + }, + { + "epoch": 1.6920878812011055, + "grad_norm": 0.6031296402777507, + "learning_rate": 3.044014374835444e-07, + "loss": 0.2891, + "step": 36121 + }, + { + "epoch": 1.6921347261910338, + "grad_norm": 0.5814733023466383, + "learning_rate": 3.043107490799466e-07, + "loss": 0.2566, + "step": 36122 + }, + { + "epoch": 1.6921815711809622, + "grad_norm": 0.5819969937145284, + "learning_rate": 3.0422007331198876e-07, + "loss": 0.2635, + "step": 36123 + }, + { + "epoch": 1.6922284161708905, + "grad_norm": 0.6157863951226898, + "learning_rate": 3.0412941018019227e-07, + "loss": 0.2673, + "step": 36124 + }, + { + "epoch": 1.6922752611608187, + "grad_norm": 0.6185935886088018, + "learning_rate": 3.040387596850791e-07, + "loss": 0.273, + "step": 36125 + }, + { + "epoch": 1.6923221061507472, + "grad_norm": 0.6071264707655785, + "learning_rate": 3.039481218271717e-07, + "loss": 0.2744, + "step": 36126 + }, + { + "epoch": 1.6923689511406756, + "grad_norm": 0.6011314957393615, + "learning_rate": 3.038574966069907e-07, + "loss": 0.2583, + "step": 36127 + }, + { + "epoch": 1.6924157961306037, + "grad_norm": 0.6257591883106902, + "learning_rate": 3.037668840250574e-07, + "loss": 0.2849, + "step": 36128 + }, + { + "epoch": 1.6924626411205321, + "grad_norm": 0.6403269408950132, + "learning_rate": 3.036762840818938e-07, + "loss": 0.2734, + "step": 36129 + }, + { + "epoch": 1.6925094861104606, + "grad_norm": 0.589679517878468, + "learning_rate": 3.03585696778021e-07, + "loss": 0.2609, + "step": 36130 + }, + { + "epoch": 1.6925563311003888, + "grad_norm": 0.5702706329005306, + "learning_rate": 3.0349512211396037e-07, + "loss": 0.2627, + "step": 36131 + }, + { + "epoch": 1.692603176090317, + "grad_norm": 0.5764887551231392, + "learning_rate": 3.0340456009023384e-07, + "loss": 0.2634, + "step": 36132 + }, + { + "epoch": 1.6926500210802455, + "grad_norm": 0.5854462043603623, + "learning_rate": 3.0331401070736147e-07, + "loss": 0.2832, + "step": 36133 + }, + { + "epoch": 1.6926968660701738, + "grad_norm": 0.6018277565952593, + "learning_rate": 3.0322347396586546e-07, + "loss": 0.2804, + "step": 36134 + }, + { + "epoch": 1.692743711060102, + "grad_norm": 0.5911887590940952, + "learning_rate": 3.031329498662658e-07, + "loss": 0.2731, + "step": 36135 + }, + { + "epoch": 1.6927905560500305, + "grad_norm": 0.6234306275689279, + "learning_rate": 3.0304243840908356e-07, + "loss": 0.2785, + "step": 36136 + }, + { + "epoch": 1.6928374010399587, + "grad_norm": 0.6294431771804753, + "learning_rate": 3.029519395948402e-07, + "loss": 0.2666, + "step": 36137 + }, + { + "epoch": 1.692884246029887, + "grad_norm": 0.6496885642693929, + "learning_rate": 3.028614534240559e-07, + "loss": 0.2826, + "step": 36138 + }, + { + "epoch": 1.6929310910198154, + "grad_norm": 0.5873778543198425, + "learning_rate": 3.0277097989725253e-07, + "loss": 0.2704, + "step": 36139 + }, + { + "epoch": 1.6929779360097439, + "grad_norm": 0.5630392132340523, + "learning_rate": 3.0268051901494953e-07, + "loss": 0.2637, + "step": 36140 + }, + { + "epoch": 1.6930247809996721, + "grad_norm": 0.6008601086307, + "learning_rate": 3.025900707776677e-07, + "loss": 0.2654, + "step": 36141 + }, + { + "epoch": 1.6930716259896004, + "grad_norm": 0.5835059646821277, + "learning_rate": 3.024996351859277e-07, + "loss": 0.2687, + "step": 36142 + }, + { + "epoch": 1.6931184709795288, + "grad_norm": 0.5809275465034568, + "learning_rate": 3.024092122402497e-07, + "loss": 0.2708, + "step": 36143 + }, + { + "epoch": 1.693165315969457, + "grad_norm": 0.5741676382816608, + "learning_rate": 3.0231880194115463e-07, + "loss": 0.2628, + "step": 36144 + }, + { + "epoch": 1.6932121609593853, + "grad_norm": 0.5751648783800236, + "learning_rate": 3.022284042891624e-07, + "loss": 0.2612, + "step": 36145 + }, + { + "epoch": 1.6932590059493138, + "grad_norm": 0.5945384220775697, + "learning_rate": 3.0213801928479364e-07, + "loss": 0.2773, + "step": 36146 + }, + { + "epoch": 1.693305850939242, + "grad_norm": 0.5970296280513894, + "learning_rate": 3.020476469285685e-07, + "loss": 0.2822, + "step": 36147 + }, + { + "epoch": 1.6933526959291703, + "grad_norm": 0.6141148807478449, + "learning_rate": 3.019572872210061e-07, + "loss": 0.2621, + "step": 36148 + }, + { + "epoch": 1.6933995409190987, + "grad_norm": 0.5846846001035167, + "learning_rate": 3.0186694016262673e-07, + "loss": 0.2657, + "step": 36149 + }, + { + "epoch": 1.6934463859090272, + "grad_norm": 0.588699317191996, + "learning_rate": 3.017766057539509e-07, + "loss": 0.2619, + "step": 36150 + }, + { + "epoch": 1.6934932308989552, + "grad_norm": 0.6208599520454092, + "learning_rate": 3.016862839954984e-07, + "loss": 0.2716, + "step": 36151 + }, + { + "epoch": 1.6935400758888837, + "grad_norm": 0.5742610625335758, + "learning_rate": 3.0159597488778916e-07, + "loss": 0.2586, + "step": 36152 + }, + { + "epoch": 1.6935869208788121, + "grad_norm": 0.566601324145463, + "learning_rate": 3.0150567843134194e-07, + "loss": 0.2715, + "step": 36153 + }, + { + "epoch": 1.6936337658687404, + "grad_norm": 0.6368065490752579, + "learning_rate": 3.014153946266776e-07, + "loss": 0.2689, + "step": 36154 + }, + { + "epoch": 1.6936806108586686, + "grad_norm": 0.5721314596428396, + "learning_rate": 3.0132512347431447e-07, + "loss": 0.257, + "step": 36155 + }, + { + "epoch": 1.693727455848597, + "grad_norm": 0.5742443776306966, + "learning_rate": 3.0123486497477263e-07, + "loss": 0.2704, + "step": 36156 + }, + { + "epoch": 1.6937743008385253, + "grad_norm": 0.6727850154459931, + "learning_rate": 3.011446191285713e-07, + "loss": 0.2878, + "step": 36157 + }, + { + "epoch": 1.6938211458284536, + "grad_norm": 0.630803607689654, + "learning_rate": 3.010543859362308e-07, + "loss": 0.2849, + "step": 36158 + }, + { + "epoch": 1.693867990818382, + "grad_norm": 0.5822127735644902, + "learning_rate": 3.009641653982692e-07, + "loss": 0.2572, + "step": 36159 + }, + { + "epoch": 1.6939148358083103, + "grad_norm": 0.6528789237056234, + "learning_rate": 3.0087395751520586e-07, + "loss": 0.2885, + "step": 36160 + }, + { + "epoch": 1.6939616807982385, + "grad_norm": 0.5985568726150787, + "learning_rate": 3.007837622875609e-07, + "loss": 0.2538, + "step": 36161 + }, + { + "epoch": 1.694008525788167, + "grad_norm": 0.6211589536642883, + "learning_rate": 3.006935797158522e-07, + "loss": 0.2666, + "step": 36162 + }, + { + "epoch": 1.6940553707780954, + "grad_norm": 0.5701744578610893, + "learning_rate": 3.006034098005989e-07, + "loss": 0.2766, + "step": 36163 + }, + { + "epoch": 1.6941022157680234, + "grad_norm": 0.5856613862364547, + "learning_rate": 3.005132525423207e-07, + "loss": 0.2618, + "step": 36164 + }, + { + "epoch": 1.694149060757952, + "grad_norm": 0.5670485429973946, + "learning_rate": 3.0042310794153565e-07, + "loss": 0.2683, + "step": 36165 + }, + { + "epoch": 1.6941959057478804, + "grad_norm": 0.5928377170711387, + "learning_rate": 3.0033297599876247e-07, + "loss": 0.2638, + "step": 36166 + }, + { + "epoch": 1.6942427507378086, + "grad_norm": 0.548542557308616, + "learning_rate": 3.0024285671452044e-07, + "loss": 0.2608, + "step": 36167 + }, + { + "epoch": 1.6942895957277369, + "grad_norm": 0.6028386745801464, + "learning_rate": 3.001527500893284e-07, + "loss": 0.2744, + "step": 36168 + }, + { + "epoch": 1.6943364407176653, + "grad_norm": 0.668123603102523, + "learning_rate": 3.0006265612370355e-07, + "loss": 0.2899, + "step": 36169 + }, + { + "epoch": 1.6943832857075936, + "grad_norm": 0.5900506235312725, + "learning_rate": 2.999725748181659e-07, + "loss": 0.2675, + "step": 36170 + }, + { + "epoch": 1.6944301306975218, + "grad_norm": 0.6639837292669857, + "learning_rate": 2.9988250617323247e-07, + "loss": 0.2942, + "step": 36171 + }, + { + "epoch": 1.6944769756874503, + "grad_norm": 0.6129591104630397, + "learning_rate": 2.9979245018942257e-07, + "loss": 0.2768, + "step": 36172 + }, + { + "epoch": 1.6945238206773785, + "grad_norm": 0.5304105582964518, + "learning_rate": 2.997024068672538e-07, + "loss": 0.2556, + "step": 36173 + }, + { + "epoch": 1.6945706656673067, + "grad_norm": 0.64840056534303, + "learning_rate": 2.996123762072448e-07, + "loss": 0.2728, + "step": 36174 + }, + { + "epoch": 1.6946175106572352, + "grad_norm": 0.6114298756793063, + "learning_rate": 2.995223582099138e-07, + "loss": 0.2672, + "step": 36175 + }, + { + "epoch": 1.6946643556471637, + "grad_norm": 0.5739720366194421, + "learning_rate": 2.9943235287577895e-07, + "loss": 0.2678, + "step": 36176 + }, + { + "epoch": 1.694711200637092, + "grad_norm": 0.5544624952716409, + "learning_rate": 2.993423602053569e-07, + "loss": 0.2566, + "step": 36177 + }, + { + "epoch": 1.6947580456270201, + "grad_norm": 0.590498961290238, + "learning_rate": 2.9925238019916676e-07, + "loss": 0.2778, + "step": 36178 + }, + { + "epoch": 1.6948048906169486, + "grad_norm": 0.6333830405601946, + "learning_rate": 2.9916241285772584e-07, + "loss": 0.2786, + "step": 36179 + }, + { + "epoch": 1.6948517356068769, + "grad_norm": 0.5834922592032052, + "learning_rate": 2.990724581815521e-07, + "loss": 0.2618, + "step": 36180 + }, + { + "epoch": 1.694898580596805, + "grad_norm": 0.5769458663518608, + "learning_rate": 2.9898251617116404e-07, + "loss": 0.2593, + "step": 36181 + }, + { + "epoch": 1.6949454255867336, + "grad_norm": 0.5951506793266533, + "learning_rate": 2.9889258682707755e-07, + "loss": 0.2572, + "step": 36182 + }, + { + "epoch": 1.6949922705766618, + "grad_norm": 0.6128350277744053, + "learning_rate": 2.9880267014981143e-07, + "loss": 0.2763, + "step": 36183 + }, + { + "epoch": 1.69503911556659, + "grad_norm": 0.5874638342628494, + "learning_rate": 2.987127661398823e-07, + "loss": 0.2613, + "step": 36184 + }, + { + "epoch": 1.6950859605565185, + "grad_norm": 0.5907260004668204, + "learning_rate": 2.986228747978079e-07, + "loss": 0.2644, + "step": 36185 + }, + { + "epoch": 1.695132805546447, + "grad_norm": 0.5713268196232987, + "learning_rate": 2.985329961241057e-07, + "loss": 0.268, + "step": 36186 + }, + { + "epoch": 1.695179650536375, + "grad_norm": 0.6081121422919128, + "learning_rate": 2.984431301192928e-07, + "loss": 0.2644, + "step": 36187 + }, + { + "epoch": 1.6952264955263034, + "grad_norm": 0.5850730026263091, + "learning_rate": 2.983532767838865e-07, + "loss": 0.2726, + "step": 36188 + }, + { + "epoch": 1.695273340516232, + "grad_norm": 0.6068353398000516, + "learning_rate": 2.982634361184042e-07, + "loss": 0.2722, + "step": 36189 + }, + { + "epoch": 1.6953201855061601, + "grad_norm": 0.6048921397066563, + "learning_rate": 2.981736081233616e-07, + "loss": 0.2857, + "step": 36190 + }, + { + "epoch": 1.6953670304960884, + "grad_norm": 0.5912377031785755, + "learning_rate": 2.980837927992766e-07, + "loss": 0.2653, + "step": 36191 + }, + { + "epoch": 1.6954138754860169, + "grad_norm": 0.551585131290671, + "learning_rate": 2.979939901466658e-07, + "loss": 0.2664, + "step": 36192 + }, + { + "epoch": 1.695460720475945, + "grad_norm": 0.5866179645295098, + "learning_rate": 2.9790420016604623e-07, + "loss": 0.2659, + "step": 36193 + }, + { + "epoch": 1.6955075654658733, + "grad_norm": 0.5785835535659336, + "learning_rate": 2.978144228579352e-07, + "loss": 0.2636, + "step": 36194 + }, + { + "epoch": 1.6955544104558018, + "grad_norm": 0.6001291451831724, + "learning_rate": 2.9772465822284797e-07, + "loss": 0.2736, + "step": 36195 + }, + { + "epoch": 1.69560125544573, + "grad_norm": 0.5613889095838571, + "learning_rate": 2.9763490626130237e-07, + "loss": 0.2609, + "step": 36196 + }, + { + "epoch": 1.6956481004356583, + "grad_norm": 0.5749316785439895, + "learning_rate": 2.975451669738139e-07, + "loss": 0.2737, + "step": 36197 + }, + { + "epoch": 1.6956949454255867, + "grad_norm": 0.5883405715945964, + "learning_rate": 2.9745544036089924e-07, + "loss": 0.2626, + "step": 36198 + }, + { + "epoch": 1.6957417904155152, + "grad_norm": 0.629403222776839, + "learning_rate": 2.9736572642307504e-07, + "loss": 0.2837, + "step": 36199 + }, + { + "epoch": 1.6957886354054432, + "grad_norm": 0.607657549029781, + "learning_rate": 2.972760251608578e-07, + "loss": 0.2782, + "step": 36200 + }, + { + "epoch": 1.6958354803953717, + "grad_norm": 0.6585334909720489, + "learning_rate": 2.971863365747632e-07, + "loss": 0.2702, + "step": 36201 + }, + { + "epoch": 1.6958823253853001, + "grad_norm": 0.5982044127572083, + "learning_rate": 2.970966606653075e-07, + "loss": 0.2674, + "step": 36202 + }, + { + "epoch": 1.6959291703752284, + "grad_norm": 0.606960626426604, + "learning_rate": 2.9700699743300726e-07, + "loss": 0.2836, + "step": 36203 + }, + { + "epoch": 1.6959760153651566, + "grad_norm": 0.5978502877455989, + "learning_rate": 2.969173468783776e-07, + "loss": 0.2607, + "step": 36204 + }, + { + "epoch": 1.696022860355085, + "grad_norm": 0.5565844103145687, + "learning_rate": 2.968277090019348e-07, + "loss": 0.2512, + "step": 36205 + }, + { + "epoch": 1.6960697053450133, + "grad_norm": 0.5854628722733918, + "learning_rate": 2.967380838041953e-07, + "loss": 0.281, + "step": 36206 + }, + { + "epoch": 1.6961165503349416, + "grad_norm": 0.6334091959949814, + "learning_rate": 2.9664847128567405e-07, + "loss": 0.2893, + "step": 36207 + }, + { + "epoch": 1.69616339532487, + "grad_norm": 0.5901421925933649, + "learning_rate": 2.9655887144688673e-07, + "loss": 0.2758, + "step": 36208 + }, + { + "epoch": 1.6962102403147983, + "grad_norm": 0.5984700817065747, + "learning_rate": 2.964692842883496e-07, + "loss": 0.2667, + "step": 36209 + }, + { + "epoch": 1.6962570853047265, + "grad_norm": 0.6456163726848946, + "learning_rate": 2.9637970981057804e-07, + "loss": 0.2679, + "step": 36210 + }, + { + "epoch": 1.696303930294655, + "grad_norm": 0.5866832853515109, + "learning_rate": 2.962901480140873e-07, + "loss": 0.2692, + "step": 36211 + }, + { + "epoch": 1.6963507752845834, + "grad_norm": 0.6161550547443451, + "learning_rate": 2.962005988993932e-07, + "loss": 0.2624, + "step": 36212 + }, + { + "epoch": 1.6963976202745117, + "grad_norm": 0.5982660507811906, + "learning_rate": 2.961110624670102e-07, + "loss": 0.2833, + "step": 36213 + }, + { + "epoch": 1.69644446526444, + "grad_norm": 0.6240198160325972, + "learning_rate": 2.960215387174542e-07, + "loss": 0.2781, + "step": 36214 + }, + { + "epoch": 1.6964913102543684, + "grad_norm": 0.5665776154948778, + "learning_rate": 2.9593202765124035e-07, + "loss": 0.261, + "step": 36215 + }, + { + "epoch": 1.6965381552442966, + "grad_norm": 0.6007223979132046, + "learning_rate": 2.958425292688835e-07, + "loss": 0.2618, + "step": 36216 + }, + { + "epoch": 1.6965850002342249, + "grad_norm": 0.6026734410100066, + "learning_rate": 2.957530435708994e-07, + "loss": 0.2717, + "step": 36217 + }, + { + "epoch": 1.6966318452241533, + "grad_norm": 0.6072274119496206, + "learning_rate": 2.956635705578023e-07, + "loss": 0.2689, + "step": 36218 + }, + { + "epoch": 1.6966786902140816, + "grad_norm": 0.6070593517384396, + "learning_rate": 2.955741102301077e-07, + "loss": 0.2803, + "step": 36219 + }, + { + "epoch": 1.6967255352040098, + "grad_norm": 0.5792110696733043, + "learning_rate": 2.9548466258832925e-07, + "loss": 0.2578, + "step": 36220 + }, + { + "epoch": 1.6967723801939383, + "grad_norm": 0.6234694566584076, + "learning_rate": 2.9539522763298275e-07, + "loss": 0.2712, + "step": 36221 + }, + { + "epoch": 1.6968192251838667, + "grad_norm": 0.6301978110914713, + "learning_rate": 2.953058053645827e-07, + "loss": 0.2856, + "step": 36222 + }, + { + "epoch": 1.6968660701737948, + "grad_norm": 0.5790859933256747, + "learning_rate": 2.9521639578364404e-07, + "loss": 0.2674, + "step": 36223 + }, + { + "epoch": 1.6969129151637232, + "grad_norm": 0.6322973151709224, + "learning_rate": 2.9512699889068017e-07, + "loss": 0.2636, + "step": 36224 + }, + { + "epoch": 1.6969597601536517, + "grad_norm": 0.6020985793090692, + "learning_rate": 2.950376146862069e-07, + "loss": 0.2645, + "step": 36225 + }, + { + "epoch": 1.69700660514358, + "grad_norm": 0.609490741203259, + "learning_rate": 2.949482431707376e-07, + "loss": 0.2803, + "step": 36226 + }, + { + "epoch": 1.6970534501335082, + "grad_norm": 0.6041736115062075, + "learning_rate": 2.948588843447867e-07, + "loss": 0.2661, + "step": 36227 + }, + { + "epoch": 1.6971002951234366, + "grad_norm": 0.6211424743501965, + "learning_rate": 2.947695382088686e-07, + "loss": 0.2757, + "step": 36228 + }, + { + "epoch": 1.6971471401133649, + "grad_norm": 0.5684847847913618, + "learning_rate": 2.946802047634978e-07, + "loss": 0.2703, + "step": 36229 + }, + { + "epoch": 1.6971939851032931, + "grad_norm": 0.612401595364737, + "learning_rate": 2.9459088400918853e-07, + "loss": 0.2771, + "step": 36230 + }, + { + "epoch": 1.6972408300932216, + "grad_norm": 0.6340861191833204, + "learning_rate": 2.9450157594645396e-07, + "loss": 0.2863, + "step": 36231 + }, + { + "epoch": 1.6972876750831498, + "grad_norm": 0.5998174703183352, + "learning_rate": 2.94412280575809e-07, + "loss": 0.2867, + "step": 36232 + }, + { + "epoch": 1.697334520073078, + "grad_norm": 0.5964980287484962, + "learning_rate": 2.943229978977666e-07, + "loss": 0.273, + "step": 36233 + }, + { + "epoch": 1.6973813650630065, + "grad_norm": 0.6028177763139378, + "learning_rate": 2.942337279128407e-07, + "loss": 0.2722, + "step": 36234 + }, + { + "epoch": 1.697428210052935, + "grad_norm": 0.597495355871772, + "learning_rate": 2.9414447062154545e-07, + "loss": 0.2703, + "step": 36235 + }, + { + "epoch": 1.697475055042863, + "grad_norm": 0.6362486473592446, + "learning_rate": 2.9405522602439425e-07, + "loss": 0.2865, + "step": 36236 + }, + { + "epoch": 1.6975219000327915, + "grad_norm": 0.570757541578886, + "learning_rate": 2.939659941219014e-07, + "loss": 0.2742, + "step": 36237 + }, + { + "epoch": 1.69756874502272, + "grad_norm": 0.5905306173915901, + "learning_rate": 2.938767749145799e-07, + "loss": 0.269, + "step": 36238 + }, + { + "epoch": 1.6976155900126482, + "grad_norm": 0.5675151521692688, + "learning_rate": 2.937875684029423e-07, + "loss": 0.2619, + "step": 36239 + }, + { + "epoch": 1.6976624350025764, + "grad_norm": 0.6048240672800785, + "learning_rate": 2.9369837458750297e-07, + "loss": 0.2636, + "step": 36240 + }, + { + "epoch": 1.6977092799925049, + "grad_norm": 0.5813653206581598, + "learning_rate": 2.9360919346877485e-07, + "loss": 0.2612, + "step": 36241 + }, + { + "epoch": 1.6977561249824331, + "grad_norm": 0.602272159005436, + "learning_rate": 2.9352002504727114e-07, + "loss": 0.2625, + "step": 36242 + }, + { + "epoch": 1.6978029699723614, + "grad_norm": 0.6318171509336931, + "learning_rate": 2.934308693235058e-07, + "loss": 0.2775, + "step": 36243 + }, + { + "epoch": 1.6978498149622898, + "grad_norm": 0.6173105609086665, + "learning_rate": 2.9334172629799066e-07, + "loss": 0.276, + "step": 36244 + }, + { + "epoch": 1.697896659952218, + "grad_norm": 0.6457595813585062, + "learning_rate": 2.9325259597123985e-07, + "loss": 0.2951, + "step": 36245 + }, + { + "epoch": 1.6979435049421463, + "grad_norm": 0.6058188742501261, + "learning_rate": 2.9316347834376507e-07, + "loss": 0.2785, + "step": 36246 + }, + { + "epoch": 1.6979903499320748, + "grad_norm": 0.5932718702764644, + "learning_rate": 2.9307437341607984e-07, + "loss": 0.2694, + "step": 36247 + }, + { + "epoch": 1.6980371949220032, + "grad_norm": 0.5726601342163179, + "learning_rate": 2.929852811886968e-07, + "loss": 0.2621, + "step": 36248 + }, + { + "epoch": 1.6980840399119315, + "grad_norm": 0.5693077639166481, + "learning_rate": 2.928962016621295e-07, + "loss": 0.2607, + "step": 36249 + }, + { + "epoch": 1.6981308849018597, + "grad_norm": 0.610227885510809, + "learning_rate": 2.928071348368891e-07, + "loss": 0.2795, + "step": 36250 + }, + { + "epoch": 1.6981777298917882, + "grad_norm": 0.5695864808796695, + "learning_rate": 2.9271808071348885e-07, + "loss": 0.2532, + "step": 36251 + }, + { + "epoch": 1.6982245748817164, + "grad_norm": 0.592860797247388, + "learning_rate": 2.926290392924419e-07, + "loss": 0.2625, + "step": 36252 + }, + { + "epoch": 1.6982714198716446, + "grad_norm": 0.6588543976242733, + "learning_rate": 2.9254001057425957e-07, + "loss": 0.2877, + "step": 36253 + }, + { + "epoch": 1.6983182648615731, + "grad_norm": 0.5981592626481109, + "learning_rate": 2.9245099455945473e-07, + "loss": 0.2661, + "step": 36254 + }, + { + "epoch": 1.6983651098515014, + "grad_norm": 0.5774534567000736, + "learning_rate": 2.923619912485398e-07, + "loss": 0.2683, + "step": 36255 + }, + { + "epoch": 1.6984119548414296, + "grad_norm": 0.5635942730270026, + "learning_rate": 2.922730006420263e-07, + "loss": 0.2704, + "step": 36256 + }, + { + "epoch": 1.698458799831358, + "grad_norm": 0.5965878045584431, + "learning_rate": 2.9218402274042685e-07, + "loss": 0.262, + "step": 36257 + }, + { + "epoch": 1.6985056448212865, + "grad_norm": 0.592133371314166, + "learning_rate": 2.9209505754425355e-07, + "loss": 0.2614, + "step": 36258 + }, + { + "epoch": 1.6985524898112145, + "grad_norm": 0.649517196744356, + "learning_rate": 2.9200610505401877e-07, + "loss": 0.2895, + "step": 36259 + }, + { + "epoch": 1.698599334801143, + "grad_norm": 0.6004629281631674, + "learning_rate": 2.9191716527023323e-07, + "loss": 0.2726, + "step": 36260 + }, + { + "epoch": 1.6986461797910715, + "grad_norm": 0.5811456804903516, + "learning_rate": 2.918282381934098e-07, + "loss": 0.2703, + "step": 36261 + }, + { + "epoch": 1.6986930247809997, + "grad_norm": 0.6180782828016412, + "learning_rate": 2.917393238240596e-07, + "loss": 0.2792, + "step": 36262 + }, + { + "epoch": 1.698739869770928, + "grad_norm": 0.5896942503976698, + "learning_rate": 2.9165042216269453e-07, + "loss": 0.2841, + "step": 36263 + }, + { + "epoch": 1.6987867147608564, + "grad_norm": 0.5907612696027954, + "learning_rate": 2.915615332098262e-07, + "loss": 0.2694, + "step": 36264 + }, + { + "epoch": 1.6988335597507846, + "grad_norm": 0.5498783992390388, + "learning_rate": 2.914726569659662e-07, + "loss": 0.2585, + "step": 36265 + }, + { + "epoch": 1.698880404740713, + "grad_norm": 0.5758447270184767, + "learning_rate": 2.9138379343162634e-07, + "loss": 0.2685, + "step": 36266 + }, + { + "epoch": 1.6989272497306414, + "grad_norm": 0.6180588750652912, + "learning_rate": 2.912949426073178e-07, + "loss": 0.2781, + "step": 36267 + }, + { + "epoch": 1.6989740947205696, + "grad_norm": 0.5878057277163398, + "learning_rate": 2.912061044935513e-07, + "loss": 0.2736, + "step": 36268 + }, + { + "epoch": 1.6990209397104978, + "grad_norm": 0.5721158460896016, + "learning_rate": 2.911172790908384e-07, + "loss": 0.2485, + "step": 36269 + }, + { + "epoch": 1.6990677847004263, + "grad_norm": 0.6309390771580153, + "learning_rate": 2.910284663996901e-07, + "loss": 0.2679, + "step": 36270 + }, + { + "epoch": 1.6991146296903548, + "grad_norm": 0.6407700469362478, + "learning_rate": 2.9093966642061823e-07, + "loss": 0.2825, + "step": 36271 + }, + { + "epoch": 1.6991614746802828, + "grad_norm": 0.5964101727325815, + "learning_rate": 2.908508791541337e-07, + "loss": 0.2711, + "step": 36272 + }, + { + "epoch": 1.6992083196702112, + "grad_norm": 0.623569386907135, + "learning_rate": 2.9076210460074637e-07, + "loss": 0.2709, + "step": 36273 + }, + { + "epoch": 1.6992551646601397, + "grad_norm": 0.6202236457701324, + "learning_rate": 2.906733427609687e-07, + "loss": 0.2742, + "step": 36274 + }, + { + "epoch": 1.699302009650068, + "grad_norm": 0.6057700113535884, + "learning_rate": 2.9058459363530995e-07, + "loss": 0.2681, + "step": 36275 + }, + { + "epoch": 1.6993488546399962, + "grad_norm": 0.6144893360217785, + "learning_rate": 2.9049585722428164e-07, + "loss": 0.2843, + "step": 36276 + }, + { + "epoch": 1.6993956996299246, + "grad_norm": 0.6398714129756802, + "learning_rate": 2.9040713352839423e-07, + "loss": 0.2909, + "step": 36277 + }, + { + "epoch": 1.699442544619853, + "grad_norm": 0.600826470888636, + "learning_rate": 2.903184225481584e-07, + "loss": 0.2687, + "step": 36278 + }, + { + "epoch": 1.6994893896097811, + "grad_norm": 0.5600522081394493, + "learning_rate": 2.9022972428408514e-07, + "loss": 0.2579, + "step": 36279 + }, + { + "epoch": 1.6995362345997096, + "grad_norm": 0.6230028009728872, + "learning_rate": 2.901410387366846e-07, + "loss": 0.2643, + "step": 36280 + }, + { + "epoch": 1.6995830795896378, + "grad_norm": 0.5671546374478874, + "learning_rate": 2.9005236590646637e-07, + "loss": 0.2741, + "step": 36281 + }, + { + "epoch": 1.699629924579566, + "grad_norm": 0.65119875626123, + "learning_rate": 2.899637057939414e-07, + "loss": 0.2683, + "step": 36282 + }, + { + "epoch": 1.6996767695694945, + "grad_norm": 0.6127529879612883, + "learning_rate": 2.8987505839961963e-07, + "loss": 0.2867, + "step": 36283 + }, + { + "epoch": 1.699723614559423, + "grad_norm": 0.6039643597795453, + "learning_rate": 2.897864237240114e-07, + "loss": 0.2706, + "step": 36284 + }, + { + "epoch": 1.6997704595493512, + "grad_norm": 0.5695836511169218, + "learning_rate": 2.896978017676269e-07, + "loss": 0.2708, + "step": 36285 + }, + { + "epoch": 1.6998173045392795, + "grad_norm": 0.5740197697231639, + "learning_rate": 2.8960919253097657e-07, + "loss": 0.265, + "step": 36286 + }, + { + "epoch": 1.699864149529208, + "grad_norm": 0.6078947166541949, + "learning_rate": 2.895205960145697e-07, + "loss": 0.2722, + "step": 36287 + }, + { + "epoch": 1.6999109945191362, + "grad_norm": 0.6070528390079918, + "learning_rate": 2.8943201221891594e-07, + "loss": 0.2628, + "step": 36288 + }, + { + "epoch": 1.6999578395090644, + "grad_norm": 0.6253234942368335, + "learning_rate": 2.8934344114452506e-07, + "loss": 0.2815, + "step": 36289 + }, + { + "epoch": 1.700004684498993, + "grad_norm": 0.5971196085148487, + "learning_rate": 2.89254882791907e-07, + "loss": 0.2724, + "step": 36290 + }, + { + "epoch": 1.7000515294889211, + "grad_norm": 0.6262719899538622, + "learning_rate": 2.8916633716157163e-07, + "loss": 0.2828, + "step": 36291 + }, + { + "epoch": 1.7000983744788494, + "grad_norm": 0.6050395566427099, + "learning_rate": 2.8907780425402875e-07, + "loss": 0.2753, + "step": 36292 + }, + { + "epoch": 1.7001452194687778, + "grad_norm": 0.5686345240972632, + "learning_rate": 2.8898928406978693e-07, + "loss": 0.2577, + "step": 36293 + }, + { + "epoch": 1.7001920644587063, + "grad_norm": 0.6128568688088972, + "learning_rate": 2.8890077660935657e-07, + "loss": 0.268, + "step": 36294 + }, + { + "epoch": 1.7002389094486343, + "grad_norm": 0.5847574038714748, + "learning_rate": 2.888122818732461e-07, + "loss": 0.2675, + "step": 36295 + }, + { + "epoch": 1.7002857544385628, + "grad_norm": 0.5398904537129029, + "learning_rate": 2.887237998619649e-07, + "loss": 0.258, + "step": 36296 + }, + { + "epoch": 1.7003325994284912, + "grad_norm": 0.5929564670397697, + "learning_rate": 2.8863533057602255e-07, + "loss": 0.2725, + "step": 36297 + }, + { + "epoch": 1.7003794444184195, + "grad_norm": 0.6264096611087119, + "learning_rate": 2.885468740159289e-07, + "loss": 0.2885, + "step": 36298 + }, + { + "epoch": 1.7004262894083477, + "grad_norm": 0.5976437928063156, + "learning_rate": 2.8845843018219133e-07, + "loss": 0.2564, + "step": 36299 + }, + { + "epoch": 1.7004731343982762, + "grad_norm": 0.6368954481682435, + "learning_rate": 2.883699990753197e-07, + "loss": 0.2806, + "step": 36300 + }, + { + "epoch": 1.7005199793882044, + "grad_norm": 0.6009992602898594, + "learning_rate": 2.882815806958233e-07, + "loss": 0.264, + "step": 36301 + }, + { + "epoch": 1.7005668243781327, + "grad_norm": 0.6067283055518845, + "learning_rate": 2.8819317504421016e-07, + "loss": 0.2669, + "step": 36302 + }, + { + "epoch": 1.7006136693680611, + "grad_norm": 0.5739247508803681, + "learning_rate": 2.8810478212098926e-07, + "loss": 0.2684, + "step": 36303 + }, + { + "epoch": 1.7006605143579894, + "grad_norm": 0.615808186402752, + "learning_rate": 2.8801640192666985e-07, + "loss": 0.2759, + "step": 36304 + }, + { + "epoch": 1.7007073593479176, + "grad_norm": 0.583937770998211, + "learning_rate": 2.8792803446175967e-07, + "loss": 0.2692, + "step": 36305 + }, + { + "epoch": 1.700754204337846, + "grad_norm": 0.5739801214326995, + "learning_rate": 2.878396797267677e-07, + "loss": 0.258, + "step": 36306 + }, + { + "epoch": 1.7008010493277745, + "grad_norm": 0.6180403218957568, + "learning_rate": 2.877513377222024e-07, + "loss": 0.2668, + "step": 36307 + }, + { + "epoch": 1.7008478943177026, + "grad_norm": 0.583050901555242, + "learning_rate": 2.8766300844857264e-07, + "loss": 0.2738, + "step": 36308 + }, + { + "epoch": 1.700894739307631, + "grad_norm": 0.6007344210813848, + "learning_rate": 2.8757469190638544e-07, + "loss": 0.268, + "step": 36309 + }, + { + "epoch": 1.7009415842975595, + "grad_norm": 0.5618828779780043, + "learning_rate": 2.8748638809615064e-07, + "loss": 0.2626, + "step": 36310 + }, + { + "epoch": 1.7009884292874877, + "grad_norm": 0.5719804098076209, + "learning_rate": 2.8739809701837487e-07, + "loss": 0.2626, + "step": 36311 + }, + { + "epoch": 1.701035274277416, + "grad_norm": 0.6657652276846578, + "learning_rate": 2.8730981867356703e-07, + "loss": 0.2918, + "step": 36312 + }, + { + "epoch": 1.7010821192673444, + "grad_norm": 0.5741931187062903, + "learning_rate": 2.872215530622349e-07, + "loss": 0.2577, + "step": 36313 + }, + { + "epoch": 1.7011289642572727, + "grad_norm": 0.5792251758620782, + "learning_rate": 2.871333001848869e-07, + "loss": 0.2645, + "step": 36314 + }, + { + "epoch": 1.701175809247201, + "grad_norm": 0.5767690983639526, + "learning_rate": 2.870450600420308e-07, + "loss": 0.2601, + "step": 36315 + }, + { + "epoch": 1.7012226542371294, + "grad_norm": 0.6075744174852779, + "learning_rate": 2.869568326341743e-07, + "loss": 0.2597, + "step": 36316 + }, + { + "epoch": 1.7012694992270576, + "grad_norm": 0.5781418066655729, + "learning_rate": 2.868686179618244e-07, + "loss": 0.2567, + "step": 36317 + }, + { + "epoch": 1.7013163442169859, + "grad_norm": 0.5701194132190549, + "learning_rate": 2.8678041602548945e-07, + "loss": 0.252, + "step": 36318 + }, + { + "epoch": 1.7013631892069143, + "grad_norm": 0.6073204777291372, + "learning_rate": 2.8669222682567693e-07, + "loss": 0.2659, + "step": 36319 + }, + { + "epoch": 1.7014100341968428, + "grad_norm": 0.5817099760522971, + "learning_rate": 2.866040503628942e-07, + "loss": 0.274, + "step": 36320 + }, + { + "epoch": 1.701456879186771, + "grad_norm": 0.6062177695982643, + "learning_rate": 2.8651588663764937e-07, + "loss": 0.2727, + "step": 36321 + }, + { + "epoch": 1.7015037241766993, + "grad_norm": 0.5943029383816502, + "learning_rate": 2.8642773565044914e-07, + "loss": 0.2606, + "step": 36322 + }, + { + "epoch": 1.7015505691666277, + "grad_norm": 0.6207081400392075, + "learning_rate": 2.86339597401801e-07, + "loss": 0.2757, + "step": 36323 + }, + { + "epoch": 1.701597414156556, + "grad_norm": 0.5949681108826542, + "learning_rate": 2.862514718922119e-07, + "loss": 0.2712, + "step": 36324 + }, + { + "epoch": 1.7016442591464842, + "grad_norm": 0.538090445177019, + "learning_rate": 2.8616335912218876e-07, + "loss": 0.2441, + "step": 36325 + }, + { + "epoch": 1.7016911041364127, + "grad_norm": 0.6190254471591321, + "learning_rate": 2.8607525909223944e-07, + "loss": 0.2787, + "step": 36326 + }, + { + "epoch": 1.701737949126341, + "grad_norm": 0.557241609338996, + "learning_rate": 2.8598717180287033e-07, + "loss": 0.2821, + "step": 36327 + }, + { + "epoch": 1.7017847941162692, + "grad_norm": 0.6179030232747089, + "learning_rate": 2.858990972545894e-07, + "loss": 0.2703, + "step": 36328 + }, + { + "epoch": 1.7018316391061976, + "grad_norm": 0.6016215330702591, + "learning_rate": 2.858110354479024e-07, + "loss": 0.2666, + "step": 36329 + }, + { + "epoch": 1.701878484096126, + "grad_norm": 0.5782707182031441, + "learning_rate": 2.857229863833158e-07, + "loss": 0.269, + "step": 36330 + }, + { + "epoch": 1.701925329086054, + "grad_norm": 0.5657240091181537, + "learning_rate": 2.85634950061337e-07, + "loss": 0.2538, + "step": 36331 + }, + { + "epoch": 1.7019721740759826, + "grad_norm": 0.6270135974392901, + "learning_rate": 2.8554692648247254e-07, + "loss": 0.2749, + "step": 36332 + }, + { + "epoch": 1.702019019065911, + "grad_norm": 0.5920409310987423, + "learning_rate": 2.85458915647229e-07, + "loss": 0.2762, + "step": 36333 + }, + { + "epoch": 1.7020658640558393, + "grad_norm": 0.5514089806175919, + "learning_rate": 2.8537091755611314e-07, + "loss": 0.2417, + "step": 36334 + }, + { + "epoch": 1.7021127090457675, + "grad_norm": 0.6220941483497644, + "learning_rate": 2.8528293220963067e-07, + "loss": 0.2834, + "step": 36335 + }, + { + "epoch": 1.702159554035696, + "grad_norm": 0.613459711034588, + "learning_rate": 2.851949596082887e-07, + "loss": 0.2724, + "step": 36336 + }, + { + "epoch": 1.7022063990256242, + "grad_norm": 0.5807037267541654, + "learning_rate": 2.851069997525924e-07, + "loss": 0.2594, + "step": 36337 + }, + { + "epoch": 1.7022532440155524, + "grad_norm": 0.5629995868428491, + "learning_rate": 2.850190526430488e-07, + "loss": 0.266, + "step": 36338 + }, + { + "epoch": 1.702300089005481, + "grad_norm": 0.6079896661797198, + "learning_rate": 2.8493111828016396e-07, + "loss": 0.2737, + "step": 36339 + }, + { + "epoch": 1.7023469339954092, + "grad_norm": 0.6134946754613577, + "learning_rate": 2.848431966644441e-07, + "loss": 0.2726, + "step": 36340 + }, + { + "epoch": 1.7023937789853374, + "grad_norm": 0.59985406484913, + "learning_rate": 2.8475528779639437e-07, + "loss": 0.259, + "step": 36341 + }, + { + "epoch": 1.7024406239752659, + "grad_norm": 0.6277440566027666, + "learning_rate": 2.8466739167652133e-07, + "loss": 0.2743, + "step": 36342 + }, + { + "epoch": 1.7024874689651943, + "grad_norm": 0.6065702745990028, + "learning_rate": 2.84579508305331e-07, + "loss": 0.2748, + "step": 36343 + }, + { + "epoch": 1.7025343139551223, + "grad_norm": 0.615871430248117, + "learning_rate": 2.8449163768332846e-07, + "loss": 0.2812, + "step": 36344 + }, + { + "epoch": 1.7025811589450508, + "grad_norm": 0.5762254382602299, + "learning_rate": 2.844037798110194e-07, + "loss": 0.2692, + "step": 36345 + }, + { + "epoch": 1.7026280039349793, + "grad_norm": 0.5734210988956614, + "learning_rate": 2.8431593468891044e-07, + "loss": 0.2575, + "step": 36346 + }, + { + "epoch": 1.7026748489249075, + "grad_norm": 0.5840919911613703, + "learning_rate": 2.8422810231750585e-07, + "loss": 0.2561, + "step": 36347 + }, + { + "epoch": 1.7027216939148357, + "grad_norm": 0.5868354398338991, + "learning_rate": 2.8414028269731163e-07, + "loss": 0.2781, + "step": 36348 + }, + { + "epoch": 1.7027685389047642, + "grad_norm": 0.6038675307430065, + "learning_rate": 2.840524758288332e-07, + "loss": 0.2525, + "step": 36349 + }, + { + "epoch": 1.7028153838946924, + "grad_norm": 0.6012139199492997, + "learning_rate": 2.839646817125763e-07, + "loss": 0.2589, + "step": 36350 + }, + { + "epoch": 1.7028622288846207, + "grad_norm": 0.6101315554868847, + "learning_rate": 2.838769003490449e-07, + "loss": 0.2766, + "step": 36351 + }, + { + "epoch": 1.7029090738745492, + "grad_norm": 0.6129808444216363, + "learning_rate": 2.8378913173874534e-07, + "loss": 0.2872, + "step": 36352 + }, + { + "epoch": 1.7029559188644774, + "grad_norm": 0.6103703574579207, + "learning_rate": 2.837013758821824e-07, + "loss": 0.2703, + "step": 36353 + }, + { + "epoch": 1.7030027638544056, + "grad_norm": 0.5907849642193072, + "learning_rate": 2.8361363277986083e-07, + "loss": 0.261, + "step": 36354 + }, + { + "epoch": 1.703049608844334, + "grad_norm": 0.6238488626129131, + "learning_rate": 2.835259024322856e-07, + "loss": 0.273, + "step": 36355 + }, + { + "epoch": 1.7030964538342626, + "grad_norm": 0.6142314529208664, + "learning_rate": 2.8343818483996174e-07, + "loss": 0.2804, + "step": 36356 + }, + { + "epoch": 1.7031432988241908, + "grad_norm": 0.6088354806605787, + "learning_rate": 2.8335048000339454e-07, + "loss": 0.2755, + "step": 36357 + }, + { + "epoch": 1.703190143814119, + "grad_norm": 0.6319281153711271, + "learning_rate": 2.8326278792308757e-07, + "loss": 0.2976, + "step": 36358 + }, + { + "epoch": 1.7032369888040475, + "grad_norm": 0.6160419812370178, + "learning_rate": 2.831751085995468e-07, + "loss": 0.2662, + "step": 36359 + }, + { + "epoch": 1.7032838337939757, + "grad_norm": 0.5961809801192619, + "learning_rate": 2.830874420332755e-07, + "loss": 0.2667, + "step": 36360 + }, + { + "epoch": 1.703330678783904, + "grad_norm": 0.6572342242944422, + "learning_rate": 2.8299978822477867e-07, + "loss": 0.3018, + "step": 36361 + }, + { + "epoch": 1.7033775237738324, + "grad_norm": 0.6351100776652809, + "learning_rate": 2.8291214717456104e-07, + "loss": 0.274, + "step": 36362 + }, + { + "epoch": 1.7034243687637607, + "grad_norm": 0.6479938658222273, + "learning_rate": 2.8282451888312716e-07, + "loss": 0.268, + "step": 36363 + }, + { + "epoch": 1.703471213753689, + "grad_norm": 0.6169022095967774, + "learning_rate": 2.8273690335098027e-07, + "loss": 0.2836, + "step": 36364 + }, + { + "epoch": 1.7035180587436174, + "grad_norm": 0.5872039252591006, + "learning_rate": 2.82649300578626e-07, + "loss": 0.2537, + "step": 36365 + }, + { + "epoch": 1.7035649037335459, + "grad_norm": 0.5898965458384784, + "learning_rate": 2.8256171056656707e-07, + "loss": 0.2601, + "step": 36366 + }, + { + "epoch": 1.7036117487234739, + "grad_norm": 0.634972065847191, + "learning_rate": 2.8247413331530804e-07, + "loss": 0.2707, + "step": 36367 + }, + { + "epoch": 1.7036585937134023, + "grad_norm": 0.5588160093408331, + "learning_rate": 2.82386568825353e-07, + "loss": 0.2652, + "step": 36368 + }, + { + "epoch": 1.7037054387033308, + "grad_norm": 0.5755269650466921, + "learning_rate": 2.8229901709720594e-07, + "loss": 0.2561, + "step": 36369 + }, + { + "epoch": 1.703752283693259, + "grad_norm": 0.6503219307426753, + "learning_rate": 2.8221147813137116e-07, + "loss": 0.2801, + "step": 36370 + }, + { + "epoch": 1.7037991286831873, + "grad_norm": 0.5785164722595415, + "learning_rate": 2.8212395192835195e-07, + "loss": 0.2775, + "step": 36371 + }, + { + "epoch": 1.7038459736731157, + "grad_norm": 0.5890103951115123, + "learning_rate": 2.8203643848865145e-07, + "loss": 0.2746, + "step": 36372 + }, + { + "epoch": 1.703892818663044, + "grad_norm": 0.5854866436928231, + "learning_rate": 2.8194893781277376e-07, + "loss": 0.2601, + "step": 36373 + }, + { + "epoch": 1.7039396636529722, + "grad_norm": 0.6485713245073748, + "learning_rate": 2.818614499012226e-07, + "loss": 0.2719, + "step": 36374 + }, + { + "epoch": 1.7039865086429007, + "grad_norm": 0.6098045445473631, + "learning_rate": 2.817739747545012e-07, + "loss": 0.2614, + "step": 36375 + }, + { + "epoch": 1.704033353632829, + "grad_norm": 0.5554566374153901, + "learning_rate": 2.8168651237311275e-07, + "loss": 0.2556, + "step": 36376 + }, + { + "epoch": 1.7040801986227572, + "grad_norm": 0.6239531997259918, + "learning_rate": 2.8159906275756183e-07, + "loss": 0.2656, + "step": 36377 + }, + { + "epoch": 1.7041270436126856, + "grad_norm": 0.6012783490702315, + "learning_rate": 2.815116259083503e-07, + "loss": 0.2715, + "step": 36378 + }, + { + "epoch": 1.704173888602614, + "grad_norm": 0.597731649212397, + "learning_rate": 2.814242018259816e-07, + "loss": 0.2724, + "step": 36379 + }, + { + "epoch": 1.7042207335925421, + "grad_norm": 0.6457751712779002, + "learning_rate": 2.8133679051095876e-07, + "loss": 0.273, + "step": 36380 + }, + { + "epoch": 1.7042675785824706, + "grad_norm": 0.5916693388713451, + "learning_rate": 2.8124939196378513e-07, + "loss": 0.2679, + "step": 36381 + }, + { + "epoch": 1.704314423572399, + "grad_norm": 0.5621139419258134, + "learning_rate": 2.8116200618496376e-07, + "loss": 0.2544, + "step": 36382 + }, + { + "epoch": 1.7043612685623273, + "grad_norm": 0.6125058426914135, + "learning_rate": 2.810746331749978e-07, + "loss": 0.2872, + "step": 36383 + }, + { + "epoch": 1.7044081135522555, + "grad_norm": 0.5801606775703178, + "learning_rate": 2.80987272934389e-07, + "loss": 0.2711, + "step": 36384 + }, + { + "epoch": 1.704454958542184, + "grad_norm": 0.6044159854224581, + "learning_rate": 2.808999254636413e-07, + "loss": 0.2826, + "step": 36385 + }, + { + "epoch": 1.7045018035321122, + "grad_norm": 0.5646035332144567, + "learning_rate": 2.808125907632561e-07, + "loss": 0.2631, + "step": 36386 + }, + { + "epoch": 1.7045486485220405, + "grad_norm": 0.5987705960063142, + "learning_rate": 2.8072526883373695e-07, + "loss": 0.2662, + "step": 36387 + }, + { + "epoch": 1.704595493511969, + "grad_norm": 0.5854976641118053, + "learning_rate": 2.806379596755859e-07, + "loss": 0.2688, + "step": 36388 + }, + { + "epoch": 1.7046423385018972, + "grad_norm": 0.5969096041556408, + "learning_rate": 2.80550663289306e-07, + "loss": 0.2642, + "step": 36389 + }, + { + "epoch": 1.7046891834918254, + "grad_norm": 0.565855321321352, + "learning_rate": 2.8046337967539886e-07, + "loss": 0.2736, + "step": 36390 + }, + { + "epoch": 1.7047360284817539, + "grad_norm": 0.570801834139619, + "learning_rate": 2.8037610883436704e-07, + "loss": 0.2622, + "step": 36391 + }, + { + "epoch": 1.7047828734716823, + "grad_norm": 0.5686516652147195, + "learning_rate": 2.802888507667134e-07, + "loss": 0.2773, + "step": 36392 + }, + { + "epoch": 1.7048297184616106, + "grad_norm": 0.6550633298355193, + "learning_rate": 2.8020160547293897e-07, + "loss": 0.2733, + "step": 36393 + }, + { + "epoch": 1.7048765634515388, + "grad_norm": 0.5763323660753135, + "learning_rate": 2.801143729535463e-07, + "loss": 0.2705, + "step": 36394 + }, + { + "epoch": 1.7049234084414673, + "grad_norm": 0.5886984765381088, + "learning_rate": 2.8002715320903815e-07, + "loss": 0.2545, + "step": 36395 + }, + { + "epoch": 1.7049702534313955, + "grad_norm": 0.6073047280445574, + "learning_rate": 2.7993994623991496e-07, + "loss": 0.272, + "step": 36396 + }, + { + "epoch": 1.7050170984213238, + "grad_norm": 0.5864014730350694, + "learning_rate": 2.798527520466798e-07, + "loss": 0.2631, + "step": 36397 + }, + { + "epoch": 1.7050639434112522, + "grad_norm": 0.5828327819187872, + "learning_rate": 2.7976557062983384e-07, + "loss": 0.262, + "step": 36398 + }, + { + "epoch": 1.7051107884011805, + "grad_norm": 0.6274454462083624, + "learning_rate": 2.7967840198987933e-07, + "loss": 0.2799, + "step": 36399 + }, + { + "epoch": 1.7051576333911087, + "grad_norm": 0.5862129814983668, + "learning_rate": 2.795912461273173e-07, + "loss": 0.2698, + "step": 36400 + }, + { + "epoch": 1.7052044783810372, + "grad_norm": 0.6227171601168586, + "learning_rate": 2.7950410304264985e-07, + "loss": 0.2681, + "step": 36401 + }, + { + "epoch": 1.7052513233709656, + "grad_norm": 0.6340086520609703, + "learning_rate": 2.7941697273637796e-07, + "loss": 0.2827, + "step": 36402 + }, + { + "epoch": 1.7052981683608937, + "grad_norm": 0.5973019864151762, + "learning_rate": 2.793298552090029e-07, + "loss": 0.2682, + "step": 36403 + }, + { + "epoch": 1.7053450133508221, + "grad_norm": 0.5767266651349332, + "learning_rate": 2.7924275046102677e-07, + "loss": 0.2658, + "step": 36404 + }, + { + "epoch": 1.7053918583407506, + "grad_norm": 0.5688125983164857, + "learning_rate": 2.791556584929503e-07, + "loss": 0.2658, + "step": 36405 + }, + { + "epoch": 1.7054387033306788, + "grad_norm": 0.6167581759402667, + "learning_rate": 2.7906857930527523e-07, + "loss": 0.2766, + "step": 36406 + }, + { + "epoch": 1.705485548320607, + "grad_norm": 0.653204463343577, + "learning_rate": 2.789815128985024e-07, + "loss": 0.2953, + "step": 36407 + }, + { + "epoch": 1.7055323933105355, + "grad_norm": 0.627246890792119, + "learning_rate": 2.7889445927313213e-07, + "loss": 0.269, + "step": 36408 + }, + { + "epoch": 1.7055792383004638, + "grad_norm": 0.6016337280587916, + "learning_rate": 2.7880741842966604e-07, + "loss": 0.2746, + "step": 36409 + }, + { + "epoch": 1.705626083290392, + "grad_norm": 0.5490769357975542, + "learning_rate": 2.7872039036860506e-07, + "loss": 0.2491, + "step": 36410 + }, + { + "epoch": 1.7056729282803205, + "grad_norm": 0.5808438852038005, + "learning_rate": 2.7863337509044963e-07, + "loss": 0.2685, + "step": 36411 + }, + { + "epoch": 1.7057197732702487, + "grad_norm": 0.5459420735071125, + "learning_rate": 2.7854637259570163e-07, + "loss": 0.2648, + "step": 36412 + }, + { + "epoch": 1.705766618260177, + "grad_norm": 0.6649444460731004, + "learning_rate": 2.784593828848603e-07, + "loss": 0.2935, + "step": 36413 + }, + { + "epoch": 1.7058134632501054, + "grad_norm": 0.5766245543236763, + "learning_rate": 2.7837240595842753e-07, + "loss": 0.274, + "step": 36414 + }, + { + "epoch": 1.7058603082400339, + "grad_norm": 0.6052510612141234, + "learning_rate": 2.782854418169023e-07, + "loss": 0.2708, + "step": 36415 + }, + { + "epoch": 1.705907153229962, + "grad_norm": 0.5744562747486884, + "learning_rate": 2.781984904607862e-07, + "loss": 0.2611, + "step": 36416 + }, + { + "epoch": 1.7059539982198904, + "grad_norm": 0.5583546705130857, + "learning_rate": 2.7811155189057945e-07, + "loss": 0.2504, + "step": 36417 + }, + { + "epoch": 1.7060008432098188, + "grad_norm": 0.644864615827539, + "learning_rate": 2.7802462610678205e-07, + "loss": 0.2826, + "step": 36418 + }, + { + "epoch": 1.706047688199747, + "grad_norm": 0.578452162622265, + "learning_rate": 2.779377131098951e-07, + "loss": 0.2719, + "step": 36419 + }, + { + "epoch": 1.7060945331896753, + "grad_norm": 0.6265453967715977, + "learning_rate": 2.7785081290041815e-07, + "loss": 0.2905, + "step": 36420 + }, + { + "epoch": 1.7061413781796038, + "grad_norm": 0.6102955398651954, + "learning_rate": 2.7776392547885056e-07, + "loss": 0.2683, + "step": 36421 + }, + { + "epoch": 1.706188223169532, + "grad_norm": 0.594969046768498, + "learning_rate": 2.77677050845693e-07, + "loss": 0.2758, + "step": 36422 + }, + { + "epoch": 1.7062350681594602, + "grad_norm": 0.638233122590943, + "learning_rate": 2.775901890014457e-07, + "loss": 0.2849, + "step": 36423 + }, + { + "epoch": 1.7062819131493887, + "grad_norm": 0.5747151302705298, + "learning_rate": 2.7750333994660816e-07, + "loss": 0.265, + "step": 36424 + }, + { + "epoch": 1.706328758139317, + "grad_norm": 0.6168182032486007, + "learning_rate": 2.7741650368168034e-07, + "loss": 0.2846, + "step": 36425 + }, + { + "epoch": 1.7063756031292452, + "grad_norm": 0.6165191556131059, + "learning_rate": 2.7732968020716235e-07, + "loss": 0.2776, + "step": 36426 + }, + { + "epoch": 1.7064224481191737, + "grad_norm": 0.5759331625882428, + "learning_rate": 2.772428695235535e-07, + "loss": 0.248, + "step": 36427 + }, + { + "epoch": 1.7064692931091021, + "grad_norm": 0.5602434386287334, + "learning_rate": 2.771560716313529e-07, + "loss": 0.2588, + "step": 36428 + }, + { + "epoch": 1.7065161380990304, + "grad_norm": 0.5899292527027843, + "learning_rate": 2.770692865310601e-07, + "loss": 0.2615, + "step": 36429 + }, + { + "epoch": 1.7065629830889586, + "grad_norm": 0.6254687319422348, + "learning_rate": 2.7698251422317497e-07, + "loss": 0.2728, + "step": 36430 + }, + { + "epoch": 1.706609828078887, + "grad_norm": 0.5755473199014149, + "learning_rate": 2.7689575470819684e-07, + "loss": 0.2458, + "step": 36431 + }, + { + "epoch": 1.7066566730688153, + "grad_norm": 0.6644329965556178, + "learning_rate": 2.768090079866251e-07, + "loss": 0.2804, + "step": 36432 + }, + { + "epoch": 1.7067035180587435, + "grad_norm": 0.6271142434604333, + "learning_rate": 2.767222740589584e-07, + "loss": 0.2751, + "step": 36433 + }, + { + "epoch": 1.706750363048672, + "grad_norm": 0.5898331797247781, + "learning_rate": 2.766355529256967e-07, + "loss": 0.2652, + "step": 36434 + }, + { + "epoch": 1.7067972080386002, + "grad_norm": 0.5771879995127859, + "learning_rate": 2.765488445873382e-07, + "loss": 0.2602, + "step": 36435 + }, + { + "epoch": 1.7068440530285285, + "grad_norm": 0.5700372968481245, + "learning_rate": 2.7646214904438224e-07, + "loss": 0.2658, + "step": 36436 + }, + { + "epoch": 1.706890898018457, + "grad_norm": 0.6632481310274024, + "learning_rate": 2.763754662973275e-07, + "loss": 0.2812, + "step": 36437 + }, + { + "epoch": 1.7069377430083854, + "grad_norm": 0.5669801861565208, + "learning_rate": 2.76288796346674e-07, + "loss": 0.2629, + "step": 36438 + }, + { + "epoch": 1.7069845879983134, + "grad_norm": 0.5833896031206368, + "learning_rate": 2.762021391929187e-07, + "loss": 0.2575, + "step": 36439 + }, + { + "epoch": 1.707031432988242, + "grad_norm": 0.6249039252621802, + "learning_rate": 2.761154948365613e-07, + "loss": 0.2636, + "step": 36440 + }, + { + "epoch": 1.7070782779781704, + "grad_norm": 0.6412850694154097, + "learning_rate": 2.760288632781008e-07, + "loss": 0.2637, + "step": 36441 + }, + { + "epoch": 1.7071251229680986, + "grad_norm": 0.5654950954266079, + "learning_rate": 2.7594224451803456e-07, + "loss": 0.2601, + "step": 36442 + }, + { + "epoch": 1.7071719679580268, + "grad_norm": 0.6513360832347649, + "learning_rate": 2.758556385568617e-07, + "loss": 0.2917, + "step": 36443 + }, + { + "epoch": 1.7072188129479553, + "grad_norm": 0.581023910838023, + "learning_rate": 2.757690453950815e-07, + "loss": 0.2726, + "step": 36444 + }, + { + "epoch": 1.7072656579378835, + "grad_norm": 0.5761582522896518, + "learning_rate": 2.7568246503319044e-07, + "loss": 0.2749, + "step": 36445 + }, + { + "epoch": 1.7073125029278118, + "grad_norm": 0.6109539492565466, + "learning_rate": 2.75595897471688e-07, + "loss": 0.2668, + "step": 36446 + }, + { + "epoch": 1.7073593479177402, + "grad_norm": 0.5892018034007845, + "learning_rate": 2.7550934271107194e-07, + "loss": 0.2541, + "step": 36447 + }, + { + "epoch": 1.7074061929076685, + "grad_norm": 0.6144963075897607, + "learning_rate": 2.754228007518411e-07, + "loss": 0.2647, + "step": 36448 + }, + { + "epoch": 1.7074530378975967, + "grad_norm": 0.5668041044299326, + "learning_rate": 2.753362715944924e-07, + "loss": 0.2683, + "step": 36449 + }, + { + "epoch": 1.7074998828875252, + "grad_norm": 0.5637630216853565, + "learning_rate": 2.7524975523952476e-07, + "loss": 0.2639, + "step": 36450 + }, + { + "epoch": 1.7075467278774537, + "grad_norm": 0.5683501946932917, + "learning_rate": 2.751632516874353e-07, + "loss": 0.2554, + "step": 36451 + }, + { + "epoch": 1.7075935728673817, + "grad_norm": 0.6396086618364413, + "learning_rate": 2.75076760938722e-07, + "loss": 0.2798, + "step": 36452 + }, + { + "epoch": 1.7076404178573101, + "grad_norm": 0.579124970898476, + "learning_rate": 2.749902829938825e-07, + "loss": 0.2782, + "step": 36453 + }, + { + "epoch": 1.7076872628472386, + "grad_norm": 0.6089018119926346, + "learning_rate": 2.7490381785341495e-07, + "loss": 0.2696, + "step": 36454 + }, + { + "epoch": 1.7077341078371668, + "grad_norm": 0.5826291753293527, + "learning_rate": 2.748173655178171e-07, + "loss": 0.2623, + "step": 36455 + }, + { + "epoch": 1.707780952827095, + "grad_norm": 0.6174167683382275, + "learning_rate": 2.7473092598758604e-07, + "loss": 0.286, + "step": 36456 + }, + { + "epoch": 1.7078277978170235, + "grad_norm": 0.5780507961050898, + "learning_rate": 2.7464449926321887e-07, + "loss": 0.2593, + "step": 36457 + }, + { + "epoch": 1.7078746428069518, + "grad_norm": 0.6275706001509653, + "learning_rate": 2.745580853452132e-07, + "loss": 0.2863, + "step": 36458 + }, + { + "epoch": 1.70792148779688, + "grad_norm": 0.6282527806211167, + "learning_rate": 2.7447168423406614e-07, + "loss": 0.2859, + "step": 36459 + }, + { + "epoch": 1.7079683327868085, + "grad_norm": 0.5902262042895529, + "learning_rate": 2.7438529593027536e-07, + "loss": 0.2655, + "step": 36460 + }, + { + "epoch": 1.7080151777767367, + "grad_norm": 0.6110032779690711, + "learning_rate": 2.7429892043433855e-07, + "loss": 0.2797, + "step": 36461 + }, + { + "epoch": 1.708062022766665, + "grad_norm": 0.6388972435215298, + "learning_rate": 2.742125577467511e-07, + "loss": 0.2689, + "step": 36462 + }, + { + "epoch": 1.7081088677565934, + "grad_norm": 0.5914575943429221, + "learning_rate": 2.741262078680118e-07, + "loss": 0.271, + "step": 36463 + }, + { + "epoch": 1.708155712746522, + "grad_norm": 0.5891699386771995, + "learning_rate": 2.740398707986161e-07, + "loss": 0.2656, + "step": 36464 + }, + { + "epoch": 1.7082025577364501, + "grad_norm": 0.6117741123064488, + "learning_rate": 2.739535465390614e-07, + "loss": 0.2864, + "step": 36465 + }, + { + "epoch": 1.7082494027263784, + "grad_norm": 0.6126841383203939, + "learning_rate": 2.738672350898447e-07, + "loss": 0.2853, + "step": 36466 + }, + { + "epoch": 1.7082962477163068, + "grad_norm": 0.6064354888208541, + "learning_rate": 2.737809364514624e-07, + "loss": 0.2821, + "step": 36467 + }, + { + "epoch": 1.708343092706235, + "grad_norm": 0.5395320601640505, + "learning_rate": 2.7369465062441177e-07, + "loss": 0.2651, + "step": 36468 + }, + { + "epoch": 1.7083899376961633, + "grad_norm": 0.5978487722084348, + "learning_rate": 2.7360837760918886e-07, + "loss": 0.2733, + "step": 36469 + }, + { + "epoch": 1.7084367826860918, + "grad_norm": 0.5693137699867626, + "learning_rate": 2.7352211740628966e-07, + "loss": 0.263, + "step": 36470 + }, + { + "epoch": 1.70848362767602, + "grad_norm": 0.6118496603503971, + "learning_rate": 2.7343587001621125e-07, + "loss": 0.2731, + "step": 36471 + }, + { + "epoch": 1.7085304726659483, + "grad_norm": 0.5898683846236011, + "learning_rate": 2.7334963543944964e-07, + "loss": 0.2712, + "step": 36472 + }, + { + "epoch": 1.7085773176558767, + "grad_norm": 0.5940575535197395, + "learning_rate": 2.732634136765011e-07, + "loss": 0.2844, + "step": 36473 + }, + { + "epoch": 1.7086241626458052, + "grad_norm": 0.6055014777285762, + "learning_rate": 2.7317720472786275e-07, + "loss": 0.266, + "step": 36474 + }, + { + "epoch": 1.7086710076357332, + "grad_norm": 0.6241708379116175, + "learning_rate": 2.7309100859402915e-07, + "loss": 0.2789, + "step": 36475 + }, + { + "epoch": 1.7087178526256617, + "grad_norm": 0.5817676995571809, + "learning_rate": 2.7300482527549773e-07, + "loss": 0.2631, + "step": 36476 + }, + { + "epoch": 1.7087646976155901, + "grad_norm": 0.635524065403271, + "learning_rate": 2.72918654772763e-07, + "loss": 0.2925, + "step": 36477 + }, + { + "epoch": 1.7088115426055184, + "grad_norm": 0.6001237417198758, + "learning_rate": 2.728324970863219e-07, + "loss": 0.2769, + "step": 36478 + }, + { + "epoch": 1.7088583875954466, + "grad_norm": 0.6083012009111691, + "learning_rate": 2.7274635221667013e-07, + "loss": 0.2653, + "step": 36479 + }, + { + "epoch": 1.708905232585375, + "grad_norm": 0.6644056812210782, + "learning_rate": 2.726602201643036e-07, + "loss": 0.2874, + "step": 36480 + }, + { + "epoch": 1.7089520775753033, + "grad_norm": 0.580712423324927, + "learning_rate": 2.7257410092971724e-07, + "loss": 0.2694, + "step": 36481 + }, + { + "epoch": 1.7089989225652316, + "grad_norm": 0.6339468031518422, + "learning_rate": 2.7248799451340705e-07, + "loss": 0.2609, + "step": 36482 + }, + { + "epoch": 1.70904576755516, + "grad_norm": 0.6125268324271139, + "learning_rate": 2.7240190091586934e-07, + "loss": 0.2688, + "step": 36483 + }, + { + "epoch": 1.7090926125450883, + "grad_norm": 0.604655912341006, + "learning_rate": 2.7231582013759804e-07, + "loss": 0.2497, + "step": 36484 + }, + { + "epoch": 1.7091394575350165, + "grad_norm": 0.6486233473841423, + "learning_rate": 2.7222975217908923e-07, + "loss": 0.2695, + "step": 36485 + }, + { + "epoch": 1.709186302524945, + "grad_norm": 0.5871107800757197, + "learning_rate": 2.7214369704083865e-07, + "loss": 0.2828, + "step": 36486 + }, + { + "epoch": 1.7092331475148734, + "grad_norm": 0.5745946273383132, + "learning_rate": 2.720576547233411e-07, + "loss": 0.2704, + "step": 36487 + }, + { + "epoch": 1.7092799925048014, + "grad_norm": 0.5715748478410934, + "learning_rate": 2.719716252270918e-07, + "loss": 0.2708, + "step": 36488 + }, + { + "epoch": 1.70932683749473, + "grad_norm": 0.5953773234494144, + "learning_rate": 2.7188560855258533e-07, + "loss": 0.2645, + "step": 36489 + }, + { + "epoch": 1.7093736824846584, + "grad_norm": 0.6510094513633511, + "learning_rate": 2.71799604700318e-07, + "loss": 0.2829, + "step": 36490 + }, + { + "epoch": 1.7094205274745866, + "grad_norm": 0.595694231886342, + "learning_rate": 2.717136136707832e-07, + "loss": 0.2702, + "step": 36491 + }, + { + "epoch": 1.7094673724645149, + "grad_norm": 0.592312924092361, + "learning_rate": 2.716276354644767e-07, + "loss": 0.2696, + "step": 36492 + }, + { + "epoch": 1.7095142174544433, + "grad_norm": 0.6211543190955917, + "learning_rate": 2.7154167008189345e-07, + "loss": 0.2659, + "step": 36493 + }, + { + "epoch": 1.7095610624443716, + "grad_norm": 0.6094569193579826, + "learning_rate": 2.7145571752352745e-07, + "loss": 0.2894, + "step": 36494 + }, + { + "epoch": 1.7096079074342998, + "grad_norm": 0.5559640342475485, + "learning_rate": 2.7136977778987354e-07, + "loss": 0.2644, + "step": 36495 + }, + { + "epoch": 1.7096547524242283, + "grad_norm": 0.5969060682082631, + "learning_rate": 2.7128385088142637e-07, + "loss": 0.2719, + "step": 36496 + }, + { + "epoch": 1.7097015974141565, + "grad_norm": 0.5983319200337135, + "learning_rate": 2.7119793679868084e-07, + "loss": 0.2666, + "step": 36497 + }, + { + "epoch": 1.7097484424040847, + "grad_norm": 0.5753674254984936, + "learning_rate": 2.7111203554213065e-07, + "loss": 0.2673, + "step": 36498 + }, + { + "epoch": 1.7097952873940132, + "grad_norm": 0.5904218224082953, + "learning_rate": 2.7102614711227105e-07, + "loss": 0.2822, + "step": 36499 + }, + { + "epoch": 1.7098421323839417, + "grad_norm": 0.5813990333398887, + "learning_rate": 2.709402715095952e-07, + "loss": 0.2627, + "step": 36500 + }, + { + "epoch": 1.70988897737387, + "grad_norm": 0.637550657177247, + "learning_rate": 2.70854408734598e-07, + "loss": 0.297, + "step": 36501 + }, + { + "epoch": 1.7099358223637982, + "grad_norm": 0.6153487244196539, + "learning_rate": 2.7076855878777314e-07, + "loss": 0.2772, + "step": 36502 + }, + { + "epoch": 1.7099826673537266, + "grad_norm": 0.6358972367799747, + "learning_rate": 2.7068272166961534e-07, + "loss": 0.2839, + "step": 36503 + }, + { + "epoch": 1.7100295123436549, + "grad_norm": 0.5792474502690578, + "learning_rate": 2.705968973806181e-07, + "loss": 0.2671, + "step": 36504 + }, + { + "epoch": 1.710076357333583, + "grad_norm": 0.587747991886093, + "learning_rate": 2.705110859212756e-07, + "loss": 0.2609, + "step": 36505 + }, + { + "epoch": 1.7101232023235116, + "grad_norm": 0.5754053575818345, + "learning_rate": 2.7042528729208097e-07, + "loss": 0.2645, + "step": 36506 + }, + { + "epoch": 1.7101700473134398, + "grad_norm": 0.5592529192306366, + "learning_rate": 2.7033950149352837e-07, + "loss": 0.2589, + "step": 36507 + }, + { + "epoch": 1.710216892303368, + "grad_norm": 0.5763411327152239, + "learning_rate": 2.7025372852611143e-07, + "loss": 0.2734, + "step": 36508 + }, + { + "epoch": 1.7102637372932965, + "grad_norm": 0.6216860385537891, + "learning_rate": 2.701679683903241e-07, + "loss": 0.2796, + "step": 36509 + }, + { + "epoch": 1.710310582283225, + "grad_norm": 0.5983690328631612, + "learning_rate": 2.7008222108665994e-07, + "loss": 0.2611, + "step": 36510 + }, + { + "epoch": 1.710357427273153, + "grad_norm": 0.5952294002605223, + "learning_rate": 2.699964866156124e-07, + "loss": 0.2723, + "step": 36511 + }, + { + "epoch": 1.7104042722630814, + "grad_norm": 0.5674542872577527, + "learning_rate": 2.699107649776739e-07, + "loss": 0.2679, + "step": 36512 + }, + { + "epoch": 1.71045111725301, + "grad_norm": 0.5786879113707146, + "learning_rate": 2.6982505617333845e-07, + "loss": 0.2497, + "step": 36513 + }, + { + "epoch": 1.7104979622429382, + "grad_norm": 0.5803984569954438, + "learning_rate": 2.6973936020309927e-07, + "loss": 0.2673, + "step": 36514 + }, + { + "epoch": 1.7105448072328664, + "grad_norm": 0.6049143004331117, + "learning_rate": 2.696536770674496e-07, + "loss": 0.278, + "step": 36515 + }, + { + "epoch": 1.7105916522227949, + "grad_norm": 0.604587130477442, + "learning_rate": 2.6956800676688237e-07, + "loss": 0.279, + "step": 36516 + }, + { + "epoch": 1.710638497212723, + "grad_norm": 0.5770457621140651, + "learning_rate": 2.694823493018911e-07, + "loss": 0.2557, + "step": 36517 + }, + { + "epoch": 1.7106853422026513, + "grad_norm": 0.5942338870467245, + "learning_rate": 2.693967046729684e-07, + "loss": 0.2733, + "step": 36518 + }, + { + "epoch": 1.7107321871925798, + "grad_norm": 0.5811141552610966, + "learning_rate": 2.693110728806064e-07, + "loss": 0.2601, + "step": 36519 + }, + { + "epoch": 1.710779032182508, + "grad_norm": 0.6387953767781209, + "learning_rate": 2.6922545392529864e-07, + "loss": 0.2839, + "step": 36520 + }, + { + "epoch": 1.7108258771724363, + "grad_norm": 0.6101863342495442, + "learning_rate": 2.691398478075377e-07, + "loss": 0.2862, + "step": 36521 + }, + { + "epoch": 1.7108727221623647, + "grad_norm": 0.5877363553214043, + "learning_rate": 2.690542545278163e-07, + "loss": 0.2624, + "step": 36522 + }, + { + "epoch": 1.7109195671522932, + "grad_norm": 0.6135322592283159, + "learning_rate": 2.6896867408662734e-07, + "loss": 0.2744, + "step": 36523 + }, + { + "epoch": 1.7109664121422212, + "grad_norm": 0.5964560634109239, + "learning_rate": 2.688831064844624e-07, + "loss": 0.2569, + "step": 36524 + }, + { + "epoch": 1.7110132571321497, + "grad_norm": 0.6279623961729939, + "learning_rate": 2.68797551721815e-07, + "loss": 0.2915, + "step": 36525 + }, + { + "epoch": 1.7110601021220782, + "grad_norm": 0.5406919850653168, + "learning_rate": 2.6871200979917634e-07, + "loss": 0.2536, + "step": 36526 + }, + { + "epoch": 1.7111069471120064, + "grad_norm": 0.5838150494566989, + "learning_rate": 2.686264807170391e-07, + "loss": 0.261, + "step": 36527 + }, + { + "epoch": 1.7111537921019346, + "grad_norm": 0.6134136015646521, + "learning_rate": 2.685409644758957e-07, + "loss": 0.2724, + "step": 36528 + }, + { + "epoch": 1.711200637091863, + "grad_norm": 0.5845464704142849, + "learning_rate": 2.684554610762388e-07, + "loss": 0.272, + "step": 36529 + }, + { + "epoch": 1.7112474820817913, + "grad_norm": 0.5915427074621011, + "learning_rate": 2.683699705185594e-07, + "loss": 0.2764, + "step": 36530 + }, + { + "epoch": 1.7112943270717196, + "grad_norm": 0.5985656167600962, + "learning_rate": 2.682844928033498e-07, + "loss": 0.2789, + "step": 36531 + }, + { + "epoch": 1.711341172061648, + "grad_norm": 0.5676159594373247, + "learning_rate": 2.6819902793110255e-07, + "loss": 0.257, + "step": 36532 + }, + { + "epoch": 1.7113880170515763, + "grad_norm": 0.6015376881064137, + "learning_rate": 2.6811357590230845e-07, + "loss": 0.2677, + "step": 36533 + }, + { + "epoch": 1.7114348620415045, + "grad_norm": 0.5715309303223589, + "learning_rate": 2.6802813671745974e-07, + "loss": 0.2684, + "step": 36534 + }, + { + "epoch": 1.711481707031433, + "grad_norm": 0.6862477242839169, + "learning_rate": 2.6794271037704876e-07, + "loss": 0.3085, + "step": 36535 + }, + { + "epoch": 1.7115285520213614, + "grad_norm": 0.6259800859174487, + "learning_rate": 2.678572968815657e-07, + "loss": 0.2695, + "step": 36536 + }, + { + "epoch": 1.7115753970112897, + "grad_norm": 0.6163099758085916, + "learning_rate": 2.677718962315032e-07, + "loss": 0.2793, + "step": 36537 + }, + { + "epoch": 1.711622242001218, + "grad_norm": 0.6035736823383457, + "learning_rate": 2.6768650842735197e-07, + "loss": 0.2692, + "step": 36538 + }, + { + "epoch": 1.7116690869911464, + "grad_norm": 0.6434220842563202, + "learning_rate": 2.6760113346960467e-07, + "loss": 0.2831, + "step": 36539 + }, + { + "epoch": 1.7117159319810746, + "grad_norm": 0.6321045010358074, + "learning_rate": 2.675157713587512e-07, + "loss": 0.28, + "step": 36540 + }, + { + "epoch": 1.7117627769710029, + "grad_norm": 0.6013836211844038, + "learning_rate": 2.6743042209528364e-07, + "loss": 0.2674, + "step": 36541 + }, + { + "epoch": 1.7118096219609313, + "grad_norm": 0.5916111826497592, + "learning_rate": 2.6734508567969246e-07, + "loss": 0.2774, + "step": 36542 + }, + { + "epoch": 1.7118564669508596, + "grad_norm": 0.550532051542982, + "learning_rate": 2.6725976211246926e-07, + "loss": 0.2637, + "step": 36543 + }, + { + "epoch": 1.7119033119407878, + "grad_norm": 0.6028085803032378, + "learning_rate": 2.671744513941049e-07, + "loss": 0.2715, + "step": 36544 + }, + { + "epoch": 1.7119501569307163, + "grad_norm": 0.6045359995788154, + "learning_rate": 2.6708915352509054e-07, + "loss": 0.2802, + "step": 36545 + }, + { + "epoch": 1.7119970019206447, + "grad_norm": 0.5952603782138156, + "learning_rate": 2.6700386850591705e-07, + "loss": 0.2867, + "step": 36546 + }, + { + "epoch": 1.7120438469105728, + "grad_norm": 0.5963934011423273, + "learning_rate": 2.6691859633707517e-07, + "loss": 0.2752, + "step": 36547 + }, + { + "epoch": 1.7120906919005012, + "grad_norm": 0.5940361333801305, + "learning_rate": 2.668333370190551e-07, + "loss": 0.2597, + "step": 36548 + }, + { + "epoch": 1.7121375368904297, + "grad_norm": 0.6234052264045532, + "learning_rate": 2.6674809055234784e-07, + "loss": 0.2663, + "step": 36549 + }, + { + "epoch": 1.712184381880358, + "grad_norm": 0.6575453939572188, + "learning_rate": 2.666628569374441e-07, + "loss": 0.2934, + "step": 36550 + }, + { + "epoch": 1.7122312268702862, + "grad_norm": 0.5948123598575874, + "learning_rate": 2.66577636174834e-07, + "loss": 0.2668, + "step": 36551 + }, + { + "epoch": 1.7122780718602146, + "grad_norm": 0.6133674092942597, + "learning_rate": 2.6649242826500915e-07, + "loss": 0.2732, + "step": 36552 + }, + { + "epoch": 1.7123249168501429, + "grad_norm": 0.5926175439205614, + "learning_rate": 2.664072332084583e-07, + "loss": 0.2573, + "step": 36553 + }, + { + "epoch": 1.7123717618400711, + "grad_norm": 0.5952136081732193, + "learning_rate": 2.66322051005673e-07, + "loss": 0.2638, + "step": 36554 + }, + { + "epoch": 1.7124186068299996, + "grad_norm": 0.579702722398526, + "learning_rate": 2.662368816571423e-07, + "loss": 0.2759, + "step": 36555 + }, + { + "epoch": 1.7124654518199278, + "grad_norm": 0.6270592224455113, + "learning_rate": 2.661517251633569e-07, + "loss": 0.2936, + "step": 36556 + }, + { + "epoch": 1.712512296809856, + "grad_norm": 0.6390634912690435, + "learning_rate": 2.660665815248067e-07, + "loss": 0.2728, + "step": 36557 + }, + { + "epoch": 1.7125591417997845, + "grad_norm": 0.5714927038102308, + "learning_rate": 2.6598145074198225e-07, + "loss": 0.2778, + "step": 36558 + }, + { + "epoch": 1.712605986789713, + "grad_norm": 0.6207072008903797, + "learning_rate": 2.6589633281537324e-07, + "loss": 0.2677, + "step": 36559 + }, + { + "epoch": 1.712652831779641, + "grad_norm": 0.6047622695808227, + "learning_rate": 2.6581122774546944e-07, + "loss": 0.2639, + "step": 36560 + }, + { + "epoch": 1.7126996767695695, + "grad_norm": 0.6298817910691461, + "learning_rate": 2.6572613553276005e-07, + "loss": 0.2701, + "step": 36561 + }, + { + "epoch": 1.712746521759498, + "grad_norm": 0.5909866893496757, + "learning_rate": 2.6564105617773505e-07, + "loss": 0.2679, + "step": 36562 + }, + { + "epoch": 1.7127933667494262, + "grad_norm": 0.5807898013722346, + "learning_rate": 2.655559896808843e-07, + "loss": 0.2879, + "step": 36563 + }, + { + "epoch": 1.7128402117393544, + "grad_norm": 0.5826527586906748, + "learning_rate": 2.6547093604269715e-07, + "loss": 0.2783, + "step": 36564 + }, + { + "epoch": 1.7128870567292829, + "grad_norm": 0.6134603630386747, + "learning_rate": 2.6538589526366315e-07, + "loss": 0.2631, + "step": 36565 + }, + { + "epoch": 1.7129339017192111, + "grad_norm": 0.6402640830876944, + "learning_rate": 2.65300867344272e-07, + "loss": 0.2836, + "step": 36566 + }, + { + "epoch": 1.7129807467091394, + "grad_norm": 0.624296660769992, + "learning_rate": 2.652158522850129e-07, + "loss": 0.273, + "step": 36567 + }, + { + "epoch": 1.7130275916990678, + "grad_norm": 0.6161068897835226, + "learning_rate": 2.6513085008637444e-07, + "loss": 0.2862, + "step": 36568 + }, + { + "epoch": 1.713074436688996, + "grad_norm": 0.595802933415837, + "learning_rate": 2.6504586074884597e-07, + "loss": 0.2672, + "step": 36569 + }, + { + "epoch": 1.7131212816789243, + "grad_norm": 0.6069219562369729, + "learning_rate": 2.649608842729171e-07, + "loss": 0.2643, + "step": 36570 + }, + { + "epoch": 1.7131681266688528, + "grad_norm": 0.6126871477065955, + "learning_rate": 2.648759206590765e-07, + "loss": 0.2837, + "step": 36571 + }, + { + "epoch": 1.7132149716587812, + "grad_norm": 0.5932519123795301, + "learning_rate": 2.6479096990781354e-07, + "loss": 0.2689, + "step": 36572 + }, + { + "epoch": 1.7132618166487095, + "grad_norm": 0.5956761991815847, + "learning_rate": 2.6470603201961654e-07, + "loss": 0.2843, + "step": 36573 + }, + { + "epoch": 1.7133086616386377, + "grad_norm": 0.5921583370122221, + "learning_rate": 2.6462110699497475e-07, + "loss": 0.2809, + "step": 36574 + }, + { + "epoch": 1.7133555066285662, + "grad_norm": 0.5913391611907045, + "learning_rate": 2.645361948343761e-07, + "loss": 0.2666, + "step": 36575 + }, + { + "epoch": 1.7134023516184944, + "grad_norm": 0.6639597944436976, + "learning_rate": 2.6445129553830993e-07, + "loss": 0.2894, + "step": 36576 + }, + { + "epoch": 1.7134491966084227, + "grad_norm": 0.5829487034273876, + "learning_rate": 2.643664091072648e-07, + "loss": 0.2536, + "step": 36577 + }, + { + "epoch": 1.7134960415983511, + "grad_norm": 0.5981196407174605, + "learning_rate": 2.6428153554172944e-07, + "loss": 0.276, + "step": 36578 + }, + { + "epoch": 1.7135428865882794, + "grad_norm": 0.5975771415135357, + "learning_rate": 2.6419667484219147e-07, + "loss": 0.2672, + "step": 36579 + }, + { + "epoch": 1.7135897315782076, + "grad_norm": 0.622839871485256, + "learning_rate": 2.6411182700913944e-07, + "loss": 0.2763, + "step": 36580 + }, + { + "epoch": 1.713636576568136, + "grad_norm": 0.6039012369217633, + "learning_rate": 2.640269920430627e-07, + "loss": 0.2723, + "step": 36581 + }, + { + "epoch": 1.7136834215580645, + "grad_norm": 0.622695786783136, + "learning_rate": 2.6394216994444804e-07, + "loss": 0.2698, + "step": 36582 + }, + { + "epoch": 1.7137302665479925, + "grad_norm": 0.5663070281340424, + "learning_rate": 2.638573607137843e-07, + "loss": 0.2632, + "step": 36583 + }, + { + "epoch": 1.713777111537921, + "grad_norm": 0.5837333481270316, + "learning_rate": 2.6377256435155956e-07, + "loss": 0.2524, + "step": 36584 + }, + { + "epoch": 1.7138239565278495, + "grad_norm": 0.6224461832124187, + "learning_rate": 2.6368778085826137e-07, + "loss": 0.2637, + "step": 36585 + }, + { + "epoch": 1.7138708015177777, + "grad_norm": 0.5643517156201322, + "learning_rate": 2.6360301023437785e-07, + "loss": 0.2493, + "step": 36586 + }, + { + "epoch": 1.713917646507706, + "grad_norm": 0.6100482826852875, + "learning_rate": 2.63518252480397e-07, + "loss": 0.2886, + "step": 36587 + }, + { + "epoch": 1.7139644914976344, + "grad_norm": 0.6207416227008227, + "learning_rate": 2.6343350759680696e-07, + "loss": 0.2523, + "step": 36588 + }, + { + "epoch": 1.7140113364875627, + "grad_norm": 0.58802281929198, + "learning_rate": 2.633487755840944e-07, + "loss": 0.2707, + "step": 36589 + }, + { + "epoch": 1.714058181477491, + "grad_norm": 0.6081202895403176, + "learning_rate": 2.6326405644274797e-07, + "loss": 0.2728, + "step": 36590 + }, + { + "epoch": 1.7141050264674194, + "grad_norm": 0.6439469117720747, + "learning_rate": 2.6317935017325437e-07, + "loss": 0.2828, + "step": 36591 + }, + { + "epoch": 1.7141518714573476, + "grad_norm": 0.6034272846428615, + "learning_rate": 2.630946567761014e-07, + "loss": 0.2705, + "step": 36592 + }, + { + "epoch": 1.7141987164472758, + "grad_norm": 0.6145714356058999, + "learning_rate": 2.6300997625177625e-07, + "loss": 0.3078, + "step": 36593 + }, + { + "epoch": 1.7142455614372043, + "grad_norm": 0.5659599632853963, + "learning_rate": 2.629253086007666e-07, + "loss": 0.2693, + "step": 36594 + }, + { + "epoch": 1.7142924064271328, + "grad_norm": 0.576758820602805, + "learning_rate": 2.6284065382356005e-07, + "loss": 0.2791, + "step": 36595 + }, + { + "epoch": 1.7143392514170608, + "grad_norm": 0.6404843426634454, + "learning_rate": 2.627560119206432e-07, + "loss": 0.2784, + "step": 36596 + }, + { + "epoch": 1.7143860964069892, + "grad_norm": 0.6130601924378438, + "learning_rate": 2.626713828925026e-07, + "loss": 0.2888, + "step": 36597 + }, + { + "epoch": 1.7144329413969177, + "grad_norm": 0.6479106350651718, + "learning_rate": 2.6258676673962593e-07, + "loss": 0.2874, + "step": 36598 + }, + { + "epoch": 1.714479786386846, + "grad_norm": 0.6290265318820559, + "learning_rate": 2.6250216346249993e-07, + "loss": 0.2761, + "step": 36599 + }, + { + "epoch": 1.7145266313767742, + "grad_norm": 0.9974607161750985, + "learning_rate": 2.624175730616119e-07, + "loss": 0.2727, + "step": 36600 + }, + { + "epoch": 1.7145734763667027, + "grad_norm": 0.5923774276910562, + "learning_rate": 2.623329955374487e-07, + "loss": 0.2757, + "step": 36601 + }, + { + "epoch": 1.714620321356631, + "grad_norm": 0.5723761655305601, + "learning_rate": 2.62248430890496e-07, + "loss": 0.2507, + "step": 36602 + }, + { + "epoch": 1.7146671663465591, + "grad_norm": 0.6058292876663804, + "learning_rate": 2.621638791212419e-07, + "loss": 0.2833, + "step": 36603 + }, + { + "epoch": 1.7147140113364876, + "grad_norm": 0.6137855845468234, + "learning_rate": 2.6207934023017164e-07, + "loss": 0.2727, + "step": 36604 + }, + { + "epoch": 1.7147608563264158, + "grad_norm": 0.6574257817563817, + "learning_rate": 2.6199481421777225e-07, + "loss": 0.2912, + "step": 36605 + }, + { + "epoch": 1.714807701316344, + "grad_norm": 0.5934898637574059, + "learning_rate": 2.619103010845303e-07, + "loss": 0.2666, + "step": 36606 + }, + { + "epoch": 1.7148545463062725, + "grad_norm": 0.5913058188975971, + "learning_rate": 2.6182580083093214e-07, + "loss": 0.2711, + "step": 36607 + }, + { + "epoch": 1.714901391296201, + "grad_norm": 0.6080035507215218, + "learning_rate": 2.6174131345746424e-07, + "loss": 0.2782, + "step": 36608 + }, + { + "epoch": 1.7149482362861292, + "grad_norm": 0.5726308286315801, + "learning_rate": 2.6165683896461234e-07, + "loss": 0.2566, + "step": 36609 + }, + { + "epoch": 1.7149950812760575, + "grad_norm": 0.6278163915311109, + "learning_rate": 2.615723773528625e-07, + "loss": 0.2717, + "step": 36610 + }, + { + "epoch": 1.715041926265986, + "grad_norm": 0.5732649086232079, + "learning_rate": 2.6148792862270124e-07, + "loss": 0.2674, + "step": 36611 + }, + { + "epoch": 1.7150887712559142, + "grad_norm": 0.5978956562673364, + "learning_rate": 2.6140349277461397e-07, + "loss": 0.2637, + "step": 36612 + }, + { + "epoch": 1.7151356162458424, + "grad_norm": 0.6103736813359298, + "learning_rate": 2.61319069809087e-07, + "loss": 0.2646, + "step": 36613 + }, + { + "epoch": 1.715182461235771, + "grad_norm": 0.669936346835793, + "learning_rate": 2.612346597266066e-07, + "loss": 0.2903, + "step": 36614 + }, + { + "epoch": 1.7152293062256991, + "grad_norm": 0.6173988296752382, + "learning_rate": 2.6115026252765767e-07, + "loss": 0.2782, + "step": 36615 + }, + { + "epoch": 1.7152761512156274, + "grad_norm": 0.5919501943930668, + "learning_rate": 2.610658782127268e-07, + "loss": 0.2561, + "step": 36616 + }, + { + "epoch": 1.7153229962055558, + "grad_norm": 0.6397897864030051, + "learning_rate": 2.609815067822985e-07, + "loss": 0.2871, + "step": 36617 + }, + { + "epoch": 1.7153698411954843, + "grad_norm": 0.5741903238885602, + "learning_rate": 2.6089714823685883e-07, + "loss": 0.2634, + "step": 36618 + }, + { + "epoch": 1.7154166861854123, + "grad_norm": 0.5700270655060247, + "learning_rate": 2.608128025768933e-07, + "loss": 0.2625, + "step": 36619 + }, + { + "epoch": 1.7154635311753408, + "grad_norm": 0.6196278395135003, + "learning_rate": 2.607284698028878e-07, + "loss": 0.2676, + "step": 36620 + }, + { + "epoch": 1.7155103761652692, + "grad_norm": 0.5824843541810116, + "learning_rate": 2.606441499153267e-07, + "loss": 0.2604, + "step": 36621 + }, + { + "epoch": 1.7155572211551975, + "grad_norm": 0.6116860084669035, + "learning_rate": 2.605598429146955e-07, + "loss": 0.2828, + "step": 36622 + }, + { + "epoch": 1.7156040661451257, + "grad_norm": 0.5912391608285131, + "learning_rate": 2.604755488014801e-07, + "loss": 0.2778, + "step": 36623 + }, + { + "epoch": 1.7156509111350542, + "grad_norm": 0.5976402483906711, + "learning_rate": 2.603912675761647e-07, + "loss": 0.2764, + "step": 36624 + }, + { + "epoch": 1.7156977561249824, + "grad_norm": 0.603717860321007, + "learning_rate": 2.6030699923923456e-07, + "loss": 0.2618, + "step": 36625 + }, + { + "epoch": 1.7157446011149107, + "grad_norm": 0.573808326776171, + "learning_rate": 2.6022274379117476e-07, + "loss": 0.2593, + "step": 36626 + }, + { + "epoch": 1.7157914461048391, + "grad_norm": 0.5729193745716082, + "learning_rate": 2.601385012324706e-07, + "loss": 0.2645, + "step": 36627 + }, + { + "epoch": 1.7158382910947674, + "grad_norm": 0.5604509810716704, + "learning_rate": 2.6005427156360565e-07, + "loss": 0.2514, + "step": 36628 + }, + { + "epoch": 1.7158851360846956, + "grad_norm": 0.6073791806091134, + "learning_rate": 2.599700547850656e-07, + "loss": 0.2799, + "step": 36629 + }, + { + "epoch": 1.715931981074624, + "grad_norm": 0.6271352579963912, + "learning_rate": 2.598858508973354e-07, + "loss": 0.277, + "step": 36630 + }, + { + "epoch": 1.7159788260645525, + "grad_norm": 0.6127283832800384, + "learning_rate": 2.598016599008987e-07, + "loss": 0.2834, + "step": 36631 + }, + { + "epoch": 1.7160256710544806, + "grad_norm": 0.6300988994097935, + "learning_rate": 2.5971748179624027e-07, + "loss": 0.2842, + "step": 36632 + }, + { + "epoch": 1.716072516044409, + "grad_norm": 0.5803473677610036, + "learning_rate": 2.5963331658384495e-07, + "loss": 0.2658, + "step": 36633 + }, + { + "epoch": 1.7161193610343375, + "grad_norm": 0.5966217856538476, + "learning_rate": 2.5954916426419646e-07, + "loss": 0.2538, + "step": 36634 + }, + { + "epoch": 1.7161662060242657, + "grad_norm": 0.6117068580221533, + "learning_rate": 2.5946502483777947e-07, + "loss": 0.2694, + "step": 36635 + }, + { + "epoch": 1.716213051014194, + "grad_norm": 0.5917013858944344, + "learning_rate": 2.593808983050783e-07, + "loss": 0.258, + "step": 36636 + }, + { + "epoch": 1.7162598960041224, + "grad_norm": 0.6019576409770989, + "learning_rate": 2.59296784666577e-07, + "loss": 0.2681, + "step": 36637 + }, + { + "epoch": 1.7163067409940507, + "grad_norm": 0.6217314007343101, + "learning_rate": 2.5921268392275933e-07, + "loss": 0.2829, + "step": 36638 + }, + { + "epoch": 1.716353585983979, + "grad_norm": 0.6086097960558481, + "learning_rate": 2.5912859607410994e-07, + "loss": 0.2657, + "step": 36639 + }, + { + "epoch": 1.7164004309739074, + "grad_norm": 0.5690803502411086, + "learning_rate": 2.590445211211115e-07, + "loss": 0.2611, + "step": 36640 + }, + { + "epoch": 1.7164472759638356, + "grad_norm": 0.5771543707463742, + "learning_rate": 2.589604590642489e-07, + "loss": 0.2652, + "step": 36641 + }, + { + "epoch": 1.7164941209537639, + "grad_norm": 0.5479660603580516, + "learning_rate": 2.5887640990400533e-07, + "loss": 0.2427, + "step": 36642 + }, + { + "epoch": 1.7165409659436923, + "grad_norm": 0.6364664950045892, + "learning_rate": 2.5879237364086543e-07, + "loss": 0.2875, + "step": 36643 + }, + { + "epoch": 1.7165878109336208, + "grad_norm": 0.6654557326058829, + "learning_rate": 2.5870835027531156e-07, + "loss": 0.2956, + "step": 36644 + }, + { + "epoch": 1.716634655923549, + "grad_norm": 0.6026840626589981, + "learning_rate": 2.586243398078284e-07, + "loss": 0.2669, + "step": 36645 + }, + { + "epoch": 1.7166815009134773, + "grad_norm": 0.6105592344952632, + "learning_rate": 2.5854034223889824e-07, + "loss": 0.2807, + "step": 36646 + }, + { + "epoch": 1.7167283459034057, + "grad_norm": 0.636371050659116, + "learning_rate": 2.5845635756900494e-07, + "loss": 0.2882, + "step": 36647 + }, + { + "epoch": 1.716775190893334, + "grad_norm": 0.618551924070124, + "learning_rate": 2.58372385798632e-07, + "loss": 0.2642, + "step": 36648 + }, + { + "epoch": 1.7168220358832622, + "grad_norm": 0.6122108512061237, + "learning_rate": 2.5828842692826264e-07, + "loss": 0.2619, + "step": 36649 + }, + { + "epoch": 1.7168688808731907, + "grad_norm": 0.615779533970002, + "learning_rate": 2.582044809583803e-07, + "loss": 0.2713, + "step": 36650 + }, + { + "epoch": 1.716915725863119, + "grad_norm": 0.644292184576928, + "learning_rate": 2.581205478894677e-07, + "loss": 0.2668, + "step": 36651 + }, + { + "epoch": 1.7169625708530472, + "grad_norm": 0.6094711066760571, + "learning_rate": 2.5803662772200754e-07, + "loss": 0.2664, + "step": 36652 + }, + { + "epoch": 1.7170094158429756, + "grad_norm": 0.5445273695118622, + "learning_rate": 2.57952720456483e-07, + "loss": 0.2552, + "step": 36653 + }, + { + "epoch": 1.717056260832904, + "grad_norm": 0.6007754956179114, + "learning_rate": 2.5786882609337707e-07, + "loss": 0.2696, + "step": 36654 + }, + { + "epoch": 1.717103105822832, + "grad_norm": 0.6198622711357193, + "learning_rate": 2.5778494463317233e-07, + "loss": 0.2688, + "step": 36655 + }, + { + "epoch": 1.7171499508127606, + "grad_norm": 0.5930881133563313, + "learning_rate": 2.577010760763518e-07, + "loss": 0.2718, + "step": 36656 + }, + { + "epoch": 1.717196795802689, + "grad_norm": 0.6180600438932227, + "learning_rate": 2.576172204233987e-07, + "loss": 0.2839, + "step": 36657 + }, + { + "epoch": 1.7172436407926173, + "grad_norm": 0.6035460833532639, + "learning_rate": 2.575333776747946e-07, + "loss": 0.2558, + "step": 36658 + }, + { + "epoch": 1.7172904857825455, + "grad_norm": 0.6273867970528769, + "learning_rate": 2.574495478310218e-07, + "loss": 0.296, + "step": 36659 + }, + { + "epoch": 1.717337330772474, + "grad_norm": 0.5933992819750615, + "learning_rate": 2.573657308925634e-07, + "loss": 0.2633, + "step": 36660 + }, + { + "epoch": 1.7173841757624022, + "grad_norm": 0.5656828834403166, + "learning_rate": 2.5728192685990137e-07, + "loss": 0.2547, + "step": 36661 + }, + { + "epoch": 1.7174310207523305, + "grad_norm": 0.5779222032181188, + "learning_rate": 2.5719813573351847e-07, + "loss": 0.2585, + "step": 36662 + }, + { + "epoch": 1.717477865742259, + "grad_norm": 0.5800864832126776, + "learning_rate": 2.571143575138968e-07, + "loss": 0.2684, + "step": 36663 + }, + { + "epoch": 1.7175247107321872, + "grad_norm": 0.5823088831722119, + "learning_rate": 2.5703059220151787e-07, + "loss": 0.2746, + "step": 36664 + }, + { + "epoch": 1.7175715557221154, + "grad_norm": 0.6093031442686551, + "learning_rate": 2.5694683979686467e-07, + "loss": 0.2618, + "step": 36665 + }, + { + "epoch": 1.7176184007120439, + "grad_norm": 0.5449337579530961, + "learning_rate": 2.568631003004179e-07, + "loss": 0.2487, + "step": 36666 + }, + { + "epoch": 1.7176652457019723, + "grad_norm": 0.6060610098401235, + "learning_rate": 2.567793737126606e-07, + "loss": 0.2763, + "step": 36667 + }, + { + "epoch": 1.7177120906919003, + "grad_norm": 0.5850260821498573, + "learning_rate": 2.566956600340739e-07, + "loss": 0.2565, + "step": 36668 + }, + { + "epoch": 1.7177589356818288, + "grad_norm": 0.5754885943635767, + "learning_rate": 2.566119592651403e-07, + "loss": 0.269, + "step": 36669 + }, + { + "epoch": 1.7178057806717573, + "grad_norm": 0.5684114869645421, + "learning_rate": 2.5652827140634077e-07, + "loss": 0.2634, + "step": 36670 + }, + { + "epoch": 1.7178526256616855, + "grad_norm": 0.5776692355192482, + "learning_rate": 2.564445964581572e-07, + "loss": 0.2528, + "step": 36671 + }, + { + "epoch": 1.7178994706516137, + "grad_norm": 0.611541521595477, + "learning_rate": 2.563609344210713e-07, + "loss": 0.2641, + "step": 36672 + }, + { + "epoch": 1.7179463156415422, + "grad_norm": 0.6182001739082973, + "learning_rate": 2.562772852955639e-07, + "loss": 0.2818, + "step": 36673 + }, + { + "epoch": 1.7179931606314705, + "grad_norm": 0.688314018418899, + "learning_rate": 2.561936490821168e-07, + "loss": 0.3003, + "step": 36674 + }, + { + "epoch": 1.7180400056213987, + "grad_norm": 0.6115049619663325, + "learning_rate": 2.5611002578121184e-07, + "loss": 0.2651, + "step": 36675 + }, + { + "epoch": 1.7180868506113272, + "grad_norm": 0.5837265713963861, + "learning_rate": 2.56026415393329e-07, + "loss": 0.2572, + "step": 36676 + }, + { + "epoch": 1.7181336956012554, + "grad_norm": 0.6113557071143576, + "learning_rate": 2.559428179189505e-07, + "loss": 0.2948, + "step": 36677 + }, + { + "epoch": 1.7181805405911836, + "grad_norm": 0.6114051628498672, + "learning_rate": 2.558592333585566e-07, + "loss": 0.2723, + "step": 36678 + }, + { + "epoch": 1.718227385581112, + "grad_norm": 0.5981787142210103, + "learning_rate": 2.5577566171262974e-07, + "loss": 0.2582, + "step": 36679 + }, + { + "epoch": 1.7182742305710406, + "grad_norm": 0.6214095271620264, + "learning_rate": 2.5569210298164923e-07, + "loss": 0.2633, + "step": 36680 + }, + { + "epoch": 1.7183210755609688, + "grad_norm": 0.5974769450973266, + "learning_rate": 2.556085571660968e-07, + "loss": 0.2674, + "step": 36681 + }, + { + "epoch": 1.718367920550897, + "grad_norm": 0.5811605310269233, + "learning_rate": 2.555250242664528e-07, + "loss": 0.2631, + "step": 36682 + }, + { + "epoch": 1.7184147655408255, + "grad_norm": 0.6101242714662394, + "learning_rate": 2.5544150428319813e-07, + "loss": 0.2702, + "step": 36683 + }, + { + "epoch": 1.7184616105307537, + "grad_norm": 0.5513472136956644, + "learning_rate": 2.5535799721681324e-07, + "loss": 0.2414, + "step": 36684 + }, + { + "epoch": 1.718508455520682, + "grad_norm": 0.6213809318558533, + "learning_rate": 2.5527450306777916e-07, + "loss": 0.2702, + "step": 36685 + }, + { + "epoch": 1.7185553005106105, + "grad_norm": 0.6282063417287611, + "learning_rate": 2.5519102183657664e-07, + "loss": 0.2776, + "step": 36686 + }, + { + "epoch": 1.7186021455005387, + "grad_norm": 0.6019453843837983, + "learning_rate": 2.551075535236855e-07, + "loss": 0.2639, + "step": 36687 + }, + { + "epoch": 1.718648990490467, + "grad_norm": 0.5522241044017034, + "learning_rate": 2.5502409812958567e-07, + "loss": 0.2596, + "step": 36688 + }, + { + "epoch": 1.7186958354803954, + "grad_norm": 0.601556529844573, + "learning_rate": 2.5494065565475787e-07, + "loss": 0.271, + "step": 36689 + }, + { + "epoch": 1.7187426804703239, + "grad_norm": 0.621737110636812, + "learning_rate": 2.548572260996823e-07, + "loss": 0.2905, + "step": 36690 + }, + { + "epoch": 1.7187895254602519, + "grad_norm": 0.6279254646103828, + "learning_rate": 2.5477380946483913e-07, + "loss": 0.2616, + "step": 36691 + }, + { + "epoch": 1.7188363704501803, + "grad_norm": 0.598450613530055, + "learning_rate": 2.546904057507088e-07, + "loss": 0.2648, + "step": 36692 + }, + { + "epoch": 1.7188832154401088, + "grad_norm": 0.6032851600541621, + "learning_rate": 2.546070149577703e-07, + "loss": 0.2873, + "step": 36693 + }, + { + "epoch": 1.718930060430037, + "grad_norm": 0.5823584801183245, + "learning_rate": 2.545236370865048e-07, + "loss": 0.2641, + "step": 36694 + }, + { + "epoch": 1.7189769054199653, + "grad_norm": 0.6179979898045492, + "learning_rate": 2.544402721373906e-07, + "loss": 0.2652, + "step": 36695 + }, + { + "epoch": 1.7190237504098937, + "grad_norm": 0.570213498004522, + "learning_rate": 2.5435692011090865e-07, + "loss": 0.2584, + "step": 36696 + }, + { + "epoch": 1.719070595399822, + "grad_norm": 0.6213929420693828, + "learning_rate": 2.542735810075378e-07, + "loss": 0.2807, + "step": 36697 + }, + { + "epoch": 1.7191174403897502, + "grad_norm": 0.5422761112489294, + "learning_rate": 2.5419025482775834e-07, + "loss": 0.2589, + "step": 36698 + }, + { + "epoch": 1.7191642853796787, + "grad_norm": 0.5693289218568139, + "learning_rate": 2.5410694157204985e-07, + "loss": 0.2533, + "step": 36699 + }, + { + "epoch": 1.719211130369607, + "grad_norm": 0.5961670291390412, + "learning_rate": 2.540236412408914e-07, + "loss": 0.2784, + "step": 36700 + }, + { + "epoch": 1.7192579753595352, + "grad_norm": 0.6182514932821864, + "learning_rate": 2.539403538347618e-07, + "loss": 0.2736, + "step": 36701 + }, + { + "epoch": 1.7193048203494636, + "grad_norm": 0.6028892819034785, + "learning_rate": 2.5385707935414116e-07, + "loss": 0.266, + "step": 36702 + }, + { + "epoch": 1.719351665339392, + "grad_norm": 0.556019220155107, + "learning_rate": 2.537738177995083e-07, + "loss": 0.2607, + "step": 36703 + }, + { + "epoch": 1.7193985103293201, + "grad_norm": 0.5709097628575063, + "learning_rate": 2.536905691713429e-07, + "loss": 0.2515, + "step": 36704 + }, + { + "epoch": 1.7194453553192486, + "grad_norm": 0.5694605484029867, + "learning_rate": 2.536073334701233e-07, + "loss": 0.2613, + "step": 36705 + }, + { + "epoch": 1.719492200309177, + "grad_norm": 0.5953350873295042, + "learning_rate": 2.535241106963296e-07, + "loss": 0.2695, + "step": 36706 + }, + { + "epoch": 1.7195390452991053, + "grad_norm": 0.6030256652950048, + "learning_rate": 2.534409008504399e-07, + "loss": 0.2782, + "step": 36707 + }, + { + "epoch": 1.7195858902890335, + "grad_norm": 0.5932528468128356, + "learning_rate": 2.533577039329327e-07, + "loss": 0.2487, + "step": 36708 + }, + { + "epoch": 1.719632735278962, + "grad_norm": 0.617281015872936, + "learning_rate": 2.5327451994428707e-07, + "loss": 0.2692, + "step": 36709 + }, + { + "epoch": 1.7196795802688902, + "grad_norm": 0.6330495706369383, + "learning_rate": 2.5319134888498217e-07, + "loss": 0.2866, + "step": 36710 + }, + { + "epoch": 1.7197264252588185, + "grad_norm": 0.5752085112336397, + "learning_rate": 2.531081907554961e-07, + "loss": 0.2766, + "step": 36711 + }, + { + "epoch": 1.719773270248747, + "grad_norm": 0.8207988312266218, + "learning_rate": 2.5302504555630827e-07, + "loss": 0.2715, + "step": 36712 + }, + { + "epoch": 1.7198201152386752, + "grad_norm": 0.622489665248846, + "learning_rate": 2.52941913287896e-07, + "loss": 0.2808, + "step": 36713 + }, + { + "epoch": 1.7198669602286034, + "grad_norm": 0.5956453459151325, + "learning_rate": 2.52858793950739e-07, + "loss": 0.2727, + "step": 36714 + }, + { + "epoch": 1.7199138052185319, + "grad_norm": 0.5742055146515264, + "learning_rate": 2.5277568754531414e-07, + "loss": 0.2788, + "step": 36715 + }, + { + "epoch": 1.7199606502084603, + "grad_norm": 0.5896104572120728, + "learning_rate": 2.5269259407210035e-07, + "loss": 0.2685, + "step": 36716 + }, + { + "epoch": 1.7200074951983886, + "grad_norm": 0.6251177697592587, + "learning_rate": 2.526095135315759e-07, + "loss": 0.2712, + "step": 36717 + }, + { + "epoch": 1.7200543401883168, + "grad_norm": 0.5743460452974485, + "learning_rate": 2.525264459242194e-07, + "loss": 0.2562, + "step": 36718 + }, + { + "epoch": 1.7201011851782453, + "grad_norm": 0.6022920067247988, + "learning_rate": 2.5244339125050753e-07, + "loss": 0.267, + "step": 36719 + }, + { + "epoch": 1.7201480301681735, + "grad_norm": 0.6278991915780822, + "learning_rate": 2.523603495109192e-07, + "loss": 0.277, + "step": 36720 + }, + { + "epoch": 1.7201948751581018, + "grad_norm": 0.5853054321298881, + "learning_rate": 2.522773207059329e-07, + "loss": 0.2631, + "step": 36721 + }, + { + "epoch": 1.7202417201480302, + "grad_norm": 0.586897806424307, + "learning_rate": 2.521943048360248e-07, + "loss": 0.2684, + "step": 36722 + }, + { + "epoch": 1.7202885651379585, + "grad_norm": 0.611798541546766, + "learning_rate": 2.5211130190167383e-07, + "loss": 0.2702, + "step": 36723 + }, + { + "epoch": 1.7203354101278867, + "grad_norm": 0.6013139869906454, + "learning_rate": 2.520283119033576e-07, + "loss": 0.2692, + "step": 36724 + }, + { + "epoch": 1.7203822551178152, + "grad_norm": 0.6381090354641817, + "learning_rate": 2.519453348415529e-07, + "loss": 0.299, + "step": 36725 + }, + { + "epoch": 1.7204291001077436, + "grad_norm": 0.591743583998651, + "learning_rate": 2.518623707167378e-07, + "loss": 0.2625, + "step": 36726 + }, + { + "epoch": 1.7204759450976717, + "grad_norm": 0.60836807479607, + "learning_rate": 2.517794195293899e-07, + "loss": 0.2641, + "step": 36727 + }, + { + "epoch": 1.7205227900876001, + "grad_norm": 0.6352374229838528, + "learning_rate": 2.5169648127998685e-07, + "loss": 0.2793, + "step": 36728 + }, + { + "epoch": 1.7205696350775286, + "grad_norm": 0.573596429206286, + "learning_rate": 2.5161355596900473e-07, + "loss": 0.2673, + "step": 36729 + }, + { + "epoch": 1.7206164800674568, + "grad_norm": 0.5771644453468339, + "learning_rate": 2.5153064359692226e-07, + "loss": 0.2592, + "step": 36730 + }, + { + "epoch": 1.720663325057385, + "grad_norm": 0.6107083414069988, + "learning_rate": 2.5144774416421493e-07, + "loss": 0.2784, + "step": 36731 + }, + { + "epoch": 1.7207101700473135, + "grad_norm": 0.593935368143471, + "learning_rate": 2.5136485767136095e-07, + "loss": 0.2677, + "step": 36732 + }, + { + "epoch": 1.7207570150372418, + "grad_norm": 0.6656411195254534, + "learning_rate": 2.5128198411883714e-07, + "loss": 0.2808, + "step": 36733 + }, + { + "epoch": 1.72080386002717, + "grad_norm": 0.6430751499005984, + "learning_rate": 2.5119912350712013e-07, + "loss": 0.2704, + "step": 36734 + }, + { + "epoch": 1.7208507050170985, + "grad_norm": 0.573485991499584, + "learning_rate": 2.5111627583668753e-07, + "loss": 0.2569, + "step": 36735 + }, + { + "epoch": 1.7208975500070267, + "grad_norm": 0.603976553248648, + "learning_rate": 2.5103344110801537e-07, + "loss": 0.2738, + "step": 36736 + }, + { + "epoch": 1.720944394996955, + "grad_norm": 0.5912482156237586, + "learning_rate": 2.509506193215802e-07, + "loss": 0.2857, + "step": 36737 + }, + { + "epoch": 1.7209912399868834, + "grad_norm": 0.5964658611745828, + "learning_rate": 2.508678104778589e-07, + "loss": 0.2774, + "step": 36738 + }, + { + "epoch": 1.7210380849768119, + "grad_norm": 0.5491975911423672, + "learning_rate": 2.50785014577328e-07, + "loss": 0.2563, + "step": 36739 + }, + { + "epoch": 1.72108492996674, + "grad_norm": 0.5967784434971286, + "learning_rate": 2.507022316204641e-07, + "loss": 0.2707, + "step": 36740 + }, + { + "epoch": 1.7211317749566684, + "grad_norm": 0.5748214337967298, + "learning_rate": 2.506194616077437e-07, + "loss": 0.2514, + "step": 36741 + }, + { + "epoch": 1.7211786199465968, + "grad_norm": 0.631631653746163, + "learning_rate": 2.5053670453964286e-07, + "loss": 0.2796, + "step": 36742 + }, + { + "epoch": 1.721225464936525, + "grad_norm": 0.5771499054178116, + "learning_rate": 2.5045396041663813e-07, + "loss": 0.2597, + "step": 36743 + }, + { + "epoch": 1.7212723099264533, + "grad_norm": 0.622619274916246, + "learning_rate": 2.503712292392052e-07, + "loss": 0.2762, + "step": 36744 + }, + { + "epoch": 1.7213191549163818, + "grad_norm": 0.5610499038552632, + "learning_rate": 2.502885110078201e-07, + "loss": 0.2658, + "step": 36745 + }, + { + "epoch": 1.72136599990631, + "grad_norm": 0.6232895948823362, + "learning_rate": 2.502058057229595e-07, + "loss": 0.2799, + "step": 36746 + }, + { + "epoch": 1.7214128448962382, + "grad_norm": 0.5852447355798699, + "learning_rate": 2.5012311338509896e-07, + "loss": 0.2711, + "step": 36747 + }, + { + "epoch": 1.7214596898861667, + "grad_norm": 0.5681106466104856, + "learning_rate": 2.5004043399471464e-07, + "loss": 0.2659, + "step": 36748 + }, + { + "epoch": 1.721506534876095, + "grad_norm": 0.5460192407231403, + "learning_rate": 2.499577675522824e-07, + "loss": 0.2617, + "step": 36749 + }, + { + "epoch": 1.7215533798660232, + "grad_norm": 0.572560196621632, + "learning_rate": 2.4987511405827695e-07, + "loss": 0.2585, + "step": 36750 + }, + { + "epoch": 1.7216002248559517, + "grad_norm": 0.6085223141046936, + "learning_rate": 2.497924735131749e-07, + "loss": 0.2905, + "step": 36751 + }, + { + "epoch": 1.7216470698458801, + "grad_norm": 0.600617886650288, + "learning_rate": 2.4970984591745156e-07, + "loss": 0.2732, + "step": 36752 + }, + { + "epoch": 1.7216939148358084, + "grad_norm": 0.5983261624037398, + "learning_rate": 2.496272312715825e-07, + "loss": 0.2667, + "step": 36753 + }, + { + "epoch": 1.7217407598257366, + "grad_norm": 0.6074768173574883, + "learning_rate": 2.495446295760434e-07, + "loss": 0.2691, + "step": 36754 + }, + { + "epoch": 1.721787604815665, + "grad_norm": 0.5526976432195136, + "learning_rate": 2.4946204083130895e-07, + "loss": 0.27, + "step": 36755 + }, + { + "epoch": 1.7218344498055933, + "grad_norm": 0.5733432636161683, + "learning_rate": 2.493794650378553e-07, + "loss": 0.2639, + "step": 36756 + }, + { + "epoch": 1.7218812947955215, + "grad_norm": 0.6169366218679275, + "learning_rate": 2.4929690219615666e-07, + "loss": 0.2697, + "step": 36757 + }, + { + "epoch": 1.72192813978545, + "grad_norm": 0.5995634197833613, + "learning_rate": 2.4921435230668866e-07, + "loss": 0.2656, + "step": 36758 + }, + { + "epoch": 1.7219749847753782, + "grad_norm": 0.6019581678241013, + "learning_rate": 2.491318153699265e-07, + "loss": 0.2778, + "step": 36759 + }, + { + "epoch": 1.7220218297653065, + "grad_norm": 0.5925423111658231, + "learning_rate": 2.4904929138634485e-07, + "loss": 0.2773, + "step": 36760 + }, + { + "epoch": 1.722068674755235, + "grad_norm": 0.5709364094345794, + "learning_rate": 2.489667803564194e-07, + "loss": 0.256, + "step": 36761 + }, + { + "epoch": 1.7221155197451634, + "grad_norm": 0.6197972436683398, + "learning_rate": 2.4888428228062367e-07, + "loss": 0.2644, + "step": 36762 + }, + { + "epoch": 1.7221623647350914, + "grad_norm": 0.5952734688403606, + "learning_rate": 2.4880179715943363e-07, + "loss": 0.2677, + "step": 36763 + }, + { + "epoch": 1.72220920972502, + "grad_norm": 0.6154081530036281, + "learning_rate": 2.4871932499332285e-07, + "loss": 0.2731, + "step": 36764 + }, + { + "epoch": 1.7222560547149484, + "grad_norm": 0.640059743426991, + "learning_rate": 2.4863686578276675e-07, + "loss": 0.274, + "step": 36765 + }, + { + "epoch": 1.7223028997048766, + "grad_norm": 0.5938622480907175, + "learning_rate": 2.4855441952823937e-07, + "loss": 0.2707, + "step": 36766 + }, + { + "epoch": 1.7223497446948048, + "grad_norm": 0.6312575610486243, + "learning_rate": 2.4847198623021625e-07, + "loss": 0.2833, + "step": 36767 + }, + { + "epoch": 1.7223965896847333, + "grad_norm": 0.6185918871527646, + "learning_rate": 2.483895658891702e-07, + "loss": 0.2874, + "step": 36768 + }, + { + "epoch": 1.7224434346746615, + "grad_norm": 0.5741683523469066, + "learning_rate": 2.483071585055763e-07, + "loss": 0.2785, + "step": 36769 + }, + { + "epoch": 1.7224902796645898, + "grad_norm": 0.6125162354083332, + "learning_rate": 2.482247640799093e-07, + "loss": 0.2708, + "step": 36770 + }, + { + "epoch": 1.7225371246545182, + "grad_norm": 0.6010460431516208, + "learning_rate": 2.4814238261264253e-07, + "loss": 0.2793, + "step": 36771 + }, + { + "epoch": 1.7225839696444465, + "grad_norm": 0.5735937523352161, + "learning_rate": 2.4806001410424997e-07, + "loss": 0.2699, + "step": 36772 + }, + { + "epoch": 1.7226308146343747, + "grad_norm": 0.5969135516652511, + "learning_rate": 2.4797765855520686e-07, + "loss": 0.2721, + "step": 36773 + }, + { + "epoch": 1.7226776596243032, + "grad_norm": 0.5786071526323188, + "learning_rate": 2.4789531596598553e-07, + "loss": 0.2607, + "step": 36774 + }, + { + "epoch": 1.7227245046142317, + "grad_norm": 0.6351616896794535, + "learning_rate": 2.478129863370607e-07, + "loss": 0.2688, + "step": 36775 + }, + { + "epoch": 1.7227713496041597, + "grad_norm": 0.5954820863363528, + "learning_rate": 2.477306696689061e-07, + "loss": 0.2853, + "step": 36776 + }, + { + "epoch": 1.7228181945940881, + "grad_norm": 0.5948285795449307, + "learning_rate": 2.4764836596199604e-07, + "loss": 0.263, + "step": 36777 + }, + { + "epoch": 1.7228650395840166, + "grad_norm": 0.6739221578595118, + "learning_rate": 2.47566075216803e-07, + "loss": 0.2995, + "step": 36778 + }, + { + "epoch": 1.7229118845739448, + "grad_norm": 0.5836846944965304, + "learning_rate": 2.4748379743380125e-07, + "loss": 0.2645, + "step": 36779 + }, + { + "epoch": 1.722958729563873, + "grad_norm": 0.5830131350383719, + "learning_rate": 2.474015326134638e-07, + "loss": 0.272, + "step": 36780 + }, + { + "epoch": 1.7230055745538015, + "grad_norm": 0.5815675625574707, + "learning_rate": 2.4731928075626437e-07, + "loss": 0.2536, + "step": 36781 + }, + { + "epoch": 1.7230524195437298, + "grad_norm": 0.6364249961499833, + "learning_rate": 2.4723704186267653e-07, + "loss": 0.2904, + "step": 36782 + }, + { + "epoch": 1.723099264533658, + "grad_norm": 0.5851589263760062, + "learning_rate": 2.4715481593317353e-07, + "loss": 0.2522, + "step": 36783 + }, + { + "epoch": 1.7231461095235865, + "grad_norm": 0.6091796411972681, + "learning_rate": 2.470726029682277e-07, + "loss": 0.2695, + "step": 36784 + }, + { + "epoch": 1.7231929545135147, + "grad_norm": 0.6241127661728524, + "learning_rate": 2.4699040296831345e-07, + "loss": 0.2721, + "step": 36785 + }, + { + "epoch": 1.723239799503443, + "grad_norm": 0.5652452249904331, + "learning_rate": 2.4690821593390286e-07, + "loss": 0.2562, + "step": 36786 + }, + { + "epoch": 1.7232866444933714, + "grad_norm": 0.5742421816777827, + "learning_rate": 2.4682604186546886e-07, + "loss": 0.267, + "step": 36787 + }, + { + "epoch": 1.7233334894833, + "grad_norm": 0.563675618344998, + "learning_rate": 2.467438807634848e-07, + "loss": 0.2564, + "step": 36788 + }, + { + "epoch": 1.7233803344732281, + "grad_norm": 0.5961022533540044, + "learning_rate": 2.466617326284235e-07, + "loss": 0.2673, + "step": 36789 + }, + { + "epoch": 1.7234271794631564, + "grad_norm": 0.5769580927628082, + "learning_rate": 2.465795974607579e-07, + "loss": 0.2679, + "step": 36790 + }, + { + "epoch": 1.7234740244530848, + "grad_norm": 0.5819366470282072, + "learning_rate": 2.464974752609603e-07, + "loss": 0.2595, + "step": 36791 + }, + { + "epoch": 1.723520869443013, + "grad_norm": 0.5840242027225206, + "learning_rate": 2.4641536602950293e-07, + "loss": 0.2628, + "step": 36792 + }, + { + "epoch": 1.7235677144329413, + "grad_norm": 0.5826761205156619, + "learning_rate": 2.4633326976685885e-07, + "loss": 0.2511, + "step": 36793 + }, + { + "epoch": 1.7236145594228698, + "grad_norm": 0.6518011090369669, + "learning_rate": 2.462511864735004e-07, + "loss": 0.2649, + "step": 36794 + }, + { + "epoch": 1.723661404412798, + "grad_norm": 0.6215822100576143, + "learning_rate": 2.461691161498997e-07, + "loss": 0.2685, + "step": 36795 + }, + { + "epoch": 1.7237082494027263, + "grad_norm": 0.5831075639745096, + "learning_rate": 2.4608705879652916e-07, + "loss": 0.2627, + "step": 36796 + }, + { + "epoch": 1.7237550943926547, + "grad_norm": 0.5742647509784075, + "learning_rate": 2.4600501441386176e-07, + "loss": 0.2645, + "step": 36797 + }, + { + "epoch": 1.7238019393825832, + "grad_norm": 0.6156285790245587, + "learning_rate": 2.4592298300236903e-07, + "loss": 0.2701, + "step": 36798 + }, + { + "epoch": 1.7238487843725112, + "grad_norm": 0.5959749821823426, + "learning_rate": 2.458409645625223e-07, + "loss": 0.2674, + "step": 36799 + }, + { + "epoch": 1.7238956293624397, + "grad_norm": 0.6197198393455758, + "learning_rate": 2.457589590947945e-07, + "loss": 0.2856, + "step": 36800 + }, + { + "epoch": 1.7239424743523681, + "grad_norm": 0.5998692154372695, + "learning_rate": 2.456769665996572e-07, + "loss": 0.2725, + "step": 36801 + }, + { + "epoch": 1.7239893193422964, + "grad_norm": 0.5930884540575941, + "learning_rate": 2.455949870775823e-07, + "loss": 0.2724, + "step": 36802 + }, + { + "epoch": 1.7240361643322246, + "grad_norm": 0.6106346949594768, + "learning_rate": 2.455130205290421e-07, + "loss": 0.2732, + "step": 36803 + }, + { + "epoch": 1.724083009322153, + "grad_norm": 0.5973182202266809, + "learning_rate": 2.4543106695450734e-07, + "loss": 0.272, + "step": 36804 + }, + { + "epoch": 1.7241298543120813, + "grad_norm": 0.596932692149252, + "learning_rate": 2.453491263544508e-07, + "loss": 0.2685, + "step": 36805 + }, + { + "epoch": 1.7241766993020096, + "grad_norm": 0.6290034602595561, + "learning_rate": 2.4526719872934283e-07, + "loss": 0.2672, + "step": 36806 + }, + { + "epoch": 1.724223544291938, + "grad_norm": 0.6401011862017592, + "learning_rate": 2.451852840796554e-07, + "loss": 0.2747, + "step": 36807 + }, + { + "epoch": 1.7242703892818663, + "grad_norm": 0.5689666563095744, + "learning_rate": 2.451033824058596e-07, + "loss": 0.2746, + "step": 36808 + }, + { + "epoch": 1.7243172342717945, + "grad_norm": 0.6237871501568278, + "learning_rate": 2.4502149370842805e-07, + "loss": 0.2671, + "step": 36809 + }, + { + "epoch": 1.724364079261723, + "grad_norm": 0.5813072411714836, + "learning_rate": 2.449396179878302e-07, + "loss": 0.2726, + "step": 36810 + }, + { + "epoch": 1.7244109242516514, + "grad_norm": 0.6294142526447571, + "learning_rate": 2.448577552445383e-07, + "loss": 0.2863, + "step": 36811 + }, + { + "epoch": 1.7244577692415795, + "grad_norm": 0.5726120890599191, + "learning_rate": 2.4477590547902357e-07, + "loss": 0.2598, + "step": 36812 + }, + { + "epoch": 1.724504614231508, + "grad_norm": 0.6361908515854349, + "learning_rate": 2.4469406869175623e-07, + "loss": 0.2717, + "step": 36813 + }, + { + "epoch": 1.7245514592214364, + "grad_norm": 0.628360642711585, + "learning_rate": 2.446122448832075e-07, + "loss": 0.281, + "step": 36814 + }, + { + "epoch": 1.7245983042113646, + "grad_norm": 0.6748482169456917, + "learning_rate": 2.445304340538493e-07, + "loss": 0.309, + "step": 36815 + }, + { + "epoch": 1.7246451492012929, + "grad_norm": 0.6094525889867404, + "learning_rate": 2.4444863620415063e-07, + "loss": 0.269, + "step": 36816 + }, + { + "epoch": 1.7246919941912213, + "grad_norm": 0.6383439182426442, + "learning_rate": 2.4436685133458344e-07, + "loss": 0.2863, + "step": 36817 + }, + { + "epoch": 1.7247388391811496, + "grad_norm": 0.6046250491545812, + "learning_rate": 2.4428507944561807e-07, + "loss": 0.2816, + "step": 36818 + }, + { + "epoch": 1.7247856841710778, + "grad_norm": 0.6207190111982965, + "learning_rate": 2.4420332053772563e-07, + "loss": 0.2651, + "step": 36819 + }, + { + "epoch": 1.7248325291610063, + "grad_norm": 0.5839184777096343, + "learning_rate": 2.4412157461137534e-07, + "loss": 0.2642, + "step": 36820 + }, + { + "epoch": 1.7248793741509345, + "grad_norm": 0.6402041300878364, + "learning_rate": 2.440398416670392e-07, + "loss": 0.2859, + "step": 36821 + }, + { + "epoch": 1.7249262191408627, + "grad_norm": 0.5385146955614173, + "learning_rate": 2.439581217051862e-07, + "loss": 0.2512, + "step": 36822 + }, + { + "epoch": 1.7249730641307912, + "grad_norm": 0.597366502414977, + "learning_rate": 2.43876414726287e-07, + "loss": 0.2706, + "step": 36823 + }, + { + "epoch": 1.7250199091207197, + "grad_norm": 0.6350804185397226, + "learning_rate": 2.4379472073081226e-07, + "loss": 0.2888, + "step": 36824 + }, + { + "epoch": 1.725066754110648, + "grad_norm": 0.6035689797142744, + "learning_rate": 2.437130397192317e-07, + "loss": 0.2821, + "step": 36825 + }, + { + "epoch": 1.7251135991005762, + "grad_norm": 0.6340084835684621, + "learning_rate": 2.4363137169201577e-07, + "loss": 0.2618, + "step": 36826 + }, + { + "epoch": 1.7251604440905046, + "grad_norm": 0.5831077996295418, + "learning_rate": 2.4354971664963394e-07, + "loss": 0.2751, + "step": 36827 + }, + { + "epoch": 1.7252072890804329, + "grad_norm": 0.5759318398075528, + "learning_rate": 2.434680745925569e-07, + "loss": 0.2484, + "step": 36828 + }, + { + "epoch": 1.725254134070361, + "grad_norm": 0.6122557589980338, + "learning_rate": 2.433864455212531e-07, + "loss": 0.2923, + "step": 36829 + }, + { + "epoch": 1.7253009790602896, + "grad_norm": 0.6533243717756205, + "learning_rate": 2.433048294361934e-07, + "loss": 0.2734, + "step": 36830 + }, + { + "epoch": 1.7253478240502178, + "grad_norm": 0.5878128605883048, + "learning_rate": 2.432232263378473e-07, + "loss": 0.2542, + "step": 36831 + }, + { + "epoch": 1.725394669040146, + "grad_norm": 0.6620724860706432, + "learning_rate": 2.4314163622668444e-07, + "loss": 0.2861, + "step": 36832 + }, + { + "epoch": 1.7254415140300745, + "grad_norm": 0.6192176061490595, + "learning_rate": 2.4306005910317397e-07, + "loss": 0.2718, + "step": 36833 + }, + { + "epoch": 1.725488359020003, + "grad_norm": 0.6450208849023248, + "learning_rate": 2.429784949677863e-07, + "loss": 0.2761, + "step": 36834 + }, + { + "epoch": 1.725535204009931, + "grad_norm": 0.6391619424682281, + "learning_rate": 2.428969438209894e-07, + "loss": 0.2722, + "step": 36835 + }, + { + "epoch": 1.7255820489998595, + "grad_norm": 0.5762866537234843, + "learning_rate": 2.428154056632531e-07, + "loss": 0.2636, + "step": 36836 + }, + { + "epoch": 1.725628893989788, + "grad_norm": 0.604030620826477, + "learning_rate": 2.4273388049504713e-07, + "loss": 0.2726, + "step": 36837 + }, + { + "epoch": 1.7256757389797162, + "grad_norm": 0.5876821105122301, + "learning_rate": 2.426523683168402e-07, + "loss": 0.264, + "step": 36838 + }, + { + "epoch": 1.7257225839696444, + "grad_norm": 0.5988603211438306, + "learning_rate": 2.4257086912910207e-07, + "loss": 0.2645, + "step": 36839 + }, + { + "epoch": 1.7257694289595729, + "grad_norm": 0.6095991448398249, + "learning_rate": 2.424893829323011e-07, + "loss": 0.2727, + "step": 36840 + }, + { + "epoch": 1.725816273949501, + "grad_norm": 0.6190176620386758, + "learning_rate": 2.4240790972690583e-07, + "loss": 0.2947, + "step": 36841 + }, + { + "epoch": 1.7258631189394293, + "grad_norm": 0.5777178965689604, + "learning_rate": 2.423264495133856e-07, + "loss": 0.2532, + "step": 36842 + }, + { + "epoch": 1.7259099639293578, + "grad_norm": 0.613498002174869, + "learning_rate": 2.4224500229220934e-07, + "loss": 0.2656, + "step": 36843 + }, + { + "epoch": 1.725956808919286, + "grad_norm": 0.5805441663231452, + "learning_rate": 2.4216356806384566e-07, + "loss": 0.2706, + "step": 36844 + }, + { + "epoch": 1.7260036539092143, + "grad_norm": 0.5656027567726821, + "learning_rate": 2.4208214682876293e-07, + "loss": 0.2676, + "step": 36845 + }, + { + "epoch": 1.7260504988991427, + "grad_norm": 0.5979295909027225, + "learning_rate": 2.420007385874307e-07, + "loss": 0.2755, + "step": 36846 + }, + { + "epoch": 1.7260973438890712, + "grad_norm": 0.5906590725687689, + "learning_rate": 2.4191934334031665e-07, + "loss": 0.2728, + "step": 36847 + }, + { + "epoch": 1.7261441888789992, + "grad_norm": 0.5957013736394587, + "learning_rate": 2.4183796108788847e-07, + "loss": 0.2616, + "step": 36848 + }, + { + "epoch": 1.7261910338689277, + "grad_norm": 0.6012006204946426, + "learning_rate": 2.417565918306156e-07, + "loss": 0.2764, + "step": 36849 + }, + { + "epoch": 1.7262378788588562, + "grad_norm": 0.6386082815222084, + "learning_rate": 2.4167523556896585e-07, + "loss": 0.2729, + "step": 36850 + }, + { + "epoch": 1.7262847238487844, + "grad_norm": 0.6371123533498398, + "learning_rate": 2.415938923034072e-07, + "loss": 0.278, + "step": 36851 + }, + { + "epoch": 1.7263315688387126, + "grad_norm": 0.6233451110438387, + "learning_rate": 2.415125620344089e-07, + "loss": 0.2879, + "step": 36852 + }, + { + "epoch": 1.726378413828641, + "grad_norm": 0.5848086041068188, + "learning_rate": 2.4143124476243757e-07, + "loss": 0.2735, + "step": 36853 + }, + { + "epoch": 1.7264252588185693, + "grad_norm": 0.6427934729583678, + "learning_rate": 2.413499404879624e-07, + "loss": 0.2835, + "step": 36854 + }, + { + "epoch": 1.7264721038084976, + "grad_norm": 0.5483225953442223, + "learning_rate": 2.4126864921144993e-07, + "loss": 0.2601, + "step": 36855 + }, + { + "epoch": 1.726518948798426, + "grad_norm": 0.5995142064921392, + "learning_rate": 2.411873709333687e-07, + "loss": 0.2714, + "step": 36856 + }, + { + "epoch": 1.7265657937883543, + "grad_norm": 0.6515662710199254, + "learning_rate": 2.4110610565418665e-07, + "loss": 0.2588, + "step": 36857 + }, + { + "epoch": 1.7266126387782825, + "grad_norm": 0.6321760957852891, + "learning_rate": 2.410248533743714e-07, + "loss": 0.2841, + "step": 36858 + }, + { + "epoch": 1.726659483768211, + "grad_norm": 0.6237039315911548, + "learning_rate": 2.4094361409438993e-07, + "loss": 0.2628, + "step": 36859 + }, + { + "epoch": 1.7267063287581395, + "grad_norm": 0.5992167328826469, + "learning_rate": 2.408623878147101e-07, + "loss": 0.2699, + "step": 36860 + }, + { + "epoch": 1.7267531737480677, + "grad_norm": 0.631127338577135, + "learning_rate": 2.407811745358002e-07, + "loss": 0.2698, + "step": 36861 + }, + { + "epoch": 1.726800018737996, + "grad_norm": 0.5896173657165998, + "learning_rate": 2.406999742581262e-07, + "loss": 0.2681, + "step": 36862 + }, + { + "epoch": 1.7268468637279244, + "grad_norm": 0.6352081926698185, + "learning_rate": 2.406187869821558e-07, + "loss": 0.2753, + "step": 36863 + }, + { + "epoch": 1.7268937087178526, + "grad_norm": 0.5454053472534885, + "learning_rate": 2.4053761270835695e-07, + "loss": 0.2591, + "step": 36864 + }, + { + "epoch": 1.7269405537077809, + "grad_norm": 0.5389477156923076, + "learning_rate": 2.4045645143719596e-07, + "loss": 0.2403, + "step": 36865 + }, + { + "epoch": 1.7269873986977093, + "grad_norm": 0.6108523694082281, + "learning_rate": 2.4037530316914e-07, + "loss": 0.2678, + "step": 36866 + }, + { + "epoch": 1.7270342436876376, + "grad_norm": 0.6114934562506085, + "learning_rate": 2.4029416790465635e-07, + "loss": 0.2823, + "step": 36867 + }, + { + "epoch": 1.7270810886775658, + "grad_norm": 0.6242496269772775, + "learning_rate": 2.4021304564421227e-07, + "loss": 0.2632, + "step": 36868 + }, + { + "epoch": 1.7271279336674943, + "grad_norm": 0.5846413940807679, + "learning_rate": 2.4013193638827345e-07, + "loss": 0.2476, + "step": 36869 + }, + { + "epoch": 1.7271747786574227, + "grad_norm": 0.5884497284679326, + "learning_rate": 2.400508401373081e-07, + "loss": 0.2604, + "step": 36870 + }, + { + "epoch": 1.7272216236473508, + "grad_norm": 0.6299107628787107, + "learning_rate": 2.399697568917814e-07, + "loss": 0.2771, + "step": 36871 + }, + { + "epoch": 1.7272684686372792, + "grad_norm": 0.6090436175886674, + "learning_rate": 2.39888686652161e-07, + "loss": 0.2725, + "step": 36872 + }, + { + "epoch": 1.7273153136272077, + "grad_norm": 0.6060367760858548, + "learning_rate": 2.3980762941891306e-07, + "loss": 0.281, + "step": 36873 + }, + { + "epoch": 1.727362158617136, + "grad_norm": 0.5992149038851456, + "learning_rate": 2.39726585192504e-07, + "loss": 0.268, + "step": 36874 + }, + { + "epoch": 1.7274090036070642, + "grad_norm": 0.5608978172861038, + "learning_rate": 2.3964555397340075e-07, + "loss": 0.2674, + "step": 36875 + }, + { + "epoch": 1.7274558485969926, + "grad_norm": 0.6324260374086115, + "learning_rate": 2.395645357620696e-07, + "loss": 0.279, + "step": 36876 + }, + { + "epoch": 1.7275026935869209, + "grad_norm": 0.5928485778036193, + "learning_rate": 2.394835305589757e-07, + "loss": 0.2692, + "step": 36877 + }, + { + "epoch": 1.7275495385768491, + "grad_norm": 0.6686524526352207, + "learning_rate": 2.3940253836458594e-07, + "loss": 0.274, + "step": 36878 + }, + { + "epoch": 1.7275963835667776, + "grad_norm": 0.5963147324765615, + "learning_rate": 2.3932155917936626e-07, + "loss": 0.2719, + "step": 36879 + }, + { + "epoch": 1.7276432285567058, + "grad_norm": 0.5328570464770723, + "learning_rate": 2.3924059300378306e-07, + "loss": 0.2652, + "step": 36880 + }, + { + "epoch": 1.727690073546634, + "grad_norm": 0.5720637810824231, + "learning_rate": 2.3915963983830225e-07, + "loss": 0.2487, + "step": 36881 + }, + { + "epoch": 1.7277369185365625, + "grad_norm": 0.569954232856525, + "learning_rate": 2.390786996833891e-07, + "loss": 0.2556, + "step": 36882 + }, + { + "epoch": 1.727783763526491, + "grad_norm": 0.6336552033399752, + "learning_rate": 2.3899777253951015e-07, + "loss": 0.2705, + "step": 36883 + }, + { + "epoch": 1.727830608516419, + "grad_norm": 0.6025844122941467, + "learning_rate": 2.389168584071305e-07, + "loss": 0.277, + "step": 36884 + }, + { + "epoch": 1.7278774535063475, + "grad_norm": 0.629406088472734, + "learning_rate": 2.3883595728671577e-07, + "loss": 0.2884, + "step": 36885 + }, + { + "epoch": 1.727924298496276, + "grad_norm": 0.6146151579137772, + "learning_rate": 2.3875506917873217e-07, + "loss": 0.2586, + "step": 36886 + }, + { + "epoch": 1.7279711434862042, + "grad_norm": 0.591597403755273, + "learning_rate": 2.3867419408364426e-07, + "loss": 0.2595, + "step": 36887 + }, + { + "epoch": 1.7280179884761324, + "grad_norm": 0.5736353058842741, + "learning_rate": 2.3859333200191904e-07, + "loss": 0.2585, + "step": 36888 + }, + { + "epoch": 1.7280648334660609, + "grad_norm": 0.5895229124863677, + "learning_rate": 2.3851248293402046e-07, + "loss": 0.2727, + "step": 36889 + }, + { + "epoch": 1.7281116784559891, + "grad_norm": 0.6083303681341463, + "learning_rate": 2.3843164688041376e-07, + "loss": 0.2746, + "step": 36890 + }, + { + "epoch": 1.7281585234459174, + "grad_norm": 0.5913140900222589, + "learning_rate": 2.3835082384156438e-07, + "loss": 0.2599, + "step": 36891 + }, + { + "epoch": 1.7282053684358458, + "grad_norm": 0.5929177829098705, + "learning_rate": 2.3827001381793779e-07, + "loss": 0.2698, + "step": 36892 + }, + { + "epoch": 1.728252213425774, + "grad_norm": 0.6079355544069721, + "learning_rate": 2.3818921680999863e-07, + "loss": 0.2617, + "step": 36893 + }, + { + "epoch": 1.7282990584157023, + "grad_norm": 0.5789950074810514, + "learning_rate": 2.381084328182126e-07, + "loss": 0.2688, + "step": 36894 + }, + { + "epoch": 1.7283459034056308, + "grad_norm": 0.5942280498818517, + "learning_rate": 2.3802766184304353e-07, + "loss": 0.2627, + "step": 36895 + }, + { + "epoch": 1.7283927483955592, + "grad_norm": 0.5922534611912763, + "learning_rate": 2.3794690388495718e-07, + "loss": 0.2778, + "step": 36896 + }, + { + "epoch": 1.7284395933854875, + "grad_norm": 0.5997474629744508, + "learning_rate": 2.378661589444173e-07, + "loss": 0.2742, + "step": 36897 + }, + { + "epoch": 1.7284864383754157, + "grad_norm": 0.6060581800021657, + "learning_rate": 2.3778542702188934e-07, + "loss": 0.2777, + "step": 36898 + }, + { + "epoch": 1.7285332833653442, + "grad_norm": 0.5848677395720395, + "learning_rate": 2.3770470811783742e-07, + "loss": 0.2646, + "step": 36899 + }, + { + "epoch": 1.7285801283552724, + "grad_norm": 0.5740156904512093, + "learning_rate": 2.376240022327267e-07, + "loss": 0.2771, + "step": 36900 + }, + { + "epoch": 1.7286269733452007, + "grad_norm": 0.5461740982989706, + "learning_rate": 2.3754330936702124e-07, + "loss": 0.2454, + "step": 36901 + }, + { + "epoch": 1.7286738183351291, + "grad_norm": 0.5447904200761134, + "learning_rate": 2.3746262952118516e-07, + "loss": 0.2454, + "step": 36902 + }, + { + "epoch": 1.7287206633250574, + "grad_norm": 0.5987108340969907, + "learning_rate": 2.3738196269568332e-07, + "loss": 0.2815, + "step": 36903 + }, + { + "epoch": 1.7287675083149856, + "grad_norm": 0.6214374070596896, + "learning_rate": 2.3730130889097923e-07, + "loss": 0.2697, + "step": 36904 + }, + { + "epoch": 1.728814353304914, + "grad_norm": 0.645460535057547, + "learning_rate": 2.372206681075373e-07, + "loss": 0.3043, + "step": 36905 + }, + { + "epoch": 1.7288611982948425, + "grad_norm": 0.6174937218918977, + "learning_rate": 2.3714004034582182e-07, + "loss": 0.2722, + "step": 36906 + }, + { + "epoch": 1.7289080432847705, + "grad_norm": 0.6230778425205646, + "learning_rate": 2.370594256062972e-07, + "loss": 0.2666, + "step": 36907 + }, + { + "epoch": 1.728954888274699, + "grad_norm": 0.6580747195473056, + "learning_rate": 2.3697882388942606e-07, + "loss": 0.2559, + "step": 36908 + }, + { + "epoch": 1.7290017332646275, + "grad_norm": 0.5753440100017001, + "learning_rate": 2.3689823519567307e-07, + "loss": 0.2749, + "step": 36909 + }, + { + "epoch": 1.7290485782545557, + "grad_norm": 0.6440083068219589, + "learning_rate": 2.3681765952550255e-07, + "loss": 0.2816, + "step": 36910 + }, + { + "epoch": 1.729095423244484, + "grad_norm": 0.6362371216310269, + "learning_rate": 2.3673709687937697e-07, + "loss": 0.2782, + "step": 36911 + }, + { + "epoch": 1.7291422682344124, + "grad_norm": 0.5932798871898964, + "learning_rate": 2.366565472577606e-07, + "loss": 0.264, + "step": 36912 + }, + { + "epoch": 1.7291891132243407, + "grad_norm": 0.59763950424498, + "learning_rate": 2.3657601066111758e-07, + "loss": 0.2635, + "step": 36913 + }, + { + "epoch": 1.729235958214269, + "grad_norm": 0.6192968869885725, + "learning_rate": 2.3649548708991e-07, + "loss": 0.2714, + "step": 36914 + }, + { + "epoch": 1.7292828032041974, + "grad_norm": 0.5707980307831185, + "learning_rate": 2.3641497654460222e-07, + "loss": 0.2697, + "step": 36915 + }, + { + "epoch": 1.7293296481941256, + "grad_norm": 0.5765454967420615, + "learning_rate": 2.3633447902565691e-07, + "loss": 0.2544, + "step": 36916 + }, + { + "epoch": 1.7293764931840538, + "grad_norm": 0.6254821142582087, + "learning_rate": 2.3625399453353848e-07, + "loss": 0.2585, + "step": 36917 + }, + { + "epoch": 1.7294233381739823, + "grad_norm": 0.6209500440236988, + "learning_rate": 2.36173523068709e-07, + "loss": 0.2815, + "step": 36918 + }, + { + "epoch": 1.7294701831639108, + "grad_norm": 0.615095536936906, + "learning_rate": 2.3609306463163227e-07, + "loss": 0.2875, + "step": 36919 + }, + { + "epoch": 1.7295170281538388, + "grad_norm": 0.6544835665537018, + "learning_rate": 2.360126192227702e-07, + "loss": 0.2726, + "step": 36920 + }, + { + "epoch": 1.7295638731437672, + "grad_norm": 0.6128376339110304, + "learning_rate": 2.3593218684258678e-07, + "loss": 0.2594, + "step": 36921 + }, + { + "epoch": 1.7296107181336957, + "grad_norm": 0.6093112080675419, + "learning_rate": 2.3585176749154448e-07, + "loss": 0.2706, + "step": 36922 + }, + { + "epoch": 1.729657563123624, + "grad_norm": 0.6410237774120588, + "learning_rate": 2.3577136117010678e-07, + "loss": 0.274, + "step": 36923 + }, + { + "epoch": 1.7297044081135522, + "grad_norm": 0.6472654136597144, + "learning_rate": 2.35690967878735e-07, + "loss": 0.2899, + "step": 36924 + }, + { + "epoch": 1.7297512531034807, + "grad_norm": 0.5864897739302383, + "learning_rate": 2.356105876178935e-07, + "loss": 0.2726, + "step": 36925 + }, + { + "epoch": 1.729798098093409, + "grad_norm": 0.6064098027632568, + "learning_rate": 2.3553022038804302e-07, + "loss": 0.2683, + "step": 36926 + }, + { + "epoch": 1.7298449430833371, + "grad_norm": 0.5856894748778022, + "learning_rate": 2.354498661896473e-07, + "loss": 0.2716, + "step": 36927 + }, + { + "epoch": 1.7298917880732656, + "grad_norm": 0.6034420665733339, + "learning_rate": 2.3536952502316828e-07, + "loss": 0.2681, + "step": 36928 + }, + { + "epoch": 1.7299386330631938, + "grad_norm": 0.6091185489048094, + "learning_rate": 2.352891968890686e-07, + "loss": 0.2698, + "step": 36929 + }, + { + "epoch": 1.729985478053122, + "grad_norm": 0.6200445404103323, + "learning_rate": 2.3520888178781064e-07, + "loss": 0.2812, + "step": 36930 + }, + { + "epoch": 1.7300323230430505, + "grad_norm": 0.5956807641052988, + "learning_rate": 2.3512857971985632e-07, + "loss": 0.2775, + "step": 36931 + }, + { + "epoch": 1.730079168032979, + "grad_norm": 0.5505357590723546, + "learning_rate": 2.3504829068566742e-07, + "loss": 0.2442, + "step": 36932 + }, + { + "epoch": 1.7301260130229072, + "grad_norm": 0.5953255887175574, + "learning_rate": 2.349680146857064e-07, + "loss": 0.253, + "step": 36933 + }, + { + "epoch": 1.7301728580128355, + "grad_norm": 0.6054527864613203, + "learning_rate": 2.348877517204351e-07, + "loss": 0.2736, + "step": 36934 + }, + { + "epoch": 1.730219703002764, + "grad_norm": 0.5573321271207312, + "learning_rate": 2.3480750179031537e-07, + "loss": 0.2535, + "step": 36935 + }, + { + "epoch": 1.7302665479926922, + "grad_norm": 0.6314910532350818, + "learning_rate": 2.3472726489580933e-07, + "loss": 0.2765, + "step": 36936 + }, + { + "epoch": 1.7303133929826204, + "grad_norm": 0.5823201369502129, + "learning_rate": 2.3464704103737884e-07, + "loss": 0.2565, + "step": 36937 + }, + { + "epoch": 1.730360237972549, + "grad_norm": 0.6059674027580471, + "learning_rate": 2.345668302154852e-07, + "loss": 0.2758, + "step": 36938 + }, + { + "epoch": 1.7304070829624771, + "grad_norm": 0.6215734999814869, + "learning_rate": 2.3448663243058972e-07, + "loss": 0.2809, + "step": 36939 + }, + { + "epoch": 1.7304539279524054, + "grad_norm": 0.6189934803914835, + "learning_rate": 2.3440644768315425e-07, + "loss": 0.2558, + "step": 36940 + }, + { + "epoch": 1.7305007729423338, + "grad_norm": 0.5859682775764524, + "learning_rate": 2.3432627597364004e-07, + "loss": 0.2623, + "step": 36941 + }, + { + "epoch": 1.7305476179322623, + "grad_norm": 0.5812401632341471, + "learning_rate": 2.3424611730250902e-07, + "loss": 0.261, + "step": 36942 + }, + { + "epoch": 1.7305944629221903, + "grad_norm": 0.6118042793093719, + "learning_rate": 2.3416597167022242e-07, + "loss": 0.2698, + "step": 36943 + }, + { + "epoch": 1.7306413079121188, + "grad_norm": 0.5780770476141107, + "learning_rate": 2.3408583907724048e-07, + "loss": 0.254, + "step": 36944 + }, + { + "epoch": 1.7306881529020472, + "grad_norm": 0.5885696212191275, + "learning_rate": 2.3400571952402585e-07, + "loss": 0.2618, + "step": 36945 + }, + { + "epoch": 1.7307349978919755, + "grad_norm": 0.59818757167335, + "learning_rate": 2.339256130110379e-07, + "loss": 0.2693, + "step": 36946 + }, + { + "epoch": 1.7307818428819037, + "grad_norm": 0.5802737611152424, + "learning_rate": 2.3384551953873875e-07, + "loss": 0.275, + "step": 36947 + }, + { + "epoch": 1.7308286878718322, + "grad_norm": 0.6337207187614149, + "learning_rate": 2.3376543910758915e-07, + "loss": 0.2715, + "step": 36948 + }, + { + "epoch": 1.7308755328617604, + "grad_norm": 0.6659925108810291, + "learning_rate": 2.3368537171805012e-07, + "loss": 0.268, + "step": 36949 + }, + { + "epoch": 1.7309223778516887, + "grad_norm": 0.6576270191485535, + "learning_rate": 2.3360531737058183e-07, + "loss": 0.2935, + "step": 36950 + }, + { + "epoch": 1.7309692228416171, + "grad_norm": 0.6595460977135675, + "learning_rate": 2.3352527606564507e-07, + "loss": 0.2741, + "step": 36951 + }, + { + "epoch": 1.7310160678315454, + "grad_norm": 0.5922087676402703, + "learning_rate": 2.3344524780370137e-07, + "loss": 0.2714, + "step": 36952 + }, + { + "epoch": 1.7310629128214736, + "grad_norm": 0.6086343140714816, + "learning_rate": 2.333652325852101e-07, + "loss": 0.2669, + "step": 36953 + }, + { + "epoch": 1.731109757811402, + "grad_norm": 0.6300883701461337, + "learning_rate": 2.33285230410632e-07, + "loss": 0.271, + "step": 36954 + }, + { + "epoch": 1.7311566028013305, + "grad_norm": 0.6072903213585227, + "learning_rate": 2.3320524128042837e-07, + "loss": 0.2758, + "step": 36955 + }, + { + "epoch": 1.7312034477912586, + "grad_norm": 0.5577591158129052, + "learning_rate": 2.331252651950583e-07, + "loss": 0.2527, + "step": 36956 + }, + { + "epoch": 1.731250292781187, + "grad_norm": 0.6056087390336699, + "learning_rate": 2.3304530215498277e-07, + "loss": 0.2728, + "step": 36957 + }, + { + "epoch": 1.7312971377711155, + "grad_norm": 0.6238046083902028, + "learning_rate": 2.3296535216066145e-07, + "loss": 0.2799, + "step": 36958 + }, + { + "epoch": 1.7313439827610437, + "grad_norm": 0.5689667481646826, + "learning_rate": 2.3288541521255535e-07, + "loss": 0.2558, + "step": 36959 + }, + { + "epoch": 1.731390827750972, + "grad_norm": 0.6204350751759604, + "learning_rate": 2.3280549131112357e-07, + "loss": 0.2798, + "step": 36960 + }, + { + "epoch": 1.7314376727409004, + "grad_norm": 0.5637665970998093, + "learning_rate": 2.3272558045682652e-07, + "loss": 0.2524, + "step": 36961 + }, + { + "epoch": 1.7314845177308287, + "grad_norm": 0.5587405956871946, + "learning_rate": 2.326456826501236e-07, + "loss": 0.2547, + "step": 36962 + }, + { + "epoch": 1.731531362720757, + "grad_norm": 0.5335166553520214, + "learning_rate": 2.325657978914747e-07, + "loss": 0.2567, + "step": 36963 + }, + { + "epoch": 1.7315782077106854, + "grad_norm": 0.5661344388121708, + "learning_rate": 2.3248592618134002e-07, + "loss": 0.2685, + "step": 36964 + }, + { + "epoch": 1.7316250527006136, + "grad_norm": 0.5699121247695409, + "learning_rate": 2.3240606752017863e-07, + "loss": 0.2657, + "step": 36965 + }, + { + "epoch": 1.7316718976905419, + "grad_norm": 0.5570267298310202, + "learning_rate": 2.32326221908451e-07, + "loss": 0.2524, + "step": 36966 + }, + { + "epoch": 1.7317187426804703, + "grad_norm": 0.5931753954163894, + "learning_rate": 2.3224638934661563e-07, + "loss": 0.2646, + "step": 36967 + }, + { + "epoch": 1.7317655876703988, + "grad_norm": 0.5719071800412854, + "learning_rate": 2.3216656983513247e-07, + "loss": 0.2751, + "step": 36968 + }, + { + "epoch": 1.731812432660327, + "grad_norm": 0.5857113482267824, + "learning_rate": 2.3208676337446055e-07, + "loss": 0.2576, + "step": 36969 + }, + { + "epoch": 1.7318592776502553, + "grad_norm": 0.5926368730810625, + "learning_rate": 2.3200696996505927e-07, + "loss": 0.2585, + "step": 36970 + }, + { + "epoch": 1.7319061226401837, + "grad_norm": 0.6095372536952821, + "learning_rate": 2.319271896073877e-07, + "loss": 0.2766, + "step": 36971 + }, + { + "epoch": 1.731952967630112, + "grad_norm": 0.5971171666513645, + "learning_rate": 2.3184742230190543e-07, + "loss": 0.2628, + "step": 36972 + }, + { + "epoch": 1.7319998126200402, + "grad_norm": 0.6176934917049401, + "learning_rate": 2.3176766804907103e-07, + "loss": 0.259, + "step": 36973 + }, + { + "epoch": 1.7320466576099687, + "grad_norm": 0.5666781252956044, + "learning_rate": 2.3168792684934384e-07, + "loss": 0.2474, + "step": 36974 + }, + { + "epoch": 1.732093502599897, + "grad_norm": 0.6042716740189954, + "learning_rate": 2.3160819870318208e-07, + "loss": 0.2663, + "step": 36975 + }, + { + "epoch": 1.7321403475898252, + "grad_norm": 0.6119092360489488, + "learning_rate": 2.3152848361104513e-07, + "loss": 0.2818, + "step": 36976 + }, + { + "epoch": 1.7321871925797536, + "grad_norm": 0.6470567209225945, + "learning_rate": 2.314487815733915e-07, + "loss": 0.2742, + "step": 36977 + }, + { + "epoch": 1.732234037569682, + "grad_norm": 0.6187924286407421, + "learning_rate": 2.3136909259068002e-07, + "loss": 0.2689, + "step": 36978 + }, + { + "epoch": 1.73228088255961, + "grad_norm": 0.6085150193670336, + "learning_rate": 2.3128941666336947e-07, + "loss": 0.2605, + "step": 36979 + }, + { + "epoch": 1.7323277275495386, + "grad_norm": 0.6279545198243253, + "learning_rate": 2.3120975379191833e-07, + "loss": 0.2666, + "step": 36980 + }, + { + "epoch": 1.732374572539467, + "grad_norm": 0.612371987981109, + "learning_rate": 2.3113010397678437e-07, + "loss": 0.2771, + "step": 36981 + }, + { + "epoch": 1.7324214175293953, + "grad_norm": 0.6032204557877764, + "learning_rate": 2.3105046721842634e-07, + "loss": 0.2729, + "step": 36982 + }, + { + "epoch": 1.7324682625193235, + "grad_norm": 0.5976260241081653, + "learning_rate": 2.3097084351730247e-07, + "loss": 0.2716, + "step": 36983 + }, + { + "epoch": 1.732515107509252, + "grad_norm": 0.5282029935564186, + "learning_rate": 2.3089123287387105e-07, + "loss": 0.2488, + "step": 36984 + }, + { + "epoch": 1.7325619524991802, + "grad_norm": 0.6385437987151199, + "learning_rate": 2.3081163528859057e-07, + "loss": 0.2868, + "step": 36985 + }, + { + "epoch": 1.7326087974891085, + "grad_norm": 0.616588112986855, + "learning_rate": 2.30732050761919e-07, + "loss": 0.2794, + "step": 36986 + }, + { + "epoch": 1.732655642479037, + "grad_norm": 0.6471278341911723, + "learning_rate": 2.3065247929431434e-07, + "loss": 0.272, + "step": 36987 + }, + { + "epoch": 1.7327024874689652, + "grad_norm": 0.5881434440639863, + "learning_rate": 2.3057292088623367e-07, + "loss": 0.2622, + "step": 36988 + }, + { + "epoch": 1.7327493324588934, + "grad_norm": 0.5712527729902871, + "learning_rate": 2.3049337553813529e-07, + "loss": 0.2696, + "step": 36989 + }, + { + "epoch": 1.7327961774488219, + "grad_norm": 0.6643837884538953, + "learning_rate": 2.3041384325047738e-07, + "loss": 0.2743, + "step": 36990 + }, + { + "epoch": 1.7328430224387503, + "grad_norm": 0.6294828976491356, + "learning_rate": 2.303343240237174e-07, + "loss": 0.2838, + "step": 36991 + }, + { + "epoch": 1.7328898674286783, + "grad_norm": 0.5752579550575414, + "learning_rate": 2.3025481785831306e-07, + "loss": 0.2725, + "step": 36992 + }, + { + "epoch": 1.7329367124186068, + "grad_norm": 0.5452986700851101, + "learning_rate": 2.301753247547217e-07, + "loss": 0.246, + "step": 36993 + }, + { + "epoch": 1.7329835574085353, + "grad_norm": 0.5829753711642527, + "learning_rate": 2.3009584471340107e-07, + "loss": 0.2672, + "step": 36994 + }, + { + "epoch": 1.7330304023984635, + "grad_norm": 0.620006660579596, + "learning_rate": 2.3001637773480773e-07, + "loss": 0.2667, + "step": 36995 + }, + { + "epoch": 1.7330772473883918, + "grad_norm": 0.5972665095657187, + "learning_rate": 2.2993692381939991e-07, + "loss": 0.2644, + "step": 36996 + }, + { + "epoch": 1.7331240923783202, + "grad_norm": 0.5759382155797329, + "learning_rate": 2.298574829676345e-07, + "loss": 0.2653, + "step": 36997 + }, + { + "epoch": 1.7331709373682485, + "grad_norm": 0.5834153671566003, + "learning_rate": 2.2977805517996887e-07, + "loss": 0.2602, + "step": 36998 + }, + { + "epoch": 1.7332177823581767, + "grad_norm": 0.5637305999155393, + "learning_rate": 2.2969864045685962e-07, + "loss": 0.2467, + "step": 36999 + }, + { + "epoch": 1.7332646273481052, + "grad_norm": 0.5854980828700049, + "learning_rate": 2.296192387987642e-07, + "loss": 0.2605, + "step": 37000 + }, + { + "epoch": 1.7333114723380334, + "grad_norm": 0.5939065345700043, + "learning_rate": 2.2953985020613993e-07, + "loss": 0.2567, + "step": 37001 + }, + { + "epoch": 1.7333583173279616, + "grad_norm": 0.5778390397286144, + "learning_rate": 2.2946047467944239e-07, + "loss": 0.2693, + "step": 37002 + }, + { + "epoch": 1.73340516231789, + "grad_norm": 0.593885874851111, + "learning_rate": 2.2938111221912945e-07, + "loss": 0.2626, + "step": 37003 + }, + { + "epoch": 1.7334520073078186, + "grad_norm": 0.5730909554438418, + "learning_rate": 2.2930176282565776e-07, + "loss": 0.2545, + "step": 37004 + }, + { + "epoch": 1.7334988522977468, + "grad_norm": 0.6002365691138093, + "learning_rate": 2.2922242649948328e-07, + "loss": 0.2703, + "step": 37005 + }, + { + "epoch": 1.733545697287675, + "grad_norm": 0.5818094487709148, + "learning_rate": 2.291431032410632e-07, + "loss": 0.2655, + "step": 37006 + }, + { + "epoch": 1.7335925422776035, + "grad_norm": 0.532566745061951, + "learning_rate": 2.290637930508535e-07, + "loss": 0.2528, + "step": 37007 + }, + { + "epoch": 1.7336393872675318, + "grad_norm": 0.6032528551859532, + "learning_rate": 2.2898449592931167e-07, + "loss": 0.2769, + "step": 37008 + }, + { + "epoch": 1.73368623225746, + "grad_norm": 0.6098032621700533, + "learning_rate": 2.2890521187689252e-07, + "loss": 0.273, + "step": 37009 + }, + { + "epoch": 1.7337330772473885, + "grad_norm": 0.5994542119917806, + "learning_rate": 2.2882594089405353e-07, + "loss": 0.2728, + "step": 37010 + }, + { + "epoch": 1.7337799222373167, + "grad_norm": 0.5957385453294171, + "learning_rate": 2.2874668298125012e-07, + "loss": 0.2577, + "step": 37011 + }, + { + "epoch": 1.733826767227245, + "grad_norm": 0.6445879224048577, + "learning_rate": 2.2866743813893865e-07, + "loss": 0.2815, + "step": 37012 + }, + { + "epoch": 1.7338736122171734, + "grad_norm": 0.5688175191370787, + "learning_rate": 2.2858820636757512e-07, + "loss": 0.2658, + "step": 37013 + }, + { + "epoch": 1.7339204572071019, + "grad_norm": 0.6229491408186785, + "learning_rate": 2.285089876676158e-07, + "loss": 0.2701, + "step": 37014 + }, + { + "epoch": 1.7339673021970299, + "grad_norm": 0.5992154393849337, + "learning_rate": 2.284297820395165e-07, + "loss": 0.2626, + "step": 37015 + }, + { + "epoch": 1.7340141471869583, + "grad_norm": 0.6102397579443769, + "learning_rate": 2.283505894837329e-07, + "loss": 0.261, + "step": 37016 + }, + { + "epoch": 1.7340609921768868, + "grad_norm": 0.5682863077113115, + "learning_rate": 2.282714100007205e-07, + "loss": 0.2483, + "step": 37017 + }, + { + "epoch": 1.734107837166815, + "grad_norm": 0.6288008716143861, + "learning_rate": 2.2819224359093507e-07, + "loss": 0.2686, + "step": 37018 + }, + { + "epoch": 1.7341546821567433, + "grad_norm": 0.59141283591168, + "learning_rate": 2.2811309025483201e-07, + "loss": 0.2648, + "step": 37019 + }, + { + "epoch": 1.7342015271466718, + "grad_norm": 0.5828712034753718, + "learning_rate": 2.2803394999286742e-07, + "loss": 0.2743, + "step": 37020 + }, + { + "epoch": 1.7342483721366, + "grad_norm": 0.6320661205493182, + "learning_rate": 2.2795482280549675e-07, + "loss": 0.2736, + "step": 37021 + }, + { + "epoch": 1.7342952171265282, + "grad_norm": 0.6806719719983864, + "learning_rate": 2.2787570869317433e-07, + "loss": 0.2929, + "step": 37022 + }, + { + "epoch": 1.7343420621164567, + "grad_norm": 0.6138031788276478, + "learning_rate": 2.2779660765635674e-07, + "loss": 0.2658, + "step": 37023 + }, + { + "epoch": 1.734388907106385, + "grad_norm": 0.6093878376146941, + "learning_rate": 2.2771751969549839e-07, + "loss": 0.2533, + "step": 37024 + }, + { + "epoch": 1.7344357520963132, + "grad_norm": 0.5975642933896131, + "learning_rate": 2.2763844481105441e-07, + "loss": 0.268, + "step": 37025 + }, + { + "epoch": 1.7344825970862416, + "grad_norm": 0.6273300369618685, + "learning_rate": 2.2755938300348003e-07, + "loss": 0.2935, + "step": 37026 + }, + { + "epoch": 1.73452944207617, + "grad_norm": 0.5663976278186923, + "learning_rate": 2.2748033427323013e-07, + "loss": 0.2651, + "step": 37027 + }, + { + "epoch": 1.7345762870660981, + "grad_norm": 0.6235254106587895, + "learning_rate": 2.2740129862076023e-07, + "loss": 0.2908, + "step": 37028 + }, + { + "epoch": 1.7346231320560266, + "grad_norm": 0.6163980955632647, + "learning_rate": 2.273222760465249e-07, + "loss": 0.2847, + "step": 37029 + }, + { + "epoch": 1.734669977045955, + "grad_norm": 0.646600067504891, + "learning_rate": 2.27243266550978e-07, + "loss": 0.2628, + "step": 37030 + }, + { + "epoch": 1.7347168220358833, + "grad_norm": 0.5728829416772606, + "learning_rate": 2.271642701345747e-07, + "loss": 0.2649, + "step": 37031 + }, + { + "epoch": 1.7347636670258115, + "grad_norm": 0.6167337000030431, + "learning_rate": 2.270852867977699e-07, + "loss": 0.2795, + "step": 37032 + }, + { + "epoch": 1.73481051201574, + "grad_norm": 0.5995458705130035, + "learning_rate": 2.2700631654101828e-07, + "loss": 0.289, + "step": 37033 + }, + { + "epoch": 1.7348573570056682, + "grad_norm": 0.5669191214352964, + "learning_rate": 2.269273593647736e-07, + "loss": 0.265, + "step": 37034 + }, + { + "epoch": 1.7349042019955965, + "grad_norm": 0.6839801869442781, + "learning_rate": 2.2684841526949136e-07, + "loss": 0.2771, + "step": 37035 + }, + { + "epoch": 1.734951046985525, + "grad_norm": 0.600957121891906, + "learning_rate": 2.2676948425562506e-07, + "loss": 0.2703, + "step": 37036 + }, + { + "epoch": 1.7349978919754532, + "grad_norm": 0.5978489703202811, + "learning_rate": 2.2669056632362851e-07, + "loss": 0.2854, + "step": 37037 + }, + { + "epoch": 1.7350447369653814, + "grad_norm": 0.5884661018027748, + "learning_rate": 2.2661166147395663e-07, + "loss": 0.2654, + "step": 37038 + }, + { + "epoch": 1.7350915819553099, + "grad_norm": 0.6094092704631456, + "learning_rate": 2.2653276970706294e-07, + "loss": 0.2764, + "step": 37039 + }, + { + "epoch": 1.7351384269452383, + "grad_norm": 0.5746903757305654, + "learning_rate": 2.2645389102340183e-07, + "loss": 0.2561, + "step": 37040 + }, + { + "epoch": 1.7351852719351666, + "grad_norm": 0.5796933986799802, + "learning_rate": 2.2637502542342788e-07, + "loss": 0.2566, + "step": 37041 + }, + { + "epoch": 1.7352321169250948, + "grad_norm": 0.6449732252701228, + "learning_rate": 2.2629617290759325e-07, + "loss": 0.272, + "step": 37042 + }, + { + "epoch": 1.7352789619150233, + "grad_norm": 0.6385134828586967, + "learning_rate": 2.2621733347635345e-07, + "loss": 0.2861, + "step": 37043 + }, + { + "epoch": 1.7353258069049515, + "grad_norm": 0.6305330644676473, + "learning_rate": 2.2613850713016111e-07, + "loss": 0.2836, + "step": 37044 + }, + { + "epoch": 1.7353726518948798, + "grad_norm": 0.5954077493606199, + "learning_rate": 2.260596938694698e-07, + "loss": 0.2589, + "step": 37045 + }, + { + "epoch": 1.7354194968848082, + "grad_norm": 0.6180998804641102, + "learning_rate": 2.259808936947336e-07, + "loss": 0.277, + "step": 37046 + }, + { + "epoch": 1.7354663418747365, + "grad_norm": 0.6915202384963837, + "learning_rate": 2.2590210660640626e-07, + "loss": 0.2769, + "step": 37047 + }, + { + "epoch": 1.7355131868646647, + "grad_norm": 0.6018745292745068, + "learning_rate": 2.2582333260493998e-07, + "loss": 0.2755, + "step": 37048 + }, + { + "epoch": 1.7355600318545932, + "grad_norm": 0.6138746058428783, + "learning_rate": 2.257445716907891e-07, + "loss": 0.2848, + "step": 37049 + }, + { + "epoch": 1.7356068768445216, + "grad_norm": 0.6206684740541584, + "learning_rate": 2.2566582386440715e-07, + "loss": 0.2914, + "step": 37050 + }, + { + "epoch": 1.7356537218344497, + "grad_norm": 0.6048482158670929, + "learning_rate": 2.25587089126246e-07, + "loss": 0.2606, + "step": 37051 + }, + { + "epoch": 1.7357005668243781, + "grad_norm": 0.5507292177447318, + "learning_rate": 2.2550836747675969e-07, + "loss": 0.2603, + "step": 37052 + }, + { + "epoch": 1.7357474118143066, + "grad_norm": 0.6151772890449962, + "learning_rate": 2.254296589164015e-07, + "loss": 0.2688, + "step": 37053 + }, + { + "epoch": 1.7357942568042348, + "grad_norm": 0.6479090182042484, + "learning_rate": 2.2535096344562357e-07, + "loss": 0.2697, + "step": 37054 + }, + { + "epoch": 1.735841101794163, + "grad_norm": 0.5992090509146281, + "learning_rate": 2.2527228106487885e-07, + "loss": 0.2757, + "step": 37055 + }, + { + "epoch": 1.7358879467840915, + "grad_norm": 0.650376911456055, + "learning_rate": 2.2519361177462062e-07, + "loss": 0.2893, + "step": 37056 + }, + { + "epoch": 1.7359347917740198, + "grad_norm": 0.624836582312179, + "learning_rate": 2.2511495557530182e-07, + "loss": 0.2778, + "step": 37057 + }, + { + "epoch": 1.735981636763948, + "grad_norm": 0.597942695754453, + "learning_rate": 2.2503631246737433e-07, + "loss": 0.2615, + "step": 37058 + }, + { + "epoch": 1.7360284817538765, + "grad_norm": 0.5823064177574736, + "learning_rate": 2.249576824512914e-07, + "loss": 0.2809, + "step": 37059 + }, + { + "epoch": 1.7360753267438047, + "grad_norm": 0.6042142340720361, + "learning_rate": 2.248790655275049e-07, + "loss": 0.2778, + "step": 37060 + }, + { + "epoch": 1.736122171733733, + "grad_norm": 0.5915822301243687, + "learning_rate": 2.2480046169646718e-07, + "loss": 0.279, + "step": 37061 + }, + { + "epoch": 1.7361690167236614, + "grad_norm": 0.6128661668658635, + "learning_rate": 2.2472187095863128e-07, + "loss": 0.2846, + "step": 37062 + }, + { + "epoch": 1.7362158617135899, + "grad_norm": 0.5850020936640027, + "learning_rate": 2.246432933144496e-07, + "loss": 0.2705, + "step": 37063 + }, + { + "epoch": 1.736262706703518, + "grad_norm": 0.6364182652147066, + "learning_rate": 2.245647287643732e-07, + "loss": 0.2652, + "step": 37064 + }, + { + "epoch": 1.7363095516934464, + "grad_norm": 0.6408866972971411, + "learning_rate": 2.2448617730885553e-07, + "loss": 0.2933, + "step": 37065 + }, + { + "epoch": 1.7363563966833748, + "grad_norm": 0.614588156557217, + "learning_rate": 2.244076389483474e-07, + "loss": 0.2742, + "step": 37066 + }, + { + "epoch": 1.736403241673303, + "grad_norm": 0.6390798519745112, + "learning_rate": 2.2432911368330146e-07, + "loss": 0.2739, + "step": 37067 + }, + { + "epoch": 1.7364500866632313, + "grad_norm": 0.6465094614285826, + "learning_rate": 2.2425060151416932e-07, + "loss": 0.2897, + "step": 37068 + }, + { + "epoch": 1.7364969316531598, + "grad_norm": 0.5902932256637934, + "learning_rate": 2.2417210244140314e-07, + "loss": 0.2652, + "step": 37069 + }, + { + "epoch": 1.736543776643088, + "grad_norm": 0.6392795489705965, + "learning_rate": 2.24093616465455e-07, + "loss": 0.2724, + "step": 37070 + }, + { + "epoch": 1.7365906216330163, + "grad_norm": 0.5788059553712595, + "learning_rate": 2.24015143586776e-07, + "loss": 0.243, + "step": 37071 + }, + { + "epoch": 1.7366374666229447, + "grad_norm": 0.6130322120821586, + "learning_rate": 2.239366838058174e-07, + "loss": 0.2827, + "step": 37072 + }, + { + "epoch": 1.736684311612873, + "grad_norm": 0.5593900469579587, + "learning_rate": 2.2385823712303106e-07, + "loss": 0.2536, + "step": 37073 + }, + { + "epoch": 1.7367311566028012, + "grad_norm": 0.612153037968893, + "learning_rate": 2.2377980353886858e-07, + "loss": 0.2662, + "step": 37074 + }, + { + "epoch": 1.7367780015927297, + "grad_norm": 0.5889802050001878, + "learning_rate": 2.2370138305378097e-07, + "loss": 0.2591, + "step": 37075 + }, + { + "epoch": 1.7368248465826581, + "grad_norm": 0.6097380243395327, + "learning_rate": 2.2362297566822012e-07, + "loss": 0.2848, + "step": 37076 + }, + { + "epoch": 1.7368716915725864, + "grad_norm": 0.5530411535309351, + "learning_rate": 2.2354458138263702e-07, + "loss": 0.2558, + "step": 37077 + }, + { + "epoch": 1.7369185365625146, + "grad_norm": 0.7477392449970603, + "learning_rate": 2.23466200197483e-07, + "loss": 0.288, + "step": 37078 + }, + { + "epoch": 1.736965381552443, + "grad_norm": 0.5993471882978364, + "learning_rate": 2.2338783211320798e-07, + "loss": 0.2763, + "step": 37079 + }, + { + "epoch": 1.7370122265423713, + "grad_norm": 0.5974122296782248, + "learning_rate": 2.2330947713026407e-07, + "loss": 0.2586, + "step": 37080 + }, + { + "epoch": 1.7370590715322995, + "grad_norm": 0.5585729364388728, + "learning_rate": 2.2323113524910178e-07, + "loss": 0.2565, + "step": 37081 + }, + { + "epoch": 1.737105916522228, + "grad_norm": 0.5869069174060775, + "learning_rate": 2.2315280647017213e-07, + "loss": 0.2708, + "step": 37082 + }, + { + "epoch": 1.7371527615121563, + "grad_norm": 0.6273360739895552, + "learning_rate": 2.230744907939264e-07, + "loss": 0.2825, + "step": 37083 + }, + { + "epoch": 1.7371996065020845, + "grad_norm": 0.6071771124878842, + "learning_rate": 2.2299618822081399e-07, + "loss": 0.2737, + "step": 37084 + }, + { + "epoch": 1.737246451492013, + "grad_norm": 0.5828636015416512, + "learning_rate": 2.229178987512867e-07, + "loss": 0.2668, + "step": 37085 + }, + { + "epoch": 1.7372932964819414, + "grad_norm": 0.5928084738780636, + "learning_rate": 2.2283962238579427e-07, + "loss": 0.2797, + "step": 37086 + }, + { + "epoch": 1.7373401414718694, + "grad_norm": 0.5323472364794214, + "learning_rate": 2.2276135912478735e-07, + "loss": 0.2602, + "step": 37087 + }, + { + "epoch": 1.737386986461798, + "grad_norm": 0.5868939331865827, + "learning_rate": 2.226831089687165e-07, + "loss": 0.2619, + "step": 37088 + }, + { + "epoch": 1.7374338314517264, + "grad_norm": 0.5877833292923407, + "learning_rate": 2.2260487191803237e-07, + "loss": 0.2581, + "step": 37089 + }, + { + "epoch": 1.7374806764416546, + "grad_norm": 0.6197726634233319, + "learning_rate": 2.225266479731844e-07, + "loss": 0.2671, + "step": 37090 + }, + { + "epoch": 1.7375275214315828, + "grad_norm": 0.6011412303727519, + "learning_rate": 2.2244843713462305e-07, + "loss": 0.2545, + "step": 37091 + }, + { + "epoch": 1.7375743664215113, + "grad_norm": 0.5699125125744788, + "learning_rate": 2.2237023940279907e-07, + "loss": 0.2585, + "step": 37092 + }, + { + "epoch": 1.7376212114114395, + "grad_norm": 0.5853046260560001, + "learning_rate": 2.2229205477816152e-07, + "loss": 0.2585, + "step": 37093 + }, + { + "epoch": 1.7376680564013678, + "grad_norm": 0.6123738760330257, + "learning_rate": 2.2221388326116062e-07, + "loss": 0.2784, + "step": 37094 + }, + { + "epoch": 1.7377149013912963, + "grad_norm": 0.6598929185275413, + "learning_rate": 2.2213572485224683e-07, + "loss": 0.2812, + "step": 37095 + }, + { + "epoch": 1.7377617463812245, + "grad_norm": 0.6279781847054864, + "learning_rate": 2.2205757955186896e-07, + "loss": 0.2721, + "step": 37096 + }, + { + "epoch": 1.7378085913711527, + "grad_norm": 0.5669227790821357, + "learning_rate": 2.2197944736047693e-07, + "loss": 0.26, + "step": 37097 + }, + { + "epoch": 1.7378554363610812, + "grad_norm": 0.5836140262289662, + "learning_rate": 2.2190132827852095e-07, + "loss": 0.267, + "step": 37098 + }, + { + "epoch": 1.7379022813510097, + "grad_norm": 0.6125348971057568, + "learning_rate": 2.2182322230645064e-07, + "loss": 0.2768, + "step": 37099 + }, + { + "epoch": 1.7379491263409377, + "grad_norm": 0.5633795523176941, + "learning_rate": 2.2174512944471455e-07, + "loss": 0.2553, + "step": 37100 + }, + { + "epoch": 1.7379959713308661, + "grad_norm": 0.6198603636447133, + "learning_rate": 2.2166704969376256e-07, + "loss": 0.277, + "step": 37101 + }, + { + "epoch": 1.7380428163207946, + "grad_norm": 0.5605666333528486, + "learning_rate": 2.2158898305404465e-07, + "loss": 0.2456, + "step": 37102 + }, + { + "epoch": 1.7380896613107228, + "grad_norm": 0.5849776246218767, + "learning_rate": 2.21510929526009e-07, + "loss": 0.2769, + "step": 37103 + }, + { + "epoch": 1.738136506300651, + "grad_norm": 0.5935276652101641, + "learning_rate": 2.214328891101053e-07, + "loss": 0.2718, + "step": 37104 + }, + { + "epoch": 1.7381833512905795, + "grad_norm": 0.6421264289319114, + "learning_rate": 2.2135486180678235e-07, + "loss": 0.2833, + "step": 37105 + }, + { + "epoch": 1.7382301962805078, + "grad_norm": 0.6038328124791328, + "learning_rate": 2.2127684761649033e-07, + "loss": 0.262, + "step": 37106 + }, + { + "epoch": 1.738277041270436, + "grad_norm": 0.6269169561183404, + "learning_rate": 2.2119884653967666e-07, + "loss": 0.2839, + "step": 37107 + }, + { + "epoch": 1.7383238862603645, + "grad_norm": 0.6050196612693566, + "learning_rate": 2.2112085857679127e-07, + "loss": 0.2685, + "step": 37108 + }, + { + "epoch": 1.7383707312502927, + "grad_norm": 0.5808352809627344, + "learning_rate": 2.2104288372828213e-07, + "loss": 0.2615, + "step": 37109 + }, + { + "epoch": 1.738417576240221, + "grad_norm": 0.5755561655924009, + "learning_rate": 2.2096492199459863e-07, + "loss": 0.2618, + "step": 37110 + }, + { + "epoch": 1.7384644212301494, + "grad_norm": 0.5683831736391446, + "learning_rate": 2.2088697337618898e-07, + "loss": 0.2625, + "step": 37111 + }, + { + "epoch": 1.738511266220078, + "grad_norm": 0.6023339549368236, + "learning_rate": 2.2080903787350228e-07, + "loss": 0.2709, + "step": 37112 + }, + { + "epoch": 1.7385581112100061, + "grad_norm": 0.5465769523254411, + "learning_rate": 2.207311154869865e-07, + "loss": 0.2546, + "step": 37113 + }, + { + "epoch": 1.7386049561999344, + "grad_norm": 0.6318867485217301, + "learning_rate": 2.2065320621709075e-07, + "loss": 0.2756, + "step": 37114 + }, + { + "epoch": 1.7386518011898628, + "grad_norm": 0.5345341552319822, + "learning_rate": 2.2057531006426213e-07, + "loss": 0.2475, + "step": 37115 + }, + { + "epoch": 1.738698646179791, + "grad_norm": 0.5920755797943688, + "learning_rate": 2.2049742702895006e-07, + "loss": 0.2605, + "step": 37116 + }, + { + "epoch": 1.7387454911697193, + "grad_norm": 0.6225422696503579, + "learning_rate": 2.2041955711160217e-07, + "loss": 0.2928, + "step": 37117 + }, + { + "epoch": 1.7387923361596478, + "grad_norm": 0.5757595856727465, + "learning_rate": 2.2034170031266678e-07, + "loss": 0.2499, + "step": 37118 + }, + { + "epoch": 1.738839181149576, + "grad_norm": 0.6157152782228725, + "learning_rate": 2.2026385663259263e-07, + "loss": 0.2745, + "step": 37119 + }, + { + "epoch": 1.7388860261395043, + "grad_norm": 0.6211471538514866, + "learning_rate": 2.2018602607182665e-07, + "loss": 0.2715, + "step": 37120 + }, + { + "epoch": 1.7389328711294327, + "grad_norm": 0.6074687423962525, + "learning_rate": 2.201082086308168e-07, + "loss": 0.2681, + "step": 37121 + }, + { + "epoch": 1.7389797161193612, + "grad_norm": 0.6556464421405678, + "learning_rate": 2.2003040431001128e-07, + "loss": 0.2741, + "step": 37122 + }, + { + "epoch": 1.7390265611092892, + "grad_norm": 0.5738352349546094, + "learning_rate": 2.1995261310985754e-07, + "loss": 0.2651, + "step": 37123 + }, + { + "epoch": 1.7390734060992177, + "grad_norm": 0.565412347034742, + "learning_rate": 2.1987483503080358e-07, + "loss": 0.2606, + "step": 37124 + }, + { + "epoch": 1.7391202510891461, + "grad_norm": 0.5784513907762049, + "learning_rate": 2.1979707007329681e-07, + "loss": 0.258, + "step": 37125 + }, + { + "epoch": 1.7391670960790744, + "grad_norm": 0.6097808279272618, + "learning_rate": 2.197193182377852e-07, + "loss": 0.2885, + "step": 37126 + }, + { + "epoch": 1.7392139410690026, + "grad_norm": 0.6179801971163716, + "learning_rate": 2.1964157952471588e-07, + "loss": 0.2752, + "step": 37127 + }, + { + "epoch": 1.739260786058931, + "grad_norm": 0.6282298825835898, + "learning_rate": 2.1956385393453544e-07, + "loss": 0.2836, + "step": 37128 + }, + { + "epoch": 1.7393076310488593, + "grad_norm": 0.5986939453521369, + "learning_rate": 2.1948614146769215e-07, + "loss": 0.2644, + "step": 37129 + }, + { + "epoch": 1.7393544760387876, + "grad_norm": 0.5584137715164201, + "learning_rate": 2.1940844212463287e-07, + "loss": 0.2502, + "step": 37130 + }, + { + "epoch": 1.739401321028716, + "grad_norm": 0.5903938622899083, + "learning_rate": 2.193307559058047e-07, + "loss": 0.277, + "step": 37131 + }, + { + "epoch": 1.7394481660186443, + "grad_norm": 0.5793123304002453, + "learning_rate": 2.1925308281165512e-07, + "loss": 0.2613, + "step": 37132 + }, + { + "epoch": 1.7394950110085725, + "grad_norm": 0.5891549852098715, + "learning_rate": 2.191754228426307e-07, + "loss": 0.2489, + "step": 37133 + }, + { + "epoch": 1.739541855998501, + "grad_norm": 0.5940405653887559, + "learning_rate": 2.190977759991786e-07, + "loss": 0.2703, + "step": 37134 + }, + { + "epoch": 1.7395887009884294, + "grad_norm": 0.6221430466990683, + "learning_rate": 2.1902014228174534e-07, + "loss": 0.2744, + "step": 37135 + }, + { + "epoch": 1.7396355459783575, + "grad_norm": 0.5729743188420469, + "learning_rate": 2.1894252169077756e-07, + "loss": 0.257, + "step": 37136 + }, + { + "epoch": 1.739682390968286, + "grad_norm": 0.6216501416646364, + "learning_rate": 2.188649142267224e-07, + "loss": 0.2852, + "step": 37137 + }, + { + "epoch": 1.7397292359582144, + "grad_norm": 0.5615529854524967, + "learning_rate": 2.1878731989002673e-07, + "loss": 0.2545, + "step": 37138 + }, + { + "epoch": 1.7397760809481426, + "grad_norm": 0.5858480494075906, + "learning_rate": 2.1870973868113626e-07, + "loss": 0.2675, + "step": 37139 + }, + { + "epoch": 1.7398229259380709, + "grad_norm": 0.5758940718823233, + "learning_rate": 2.186321706004979e-07, + "loss": 0.2692, + "step": 37140 + }, + { + "epoch": 1.7398697709279993, + "grad_norm": 0.5968813300216885, + "learning_rate": 2.185546156485585e-07, + "loss": 0.2772, + "step": 37141 + }, + { + "epoch": 1.7399166159179276, + "grad_norm": 0.5880375486964994, + "learning_rate": 2.1847707382576327e-07, + "loss": 0.2733, + "step": 37142 + }, + { + "epoch": 1.7399634609078558, + "grad_norm": 0.5585336425318718, + "learning_rate": 2.1839954513255906e-07, + "loss": 0.2518, + "step": 37143 + }, + { + "epoch": 1.7400103058977843, + "grad_norm": 0.5983040445848476, + "learning_rate": 2.1832202956939247e-07, + "loss": 0.2597, + "step": 37144 + }, + { + "epoch": 1.7400571508877125, + "grad_norm": 0.601153693354498, + "learning_rate": 2.182445271367087e-07, + "loss": 0.2692, + "step": 37145 + }, + { + "epoch": 1.7401039958776408, + "grad_norm": 0.5973961696261849, + "learning_rate": 2.181670378349543e-07, + "loss": 0.2593, + "step": 37146 + }, + { + "epoch": 1.7401508408675692, + "grad_norm": 0.5961979462831365, + "learning_rate": 2.1808956166457507e-07, + "loss": 0.284, + "step": 37147 + }, + { + "epoch": 1.7401976858574977, + "grad_norm": 0.5977086140363528, + "learning_rate": 2.1801209862601703e-07, + "loss": 0.2595, + "step": 37148 + }, + { + "epoch": 1.740244530847426, + "grad_norm": 0.6013484819959058, + "learning_rate": 2.1793464871972563e-07, + "loss": 0.2738, + "step": 37149 + }, + { + "epoch": 1.7402913758373542, + "grad_norm": 0.6264732603961216, + "learning_rate": 2.1785721194614696e-07, + "loss": 0.2754, + "step": 37150 + }, + { + "epoch": 1.7403382208272826, + "grad_norm": 0.5937187709134594, + "learning_rate": 2.1777978830572615e-07, + "loss": 0.265, + "step": 37151 + }, + { + "epoch": 1.7403850658172109, + "grad_norm": 0.5894075870747579, + "learning_rate": 2.1770237779890903e-07, + "loss": 0.2576, + "step": 37152 + }, + { + "epoch": 1.740431910807139, + "grad_norm": 0.590862857562147, + "learning_rate": 2.1762498042614127e-07, + "loss": 0.272, + "step": 37153 + }, + { + "epoch": 1.7404787557970676, + "grad_norm": 0.6264878843368791, + "learning_rate": 2.1754759618786786e-07, + "loss": 0.2804, + "step": 37154 + }, + { + "epoch": 1.7405256007869958, + "grad_norm": 0.611676266525999, + "learning_rate": 2.174702250845348e-07, + "loss": 0.2581, + "step": 37155 + }, + { + "epoch": 1.740572445776924, + "grad_norm": 0.605926215133827, + "learning_rate": 2.1739286711658703e-07, + "loss": 0.2653, + "step": 37156 + }, + { + "epoch": 1.7406192907668525, + "grad_norm": 0.6007059125843466, + "learning_rate": 2.1731552228446918e-07, + "loss": 0.273, + "step": 37157 + }, + { + "epoch": 1.740666135756781, + "grad_norm": 0.6171075961289993, + "learning_rate": 2.1723819058862673e-07, + "loss": 0.2742, + "step": 37158 + }, + { + "epoch": 1.740712980746709, + "grad_norm": 0.6039841023788115, + "learning_rate": 2.171608720295046e-07, + "loss": 0.2691, + "step": 37159 + }, + { + "epoch": 1.7407598257366375, + "grad_norm": 0.5860598171158855, + "learning_rate": 2.17083566607548e-07, + "loss": 0.2555, + "step": 37160 + }, + { + "epoch": 1.740806670726566, + "grad_norm": 0.5808604678375705, + "learning_rate": 2.170062743232021e-07, + "loss": 0.2645, + "step": 37161 + }, + { + "epoch": 1.7408535157164942, + "grad_norm": 0.5722472088430395, + "learning_rate": 2.1692899517691075e-07, + "loss": 0.2619, + "step": 37162 + }, + { + "epoch": 1.7409003607064224, + "grad_norm": 0.5917861160002398, + "learning_rate": 2.1685172916911996e-07, + "loss": 0.2736, + "step": 37163 + }, + { + "epoch": 1.7409472056963509, + "grad_norm": 0.5750936168338082, + "learning_rate": 2.1677447630027272e-07, + "loss": 0.2577, + "step": 37164 + }, + { + "epoch": 1.740994050686279, + "grad_norm": 0.6030415563637428, + "learning_rate": 2.1669723657081477e-07, + "loss": 0.271, + "step": 37165 + }, + { + "epoch": 1.7410408956762073, + "grad_norm": 0.6041951842244303, + "learning_rate": 2.166200099811905e-07, + "loss": 0.2784, + "step": 37166 + }, + { + "epoch": 1.7410877406661358, + "grad_norm": 0.6039821489326519, + "learning_rate": 2.1654279653184395e-07, + "loss": 0.2667, + "step": 37167 + }, + { + "epoch": 1.741134585656064, + "grad_norm": 0.6126503754483128, + "learning_rate": 2.1646559622322012e-07, + "loss": 0.2772, + "step": 37168 + }, + { + "epoch": 1.7411814306459923, + "grad_norm": 0.6233534728238347, + "learning_rate": 2.1638840905576274e-07, + "loss": 0.2763, + "step": 37169 + }, + { + "epoch": 1.7412282756359208, + "grad_norm": 0.6240351758123011, + "learning_rate": 2.1631123502991598e-07, + "loss": 0.2696, + "step": 37170 + }, + { + "epoch": 1.7412751206258492, + "grad_norm": 0.5622073378797912, + "learning_rate": 2.1623407414612386e-07, + "loss": 0.2558, + "step": 37171 + }, + { + "epoch": 1.7413219656157772, + "grad_norm": 0.6145944893395674, + "learning_rate": 2.161569264048305e-07, + "loss": 0.2872, + "step": 37172 + }, + { + "epoch": 1.7413688106057057, + "grad_norm": 0.5709981370472956, + "learning_rate": 2.1607979180648026e-07, + "loss": 0.253, + "step": 37173 + }, + { + "epoch": 1.7414156555956342, + "grad_norm": 0.5989922340669517, + "learning_rate": 2.160026703515167e-07, + "loss": 0.2765, + "step": 37174 + }, + { + "epoch": 1.7414625005855624, + "grad_norm": 0.6058521328021886, + "learning_rate": 2.1592556204038412e-07, + "loss": 0.2541, + "step": 37175 + }, + { + "epoch": 1.7415093455754906, + "grad_norm": 0.6312614498541561, + "learning_rate": 2.1584846687352585e-07, + "loss": 0.2748, + "step": 37176 + }, + { + "epoch": 1.741556190565419, + "grad_norm": 0.6126404672021917, + "learning_rate": 2.157713848513851e-07, + "loss": 0.2671, + "step": 37177 + }, + { + "epoch": 1.7416030355553473, + "grad_norm": 0.5547949286799967, + "learning_rate": 2.1569431597440598e-07, + "loss": 0.2705, + "step": 37178 + }, + { + "epoch": 1.7416498805452756, + "grad_norm": 0.6619645794831577, + "learning_rate": 2.1561726024303202e-07, + "loss": 0.2805, + "step": 37179 + }, + { + "epoch": 1.741696725535204, + "grad_norm": 0.5653864542743903, + "learning_rate": 2.155402176577065e-07, + "loss": 0.2578, + "step": 37180 + }, + { + "epoch": 1.7417435705251323, + "grad_norm": 0.5740474919848466, + "learning_rate": 2.1546318821887318e-07, + "loss": 0.2686, + "step": 37181 + }, + { + "epoch": 1.7417904155150605, + "grad_norm": 0.5601312197515625, + "learning_rate": 2.1538617192697452e-07, + "loss": 0.2516, + "step": 37182 + }, + { + "epoch": 1.741837260504989, + "grad_norm": 0.6233041346246034, + "learning_rate": 2.153091687824549e-07, + "loss": 0.2722, + "step": 37183 + }, + { + "epoch": 1.7418841054949175, + "grad_norm": 0.6511791474286582, + "learning_rate": 2.1523217878575614e-07, + "loss": 0.2666, + "step": 37184 + }, + { + "epoch": 1.7419309504848457, + "grad_norm": 0.5668865199199036, + "learning_rate": 2.151552019373221e-07, + "loss": 0.2642, + "step": 37185 + }, + { + "epoch": 1.741977795474774, + "grad_norm": 0.5587779841264965, + "learning_rate": 2.1507823823759545e-07, + "loss": 0.2669, + "step": 37186 + }, + { + "epoch": 1.7420246404647024, + "grad_norm": 0.6203777535585459, + "learning_rate": 2.1500128768701973e-07, + "loss": 0.2851, + "step": 37187 + }, + { + "epoch": 1.7420714854546306, + "grad_norm": 0.5535320740482786, + "learning_rate": 2.149243502860368e-07, + "loss": 0.2527, + "step": 37188 + }, + { + "epoch": 1.7421183304445589, + "grad_norm": 0.6118570389006988, + "learning_rate": 2.1484742603508996e-07, + "loss": 0.2759, + "step": 37189 + }, + { + "epoch": 1.7421651754344873, + "grad_norm": 0.5782936702716456, + "learning_rate": 2.1477051493462242e-07, + "loss": 0.2627, + "step": 37190 + }, + { + "epoch": 1.7422120204244156, + "grad_norm": 0.5847450845612828, + "learning_rate": 2.146936169850755e-07, + "loss": 0.2635, + "step": 37191 + }, + { + "epoch": 1.7422588654143438, + "grad_norm": 0.5698834946049566, + "learning_rate": 2.146167321868925e-07, + "loss": 0.2696, + "step": 37192 + }, + { + "epoch": 1.7423057104042723, + "grad_norm": 0.5821447143973384, + "learning_rate": 2.1453986054051635e-07, + "loss": 0.2518, + "step": 37193 + }, + { + "epoch": 1.7423525553942008, + "grad_norm": 0.6237355512305586, + "learning_rate": 2.1446300204638838e-07, + "loss": 0.2861, + "step": 37194 + }, + { + "epoch": 1.7423994003841288, + "grad_norm": 0.6407812090495508, + "learning_rate": 2.1438615670495128e-07, + "loss": 0.2961, + "step": 37195 + }, + { + "epoch": 1.7424462453740572, + "grad_norm": 0.5994233870257536, + "learning_rate": 2.1430932451664748e-07, + "loss": 0.2621, + "step": 37196 + }, + { + "epoch": 1.7424930903639857, + "grad_norm": 0.6314378070817847, + "learning_rate": 2.1423250548191944e-07, + "loss": 0.2924, + "step": 37197 + }, + { + "epoch": 1.742539935353914, + "grad_norm": 0.5822187449990451, + "learning_rate": 2.1415569960120842e-07, + "loss": 0.26, + "step": 37198 + }, + { + "epoch": 1.7425867803438422, + "grad_norm": 0.5893849038911527, + "learning_rate": 2.1407890687495713e-07, + "loss": 0.2743, + "step": 37199 + }, + { + "epoch": 1.7426336253337706, + "grad_norm": 0.6385478262159108, + "learning_rate": 2.140021273036069e-07, + "loss": 0.2672, + "step": 37200 + }, + { + "epoch": 1.7426804703236989, + "grad_norm": 0.5850841288114048, + "learning_rate": 2.139253608875999e-07, + "loss": 0.2632, + "step": 37201 + }, + { + "epoch": 1.7427273153136271, + "grad_norm": 0.5957657650963707, + "learning_rate": 2.138486076273777e-07, + "loss": 0.2563, + "step": 37202 + }, + { + "epoch": 1.7427741603035556, + "grad_norm": 0.5720121099785483, + "learning_rate": 2.137718675233827e-07, + "loss": 0.2584, + "step": 37203 + }, + { + "epoch": 1.7428210052934838, + "grad_norm": 0.6502751386245891, + "learning_rate": 2.1369514057605566e-07, + "loss": 0.2761, + "step": 37204 + }, + { + "epoch": 1.742867850283412, + "grad_norm": 0.625807170679859, + "learning_rate": 2.1361842678583877e-07, + "loss": 0.2802, + "step": 37205 + }, + { + "epoch": 1.7429146952733405, + "grad_norm": 0.5917365952992366, + "learning_rate": 2.1354172615317276e-07, + "loss": 0.2668, + "step": 37206 + }, + { + "epoch": 1.742961540263269, + "grad_norm": 0.5696866385806544, + "learning_rate": 2.1346503867849922e-07, + "loss": 0.2499, + "step": 37207 + }, + { + "epoch": 1.743008385253197, + "grad_norm": 0.6197533637186762, + "learning_rate": 2.1338836436226e-07, + "loss": 0.2682, + "step": 37208 + }, + { + "epoch": 1.7430552302431255, + "grad_norm": 0.6154679084195821, + "learning_rate": 2.1331170320489593e-07, + "loss": 0.2718, + "step": 37209 + }, + { + "epoch": 1.743102075233054, + "grad_norm": 0.6199579355408662, + "learning_rate": 2.132350552068485e-07, + "loss": 0.2836, + "step": 37210 + }, + { + "epoch": 1.7431489202229822, + "grad_norm": 0.6226560918736574, + "learning_rate": 2.1315842036855883e-07, + "loss": 0.2779, + "step": 37211 + }, + { + "epoch": 1.7431957652129104, + "grad_norm": 0.5454880161751428, + "learning_rate": 2.1308179869046707e-07, + "loss": 0.2467, + "step": 37212 + }, + { + "epoch": 1.7432426102028389, + "grad_norm": 0.5990434747159873, + "learning_rate": 2.1300519017301487e-07, + "loss": 0.2776, + "step": 37213 + }, + { + "epoch": 1.7432894551927671, + "grad_norm": 0.5909500171079274, + "learning_rate": 2.129285948166429e-07, + "loss": 0.2686, + "step": 37214 + }, + { + "epoch": 1.7433363001826954, + "grad_norm": 0.5973423452717842, + "learning_rate": 2.12852012621792e-07, + "loss": 0.2651, + "step": 37215 + }, + { + "epoch": 1.7433831451726238, + "grad_norm": 0.6164617197088293, + "learning_rate": 2.1277544358890317e-07, + "loss": 0.2589, + "step": 37216 + }, + { + "epoch": 1.743429990162552, + "grad_norm": 0.5933959009797407, + "learning_rate": 2.126988877184169e-07, + "loss": 0.2645, + "step": 37217 + }, + { + "epoch": 1.7434768351524803, + "grad_norm": 0.5656029299869781, + "learning_rate": 2.1262234501077393e-07, + "loss": 0.2644, + "step": 37218 + }, + { + "epoch": 1.7435236801424088, + "grad_norm": 0.586474744907226, + "learning_rate": 2.1254581546641368e-07, + "loss": 0.2512, + "step": 37219 + }, + { + "epoch": 1.7435705251323372, + "grad_norm": 0.6303305988543813, + "learning_rate": 2.1246929908577767e-07, + "loss": 0.2744, + "step": 37220 + }, + { + "epoch": 1.7436173701222655, + "grad_norm": 0.6158130774516171, + "learning_rate": 2.123927958693056e-07, + "loss": 0.2631, + "step": 37221 + }, + { + "epoch": 1.7436642151121937, + "grad_norm": 0.6640856678797056, + "learning_rate": 2.1231630581743823e-07, + "loss": 0.2809, + "step": 37222 + }, + { + "epoch": 1.7437110601021222, + "grad_norm": 0.5828879165711733, + "learning_rate": 2.12239828930616e-07, + "loss": 0.2574, + "step": 37223 + }, + { + "epoch": 1.7437579050920504, + "grad_norm": 0.5997850942650218, + "learning_rate": 2.1216336520927804e-07, + "loss": 0.2586, + "step": 37224 + }, + { + "epoch": 1.7438047500819787, + "grad_norm": 0.596454942129048, + "learning_rate": 2.1208691465386538e-07, + "loss": 0.2671, + "step": 37225 + }, + { + "epoch": 1.7438515950719071, + "grad_norm": 0.6026095714222047, + "learning_rate": 2.1201047726481682e-07, + "loss": 0.2792, + "step": 37226 + }, + { + "epoch": 1.7438984400618354, + "grad_norm": 0.616569321640817, + "learning_rate": 2.1193405304257315e-07, + "loss": 0.2631, + "step": 37227 + }, + { + "epoch": 1.7439452850517636, + "grad_norm": 0.5684790016627954, + "learning_rate": 2.118576419875737e-07, + "loss": 0.2718, + "step": 37228 + }, + { + "epoch": 1.743992130041692, + "grad_norm": 0.5798175978073321, + "learning_rate": 2.1178124410025897e-07, + "loss": 0.262, + "step": 37229 + }, + { + "epoch": 1.7440389750316205, + "grad_norm": 0.5822990757756982, + "learning_rate": 2.1170485938106778e-07, + "loss": 0.2849, + "step": 37230 + }, + { + "epoch": 1.7440858200215485, + "grad_norm": 0.5828483325515724, + "learning_rate": 2.1162848783043977e-07, + "loss": 0.2755, + "step": 37231 + }, + { + "epoch": 1.744132665011477, + "grad_norm": 0.6030991651915991, + "learning_rate": 2.1155212944881516e-07, + "loss": 0.2689, + "step": 37232 + }, + { + "epoch": 1.7441795100014055, + "grad_norm": 0.6257020535819042, + "learning_rate": 2.1147578423663246e-07, + "loss": 0.2796, + "step": 37233 + }, + { + "epoch": 1.7442263549913337, + "grad_norm": 0.6340893557927114, + "learning_rate": 2.1139945219433133e-07, + "loss": 0.2859, + "step": 37234 + }, + { + "epoch": 1.744273199981262, + "grad_norm": 0.5760673294773857, + "learning_rate": 2.1132313332235173e-07, + "loss": 0.2605, + "step": 37235 + }, + { + "epoch": 1.7443200449711904, + "grad_norm": 0.577354964099884, + "learning_rate": 2.1124682762113158e-07, + "loss": 0.2612, + "step": 37236 + }, + { + "epoch": 1.7443668899611187, + "grad_norm": 0.593556416837446, + "learning_rate": 2.1117053509111085e-07, + "loss": 0.2746, + "step": 37237 + }, + { + "epoch": 1.744413734951047, + "grad_norm": 0.5728823534001636, + "learning_rate": 2.1109425573272834e-07, + "loss": 0.2696, + "step": 37238 + }, + { + "epoch": 1.7444605799409754, + "grad_norm": 0.6405474011439103, + "learning_rate": 2.110179895464237e-07, + "loss": 0.2743, + "step": 37239 + }, + { + "epoch": 1.7445074249309036, + "grad_norm": 0.5876784130401643, + "learning_rate": 2.1094173653263468e-07, + "loss": 0.2499, + "step": 37240 + }, + { + "epoch": 1.7445542699208318, + "grad_norm": 0.6435858782863816, + "learning_rate": 2.1086549669180056e-07, + "loss": 0.2964, + "step": 37241 + }, + { + "epoch": 1.7446011149107603, + "grad_norm": 0.6110087221651532, + "learning_rate": 2.107892700243605e-07, + "loss": 0.2681, + "step": 37242 + }, + { + "epoch": 1.7446479599006888, + "grad_norm": 0.6038547078392338, + "learning_rate": 2.1071305653075247e-07, + "loss": 0.2855, + "step": 37243 + }, + { + "epoch": 1.7446948048906168, + "grad_norm": 0.6035642733219576, + "learning_rate": 2.1063685621141555e-07, + "loss": 0.2636, + "step": 37244 + }, + { + "epoch": 1.7447416498805453, + "grad_norm": 0.5856589130824292, + "learning_rate": 2.10560669066788e-07, + "loss": 0.2791, + "step": 37245 + }, + { + "epoch": 1.7447884948704737, + "grad_norm": 0.5715915105635918, + "learning_rate": 2.1048449509730896e-07, + "loss": 0.2622, + "step": 37246 + }, + { + "epoch": 1.744835339860402, + "grad_norm": 0.6195474770646128, + "learning_rate": 2.104083343034155e-07, + "loss": 0.276, + "step": 37247 + }, + { + "epoch": 1.7448821848503302, + "grad_norm": 0.6052259813824394, + "learning_rate": 2.1033218668554733e-07, + "loss": 0.2765, + "step": 37248 + }, + { + "epoch": 1.7449290298402587, + "grad_norm": 0.6063430740117646, + "learning_rate": 2.1025605224414132e-07, + "loss": 0.2557, + "step": 37249 + }, + { + "epoch": 1.744975874830187, + "grad_norm": 0.6156603876735111, + "learning_rate": 2.1017993097963625e-07, + "loss": 0.2789, + "step": 37250 + }, + { + "epoch": 1.7450227198201151, + "grad_norm": 0.5800209894367266, + "learning_rate": 2.101038228924701e-07, + "loss": 0.2668, + "step": 37251 + }, + { + "epoch": 1.7450695648100436, + "grad_norm": 0.608845196655709, + "learning_rate": 2.1002772798308147e-07, + "loss": 0.2775, + "step": 37252 + }, + { + "epoch": 1.7451164097999718, + "grad_norm": 0.6043091543732294, + "learning_rate": 2.0995164625190716e-07, + "loss": 0.264, + "step": 37253 + }, + { + "epoch": 1.7451632547899, + "grad_norm": 0.5979608635425181, + "learning_rate": 2.0987557769938605e-07, + "loss": 0.2723, + "step": 37254 + }, + { + "epoch": 1.7452100997798285, + "grad_norm": 0.6092534427386028, + "learning_rate": 2.097995223259547e-07, + "loss": 0.2618, + "step": 37255 + }, + { + "epoch": 1.745256944769757, + "grad_norm": 0.5580134711013266, + "learning_rate": 2.097234801320519e-07, + "loss": 0.2504, + "step": 37256 + }, + { + "epoch": 1.7453037897596853, + "grad_norm": 0.5597569828934065, + "learning_rate": 2.0964745111811458e-07, + "loss": 0.2577, + "step": 37257 + }, + { + "epoch": 1.7453506347496135, + "grad_norm": 0.6505113492128922, + "learning_rate": 2.095714352845804e-07, + "loss": 0.2874, + "step": 37258 + }, + { + "epoch": 1.745397479739542, + "grad_norm": 0.5899351302867797, + "learning_rate": 2.0949543263188765e-07, + "loss": 0.265, + "step": 37259 + }, + { + "epoch": 1.7454443247294702, + "grad_norm": 0.6219161470797415, + "learning_rate": 2.094194431604729e-07, + "loss": 0.2778, + "step": 37260 + }, + { + "epoch": 1.7454911697193984, + "grad_norm": 0.552164741603176, + "learning_rate": 2.0934346687077305e-07, + "loss": 0.2595, + "step": 37261 + }, + { + "epoch": 1.745538014709327, + "grad_norm": 0.6510883295450488, + "learning_rate": 2.092675037632258e-07, + "loss": 0.2632, + "step": 37262 + }, + { + "epoch": 1.7455848596992551, + "grad_norm": 0.643191036780734, + "learning_rate": 2.0919155383826828e-07, + "loss": 0.2858, + "step": 37263 + }, + { + "epoch": 1.7456317046891834, + "grad_norm": 0.6040845995824601, + "learning_rate": 2.0911561709633792e-07, + "loss": 0.2561, + "step": 37264 + }, + { + "epoch": 1.7456785496791118, + "grad_norm": 0.5975320937802203, + "learning_rate": 2.0903969353787108e-07, + "loss": 0.2655, + "step": 37265 + }, + { + "epoch": 1.7457253946690403, + "grad_norm": 0.6210780449746307, + "learning_rate": 2.089637831633054e-07, + "loss": 0.2777, + "step": 37266 + }, + { + "epoch": 1.7457722396589683, + "grad_norm": 0.5953657797748712, + "learning_rate": 2.0888788597307753e-07, + "loss": 0.2685, + "step": 37267 + }, + { + "epoch": 1.7458190846488968, + "grad_norm": 0.6292089367449846, + "learning_rate": 2.0881200196762347e-07, + "loss": 0.2784, + "step": 37268 + }, + { + "epoch": 1.7458659296388253, + "grad_norm": 0.584694924967009, + "learning_rate": 2.087361311473804e-07, + "loss": 0.2725, + "step": 37269 + }, + { + "epoch": 1.7459127746287535, + "grad_norm": 0.5626910980972617, + "learning_rate": 2.0866027351278517e-07, + "loss": 0.2555, + "step": 37270 + }, + { + "epoch": 1.7459596196186817, + "grad_norm": 0.6151862386108011, + "learning_rate": 2.0858442906427384e-07, + "loss": 0.2619, + "step": 37271 + }, + { + "epoch": 1.7460064646086102, + "grad_norm": 0.5827305583286213, + "learning_rate": 2.0850859780228382e-07, + "loss": 0.2546, + "step": 37272 + }, + { + "epoch": 1.7460533095985384, + "grad_norm": 0.5639280622188182, + "learning_rate": 2.0843277972725062e-07, + "loss": 0.258, + "step": 37273 + }, + { + "epoch": 1.7461001545884667, + "grad_norm": 0.6043152772765642, + "learning_rate": 2.083569748396111e-07, + "loss": 0.2632, + "step": 37274 + }, + { + "epoch": 1.7461469995783951, + "grad_norm": 0.6177922024373167, + "learning_rate": 2.0828118313980074e-07, + "loss": 0.2793, + "step": 37275 + }, + { + "epoch": 1.7461938445683234, + "grad_norm": 0.6110988791177417, + "learning_rate": 2.082054046282561e-07, + "loss": 0.278, + "step": 37276 + }, + { + "epoch": 1.7462406895582516, + "grad_norm": 0.6175989143345666, + "learning_rate": 2.0812963930541357e-07, + "loss": 0.2799, + "step": 37277 + }, + { + "epoch": 1.74628753454818, + "grad_norm": 0.591999098850692, + "learning_rate": 2.0805388717170915e-07, + "loss": 0.2601, + "step": 37278 + }, + { + "epoch": 1.7463343795381085, + "grad_norm": 0.6119191698098855, + "learning_rate": 2.0797814822757834e-07, + "loss": 0.276, + "step": 37279 + }, + { + "epoch": 1.7463812245280366, + "grad_norm": 0.5715149373399308, + "learning_rate": 2.0790242247345687e-07, + "loss": 0.2598, + "step": 37280 + }, + { + "epoch": 1.746428069517965, + "grad_norm": 0.5761348421588001, + "learning_rate": 2.0782670990978165e-07, + "loss": 0.2534, + "step": 37281 + }, + { + "epoch": 1.7464749145078935, + "grad_norm": 0.5829795653052509, + "learning_rate": 2.0775101053698705e-07, + "loss": 0.257, + "step": 37282 + }, + { + "epoch": 1.7465217594978217, + "grad_norm": 0.5887181451330651, + "learning_rate": 2.0767532435550907e-07, + "loss": 0.2687, + "step": 37283 + }, + { + "epoch": 1.74656860448775, + "grad_norm": 0.5610433802676694, + "learning_rate": 2.075996513657841e-07, + "loss": 0.2651, + "step": 37284 + }, + { + "epoch": 1.7466154494776784, + "grad_norm": 0.6015115531272729, + "learning_rate": 2.0752399156824642e-07, + "loss": 0.2772, + "step": 37285 + }, + { + "epoch": 1.7466622944676067, + "grad_norm": 0.574676339773783, + "learning_rate": 2.074483449633319e-07, + "loss": 0.2812, + "step": 37286 + }, + { + "epoch": 1.746709139457535, + "grad_norm": 0.6153856698963965, + "learning_rate": 2.073727115514762e-07, + "loss": 0.2771, + "step": 37287 + }, + { + "epoch": 1.7467559844474634, + "grad_norm": 0.533297479347129, + "learning_rate": 2.072970913331146e-07, + "loss": 0.2481, + "step": 37288 + }, + { + "epoch": 1.7468028294373916, + "grad_norm": 0.5795463590429493, + "learning_rate": 2.0722148430868145e-07, + "loss": 0.2689, + "step": 37289 + }, + { + "epoch": 1.7468496744273199, + "grad_norm": 0.6298671067968737, + "learning_rate": 2.0714589047861278e-07, + "loss": 0.2744, + "step": 37290 + }, + { + "epoch": 1.7468965194172483, + "grad_norm": 0.5904974128666463, + "learning_rate": 2.0707030984334298e-07, + "loss": 0.2719, + "step": 37291 + }, + { + "epoch": 1.7469433644071768, + "grad_norm": 0.6077220829700035, + "learning_rate": 2.0699474240330725e-07, + "loss": 0.2631, + "step": 37292 + }, + { + "epoch": 1.746990209397105, + "grad_norm": 0.6014737802232304, + "learning_rate": 2.0691918815894025e-07, + "loss": 0.2577, + "step": 37293 + }, + { + "epoch": 1.7470370543870333, + "grad_norm": 0.6198949502223662, + "learning_rate": 2.0684364711067773e-07, + "loss": 0.2604, + "step": 37294 + }, + { + "epoch": 1.7470838993769617, + "grad_norm": 0.62347239322742, + "learning_rate": 2.0676811925895295e-07, + "loss": 0.2791, + "step": 37295 + }, + { + "epoch": 1.74713074436689, + "grad_norm": 0.5896027292047704, + "learning_rate": 2.066926046042017e-07, + "loss": 0.2746, + "step": 37296 + }, + { + "epoch": 1.7471775893568182, + "grad_norm": 0.5767927358446445, + "learning_rate": 2.0661710314685778e-07, + "loss": 0.2894, + "step": 37297 + }, + { + "epoch": 1.7472244343467467, + "grad_norm": 0.5917676915857762, + "learning_rate": 2.0654161488735614e-07, + "loss": 0.2755, + "step": 37298 + }, + { + "epoch": 1.747271279336675, + "grad_norm": 0.6159955176076148, + "learning_rate": 2.0646613982613084e-07, + "loss": 0.2772, + "step": 37299 + }, + { + "epoch": 1.7473181243266032, + "grad_norm": 0.651659781094744, + "learning_rate": 2.0639067796361655e-07, + "loss": 0.2902, + "step": 37300 + }, + { + "epoch": 1.7473649693165316, + "grad_norm": 0.5930629799424264, + "learning_rate": 2.0631522930024794e-07, + "loss": 0.269, + "step": 37301 + }, + { + "epoch": 1.74741181430646, + "grad_norm": 0.6232611802920576, + "learning_rate": 2.0623979383645797e-07, + "loss": 0.2705, + "step": 37302 + }, + { + "epoch": 1.747458659296388, + "grad_norm": 0.5853270048047349, + "learning_rate": 2.0616437157268217e-07, + "loss": 0.2586, + "step": 37303 + }, + { + "epoch": 1.7475055042863166, + "grad_norm": 0.6014579413417582, + "learning_rate": 2.0608896250935345e-07, + "loss": 0.2703, + "step": 37304 + }, + { + "epoch": 1.747552349276245, + "grad_norm": 0.6067512538311088, + "learning_rate": 2.0601356664690598e-07, + "loss": 0.2676, + "step": 37305 + }, + { + "epoch": 1.7475991942661733, + "grad_norm": 0.5553725657801938, + "learning_rate": 2.059381839857741e-07, + "loss": 0.2533, + "step": 37306 + }, + { + "epoch": 1.7476460392561015, + "grad_norm": 0.6140142224737343, + "learning_rate": 2.0586281452639133e-07, + "loss": 0.2929, + "step": 37307 + }, + { + "epoch": 1.74769288424603, + "grad_norm": 0.5655636828044885, + "learning_rate": 2.0578745826919184e-07, + "loss": 0.2602, + "step": 37308 + }, + { + "epoch": 1.7477397292359582, + "grad_norm": 0.6015686010056045, + "learning_rate": 2.0571211521460882e-07, + "loss": 0.2655, + "step": 37309 + }, + { + "epoch": 1.7477865742258865, + "grad_norm": 0.621241366277532, + "learning_rate": 2.0563678536307558e-07, + "loss": 0.2918, + "step": 37310 + }, + { + "epoch": 1.747833419215815, + "grad_norm": 0.604878798587381, + "learning_rate": 2.055614687150259e-07, + "loss": 0.2645, + "step": 37311 + }, + { + "epoch": 1.7478802642057432, + "grad_norm": 0.625983792640847, + "learning_rate": 2.0548616527089337e-07, + "loss": 0.2809, + "step": 37312 + }, + { + "epoch": 1.7479271091956714, + "grad_norm": 0.5758250051574108, + "learning_rate": 2.0541087503111124e-07, + "loss": 0.2685, + "step": 37313 + }, + { + "epoch": 1.7479739541855999, + "grad_norm": 0.6458863209130141, + "learning_rate": 2.0533559799611276e-07, + "loss": 0.2814, + "step": 37314 + }, + { + "epoch": 1.7480207991755283, + "grad_norm": 0.6444052000373631, + "learning_rate": 2.0526033416633172e-07, + "loss": 0.2638, + "step": 37315 + }, + { + "epoch": 1.7480676441654563, + "grad_norm": 0.6033875302995693, + "learning_rate": 2.0518508354220062e-07, + "loss": 0.2753, + "step": 37316 + }, + { + "epoch": 1.7481144891553848, + "grad_norm": 0.613396840591976, + "learning_rate": 2.0510984612415215e-07, + "loss": 0.2843, + "step": 37317 + }, + { + "epoch": 1.7481613341453133, + "grad_norm": 0.6288383757261042, + "learning_rate": 2.050346219126198e-07, + "loss": 0.2871, + "step": 37318 + }, + { + "epoch": 1.7482081791352415, + "grad_norm": 0.5887905953108679, + "learning_rate": 2.0495941090803635e-07, + "loss": 0.2634, + "step": 37319 + }, + { + "epoch": 1.7482550241251698, + "grad_norm": 0.6232079787078308, + "learning_rate": 2.0488421311083445e-07, + "loss": 0.2873, + "step": 37320 + }, + { + "epoch": 1.7483018691150982, + "grad_norm": 0.5967193800947647, + "learning_rate": 2.0480902852144768e-07, + "loss": 0.2682, + "step": 37321 + }, + { + "epoch": 1.7483487141050265, + "grad_norm": 0.5661814563934324, + "learning_rate": 2.0473385714030764e-07, + "loss": 0.2685, + "step": 37322 + }, + { + "epoch": 1.7483955590949547, + "grad_norm": 0.6039115641171707, + "learning_rate": 2.0465869896784758e-07, + "loss": 0.2828, + "step": 37323 + }, + { + "epoch": 1.7484424040848832, + "grad_norm": 0.6385280516142802, + "learning_rate": 2.0458355400449965e-07, + "loss": 0.2843, + "step": 37324 + }, + { + "epoch": 1.7484892490748114, + "grad_norm": 0.620822893982889, + "learning_rate": 2.0450842225069628e-07, + "loss": 0.2724, + "step": 37325 + }, + { + "epoch": 1.7485360940647396, + "grad_norm": 0.5943935289424989, + "learning_rate": 2.0443330370686993e-07, + "loss": 0.2634, + "step": 37326 + }, + { + "epoch": 1.748582939054668, + "grad_norm": 0.5761992612659675, + "learning_rate": 2.0435819837345356e-07, + "loss": 0.2662, + "step": 37327 + }, + { + "epoch": 1.7486297840445966, + "grad_norm": 0.6007634190679565, + "learning_rate": 2.042831062508782e-07, + "loss": 0.2744, + "step": 37328 + }, + { + "epoch": 1.7486766290345248, + "grad_norm": 0.6118264703457807, + "learning_rate": 2.042080273395766e-07, + "loss": 0.2758, + "step": 37329 + }, + { + "epoch": 1.748723474024453, + "grad_norm": 0.6147904355383936, + "learning_rate": 2.041329616399812e-07, + "loss": 0.2764, + "step": 37330 + }, + { + "epoch": 1.7487703190143815, + "grad_norm": 0.519966262195658, + "learning_rate": 2.0405790915252326e-07, + "loss": 0.2451, + "step": 37331 + }, + { + "epoch": 1.7488171640043098, + "grad_norm": 0.6409000992633833, + "learning_rate": 2.0398286987763472e-07, + "loss": 0.275, + "step": 37332 + }, + { + "epoch": 1.748864008994238, + "grad_norm": 0.5837110318777384, + "learning_rate": 2.0390784381574825e-07, + "loss": 0.2609, + "step": 37333 + }, + { + "epoch": 1.7489108539841665, + "grad_norm": 0.5772373434492429, + "learning_rate": 2.0383283096729462e-07, + "loss": 0.262, + "step": 37334 + }, + { + "epoch": 1.7489576989740947, + "grad_norm": 0.6214116416340015, + "learning_rate": 2.0375783133270598e-07, + "loss": 0.2786, + "step": 37335 + }, + { + "epoch": 1.749004543964023, + "grad_norm": 0.613674226745364, + "learning_rate": 2.0368284491241397e-07, + "loss": 0.2835, + "step": 37336 + }, + { + "epoch": 1.7490513889539514, + "grad_norm": 0.5657605008652815, + "learning_rate": 2.0360787170685015e-07, + "loss": 0.2625, + "step": 37337 + }, + { + "epoch": 1.7490982339438799, + "grad_norm": 0.5557538710555013, + "learning_rate": 2.0353291171644558e-07, + "loss": 0.2668, + "step": 37338 + }, + { + "epoch": 1.7491450789338079, + "grad_norm": 0.580571129125531, + "learning_rate": 2.034579649416324e-07, + "loss": 0.2653, + "step": 37339 + }, + { + "epoch": 1.7491919239237363, + "grad_norm": 0.5930516644680158, + "learning_rate": 2.033830313828411e-07, + "loss": 0.2688, + "step": 37340 + }, + { + "epoch": 1.7492387689136648, + "grad_norm": 0.5975308968982134, + "learning_rate": 2.0330811104050301e-07, + "loss": 0.2591, + "step": 37341 + }, + { + "epoch": 1.749285613903593, + "grad_norm": 0.617297926873573, + "learning_rate": 2.0323320391504948e-07, + "loss": 0.269, + "step": 37342 + }, + { + "epoch": 1.7493324588935213, + "grad_norm": 0.5976142861836282, + "learning_rate": 2.0315831000691206e-07, + "loss": 0.2788, + "step": 37343 + }, + { + "epoch": 1.7493793038834498, + "grad_norm": 0.6360043029052487, + "learning_rate": 2.0308342931652098e-07, + "loss": 0.2713, + "step": 37344 + }, + { + "epoch": 1.749426148873378, + "grad_norm": 0.5611728368369422, + "learning_rate": 2.0300856184430784e-07, + "loss": 0.2558, + "step": 37345 + }, + { + "epoch": 1.7494729938633062, + "grad_norm": 0.6559938719514494, + "learning_rate": 2.0293370759070257e-07, + "loss": 0.2843, + "step": 37346 + }, + { + "epoch": 1.7495198388532347, + "grad_norm": 0.6036788989185559, + "learning_rate": 2.0285886655613652e-07, + "loss": 0.2795, + "step": 37347 + }, + { + "epoch": 1.749566683843163, + "grad_norm": 0.6423095161952485, + "learning_rate": 2.0278403874104014e-07, + "loss": 0.2903, + "step": 37348 + }, + { + "epoch": 1.7496135288330912, + "grad_norm": 0.5940806081429604, + "learning_rate": 2.0270922414584447e-07, + "loss": 0.2793, + "step": 37349 + }, + { + "epoch": 1.7496603738230196, + "grad_norm": 0.5795916246726779, + "learning_rate": 2.0263442277098006e-07, + "loss": 0.2634, + "step": 37350 + }, + { + "epoch": 1.749707218812948, + "grad_norm": 0.5859959794333534, + "learning_rate": 2.0255963461687734e-07, + "loss": 0.2568, + "step": 37351 + }, + { + "epoch": 1.7497540638028761, + "grad_norm": 0.6271510398673515, + "learning_rate": 2.0248485968396597e-07, + "loss": 0.2765, + "step": 37352 + }, + { + "epoch": 1.7498009087928046, + "grad_norm": 0.6051254934247389, + "learning_rate": 2.0241009797267674e-07, + "loss": 0.2717, + "step": 37353 + }, + { + "epoch": 1.749847753782733, + "grad_norm": 0.5779140349095213, + "learning_rate": 2.0233534948343986e-07, + "loss": 0.2589, + "step": 37354 + }, + { + "epoch": 1.7498945987726613, + "grad_norm": 0.6102172341187335, + "learning_rate": 2.0226061421668581e-07, + "loss": 0.2648, + "step": 37355 + }, + { + "epoch": 1.7499414437625895, + "grad_norm": 0.6238402531688345, + "learning_rate": 2.0218589217284423e-07, + "loss": 0.2738, + "step": 37356 + }, + { + "epoch": 1.749988288752518, + "grad_norm": 0.5953657103004575, + "learning_rate": 2.0211118335234565e-07, + "loss": 0.263, + "step": 37357 + }, + { + "epoch": 1.7500351337424462, + "grad_norm": 0.5613745450856636, + "learning_rate": 2.0203648775561969e-07, + "loss": 0.2744, + "step": 37358 + }, + { + "epoch": 1.7500819787323745, + "grad_norm": 0.6402380679049363, + "learning_rate": 2.0196180538309574e-07, + "loss": 0.2716, + "step": 37359 + }, + { + "epoch": 1.750128823722303, + "grad_norm": 0.5743352029977117, + "learning_rate": 2.0188713623520427e-07, + "loss": 0.2663, + "step": 37360 + }, + { + "epoch": 1.7501756687122312, + "grad_norm": 0.6049847914234083, + "learning_rate": 2.0181248031237445e-07, + "loss": 0.2652, + "step": 37361 + }, + { + "epoch": 1.7502225137021594, + "grad_norm": 0.5642655820266292, + "learning_rate": 2.0173783761503612e-07, + "loss": 0.2513, + "step": 37362 + }, + { + "epoch": 1.7502693586920879, + "grad_norm": 0.6126919297310462, + "learning_rate": 2.0166320814361956e-07, + "loss": 0.2685, + "step": 37363 + }, + { + "epoch": 1.7503162036820163, + "grad_norm": 0.6246070182256256, + "learning_rate": 2.01588591898553e-07, + "loss": 0.2802, + "step": 37364 + }, + { + "epoch": 1.7503630486719446, + "grad_norm": 0.6508318493498177, + "learning_rate": 2.0151398888026668e-07, + "loss": 0.2871, + "step": 37365 + }, + { + "epoch": 1.7504098936618728, + "grad_norm": 0.5725594784785424, + "learning_rate": 2.0143939908918942e-07, + "loss": 0.2476, + "step": 37366 + }, + { + "epoch": 1.7504567386518013, + "grad_norm": 0.6167302251398258, + "learning_rate": 2.0136482252575086e-07, + "loss": 0.2648, + "step": 37367 + }, + { + "epoch": 1.7505035836417295, + "grad_norm": 0.5816178533528482, + "learning_rate": 2.012902591903798e-07, + "loss": 0.2723, + "step": 37368 + }, + { + "epoch": 1.7505504286316578, + "grad_norm": 0.6195443127925558, + "learning_rate": 2.0121570908350596e-07, + "loss": 0.2656, + "step": 37369 + }, + { + "epoch": 1.7505972736215862, + "grad_norm": 0.6553576081181612, + "learning_rate": 2.0114117220555756e-07, + "loss": 0.2837, + "step": 37370 + }, + { + "epoch": 1.7506441186115145, + "grad_norm": 0.6033962071564706, + "learning_rate": 2.0106664855696374e-07, + "loss": 0.269, + "step": 37371 + }, + { + "epoch": 1.7506909636014427, + "grad_norm": 0.6415533894735251, + "learning_rate": 2.0099213813815438e-07, + "loss": 0.2872, + "step": 37372 + }, + { + "epoch": 1.7507378085913712, + "grad_norm": 0.6539071683488862, + "learning_rate": 2.0091764094955667e-07, + "loss": 0.2614, + "step": 37373 + }, + { + "epoch": 1.7507846535812996, + "grad_norm": 0.5898852496512839, + "learning_rate": 2.008431569916e-07, + "loss": 0.2641, + "step": 37374 + }, + { + "epoch": 1.7508314985712277, + "grad_norm": 0.6080774470194656, + "learning_rate": 2.0076868626471345e-07, + "loss": 0.2633, + "step": 37375 + }, + { + "epoch": 1.7508783435611561, + "grad_norm": 0.5757190017109457, + "learning_rate": 2.0069422876932531e-07, + "loss": 0.2736, + "step": 37376 + }, + { + "epoch": 1.7509251885510846, + "grad_norm": 0.5709011711395565, + "learning_rate": 2.0061978450586384e-07, + "loss": 0.2612, + "step": 37377 + }, + { + "epoch": 1.7509720335410128, + "grad_norm": 0.5638491957146334, + "learning_rate": 2.005453534747573e-07, + "loss": 0.2693, + "step": 37378 + }, + { + "epoch": 1.751018878530941, + "grad_norm": 0.6075866133795808, + "learning_rate": 2.0047093567643506e-07, + "loss": 0.283, + "step": 37379 + }, + { + "epoch": 1.7510657235208695, + "grad_norm": 0.6222704848890851, + "learning_rate": 2.0039653111132372e-07, + "loss": 0.2699, + "step": 37380 + }, + { + "epoch": 1.7511125685107978, + "grad_norm": 0.5925028950323695, + "learning_rate": 2.003221397798527e-07, + "loss": 0.2684, + "step": 37381 + }, + { + "epoch": 1.751159413500726, + "grad_norm": 0.607664590952167, + "learning_rate": 2.0024776168245025e-07, + "loss": 0.2569, + "step": 37382 + }, + { + "epoch": 1.7512062584906545, + "grad_norm": 0.5667779477612597, + "learning_rate": 2.0017339681954324e-07, + "loss": 0.257, + "step": 37383 + }, + { + "epoch": 1.7512531034805827, + "grad_norm": 0.6200052009802611, + "learning_rate": 2.0009904519156047e-07, + "loss": 0.2724, + "step": 37384 + }, + { + "epoch": 1.751299948470511, + "grad_norm": 0.6164039502385917, + "learning_rate": 2.0002470679892972e-07, + "loss": 0.2812, + "step": 37385 + }, + { + "epoch": 1.7513467934604394, + "grad_norm": 0.620705833021777, + "learning_rate": 1.9995038164207892e-07, + "loss": 0.2603, + "step": 37386 + }, + { + "epoch": 1.7513936384503679, + "grad_norm": 0.6105673478879579, + "learning_rate": 1.9987606972143524e-07, + "loss": 0.2734, + "step": 37387 + }, + { + "epoch": 1.751440483440296, + "grad_norm": 0.5786956687634482, + "learning_rate": 1.99801771037427e-07, + "loss": 0.2565, + "step": 37388 + }, + { + "epoch": 1.7514873284302244, + "grad_norm": 0.6097799419705714, + "learning_rate": 1.99727485590481e-07, + "loss": 0.2767, + "step": 37389 + }, + { + "epoch": 1.7515341734201528, + "grad_norm": 0.5544859840497662, + "learning_rate": 1.9965321338102528e-07, + "loss": 0.2577, + "step": 37390 + }, + { + "epoch": 1.751581018410081, + "grad_norm": 0.5933354133258747, + "learning_rate": 1.9957895440948726e-07, + "loss": 0.2788, + "step": 37391 + }, + { + "epoch": 1.7516278634000093, + "grad_norm": 0.6663571422384981, + "learning_rate": 1.995047086762944e-07, + "loss": 0.2859, + "step": 37392 + }, + { + "epoch": 1.7516747083899378, + "grad_norm": 0.5742370785873236, + "learning_rate": 1.9943047618187329e-07, + "loss": 0.257, + "step": 37393 + }, + { + "epoch": 1.751721553379866, + "grad_norm": 0.5965440493508084, + "learning_rate": 1.9935625692665217e-07, + "loss": 0.2638, + "step": 37394 + }, + { + "epoch": 1.7517683983697943, + "grad_norm": 0.5819772439267761, + "learning_rate": 1.9928205091105685e-07, + "loss": 0.2582, + "step": 37395 + }, + { + "epoch": 1.7518152433597227, + "grad_norm": 0.6153431934647695, + "learning_rate": 1.992078581355153e-07, + "loss": 0.2809, + "step": 37396 + }, + { + "epoch": 1.751862088349651, + "grad_norm": 0.6610746861501076, + "learning_rate": 1.9913367860045412e-07, + "loss": 0.2951, + "step": 37397 + }, + { + "epoch": 1.7519089333395792, + "grad_norm": 0.5897878448519138, + "learning_rate": 1.990595123063005e-07, + "loss": 0.2757, + "step": 37398 + }, + { + "epoch": 1.7519557783295077, + "grad_norm": 0.6233902805921743, + "learning_rate": 1.989853592534813e-07, + "loss": 0.2801, + "step": 37399 + }, + { + "epoch": 1.7520026233194361, + "grad_norm": 0.618288185082217, + "learning_rate": 1.989112194424231e-07, + "loss": 0.2654, + "step": 37400 + }, + { + "epoch": 1.7520494683093644, + "grad_norm": 0.6064486351391429, + "learning_rate": 1.9883709287355197e-07, + "loss": 0.2709, + "step": 37401 + }, + { + "epoch": 1.7520963132992926, + "grad_norm": 0.5702854988803708, + "learning_rate": 1.9876297954729506e-07, + "loss": 0.2433, + "step": 37402 + }, + { + "epoch": 1.752143158289221, + "grad_norm": 0.5975708184726891, + "learning_rate": 1.986888794640787e-07, + "loss": 0.2584, + "step": 37403 + }, + { + "epoch": 1.7521900032791493, + "grad_norm": 0.6546038500866668, + "learning_rate": 1.9861479262432953e-07, + "loss": 0.2907, + "step": 37404 + }, + { + "epoch": 1.7522368482690776, + "grad_norm": 0.6007854671026884, + "learning_rate": 1.9854071902847382e-07, + "loss": 0.2672, + "step": 37405 + }, + { + "epoch": 1.752283693259006, + "grad_norm": 0.7935913353523744, + "learning_rate": 1.984666586769382e-07, + "loss": 0.2967, + "step": 37406 + }, + { + "epoch": 1.7523305382489343, + "grad_norm": 0.673687376514438, + "learning_rate": 1.9839261157014872e-07, + "loss": 0.2943, + "step": 37407 + }, + { + "epoch": 1.7523773832388625, + "grad_norm": 0.6120767841975697, + "learning_rate": 1.9831857770853058e-07, + "loss": 0.267, + "step": 37408 + }, + { + "epoch": 1.752424228228791, + "grad_norm": 0.6071789040043833, + "learning_rate": 1.982445570925104e-07, + "loss": 0.2818, + "step": 37409 + }, + { + "epoch": 1.7524710732187194, + "grad_norm": 0.5737907665112499, + "learning_rate": 1.9817054972251449e-07, + "loss": 0.2663, + "step": 37410 + }, + { + "epoch": 1.7525179182086474, + "grad_norm": 0.6176445563328762, + "learning_rate": 1.9809655559896863e-07, + "loss": 0.2926, + "step": 37411 + }, + { + "epoch": 1.752564763198576, + "grad_norm": 0.5607031682876794, + "learning_rate": 1.9802257472229886e-07, + "loss": 0.2551, + "step": 37412 + }, + { + "epoch": 1.7526116081885044, + "grad_norm": 0.6007269721509012, + "learning_rate": 1.9794860709292984e-07, + "loss": 0.2722, + "step": 37413 + }, + { + "epoch": 1.7526584531784326, + "grad_norm": 0.5845261798046308, + "learning_rate": 1.9787465271128874e-07, + "loss": 0.2577, + "step": 37414 + }, + { + "epoch": 1.7527052981683608, + "grad_norm": 0.6204834921714919, + "learning_rate": 1.9780071157779995e-07, + "loss": 0.2555, + "step": 37415 + }, + { + "epoch": 1.7527521431582893, + "grad_norm": 0.5992896217442925, + "learning_rate": 1.977267836928895e-07, + "loss": 0.277, + "step": 37416 + }, + { + "epoch": 1.7527989881482176, + "grad_norm": 0.6002751840494601, + "learning_rate": 1.976528690569826e-07, + "loss": 0.2559, + "step": 37417 + }, + { + "epoch": 1.7528458331381458, + "grad_norm": 0.6458324706562795, + "learning_rate": 1.9757896767050534e-07, + "loss": 0.2922, + "step": 37418 + }, + { + "epoch": 1.7528926781280743, + "grad_norm": 0.5736415349125681, + "learning_rate": 1.9750507953388176e-07, + "loss": 0.2744, + "step": 37419 + }, + { + "epoch": 1.7529395231180025, + "grad_norm": 0.6530596417572199, + "learning_rate": 1.9743120464753796e-07, + "loss": 0.2704, + "step": 37420 + }, + { + "epoch": 1.7529863681079307, + "grad_norm": 0.6059684143177634, + "learning_rate": 1.9735734301189913e-07, + "loss": 0.2836, + "step": 37421 + }, + { + "epoch": 1.7530332130978592, + "grad_norm": 0.5672883523756093, + "learning_rate": 1.9728349462738965e-07, + "loss": 0.2583, + "step": 37422 + }, + { + "epoch": 1.7530800580877877, + "grad_norm": 0.6728643099171626, + "learning_rate": 1.9720965949443506e-07, + "loss": 0.2847, + "step": 37423 + }, + { + "epoch": 1.7531269030777157, + "grad_norm": 0.5934922498804108, + "learning_rate": 1.9713583761346021e-07, + "loss": 0.2639, + "step": 37424 + }, + { + "epoch": 1.7531737480676441, + "grad_norm": 0.5953281816563678, + "learning_rate": 1.970620289848896e-07, + "loss": 0.2749, + "step": 37425 + }, + { + "epoch": 1.7532205930575726, + "grad_norm": 0.5986229925631471, + "learning_rate": 1.969882336091483e-07, + "loss": 0.2755, + "step": 37426 + }, + { + "epoch": 1.7532674380475008, + "grad_norm": 0.6351627894087079, + "learning_rate": 1.9691445148666056e-07, + "loss": 0.2687, + "step": 37427 + }, + { + "epoch": 1.753314283037429, + "grad_norm": 0.6063225001614205, + "learning_rate": 1.9684068261785206e-07, + "loss": 0.2852, + "step": 37428 + }, + { + "epoch": 1.7533611280273576, + "grad_norm": 0.5947931538743726, + "learning_rate": 1.967669270031458e-07, + "loss": 0.2599, + "step": 37429 + }, + { + "epoch": 1.7534079730172858, + "grad_norm": 0.5744331673983658, + "learning_rate": 1.966931846429676e-07, + "loss": 0.2557, + "step": 37430 + }, + { + "epoch": 1.753454818007214, + "grad_norm": 0.6087580655139764, + "learning_rate": 1.966194555377407e-07, + "loss": 0.2804, + "step": 37431 + }, + { + "epoch": 1.7535016629971425, + "grad_norm": 0.5786402085638005, + "learning_rate": 1.9654573968789004e-07, + "loss": 0.2616, + "step": 37432 + }, + { + "epoch": 1.7535485079870707, + "grad_norm": 0.6025522641877501, + "learning_rate": 1.9647203709383973e-07, + "loss": 0.2589, + "step": 37433 + }, + { + "epoch": 1.753595352976999, + "grad_norm": 0.6538156366675928, + "learning_rate": 1.9639834775601412e-07, + "loss": 0.2809, + "step": 37434 + }, + { + "epoch": 1.7536421979669274, + "grad_norm": 0.5993190803459081, + "learning_rate": 1.9632467167483655e-07, + "loss": 0.2669, + "step": 37435 + }, + { + "epoch": 1.753689042956856, + "grad_norm": 0.6209714515113475, + "learning_rate": 1.9625100885073218e-07, + "loss": 0.2694, + "step": 37436 + }, + { + "epoch": 1.7537358879467841, + "grad_norm": 0.5766530029732091, + "learning_rate": 1.9617735928412347e-07, + "loss": 0.2733, + "step": 37437 + }, + { + "epoch": 1.7537827329367124, + "grad_norm": 0.6255203776783523, + "learning_rate": 1.9610372297543535e-07, + "loss": 0.2764, + "step": 37438 + }, + { + "epoch": 1.7538295779266408, + "grad_norm": 0.6413940585111653, + "learning_rate": 1.960300999250911e-07, + "loss": 0.2822, + "step": 37439 + }, + { + "epoch": 1.753876422916569, + "grad_norm": 0.6274001372527989, + "learning_rate": 1.9595649013351454e-07, + "loss": 0.2768, + "step": 37440 + }, + { + "epoch": 1.7539232679064973, + "grad_norm": 0.6172936728295014, + "learning_rate": 1.958828936011295e-07, + "loss": 0.2861, + "step": 37441 + }, + { + "epoch": 1.7539701128964258, + "grad_norm": 0.5857462609769011, + "learning_rate": 1.9580931032835898e-07, + "loss": 0.2726, + "step": 37442 + }, + { + "epoch": 1.754016957886354, + "grad_norm": 0.6303163186161975, + "learning_rate": 1.9573574031562736e-07, + "loss": 0.2803, + "step": 37443 + }, + { + "epoch": 1.7540638028762823, + "grad_norm": 0.5041343969106875, + "learning_rate": 1.9566218356335682e-07, + "loss": 0.2433, + "step": 37444 + }, + { + "epoch": 1.7541106478662107, + "grad_norm": 0.5965947321066043, + "learning_rate": 1.9558864007197142e-07, + "loss": 0.2649, + "step": 37445 + }, + { + "epoch": 1.7541574928561392, + "grad_norm": 0.6099180783255398, + "learning_rate": 1.9551510984189393e-07, + "loss": 0.2956, + "step": 37446 + }, + { + "epoch": 1.7542043378460672, + "grad_norm": 0.6062380815440783, + "learning_rate": 1.9544159287354787e-07, + "loss": 0.2849, + "step": 37447 + }, + { + "epoch": 1.7542511828359957, + "grad_norm": 0.6257993114870493, + "learning_rate": 1.9536808916735678e-07, + "loss": 0.2823, + "step": 37448 + }, + { + "epoch": 1.7542980278259241, + "grad_norm": 0.6293086420528354, + "learning_rate": 1.9529459872374286e-07, + "loss": 0.2759, + "step": 37449 + }, + { + "epoch": 1.7543448728158524, + "grad_norm": 0.6034163024845784, + "learning_rate": 1.9522112154312905e-07, + "loss": 0.2552, + "step": 37450 + }, + { + "epoch": 1.7543917178057806, + "grad_norm": 0.6311614162972156, + "learning_rate": 1.9514765762593813e-07, + "loss": 0.2727, + "step": 37451 + }, + { + "epoch": 1.754438562795709, + "grad_norm": 0.5710992108939325, + "learning_rate": 1.9507420697259332e-07, + "loss": 0.2693, + "step": 37452 + }, + { + "epoch": 1.7544854077856373, + "grad_norm": 0.6067917204888036, + "learning_rate": 1.9500076958351737e-07, + "loss": 0.272, + "step": 37453 + }, + { + "epoch": 1.7545322527755656, + "grad_norm": 0.606889340196652, + "learning_rate": 1.9492734545913216e-07, + "loss": 0.2626, + "step": 37454 + }, + { + "epoch": 1.754579097765494, + "grad_norm": 0.6046412865325846, + "learning_rate": 1.948539345998615e-07, + "loss": 0.2516, + "step": 37455 + }, + { + "epoch": 1.7546259427554223, + "grad_norm": 0.5897892518389214, + "learning_rate": 1.9478053700612703e-07, + "loss": 0.2653, + "step": 37456 + }, + { + "epoch": 1.7546727877453505, + "grad_norm": 0.6244261418403725, + "learning_rate": 1.9470715267835062e-07, + "loss": 0.2846, + "step": 37457 + }, + { + "epoch": 1.754719632735279, + "grad_norm": 0.5980580598648316, + "learning_rate": 1.9463378161695523e-07, + "loss": 0.2827, + "step": 37458 + }, + { + "epoch": 1.7547664777252074, + "grad_norm": 0.5584675700375974, + "learning_rate": 1.9456042382236278e-07, + "loss": 0.2597, + "step": 37459 + }, + { + "epoch": 1.7548133227151355, + "grad_norm": 0.5760420878788635, + "learning_rate": 1.94487079294996e-07, + "loss": 0.2601, + "step": 37460 + }, + { + "epoch": 1.754860167705064, + "grad_norm": 0.6073262433508831, + "learning_rate": 1.9441374803527675e-07, + "loss": 0.2933, + "step": 37461 + }, + { + "epoch": 1.7549070126949924, + "grad_norm": 0.6341290327345772, + "learning_rate": 1.9434043004362662e-07, + "loss": 0.2789, + "step": 37462 + }, + { + "epoch": 1.7549538576849206, + "grad_norm": 0.6240667870753247, + "learning_rate": 1.942671253204681e-07, + "loss": 0.271, + "step": 37463 + }, + { + "epoch": 1.7550007026748489, + "grad_norm": 0.5935414837014596, + "learning_rate": 1.9419383386622248e-07, + "loss": 0.2629, + "step": 37464 + }, + { + "epoch": 1.7550475476647773, + "grad_norm": 0.6125386930860081, + "learning_rate": 1.9412055568131166e-07, + "loss": 0.269, + "step": 37465 + }, + { + "epoch": 1.7550943926547056, + "grad_norm": 0.6332109744234418, + "learning_rate": 1.9404729076615754e-07, + "loss": 0.2642, + "step": 37466 + }, + { + "epoch": 1.7551412376446338, + "grad_norm": 0.5900821242812139, + "learning_rate": 1.9397403912118222e-07, + "loss": 0.2723, + "step": 37467 + }, + { + "epoch": 1.7551880826345623, + "grad_norm": 0.5558798486805566, + "learning_rate": 1.93900800746806e-07, + "loss": 0.2639, + "step": 37468 + }, + { + "epoch": 1.7552349276244905, + "grad_norm": 0.6413421139389343, + "learning_rate": 1.9382757564345128e-07, + "loss": 0.2968, + "step": 37469 + }, + { + "epoch": 1.7552817726144188, + "grad_norm": 0.5981873584910629, + "learning_rate": 1.9375436381153967e-07, + "loss": 0.2806, + "step": 37470 + }, + { + "epoch": 1.7553286176043472, + "grad_norm": 0.595721555626101, + "learning_rate": 1.9368116525149167e-07, + "loss": 0.268, + "step": 37471 + }, + { + "epoch": 1.7553754625942757, + "grad_norm": 0.6005239050774033, + "learning_rate": 1.936079799637286e-07, + "loss": 0.2682, + "step": 37472 + }, + { + "epoch": 1.755422307584204, + "grad_norm": 0.6373450902985741, + "learning_rate": 1.9353480794867262e-07, + "loss": 0.2779, + "step": 37473 + }, + { + "epoch": 1.7554691525741322, + "grad_norm": 0.5884157194843789, + "learning_rate": 1.9346164920674342e-07, + "loss": 0.2693, + "step": 37474 + }, + { + "epoch": 1.7555159975640606, + "grad_norm": 0.6027367249587917, + "learning_rate": 1.9338850373836286e-07, + "loss": 0.2695, + "step": 37475 + }, + { + "epoch": 1.7555628425539889, + "grad_norm": 0.6416999534885954, + "learning_rate": 1.9331537154395175e-07, + "loss": 0.2746, + "step": 37476 + }, + { + "epoch": 1.755609687543917, + "grad_norm": 0.6168909667828942, + "learning_rate": 1.9324225262393137e-07, + "loss": 0.2714, + "step": 37477 + }, + { + "epoch": 1.7556565325338456, + "grad_norm": 0.5857329615020954, + "learning_rate": 1.931691469787217e-07, + "loss": 0.2608, + "step": 37478 + }, + { + "epoch": 1.7557033775237738, + "grad_norm": 0.5951647081704657, + "learning_rate": 1.9309605460874403e-07, + "loss": 0.2765, + "step": 37479 + }, + { + "epoch": 1.755750222513702, + "grad_norm": 0.607243593538072, + "learning_rate": 1.9302297551441834e-07, + "loss": 0.2453, + "step": 37480 + }, + { + "epoch": 1.7557970675036305, + "grad_norm": 0.5834430967017323, + "learning_rate": 1.9294990969616566e-07, + "loss": 0.2565, + "step": 37481 + }, + { + "epoch": 1.755843912493559, + "grad_norm": 0.6463292826549735, + "learning_rate": 1.9287685715440624e-07, + "loss": 0.2842, + "step": 37482 + }, + { + "epoch": 1.755890757483487, + "grad_norm": 0.6240373383112868, + "learning_rate": 1.928038178895611e-07, + "loss": 0.2699, + "step": 37483 + }, + { + "epoch": 1.7559376024734155, + "grad_norm": 0.5970111148455637, + "learning_rate": 1.927307919020499e-07, + "loss": 0.2653, + "step": 37484 + }, + { + "epoch": 1.755984447463344, + "grad_norm": 0.6034597618903776, + "learning_rate": 1.9265777919229318e-07, + "loss": 0.268, + "step": 37485 + }, + { + "epoch": 1.7560312924532722, + "grad_norm": 0.6392118770110058, + "learning_rate": 1.9258477976071083e-07, + "loss": 0.2692, + "step": 37486 + }, + { + "epoch": 1.7560781374432004, + "grad_norm": 0.6416846086279216, + "learning_rate": 1.925117936077231e-07, + "loss": 0.2935, + "step": 37487 + }, + { + "epoch": 1.7561249824331289, + "grad_norm": 0.608853186228418, + "learning_rate": 1.9243882073374992e-07, + "loss": 0.2961, + "step": 37488 + }, + { + "epoch": 1.756171827423057, + "grad_norm": 0.5541211394962702, + "learning_rate": 1.9236586113921152e-07, + "loss": 0.2447, + "step": 37489 + }, + { + "epoch": 1.7562186724129853, + "grad_norm": 0.5866638729669534, + "learning_rate": 1.9229291482452783e-07, + "loss": 0.271, + "step": 37490 + }, + { + "epoch": 1.7562655174029138, + "grad_norm": 0.6308712589245308, + "learning_rate": 1.9221998179011852e-07, + "loss": 0.2816, + "step": 37491 + }, + { + "epoch": 1.756312362392842, + "grad_norm": 0.6223855839615833, + "learning_rate": 1.9214706203640272e-07, + "loss": 0.2737, + "step": 37492 + }, + { + "epoch": 1.7563592073827703, + "grad_norm": 0.5721154112620033, + "learning_rate": 1.9207415556380033e-07, + "loss": 0.2645, + "step": 37493 + }, + { + "epoch": 1.7564060523726988, + "grad_norm": 0.6086719581054527, + "learning_rate": 1.9200126237273131e-07, + "loss": 0.2616, + "step": 37494 + }, + { + "epoch": 1.7564528973626272, + "grad_norm": 0.5769931997592325, + "learning_rate": 1.9192838246361478e-07, + "loss": 0.2608, + "step": 37495 + }, + { + "epoch": 1.7564997423525552, + "grad_norm": 0.6242405999722203, + "learning_rate": 1.9185551583687039e-07, + "loss": 0.2745, + "step": 37496 + }, + { + "epoch": 1.7565465873424837, + "grad_norm": 0.5881151537296873, + "learning_rate": 1.9178266249291784e-07, + "loss": 0.2673, + "step": 37497 + }, + { + "epoch": 1.7565934323324122, + "grad_norm": 0.5919438112234862, + "learning_rate": 1.917098224321759e-07, + "loss": 0.2779, + "step": 37498 + }, + { + "epoch": 1.7566402773223404, + "grad_norm": 0.5902154740921786, + "learning_rate": 1.9163699565506343e-07, + "loss": 0.2631, + "step": 37499 + }, + { + "epoch": 1.7566871223122686, + "grad_norm": 0.6302487735573765, + "learning_rate": 1.9156418216199956e-07, + "loss": 0.2791, + "step": 37500 + }, + { + "epoch": 1.756733967302197, + "grad_norm": 0.6230143413113668, + "learning_rate": 1.9149138195340394e-07, + "loss": 0.2775, + "step": 37501 + }, + { + "epoch": 1.7567808122921253, + "grad_norm": 0.6170683501988937, + "learning_rate": 1.914185950296951e-07, + "loss": 0.2719, + "step": 37502 + }, + { + "epoch": 1.7568276572820536, + "grad_norm": 0.5713706869309747, + "learning_rate": 1.913458213912925e-07, + "loss": 0.2773, + "step": 37503 + }, + { + "epoch": 1.756874502271982, + "grad_norm": 0.6145445547312253, + "learning_rate": 1.9127306103861375e-07, + "loss": 0.2799, + "step": 37504 + }, + { + "epoch": 1.7569213472619103, + "grad_norm": 0.6167179483042121, + "learning_rate": 1.912003139720789e-07, + "loss": 0.2781, + "step": 37505 + }, + { + "epoch": 1.7569681922518385, + "grad_norm": 0.6573643458658407, + "learning_rate": 1.911275801921056e-07, + "loss": 0.2867, + "step": 37506 + }, + { + "epoch": 1.757015037241767, + "grad_norm": 0.6223538036615329, + "learning_rate": 1.910548596991124e-07, + "loss": 0.2776, + "step": 37507 + }, + { + "epoch": 1.7570618822316955, + "grad_norm": 0.6200769514246949, + "learning_rate": 1.9098215249351848e-07, + "loss": 0.2681, + "step": 37508 + }, + { + "epoch": 1.7571087272216237, + "grad_norm": 0.6043715858189904, + "learning_rate": 1.9090945857574177e-07, + "loss": 0.2712, + "step": 37509 + }, + { + "epoch": 1.757155572211552, + "grad_norm": 0.5687975721259981, + "learning_rate": 1.9083677794620144e-07, + "loss": 0.2736, + "step": 37510 + }, + { + "epoch": 1.7572024172014804, + "grad_norm": 0.5591949906017778, + "learning_rate": 1.907641106053143e-07, + "loss": 0.2612, + "step": 37511 + }, + { + "epoch": 1.7572492621914086, + "grad_norm": 0.5525224816994462, + "learning_rate": 1.9069145655350007e-07, + "loss": 0.2681, + "step": 37512 + }, + { + "epoch": 1.7572961071813369, + "grad_norm": 0.6211662126815787, + "learning_rate": 1.9061881579117537e-07, + "loss": 0.2812, + "step": 37513 + }, + { + "epoch": 1.7573429521712653, + "grad_norm": 0.6513811496657995, + "learning_rate": 1.90546188318759e-07, + "loss": 0.2813, + "step": 37514 + }, + { + "epoch": 1.7573897971611936, + "grad_norm": 0.62849651082412, + "learning_rate": 1.9047357413666896e-07, + "loss": 0.2862, + "step": 37515 + }, + { + "epoch": 1.7574366421511218, + "grad_norm": 0.5798876091799151, + "learning_rate": 1.9040097324532326e-07, + "loss": 0.2706, + "step": 37516 + }, + { + "epoch": 1.7574834871410503, + "grad_norm": 0.5604238382733508, + "learning_rate": 1.9032838564513934e-07, + "loss": 0.2531, + "step": 37517 + }, + { + "epoch": 1.7575303321309788, + "grad_norm": 0.5897183823657386, + "learning_rate": 1.9025581133653464e-07, + "loss": 0.2607, + "step": 37518 + }, + { + "epoch": 1.7575771771209068, + "grad_norm": 0.5767828800203963, + "learning_rate": 1.90183250319928e-07, + "loss": 0.257, + "step": 37519 + }, + { + "epoch": 1.7576240221108352, + "grad_norm": 0.6243406633237816, + "learning_rate": 1.901107025957355e-07, + "loss": 0.2674, + "step": 37520 + }, + { + "epoch": 1.7576708671007637, + "grad_norm": 0.57385637419771, + "learning_rate": 1.9003816816437564e-07, + "loss": 0.2608, + "step": 37521 + }, + { + "epoch": 1.757717712090692, + "grad_norm": 0.6226589901304038, + "learning_rate": 1.8996564702626563e-07, + "loss": 0.2857, + "step": 37522 + }, + { + "epoch": 1.7577645570806202, + "grad_norm": 0.6546001620869242, + "learning_rate": 1.8989313918182263e-07, + "loss": 0.2871, + "step": 37523 + }, + { + "epoch": 1.7578114020705486, + "grad_norm": 0.6352271523804297, + "learning_rate": 1.8982064463146377e-07, + "loss": 0.2837, + "step": 37524 + }, + { + "epoch": 1.7578582470604769, + "grad_norm": 0.6246518421452363, + "learning_rate": 1.8974816337560654e-07, + "loss": 0.2771, + "step": 37525 + }, + { + "epoch": 1.7579050920504051, + "grad_norm": 0.6020243760721283, + "learning_rate": 1.8967569541466836e-07, + "loss": 0.2806, + "step": 37526 + }, + { + "epoch": 1.7579519370403336, + "grad_norm": 0.5999472899098965, + "learning_rate": 1.8960324074906554e-07, + "loss": 0.2694, + "step": 37527 + }, + { + "epoch": 1.7579987820302618, + "grad_norm": 0.6283792011947323, + "learning_rate": 1.8953079937921558e-07, + "loss": 0.2951, + "step": 37528 + }, + { + "epoch": 1.75804562702019, + "grad_norm": 0.5705237252181214, + "learning_rate": 1.894583713055348e-07, + "loss": 0.259, + "step": 37529 + }, + { + "epoch": 1.7580924720101185, + "grad_norm": 0.577567569927588, + "learning_rate": 1.8938595652844061e-07, + "loss": 0.2735, + "step": 37530 + }, + { + "epoch": 1.758139317000047, + "grad_norm": 0.6263763322555486, + "learning_rate": 1.893135550483491e-07, + "loss": 0.284, + "step": 37531 + }, + { + "epoch": 1.758186161989975, + "grad_norm": 0.6503082700948469, + "learning_rate": 1.8924116686567796e-07, + "loss": 0.2851, + "step": 37532 + }, + { + "epoch": 1.7582330069799035, + "grad_norm": 0.5717464561467885, + "learning_rate": 1.8916879198084271e-07, + "loss": 0.2586, + "step": 37533 + }, + { + "epoch": 1.758279851969832, + "grad_norm": 0.5844190457693798, + "learning_rate": 1.890964303942608e-07, + "loss": 0.2473, + "step": 37534 + }, + { + "epoch": 1.7583266969597602, + "grad_norm": 0.5756070160117042, + "learning_rate": 1.8902408210634744e-07, + "loss": 0.2651, + "step": 37535 + }, + { + "epoch": 1.7583735419496884, + "grad_norm": 0.6051294230281213, + "learning_rate": 1.8895174711751978e-07, + "loss": 0.273, + "step": 37536 + }, + { + "epoch": 1.7584203869396169, + "grad_norm": 0.5994102814919734, + "learning_rate": 1.8887942542819394e-07, + "loss": 0.2699, + "step": 37537 + }, + { + "epoch": 1.7584672319295451, + "grad_norm": 0.5728042462240374, + "learning_rate": 1.888071170387859e-07, + "loss": 0.2618, + "step": 37538 + }, + { + "epoch": 1.7585140769194734, + "grad_norm": 0.5970022573746848, + "learning_rate": 1.887348219497126e-07, + "loss": 0.2676, + "step": 37539 + }, + { + "epoch": 1.7585609219094018, + "grad_norm": 0.6199448431264735, + "learning_rate": 1.8866254016138953e-07, + "loss": 0.2743, + "step": 37540 + }, + { + "epoch": 1.75860776689933, + "grad_norm": 0.6080258313746091, + "learning_rate": 1.8859027167423216e-07, + "loss": 0.2762, + "step": 37541 + }, + { + "epoch": 1.7586546118892583, + "grad_norm": 0.6055344387144395, + "learning_rate": 1.8851801648865685e-07, + "loss": 0.2759, + "step": 37542 + }, + { + "epoch": 1.7587014568791868, + "grad_norm": 0.5778351350169842, + "learning_rate": 1.884457746050794e-07, + "loss": 0.2754, + "step": 37543 + }, + { + "epoch": 1.7587483018691152, + "grad_norm": 0.6090680998203853, + "learning_rate": 1.8837354602391556e-07, + "loss": 0.2728, + "step": 37544 + }, + { + "epoch": 1.7587951468590435, + "grad_norm": 0.6020273009536109, + "learning_rate": 1.883013307455808e-07, + "loss": 0.2865, + "step": 37545 + }, + { + "epoch": 1.7588419918489717, + "grad_norm": 0.591096465600434, + "learning_rate": 1.8822912877049155e-07, + "loss": 0.2775, + "step": 37546 + }, + { + "epoch": 1.7588888368389002, + "grad_norm": 0.639790346001871, + "learning_rate": 1.8815694009906239e-07, + "loss": 0.2861, + "step": 37547 + }, + { + "epoch": 1.7589356818288284, + "grad_norm": 0.6038430110821775, + "learning_rate": 1.8808476473170856e-07, + "loss": 0.257, + "step": 37548 + }, + { + "epoch": 1.7589825268187567, + "grad_norm": 0.6292518051831347, + "learning_rate": 1.8801260266884585e-07, + "loss": 0.2844, + "step": 37549 + }, + { + "epoch": 1.7590293718086851, + "grad_norm": 0.6029384924014425, + "learning_rate": 1.879404539108895e-07, + "loss": 0.2694, + "step": 37550 + }, + { + "epoch": 1.7590762167986134, + "grad_norm": 0.5767312579078085, + "learning_rate": 1.8786831845825498e-07, + "loss": 0.2698, + "step": 37551 + }, + { + "epoch": 1.7591230617885416, + "grad_norm": 0.5672308494952201, + "learning_rate": 1.8779619631135726e-07, + "loss": 0.2675, + "step": 37552 + }, + { + "epoch": 1.75916990677847, + "grad_norm": 0.6015872820137919, + "learning_rate": 1.87724087470611e-07, + "loss": 0.2721, + "step": 37553 + }, + { + "epoch": 1.7592167517683985, + "grad_norm": 0.553741735044516, + "learning_rate": 1.8765199193643168e-07, + "loss": 0.2466, + "step": 37554 + }, + { + "epoch": 1.7592635967583266, + "grad_norm": 0.5790652173129098, + "learning_rate": 1.8757990970923374e-07, + "loss": 0.2581, + "step": 37555 + }, + { + "epoch": 1.759310441748255, + "grad_norm": 0.6265684940975804, + "learning_rate": 1.8750784078943234e-07, + "loss": 0.2847, + "step": 37556 + }, + { + "epoch": 1.7593572867381835, + "grad_norm": 0.5912631964030465, + "learning_rate": 1.8743578517744194e-07, + "loss": 0.2693, + "step": 37557 + }, + { + "epoch": 1.7594041317281117, + "grad_norm": 0.6255869881926394, + "learning_rate": 1.873637428736777e-07, + "loss": 0.2595, + "step": 37558 + }, + { + "epoch": 1.75945097671804, + "grad_norm": 0.6375728151591897, + "learning_rate": 1.872917138785535e-07, + "loss": 0.2744, + "step": 37559 + }, + { + "epoch": 1.7594978217079684, + "grad_norm": 0.5601991645545847, + "learning_rate": 1.8721969819248426e-07, + "loss": 0.2579, + "step": 37560 + }, + { + "epoch": 1.7595446666978967, + "grad_norm": 0.5973970201246022, + "learning_rate": 1.8714769581588467e-07, + "loss": 0.2732, + "step": 37561 + }, + { + "epoch": 1.759591511687825, + "grad_norm": 0.6108079812400823, + "learning_rate": 1.8707570674916826e-07, + "loss": 0.2727, + "step": 37562 + }, + { + "epoch": 1.7596383566777534, + "grad_norm": 0.600834673762464, + "learning_rate": 1.8700373099274998e-07, + "loss": 0.2654, + "step": 37563 + }, + { + "epoch": 1.7596852016676816, + "grad_norm": 0.5941023062901073, + "learning_rate": 1.8693176854704453e-07, + "loss": 0.2666, + "step": 37564 + }, + { + "epoch": 1.7597320466576098, + "grad_norm": 0.6351811884037848, + "learning_rate": 1.8685981941246462e-07, + "loss": 0.2774, + "step": 37565 + }, + { + "epoch": 1.7597788916475383, + "grad_norm": 0.5919332654114706, + "learning_rate": 1.8678788358942518e-07, + "loss": 0.2688, + "step": 37566 + }, + { + "epoch": 1.7598257366374668, + "grad_norm": 0.5775541277421736, + "learning_rate": 1.8671596107834005e-07, + "loss": 0.2554, + "step": 37567 + }, + { + "epoch": 1.7598725816273948, + "grad_norm": 0.5804415891776805, + "learning_rate": 1.8664405187962364e-07, + "loss": 0.2691, + "step": 37568 + }, + { + "epoch": 1.7599194266173233, + "grad_norm": 0.5969877907004799, + "learning_rate": 1.8657215599368867e-07, + "loss": 0.2627, + "step": 37569 + }, + { + "epoch": 1.7599662716072517, + "grad_norm": 0.6279364281995484, + "learning_rate": 1.8650027342095006e-07, + "loss": 0.2652, + "step": 37570 + }, + { + "epoch": 1.76001311659718, + "grad_norm": 0.5610582795867045, + "learning_rate": 1.8642840416182055e-07, + "loss": 0.2618, + "step": 37571 + }, + { + "epoch": 1.7600599615871082, + "grad_norm": 0.6370352760344209, + "learning_rate": 1.86356548216714e-07, + "loss": 0.2731, + "step": 37572 + }, + { + "epoch": 1.7601068065770367, + "grad_norm": 0.5838439034028524, + "learning_rate": 1.8628470558604396e-07, + "loss": 0.2682, + "step": 37573 + }, + { + "epoch": 1.760153651566965, + "grad_norm": 0.6135408518079956, + "learning_rate": 1.862128762702242e-07, + "loss": 0.2729, + "step": 37574 + }, + { + "epoch": 1.7602004965568931, + "grad_norm": 0.6186459351115288, + "learning_rate": 1.8614106026966755e-07, + "loss": 0.2695, + "step": 37575 + }, + { + "epoch": 1.7602473415468216, + "grad_norm": 0.6108244354718698, + "learning_rate": 1.8606925758478806e-07, + "loss": 0.2759, + "step": 37576 + }, + { + "epoch": 1.7602941865367498, + "grad_norm": 0.564073613164164, + "learning_rate": 1.8599746821599763e-07, + "loss": 0.2675, + "step": 37577 + }, + { + "epoch": 1.760341031526678, + "grad_norm": 0.6631368973704627, + "learning_rate": 1.8592569216371036e-07, + "loss": 0.2892, + "step": 37578 + }, + { + "epoch": 1.7603878765166066, + "grad_norm": 0.577697723287697, + "learning_rate": 1.858539294283393e-07, + "loss": 0.2619, + "step": 37579 + }, + { + "epoch": 1.760434721506535, + "grad_norm": 0.621322070489391, + "learning_rate": 1.8578218001029686e-07, + "loss": 0.281, + "step": 37580 + }, + { + "epoch": 1.7604815664964633, + "grad_norm": 0.634146387246039, + "learning_rate": 1.8571044390999715e-07, + "loss": 0.2902, + "step": 37581 + }, + { + "epoch": 1.7605284114863915, + "grad_norm": 0.5864087453179814, + "learning_rate": 1.8563872112785153e-07, + "loss": 0.2681, + "step": 37582 + }, + { + "epoch": 1.76057525647632, + "grad_norm": 0.5825199782477954, + "learning_rate": 1.8556701166427383e-07, + "loss": 0.2696, + "step": 37583 + }, + { + "epoch": 1.7606221014662482, + "grad_norm": 0.6735953942155768, + "learning_rate": 1.8549531551967564e-07, + "loss": 0.2847, + "step": 37584 + }, + { + "epoch": 1.7606689464561764, + "grad_norm": 0.5843148958073785, + "learning_rate": 1.8542363269447056e-07, + "loss": 0.2737, + "step": 37585 + }, + { + "epoch": 1.760715791446105, + "grad_norm": 0.5485682300237176, + "learning_rate": 1.8535196318907046e-07, + "loss": 0.2615, + "step": 37586 + }, + { + "epoch": 1.7607626364360331, + "grad_norm": 0.5858859145912761, + "learning_rate": 1.8528030700388804e-07, + "loss": 0.2678, + "step": 37587 + }, + { + "epoch": 1.7608094814259614, + "grad_norm": 0.5733059853390117, + "learning_rate": 1.8520866413933607e-07, + "loss": 0.2612, + "step": 37588 + }, + { + "epoch": 1.7608563264158898, + "grad_norm": 0.5895199820634255, + "learning_rate": 1.851370345958267e-07, + "loss": 0.273, + "step": 37589 + }, + { + "epoch": 1.7609031714058183, + "grad_norm": 0.5830963165764237, + "learning_rate": 1.850654183737713e-07, + "loss": 0.2521, + "step": 37590 + }, + { + "epoch": 1.7609500163957463, + "grad_norm": 0.587681122212795, + "learning_rate": 1.8499381547358254e-07, + "loss": 0.2642, + "step": 37591 + }, + { + "epoch": 1.7609968613856748, + "grad_norm": 0.6576533567979801, + "learning_rate": 1.849222258956726e-07, + "loss": 0.2869, + "step": 37592 + }, + { + "epoch": 1.7610437063756033, + "grad_norm": 0.5978350284323355, + "learning_rate": 1.8485064964045312e-07, + "loss": 0.2548, + "step": 37593 + }, + { + "epoch": 1.7610905513655315, + "grad_norm": 0.5950729143908331, + "learning_rate": 1.8477908670833655e-07, + "loss": 0.269, + "step": 37594 + }, + { + "epoch": 1.7611373963554597, + "grad_norm": 0.618106333042174, + "learning_rate": 1.8470753709973477e-07, + "loss": 0.2773, + "step": 37595 + }, + { + "epoch": 1.7611842413453882, + "grad_norm": 0.5736422793164835, + "learning_rate": 1.846360008150591e-07, + "loss": 0.2695, + "step": 37596 + }, + { + "epoch": 1.7612310863353164, + "grad_norm": 0.6168196523641565, + "learning_rate": 1.8456447785472093e-07, + "loss": 0.269, + "step": 37597 + }, + { + "epoch": 1.7612779313252447, + "grad_norm": 0.6109827583104266, + "learning_rate": 1.8449296821913237e-07, + "loss": 0.2781, + "step": 37598 + }, + { + "epoch": 1.7613247763151731, + "grad_norm": 0.6316717886570602, + "learning_rate": 1.844214719087048e-07, + "loss": 0.2578, + "step": 37599 + }, + { + "epoch": 1.7613716213051014, + "grad_norm": 0.6336333189001037, + "learning_rate": 1.8434998892384953e-07, + "loss": 0.2799, + "step": 37600 + }, + { + "epoch": 1.7614184662950296, + "grad_norm": 0.6561247048567678, + "learning_rate": 1.8427851926497846e-07, + "loss": 0.2809, + "step": 37601 + }, + { + "epoch": 1.761465311284958, + "grad_norm": 0.5847760303561884, + "learning_rate": 1.8420706293250213e-07, + "loss": 0.271, + "step": 37602 + }, + { + "epoch": 1.7615121562748866, + "grad_norm": 0.6087594931280287, + "learning_rate": 1.8413561992683237e-07, + "loss": 0.2646, + "step": 37603 + }, + { + "epoch": 1.7615590012648146, + "grad_norm": 0.594946016407738, + "learning_rate": 1.8406419024837973e-07, + "loss": 0.2496, + "step": 37604 + }, + { + "epoch": 1.761605846254743, + "grad_norm": 0.6236886179824696, + "learning_rate": 1.8399277389755555e-07, + "loss": 0.2774, + "step": 37605 + }, + { + "epoch": 1.7616526912446715, + "grad_norm": 0.6263898156511032, + "learning_rate": 1.8392137087477086e-07, + "loss": 0.2966, + "step": 37606 + }, + { + "epoch": 1.7616995362345997, + "grad_norm": 0.6106150695541527, + "learning_rate": 1.83849981180437e-07, + "loss": 0.2922, + "step": 37607 + }, + { + "epoch": 1.761746381224528, + "grad_norm": 0.6288840838901079, + "learning_rate": 1.8377860481496395e-07, + "loss": 0.2607, + "step": 37608 + }, + { + "epoch": 1.7617932262144564, + "grad_norm": 0.5969195836353923, + "learning_rate": 1.8370724177876275e-07, + "loss": 0.279, + "step": 37609 + }, + { + "epoch": 1.7618400712043847, + "grad_norm": 0.6116597249671915, + "learning_rate": 1.8363589207224476e-07, + "loss": 0.2767, + "step": 37610 + }, + { + "epoch": 1.761886916194313, + "grad_norm": 0.610226047088021, + "learning_rate": 1.835645556958196e-07, + "loss": 0.271, + "step": 37611 + }, + { + "epoch": 1.7619337611842414, + "grad_norm": 0.5744817645269045, + "learning_rate": 1.834932326498981e-07, + "loss": 0.2675, + "step": 37612 + }, + { + "epoch": 1.7619806061741696, + "grad_norm": 0.619006661472654, + "learning_rate": 1.834219229348913e-07, + "loss": 0.2882, + "step": 37613 + }, + { + "epoch": 1.7620274511640979, + "grad_norm": 0.5808524092387967, + "learning_rate": 1.833506265512089e-07, + "loss": 0.2692, + "step": 37614 + }, + { + "epoch": 1.7620742961540263, + "grad_norm": 0.6012104293687176, + "learning_rate": 1.8327934349926108e-07, + "loss": 0.2759, + "step": 37615 + }, + { + "epoch": 1.7621211411439548, + "grad_norm": 0.5865990276058489, + "learning_rate": 1.8320807377945836e-07, + "loss": 0.2603, + "step": 37616 + }, + { + "epoch": 1.762167986133883, + "grad_norm": 0.6005196819870009, + "learning_rate": 1.8313681739221128e-07, + "loss": 0.287, + "step": 37617 + }, + { + "epoch": 1.7622148311238113, + "grad_norm": 0.6024996430567156, + "learning_rate": 1.8306557433792922e-07, + "loss": 0.2829, + "step": 37618 + }, + { + "epoch": 1.7622616761137397, + "grad_norm": 0.6096667861087017, + "learning_rate": 1.8299434461702264e-07, + "loss": 0.2629, + "step": 37619 + }, + { + "epoch": 1.762308521103668, + "grad_norm": 0.5733736600185098, + "learning_rate": 1.8292312822990099e-07, + "loss": 0.2667, + "step": 37620 + }, + { + "epoch": 1.7623553660935962, + "grad_norm": 0.5945860638498931, + "learning_rate": 1.8285192517697448e-07, + "loss": 0.2612, + "step": 37621 + }, + { + "epoch": 1.7624022110835247, + "grad_norm": 0.6155987511566944, + "learning_rate": 1.827807354586525e-07, + "loss": 0.2538, + "step": 37622 + }, + { + "epoch": 1.762449056073453, + "grad_norm": 0.5760969105240229, + "learning_rate": 1.8270955907534555e-07, + "loss": 0.2566, + "step": 37623 + }, + { + "epoch": 1.7624959010633812, + "grad_norm": 0.630785711811605, + "learning_rate": 1.826383960274622e-07, + "loss": 0.2891, + "step": 37624 + }, + { + "epoch": 1.7625427460533096, + "grad_norm": 0.5755277218834098, + "learning_rate": 1.8256724631541296e-07, + "loss": 0.2848, + "step": 37625 + }, + { + "epoch": 1.762589591043238, + "grad_norm": 0.6385465504845507, + "learning_rate": 1.824961099396061e-07, + "loss": 0.2966, + "step": 37626 + }, + { + "epoch": 1.762636436033166, + "grad_norm": 0.6064304290763222, + "learning_rate": 1.8242498690045184e-07, + "loss": 0.265, + "step": 37627 + }, + { + "epoch": 1.7626832810230946, + "grad_norm": 0.5927580987513973, + "learning_rate": 1.8235387719835907e-07, + "loss": 0.2644, + "step": 37628 + }, + { + "epoch": 1.762730126013023, + "grad_norm": 0.6174088416333232, + "learning_rate": 1.822827808337374e-07, + "loss": 0.2683, + "step": 37629 + }, + { + "epoch": 1.7627769710029513, + "grad_norm": 0.6312494177454451, + "learning_rate": 1.8221169780699627e-07, + "loss": 0.2761, + "step": 37630 + }, + { + "epoch": 1.7628238159928795, + "grad_norm": 0.6193540661129449, + "learning_rate": 1.821406281185442e-07, + "loss": 0.2772, + "step": 37631 + }, + { + "epoch": 1.762870660982808, + "grad_norm": 0.6410337116343394, + "learning_rate": 1.8206957176878954e-07, + "loss": 0.2808, + "step": 37632 + }, + { + "epoch": 1.7629175059727362, + "grad_norm": 0.5541806274698349, + "learning_rate": 1.8199852875814216e-07, + "loss": 0.2646, + "step": 37633 + }, + { + "epoch": 1.7629643509626645, + "grad_norm": 0.5987453598085362, + "learning_rate": 1.819274990870107e-07, + "loss": 0.2643, + "step": 37634 + }, + { + "epoch": 1.763011195952593, + "grad_norm": 0.6410137201753341, + "learning_rate": 1.818564827558039e-07, + "loss": 0.2752, + "step": 37635 + }, + { + "epoch": 1.7630580409425212, + "grad_norm": 0.6097591821086233, + "learning_rate": 1.8178547976493015e-07, + "loss": 0.264, + "step": 37636 + }, + { + "epoch": 1.7631048859324494, + "grad_norm": 0.583014292922214, + "learning_rate": 1.817144901147988e-07, + "loss": 0.2622, + "step": 37637 + }, + { + "epoch": 1.7631517309223779, + "grad_norm": 0.608556715981906, + "learning_rate": 1.8164351380581814e-07, + "loss": 0.2778, + "step": 37638 + }, + { + "epoch": 1.7631985759123063, + "grad_norm": 0.5705510231078205, + "learning_rate": 1.8157255083839558e-07, + "loss": 0.2649, + "step": 37639 + }, + { + "epoch": 1.7632454209022344, + "grad_norm": 0.6492160887077548, + "learning_rate": 1.8150160121294058e-07, + "loss": 0.2749, + "step": 37640 + }, + { + "epoch": 1.7632922658921628, + "grad_norm": 0.6010356016684216, + "learning_rate": 1.8143066492986084e-07, + "loss": 0.2798, + "step": 37641 + }, + { + "epoch": 1.7633391108820913, + "grad_norm": 0.5910592208290242, + "learning_rate": 1.8135974198956492e-07, + "loss": 0.2777, + "step": 37642 + }, + { + "epoch": 1.7633859558720195, + "grad_norm": 0.6214873634301501, + "learning_rate": 1.8128883239246165e-07, + "loss": 0.2726, + "step": 37643 + }, + { + "epoch": 1.7634328008619478, + "grad_norm": 0.5891144728226251, + "learning_rate": 1.8121793613895767e-07, + "loss": 0.2628, + "step": 37644 + }, + { + "epoch": 1.7634796458518762, + "grad_norm": 0.5948360058886449, + "learning_rate": 1.8114705322946208e-07, + "loss": 0.2793, + "step": 37645 + }, + { + "epoch": 1.7635264908418045, + "grad_norm": 0.6121899579374546, + "learning_rate": 1.8107618366438207e-07, + "loss": 0.2713, + "step": 37646 + }, + { + "epoch": 1.7635733358317327, + "grad_norm": 0.6066921073723713, + "learning_rate": 1.8100532744412563e-07, + "loss": 0.2769, + "step": 37647 + }, + { + "epoch": 1.7636201808216612, + "grad_norm": 0.6032468316593385, + "learning_rate": 1.809344845691008e-07, + "loss": 0.2697, + "step": 37648 + }, + { + "epoch": 1.7636670258115894, + "grad_norm": 0.5677861122560921, + "learning_rate": 1.8086365503971525e-07, + "loss": 0.2618, + "step": 37649 + }, + { + "epoch": 1.7637138708015176, + "grad_norm": 0.562521405263422, + "learning_rate": 1.807928388563765e-07, + "loss": 0.2583, + "step": 37650 + }, + { + "epoch": 1.763760715791446, + "grad_norm": 0.6232911108111883, + "learning_rate": 1.8072203601949194e-07, + "loss": 0.2871, + "step": 37651 + }, + { + "epoch": 1.7638075607813746, + "grad_norm": 0.6140163141661072, + "learning_rate": 1.8065124652946964e-07, + "loss": 0.2597, + "step": 37652 + }, + { + "epoch": 1.7638544057713028, + "grad_norm": 0.5996273102084965, + "learning_rate": 1.805804703867159e-07, + "loss": 0.2611, + "step": 37653 + }, + { + "epoch": 1.763901250761231, + "grad_norm": 1.0012173238388271, + "learning_rate": 1.8050970759163845e-07, + "loss": 0.2712, + "step": 37654 + }, + { + "epoch": 1.7639480957511595, + "grad_norm": 0.6395621336174189, + "learning_rate": 1.8043895814464474e-07, + "loss": 0.2951, + "step": 37655 + }, + { + "epoch": 1.7639949407410878, + "grad_norm": 0.582726700386224, + "learning_rate": 1.8036822204614252e-07, + "loss": 0.2687, + "step": 37656 + }, + { + "epoch": 1.764041785731016, + "grad_norm": 0.5598668739048411, + "learning_rate": 1.8029749929653755e-07, + "loss": 0.2656, + "step": 37657 + }, + { + "epoch": 1.7640886307209445, + "grad_norm": 0.6227852569118281, + "learning_rate": 1.8022678989623732e-07, + "loss": 0.2652, + "step": 37658 + }, + { + "epoch": 1.7641354757108727, + "grad_norm": 0.5551133192602701, + "learning_rate": 1.8015609384564925e-07, + "loss": 0.2573, + "step": 37659 + }, + { + "epoch": 1.764182320700801, + "grad_norm": 0.624309867635248, + "learning_rate": 1.800854111451797e-07, + "loss": 0.2641, + "step": 37660 + }, + { + "epoch": 1.7642291656907294, + "grad_norm": 0.5675490907631151, + "learning_rate": 1.8001474179523527e-07, + "loss": 0.2582, + "step": 37661 + }, + { + "epoch": 1.7642760106806579, + "grad_norm": 0.6068305670615035, + "learning_rate": 1.7994408579622312e-07, + "loss": 0.2769, + "step": 37662 + }, + { + "epoch": 1.7643228556705859, + "grad_norm": 0.6035237306166797, + "learning_rate": 1.7987344314854965e-07, + "loss": 0.252, + "step": 37663 + }, + { + "epoch": 1.7643697006605144, + "grad_norm": 0.646962792662346, + "learning_rate": 1.7980281385262116e-07, + "loss": 0.2605, + "step": 37664 + }, + { + "epoch": 1.7644165456504428, + "grad_norm": 0.6166953889859348, + "learning_rate": 1.797321979088443e-07, + "loss": 0.2699, + "step": 37665 + }, + { + "epoch": 1.764463390640371, + "grad_norm": 0.6212468784428115, + "learning_rate": 1.796615953176259e-07, + "loss": 0.2733, + "step": 37666 + }, + { + "epoch": 1.7645102356302993, + "grad_norm": 0.6274342311608397, + "learning_rate": 1.7959100607937152e-07, + "loss": 0.2726, + "step": 37667 + }, + { + "epoch": 1.7645570806202278, + "grad_norm": 0.6023358314052195, + "learning_rate": 1.7952043019448777e-07, + "loss": 0.2704, + "step": 37668 + }, + { + "epoch": 1.764603925610156, + "grad_norm": 0.5704678407956854, + "learning_rate": 1.7944986766338075e-07, + "loss": 0.2626, + "step": 37669 + }, + { + "epoch": 1.7646507706000842, + "grad_norm": 0.6271590245834431, + "learning_rate": 1.7937931848645618e-07, + "loss": 0.2797, + "step": 37670 + }, + { + "epoch": 1.7646976155900127, + "grad_norm": 0.5870260375171295, + "learning_rate": 1.7930878266412043e-07, + "loss": 0.2663, + "step": 37671 + }, + { + "epoch": 1.764744460579941, + "grad_norm": 0.6241495572010753, + "learning_rate": 1.7923826019677986e-07, + "loss": 0.2843, + "step": 37672 + }, + { + "epoch": 1.7647913055698692, + "grad_norm": 0.5642812337644382, + "learning_rate": 1.7916775108483914e-07, + "loss": 0.2608, + "step": 37673 + }, + { + "epoch": 1.7648381505597976, + "grad_norm": 0.5731403133208741, + "learning_rate": 1.7909725532870543e-07, + "loss": 0.2669, + "step": 37674 + }, + { + "epoch": 1.764884995549726, + "grad_norm": 0.6062134069684214, + "learning_rate": 1.7902677292878311e-07, + "loss": 0.2735, + "step": 37675 + }, + { + "epoch": 1.7649318405396541, + "grad_norm": 0.6645043373401404, + "learning_rate": 1.789563038854783e-07, + "loss": 0.2859, + "step": 37676 + }, + { + "epoch": 1.7649786855295826, + "grad_norm": 0.6054506382396826, + "learning_rate": 1.7888584819919675e-07, + "loss": 0.2632, + "step": 37677 + }, + { + "epoch": 1.765025530519511, + "grad_norm": 0.5909793345897165, + "learning_rate": 1.7881540587034368e-07, + "loss": 0.273, + "step": 37678 + }, + { + "epoch": 1.7650723755094393, + "grad_norm": 0.6080653881237573, + "learning_rate": 1.7874497689932492e-07, + "loss": 0.2784, + "step": 37679 + }, + { + "epoch": 1.7651192204993675, + "grad_norm": 0.6224482914035581, + "learning_rate": 1.7867456128654537e-07, + "loss": 0.2766, + "step": 37680 + }, + { + "epoch": 1.765166065489296, + "grad_norm": 0.5757760015117767, + "learning_rate": 1.7860415903241002e-07, + "loss": 0.273, + "step": 37681 + }, + { + "epoch": 1.7652129104792242, + "grad_norm": 0.5921663932257932, + "learning_rate": 1.785337701373241e-07, + "loss": 0.2785, + "step": 37682 + }, + { + "epoch": 1.7652597554691525, + "grad_norm": 0.6765967377630961, + "learning_rate": 1.7846339460169282e-07, + "loss": 0.2981, + "step": 37683 + }, + { + "epoch": 1.765306600459081, + "grad_norm": 0.5961105459088575, + "learning_rate": 1.7839303242592142e-07, + "loss": 0.2797, + "step": 37684 + }, + { + "epoch": 1.7653534454490092, + "grad_norm": 0.605720906262216, + "learning_rate": 1.7832268361041434e-07, + "loss": 0.2779, + "step": 37685 + }, + { + "epoch": 1.7654002904389374, + "grad_norm": 0.5784932714084586, + "learning_rate": 1.7825234815557729e-07, + "loss": 0.2682, + "step": 37686 + }, + { + "epoch": 1.7654471354288659, + "grad_norm": 0.6093037200484518, + "learning_rate": 1.7818202606181444e-07, + "loss": 0.2695, + "step": 37687 + }, + { + "epoch": 1.7654939804187944, + "grad_norm": 0.5981694118451762, + "learning_rate": 1.7811171732952992e-07, + "loss": 0.2747, + "step": 37688 + }, + { + "epoch": 1.7655408254087226, + "grad_norm": 0.5341176281996224, + "learning_rate": 1.7804142195912893e-07, + "loss": 0.2581, + "step": 37689 + }, + { + "epoch": 1.7655876703986508, + "grad_norm": 0.606671050441968, + "learning_rate": 1.779711399510159e-07, + "loss": 0.2742, + "step": 37690 + }, + { + "epoch": 1.7656345153885793, + "grad_norm": 0.5829081082688372, + "learning_rate": 1.7790087130559547e-07, + "loss": 0.271, + "step": 37691 + }, + { + "epoch": 1.7656813603785075, + "grad_norm": 0.5600739523535434, + "learning_rate": 1.7783061602327235e-07, + "loss": 0.2431, + "step": 37692 + }, + { + "epoch": 1.7657282053684358, + "grad_norm": 0.6099828205287127, + "learning_rate": 1.777603741044498e-07, + "loss": 0.2684, + "step": 37693 + }, + { + "epoch": 1.7657750503583642, + "grad_norm": 0.5746685324252166, + "learning_rate": 1.7769014554953305e-07, + "loss": 0.2573, + "step": 37694 + }, + { + "epoch": 1.7658218953482925, + "grad_norm": 0.6003711887234665, + "learning_rate": 1.7761993035892543e-07, + "loss": 0.2767, + "step": 37695 + }, + { + "epoch": 1.7658687403382207, + "grad_norm": 0.5742231882364561, + "learning_rate": 1.775497285330316e-07, + "loss": 0.2538, + "step": 37696 + }, + { + "epoch": 1.7659155853281492, + "grad_norm": 0.5843905638316806, + "learning_rate": 1.7747954007225508e-07, + "loss": 0.2664, + "step": 37697 + }, + { + "epoch": 1.7659624303180776, + "grad_norm": 0.5923536613585425, + "learning_rate": 1.7740936497700062e-07, + "loss": 0.2593, + "step": 37698 + }, + { + "epoch": 1.7660092753080057, + "grad_norm": 0.646187000718491, + "learning_rate": 1.773392032476709e-07, + "loss": 0.2771, + "step": 37699 + }, + { + "epoch": 1.7660561202979341, + "grad_norm": 0.6000881036945639, + "learning_rate": 1.772690548846706e-07, + "loss": 0.2763, + "step": 37700 + }, + { + "epoch": 1.7661029652878626, + "grad_norm": 0.5564836114493119, + "learning_rate": 1.7719891988840333e-07, + "loss": 0.2486, + "step": 37701 + }, + { + "epoch": 1.7661498102777908, + "grad_norm": 0.6018385965847904, + "learning_rate": 1.7712879825927203e-07, + "loss": 0.2615, + "step": 37702 + }, + { + "epoch": 1.766196655267719, + "grad_norm": 0.6261447038873985, + "learning_rate": 1.7705868999768062e-07, + "loss": 0.2867, + "step": 37703 + }, + { + "epoch": 1.7662435002576475, + "grad_norm": 0.5694014027623149, + "learning_rate": 1.7698859510403315e-07, + "loss": 0.258, + "step": 37704 + }, + { + "epoch": 1.7662903452475758, + "grad_norm": 0.6187776797475281, + "learning_rate": 1.7691851357873185e-07, + "loss": 0.2848, + "step": 37705 + }, + { + "epoch": 1.766337190237504, + "grad_norm": 0.6233213437270958, + "learning_rate": 1.7684844542218083e-07, + "loss": 0.2732, + "step": 37706 + }, + { + "epoch": 1.7663840352274325, + "grad_norm": 0.5795764030128654, + "learning_rate": 1.7677839063478307e-07, + "loss": 0.2545, + "step": 37707 + }, + { + "epoch": 1.7664308802173607, + "grad_norm": 0.625706976913832, + "learning_rate": 1.7670834921694218e-07, + "loss": 0.2665, + "step": 37708 + }, + { + "epoch": 1.766477725207289, + "grad_norm": 0.5552523726017132, + "learning_rate": 1.7663832116906033e-07, + "loss": 0.2478, + "step": 37709 + }, + { + "epoch": 1.7665245701972174, + "grad_norm": 0.6339881737935911, + "learning_rate": 1.7656830649154134e-07, + "loss": 0.2793, + "step": 37710 + }, + { + "epoch": 1.7665714151871459, + "grad_norm": 0.6415763022273129, + "learning_rate": 1.7649830518478738e-07, + "loss": 0.2883, + "step": 37711 + }, + { + "epoch": 1.766618260177074, + "grad_norm": 0.609908678913314, + "learning_rate": 1.7642831724920179e-07, + "loss": 0.2848, + "step": 37712 + }, + { + "epoch": 1.7666651051670024, + "grad_norm": 0.6028463193058707, + "learning_rate": 1.7635834268518697e-07, + "loss": 0.2687, + "step": 37713 + }, + { + "epoch": 1.7667119501569308, + "grad_norm": 0.5778535134963422, + "learning_rate": 1.7628838149314653e-07, + "loss": 0.2647, + "step": 37714 + }, + { + "epoch": 1.766758795146859, + "grad_norm": 0.5900947382025917, + "learning_rate": 1.762184336734818e-07, + "loss": 0.2759, + "step": 37715 + }, + { + "epoch": 1.7668056401367873, + "grad_norm": 0.5999083117686289, + "learning_rate": 1.7614849922659604e-07, + "loss": 0.2835, + "step": 37716 + }, + { + "epoch": 1.7668524851267158, + "grad_norm": 0.5933511302916119, + "learning_rate": 1.7607857815289204e-07, + "loss": 0.2819, + "step": 37717 + }, + { + "epoch": 1.766899330116644, + "grad_norm": 0.58032694824666, + "learning_rate": 1.7600867045277137e-07, + "loss": 0.2669, + "step": 37718 + }, + { + "epoch": 1.7669461751065723, + "grad_norm": 0.6282259963859196, + "learning_rate": 1.7593877612663651e-07, + "loss": 0.2826, + "step": 37719 + }, + { + "epoch": 1.7669930200965007, + "grad_norm": 0.6492868117701609, + "learning_rate": 1.758688951748902e-07, + "loss": 0.2633, + "step": 37720 + }, + { + "epoch": 1.767039865086429, + "grad_norm": 0.5603206746644358, + "learning_rate": 1.757990275979343e-07, + "loss": 0.2467, + "step": 37721 + }, + { + "epoch": 1.7670867100763572, + "grad_norm": 0.5847231167535918, + "learning_rate": 1.757291733961705e-07, + "loss": 0.2671, + "step": 37722 + }, + { + "epoch": 1.7671335550662857, + "grad_norm": 0.6102775531940848, + "learning_rate": 1.7565933257000152e-07, + "loss": 0.2702, + "step": 37723 + }, + { + "epoch": 1.7671804000562141, + "grad_norm": 0.5833030363465209, + "learning_rate": 1.7558950511982864e-07, + "loss": 0.2603, + "step": 37724 + }, + { + "epoch": 1.7672272450461424, + "grad_norm": 0.6251658590438837, + "learning_rate": 1.755196910460538e-07, + "loss": 0.2744, + "step": 37725 + }, + { + "epoch": 1.7672740900360706, + "grad_norm": 0.5737040824032036, + "learning_rate": 1.7544989034907895e-07, + "loss": 0.2677, + "step": 37726 + }, + { + "epoch": 1.767320935025999, + "grad_norm": 0.536260434514003, + "learning_rate": 1.7538010302930565e-07, + "loss": 0.2576, + "step": 37727 + }, + { + "epoch": 1.7673677800159273, + "grad_norm": 0.6081752368837481, + "learning_rate": 1.7531032908713608e-07, + "loss": 0.2727, + "step": 37728 + }, + { + "epoch": 1.7674146250058556, + "grad_norm": 0.6128086544744299, + "learning_rate": 1.7524056852297135e-07, + "loss": 0.2772, + "step": 37729 + }, + { + "epoch": 1.767461469995784, + "grad_norm": 0.5763856997306424, + "learning_rate": 1.751708213372122e-07, + "loss": 0.2567, + "step": 37730 + }, + { + "epoch": 1.7675083149857123, + "grad_norm": 0.5820205398909072, + "learning_rate": 1.7510108753026083e-07, + "loss": 0.2654, + "step": 37731 + }, + { + "epoch": 1.7675551599756405, + "grad_norm": 0.6038699747107464, + "learning_rate": 1.7503136710251834e-07, + "loss": 0.2472, + "step": 37732 + }, + { + "epoch": 1.767602004965569, + "grad_norm": 0.5789983868801306, + "learning_rate": 1.7496166005438575e-07, + "loss": 0.2606, + "step": 37733 + }, + { + "epoch": 1.7676488499554974, + "grad_norm": 0.6030588799226808, + "learning_rate": 1.7489196638626472e-07, + "loss": 0.2626, + "step": 37734 + }, + { + "epoch": 1.7676956949454254, + "grad_norm": 0.6064966773803651, + "learning_rate": 1.748222860985563e-07, + "loss": 0.28, + "step": 37735 + }, + { + "epoch": 1.767742539935354, + "grad_norm": 0.6095438808532931, + "learning_rate": 1.7475261919166127e-07, + "loss": 0.2611, + "step": 37736 + }, + { + "epoch": 1.7677893849252824, + "grad_norm": 0.5803853756828686, + "learning_rate": 1.7468296566598014e-07, + "loss": 0.2705, + "step": 37737 + }, + { + "epoch": 1.7678362299152106, + "grad_norm": 0.6262843951448113, + "learning_rate": 1.74613325521914e-07, + "loss": 0.2723, + "step": 37738 + }, + { + "epoch": 1.7678830749051389, + "grad_norm": 0.6155318760283148, + "learning_rate": 1.7454369875986364e-07, + "loss": 0.2819, + "step": 37739 + }, + { + "epoch": 1.7679299198950673, + "grad_norm": 0.5864324154950538, + "learning_rate": 1.7447408538022986e-07, + "loss": 0.2733, + "step": 37740 + }, + { + "epoch": 1.7679767648849956, + "grad_norm": 0.6501662380719311, + "learning_rate": 1.744044853834137e-07, + "loss": 0.2752, + "step": 37741 + }, + { + "epoch": 1.7680236098749238, + "grad_norm": 0.5792896226418501, + "learning_rate": 1.7433489876981486e-07, + "loss": 0.2627, + "step": 37742 + }, + { + "epoch": 1.7680704548648523, + "grad_norm": 0.6031933241599738, + "learning_rate": 1.742653255398344e-07, + "loss": 0.2801, + "step": 37743 + }, + { + "epoch": 1.7681172998547805, + "grad_norm": 0.5969253279675166, + "learning_rate": 1.7419576569387197e-07, + "loss": 0.2667, + "step": 37744 + }, + { + "epoch": 1.7681641448447087, + "grad_norm": 0.6066551249234265, + "learning_rate": 1.741262192323284e-07, + "loss": 0.2706, + "step": 37745 + }, + { + "epoch": 1.7682109898346372, + "grad_norm": 0.5851072813714787, + "learning_rate": 1.7405668615560396e-07, + "loss": 0.2772, + "step": 37746 + }, + { + "epoch": 1.7682578348245657, + "grad_norm": 0.583617475519784, + "learning_rate": 1.7398716646409907e-07, + "loss": 0.2615, + "step": 37747 + }, + { + "epoch": 1.7683046798144937, + "grad_norm": 0.5824546465486892, + "learning_rate": 1.7391766015821294e-07, + "loss": 0.2715, + "step": 37748 + }, + { + "epoch": 1.7683515248044221, + "grad_norm": 0.579795584521778, + "learning_rate": 1.738481672383463e-07, + "loss": 0.2565, + "step": 37749 + }, + { + "epoch": 1.7683983697943506, + "grad_norm": 0.6145579246645816, + "learning_rate": 1.7377868770489887e-07, + "loss": 0.2692, + "step": 37750 + }, + { + "epoch": 1.7684452147842789, + "grad_norm": 0.5917491650353303, + "learning_rate": 1.7370922155827004e-07, + "loss": 0.2767, + "step": 37751 + }, + { + "epoch": 1.768492059774207, + "grad_norm": 0.5911066117970856, + "learning_rate": 1.7363976879886003e-07, + "loss": 0.2648, + "step": 37752 + }, + { + "epoch": 1.7685389047641356, + "grad_norm": 0.5896662890812511, + "learning_rate": 1.7357032942706908e-07, + "loss": 0.2691, + "step": 37753 + }, + { + "epoch": 1.7685857497540638, + "grad_norm": 0.6041041145696817, + "learning_rate": 1.7350090344329552e-07, + "loss": 0.2668, + "step": 37754 + }, + { + "epoch": 1.768632594743992, + "grad_norm": 0.641599801065578, + "learning_rate": 1.7343149084793953e-07, + "loss": 0.269, + "step": 37755 + }, + { + "epoch": 1.7686794397339205, + "grad_norm": 0.6332660572818395, + "learning_rate": 1.7336209164140056e-07, + "loss": 0.2705, + "step": 37756 + }, + { + "epoch": 1.7687262847238487, + "grad_norm": 0.6120884722990853, + "learning_rate": 1.7329270582407825e-07, + "loss": 0.2647, + "step": 37757 + }, + { + "epoch": 1.768773129713777, + "grad_norm": 0.6008540051993166, + "learning_rate": 1.7322333339637122e-07, + "loss": 0.2749, + "step": 37758 + }, + { + "epoch": 1.7688199747037054, + "grad_norm": 0.5539671496201494, + "learning_rate": 1.7315397435867965e-07, + "loss": 0.2585, + "step": 37759 + }, + { + "epoch": 1.768866819693634, + "grad_norm": 0.5937526911018959, + "learning_rate": 1.7308462871140158e-07, + "loss": 0.2665, + "step": 37760 + }, + { + "epoch": 1.7689136646835621, + "grad_norm": 0.6197255286634942, + "learning_rate": 1.7301529645493698e-07, + "loss": 0.2791, + "step": 37761 + }, + { + "epoch": 1.7689605096734904, + "grad_norm": 0.5867176696024347, + "learning_rate": 1.7294597758968413e-07, + "loss": 0.273, + "step": 37762 + }, + { + "epoch": 1.7690073546634189, + "grad_norm": 0.5910872526400132, + "learning_rate": 1.728766721160427e-07, + "loss": 0.2738, + "step": 37763 + }, + { + "epoch": 1.769054199653347, + "grad_norm": 0.5567810899057897, + "learning_rate": 1.72807380034411e-07, + "loss": 0.2525, + "step": 37764 + }, + { + "epoch": 1.7691010446432753, + "grad_norm": 0.5950873638291703, + "learning_rate": 1.7273810134518815e-07, + "loss": 0.2737, + "step": 37765 + }, + { + "epoch": 1.7691478896332038, + "grad_norm": 0.5833071918805612, + "learning_rate": 1.7266883604877217e-07, + "loss": 0.2659, + "step": 37766 + }, + { + "epoch": 1.769194734623132, + "grad_norm": 0.5885596344131705, + "learning_rate": 1.725995841455619e-07, + "loss": 0.2669, + "step": 37767 + }, + { + "epoch": 1.7692415796130603, + "grad_norm": 0.5634976139626038, + "learning_rate": 1.7253034563595615e-07, + "loss": 0.255, + "step": 37768 + }, + { + "epoch": 1.7692884246029887, + "grad_norm": 0.5808196119037012, + "learning_rate": 1.7246112052035358e-07, + "loss": 0.2608, + "step": 37769 + }, + { + "epoch": 1.7693352695929172, + "grad_norm": 0.6004719831061734, + "learning_rate": 1.723919087991524e-07, + "loss": 0.2736, + "step": 37770 + }, + { + "epoch": 1.7693821145828452, + "grad_norm": 0.6122960222948233, + "learning_rate": 1.7232271047275067e-07, + "loss": 0.2638, + "step": 37771 + }, + { + "epoch": 1.7694289595727737, + "grad_norm": 0.5737373259845798, + "learning_rate": 1.7225352554154638e-07, + "loss": 0.2576, + "step": 37772 + }, + { + "epoch": 1.7694758045627021, + "grad_norm": 0.5685866442323522, + "learning_rate": 1.7218435400593814e-07, + "loss": 0.2707, + "step": 37773 + }, + { + "epoch": 1.7695226495526304, + "grad_norm": 0.5995534423964354, + "learning_rate": 1.721151958663239e-07, + "loss": 0.2729, + "step": 37774 + }, + { + "epoch": 1.7695694945425586, + "grad_norm": 0.5815218557553103, + "learning_rate": 1.7204605112310147e-07, + "loss": 0.2522, + "step": 37775 + }, + { + "epoch": 1.769616339532487, + "grad_norm": 0.6095542812272884, + "learning_rate": 1.719769197766688e-07, + "loss": 0.2575, + "step": 37776 + }, + { + "epoch": 1.7696631845224153, + "grad_norm": 0.6264939703206748, + "learning_rate": 1.719078018274245e-07, + "loss": 0.266, + "step": 37777 + }, + { + "epoch": 1.7697100295123436, + "grad_norm": 0.630728592954959, + "learning_rate": 1.7183869727576547e-07, + "loss": 0.2677, + "step": 37778 + }, + { + "epoch": 1.769756874502272, + "grad_norm": 0.5612289903942851, + "learning_rate": 1.7176960612208914e-07, + "loss": 0.2659, + "step": 37779 + }, + { + "epoch": 1.7698037194922003, + "grad_norm": 0.5736711105552579, + "learning_rate": 1.7170052836679358e-07, + "loss": 0.2617, + "step": 37780 + }, + { + "epoch": 1.7698505644821285, + "grad_norm": 0.6058266084716344, + "learning_rate": 1.7163146401027647e-07, + "loss": 0.2733, + "step": 37781 + }, + { + "epoch": 1.769897409472057, + "grad_norm": 0.6332926170569825, + "learning_rate": 1.7156241305293474e-07, + "loss": 0.2865, + "step": 37782 + }, + { + "epoch": 1.7699442544619854, + "grad_norm": 0.5830973728840433, + "learning_rate": 1.7149337549516643e-07, + "loss": 0.2569, + "step": 37783 + }, + { + "epoch": 1.7699910994519135, + "grad_norm": 0.5802883987084421, + "learning_rate": 1.7142435133736869e-07, + "loss": 0.2779, + "step": 37784 + }, + { + "epoch": 1.770037944441842, + "grad_norm": 0.5709080330313806, + "learning_rate": 1.713553405799387e-07, + "loss": 0.2734, + "step": 37785 + }, + { + "epoch": 1.7700847894317704, + "grad_norm": 0.6064121529494115, + "learning_rate": 1.7128634322327282e-07, + "loss": 0.2695, + "step": 37786 + }, + { + "epoch": 1.7701316344216986, + "grad_norm": 0.6117490404059681, + "learning_rate": 1.712173592677688e-07, + "loss": 0.2661, + "step": 37787 + }, + { + "epoch": 1.7701784794116269, + "grad_norm": 0.5706667884285958, + "learning_rate": 1.7114838871382378e-07, + "loss": 0.2664, + "step": 37788 + }, + { + "epoch": 1.7702253244015553, + "grad_norm": 0.5796244778078173, + "learning_rate": 1.7107943156183414e-07, + "loss": 0.2675, + "step": 37789 + }, + { + "epoch": 1.7702721693914836, + "grad_norm": 0.5947728805249781, + "learning_rate": 1.7101048781219765e-07, + "loss": 0.2632, + "step": 37790 + }, + { + "epoch": 1.7703190143814118, + "grad_norm": 0.614881356287348, + "learning_rate": 1.7094155746531004e-07, + "loss": 0.2739, + "step": 37791 + }, + { + "epoch": 1.7703658593713403, + "grad_norm": 0.6229306597747484, + "learning_rate": 1.7087264052156855e-07, + "loss": 0.2744, + "step": 37792 + }, + { + "epoch": 1.7704127043612685, + "grad_norm": 0.5815433687001282, + "learning_rate": 1.7080373698136948e-07, + "loss": 0.2615, + "step": 37793 + }, + { + "epoch": 1.7704595493511968, + "grad_norm": 0.5960532524245942, + "learning_rate": 1.7073484684510923e-07, + "loss": 0.2504, + "step": 37794 + }, + { + "epoch": 1.7705063943411252, + "grad_norm": 0.5726822863346669, + "learning_rate": 1.7066597011318464e-07, + "loss": 0.2674, + "step": 37795 + }, + { + "epoch": 1.7705532393310537, + "grad_norm": 0.5789784376735368, + "learning_rate": 1.705971067859924e-07, + "loss": 0.2596, + "step": 37796 + }, + { + "epoch": 1.770600084320982, + "grad_norm": 0.5891784352511342, + "learning_rate": 1.705282568639277e-07, + "loss": 0.2676, + "step": 37797 + }, + { + "epoch": 1.7706469293109102, + "grad_norm": 0.6255590568295598, + "learning_rate": 1.704594203473875e-07, + "loss": 0.2892, + "step": 37798 + }, + { + "epoch": 1.7706937743008386, + "grad_norm": 0.6433950809602725, + "learning_rate": 1.7039059723676837e-07, + "loss": 0.2831, + "step": 37799 + }, + { + "epoch": 1.7707406192907669, + "grad_norm": 0.6063281868869338, + "learning_rate": 1.7032178753246532e-07, + "loss": 0.2771, + "step": 37800 + }, + { + "epoch": 1.7707874642806951, + "grad_norm": 0.5459564273881048, + "learning_rate": 1.7025299123487493e-07, + "loss": 0.2617, + "step": 37801 + }, + { + "epoch": 1.7708343092706236, + "grad_norm": 0.5783980946873398, + "learning_rate": 1.701842083443933e-07, + "loss": 0.2746, + "step": 37802 + }, + { + "epoch": 1.7708811542605518, + "grad_norm": 0.5622730538807841, + "learning_rate": 1.7011543886141568e-07, + "loss": 0.2539, + "step": 37803 + }, + { + "epoch": 1.77092799925048, + "grad_norm": 0.6366896175811178, + "learning_rate": 1.7004668278633785e-07, + "loss": 0.2853, + "step": 37804 + }, + { + "epoch": 1.7709748442404085, + "grad_norm": 0.5698767434518006, + "learning_rate": 1.6997794011955588e-07, + "loss": 0.2611, + "step": 37805 + }, + { + "epoch": 1.771021689230337, + "grad_norm": 0.5775308872890581, + "learning_rate": 1.6990921086146584e-07, + "loss": 0.2548, + "step": 37806 + }, + { + "epoch": 1.771068534220265, + "grad_norm": 0.587382007814429, + "learning_rate": 1.6984049501246214e-07, + "loss": 0.2692, + "step": 37807 + }, + { + "epoch": 1.7711153792101935, + "grad_norm": 0.5755757242867603, + "learning_rate": 1.6977179257294114e-07, + "loss": 0.2734, + "step": 37808 + }, + { + "epoch": 1.771162224200122, + "grad_norm": 0.5975778172891778, + "learning_rate": 1.6970310354329723e-07, + "loss": 0.2734, + "step": 37809 + }, + { + "epoch": 1.7712090691900502, + "grad_norm": 0.6792624111382158, + "learning_rate": 1.6963442792392648e-07, + "loss": 0.2952, + "step": 37810 + }, + { + "epoch": 1.7712559141799784, + "grad_norm": 0.5876861528235061, + "learning_rate": 1.695657657152236e-07, + "loss": 0.2569, + "step": 37811 + }, + { + "epoch": 1.7713027591699069, + "grad_norm": 0.6000349709420935, + "learning_rate": 1.6949711691758465e-07, + "loss": 0.2763, + "step": 37812 + }, + { + "epoch": 1.7713496041598351, + "grad_norm": 0.6491736053670245, + "learning_rate": 1.6942848153140346e-07, + "loss": 0.2751, + "step": 37813 + }, + { + "epoch": 1.7713964491497634, + "grad_norm": 0.5946223186326013, + "learning_rate": 1.6935985955707612e-07, + "loss": 0.2692, + "step": 37814 + }, + { + "epoch": 1.7714432941396918, + "grad_norm": 0.6062445340401086, + "learning_rate": 1.6929125099499678e-07, + "loss": 0.2768, + "step": 37815 + }, + { + "epoch": 1.77149013912962, + "grad_norm": 0.6359143018331438, + "learning_rate": 1.692226558455601e-07, + "loss": 0.2747, + "step": 37816 + }, + { + "epoch": 1.7715369841195483, + "grad_norm": 0.5893929315110338, + "learning_rate": 1.6915407410916157e-07, + "loss": 0.2686, + "step": 37817 + }, + { + "epoch": 1.7715838291094768, + "grad_norm": 0.5534353616740595, + "learning_rate": 1.6908550578619538e-07, + "loss": 0.2559, + "step": 37818 + }, + { + "epoch": 1.7716306740994052, + "grad_norm": 0.5894920540054985, + "learning_rate": 1.6901695087705644e-07, + "loss": 0.2683, + "step": 37819 + }, + { + "epoch": 1.7716775190893332, + "grad_norm": 0.5704570926590001, + "learning_rate": 1.689484093821392e-07, + "loss": 0.2458, + "step": 37820 + }, + { + "epoch": 1.7717243640792617, + "grad_norm": 0.6025059355679444, + "learning_rate": 1.6887988130183773e-07, + "loss": 0.2737, + "step": 37821 + }, + { + "epoch": 1.7717712090691902, + "grad_norm": 0.5991727734285042, + "learning_rate": 1.6881136663654652e-07, + "loss": 0.2657, + "step": 37822 + }, + { + "epoch": 1.7718180540591184, + "grad_norm": 0.6131862000414404, + "learning_rate": 1.687428653866599e-07, + "loss": 0.2874, + "step": 37823 + }, + { + "epoch": 1.7718648990490466, + "grad_norm": 0.6101730210160597, + "learning_rate": 1.6867437755257233e-07, + "loss": 0.2795, + "step": 37824 + }, + { + "epoch": 1.7719117440389751, + "grad_norm": 0.6143893971211316, + "learning_rate": 1.6860590313467762e-07, + "loss": 0.2723, + "step": 37825 + }, + { + "epoch": 1.7719585890289034, + "grad_norm": 0.5925902636943456, + "learning_rate": 1.6853744213337048e-07, + "loss": 0.2753, + "step": 37826 + }, + { + "epoch": 1.7720054340188316, + "grad_norm": 0.5763993879989862, + "learning_rate": 1.6846899454904448e-07, + "loss": 0.2599, + "step": 37827 + }, + { + "epoch": 1.77205227900876, + "grad_norm": 0.5613430349472698, + "learning_rate": 1.6840056038209264e-07, + "loss": 0.2427, + "step": 37828 + }, + { + "epoch": 1.7720991239986883, + "grad_norm": 0.5817905268698457, + "learning_rate": 1.683321396329099e-07, + "loss": 0.2587, + "step": 37829 + }, + { + "epoch": 1.7721459689886165, + "grad_norm": 0.6035825149807565, + "learning_rate": 1.682637323018893e-07, + "loss": 0.257, + "step": 37830 + }, + { + "epoch": 1.772192813978545, + "grad_norm": 0.5644750666730017, + "learning_rate": 1.6819533838942526e-07, + "loss": 0.2749, + "step": 37831 + }, + { + "epoch": 1.7722396589684735, + "grad_norm": 0.6111621882741107, + "learning_rate": 1.6812695789591105e-07, + "loss": 0.2699, + "step": 37832 + }, + { + "epoch": 1.7722865039584017, + "grad_norm": 0.5827424047808537, + "learning_rate": 1.6805859082173997e-07, + "loss": 0.2773, + "step": 37833 + }, + { + "epoch": 1.77233334894833, + "grad_norm": 0.6572558792987646, + "learning_rate": 1.6799023716730563e-07, + "loss": 0.2612, + "step": 37834 + }, + { + "epoch": 1.7723801939382584, + "grad_norm": 0.591870100226032, + "learning_rate": 1.6792189693300127e-07, + "loss": 0.2753, + "step": 37835 + }, + { + "epoch": 1.7724270389281866, + "grad_norm": 0.5722738410491166, + "learning_rate": 1.6785357011922022e-07, + "loss": 0.2688, + "step": 37836 + }, + { + "epoch": 1.772473883918115, + "grad_norm": 0.5890908160913112, + "learning_rate": 1.677852567263555e-07, + "loss": 0.2758, + "step": 37837 + }, + { + "epoch": 1.7725207289080434, + "grad_norm": 0.6380430920001733, + "learning_rate": 1.6771695675480092e-07, + "loss": 0.2883, + "step": 37838 + }, + { + "epoch": 1.7725675738979716, + "grad_norm": 0.6170694537040943, + "learning_rate": 1.6764867020494874e-07, + "loss": 0.2777, + "step": 37839 + }, + { + "epoch": 1.7726144188878998, + "grad_norm": 0.5774276224747064, + "learning_rate": 1.675803970771922e-07, + "loss": 0.2589, + "step": 37840 + }, + { + "epoch": 1.7726612638778283, + "grad_norm": 0.6042119995136176, + "learning_rate": 1.6751213737192462e-07, + "loss": 0.2627, + "step": 37841 + }, + { + "epoch": 1.7727081088677568, + "grad_norm": 0.6314317213684905, + "learning_rate": 1.674438910895382e-07, + "loss": 0.2678, + "step": 37842 + }, + { + "epoch": 1.7727549538576848, + "grad_norm": 0.609979394478226, + "learning_rate": 1.673756582304259e-07, + "loss": 0.2885, + "step": 37843 + }, + { + "epoch": 1.7728017988476132, + "grad_norm": 0.622079404518883, + "learning_rate": 1.6730743879498053e-07, + "loss": 0.2723, + "step": 37844 + }, + { + "epoch": 1.7728486438375417, + "grad_norm": 0.6244793409019498, + "learning_rate": 1.672392327835945e-07, + "loss": 0.2515, + "step": 37845 + }, + { + "epoch": 1.77289548882747, + "grad_norm": 0.5943569542481268, + "learning_rate": 1.6717104019666003e-07, + "loss": 0.2578, + "step": 37846 + }, + { + "epoch": 1.7729423338173982, + "grad_norm": 0.5938048705233281, + "learning_rate": 1.6710286103457012e-07, + "loss": 0.2697, + "step": 37847 + }, + { + "epoch": 1.7729891788073266, + "grad_norm": 0.639225685276674, + "learning_rate": 1.6703469529771728e-07, + "loss": 0.2898, + "step": 37848 + }, + { + "epoch": 1.773036023797255, + "grad_norm": 0.6092429888098009, + "learning_rate": 1.669665429864928e-07, + "loss": 0.279, + "step": 37849 + }, + { + "epoch": 1.7730828687871831, + "grad_norm": 0.5783410203686101, + "learning_rate": 1.668984041012897e-07, + "loss": 0.2582, + "step": 37850 + }, + { + "epoch": 1.7731297137771116, + "grad_norm": 0.6196496732060505, + "learning_rate": 1.6683027864250024e-07, + "loss": 0.2826, + "step": 37851 + }, + { + "epoch": 1.7731765587670398, + "grad_norm": 0.5954748387418556, + "learning_rate": 1.667621666105157e-07, + "loss": 0.2741, + "step": 37852 + }, + { + "epoch": 1.773223403756968, + "grad_norm": 0.5920947602526128, + "learning_rate": 1.666940680057283e-07, + "loss": 0.2748, + "step": 37853 + }, + { + "epoch": 1.7732702487468965, + "grad_norm": 0.6267252840922974, + "learning_rate": 1.6662598282853077e-07, + "loss": 0.2827, + "step": 37854 + }, + { + "epoch": 1.773317093736825, + "grad_norm": 0.5728869793231456, + "learning_rate": 1.6655791107931362e-07, + "loss": 0.2683, + "step": 37855 + }, + { + "epoch": 1.773363938726753, + "grad_norm": 0.650927761264853, + "learning_rate": 1.6648985275846935e-07, + "loss": 0.3034, + "step": 37856 + }, + { + "epoch": 1.7734107837166815, + "grad_norm": 0.5765529655857268, + "learning_rate": 1.6642180786638984e-07, + "loss": 0.2624, + "step": 37857 + }, + { + "epoch": 1.77345762870661, + "grad_norm": 0.5876187476799513, + "learning_rate": 1.663537764034656e-07, + "loss": 0.2637, + "step": 37858 + }, + { + "epoch": 1.7735044736965382, + "grad_norm": 0.6109441275285842, + "learning_rate": 1.6628575837008914e-07, + "loss": 0.2955, + "step": 37859 + }, + { + "epoch": 1.7735513186864664, + "grad_norm": 0.6422941105266311, + "learning_rate": 1.662177537666515e-07, + "loss": 0.2797, + "step": 37860 + }, + { + "epoch": 1.773598163676395, + "grad_norm": 0.6071853908641984, + "learning_rate": 1.661497625935446e-07, + "loss": 0.2618, + "step": 37861 + }, + { + "epoch": 1.7736450086663231, + "grad_norm": 0.534851832632137, + "learning_rate": 1.6608178485115866e-07, + "loss": 0.2407, + "step": 37862 + }, + { + "epoch": 1.7736918536562514, + "grad_norm": 0.6000393779217544, + "learning_rate": 1.6601382053988618e-07, + "loss": 0.2771, + "step": 37863 + }, + { + "epoch": 1.7737386986461798, + "grad_norm": 0.6161459139274725, + "learning_rate": 1.6594586966011683e-07, + "loss": 0.2754, + "step": 37864 + }, + { + "epoch": 1.773785543636108, + "grad_norm": 0.6125643799502517, + "learning_rate": 1.6587793221224252e-07, + "loss": 0.2807, + "step": 37865 + }, + { + "epoch": 1.7738323886260363, + "grad_norm": 0.5800864977194662, + "learning_rate": 1.6581000819665376e-07, + "loss": 0.2626, + "step": 37866 + }, + { + "epoch": 1.7738792336159648, + "grad_norm": 0.5812867512005229, + "learning_rate": 1.6574209761374193e-07, + "loss": 0.2748, + "step": 37867 + }, + { + "epoch": 1.7739260786058932, + "grad_norm": 0.5935013039712974, + "learning_rate": 1.656742004638981e-07, + "loss": 0.2749, + "step": 37868 + }, + { + "epoch": 1.7739729235958213, + "grad_norm": 0.6073077234832308, + "learning_rate": 1.656063167475125e-07, + "loss": 0.2713, + "step": 37869 + }, + { + "epoch": 1.7740197685857497, + "grad_norm": 0.608291785138091, + "learning_rate": 1.6553844646497536e-07, + "loss": 0.2691, + "step": 37870 + }, + { + "epoch": 1.7740666135756782, + "grad_norm": 0.5625842099536085, + "learning_rate": 1.654705896166778e-07, + "loss": 0.257, + "step": 37871 + }, + { + "epoch": 1.7741134585656064, + "grad_norm": 0.5933952360458067, + "learning_rate": 1.6540274620301e-07, + "loss": 0.275, + "step": 37872 + }, + { + "epoch": 1.7741603035555347, + "grad_norm": 0.585869743333646, + "learning_rate": 1.653349162243628e-07, + "loss": 0.2765, + "step": 37873 + }, + { + "epoch": 1.7742071485454631, + "grad_norm": 0.6572425465768822, + "learning_rate": 1.6526709968112647e-07, + "loss": 0.2956, + "step": 37874 + }, + { + "epoch": 1.7742539935353914, + "grad_norm": 0.6423370577987821, + "learning_rate": 1.6519929657369148e-07, + "loss": 0.2846, + "step": 37875 + }, + { + "epoch": 1.7743008385253196, + "grad_norm": 0.5501928547944742, + "learning_rate": 1.6513150690244757e-07, + "loss": 0.2509, + "step": 37876 + }, + { + "epoch": 1.774347683515248, + "grad_norm": 0.5443535458949583, + "learning_rate": 1.6506373066778492e-07, + "loss": 0.2442, + "step": 37877 + }, + { + "epoch": 1.7743945285051763, + "grad_norm": 0.5990382245209955, + "learning_rate": 1.6499596787009325e-07, + "loss": 0.2836, + "step": 37878 + }, + { + "epoch": 1.7744413734951046, + "grad_norm": 0.602599992754074, + "learning_rate": 1.649282185097631e-07, + "loss": 0.2747, + "step": 37879 + }, + { + "epoch": 1.774488218485033, + "grad_norm": 0.5873620243388812, + "learning_rate": 1.6486048258718413e-07, + "loss": 0.2695, + "step": 37880 + }, + { + "epoch": 1.7745350634749615, + "grad_norm": 0.5812535000698495, + "learning_rate": 1.647927601027466e-07, + "loss": 0.2538, + "step": 37881 + }, + { + "epoch": 1.7745819084648897, + "grad_norm": 0.6150987192995312, + "learning_rate": 1.6472505105683934e-07, + "loss": 0.2726, + "step": 37882 + }, + { + "epoch": 1.774628753454818, + "grad_norm": 0.635653386996737, + "learning_rate": 1.646573554498529e-07, + "loss": 0.2774, + "step": 37883 + }, + { + "epoch": 1.7746755984447464, + "grad_norm": 0.6518480714008458, + "learning_rate": 1.6458967328217613e-07, + "loss": 0.286, + "step": 37884 + }, + { + "epoch": 1.7747224434346747, + "grad_norm": 0.6019921736729879, + "learning_rate": 1.6452200455419842e-07, + "loss": 0.2736, + "step": 37885 + }, + { + "epoch": 1.774769288424603, + "grad_norm": 0.6189652665771533, + "learning_rate": 1.6445434926631004e-07, + "loss": 0.2979, + "step": 37886 + }, + { + "epoch": 1.7748161334145314, + "grad_norm": 0.627698511814142, + "learning_rate": 1.6438670741889985e-07, + "loss": 0.2656, + "step": 37887 + }, + { + "epoch": 1.7748629784044596, + "grad_norm": 0.5915377585528719, + "learning_rate": 1.6431907901235694e-07, + "loss": 0.2719, + "step": 37888 + }, + { + "epoch": 1.7749098233943879, + "grad_norm": 0.5251736520083281, + "learning_rate": 1.6425146404707075e-07, + "loss": 0.2525, + "step": 37889 + }, + { + "epoch": 1.7749566683843163, + "grad_norm": 0.5893602241175879, + "learning_rate": 1.6418386252343072e-07, + "loss": 0.2542, + "step": 37890 + }, + { + "epoch": 1.7750035133742448, + "grad_norm": 0.597019428303946, + "learning_rate": 1.6411627444182483e-07, + "loss": 0.2758, + "step": 37891 + }, + { + "epoch": 1.7750503583641728, + "grad_norm": 0.6726573647859903, + "learning_rate": 1.6404869980264303e-07, + "loss": 0.2855, + "step": 37892 + }, + { + "epoch": 1.7750972033541013, + "grad_norm": 0.5966350275766984, + "learning_rate": 1.6398113860627396e-07, + "loss": 0.2788, + "step": 37893 + }, + { + "epoch": 1.7751440483440297, + "grad_norm": 0.5280921081344113, + "learning_rate": 1.6391359085310587e-07, + "loss": 0.2486, + "step": 37894 + }, + { + "epoch": 1.775190893333958, + "grad_norm": 0.616504165038833, + "learning_rate": 1.638460565435282e-07, + "loss": 0.2706, + "step": 37895 + }, + { + "epoch": 1.7752377383238862, + "grad_norm": 0.5887856819020405, + "learning_rate": 1.6377853567792923e-07, + "loss": 0.2671, + "step": 37896 + }, + { + "epoch": 1.7752845833138147, + "grad_norm": 0.6045085496403326, + "learning_rate": 1.6371102825669783e-07, + "loss": 0.2714, + "step": 37897 + }, + { + "epoch": 1.775331428303743, + "grad_norm": 0.634071193344318, + "learning_rate": 1.6364353428022201e-07, + "loss": 0.2819, + "step": 37898 + }, + { + "epoch": 1.7753782732936711, + "grad_norm": 0.5672275467905638, + "learning_rate": 1.6357605374889062e-07, + "loss": 0.2581, + "step": 37899 + }, + { + "epoch": 1.7754251182835996, + "grad_norm": 0.6248442615085186, + "learning_rate": 1.6350858666309145e-07, + "loss": 0.2727, + "step": 37900 + }, + { + "epoch": 1.7754719632735279, + "grad_norm": 0.6099207714116869, + "learning_rate": 1.634411330232133e-07, + "loss": 0.2766, + "step": 37901 + }, + { + "epoch": 1.775518808263456, + "grad_norm": 0.6304947641872413, + "learning_rate": 1.6337369282964393e-07, + "loss": 0.2775, + "step": 37902 + }, + { + "epoch": 1.7755656532533846, + "grad_norm": 0.6287716820951599, + "learning_rate": 1.6330626608277222e-07, + "loss": 0.2768, + "step": 37903 + }, + { + "epoch": 1.775612498243313, + "grad_norm": 0.6569055012443367, + "learning_rate": 1.6323885278298533e-07, + "loss": 0.2996, + "step": 37904 + }, + { + "epoch": 1.775659343233241, + "grad_norm": 0.5903150913095817, + "learning_rate": 1.631714529306719e-07, + "loss": 0.2684, + "step": 37905 + }, + { + "epoch": 1.7757061882231695, + "grad_norm": 0.6324902902062566, + "learning_rate": 1.6310406652621875e-07, + "loss": 0.2741, + "step": 37906 + }, + { + "epoch": 1.775753033213098, + "grad_norm": 0.6135096275590167, + "learning_rate": 1.630366935700145e-07, + "loss": 0.2636, + "step": 37907 + }, + { + "epoch": 1.7757998782030262, + "grad_norm": 0.6034717915496528, + "learning_rate": 1.6296933406244692e-07, + "loss": 0.2778, + "step": 37908 + }, + { + "epoch": 1.7758467231929544, + "grad_norm": 0.5935567308325921, + "learning_rate": 1.6290198800390318e-07, + "loss": 0.2753, + "step": 37909 + }, + { + "epoch": 1.775893568182883, + "grad_norm": 0.5845121810845227, + "learning_rate": 1.6283465539477154e-07, + "loss": 0.2734, + "step": 37910 + }, + { + "epoch": 1.7759404131728111, + "grad_norm": 0.6073708042311975, + "learning_rate": 1.62767336235439e-07, + "loss": 0.2524, + "step": 37911 + }, + { + "epoch": 1.7759872581627394, + "grad_norm": 0.5834150311086045, + "learning_rate": 1.627000305262927e-07, + "loss": 0.2687, + "step": 37912 + }, + { + "epoch": 1.7760341031526679, + "grad_norm": 0.6389840274160755, + "learning_rate": 1.626327382677201e-07, + "loss": 0.2777, + "step": 37913 + }, + { + "epoch": 1.776080948142596, + "grad_norm": 0.5822605319158737, + "learning_rate": 1.6256545946010867e-07, + "loss": 0.2653, + "step": 37914 + }, + { + "epoch": 1.7761277931325243, + "grad_norm": 0.55204292675602, + "learning_rate": 1.624981941038456e-07, + "loss": 0.2545, + "step": 37915 + }, + { + "epoch": 1.7761746381224528, + "grad_norm": 0.5987847258188335, + "learning_rate": 1.6243094219931782e-07, + "loss": 0.2683, + "step": 37916 + }, + { + "epoch": 1.7762214831123813, + "grad_norm": 0.6038685133591254, + "learning_rate": 1.6236370374691306e-07, + "loss": 0.2563, + "step": 37917 + }, + { + "epoch": 1.7762683281023095, + "grad_norm": 0.6053759513766405, + "learning_rate": 1.6229647874701744e-07, + "loss": 0.2755, + "step": 37918 + }, + { + "epoch": 1.7763151730922377, + "grad_norm": 0.6113962191251889, + "learning_rate": 1.622292672000178e-07, + "loss": 0.2612, + "step": 37919 + }, + { + "epoch": 1.7763620180821662, + "grad_norm": 0.6079891670393678, + "learning_rate": 1.6216206910630084e-07, + "loss": 0.2667, + "step": 37920 + }, + { + "epoch": 1.7764088630720944, + "grad_norm": 0.6075340494358235, + "learning_rate": 1.620948844662537e-07, + "loss": 0.2607, + "step": 37921 + }, + { + "epoch": 1.7764557080620227, + "grad_norm": 0.6222705200922883, + "learning_rate": 1.6202771328026308e-07, + "loss": 0.3013, + "step": 37922 + }, + { + "epoch": 1.7765025530519511, + "grad_norm": 0.6134997445835139, + "learning_rate": 1.6196055554871525e-07, + "loss": 0.2564, + "step": 37923 + }, + { + "epoch": 1.7765493980418794, + "grad_norm": 0.6157114216630549, + "learning_rate": 1.618934112719972e-07, + "loss": 0.2755, + "step": 37924 + }, + { + "epoch": 1.7765962430318076, + "grad_norm": 0.6245997522396304, + "learning_rate": 1.618262804504947e-07, + "loss": 0.2724, + "step": 37925 + }, + { + "epoch": 1.776643088021736, + "grad_norm": 0.5747900459609078, + "learning_rate": 1.617591630845941e-07, + "loss": 0.2545, + "step": 37926 + }, + { + "epoch": 1.7766899330116646, + "grad_norm": 0.561773816570675, + "learning_rate": 1.6169205917468177e-07, + "loss": 0.2747, + "step": 37927 + }, + { + "epoch": 1.7767367780015926, + "grad_norm": 0.5946029349602042, + "learning_rate": 1.616249687211438e-07, + "loss": 0.2654, + "step": 37928 + }, + { + "epoch": 1.776783622991521, + "grad_norm": 0.5916173615356457, + "learning_rate": 1.6155789172436653e-07, + "loss": 0.2609, + "step": 37929 + }, + { + "epoch": 1.7768304679814495, + "grad_norm": 0.6042823540600197, + "learning_rate": 1.6149082818473634e-07, + "loss": 0.2731, + "step": 37930 + }, + { + "epoch": 1.7768773129713777, + "grad_norm": 0.5963889450218373, + "learning_rate": 1.6142377810263815e-07, + "loss": 0.2614, + "step": 37931 + }, + { + "epoch": 1.776924157961306, + "grad_norm": 0.6542795040091743, + "learning_rate": 1.6135674147845892e-07, + "loss": 0.2929, + "step": 37932 + }, + { + "epoch": 1.7769710029512344, + "grad_norm": 0.6052528555378328, + "learning_rate": 1.6128971831258305e-07, + "loss": 0.2503, + "step": 37933 + }, + { + "epoch": 1.7770178479411627, + "grad_norm": 0.6211704716871033, + "learning_rate": 1.6122270860539746e-07, + "loss": 0.277, + "step": 37934 + }, + { + "epoch": 1.777064692931091, + "grad_norm": 0.6075229635634197, + "learning_rate": 1.611557123572871e-07, + "loss": 0.2841, + "step": 37935 + }, + { + "epoch": 1.7771115379210194, + "grad_norm": 0.5932371464248594, + "learning_rate": 1.6108872956863835e-07, + "loss": 0.2577, + "step": 37936 + }, + { + "epoch": 1.7771583829109476, + "grad_norm": 0.6326282607353688, + "learning_rate": 1.6102176023983534e-07, + "loss": 0.2667, + "step": 37937 + }, + { + "epoch": 1.7772052279008759, + "grad_norm": 0.6281391871935897, + "learning_rate": 1.609548043712647e-07, + "loss": 0.278, + "step": 37938 + }, + { + "epoch": 1.7772520728908043, + "grad_norm": 0.5581771336327183, + "learning_rate": 1.608878619633114e-07, + "loss": 0.2701, + "step": 37939 + }, + { + "epoch": 1.7772989178807328, + "grad_norm": 0.6090458314977749, + "learning_rate": 1.6082093301636015e-07, + "loss": 0.2669, + "step": 37940 + }, + { + "epoch": 1.7773457628706608, + "grad_norm": 0.56811731106069, + "learning_rate": 1.6075401753079645e-07, + "loss": 0.269, + "step": 37941 + }, + { + "epoch": 1.7773926078605893, + "grad_norm": 0.5986014887475023, + "learning_rate": 1.6068711550700582e-07, + "loss": 0.3006, + "step": 37942 + }, + { + "epoch": 1.7774394528505177, + "grad_norm": 0.5838917985643465, + "learning_rate": 1.6062022694537272e-07, + "loss": 0.2812, + "step": 37943 + }, + { + "epoch": 1.777486297840446, + "grad_norm": 0.6044126222265839, + "learning_rate": 1.6055335184628208e-07, + "loss": 0.2715, + "step": 37944 + }, + { + "epoch": 1.7775331428303742, + "grad_norm": 0.5985787216431941, + "learning_rate": 1.6048649021011886e-07, + "loss": 0.265, + "step": 37945 + }, + { + "epoch": 1.7775799878203027, + "grad_norm": 0.6013064627893623, + "learning_rate": 1.6041964203726834e-07, + "loss": 0.2744, + "step": 37946 + }, + { + "epoch": 1.777626832810231, + "grad_norm": 0.5878801899765634, + "learning_rate": 1.603528073281141e-07, + "loss": 0.2683, + "step": 37947 + }, + { + "epoch": 1.7776736778001592, + "grad_norm": 0.6218757347531121, + "learning_rate": 1.602859860830422e-07, + "loss": 0.2785, + "step": 37948 + }, + { + "epoch": 1.7777205227900876, + "grad_norm": 0.6251858877571866, + "learning_rate": 1.6021917830243565e-07, + "loss": 0.2766, + "step": 37949 + }, + { + "epoch": 1.7777673677800159, + "grad_norm": 0.5627418243978226, + "learning_rate": 1.6015238398667975e-07, + "loss": 0.2487, + "step": 37950 + }, + { + "epoch": 1.7778142127699441, + "grad_norm": 0.6550097697707132, + "learning_rate": 1.600856031361589e-07, + "loss": 0.2825, + "step": 37951 + }, + { + "epoch": 1.7778610577598726, + "grad_norm": 0.6050335387501906, + "learning_rate": 1.6001883575125775e-07, + "loss": 0.2864, + "step": 37952 + }, + { + "epoch": 1.777907902749801, + "grad_norm": 0.6173878402034086, + "learning_rate": 1.5995208183235938e-07, + "loss": 0.275, + "step": 37953 + }, + { + "epoch": 1.7779547477397293, + "grad_norm": 0.5751024448128396, + "learning_rate": 1.598853413798493e-07, + "loss": 0.2518, + "step": 37954 + }, + { + "epoch": 1.7780015927296575, + "grad_norm": 0.5663729407895611, + "learning_rate": 1.598186143941105e-07, + "loss": 0.2556, + "step": 37955 + }, + { + "epoch": 1.778048437719586, + "grad_norm": 0.6025490786814031, + "learning_rate": 1.5975190087552716e-07, + "loss": 0.2732, + "step": 37956 + }, + { + "epoch": 1.7780952827095142, + "grad_norm": 0.555664491803194, + "learning_rate": 1.596852008244837e-07, + "loss": 0.2562, + "step": 37957 + }, + { + "epoch": 1.7781421276994425, + "grad_norm": 0.6448857040430768, + "learning_rate": 1.5961851424136337e-07, + "loss": 0.2678, + "step": 37958 + }, + { + "epoch": 1.778188972689371, + "grad_norm": 0.5944984316769057, + "learning_rate": 1.595518411265509e-07, + "loss": 0.2636, + "step": 37959 + }, + { + "epoch": 1.7782358176792992, + "grad_norm": 0.5882706355887463, + "learning_rate": 1.5948518148042936e-07, + "loss": 0.2912, + "step": 37960 + }, + { + "epoch": 1.7782826626692274, + "grad_norm": 0.5836227463591972, + "learning_rate": 1.594185353033817e-07, + "loss": 0.2814, + "step": 37961 + }, + { + "epoch": 1.7783295076591559, + "grad_norm": 0.6092964072325927, + "learning_rate": 1.5935190259579236e-07, + "loss": 0.2672, + "step": 37962 + }, + { + "epoch": 1.7783763526490843, + "grad_norm": 0.6062494297530214, + "learning_rate": 1.5928528335804438e-07, + "loss": 0.2847, + "step": 37963 + }, + { + "epoch": 1.7784231976390124, + "grad_norm": 0.5850115024444774, + "learning_rate": 1.5921867759052134e-07, + "loss": 0.2592, + "step": 37964 + }, + { + "epoch": 1.7784700426289408, + "grad_norm": 0.6170973029516706, + "learning_rate": 1.5915208529360654e-07, + "loss": 0.276, + "step": 37965 + }, + { + "epoch": 1.7785168876188693, + "grad_norm": 0.537748813932748, + "learning_rate": 1.590855064676833e-07, + "loss": 0.2526, + "step": 37966 + }, + { + "epoch": 1.7785637326087975, + "grad_norm": 0.5921729937343525, + "learning_rate": 1.5901894111313488e-07, + "loss": 0.2589, + "step": 37967 + }, + { + "epoch": 1.7786105775987258, + "grad_norm": 0.5875952635222228, + "learning_rate": 1.589523892303435e-07, + "loss": 0.2675, + "step": 37968 + }, + { + "epoch": 1.7786574225886542, + "grad_norm": 0.6253639965624977, + "learning_rate": 1.5888585081969276e-07, + "loss": 0.2713, + "step": 37969 + }, + { + "epoch": 1.7787042675785825, + "grad_norm": 0.5489215755456472, + "learning_rate": 1.588193258815654e-07, + "loss": 0.2607, + "step": 37970 + }, + { + "epoch": 1.7787511125685107, + "grad_norm": 0.5960042019350618, + "learning_rate": 1.5875281441634444e-07, + "loss": 0.2761, + "step": 37971 + }, + { + "epoch": 1.7787979575584392, + "grad_norm": 0.6131201516864264, + "learning_rate": 1.5868631642441317e-07, + "loss": 0.2746, + "step": 37972 + }, + { + "epoch": 1.7788448025483674, + "grad_norm": 0.6212964184194977, + "learning_rate": 1.5861983190615328e-07, + "loss": 0.2702, + "step": 37973 + }, + { + "epoch": 1.7788916475382957, + "grad_norm": 0.6346339526946518, + "learning_rate": 1.58553360861948e-07, + "loss": 0.2682, + "step": 37974 + }, + { + "epoch": 1.7789384925282241, + "grad_norm": 0.6228800047586196, + "learning_rate": 1.584869032921793e-07, + "loss": 0.2837, + "step": 37975 + }, + { + "epoch": 1.7789853375181526, + "grad_norm": 0.6322770910885381, + "learning_rate": 1.5842045919722994e-07, + "loss": 0.262, + "step": 37976 + }, + { + "epoch": 1.7790321825080806, + "grad_norm": 0.5924575656551945, + "learning_rate": 1.5835402857748233e-07, + "loss": 0.2603, + "step": 37977 + }, + { + "epoch": 1.779079027498009, + "grad_norm": 0.6006356662408132, + "learning_rate": 1.58287611433319e-07, + "loss": 0.2718, + "step": 37978 + }, + { + "epoch": 1.7791258724879375, + "grad_norm": 0.5925794487456139, + "learning_rate": 1.5822120776512156e-07, + "loss": 0.2683, + "step": 37979 + }, + { + "epoch": 1.7791727174778658, + "grad_norm": 0.6080445582322626, + "learning_rate": 1.581548175732725e-07, + "loss": 0.275, + "step": 37980 + }, + { + "epoch": 1.779219562467794, + "grad_norm": 0.5843818369302138, + "learning_rate": 1.580884408581543e-07, + "loss": 0.2652, + "step": 37981 + }, + { + "epoch": 1.7792664074577225, + "grad_norm": 0.6216186644609523, + "learning_rate": 1.5802207762014826e-07, + "loss": 0.2709, + "step": 37982 + }, + { + "epoch": 1.7793132524476507, + "grad_norm": 0.6366854707181099, + "learning_rate": 1.5795572785963638e-07, + "loss": 0.2837, + "step": 37983 + }, + { + "epoch": 1.779360097437579, + "grad_norm": 0.619227305944626, + "learning_rate": 1.5788939157700078e-07, + "loss": 0.2724, + "step": 37984 + }, + { + "epoch": 1.7794069424275074, + "grad_norm": 0.5575737250524688, + "learning_rate": 1.5782306877262315e-07, + "loss": 0.2642, + "step": 37985 + }, + { + "epoch": 1.7794537874174357, + "grad_norm": 0.5444287172545742, + "learning_rate": 1.5775675944688484e-07, + "loss": 0.2514, + "step": 37986 + }, + { + "epoch": 1.779500632407364, + "grad_norm": 0.5801776742327316, + "learning_rate": 1.576904636001675e-07, + "loss": 0.256, + "step": 37987 + }, + { + "epoch": 1.7795474773972924, + "grad_norm": 0.578825614829115, + "learning_rate": 1.5762418123285357e-07, + "loss": 0.2655, + "step": 37988 + }, + { + "epoch": 1.7795943223872208, + "grad_norm": 0.668234356367765, + "learning_rate": 1.5755791234532304e-07, + "loss": 0.2783, + "step": 37989 + }, + { + "epoch": 1.779641167377149, + "grad_norm": 0.5885937236001076, + "learning_rate": 1.5749165693795808e-07, + "loss": 0.2608, + "step": 37990 + }, + { + "epoch": 1.7796880123670773, + "grad_norm": 0.6088984201836148, + "learning_rate": 1.5742541501114038e-07, + "loss": 0.277, + "step": 37991 + }, + { + "epoch": 1.7797348573570058, + "grad_norm": 0.5787177136644616, + "learning_rate": 1.5735918656525017e-07, + "loss": 0.2522, + "step": 37992 + }, + { + "epoch": 1.779781702346934, + "grad_norm": 0.5492666460042986, + "learning_rate": 1.5729297160066908e-07, + "loss": 0.2477, + "step": 37993 + }, + { + "epoch": 1.7798285473368622, + "grad_norm": 0.6144201195236911, + "learning_rate": 1.572267701177782e-07, + "loss": 0.2878, + "step": 37994 + }, + { + "epoch": 1.7798753923267907, + "grad_norm": 0.5915890356907045, + "learning_rate": 1.5716058211695834e-07, + "loss": 0.2723, + "step": 37995 + }, + { + "epoch": 1.779922237316719, + "grad_norm": 0.6456989095894828, + "learning_rate": 1.5709440759859057e-07, + "loss": 0.2866, + "step": 37996 + }, + { + "epoch": 1.7799690823066472, + "grad_norm": 0.6030088890706798, + "learning_rate": 1.5702824656305572e-07, + "loss": 0.2755, + "step": 37997 + }, + { + "epoch": 1.7800159272965757, + "grad_norm": 0.5983845998340355, + "learning_rate": 1.5696209901073428e-07, + "loss": 0.2657, + "step": 37998 + }, + { + "epoch": 1.7800627722865041, + "grad_norm": 0.5928858326875779, + "learning_rate": 1.5689596494200681e-07, + "loss": 0.2648, + "step": 37999 + }, + { + "epoch": 1.7801096172764321, + "grad_norm": 0.6166189682057953, + "learning_rate": 1.568298443572544e-07, + "loss": 0.2761, + "step": 38000 + }, + { + "epoch": 1.7801564622663606, + "grad_norm": 0.580079412016338, + "learning_rate": 1.5676373725685757e-07, + "loss": 0.261, + "step": 38001 + }, + { + "epoch": 1.780203307256289, + "grad_norm": 0.60213193593446, + "learning_rate": 1.56697643641196e-07, + "loss": 0.2697, + "step": 38002 + }, + { + "epoch": 1.7802501522462173, + "grad_norm": 0.5757642454329598, + "learning_rate": 1.5663156351065107e-07, + "loss": 0.2615, + "step": 38003 + }, + { + "epoch": 1.7802969972361455, + "grad_norm": 0.6101480153227226, + "learning_rate": 1.5656549686560218e-07, + "loss": 0.274, + "step": 38004 + }, + { + "epoch": 1.780343842226074, + "grad_norm": 0.6401110825431335, + "learning_rate": 1.564994437064296e-07, + "loss": 0.2826, + "step": 38005 + }, + { + "epoch": 1.7803906872160022, + "grad_norm": 0.5934208505598997, + "learning_rate": 1.5643340403351386e-07, + "loss": 0.2602, + "step": 38006 + }, + { + "epoch": 1.7804375322059305, + "grad_norm": 0.5975284187046815, + "learning_rate": 1.5636737784723494e-07, + "loss": 0.2741, + "step": 38007 + }, + { + "epoch": 1.780484377195859, + "grad_norm": 0.5931255515371775, + "learning_rate": 1.5630136514797305e-07, + "loss": 0.2745, + "step": 38008 + }, + { + "epoch": 1.7805312221857872, + "grad_norm": 0.6295799759216628, + "learning_rate": 1.5623536593610767e-07, + "loss": 0.2835, + "step": 38009 + }, + { + "epoch": 1.7805780671757154, + "grad_norm": 0.6143423274329367, + "learning_rate": 1.5616938021201817e-07, + "loss": 0.2769, + "step": 38010 + }, + { + "epoch": 1.780624912165644, + "grad_norm": 0.6105267570576068, + "learning_rate": 1.5610340797608509e-07, + "loss": 0.2776, + "step": 38011 + }, + { + "epoch": 1.7806717571555724, + "grad_norm": 0.6192699031025695, + "learning_rate": 1.5603744922868756e-07, + "loss": 0.2695, + "step": 38012 + }, + { + "epoch": 1.7807186021455004, + "grad_norm": 0.5795005829068207, + "learning_rate": 1.5597150397020534e-07, + "loss": 0.2492, + "step": 38013 + }, + { + "epoch": 1.7807654471354288, + "grad_norm": 0.5793664396936741, + "learning_rate": 1.5590557220101805e-07, + "loss": 0.2586, + "step": 38014 + }, + { + "epoch": 1.7808122921253573, + "grad_norm": 0.5950969818553541, + "learning_rate": 1.5583965392150542e-07, + "loss": 0.2703, + "step": 38015 + }, + { + "epoch": 1.7808591371152855, + "grad_norm": 0.6150221296067603, + "learning_rate": 1.5577374913204658e-07, + "loss": 0.2716, + "step": 38016 + }, + { + "epoch": 1.7809059821052138, + "grad_norm": 0.6181197381250935, + "learning_rate": 1.5570785783301984e-07, + "loss": 0.2776, + "step": 38017 + }, + { + "epoch": 1.7809528270951422, + "grad_norm": 0.5943529728510302, + "learning_rate": 1.5564198002480546e-07, + "loss": 0.271, + "step": 38018 + }, + { + "epoch": 1.7809996720850705, + "grad_norm": 0.5978042978268069, + "learning_rate": 1.5557611570778204e-07, + "loss": 0.2754, + "step": 38019 + }, + { + "epoch": 1.7810465170749987, + "grad_norm": 0.5969549444102797, + "learning_rate": 1.5551026488232894e-07, + "loss": 0.2686, + "step": 38020 + }, + { + "epoch": 1.7810933620649272, + "grad_norm": 0.6289129900202873, + "learning_rate": 1.5544442754882538e-07, + "loss": 0.2693, + "step": 38021 + }, + { + "epoch": 1.7811402070548554, + "grad_norm": 0.608646464399686, + "learning_rate": 1.5537860370764962e-07, + "loss": 0.2761, + "step": 38022 + }, + { + "epoch": 1.7811870520447837, + "grad_norm": 0.5636033159290336, + "learning_rate": 1.5531279335918083e-07, + "loss": 0.2541, + "step": 38023 + }, + { + "epoch": 1.7812338970347121, + "grad_norm": 0.5900539310907715, + "learning_rate": 1.5524699650379728e-07, + "loss": 0.2516, + "step": 38024 + }, + { + "epoch": 1.7812807420246406, + "grad_norm": 0.6484713710323217, + "learning_rate": 1.5518121314187812e-07, + "loss": 0.2773, + "step": 38025 + }, + { + "epoch": 1.7813275870145688, + "grad_norm": 0.5575344930435725, + "learning_rate": 1.5511544327380167e-07, + "loss": 0.2758, + "step": 38026 + }, + { + "epoch": 1.781374432004497, + "grad_norm": 0.591698687338854, + "learning_rate": 1.5504968689994654e-07, + "loss": 0.2647, + "step": 38027 + }, + { + "epoch": 1.7814212769944255, + "grad_norm": 0.5947312716100012, + "learning_rate": 1.54983944020691e-07, + "loss": 0.2819, + "step": 38028 + }, + { + "epoch": 1.7814681219843538, + "grad_norm": 0.5672881928311581, + "learning_rate": 1.5491821463641338e-07, + "loss": 0.2518, + "step": 38029 + }, + { + "epoch": 1.781514966974282, + "grad_norm": 0.5643104539334964, + "learning_rate": 1.5485249874749254e-07, + "loss": 0.2759, + "step": 38030 + }, + { + "epoch": 1.7815618119642105, + "grad_norm": 0.6151668619686615, + "learning_rate": 1.5478679635430565e-07, + "loss": 0.2719, + "step": 38031 + }, + { + "epoch": 1.7816086569541387, + "grad_norm": 0.6079525906888819, + "learning_rate": 1.5472110745723134e-07, + "loss": 0.2535, + "step": 38032 + }, + { + "epoch": 1.781655501944067, + "grad_norm": 0.5965951677459553, + "learning_rate": 1.546554320566479e-07, + "loss": 0.2713, + "step": 38033 + }, + { + "epoch": 1.7817023469339954, + "grad_norm": 0.5806070518493215, + "learning_rate": 1.5458977015293254e-07, + "loss": 0.2553, + "step": 38034 + }, + { + "epoch": 1.781749191923924, + "grad_norm": 0.5602976221159381, + "learning_rate": 1.5452412174646354e-07, + "loss": 0.263, + "step": 38035 + }, + { + "epoch": 1.781796036913852, + "grad_norm": 0.5725018625602981, + "learning_rate": 1.5445848683761893e-07, + "loss": 0.2663, + "step": 38036 + }, + { + "epoch": 1.7818428819037804, + "grad_norm": 0.5697245241093667, + "learning_rate": 1.543928654267765e-07, + "loss": 0.255, + "step": 38037 + }, + { + "epoch": 1.7818897268937088, + "grad_norm": 0.6346203333407517, + "learning_rate": 1.5432725751431315e-07, + "loss": 0.2641, + "step": 38038 + }, + { + "epoch": 1.781936571883637, + "grad_norm": 0.6290273077801325, + "learning_rate": 1.542616631006072e-07, + "loss": 0.2861, + "step": 38039 + }, + { + "epoch": 1.7819834168735653, + "grad_norm": 0.6606936623074205, + "learning_rate": 1.5419608218603553e-07, + "loss": 0.268, + "step": 38040 + }, + { + "epoch": 1.7820302618634938, + "grad_norm": 0.6125867936001722, + "learning_rate": 1.541305147709757e-07, + "loss": 0.2571, + "step": 38041 + }, + { + "epoch": 1.782077106853422, + "grad_norm": 0.6512276955543073, + "learning_rate": 1.5406496085580507e-07, + "loss": 0.29, + "step": 38042 + }, + { + "epoch": 1.7821239518433503, + "grad_norm": 0.6056231594979246, + "learning_rate": 1.539994204409015e-07, + "loss": 0.2573, + "step": 38043 + }, + { + "epoch": 1.7821707968332787, + "grad_norm": 0.5568724945930658, + "learning_rate": 1.539338935266413e-07, + "loss": 0.2522, + "step": 38044 + }, + { + "epoch": 1.782217641823207, + "grad_norm": 0.6191429772783594, + "learning_rate": 1.5386838011340221e-07, + "loss": 0.274, + "step": 38045 + }, + { + "epoch": 1.7822644868131352, + "grad_norm": 0.5943786519084684, + "learning_rate": 1.5380288020156036e-07, + "loss": 0.2725, + "step": 38046 + }, + { + "epoch": 1.7823113318030637, + "grad_norm": 0.6237245427602531, + "learning_rate": 1.537373937914935e-07, + "loss": 0.2657, + "step": 38047 + }, + { + "epoch": 1.7823581767929921, + "grad_norm": 0.6083077668847642, + "learning_rate": 1.5367192088357797e-07, + "loss": 0.2671, + "step": 38048 + }, + { + "epoch": 1.7824050217829202, + "grad_norm": 0.6166667566904392, + "learning_rate": 1.5360646147819098e-07, + "loss": 0.2667, + "step": 38049 + }, + { + "epoch": 1.7824518667728486, + "grad_norm": 0.6078005503696343, + "learning_rate": 1.5354101557570943e-07, + "loss": 0.269, + "step": 38050 + }, + { + "epoch": 1.782498711762777, + "grad_norm": 0.599319399993954, + "learning_rate": 1.5347558317650945e-07, + "loss": 0.262, + "step": 38051 + }, + { + "epoch": 1.7825455567527053, + "grad_norm": 0.524727349130041, + "learning_rate": 1.5341016428096767e-07, + "loss": 0.2504, + "step": 38052 + }, + { + "epoch": 1.7825924017426336, + "grad_norm": 0.590655499908429, + "learning_rate": 1.5334475888946016e-07, + "loss": 0.2714, + "step": 38053 + }, + { + "epoch": 1.782639246732562, + "grad_norm": 0.6265935726407427, + "learning_rate": 1.532793670023641e-07, + "loss": 0.2768, + "step": 38054 + }, + { + "epoch": 1.7826860917224903, + "grad_norm": 0.5721034152977097, + "learning_rate": 1.532139886200551e-07, + "loss": 0.2585, + "step": 38055 + }, + { + "epoch": 1.7827329367124185, + "grad_norm": 0.596400391342132, + "learning_rate": 1.5314862374291002e-07, + "loss": 0.2676, + "step": 38056 + }, + { + "epoch": 1.782779781702347, + "grad_norm": 0.6016224064351876, + "learning_rate": 1.530832723713052e-07, + "loss": 0.2716, + "step": 38057 + }, + { + "epoch": 1.7828266266922752, + "grad_norm": 0.628737608609407, + "learning_rate": 1.5301793450561596e-07, + "loss": 0.2667, + "step": 38058 + }, + { + "epoch": 1.7828734716822034, + "grad_norm": 0.5848002476685906, + "learning_rate": 1.5295261014621866e-07, + "loss": 0.2545, + "step": 38059 + }, + { + "epoch": 1.782920316672132, + "grad_norm": 0.5673546524984748, + "learning_rate": 1.5288729929348877e-07, + "loss": 0.2548, + "step": 38060 + }, + { + "epoch": 1.7829671616620604, + "grad_norm": 0.6526781681299405, + "learning_rate": 1.5282200194780273e-07, + "loss": 0.2808, + "step": 38061 + }, + { + "epoch": 1.7830140066519886, + "grad_norm": 0.6080325505213761, + "learning_rate": 1.527567181095363e-07, + "loss": 0.2674, + "step": 38062 + }, + { + "epoch": 1.7830608516419169, + "grad_norm": 0.6168339472041814, + "learning_rate": 1.5269144777906475e-07, + "loss": 0.2716, + "step": 38063 + }, + { + "epoch": 1.7831076966318453, + "grad_norm": 0.5919876750851688, + "learning_rate": 1.5262619095676446e-07, + "loss": 0.2708, + "step": 38064 + }, + { + "epoch": 1.7831545416217736, + "grad_norm": 0.6219312590566947, + "learning_rate": 1.5256094764301038e-07, + "loss": 0.2747, + "step": 38065 + }, + { + "epoch": 1.7832013866117018, + "grad_norm": 0.6315632705764648, + "learning_rate": 1.524957178381775e-07, + "loss": 0.2683, + "step": 38066 + }, + { + "epoch": 1.7832482316016303, + "grad_norm": 0.5674111449870948, + "learning_rate": 1.5243050154264193e-07, + "loss": 0.2669, + "step": 38067 + }, + { + "epoch": 1.7832950765915585, + "grad_norm": 0.5869298191805978, + "learning_rate": 1.5236529875677865e-07, + "loss": 0.2649, + "step": 38068 + }, + { + "epoch": 1.7833419215814867, + "grad_norm": 0.697651076934788, + "learning_rate": 1.5230010948096313e-07, + "loss": 0.2921, + "step": 38069 + }, + { + "epoch": 1.7833887665714152, + "grad_norm": 0.583607720022069, + "learning_rate": 1.5223493371557095e-07, + "loss": 0.2737, + "step": 38070 + }, + { + "epoch": 1.7834356115613437, + "grad_norm": 0.5883712205685228, + "learning_rate": 1.52169771460976e-07, + "loss": 0.2527, + "step": 38071 + }, + { + "epoch": 1.7834824565512717, + "grad_norm": 0.6017195002277139, + "learning_rate": 1.5210462271755433e-07, + "loss": 0.2724, + "step": 38072 + }, + { + "epoch": 1.7835293015412002, + "grad_norm": 0.6337800959931402, + "learning_rate": 1.5203948748568008e-07, + "loss": 0.2907, + "step": 38073 + }, + { + "epoch": 1.7835761465311286, + "grad_norm": 0.6006914327496085, + "learning_rate": 1.5197436576572854e-07, + "loss": 0.2836, + "step": 38074 + }, + { + "epoch": 1.7836229915210569, + "grad_norm": 0.5917978339571701, + "learning_rate": 1.519092575580744e-07, + "loss": 0.2752, + "step": 38075 + }, + { + "epoch": 1.783669836510985, + "grad_norm": 0.565151148854537, + "learning_rate": 1.518441628630926e-07, + "loss": 0.2601, + "step": 38076 + }, + { + "epoch": 1.7837166815009136, + "grad_norm": 0.5757303468241534, + "learning_rate": 1.5177908168115706e-07, + "loss": 0.2749, + "step": 38077 + }, + { + "epoch": 1.7837635264908418, + "grad_norm": 0.6218451225570539, + "learning_rate": 1.5171401401264274e-07, + "loss": 0.2653, + "step": 38078 + }, + { + "epoch": 1.78381037148077, + "grad_norm": 0.5788509943584452, + "learning_rate": 1.5164895985792433e-07, + "loss": 0.2593, + "step": 38079 + }, + { + "epoch": 1.7838572164706985, + "grad_norm": 0.5661023544399589, + "learning_rate": 1.515839192173757e-07, + "loss": 0.2706, + "step": 38080 + }, + { + "epoch": 1.7839040614606267, + "grad_norm": 0.5886718883921805, + "learning_rate": 1.5151889209137127e-07, + "loss": 0.2704, + "step": 38081 + }, + { + "epoch": 1.783950906450555, + "grad_norm": 0.6160746284532619, + "learning_rate": 1.5145387848028574e-07, + "loss": 0.2738, + "step": 38082 + }, + { + "epoch": 1.7839977514404834, + "grad_norm": 0.5345392368073162, + "learning_rate": 1.5138887838449246e-07, + "loss": 0.2548, + "step": 38083 + }, + { + "epoch": 1.784044596430412, + "grad_norm": 0.5778556537589319, + "learning_rate": 1.513238918043658e-07, + "loss": 0.2524, + "step": 38084 + }, + { + "epoch": 1.78409144142034, + "grad_norm": 0.6031757530596413, + "learning_rate": 1.5125891874027991e-07, + "loss": 0.2814, + "step": 38085 + }, + { + "epoch": 1.7841382864102684, + "grad_norm": 0.5918311876645221, + "learning_rate": 1.5119395919260897e-07, + "loss": 0.2721, + "step": 38086 + }, + { + "epoch": 1.7841851314001969, + "grad_norm": 0.582379290794651, + "learning_rate": 1.51129013161726e-07, + "loss": 0.2766, + "step": 38087 + }, + { + "epoch": 1.784231976390125, + "grad_norm": 0.6092442541404469, + "learning_rate": 1.510640806480057e-07, + "loss": 0.2676, + "step": 38088 + }, + { + "epoch": 1.7842788213800533, + "grad_norm": 0.6223955110306344, + "learning_rate": 1.5099916165182083e-07, + "loss": 0.2975, + "step": 38089 + }, + { + "epoch": 1.7843256663699818, + "grad_norm": 0.5805139797102253, + "learning_rate": 1.509342561735455e-07, + "loss": 0.2742, + "step": 38090 + }, + { + "epoch": 1.78437251135991, + "grad_norm": 0.5377801435511714, + "learning_rate": 1.508693642135528e-07, + "loss": 0.2538, + "step": 38091 + }, + { + "epoch": 1.7844193563498383, + "grad_norm": 0.5864933638666162, + "learning_rate": 1.508044857722171e-07, + "loss": 0.2755, + "step": 38092 + }, + { + "epoch": 1.7844662013397667, + "grad_norm": 0.6087443501789791, + "learning_rate": 1.5073962084991067e-07, + "loss": 0.2649, + "step": 38093 + }, + { + "epoch": 1.784513046329695, + "grad_norm": 0.5986746392202507, + "learning_rate": 1.506747694470076e-07, + "loss": 0.2566, + "step": 38094 + }, + { + "epoch": 1.7845598913196232, + "grad_norm": 0.5673952662429722, + "learning_rate": 1.5060993156388064e-07, + "loss": 0.2709, + "step": 38095 + }, + { + "epoch": 1.7846067363095517, + "grad_norm": 0.5670551457306957, + "learning_rate": 1.505451072009026e-07, + "loss": 0.2595, + "step": 38096 + }, + { + "epoch": 1.7846535812994802, + "grad_norm": 0.5982152530863853, + "learning_rate": 1.504802963584473e-07, + "loss": 0.2842, + "step": 38097 + }, + { + "epoch": 1.7847004262894084, + "grad_norm": 0.612029063532133, + "learning_rate": 1.5041549903688722e-07, + "loss": 0.277, + "step": 38098 + }, + { + "epoch": 1.7847472712793366, + "grad_norm": 0.6261259723930435, + "learning_rate": 1.5035071523659572e-07, + "loss": 0.2799, + "step": 38099 + }, + { + "epoch": 1.784794116269265, + "grad_norm": 0.5617749464228248, + "learning_rate": 1.5028594495794551e-07, + "loss": 0.2566, + "step": 38100 + }, + { + "epoch": 1.7848409612591933, + "grad_norm": 0.6083079622965107, + "learning_rate": 1.5022118820130855e-07, + "loss": 0.2829, + "step": 38101 + }, + { + "epoch": 1.7848878062491216, + "grad_norm": 0.6259246661811191, + "learning_rate": 1.501564449670581e-07, + "loss": 0.2742, + "step": 38102 + }, + { + "epoch": 1.78493465123905, + "grad_norm": 0.5672802870039051, + "learning_rate": 1.5009171525556672e-07, + "loss": 0.2678, + "step": 38103 + }, + { + "epoch": 1.7849814962289783, + "grad_norm": 0.6295507144613856, + "learning_rate": 1.500269990672068e-07, + "loss": 0.2826, + "step": 38104 + }, + { + "epoch": 1.7850283412189065, + "grad_norm": 0.6251612062564228, + "learning_rate": 1.4996229640235089e-07, + "loss": 0.2734, + "step": 38105 + }, + { + "epoch": 1.785075186208835, + "grad_norm": 0.6058901452405481, + "learning_rate": 1.498976072613717e-07, + "loss": 0.2758, + "step": 38106 + }, + { + "epoch": 1.7851220311987634, + "grad_norm": 0.5534336544955621, + "learning_rate": 1.4983293164464118e-07, + "loss": 0.2402, + "step": 38107 + }, + { + "epoch": 1.7851688761886915, + "grad_norm": 0.5917344810810686, + "learning_rate": 1.49768269552531e-07, + "loss": 0.2722, + "step": 38108 + }, + { + "epoch": 1.78521572117862, + "grad_norm": 0.5810495313958781, + "learning_rate": 1.4970362098541357e-07, + "loss": 0.2636, + "step": 38109 + }, + { + "epoch": 1.7852625661685484, + "grad_norm": 0.597170066126833, + "learning_rate": 1.4963898594366144e-07, + "loss": 0.2729, + "step": 38110 + }, + { + "epoch": 1.7853094111584766, + "grad_norm": 0.6023018747686129, + "learning_rate": 1.4957436442764595e-07, + "loss": 0.276, + "step": 38111 + }, + { + "epoch": 1.7853562561484049, + "grad_norm": 0.600680043366601, + "learning_rate": 1.4950975643773957e-07, + "loss": 0.271, + "step": 38112 + }, + { + "epoch": 1.7854031011383333, + "grad_norm": 0.5979528736162424, + "learning_rate": 1.494451619743134e-07, + "loss": 0.27, + "step": 38113 + }, + { + "epoch": 1.7854499461282616, + "grad_norm": 0.5650137150404628, + "learning_rate": 1.4938058103774024e-07, + "loss": 0.2475, + "step": 38114 + }, + { + "epoch": 1.7854967911181898, + "grad_norm": 0.641268520286322, + "learning_rate": 1.4931601362839027e-07, + "loss": 0.2859, + "step": 38115 + }, + { + "epoch": 1.7855436361081183, + "grad_norm": 0.6447582691983563, + "learning_rate": 1.4925145974663603e-07, + "loss": 0.2594, + "step": 38116 + }, + { + "epoch": 1.7855904810980465, + "grad_norm": 0.6089686016368033, + "learning_rate": 1.4918691939284858e-07, + "loss": 0.2838, + "step": 38117 + }, + { + "epoch": 1.7856373260879748, + "grad_norm": 0.6344974550729728, + "learning_rate": 1.4912239256740013e-07, + "loss": 0.281, + "step": 38118 + }, + { + "epoch": 1.7856841710779032, + "grad_norm": 0.5731864191751028, + "learning_rate": 1.4905787927066096e-07, + "loss": 0.2777, + "step": 38119 + }, + { + "epoch": 1.7857310160678317, + "grad_norm": 0.5668410266403027, + "learning_rate": 1.4899337950300296e-07, + "loss": 0.2595, + "step": 38120 + }, + { + "epoch": 1.7857778610577597, + "grad_norm": 0.6045453342671694, + "learning_rate": 1.4892889326479725e-07, + "loss": 0.2676, + "step": 38121 + }, + { + "epoch": 1.7858247060476882, + "grad_norm": 0.6179764957644859, + "learning_rate": 1.4886442055641464e-07, + "loss": 0.2739, + "step": 38122 + }, + { + "epoch": 1.7858715510376166, + "grad_norm": 0.5818723610714286, + "learning_rate": 1.487999613782265e-07, + "loss": 0.261, + "step": 38123 + }, + { + "epoch": 1.7859183960275449, + "grad_norm": 0.621353129190716, + "learning_rate": 1.4873551573060335e-07, + "loss": 0.2777, + "step": 38124 + }, + { + "epoch": 1.7859652410174731, + "grad_norm": 0.6755179258902105, + "learning_rate": 1.4867108361391686e-07, + "loss": 0.2716, + "step": 38125 + }, + { + "epoch": 1.7860120860074016, + "grad_norm": 0.5887001546499039, + "learning_rate": 1.48606665028537e-07, + "loss": 0.2785, + "step": 38126 + }, + { + "epoch": 1.7860589309973298, + "grad_norm": 0.5945582347266183, + "learning_rate": 1.4854225997483457e-07, + "loss": 0.2501, + "step": 38127 + }, + { + "epoch": 1.786105775987258, + "grad_norm": 0.5986905268653766, + "learning_rate": 1.4847786845318096e-07, + "loss": 0.2587, + "step": 38128 + }, + { + "epoch": 1.7861526209771865, + "grad_norm": 0.5903997214974628, + "learning_rate": 1.4841349046394588e-07, + "loss": 0.2839, + "step": 38129 + }, + { + "epoch": 1.7861994659671148, + "grad_norm": 0.5880573657609403, + "learning_rate": 1.4834912600750011e-07, + "loss": 0.2915, + "step": 38130 + }, + { + "epoch": 1.786246310957043, + "grad_norm": 0.6338451681224033, + "learning_rate": 1.4828477508421425e-07, + "loss": 0.279, + "step": 38131 + }, + { + "epoch": 1.7862931559469715, + "grad_norm": 0.5627646248820745, + "learning_rate": 1.4822043769445792e-07, + "loss": 0.2594, + "step": 38132 + }, + { + "epoch": 1.7863400009369, + "grad_norm": 0.5648959910447136, + "learning_rate": 1.481561138386023e-07, + "loss": 0.2599, + "step": 38133 + }, + { + "epoch": 1.7863868459268282, + "grad_norm": 0.6316715678722116, + "learning_rate": 1.480918035170173e-07, + "loss": 0.2923, + "step": 38134 + }, + { + "epoch": 1.7864336909167564, + "grad_norm": 0.5725293772094721, + "learning_rate": 1.4802750673007238e-07, + "loss": 0.2543, + "step": 38135 + }, + { + "epoch": 1.7864805359066849, + "grad_norm": 0.6220136564489561, + "learning_rate": 1.4796322347813808e-07, + "loss": 0.2897, + "step": 38136 + }, + { + "epoch": 1.7865273808966131, + "grad_norm": 0.5944780389681924, + "learning_rate": 1.4789895376158464e-07, + "loss": 0.2618, + "step": 38137 + }, + { + "epoch": 1.7865742258865414, + "grad_norm": 0.5853880204789174, + "learning_rate": 1.4783469758078123e-07, + "loss": 0.2817, + "step": 38138 + }, + { + "epoch": 1.7866210708764698, + "grad_norm": 0.5859505301338098, + "learning_rate": 1.477704549360978e-07, + "loss": 0.2567, + "step": 38139 + }, + { + "epoch": 1.786667915866398, + "grad_norm": 0.6121891065967556, + "learning_rate": 1.4770622582790438e-07, + "loss": 0.2527, + "step": 38140 + }, + { + "epoch": 1.7867147608563263, + "grad_norm": 0.5827750244997932, + "learning_rate": 1.4764201025657065e-07, + "loss": 0.27, + "step": 38141 + }, + { + "epoch": 1.7867616058462548, + "grad_norm": 0.6104412916798498, + "learning_rate": 1.475778082224655e-07, + "loss": 0.2734, + "step": 38142 + }, + { + "epoch": 1.7868084508361832, + "grad_norm": 0.5848805802982899, + "learning_rate": 1.4751361972595912e-07, + "loss": 0.2702, + "step": 38143 + }, + { + "epoch": 1.7868552958261112, + "grad_norm": 0.5941222181169032, + "learning_rate": 1.4744944476742046e-07, + "loss": 0.2697, + "step": 38144 + }, + { + "epoch": 1.7869021408160397, + "grad_norm": 0.5868291993894216, + "learning_rate": 1.473852833472189e-07, + "loss": 0.275, + "step": 38145 + }, + { + "epoch": 1.7869489858059682, + "grad_norm": 0.6142009184136545, + "learning_rate": 1.4732113546572364e-07, + "loss": 0.2769, + "step": 38146 + }, + { + "epoch": 1.7869958307958964, + "grad_norm": 0.6724721977071298, + "learning_rate": 1.4725700112330376e-07, + "loss": 0.2978, + "step": 38147 + }, + { + "epoch": 1.7870426757858247, + "grad_norm": 0.5842531311770223, + "learning_rate": 1.4719288032032902e-07, + "loss": 0.2807, + "step": 38148 + }, + { + "epoch": 1.7870895207757531, + "grad_norm": 0.6030654091072631, + "learning_rate": 1.47128773057168e-07, + "loss": 0.2683, + "step": 38149 + }, + { + "epoch": 1.7871363657656814, + "grad_norm": 0.5932609549306033, + "learning_rate": 1.47064679334189e-07, + "loss": 0.2617, + "step": 38150 + }, + { + "epoch": 1.7871832107556096, + "grad_norm": 0.6063859550277789, + "learning_rate": 1.4700059915176145e-07, + "loss": 0.2649, + "step": 38151 + }, + { + "epoch": 1.787230055745538, + "grad_norm": 0.5906646103005865, + "learning_rate": 1.4693653251025398e-07, + "loss": 0.2711, + "step": 38152 + }, + { + "epoch": 1.7872769007354663, + "grad_norm": 0.6292424237257144, + "learning_rate": 1.468724794100354e-07, + "loss": 0.2965, + "step": 38153 + }, + { + "epoch": 1.7873237457253945, + "grad_norm": 0.5955939332738113, + "learning_rate": 1.4680843985147437e-07, + "loss": 0.2645, + "step": 38154 + }, + { + "epoch": 1.787370590715323, + "grad_norm": 0.6285264143640309, + "learning_rate": 1.467444138349397e-07, + "loss": 0.2682, + "step": 38155 + }, + { + "epoch": 1.7874174357052515, + "grad_norm": 0.6269999903536947, + "learning_rate": 1.466804013607992e-07, + "loss": 0.2601, + "step": 38156 + }, + { + "epoch": 1.7874642806951795, + "grad_norm": 0.5844173637287513, + "learning_rate": 1.4661640242942143e-07, + "loss": 0.2766, + "step": 38157 + }, + { + "epoch": 1.787511125685108, + "grad_norm": 0.5934337376456005, + "learning_rate": 1.4655241704117446e-07, + "loss": 0.2619, + "step": 38158 + }, + { + "epoch": 1.7875579706750364, + "grad_norm": 0.5621041375129663, + "learning_rate": 1.4648844519642713e-07, + "loss": 0.2513, + "step": 38159 + }, + { + "epoch": 1.7876048156649647, + "grad_norm": 0.5358539628558043, + "learning_rate": 1.4642448689554695e-07, + "loss": 0.2573, + "step": 38160 + }, + { + "epoch": 1.787651660654893, + "grad_norm": 0.5956496330557508, + "learning_rate": 1.4636054213890276e-07, + "loss": 0.2764, + "step": 38161 + }, + { + "epoch": 1.7876985056448214, + "grad_norm": 0.5727413132400645, + "learning_rate": 1.4629661092686182e-07, + "loss": 0.2517, + "step": 38162 + }, + { + "epoch": 1.7877453506347496, + "grad_norm": 0.5918486563474233, + "learning_rate": 1.4623269325979266e-07, + "loss": 0.2585, + "step": 38163 + }, + { + "epoch": 1.7877921956246778, + "grad_norm": 0.5754679965507244, + "learning_rate": 1.4616878913806255e-07, + "loss": 0.2524, + "step": 38164 + }, + { + "epoch": 1.7878390406146063, + "grad_norm": 0.6155830375515075, + "learning_rate": 1.4610489856203918e-07, + "loss": 0.2706, + "step": 38165 + }, + { + "epoch": 1.7878858856045345, + "grad_norm": 0.6040931896606411, + "learning_rate": 1.4604102153209038e-07, + "loss": 0.2703, + "step": 38166 + }, + { + "epoch": 1.7879327305944628, + "grad_norm": 0.5713555964724653, + "learning_rate": 1.4597715804858443e-07, + "loss": 0.2631, + "step": 38167 + }, + { + "epoch": 1.7879795755843912, + "grad_norm": 0.611750664274985, + "learning_rate": 1.4591330811188797e-07, + "loss": 0.2694, + "step": 38168 + }, + { + "epoch": 1.7880264205743197, + "grad_norm": 0.6173263604300309, + "learning_rate": 1.4584947172236853e-07, + "loss": 0.2672, + "step": 38169 + }, + { + "epoch": 1.788073265564248, + "grad_norm": 0.615906927711723, + "learning_rate": 1.4578564888039437e-07, + "loss": 0.281, + "step": 38170 + }, + { + "epoch": 1.7881201105541762, + "grad_norm": 0.6639012730600784, + "learning_rate": 1.4572183958633136e-07, + "loss": 0.2833, + "step": 38171 + }, + { + "epoch": 1.7881669555441047, + "grad_norm": 0.5851226921729239, + "learning_rate": 1.4565804384054776e-07, + "loss": 0.2592, + "step": 38172 + }, + { + "epoch": 1.788213800534033, + "grad_norm": 0.5816310328413197, + "learning_rate": 1.4559426164341056e-07, + "loss": 0.2569, + "step": 38173 + }, + { + "epoch": 1.7882606455239611, + "grad_norm": 0.5938793148646632, + "learning_rate": 1.455304929952864e-07, + "loss": 0.2697, + "step": 38174 + }, + { + "epoch": 1.7883074905138896, + "grad_norm": 0.6334595563079577, + "learning_rate": 1.4546673789654214e-07, + "loss": 0.2677, + "step": 38175 + }, + { + "epoch": 1.7883543355038178, + "grad_norm": 0.5805948323126355, + "learning_rate": 1.4540299634754535e-07, + "loss": 0.2583, + "step": 38176 + }, + { + "epoch": 1.788401180493746, + "grad_norm": 0.6257902527630008, + "learning_rate": 1.4533926834866292e-07, + "loss": 0.2771, + "step": 38177 + }, + { + "epoch": 1.7884480254836745, + "grad_norm": 0.5748163736533609, + "learning_rate": 1.4527555390026066e-07, + "loss": 0.2637, + "step": 38178 + }, + { + "epoch": 1.788494870473603, + "grad_norm": 0.6298630002776514, + "learning_rate": 1.4521185300270607e-07, + "loss": 0.2865, + "step": 38179 + }, + { + "epoch": 1.788541715463531, + "grad_norm": 0.6123594981995926, + "learning_rate": 1.4514816565636497e-07, + "loss": 0.2678, + "step": 38180 + }, + { + "epoch": 1.7885885604534595, + "grad_norm": 0.5583844085754976, + "learning_rate": 1.4508449186160457e-07, + "loss": 0.2645, + "step": 38181 + }, + { + "epoch": 1.788635405443388, + "grad_norm": 0.6706842165116021, + "learning_rate": 1.4502083161879093e-07, + "loss": 0.2906, + "step": 38182 + }, + { + "epoch": 1.7886822504333162, + "grad_norm": 0.6095420961965806, + "learning_rate": 1.4495718492829075e-07, + "loss": 0.2713, + "step": 38183 + }, + { + "epoch": 1.7887290954232444, + "grad_norm": 0.5712443422213028, + "learning_rate": 1.4489355179046982e-07, + "loss": 0.2681, + "step": 38184 + }, + { + "epoch": 1.788775940413173, + "grad_norm": 0.6169789515537465, + "learning_rate": 1.448299322056948e-07, + "loss": 0.2768, + "step": 38185 + }, + { + "epoch": 1.7888227854031011, + "grad_norm": 0.5866904221248316, + "learning_rate": 1.4476632617433122e-07, + "loss": 0.2605, + "step": 38186 + }, + { + "epoch": 1.7888696303930294, + "grad_norm": 0.5585690052427009, + "learning_rate": 1.4470273369674575e-07, + "loss": 0.2618, + "step": 38187 + }, + { + "epoch": 1.7889164753829578, + "grad_norm": 0.6450432260029512, + "learning_rate": 1.4463915477330366e-07, + "loss": 0.2778, + "step": 38188 + }, + { + "epoch": 1.788963320372886, + "grad_norm": 0.66104164991771, + "learning_rate": 1.4457558940437155e-07, + "loss": 0.3036, + "step": 38189 + }, + { + "epoch": 1.7890101653628143, + "grad_norm": 0.5954917193363027, + "learning_rate": 1.4451203759031501e-07, + "loss": 0.2745, + "step": 38190 + }, + { + "epoch": 1.7890570103527428, + "grad_norm": 0.6153899647514498, + "learning_rate": 1.4444849933149985e-07, + "loss": 0.2783, + "step": 38191 + }, + { + "epoch": 1.7891038553426712, + "grad_norm": 0.6041459646797862, + "learning_rate": 1.4438497462829105e-07, + "loss": 0.2666, + "step": 38192 + }, + { + "epoch": 1.7891507003325993, + "grad_norm": 0.6167700278258006, + "learning_rate": 1.4432146348105497e-07, + "loss": 0.273, + "step": 38193 + }, + { + "epoch": 1.7891975453225277, + "grad_norm": 0.6398072349328133, + "learning_rate": 1.4425796589015635e-07, + "loss": 0.2865, + "step": 38194 + }, + { + "epoch": 1.7892443903124562, + "grad_norm": 0.5731097186471589, + "learning_rate": 1.4419448185596126e-07, + "loss": 0.2767, + "step": 38195 + }, + { + "epoch": 1.7892912353023844, + "grad_norm": 0.6168068128810653, + "learning_rate": 1.4413101137883495e-07, + "loss": 0.2695, + "step": 38196 + }, + { + "epoch": 1.7893380802923127, + "grad_norm": 0.6655693729018489, + "learning_rate": 1.4406755445914272e-07, + "loss": 0.2826, + "step": 38197 + }, + { + "epoch": 1.7893849252822411, + "grad_norm": 0.6300481612013218, + "learning_rate": 1.4400411109724955e-07, + "loss": 0.2887, + "step": 38198 + }, + { + "epoch": 1.7894317702721694, + "grad_norm": 0.6065389920819819, + "learning_rate": 1.439406812935204e-07, + "loss": 0.2752, + "step": 38199 + }, + { + "epoch": 1.7894786152620976, + "grad_norm": 0.5515054206512532, + "learning_rate": 1.4387726504832029e-07, + "loss": 0.2502, + "step": 38200 + }, + { + "epoch": 1.789525460252026, + "grad_norm": 0.610230166433642, + "learning_rate": 1.4381386236201417e-07, + "loss": 0.2809, + "step": 38201 + }, + { + "epoch": 1.7895723052419543, + "grad_norm": 0.647154512877947, + "learning_rate": 1.4375047323496733e-07, + "loss": 0.259, + "step": 38202 + }, + { + "epoch": 1.7896191502318826, + "grad_norm": 0.6027310860903048, + "learning_rate": 1.436870976675442e-07, + "loss": 0.268, + "step": 38203 + }, + { + "epoch": 1.789665995221811, + "grad_norm": 0.6287371915814893, + "learning_rate": 1.4362373566011002e-07, + "loss": 0.2762, + "step": 38204 + }, + { + "epoch": 1.7897128402117395, + "grad_norm": 0.6186739174826897, + "learning_rate": 1.4356038721302894e-07, + "loss": 0.2577, + "step": 38205 + }, + { + "epoch": 1.7897596852016677, + "grad_norm": 0.5812818643501799, + "learning_rate": 1.4349705232666517e-07, + "loss": 0.2487, + "step": 38206 + }, + { + "epoch": 1.789806530191596, + "grad_norm": 0.6067169986596368, + "learning_rate": 1.4343373100138364e-07, + "loss": 0.2944, + "step": 38207 + }, + { + "epoch": 1.7898533751815244, + "grad_norm": 0.6141906338528396, + "learning_rate": 1.433704232375488e-07, + "loss": 0.2777, + "step": 38208 + }, + { + "epoch": 1.7899002201714527, + "grad_norm": 0.5831422815303263, + "learning_rate": 1.4330712903552453e-07, + "loss": 0.2732, + "step": 38209 + }, + { + "epoch": 1.789947065161381, + "grad_norm": 0.5728037815650567, + "learning_rate": 1.4324384839567608e-07, + "loss": 0.2678, + "step": 38210 + }, + { + "epoch": 1.7899939101513094, + "grad_norm": 0.6017346876843201, + "learning_rate": 1.431805813183665e-07, + "loss": 0.2691, + "step": 38211 + }, + { + "epoch": 1.7900407551412376, + "grad_norm": 0.6400181344100502, + "learning_rate": 1.431173278039605e-07, + "loss": 0.2742, + "step": 38212 + }, + { + "epoch": 1.7900876001311659, + "grad_norm": 0.6366448187211478, + "learning_rate": 1.4305408785282165e-07, + "loss": 0.283, + "step": 38213 + }, + { + "epoch": 1.7901344451210943, + "grad_norm": 0.6185073178229309, + "learning_rate": 1.4299086146531415e-07, + "loss": 0.2632, + "step": 38214 + }, + { + "epoch": 1.7901812901110228, + "grad_norm": 0.6517751609927769, + "learning_rate": 1.4292764864180186e-07, + "loss": 0.2808, + "step": 38215 + }, + { + "epoch": 1.7902281351009508, + "grad_norm": 0.6197637485772909, + "learning_rate": 1.4286444938264866e-07, + "loss": 0.2919, + "step": 38216 + }, + { + "epoch": 1.7902749800908793, + "grad_norm": 0.6169125080183249, + "learning_rate": 1.4280126368821812e-07, + "loss": 0.2742, + "step": 38217 + }, + { + "epoch": 1.7903218250808077, + "grad_norm": 0.6114997953954889, + "learning_rate": 1.4273809155887359e-07, + "loss": 0.2767, + "step": 38218 + }, + { + "epoch": 1.790368670070736, + "grad_norm": 0.6054771331009269, + "learning_rate": 1.426749329949792e-07, + "loss": 0.265, + "step": 38219 + }, + { + "epoch": 1.7904155150606642, + "grad_norm": 0.584269243938498, + "learning_rate": 1.4261178799689772e-07, + "loss": 0.2671, + "step": 38220 + }, + { + "epoch": 1.7904623600505927, + "grad_norm": 0.5831932836241789, + "learning_rate": 1.4254865656499306e-07, + "loss": 0.2701, + "step": 38221 + }, + { + "epoch": 1.790509205040521, + "grad_norm": 0.6252977836104819, + "learning_rate": 1.4248553869962877e-07, + "loss": 0.2893, + "step": 38222 + }, + { + "epoch": 1.7905560500304492, + "grad_norm": 0.5887103710915442, + "learning_rate": 1.424224344011671e-07, + "loss": 0.2494, + "step": 38223 + }, + { + "epoch": 1.7906028950203776, + "grad_norm": 0.6051304910277379, + "learning_rate": 1.423593436699719e-07, + "loss": 0.2659, + "step": 38224 + }, + { + "epoch": 1.7906497400103059, + "grad_norm": 0.5753822124143629, + "learning_rate": 1.4229626650640593e-07, + "loss": 0.2551, + "step": 38225 + }, + { + "epoch": 1.790696585000234, + "grad_norm": 0.5657056240247893, + "learning_rate": 1.422332029108328e-07, + "loss": 0.2564, + "step": 38226 + }, + { + "epoch": 1.7907434299901626, + "grad_norm": 0.5976923266213081, + "learning_rate": 1.4217015288361474e-07, + "loss": 0.2654, + "step": 38227 + }, + { + "epoch": 1.790790274980091, + "grad_norm": 0.6111394770972937, + "learning_rate": 1.4210711642511505e-07, + "loss": 0.2719, + "step": 38228 + }, + { + "epoch": 1.790837119970019, + "grad_norm": 0.5975862542255846, + "learning_rate": 1.4204409353569592e-07, + "loss": 0.2529, + "step": 38229 + }, + { + "epoch": 1.7908839649599475, + "grad_norm": 0.5905073999879383, + "learning_rate": 1.4198108421572044e-07, + "loss": 0.2592, + "step": 38230 + }, + { + "epoch": 1.790930809949876, + "grad_norm": 0.5788446984411537, + "learning_rate": 1.4191808846555132e-07, + "loss": 0.2757, + "step": 38231 + }, + { + "epoch": 1.7909776549398042, + "grad_norm": 0.6000293318696687, + "learning_rate": 1.418551062855511e-07, + "loss": 0.2656, + "step": 38232 + }, + { + "epoch": 1.7910244999297324, + "grad_norm": 0.6001363943772063, + "learning_rate": 1.4179213767608168e-07, + "loss": 0.2708, + "step": 38233 + }, + { + "epoch": 1.791071344919661, + "grad_norm": 0.561629326484542, + "learning_rate": 1.4172918263750612e-07, + "loss": 0.2604, + "step": 38234 + }, + { + "epoch": 1.7911181899095892, + "grad_norm": 0.6012200847743949, + "learning_rate": 1.4166624117018607e-07, + "loss": 0.2681, + "step": 38235 + }, + { + "epoch": 1.7911650348995174, + "grad_norm": 0.6195913185707937, + "learning_rate": 1.4160331327448402e-07, + "loss": 0.2802, + "step": 38236 + }, + { + "epoch": 1.7912118798894459, + "grad_norm": 0.5787156533722257, + "learning_rate": 1.4154039895076216e-07, + "loss": 0.2769, + "step": 38237 + }, + { + "epoch": 1.791258724879374, + "grad_norm": 0.5747892387308066, + "learning_rate": 1.4147749819938245e-07, + "loss": 0.2764, + "step": 38238 + }, + { + "epoch": 1.7913055698693023, + "grad_norm": 0.5963304222732339, + "learning_rate": 1.4141461102070764e-07, + "loss": 0.2595, + "step": 38239 + }, + { + "epoch": 1.7913524148592308, + "grad_norm": 0.5984877475901651, + "learning_rate": 1.4135173741509856e-07, + "loss": 0.2694, + "step": 38240 + }, + { + "epoch": 1.7913992598491593, + "grad_norm": 0.5738469522069445, + "learning_rate": 1.4128887738291714e-07, + "loss": 0.2646, + "step": 38241 + }, + { + "epoch": 1.7914461048390875, + "grad_norm": 0.6216078652617514, + "learning_rate": 1.4122603092452558e-07, + "loss": 0.2817, + "step": 38242 + }, + { + "epoch": 1.7914929498290157, + "grad_norm": 0.5590577500955229, + "learning_rate": 1.4116319804028527e-07, + "loss": 0.263, + "step": 38243 + }, + { + "epoch": 1.7915397948189442, + "grad_norm": 0.5894761317940482, + "learning_rate": 1.4110037873055759e-07, + "loss": 0.2507, + "step": 38244 + }, + { + "epoch": 1.7915866398088724, + "grad_norm": 0.5906004799248169, + "learning_rate": 1.4103757299570476e-07, + "loss": 0.284, + "step": 38245 + }, + { + "epoch": 1.7916334847988007, + "grad_norm": 0.615719743928019, + "learning_rate": 1.4097478083608785e-07, + "loss": 0.274, + "step": 38246 + }, + { + "epoch": 1.7916803297887292, + "grad_norm": 0.6251636342207562, + "learning_rate": 1.4091200225206824e-07, + "loss": 0.2774, + "step": 38247 + }, + { + "epoch": 1.7917271747786574, + "grad_norm": 0.6071933161231508, + "learning_rate": 1.408492372440068e-07, + "loss": 0.271, + "step": 38248 + }, + { + "epoch": 1.7917740197685856, + "grad_norm": 0.6025777031572314, + "learning_rate": 1.4078648581226512e-07, + "loss": 0.2717, + "step": 38249 + }, + { + "epoch": 1.791820864758514, + "grad_norm": 0.639016915813361, + "learning_rate": 1.4072374795720434e-07, + "loss": 0.2718, + "step": 38250 + }, + { + "epoch": 1.7918677097484426, + "grad_norm": 0.6105604508659229, + "learning_rate": 1.40661023679185e-07, + "loss": 0.2751, + "step": 38251 + }, + { + "epoch": 1.7919145547383706, + "grad_norm": 0.579738062266376, + "learning_rate": 1.405983129785693e-07, + "loss": 0.2554, + "step": 38252 + }, + { + "epoch": 1.791961399728299, + "grad_norm": 0.5640255006211045, + "learning_rate": 1.405356158557167e-07, + "loss": 0.2688, + "step": 38253 + }, + { + "epoch": 1.7920082447182275, + "grad_norm": 0.5681449804233683, + "learning_rate": 1.404729323109888e-07, + "loss": 0.2479, + "step": 38254 + }, + { + "epoch": 1.7920550897081557, + "grad_norm": 0.6134266564647639, + "learning_rate": 1.404102623447462e-07, + "loss": 0.2666, + "step": 38255 + }, + { + "epoch": 1.792101934698084, + "grad_norm": 0.5704894588104956, + "learning_rate": 1.4034760595734915e-07, + "loss": 0.2631, + "step": 38256 + }, + { + "epoch": 1.7921487796880124, + "grad_norm": 0.5972064586983569, + "learning_rate": 1.4028496314915847e-07, + "loss": 0.2663, + "step": 38257 + }, + { + "epoch": 1.7921956246779407, + "grad_norm": 0.6016094763255411, + "learning_rate": 1.4022233392053525e-07, + "loss": 0.2832, + "step": 38258 + }, + { + "epoch": 1.792242469667869, + "grad_norm": 0.6136242822060871, + "learning_rate": 1.4015971827183923e-07, + "loss": 0.2619, + "step": 38259 + }, + { + "epoch": 1.7922893146577974, + "grad_norm": 0.5756833510804933, + "learning_rate": 1.4009711620343064e-07, + "loss": 0.2637, + "step": 38260 + }, + { + "epoch": 1.7923361596477256, + "grad_norm": 0.5975151643464544, + "learning_rate": 1.4003452771567034e-07, + "loss": 0.2648, + "step": 38261 + }, + { + "epoch": 1.7923830046376539, + "grad_norm": 0.5571089384057317, + "learning_rate": 1.3997195280891802e-07, + "loss": 0.263, + "step": 38262 + }, + { + "epoch": 1.7924298496275823, + "grad_norm": 0.5763724684866591, + "learning_rate": 1.3990939148353395e-07, + "loss": 0.2584, + "step": 38263 + }, + { + "epoch": 1.7924766946175108, + "grad_norm": 0.5915253921870145, + "learning_rate": 1.3984684373987783e-07, + "loss": 0.2847, + "step": 38264 + }, + { + "epoch": 1.7925235396074388, + "grad_norm": 0.6104994974315815, + "learning_rate": 1.397843095783105e-07, + "loss": 0.2859, + "step": 38265 + }, + { + "epoch": 1.7925703845973673, + "grad_norm": 0.5588993042864071, + "learning_rate": 1.3972178899919086e-07, + "loss": 0.2625, + "step": 38266 + }, + { + "epoch": 1.7926172295872957, + "grad_norm": 0.6097420103929034, + "learning_rate": 1.3965928200287915e-07, + "loss": 0.253, + "step": 38267 + }, + { + "epoch": 1.792664074577224, + "grad_norm": 0.5789561114870108, + "learning_rate": 1.3959678858973536e-07, + "loss": 0.2701, + "step": 38268 + }, + { + "epoch": 1.7927109195671522, + "grad_norm": 0.5792939858646233, + "learning_rate": 1.3953430876011836e-07, + "loss": 0.2625, + "step": 38269 + }, + { + "epoch": 1.7927577645570807, + "grad_norm": 0.5869271247873528, + "learning_rate": 1.3947184251438816e-07, + "loss": 0.2681, + "step": 38270 + }, + { + "epoch": 1.792804609547009, + "grad_norm": 0.6566102893668612, + "learning_rate": 1.3940938985290475e-07, + "loss": 0.2939, + "step": 38271 + }, + { + "epoch": 1.7928514545369372, + "grad_norm": 0.6263730103561256, + "learning_rate": 1.3934695077602645e-07, + "loss": 0.2767, + "step": 38272 + }, + { + "epoch": 1.7928982995268656, + "grad_norm": 0.5894015614292626, + "learning_rate": 1.3928452528411295e-07, + "loss": 0.2688, + "step": 38273 + }, + { + "epoch": 1.7929451445167939, + "grad_norm": 0.6019273869056322, + "learning_rate": 1.392221133775243e-07, + "loss": 0.2702, + "step": 38274 + }, + { + "epoch": 1.7929919895067221, + "grad_norm": 0.6712522180691357, + "learning_rate": 1.3915971505661873e-07, + "loss": 0.2915, + "step": 38275 + }, + { + "epoch": 1.7930388344966506, + "grad_norm": 0.5931499559835554, + "learning_rate": 1.390973303217555e-07, + "loss": 0.2639, + "step": 38276 + }, + { + "epoch": 1.793085679486579, + "grad_norm": 0.5338161009106636, + "learning_rate": 1.3903495917329395e-07, + "loss": 0.2642, + "step": 38277 + }, + { + "epoch": 1.7931325244765073, + "grad_norm": 0.5650536912598126, + "learning_rate": 1.3897260161159248e-07, + "loss": 0.2676, + "step": 38278 + }, + { + "epoch": 1.7931793694664355, + "grad_norm": 0.5760206110177978, + "learning_rate": 1.389102576370105e-07, + "loss": 0.259, + "step": 38279 + }, + { + "epoch": 1.793226214456364, + "grad_norm": 0.5567613032667884, + "learning_rate": 1.3884792724990632e-07, + "loss": 0.2606, + "step": 38280 + }, + { + "epoch": 1.7932730594462922, + "grad_norm": 0.5701183587814297, + "learning_rate": 1.3878561045063937e-07, + "loss": 0.26, + "step": 38281 + }, + { + "epoch": 1.7933199044362205, + "grad_norm": 0.5785516810404674, + "learning_rate": 1.3872330723956746e-07, + "loss": 0.2672, + "step": 38282 + }, + { + "epoch": 1.793366749426149, + "grad_norm": 0.592929611248769, + "learning_rate": 1.386610176170497e-07, + "loss": 0.2809, + "step": 38283 + }, + { + "epoch": 1.7934135944160772, + "grad_norm": 0.6417226272292913, + "learning_rate": 1.385987415834439e-07, + "loss": 0.2951, + "step": 38284 + }, + { + "epoch": 1.7934604394060054, + "grad_norm": 0.5676345655379442, + "learning_rate": 1.3853647913910863e-07, + "loss": 0.2587, + "step": 38285 + }, + { + "epoch": 1.7935072843959339, + "grad_norm": 0.5780958786808575, + "learning_rate": 1.3847423028440276e-07, + "loss": 0.2737, + "step": 38286 + }, + { + "epoch": 1.7935541293858623, + "grad_norm": 0.621261464660309, + "learning_rate": 1.3841199501968382e-07, + "loss": 0.2765, + "step": 38287 + }, + { + "epoch": 1.7936009743757904, + "grad_norm": 0.575535032519594, + "learning_rate": 1.3834977334531097e-07, + "loss": 0.2654, + "step": 38288 + }, + { + "epoch": 1.7936478193657188, + "grad_norm": 0.6073026748967321, + "learning_rate": 1.382875652616414e-07, + "loss": 0.2752, + "step": 38289 + }, + { + "epoch": 1.7936946643556473, + "grad_norm": 0.5935465426852489, + "learning_rate": 1.3822537076903286e-07, + "loss": 0.2666, + "step": 38290 + }, + { + "epoch": 1.7937415093455755, + "grad_norm": 0.5769168245565848, + "learning_rate": 1.3816318986784371e-07, + "loss": 0.2463, + "step": 38291 + }, + { + "epoch": 1.7937883543355038, + "grad_norm": 0.601447346874883, + "learning_rate": 1.38101022558432e-07, + "loss": 0.2702, + "step": 38292 + }, + { + "epoch": 1.7938351993254322, + "grad_norm": 0.5522410109984931, + "learning_rate": 1.3803886884115518e-07, + "loss": 0.2609, + "step": 38293 + }, + { + "epoch": 1.7938820443153605, + "grad_norm": 0.5846968263403324, + "learning_rate": 1.379767287163708e-07, + "loss": 0.27, + "step": 38294 + }, + { + "epoch": 1.7939288893052887, + "grad_norm": 0.6085097825286448, + "learning_rate": 1.3791460218443741e-07, + "loss": 0.2856, + "step": 38295 + }, + { + "epoch": 1.7939757342952172, + "grad_norm": 0.5762331192611492, + "learning_rate": 1.3785248924571143e-07, + "loss": 0.262, + "step": 38296 + }, + { + "epoch": 1.7940225792851454, + "grad_norm": 0.5596979852195038, + "learning_rate": 1.377903899005506e-07, + "loss": 0.2525, + "step": 38297 + }, + { + "epoch": 1.7940694242750737, + "grad_norm": 0.6425265688711813, + "learning_rate": 1.3772830414931215e-07, + "loss": 0.2944, + "step": 38298 + }, + { + "epoch": 1.7941162692650021, + "grad_norm": 0.5812141249176416, + "learning_rate": 1.3766623199235384e-07, + "loss": 0.2653, + "step": 38299 + }, + { + "epoch": 1.7941631142549306, + "grad_norm": 0.6027469015114699, + "learning_rate": 1.3760417343003234e-07, + "loss": 0.2743, + "step": 38300 + }, + { + "epoch": 1.7942099592448586, + "grad_norm": 0.6021915310021884, + "learning_rate": 1.3754212846270542e-07, + "loss": 0.2774, + "step": 38301 + }, + { + "epoch": 1.794256804234787, + "grad_norm": 0.5857052360241701, + "learning_rate": 1.3748009709072945e-07, + "loss": 0.2754, + "step": 38302 + }, + { + "epoch": 1.7943036492247155, + "grad_norm": 0.5715092580240317, + "learning_rate": 1.3741807931446221e-07, + "loss": 0.2761, + "step": 38303 + }, + { + "epoch": 1.7943504942146438, + "grad_norm": 0.6044178053242866, + "learning_rate": 1.3735607513425981e-07, + "loss": 0.2856, + "step": 38304 + }, + { + "epoch": 1.794397339204572, + "grad_norm": 0.558774943761141, + "learning_rate": 1.3729408455047917e-07, + "loss": 0.2582, + "step": 38305 + }, + { + "epoch": 1.7944441841945005, + "grad_norm": 0.5953097209432688, + "learning_rate": 1.3723210756347722e-07, + "loss": 0.2661, + "step": 38306 + }, + { + "epoch": 1.7944910291844287, + "grad_norm": 0.6240858421562561, + "learning_rate": 1.3717014417361118e-07, + "loss": 0.2877, + "step": 38307 + }, + { + "epoch": 1.794537874174357, + "grad_norm": 0.5715467790830157, + "learning_rate": 1.3710819438123661e-07, + "loss": 0.2477, + "step": 38308 + }, + { + "epoch": 1.7945847191642854, + "grad_norm": 0.5862599722312377, + "learning_rate": 1.3704625818671047e-07, + "loss": 0.2843, + "step": 38309 + }, + { + "epoch": 1.7946315641542137, + "grad_norm": 0.5876148052098324, + "learning_rate": 1.3698433559038938e-07, + "loss": 0.275, + "step": 38310 + }, + { + "epoch": 1.794678409144142, + "grad_norm": 0.6336540658947675, + "learning_rate": 1.3692242659262943e-07, + "loss": 0.2624, + "step": 38311 + }, + { + "epoch": 1.7947252541340704, + "grad_norm": 0.6544445725028435, + "learning_rate": 1.3686053119378707e-07, + "loss": 0.2672, + "step": 38312 + }, + { + "epoch": 1.7947720991239988, + "grad_norm": 0.581269446388141, + "learning_rate": 1.3679864939421861e-07, + "loss": 0.2721, + "step": 38313 + }, + { + "epoch": 1.794818944113927, + "grad_norm": 0.6139586913822823, + "learning_rate": 1.3673678119427936e-07, + "loss": 0.2539, + "step": 38314 + }, + { + "epoch": 1.7948657891038553, + "grad_norm": 0.599589156864659, + "learning_rate": 1.3667492659432625e-07, + "loss": 0.2825, + "step": 38315 + }, + { + "epoch": 1.7949126340937838, + "grad_norm": 0.5572050015335738, + "learning_rate": 1.366130855947148e-07, + "loss": 0.2503, + "step": 38316 + }, + { + "epoch": 1.794959479083712, + "grad_norm": 0.5756929780494473, + "learning_rate": 1.3655125819580146e-07, + "loss": 0.2673, + "step": 38317 + }, + { + "epoch": 1.7950063240736402, + "grad_norm": 0.6353740275230512, + "learning_rate": 1.3648944439794142e-07, + "loss": 0.2644, + "step": 38318 + }, + { + "epoch": 1.7950531690635687, + "grad_norm": 0.569799520295325, + "learning_rate": 1.3642764420149057e-07, + "loss": 0.2605, + "step": 38319 + }, + { + "epoch": 1.795100014053497, + "grad_norm": 0.5831354779873955, + "learning_rate": 1.3636585760680443e-07, + "loss": 0.2734, + "step": 38320 + }, + { + "epoch": 1.7951468590434252, + "grad_norm": 0.6231807347354756, + "learning_rate": 1.3630408461423882e-07, + "loss": 0.2755, + "step": 38321 + }, + { + "epoch": 1.7951937040333537, + "grad_norm": 0.5643335824124729, + "learning_rate": 1.3624232522414876e-07, + "loss": 0.253, + "step": 38322 + }, + { + "epoch": 1.7952405490232821, + "grad_norm": 0.6554116747822343, + "learning_rate": 1.3618057943689062e-07, + "loss": 0.2806, + "step": 38323 + }, + { + "epoch": 1.7952873940132101, + "grad_norm": 0.6180555654342033, + "learning_rate": 1.3611884725281882e-07, + "loss": 0.2719, + "step": 38324 + }, + { + "epoch": 1.7953342390031386, + "grad_norm": 0.5316592305903749, + "learning_rate": 1.3605712867228894e-07, + "loss": 0.2538, + "step": 38325 + }, + { + "epoch": 1.795381083993067, + "grad_norm": 0.5735568927190644, + "learning_rate": 1.3599542369565622e-07, + "loss": 0.2663, + "step": 38326 + }, + { + "epoch": 1.7954279289829953, + "grad_norm": 0.6037977436485118, + "learning_rate": 1.359337323232754e-07, + "loss": 0.2748, + "step": 38327 + }, + { + "epoch": 1.7954747739729235, + "grad_norm": 0.6018801237141734, + "learning_rate": 1.3587205455550173e-07, + "loss": 0.2731, + "step": 38328 + }, + { + "epoch": 1.795521618962852, + "grad_norm": 0.5814876122826662, + "learning_rate": 1.3581039039269023e-07, + "loss": 0.2581, + "step": 38329 + }, + { + "epoch": 1.7955684639527802, + "grad_norm": 0.5954860410133239, + "learning_rate": 1.3574873983519615e-07, + "loss": 0.2638, + "step": 38330 + }, + { + "epoch": 1.7956153089427085, + "grad_norm": 0.5636490669277574, + "learning_rate": 1.356871028833734e-07, + "loss": 0.257, + "step": 38331 + }, + { + "epoch": 1.795662153932637, + "grad_norm": 0.5534496424106817, + "learning_rate": 1.356254795375775e-07, + "loss": 0.247, + "step": 38332 + }, + { + "epoch": 1.7957089989225652, + "grad_norm": 0.5552730605247196, + "learning_rate": 1.3556386979816233e-07, + "loss": 0.2521, + "step": 38333 + }, + { + "epoch": 1.7957558439124934, + "grad_norm": 0.6147805436840018, + "learning_rate": 1.355022736654829e-07, + "loss": 0.279, + "step": 38334 + }, + { + "epoch": 1.795802688902422, + "grad_norm": 0.6136382577166716, + "learning_rate": 1.3544069113989339e-07, + "loss": 0.2921, + "step": 38335 + }, + { + "epoch": 1.7958495338923504, + "grad_norm": 0.5498841228090792, + "learning_rate": 1.3537912222174876e-07, + "loss": 0.2572, + "step": 38336 + }, + { + "epoch": 1.7958963788822784, + "grad_norm": 0.6737840144118845, + "learning_rate": 1.353175669114029e-07, + "loss": 0.2958, + "step": 38337 + }, + { + "epoch": 1.7959432238722068, + "grad_norm": 0.5620086662507583, + "learning_rate": 1.3525602520921027e-07, + "loss": 0.2607, + "step": 38338 + }, + { + "epoch": 1.7959900688621353, + "grad_norm": 0.5924311386144091, + "learning_rate": 1.3519449711552446e-07, + "loss": 0.2539, + "step": 38339 + }, + { + "epoch": 1.7960369138520635, + "grad_norm": 0.6133315551327765, + "learning_rate": 1.351329826307002e-07, + "loss": 0.2612, + "step": 38340 + }, + { + "epoch": 1.7960837588419918, + "grad_norm": 0.561005533610624, + "learning_rate": 1.3507148175509104e-07, + "loss": 0.2591, + "step": 38341 + }, + { + "epoch": 1.7961306038319202, + "grad_norm": 0.5563599841546116, + "learning_rate": 1.3500999448905096e-07, + "loss": 0.2683, + "step": 38342 + }, + { + "epoch": 1.7961774488218485, + "grad_norm": 0.6219685917929333, + "learning_rate": 1.349485208329343e-07, + "loss": 0.2674, + "step": 38343 + }, + { + "epoch": 1.7962242938117767, + "grad_norm": 0.5789960607449743, + "learning_rate": 1.3488706078709445e-07, + "loss": 0.2835, + "step": 38344 + }, + { + "epoch": 1.7962711388017052, + "grad_norm": 0.5994430990369809, + "learning_rate": 1.348256143518853e-07, + "loss": 0.2673, + "step": 38345 + }, + { + "epoch": 1.7963179837916334, + "grad_norm": 0.6086054621166697, + "learning_rate": 1.3476418152765986e-07, + "loss": 0.2849, + "step": 38346 + }, + { + "epoch": 1.7963648287815617, + "grad_norm": 0.5807363838822183, + "learning_rate": 1.3470276231477231e-07, + "loss": 0.2527, + "step": 38347 + }, + { + "epoch": 1.7964116737714901, + "grad_norm": 0.6637228199884344, + "learning_rate": 1.3464135671357543e-07, + "loss": 0.2912, + "step": 38348 + }, + { + "epoch": 1.7964585187614186, + "grad_norm": 0.6197189964979968, + "learning_rate": 1.3457996472442337e-07, + "loss": 0.2779, + "step": 38349 + }, + { + "epoch": 1.7965053637513468, + "grad_norm": 0.5893374907177016, + "learning_rate": 1.3451858634766946e-07, + "loss": 0.2535, + "step": 38350 + }, + { + "epoch": 1.796552208741275, + "grad_norm": 0.6197606837642075, + "learning_rate": 1.3445722158366592e-07, + "loss": 0.2761, + "step": 38351 + }, + { + "epoch": 1.7965990537312035, + "grad_norm": 0.5784689194795148, + "learning_rate": 1.343958704327672e-07, + "loss": 0.2664, + "step": 38352 + }, + { + "epoch": 1.7966458987211318, + "grad_norm": 0.6056926308789492, + "learning_rate": 1.3433453289532493e-07, + "loss": 0.2748, + "step": 38353 + }, + { + "epoch": 1.79669274371106, + "grad_norm": 0.5975054850445207, + "learning_rate": 1.3427320897169305e-07, + "loss": 0.2748, + "step": 38354 + }, + { + "epoch": 1.7967395887009885, + "grad_norm": 0.6979708454607152, + "learning_rate": 1.342118986622243e-07, + "loss": 0.2971, + "step": 38355 + }, + { + "epoch": 1.7967864336909167, + "grad_norm": 0.6326776562697315, + "learning_rate": 1.3415060196727169e-07, + "loss": 0.2814, + "step": 38356 + }, + { + "epoch": 1.796833278680845, + "grad_norm": 0.5964953159534649, + "learning_rate": 1.3408931888718752e-07, + "loss": 0.2625, + "step": 38357 + }, + { + "epoch": 1.7968801236707734, + "grad_norm": 0.6118630189448707, + "learning_rate": 1.340280494223245e-07, + "loss": 0.2761, + "step": 38358 + }, + { + "epoch": 1.796926968660702, + "grad_norm": 0.6257019397986024, + "learning_rate": 1.33966793573036e-07, + "loss": 0.2679, + "step": 38359 + }, + { + "epoch": 1.79697381365063, + "grad_norm": 0.6279163497967942, + "learning_rate": 1.3390555133967336e-07, + "loss": 0.2766, + "step": 38360 + }, + { + "epoch": 1.7970206586405584, + "grad_norm": 0.6158663402186423, + "learning_rate": 1.338443227225897e-07, + "loss": 0.2589, + "step": 38361 + }, + { + "epoch": 1.7970675036304868, + "grad_norm": 0.5793810683149325, + "learning_rate": 1.3378310772213743e-07, + "loss": 0.2695, + "step": 38362 + }, + { + "epoch": 1.797114348620415, + "grad_norm": 0.5973888406062262, + "learning_rate": 1.3372190633866856e-07, + "loss": 0.2526, + "step": 38363 + }, + { + "epoch": 1.7971611936103433, + "grad_norm": 0.643711617427548, + "learning_rate": 1.3366071857253498e-07, + "loss": 0.266, + "step": 38364 + }, + { + "epoch": 1.7972080386002718, + "grad_norm": 0.5730591946396331, + "learning_rate": 1.335995444240895e-07, + "loss": 0.2583, + "step": 38365 + }, + { + "epoch": 1.7972548835902, + "grad_norm": 0.6203697352401577, + "learning_rate": 1.3353838389368435e-07, + "loss": 0.2762, + "step": 38366 + }, + { + "epoch": 1.7973017285801283, + "grad_norm": 0.6293480987291845, + "learning_rate": 1.3347723698167032e-07, + "loss": 0.2886, + "step": 38367 + }, + { + "epoch": 1.7973485735700567, + "grad_norm": 0.560741276257405, + "learning_rate": 1.3341610368840047e-07, + "loss": 0.2693, + "step": 38368 + }, + { + "epoch": 1.797395418559985, + "grad_norm": 0.5775168794841472, + "learning_rate": 1.3335498401422593e-07, + "loss": 0.2644, + "step": 38369 + }, + { + "epoch": 1.7974422635499132, + "grad_norm": 0.6062046973541761, + "learning_rate": 1.3329387795949834e-07, + "loss": 0.278, + "step": 38370 + }, + { + "epoch": 1.7974891085398417, + "grad_norm": 0.6292847357347249, + "learning_rate": 1.3323278552456991e-07, + "loss": 0.2798, + "step": 38371 + }, + { + "epoch": 1.7975359535297701, + "grad_norm": 0.6365573775814549, + "learning_rate": 1.3317170670979208e-07, + "loss": 0.2704, + "step": 38372 + }, + { + "epoch": 1.7975827985196982, + "grad_norm": 0.6224837797891615, + "learning_rate": 1.331106415155159e-07, + "loss": 0.2723, + "step": 38373 + }, + { + "epoch": 1.7976296435096266, + "grad_norm": 0.5690488243161824, + "learning_rate": 1.330495899420936e-07, + "loss": 0.2639, + "step": 38374 + }, + { + "epoch": 1.797676488499555, + "grad_norm": 0.6286877800431945, + "learning_rate": 1.3298855198987548e-07, + "loss": 0.2561, + "step": 38375 + }, + { + "epoch": 1.7977233334894833, + "grad_norm": 0.5926836259633501, + "learning_rate": 1.3292752765921317e-07, + "loss": 0.2776, + "step": 38376 + }, + { + "epoch": 1.7977701784794116, + "grad_norm": 0.6201893433903841, + "learning_rate": 1.3286651695045838e-07, + "loss": 0.2811, + "step": 38377 + }, + { + "epoch": 1.79781702346934, + "grad_norm": 0.6009819083902838, + "learning_rate": 1.3280551986396162e-07, + "loss": 0.2688, + "step": 38378 + }, + { + "epoch": 1.7978638684592683, + "grad_norm": 0.5651061306634572, + "learning_rate": 1.3274453640007428e-07, + "loss": 0.2589, + "step": 38379 + }, + { + "epoch": 1.7979107134491965, + "grad_norm": 0.6283328000638099, + "learning_rate": 1.326835665591475e-07, + "loss": 0.272, + "step": 38380 + }, + { + "epoch": 1.797957558439125, + "grad_norm": 0.5764830463886775, + "learning_rate": 1.3262261034153123e-07, + "loss": 0.2522, + "step": 38381 + }, + { + "epoch": 1.7980044034290532, + "grad_norm": 0.5895292718400466, + "learning_rate": 1.3256166774757688e-07, + "loss": 0.2606, + "step": 38382 + }, + { + "epoch": 1.7980512484189815, + "grad_norm": 0.5940184148262008, + "learning_rate": 1.3250073877763504e-07, + "loss": 0.2642, + "step": 38383 + }, + { + "epoch": 1.79809809340891, + "grad_norm": 0.5541385792153176, + "learning_rate": 1.3243982343205648e-07, + "loss": 0.2581, + "step": 38384 + }, + { + "epoch": 1.7981449383988384, + "grad_norm": 0.6448030268295781, + "learning_rate": 1.323789217111915e-07, + "loss": 0.2897, + "step": 38385 + }, + { + "epoch": 1.7981917833887666, + "grad_norm": 0.5961671645798949, + "learning_rate": 1.3231803361539148e-07, + "loss": 0.2729, + "step": 38386 + }, + { + "epoch": 1.7982386283786949, + "grad_norm": 0.5841259383069823, + "learning_rate": 1.322571591450056e-07, + "loss": 0.264, + "step": 38387 + }, + { + "epoch": 1.7982854733686233, + "grad_norm": 0.5850250736744305, + "learning_rate": 1.3219629830038467e-07, + "loss": 0.2566, + "step": 38388 + }, + { + "epoch": 1.7983323183585516, + "grad_norm": 0.5729182688753075, + "learning_rate": 1.3213545108187868e-07, + "loss": 0.2614, + "step": 38389 + }, + { + "epoch": 1.7983791633484798, + "grad_norm": 0.6125238517980383, + "learning_rate": 1.3207461748983824e-07, + "loss": 0.2889, + "step": 38390 + }, + { + "epoch": 1.7984260083384083, + "grad_norm": 0.5977173620250107, + "learning_rate": 1.3201379752461302e-07, + "loss": 0.269, + "step": 38391 + }, + { + "epoch": 1.7984728533283365, + "grad_norm": 0.578010270178775, + "learning_rate": 1.3195299118655386e-07, + "loss": 0.2633, + "step": 38392 + }, + { + "epoch": 1.7985196983182647, + "grad_norm": 0.5611517610688863, + "learning_rate": 1.3189219847600936e-07, + "loss": 0.2584, + "step": 38393 + }, + { + "epoch": 1.7985665433081932, + "grad_norm": 0.5855358705045574, + "learning_rate": 1.3183141939333065e-07, + "loss": 0.2507, + "step": 38394 + }, + { + "epoch": 1.7986133882981217, + "grad_norm": 0.608874341824411, + "learning_rate": 1.3177065393886633e-07, + "loss": 0.2634, + "step": 38395 + }, + { + "epoch": 1.7986602332880497, + "grad_norm": 0.597707520456687, + "learning_rate": 1.3170990211296698e-07, + "loss": 0.2727, + "step": 38396 + }, + { + "epoch": 1.7987070782779782, + "grad_norm": 0.6086369764001638, + "learning_rate": 1.316491639159817e-07, + "loss": 0.2662, + "step": 38397 + }, + { + "epoch": 1.7987539232679066, + "grad_norm": 0.5497370169861179, + "learning_rate": 1.3158843934826027e-07, + "loss": 0.2398, + "step": 38398 + }, + { + "epoch": 1.7988007682578349, + "grad_norm": 0.6352298540252368, + "learning_rate": 1.315277284101524e-07, + "loss": 0.2579, + "step": 38399 + }, + { + "epoch": 1.798847613247763, + "grad_norm": 0.5866807198472548, + "learning_rate": 1.3146703110200694e-07, + "loss": 0.2679, + "step": 38400 + }, + { + "epoch": 1.7988944582376916, + "grad_norm": 0.5773932573447034, + "learning_rate": 1.3140634742417363e-07, + "loss": 0.2543, + "step": 38401 + }, + { + "epoch": 1.7989413032276198, + "grad_norm": 0.6231714711456625, + "learning_rate": 1.3134567737700111e-07, + "loss": 0.2668, + "step": 38402 + }, + { + "epoch": 1.798988148217548, + "grad_norm": 0.5658161513704685, + "learning_rate": 1.3128502096083878e-07, + "loss": 0.2617, + "step": 38403 + }, + { + "epoch": 1.7990349932074765, + "grad_norm": 0.6365067041486339, + "learning_rate": 1.3122437817603585e-07, + "loss": 0.2762, + "step": 38404 + }, + { + "epoch": 1.7990818381974047, + "grad_norm": 0.617681921388483, + "learning_rate": 1.3116374902294171e-07, + "loss": 0.2591, + "step": 38405 + }, + { + "epoch": 1.799128683187333, + "grad_norm": 0.5852759200211627, + "learning_rate": 1.3110313350190446e-07, + "loss": 0.2672, + "step": 38406 + }, + { + "epoch": 1.7991755281772615, + "grad_norm": 0.5640827807747198, + "learning_rate": 1.3104253161327325e-07, + "loss": 0.2532, + "step": 38407 + }, + { + "epoch": 1.79922237316719, + "grad_norm": 0.6394409275223062, + "learning_rate": 1.3098194335739722e-07, + "loss": 0.2766, + "step": 38408 + }, + { + "epoch": 1.799269218157118, + "grad_norm": 0.5894244660074459, + "learning_rate": 1.3092136873462418e-07, + "loss": 0.255, + "step": 38409 + }, + { + "epoch": 1.7993160631470464, + "grad_norm": 0.5634977982540024, + "learning_rate": 1.308608077453033e-07, + "loss": 0.2608, + "step": 38410 + }, + { + "epoch": 1.7993629081369749, + "grad_norm": 0.5856399137085911, + "learning_rate": 1.3080026038978344e-07, + "loss": 0.2756, + "step": 38411 + }, + { + "epoch": 1.799409753126903, + "grad_norm": 0.6155152125562658, + "learning_rate": 1.307397266684124e-07, + "loss": 0.2736, + "step": 38412 + }, + { + "epoch": 1.7994565981168313, + "grad_norm": 0.6044687726200215, + "learning_rate": 1.306792065815385e-07, + "loss": 0.2518, + "step": 38413 + }, + { + "epoch": 1.7995034431067598, + "grad_norm": 0.5560839149883742, + "learning_rate": 1.306187001295106e-07, + "loss": 0.2528, + "step": 38414 + }, + { + "epoch": 1.799550288096688, + "grad_norm": 0.5930977486190927, + "learning_rate": 1.3055820731267626e-07, + "loss": 0.2731, + "step": 38415 + }, + { + "epoch": 1.7995971330866163, + "grad_norm": 0.699737581723133, + "learning_rate": 1.3049772813138378e-07, + "loss": 0.2809, + "step": 38416 + }, + { + "epoch": 1.7996439780765447, + "grad_norm": 0.5707503285493931, + "learning_rate": 1.3043726258598204e-07, + "loss": 0.2611, + "step": 38417 + }, + { + "epoch": 1.799690823066473, + "grad_norm": 0.59978895742893, + "learning_rate": 1.3037681067681744e-07, + "loss": 0.2696, + "step": 38418 + }, + { + "epoch": 1.7997376680564012, + "grad_norm": 0.636707849214421, + "learning_rate": 1.3031637240423916e-07, + "loss": 0.2826, + "step": 38419 + }, + { + "epoch": 1.7997845130463297, + "grad_norm": 0.5729000684967372, + "learning_rate": 1.302559477685944e-07, + "loss": 0.266, + "step": 38420 + }, + { + "epoch": 1.7998313580362582, + "grad_norm": 0.6044950708758785, + "learning_rate": 1.3019553677023122e-07, + "loss": 0.2575, + "step": 38421 + }, + { + "epoch": 1.7998782030261864, + "grad_norm": 0.5973085424323751, + "learning_rate": 1.3013513940949686e-07, + "loss": 0.2725, + "step": 38422 + }, + { + "epoch": 1.7999250480161146, + "grad_norm": 0.6560285672736771, + "learning_rate": 1.300747556867396e-07, + "loss": 0.2836, + "step": 38423 + }, + { + "epoch": 1.799971893006043, + "grad_norm": 0.5930271572175968, + "learning_rate": 1.3001438560230616e-07, + "loss": 0.2777, + "step": 38424 + }, + { + "epoch": 1.8000187379959713, + "grad_norm": 0.6339707178672866, + "learning_rate": 1.2995402915654405e-07, + "loss": 0.2809, + "step": 38425 + }, + { + "epoch": 1.8000655829858996, + "grad_norm": 0.5558931951351072, + "learning_rate": 1.2989368634980098e-07, + "loss": 0.2646, + "step": 38426 + }, + { + "epoch": 1.800112427975828, + "grad_norm": 0.5965588206936402, + "learning_rate": 1.2983335718242397e-07, + "loss": 0.2833, + "step": 38427 + }, + { + "epoch": 1.8001592729657563, + "grad_norm": 0.5873255258443735, + "learning_rate": 1.2977304165476046e-07, + "loss": 0.2643, + "step": 38428 + }, + { + "epoch": 1.8002061179556845, + "grad_norm": 0.571179793606713, + "learning_rate": 1.2971273976715742e-07, + "loss": 0.2805, + "step": 38429 + }, + { + "epoch": 1.800252962945613, + "grad_norm": 0.6234320590411239, + "learning_rate": 1.2965245151996152e-07, + "loss": 0.2645, + "step": 38430 + }, + { + "epoch": 1.8002998079355415, + "grad_norm": 0.5711940447524307, + "learning_rate": 1.2959217691352029e-07, + "loss": 0.2632, + "step": 38431 + }, + { + "epoch": 1.8003466529254695, + "grad_norm": 0.6470518829105544, + "learning_rate": 1.2953191594817975e-07, + "loss": 0.2729, + "step": 38432 + }, + { + "epoch": 1.800393497915398, + "grad_norm": 0.5932909703214561, + "learning_rate": 1.294716686242878e-07, + "loss": 0.2726, + "step": 38433 + }, + { + "epoch": 1.8004403429053264, + "grad_norm": 0.5775410785358082, + "learning_rate": 1.2941143494219017e-07, + "loss": 0.2442, + "step": 38434 + }, + { + "epoch": 1.8004871878952546, + "grad_norm": 0.5899422939277678, + "learning_rate": 1.293512149022344e-07, + "loss": 0.2604, + "step": 38435 + }, + { + "epoch": 1.8005340328851829, + "grad_norm": 0.6179801934869176, + "learning_rate": 1.2929100850476662e-07, + "loss": 0.2682, + "step": 38436 + }, + { + "epoch": 1.8005808778751113, + "grad_norm": 0.5895010633996108, + "learning_rate": 1.2923081575013263e-07, + "loss": 0.2513, + "step": 38437 + }, + { + "epoch": 1.8006277228650396, + "grad_norm": 0.6022126168707184, + "learning_rate": 1.291706366386797e-07, + "loss": 0.2717, + "step": 38438 + }, + { + "epoch": 1.8006745678549678, + "grad_norm": 0.6138256084219139, + "learning_rate": 1.2911047117075386e-07, + "loss": 0.2844, + "step": 38439 + }, + { + "epoch": 1.8007214128448963, + "grad_norm": 0.602630183722802, + "learning_rate": 1.290503193467013e-07, + "loss": 0.28, + "step": 38440 + }, + { + "epoch": 1.8007682578348245, + "grad_norm": 0.5898921844652135, + "learning_rate": 1.289901811668684e-07, + "loss": 0.2726, + "step": 38441 + }, + { + "epoch": 1.8008151028247528, + "grad_norm": 0.5694713042859375, + "learning_rate": 1.2893005663160096e-07, + "loss": 0.2553, + "step": 38442 + }, + { + "epoch": 1.8008619478146812, + "grad_norm": 0.5692608807598005, + "learning_rate": 1.288699457412454e-07, + "loss": 0.2661, + "step": 38443 + }, + { + "epoch": 1.8009087928046097, + "grad_norm": 0.6188563673431994, + "learning_rate": 1.2880984849614696e-07, + "loss": 0.2884, + "step": 38444 + }, + { + "epoch": 1.8009556377945377, + "grad_norm": 0.5704210330663414, + "learning_rate": 1.287497648966518e-07, + "loss": 0.2781, + "step": 38445 + }, + { + "epoch": 1.8010024827844662, + "grad_norm": 0.5883545350387259, + "learning_rate": 1.2868969494310573e-07, + "loss": 0.2776, + "step": 38446 + }, + { + "epoch": 1.8010493277743946, + "grad_norm": 0.5921191680708884, + "learning_rate": 1.2862963863585516e-07, + "loss": 0.2681, + "step": 38447 + }, + { + "epoch": 1.8010961727643229, + "grad_norm": 0.5986442747879296, + "learning_rate": 1.285695959752442e-07, + "loss": 0.2713, + "step": 38448 + }, + { + "epoch": 1.8011430177542511, + "grad_norm": 0.5867702954202648, + "learning_rate": 1.2850956696161932e-07, + "loss": 0.2666, + "step": 38449 + }, + { + "epoch": 1.8011898627441796, + "grad_norm": 0.6028375946733, + "learning_rate": 1.2844955159532628e-07, + "loss": 0.2715, + "step": 38450 + }, + { + "epoch": 1.8012367077341078, + "grad_norm": 0.6212092796034099, + "learning_rate": 1.283895498767096e-07, + "loss": 0.2725, + "step": 38451 + }, + { + "epoch": 1.801283552724036, + "grad_norm": 0.5904207143427189, + "learning_rate": 1.2832956180611504e-07, + "loss": 0.2657, + "step": 38452 + }, + { + "epoch": 1.8013303977139645, + "grad_norm": 0.5794263355201523, + "learning_rate": 1.2826958738388794e-07, + "loss": 0.2732, + "step": 38453 + }, + { + "epoch": 1.8013772427038928, + "grad_norm": 0.5923152202797966, + "learning_rate": 1.28209626610373e-07, + "loss": 0.2706, + "step": 38454 + }, + { + "epoch": 1.801424087693821, + "grad_norm": 0.6087701658378318, + "learning_rate": 1.2814967948591577e-07, + "loss": 0.2754, + "step": 38455 + }, + { + "epoch": 1.8014709326837495, + "grad_norm": 0.5996564127383306, + "learning_rate": 1.2808974601086072e-07, + "loss": 0.2827, + "step": 38456 + }, + { + "epoch": 1.801517777673678, + "grad_norm": 0.576158078876884, + "learning_rate": 1.2802982618555339e-07, + "loss": 0.2599, + "step": 38457 + }, + { + "epoch": 1.8015646226636062, + "grad_norm": 0.6120168300237052, + "learning_rate": 1.2796992001033793e-07, + "loss": 0.2767, + "step": 38458 + }, + { + "epoch": 1.8016114676535344, + "grad_norm": 0.5826769938345954, + "learning_rate": 1.2791002748555963e-07, + "loss": 0.2629, + "step": 38459 + }, + { + "epoch": 1.8016583126434629, + "grad_norm": 0.6106299930664213, + "learning_rate": 1.278501486115627e-07, + "loss": 0.2642, + "step": 38460 + }, + { + "epoch": 1.8017051576333911, + "grad_norm": 0.6179073739153386, + "learning_rate": 1.2779028338869183e-07, + "loss": 0.2599, + "step": 38461 + }, + { + "epoch": 1.8017520026233194, + "grad_norm": 0.5893213088233066, + "learning_rate": 1.277304318172917e-07, + "loss": 0.2698, + "step": 38462 + }, + { + "epoch": 1.8017988476132478, + "grad_norm": 0.7181040677076277, + "learning_rate": 1.2767059389770713e-07, + "loss": 0.278, + "step": 38463 + }, + { + "epoch": 1.801845692603176, + "grad_norm": 0.5406762080382043, + "learning_rate": 1.276107696302817e-07, + "loss": 0.2568, + "step": 38464 + }, + { + "epoch": 1.8018925375931043, + "grad_norm": 0.5809106743536647, + "learning_rate": 1.275509590153598e-07, + "loss": 0.2628, + "step": 38465 + }, + { + "epoch": 1.8019393825830328, + "grad_norm": 0.5591861801939, + "learning_rate": 1.274911620532865e-07, + "loss": 0.2482, + "step": 38466 + }, + { + "epoch": 1.8019862275729612, + "grad_norm": 0.6134423903914746, + "learning_rate": 1.2743137874440452e-07, + "loss": 0.2558, + "step": 38467 + }, + { + "epoch": 1.8020330725628892, + "grad_norm": 0.6153936048683368, + "learning_rate": 1.2737160908905895e-07, + "loss": 0.2746, + "step": 38468 + }, + { + "epoch": 1.8020799175528177, + "grad_norm": 0.5583082153110467, + "learning_rate": 1.273118530875933e-07, + "loss": 0.2605, + "step": 38469 + }, + { + "epoch": 1.8021267625427462, + "grad_norm": 0.59423488606176, + "learning_rate": 1.272521107403521e-07, + "loss": 0.2697, + "step": 38470 + }, + { + "epoch": 1.8021736075326744, + "grad_norm": 0.586093906308464, + "learning_rate": 1.2719238204767808e-07, + "loss": 0.2725, + "step": 38471 + }, + { + "epoch": 1.8022204525226027, + "grad_norm": 0.5651462273756852, + "learning_rate": 1.2713266700991573e-07, + "loss": 0.263, + "step": 38472 + }, + { + "epoch": 1.8022672975125311, + "grad_norm": 0.6030092173414571, + "learning_rate": 1.2707296562740834e-07, + "loss": 0.2538, + "step": 38473 + }, + { + "epoch": 1.8023141425024594, + "grad_norm": 0.6129806948792065, + "learning_rate": 1.2701327790049984e-07, + "loss": 0.2586, + "step": 38474 + }, + { + "epoch": 1.8023609874923876, + "grad_norm": 0.608645169336044, + "learning_rate": 1.2695360382953325e-07, + "loss": 0.2763, + "step": 38475 + }, + { + "epoch": 1.802407832482316, + "grad_norm": 0.6551680419481071, + "learning_rate": 1.2689394341485223e-07, + "loss": 0.266, + "step": 38476 + }, + { + "epoch": 1.8024546774722443, + "grad_norm": 0.6283486110715166, + "learning_rate": 1.2683429665680036e-07, + "loss": 0.2578, + "step": 38477 + }, + { + "epoch": 1.8025015224621725, + "grad_norm": 0.5963855288675363, + "learning_rate": 1.267746635557207e-07, + "loss": 0.2829, + "step": 38478 + }, + { + "epoch": 1.802548367452101, + "grad_norm": 0.5724939747147546, + "learning_rate": 1.2671504411195606e-07, + "loss": 0.2643, + "step": 38479 + }, + { + "epoch": 1.8025952124420295, + "grad_norm": 0.6478720276135349, + "learning_rate": 1.2665543832585002e-07, + "loss": 0.2676, + "step": 38480 + }, + { + "epoch": 1.8026420574319575, + "grad_norm": 0.6030204444844666, + "learning_rate": 1.265958461977451e-07, + "loss": 0.2742, + "step": 38481 + }, + { + "epoch": 1.802688902421886, + "grad_norm": 0.5329996991380964, + "learning_rate": 1.2653626772798467e-07, + "loss": 0.2459, + "step": 38482 + }, + { + "epoch": 1.8027357474118144, + "grad_norm": 0.6031845050803654, + "learning_rate": 1.2647670291691145e-07, + "loss": 0.2666, + "step": 38483 + }, + { + "epoch": 1.8027825924017427, + "grad_norm": 0.6156480440840896, + "learning_rate": 1.264171517648688e-07, + "loss": 0.2757, + "step": 38484 + }, + { + "epoch": 1.802829437391671, + "grad_norm": 0.6126029094607015, + "learning_rate": 1.2635761427219867e-07, + "loss": 0.2662, + "step": 38485 + }, + { + "epoch": 1.8028762823815994, + "grad_norm": 0.5870242400307093, + "learning_rate": 1.2629809043924357e-07, + "loss": 0.2601, + "step": 38486 + }, + { + "epoch": 1.8029231273715276, + "grad_norm": 0.6461507897325618, + "learning_rate": 1.262385802663463e-07, + "loss": 0.2852, + "step": 38487 + }, + { + "epoch": 1.8029699723614558, + "grad_norm": 0.6264976781169654, + "learning_rate": 1.261790837538493e-07, + "loss": 0.2885, + "step": 38488 + }, + { + "epoch": 1.8030168173513843, + "grad_norm": 0.5765647755392627, + "learning_rate": 1.2611960090209512e-07, + "loss": 0.2584, + "step": 38489 + }, + { + "epoch": 1.8030636623413125, + "grad_norm": 0.5892358048301953, + "learning_rate": 1.2606013171142655e-07, + "loss": 0.2593, + "step": 38490 + }, + { + "epoch": 1.8031105073312408, + "grad_norm": 0.5979179280684779, + "learning_rate": 1.260006761821847e-07, + "loss": 0.277, + "step": 38491 + }, + { + "epoch": 1.8031573523211692, + "grad_norm": 0.5805107292351279, + "learning_rate": 1.259412343147126e-07, + "loss": 0.2537, + "step": 38492 + }, + { + "epoch": 1.8032041973110977, + "grad_norm": 0.6215362385804083, + "learning_rate": 1.258818061093517e-07, + "loss": 0.2612, + "step": 38493 + }, + { + "epoch": 1.803251042301026, + "grad_norm": 0.5811690165025146, + "learning_rate": 1.2582239156644417e-07, + "loss": 0.2731, + "step": 38494 + }, + { + "epoch": 1.8032978872909542, + "grad_norm": 0.6058491067230148, + "learning_rate": 1.2576299068633224e-07, + "loss": 0.2755, + "step": 38495 + }, + { + "epoch": 1.8033447322808827, + "grad_norm": 0.5842564501480445, + "learning_rate": 1.2570360346935788e-07, + "loss": 0.2515, + "step": 38496 + }, + { + "epoch": 1.803391577270811, + "grad_norm": 0.6484906576216636, + "learning_rate": 1.2564422991586223e-07, + "loss": 0.2782, + "step": 38497 + }, + { + "epoch": 1.8034384222607391, + "grad_norm": 0.5950373908073883, + "learning_rate": 1.2558487002618692e-07, + "loss": 0.266, + "step": 38498 + }, + { + "epoch": 1.8034852672506676, + "grad_norm": 0.6297201049698132, + "learning_rate": 1.2552552380067473e-07, + "loss": 0.289, + "step": 38499 + }, + { + "epoch": 1.8035321122405958, + "grad_norm": 0.6100438494920198, + "learning_rate": 1.254661912396657e-07, + "loss": 0.2744, + "step": 38500 + }, + { + "epoch": 1.803578957230524, + "grad_norm": 0.5691848133383002, + "learning_rate": 1.2540687234350203e-07, + "loss": 0.2474, + "step": 38501 + }, + { + "epoch": 1.8036258022204525, + "grad_norm": 0.6105503698068858, + "learning_rate": 1.253475671125251e-07, + "loss": 0.2689, + "step": 38502 + }, + { + "epoch": 1.803672647210381, + "grad_norm": 0.6254725787490513, + "learning_rate": 1.2528827554707608e-07, + "loss": 0.2752, + "step": 38503 + }, + { + "epoch": 1.803719492200309, + "grad_norm": 0.5806520352823769, + "learning_rate": 1.2522899764749602e-07, + "loss": 0.2693, + "step": 38504 + }, + { + "epoch": 1.8037663371902375, + "grad_norm": 0.5955322819957359, + "learning_rate": 1.251697334141261e-07, + "loss": 0.2629, + "step": 38505 + }, + { + "epoch": 1.803813182180166, + "grad_norm": 0.5935496150515518, + "learning_rate": 1.2511048284730793e-07, + "loss": 0.2547, + "step": 38506 + }, + { + "epoch": 1.8038600271700942, + "grad_norm": 0.5863029402970328, + "learning_rate": 1.2505124594738154e-07, + "loss": 0.2739, + "step": 38507 + }, + { + "epoch": 1.8039068721600224, + "grad_norm": 0.5988300738641251, + "learning_rate": 1.249920227146889e-07, + "loss": 0.2582, + "step": 38508 + }, + { + "epoch": 1.803953717149951, + "grad_norm": 0.615165167122043, + "learning_rate": 1.2493281314956972e-07, + "loss": 0.2771, + "step": 38509 + }, + { + "epoch": 1.8040005621398791, + "grad_norm": 0.6079897515043311, + "learning_rate": 1.2487361725236513e-07, + "loss": 0.2632, + "step": 38510 + }, + { + "epoch": 1.8040474071298074, + "grad_norm": 0.5671474302639556, + "learning_rate": 1.2481443502341624e-07, + "loss": 0.2668, + "step": 38511 + }, + { + "epoch": 1.8040942521197358, + "grad_norm": 0.6602700162447581, + "learning_rate": 1.247552664630633e-07, + "loss": 0.2826, + "step": 38512 + }, + { + "epoch": 1.804141097109664, + "grad_norm": 0.6296705652578903, + "learning_rate": 1.2469611157164663e-07, + "loss": 0.2823, + "step": 38513 + }, + { + "epoch": 1.8041879420995923, + "grad_norm": 0.635530730178548, + "learning_rate": 1.2463697034950734e-07, + "loss": 0.2838, + "step": 38514 + }, + { + "epoch": 1.8042347870895208, + "grad_norm": 0.551006405393528, + "learning_rate": 1.245778427969846e-07, + "loss": 0.2506, + "step": 38515 + }, + { + "epoch": 1.8042816320794492, + "grad_norm": 0.5802573669874654, + "learning_rate": 1.2451872891441952e-07, + "loss": 0.2661, + "step": 38516 + }, + { + "epoch": 1.8043284770693773, + "grad_norm": 0.6031040661004007, + "learning_rate": 1.244596287021521e-07, + "loss": 0.2638, + "step": 38517 + }, + { + "epoch": 1.8043753220593057, + "grad_norm": 0.6489524304757689, + "learning_rate": 1.244005421605224e-07, + "loss": 0.291, + "step": 38518 + }, + { + "epoch": 1.8044221670492342, + "grad_norm": 0.6842586788189374, + "learning_rate": 1.2434146928987063e-07, + "loss": 0.2885, + "step": 38519 + }, + { + "epoch": 1.8044690120391624, + "grad_norm": 0.6405803371611531, + "learning_rate": 1.2428241009053682e-07, + "loss": 0.2663, + "step": 38520 + }, + { + "epoch": 1.8045158570290907, + "grad_norm": 0.5517248037215582, + "learning_rate": 1.2422336456286044e-07, + "loss": 0.2612, + "step": 38521 + }, + { + "epoch": 1.8045627020190191, + "grad_norm": 0.5913729940850249, + "learning_rate": 1.241643327071812e-07, + "loss": 0.2576, + "step": 38522 + }, + { + "epoch": 1.8046095470089474, + "grad_norm": 0.5798293397749088, + "learning_rate": 1.2410531452383912e-07, + "loss": 0.2791, + "step": 38523 + }, + { + "epoch": 1.8046563919988756, + "grad_norm": 0.626814733530415, + "learning_rate": 1.2404631001317391e-07, + "loss": 0.2776, + "step": 38524 + }, + { + "epoch": 1.804703236988804, + "grad_norm": 0.6029810857557167, + "learning_rate": 1.2398731917552503e-07, + "loss": 0.2701, + "step": 38525 + }, + { + "epoch": 1.8047500819787323, + "grad_norm": 0.6135185930324288, + "learning_rate": 1.239283420112322e-07, + "loss": 0.2833, + "step": 38526 + }, + { + "epoch": 1.8047969269686606, + "grad_norm": 0.6462481083235428, + "learning_rate": 1.2386937852063463e-07, + "loss": 0.2976, + "step": 38527 + }, + { + "epoch": 1.804843771958589, + "grad_norm": 0.5823970346482452, + "learning_rate": 1.2381042870407116e-07, + "loss": 0.2688, + "step": 38528 + }, + { + "epoch": 1.8048906169485175, + "grad_norm": 0.6155154573515683, + "learning_rate": 1.2375149256188157e-07, + "loss": 0.2687, + "step": 38529 + }, + { + "epoch": 1.8049374619384457, + "grad_norm": 0.5479695754639746, + "learning_rate": 1.2369257009440473e-07, + "loss": 0.2496, + "step": 38530 + }, + { + "epoch": 1.804984306928374, + "grad_norm": 0.5658458539605369, + "learning_rate": 1.236336613019798e-07, + "loss": 0.2722, + "step": 38531 + }, + { + "epoch": 1.8050311519183024, + "grad_norm": 0.5630593092647403, + "learning_rate": 1.235747661849465e-07, + "loss": 0.2579, + "step": 38532 + }, + { + "epoch": 1.8050779969082307, + "grad_norm": 0.5504509311934759, + "learning_rate": 1.235158847436424e-07, + "loss": 0.2488, + "step": 38533 + }, + { + "epoch": 1.805124841898159, + "grad_norm": 0.5904627548964226, + "learning_rate": 1.2345701697840773e-07, + "loss": 0.2708, + "step": 38534 + }, + { + "epoch": 1.8051716868880874, + "grad_norm": 0.5811814353532745, + "learning_rate": 1.233981628895803e-07, + "loss": 0.2509, + "step": 38535 + }, + { + "epoch": 1.8052185318780156, + "grad_norm": 0.6062244152518426, + "learning_rate": 1.2333932247749896e-07, + "loss": 0.275, + "step": 38536 + }, + { + "epoch": 1.8052653768679439, + "grad_norm": 0.6471583197860549, + "learning_rate": 1.2328049574250238e-07, + "loss": 0.2763, + "step": 38537 + }, + { + "epoch": 1.8053122218578723, + "grad_norm": 0.6179632791170099, + "learning_rate": 1.2322168268492945e-07, + "loss": 0.2783, + "step": 38538 + }, + { + "epoch": 1.8053590668478008, + "grad_norm": 0.587607473215347, + "learning_rate": 1.231628833051185e-07, + "loss": 0.2768, + "step": 38539 + }, + { + "epoch": 1.8054059118377288, + "grad_norm": 0.594573926207886, + "learning_rate": 1.2310409760340758e-07, + "loss": 0.2734, + "step": 38540 + }, + { + "epoch": 1.8054527568276573, + "grad_norm": 0.6201607262573476, + "learning_rate": 1.2304532558013532e-07, + "loss": 0.2845, + "step": 38541 + }, + { + "epoch": 1.8054996018175857, + "grad_norm": 0.5971865923685902, + "learning_rate": 1.2298656723563952e-07, + "loss": 0.2702, + "step": 38542 + }, + { + "epoch": 1.805546446807514, + "grad_norm": 0.60420338170753, + "learning_rate": 1.229278225702585e-07, + "loss": 0.2615, + "step": 38543 + }, + { + "epoch": 1.8055932917974422, + "grad_norm": 0.6252052868789911, + "learning_rate": 1.2286909158433057e-07, + "loss": 0.2856, + "step": 38544 + }, + { + "epoch": 1.8056401367873707, + "grad_norm": 0.6101143608914951, + "learning_rate": 1.2281037427819387e-07, + "loss": 0.2807, + "step": 38545 + }, + { + "epoch": 1.805686981777299, + "grad_norm": 0.5747289547723883, + "learning_rate": 1.2275167065218557e-07, + "loss": 0.2668, + "step": 38546 + }, + { + "epoch": 1.8057338267672272, + "grad_norm": 0.6209608363001711, + "learning_rate": 1.2269298070664375e-07, + "loss": 0.2678, + "step": 38547 + }, + { + "epoch": 1.8057806717571556, + "grad_norm": 0.5683375637375703, + "learning_rate": 1.2263430444190678e-07, + "loss": 0.2531, + "step": 38548 + }, + { + "epoch": 1.8058275167470839, + "grad_norm": 0.5954492758494898, + "learning_rate": 1.2257564185831155e-07, + "loss": 0.2699, + "step": 38549 + }, + { + "epoch": 1.805874361737012, + "grad_norm": 0.6363337253282249, + "learning_rate": 1.2251699295619618e-07, + "loss": 0.2948, + "step": 38550 + }, + { + "epoch": 1.8059212067269406, + "grad_norm": 0.5783971027913135, + "learning_rate": 1.2245835773589814e-07, + "loss": 0.2569, + "step": 38551 + }, + { + "epoch": 1.805968051716869, + "grad_norm": 0.6147280813877266, + "learning_rate": 1.2239973619775414e-07, + "loss": 0.2665, + "step": 38552 + }, + { + "epoch": 1.806014896706797, + "grad_norm": 0.6112026741921343, + "learning_rate": 1.2234112834210222e-07, + "loss": 0.282, + "step": 38553 + }, + { + "epoch": 1.8060617416967255, + "grad_norm": 0.6252357181625205, + "learning_rate": 1.2228253416927987e-07, + "loss": 0.2681, + "step": 38554 + }, + { + "epoch": 1.806108586686654, + "grad_norm": 0.6053635254812242, + "learning_rate": 1.2222395367962354e-07, + "loss": 0.2689, + "step": 38555 + }, + { + "epoch": 1.8061554316765822, + "grad_norm": 0.6086441903411505, + "learning_rate": 1.2216538687347068e-07, + "loss": 0.2738, + "step": 38556 + }, + { + "epoch": 1.8062022766665105, + "grad_norm": 0.5805494563922, + "learning_rate": 1.2210683375115885e-07, + "loss": 0.2667, + "step": 38557 + }, + { + "epoch": 1.806249121656439, + "grad_norm": 0.5880737255241102, + "learning_rate": 1.220482943130241e-07, + "loss": 0.2638, + "step": 38558 + }, + { + "epoch": 1.8062959666463672, + "grad_norm": 0.5918452748139381, + "learning_rate": 1.2198976855940375e-07, + "loss": 0.2655, + "step": 38559 + }, + { + "epoch": 1.8063428116362954, + "grad_norm": 0.6108940042684035, + "learning_rate": 1.2193125649063469e-07, + "loss": 0.278, + "step": 38560 + }, + { + "epoch": 1.8063896566262239, + "grad_norm": 0.6125521504292074, + "learning_rate": 1.2187275810705363e-07, + "loss": 0.2715, + "step": 38561 + }, + { + "epoch": 1.806436501616152, + "grad_norm": 0.5832992982547087, + "learning_rate": 1.2181427340899692e-07, + "loss": 0.2752, + "step": 38562 + }, + { + "epoch": 1.8064833466060803, + "grad_norm": 0.6603978549113695, + "learning_rate": 1.2175580239680184e-07, + "loss": 0.2867, + "step": 38563 + }, + { + "epoch": 1.8065301915960088, + "grad_norm": 0.5552309690703244, + "learning_rate": 1.2169734507080366e-07, + "loss": 0.246, + "step": 38564 + }, + { + "epoch": 1.8065770365859373, + "grad_norm": 0.5645962267780413, + "learning_rate": 1.216389014313399e-07, + "loss": 0.2624, + "step": 38565 + }, + { + "epoch": 1.8066238815758655, + "grad_norm": 0.6098283945946066, + "learning_rate": 1.2158047147874635e-07, + "loss": 0.2709, + "step": 38566 + }, + { + "epoch": 1.8066707265657937, + "grad_norm": 0.6120098107221115, + "learning_rate": 1.215220552133592e-07, + "loss": 0.2686, + "step": 38567 + }, + { + "epoch": 1.8067175715557222, + "grad_norm": 0.6245422838246488, + "learning_rate": 1.2146365263551534e-07, + "loss": 0.2972, + "step": 38568 + }, + { + "epoch": 1.8067644165456505, + "grad_norm": 0.5778079719556342, + "learning_rate": 1.214052637455504e-07, + "loss": 0.2697, + "step": 38569 + }, + { + "epoch": 1.8068112615355787, + "grad_norm": 0.6200276795451863, + "learning_rate": 1.213468885437999e-07, + "loss": 0.2727, + "step": 38570 + }, + { + "epoch": 1.8068581065255072, + "grad_norm": 0.5480457868882572, + "learning_rate": 1.2128852703059996e-07, + "loss": 0.2393, + "step": 38571 + }, + { + "epoch": 1.8069049515154354, + "grad_norm": 0.592447945987567, + "learning_rate": 1.2123017920628698e-07, + "loss": 0.2785, + "step": 38572 + }, + { + "epoch": 1.8069517965053636, + "grad_norm": 0.6010246077869491, + "learning_rate": 1.2117184507119628e-07, + "loss": 0.2793, + "step": 38573 + }, + { + "epoch": 1.806998641495292, + "grad_norm": 0.5469444104646617, + "learning_rate": 1.2111352462566396e-07, + "loss": 0.2569, + "step": 38574 + }, + { + "epoch": 1.8070454864852206, + "grad_norm": 0.5991870675299051, + "learning_rate": 1.2105521787002556e-07, + "loss": 0.2681, + "step": 38575 + }, + { + "epoch": 1.8070923314751486, + "grad_norm": 0.621458497678896, + "learning_rate": 1.209969248046164e-07, + "loss": 0.2762, + "step": 38576 + }, + { + "epoch": 1.807139176465077, + "grad_norm": 0.6082996562410604, + "learning_rate": 1.2093864542977174e-07, + "loss": 0.2643, + "step": 38577 + }, + { + "epoch": 1.8071860214550055, + "grad_norm": 0.6094547295759317, + "learning_rate": 1.2088037974582718e-07, + "loss": 0.2589, + "step": 38578 + }, + { + "epoch": 1.8072328664449337, + "grad_norm": 0.6063902191541676, + "learning_rate": 1.2082212775311825e-07, + "loss": 0.2615, + "step": 38579 + }, + { + "epoch": 1.807279711434862, + "grad_norm": 0.6460153586761699, + "learning_rate": 1.207638894519797e-07, + "loss": 0.3013, + "step": 38580 + }, + { + "epoch": 1.8073265564247905, + "grad_norm": 0.5653745362370034, + "learning_rate": 1.2070566484274764e-07, + "loss": 0.2589, + "step": 38581 + }, + { + "epoch": 1.8073734014147187, + "grad_norm": 0.6028776258359226, + "learning_rate": 1.2064745392575572e-07, + "loss": 0.271, + "step": 38582 + }, + { + "epoch": 1.807420246404647, + "grad_norm": 0.6074278292426176, + "learning_rate": 1.2058925670134031e-07, + "loss": 0.289, + "step": 38583 + }, + { + "epoch": 1.8074670913945754, + "grad_norm": 0.6170530342449099, + "learning_rate": 1.205310731698353e-07, + "loss": 0.2676, + "step": 38584 + }, + { + "epoch": 1.8075139363845036, + "grad_norm": 0.5892814309952464, + "learning_rate": 1.2047290333157575e-07, + "loss": 0.2612, + "step": 38585 + }, + { + "epoch": 1.8075607813744319, + "grad_norm": 0.6094070441064412, + "learning_rate": 1.2041474718689689e-07, + "loss": 0.2778, + "step": 38586 + }, + { + "epoch": 1.8076076263643603, + "grad_norm": 0.5956136575248769, + "learning_rate": 1.203566047361332e-07, + "loss": 0.2645, + "step": 38587 + }, + { + "epoch": 1.8076544713542888, + "grad_norm": 0.5798294396646936, + "learning_rate": 1.2029847597961887e-07, + "loss": 0.2737, + "step": 38588 + }, + { + "epoch": 1.8077013163442168, + "grad_norm": 0.618708362529504, + "learning_rate": 1.202403609176886e-07, + "loss": 0.2962, + "step": 38589 + }, + { + "epoch": 1.8077481613341453, + "grad_norm": 0.6152218474579478, + "learning_rate": 1.2018225955067713e-07, + "loss": 0.2787, + "step": 38590 + }, + { + "epoch": 1.8077950063240737, + "grad_norm": 0.5903510241441021, + "learning_rate": 1.201241718789184e-07, + "loss": 0.2813, + "step": 38591 + }, + { + "epoch": 1.807841851314002, + "grad_norm": 0.5962079370674112, + "learning_rate": 1.200660979027471e-07, + "loss": 0.2686, + "step": 38592 + }, + { + "epoch": 1.8078886963039302, + "grad_norm": 0.6053376746440202, + "learning_rate": 1.200080376224974e-07, + "loss": 0.2968, + "step": 38593 + }, + { + "epoch": 1.8079355412938587, + "grad_norm": 0.6312619161872168, + "learning_rate": 1.1994999103850297e-07, + "loss": 0.2721, + "step": 38594 + }, + { + "epoch": 1.807982386283787, + "grad_norm": 0.6109618801665194, + "learning_rate": 1.1989195815109795e-07, + "loss": 0.2708, + "step": 38595 + }, + { + "epoch": 1.8080292312737152, + "grad_norm": 0.6025386605609762, + "learning_rate": 1.1983393896061678e-07, + "loss": 0.2786, + "step": 38596 + }, + { + "epoch": 1.8080760762636436, + "grad_norm": 0.5889924543722993, + "learning_rate": 1.1977593346739315e-07, + "loss": 0.2587, + "step": 38597 + }, + { + "epoch": 1.8081229212535719, + "grad_norm": 0.6303212482583628, + "learning_rate": 1.1971794167176059e-07, + "loss": 0.2655, + "step": 38598 + }, + { + "epoch": 1.8081697662435001, + "grad_norm": 0.6025464964227909, + "learning_rate": 1.1965996357405334e-07, + "loss": 0.2662, + "step": 38599 + }, + { + "epoch": 1.8082166112334286, + "grad_norm": 0.5820500591500244, + "learning_rate": 1.1960199917460447e-07, + "loss": 0.2704, + "step": 38600 + }, + { + "epoch": 1.808263456223357, + "grad_norm": 0.5681949769059959, + "learning_rate": 1.1954404847374756e-07, + "loss": 0.2473, + "step": 38601 + }, + { + "epoch": 1.8083103012132853, + "grad_norm": 0.6108040994848606, + "learning_rate": 1.1948611147181654e-07, + "loss": 0.2751, + "step": 38602 + }, + { + "epoch": 1.8083571462032135, + "grad_norm": 0.6203901046771426, + "learning_rate": 1.1942818816914504e-07, + "loss": 0.2699, + "step": 38603 + }, + { + "epoch": 1.808403991193142, + "grad_norm": 0.5844504071345197, + "learning_rate": 1.1937027856606555e-07, + "loss": 0.277, + "step": 38604 + }, + { + "epoch": 1.8084508361830702, + "grad_norm": 0.6064135266828247, + "learning_rate": 1.193123826629117e-07, + "loss": 0.2681, + "step": 38605 + }, + { + "epoch": 1.8084976811729985, + "grad_norm": 0.5815532945607088, + "learning_rate": 1.1925450046001709e-07, + "loss": 0.264, + "step": 38606 + }, + { + "epoch": 1.808544526162927, + "grad_norm": 0.6102090764426494, + "learning_rate": 1.1919663195771396e-07, + "loss": 0.2826, + "step": 38607 + }, + { + "epoch": 1.8085913711528552, + "grad_norm": 0.6061392037769039, + "learning_rate": 1.1913877715633599e-07, + "loss": 0.2653, + "step": 38608 + }, + { + "epoch": 1.8086382161427834, + "grad_norm": 0.6124589528222756, + "learning_rate": 1.190809360562159e-07, + "loss": 0.2795, + "step": 38609 + }, + { + "epoch": 1.8086850611327119, + "grad_norm": 0.5721779031092747, + "learning_rate": 1.1902310865768707e-07, + "loss": 0.2591, + "step": 38610 + }, + { + "epoch": 1.8087319061226403, + "grad_norm": 0.603713848659284, + "learning_rate": 1.1896529496108145e-07, + "loss": 0.2822, + "step": 38611 + }, + { + "epoch": 1.8087787511125684, + "grad_norm": 0.6210150376113027, + "learning_rate": 1.1890749496673237e-07, + "loss": 0.2727, + "step": 38612 + }, + { + "epoch": 1.8088255961024968, + "grad_norm": 0.5845384654021629, + "learning_rate": 1.188497086749718e-07, + "loss": 0.254, + "step": 38613 + }, + { + "epoch": 1.8088724410924253, + "grad_norm": 0.5822249892015184, + "learning_rate": 1.1879193608613282e-07, + "loss": 0.2776, + "step": 38614 + }, + { + "epoch": 1.8089192860823535, + "grad_norm": 0.6208628880357036, + "learning_rate": 1.1873417720054764e-07, + "loss": 0.2688, + "step": 38615 + }, + { + "epoch": 1.8089661310722818, + "grad_norm": 0.5665078343186495, + "learning_rate": 1.1867643201854878e-07, + "loss": 0.276, + "step": 38616 + }, + { + "epoch": 1.8090129760622102, + "grad_norm": 0.5793448496136031, + "learning_rate": 1.1861870054046876e-07, + "loss": 0.2662, + "step": 38617 + }, + { + "epoch": 1.8090598210521385, + "grad_norm": 0.5917939467069614, + "learning_rate": 1.1856098276663979e-07, + "loss": 0.2683, + "step": 38618 + }, + { + "epoch": 1.8091066660420667, + "grad_norm": 0.5287773776920587, + "learning_rate": 1.185032786973933e-07, + "loss": 0.2482, + "step": 38619 + }, + { + "epoch": 1.8091535110319952, + "grad_norm": 0.5779594484050108, + "learning_rate": 1.1844558833306208e-07, + "loss": 0.2619, + "step": 38620 + }, + { + "epoch": 1.8092003560219234, + "grad_norm": 0.6249597302526705, + "learning_rate": 1.1838791167397779e-07, + "loss": 0.2963, + "step": 38621 + }, + { + "epoch": 1.8092472010118517, + "grad_norm": 0.6054519156460414, + "learning_rate": 1.1833024872047238e-07, + "loss": 0.2696, + "step": 38622 + }, + { + "epoch": 1.8092940460017801, + "grad_norm": 0.5557531721174861, + "learning_rate": 1.1827259947287784e-07, + "loss": 0.2584, + "step": 38623 + }, + { + "epoch": 1.8093408909917086, + "grad_norm": 0.6253132055277117, + "learning_rate": 1.1821496393152638e-07, + "loss": 0.2727, + "step": 38624 + }, + { + "epoch": 1.8093877359816366, + "grad_norm": 0.5734696619236138, + "learning_rate": 1.1815734209674884e-07, + "loss": 0.2717, + "step": 38625 + }, + { + "epoch": 1.809434580971565, + "grad_norm": 0.6019236716441301, + "learning_rate": 1.1809973396887692e-07, + "loss": 0.2725, + "step": 38626 + }, + { + "epoch": 1.8094814259614935, + "grad_norm": 0.5683119953758118, + "learning_rate": 1.1804213954824256e-07, + "loss": 0.2583, + "step": 38627 + }, + { + "epoch": 1.8095282709514218, + "grad_norm": 0.6207090716712401, + "learning_rate": 1.179845588351769e-07, + "loss": 0.2819, + "step": 38628 + }, + { + "epoch": 1.80957511594135, + "grad_norm": 0.6283724433070949, + "learning_rate": 1.1792699183001133e-07, + "loss": 0.2843, + "step": 38629 + }, + { + "epoch": 1.8096219609312785, + "grad_norm": 0.5994239697223567, + "learning_rate": 1.1786943853307753e-07, + "loss": 0.2561, + "step": 38630 + }, + { + "epoch": 1.8096688059212067, + "grad_norm": 0.59644726621446, + "learning_rate": 1.1781189894470607e-07, + "loss": 0.2738, + "step": 38631 + }, + { + "epoch": 1.809715650911135, + "grad_norm": 0.6133747005218321, + "learning_rate": 1.1775437306522891e-07, + "loss": 0.2657, + "step": 38632 + }, + { + "epoch": 1.8097624959010634, + "grad_norm": 0.6076479075299196, + "learning_rate": 1.1769686089497606e-07, + "loss": 0.2583, + "step": 38633 + }, + { + "epoch": 1.8098093408909917, + "grad_norm": 0.5270378626264295, + "learning_rate": 1.1763936243427921e-07, + "loss": 0.2346, + "step": 38634 + }, + { + "epoch": 1.80985618588092, + "grad_norm": 0.5626095155347579, + "learning_rate": 1.175818776834689e-07, + "loss": 0.2629, + "step": 38635 + }, + { + "epoch": 1.8099030308708484, + "grad_norm": 0.6070858206865206, + "learning_rate": 1.1752440664287629e-07, + "loss": 0.2678, + "step": 38636 + }, + { + "epoch": 1.8099498758607768, + "grad_norm": 0.5962605006274043, + "learning_rate": 1.1746694931283165e-07, + "loss": 0.2539, + "step": 38637 + }, + { + "epoch": 1.809996720850705, + "grad_norm": 0.5739501261274813, + "learning_rate": 1.1740950569366582e-07, + "loss": 0.2631, + "step": 38638 + }, + { + "epoch": 1.8100435658406333, + "grad_norm": 0.6152034803510387, + "learning_rate": 1.1735207578570996e-07, + "loss": 0.2687, + "step": 38639 + }, + { + "epoch": 1.8100904108305618, + "grad_norm": 0.6015797265192795, + "learning_rate": 1.1729465958929375e-07, + "loss": 0.2631, + "step": 38640 + }, + { + "epoch": 1.81013725582049, + "grad_norm": 0.5873560019322364, + "learning_rate": 1.1723725710474781e-07, + "loss": 0.28, + "step": 38641 + }, + { + "epoch": 1.8101841008104183, + "grad_norm": 0.6132439255666493, + "learning_rate": 1.1717986833240297e-07, + "loss": 0.281, + "step": 38642 + }, + { + "epoch": 1.8102309458003467, + "grad_norm": 0.6116017748806857, + "learning_rate": 1.171224932725884e-07, + "loss": 0.2744, + "step": 38643 + }, + { + "epoch": 1.810277790790275, + "grad_norm": 0.5917164038934942, + "learning_rate": 1.1706513192563551e-07, + "loss": 0.272, + "step": 38644 + }, + { + "epoch": 1.8103246357802032, + "grad_norm": 0.5814006070405355, + "learning_rate": 1.1700778429187376e-07, + "loss": 0.2597, + "step": 38645 + }, + { + "epoch": 1.8103714807701317, + "grad_norm": 0.590822560892829, + "learning_rate": 1.1695045037163317e-07, + "loss": 0.2763, + "step": 38646 + }, + { + "epoch": 1.8104183257600601, + "grad_norm": 0.5502580971060388, + "learning_rate": 1.1689313016524374e-07, + "loss": 0.2608, + "step": 38647 + }, + { + "epoch": 1.8104651707499881, + "grad_norm": 0.5880188051030057, + "learning_rate": 1.1683582367303547e-07, + "loss": 0.259, + "step": 38648 + }, + { + "epoch": 1.8105120157399166, + "grad_norm": 0.6337543542784543, + "learning_rate": 1.1677853089533814e-07, + "loss": 0.2849, + "step": 38649 + }, + { + "epoch": 1.810558860729845, + "grad_norm": 0.5884763630655947, + "learning_rate": 1.1672125183248117e-07, + "loss": 0.2587, + "step": 38650 + }, + { + "epoch": 1.8106057057197733, + "grad_norm": 0.617204590389752, + "learning_rate": 1.1666398648479432e-07, + "loss": 0.2637, + "step": 38651 + }, + { + "epoch": 1.8106525507097015, + "grad_norm": 0.5787324999994664, + "learning_rate": 1.166067348526076e-07, + "loss": 0.2691, + "step": 38652 + }, + { + "epoch": 1.81069939569963, + "grad_norm": 0.5917593229766643, + "learning_rate": 1.1654949693624962e-07, + "loss": 0.278, + "step": 38653 + }, + { + "epoch": 1.8107462406895583, + "grad_norm": 0.6026840893353945, + "learning_rate": 1.1649227273605068e-07, + "loss": 0.2765, + "step": 38654 + }, + { + "epoch": 1.8107930856794865, + "grad_norm": 0.6242728246020544, + "learning_rate": 1.1643506225233913e-07, + "loss": 0.2763, + "step": 38655 + }, + { + "epoch": 1.810839930669415, + "grad_norm": 0.5795401689219276, + "learning_rate": 1.16377865485445e-07, + "loss": 0.2598, + "step": 38656 + }, + { + "epoch": 1.8108867756593432, + "grad_norm": 0.6100881345410013, + "learning_rate": 1.1632068243569688e-07, + "loss": 0.2699, + "step": 38657 + }, + { + "epoch": 1.8109336206492714, + "grad_norm": 0.6055254153228429, + "learning_rate": 1.1626351310342427e-07, + "loss": 0.2814, + "step": 38658 + }, + { + "epoch": 1.8109804656392, + "grad_norm": 0.6458617863941629, + "learning_rate": 1.162063574889563e-07, + "loss": 0.2839, + "step": 38659 + }, + { + "epoch": 1.8110273106291284, + "grad_norm": 0.620859045816916, + "learning_rate": 1.1614921559262165e-07, + "loss": 0.277, + "step": 38660 + }, + { + "epoch": 1.8110741556190564, + "grad_norm": 0.5681633281559567, + "learning_rate": 1.1609208741474892e-07, + "loss": 0.2637, + "step": 38661 + }, + { + "epoch": 1.8111210006089848, + "grad_norm": 0.6019821895911645, + "learning_rate": 1.1603497295566673e-07, + "loss": 0.252, + "step": 38662 + }, + { + "epoch": 1.8111678455989133, + "grad_norm": 0.5825686969782685, + "learning_rate": 1.1597787221570428e-07, + "loss": 0.268, + "step": 38663 + }, + { + "epoch": 1.8112146905888415, + "grad_norm": 0.596294971008741, + "learning_rate": 1.159207851951899e-07, + "loss": 0.2647, + "step": 38664 + }, + { + "epoch": 1.8112615355787698, + "grad_norm": 0.5789293007293417, + "learning_rate": 1.1586371189445223e-07, + "loss": 0.2682, + "step": 38665 + }, + { + "epoch": 1.8113083805686983, + "grad_norm": 0.5445154090943732, + "learning_rate": 1.1580665231381988e-07, + "loss": 0.2474, + "step": 38666 + }, + { + "epoch": 1.8113552255586265, + "grad_norm": 0.6175567756326561, + "learning_rate": 1.1574960645362122e-07, + "loss": 0.2665, + "step": 38667 + }, + { + "epoch": 1.8114020705485547, + "grad_norm": 0.6136378408623423, + "learning_rate": 1.1569257431418402e-07, + "loss": 0.2778, + "step": 38668 + }, + { + "epoch": 1.8114489155384832, + "grad_norm": 0.60253592962761, + "learning_rate": 1.1563555589583663e-07, + "loss": 0.2811, + "step": 38669 + }, + { + "epoch": 1.8114957605284114, + "grad_norm": 0.605826141153534, + "learning_rate": 1.1557855119890715e-07, + "loss": 0.2788, + "step": 38670 + }, + { + "epoch": 1.8115426055183397, + "grad_norm": 0.6000736804952465, + "learning_rate": 1.155215602237239e-07, + "loss": 0.2658, + "step": 38671 + }, + { + "epoch": 1.8115894505082681, + "grad_norm": 0.6150443947020656, + "learning_rate": 1.1546458297061496e-07, + "loss": 0.274, + "step": 38672 + }, + { + "epoch": 1.8116362954981966, + "grad_norm": 0.5752585049166964, + "learning_rate": 1.1540761943990814e-07, + "loss": 0.2479, + "step": 38673 + }, + { + "epoch": 1.8116831404881248, + "grad_norm": 0.5498169746057532, + "learning_rate": 1.153506696319312e-07, + "loss": 0.2504, + "step": 38674 + }, + { + "epoch": 1.811729985478053, + "grad_norm": 0.6077055001822231, + "learning_rate": 1.1529373354701168e-07, + "loss": 0.2689, + "step": 38675 + }, + { + "epoch": 1.8117768304679815, + "grad_norm": 0.5589434208580608, + "learning_rate": 1.1523681118547708e-07, + "loss": 0.2609, + "step": 38676 + }, + { + "epoch": 1.8118236754579098, + "grad_norm": 0.6304593509139057, + "learning_rate": 1.151799025476552e-07, + "loss": 0.266, + "step": 38677 + }, + { + "epoch": 1.811870520447838, + "grad_norm": 0.59451215778334, + "learning_rate": 1.1512300763387385e-07, + "loss": 0.2535, + "step": 38678 + }, + { + "epoch": 1.8119173654377665, + "grad_norm": 0.6004724208528944, + "learning_rate": 1.1506612644446025e-07, + "loss": 0.2706, + "step": 38679 + }, + { + "epoch": 1.8119642104276947, + "grad_norm": 0.6075669330550264, + "learning_rate": 1.1500925897974136e-07, + "loss": 0.2752, + "step": 38680 + }, + { + "epoch": 1.812011055417623, + "grad_norm": 0.5953692487272042, + "learning_rate": 1.1495240524004526e-07, + "loss": 0.2689, + "step": 38681 + }, + { + "epoch": 1.8120579004075514, + "grad_norm": 0.5788854721179875, + "learning_rate": 1.1489556522569805e-07, + "loss": 0.265, + "step": 38682 + }, + { + "epoch": 1.81210474539748, + "grad_norm": 0.5584660494672747, + "learning_rate": 1.1483873893702757e-07, + "loss": 0.2629, + "step": 38683 + }, + { + "epoch": 1.812151590387408, + "grad_norm": 0.5894431438910374, + "learning_rate": 1.1478192637436048e-07, + "loss": 0.2667, + "step": 38684 + }, + { + "epoch": 1.8121984353773364, + "grad_norm": 0.9225334560401202, + "learning_rate": 1.1472512753802456e-07, + "loss": 0.2824, + "step": 38685 + }, + { + "epoch": 1.8122452803672648, + "grad_norm": 0.6620999754702507, + "learning_rate": 1.146683424283454e-07, + "loss": 0.2869, + "step": 38686 + }, + { + "epoch": 1.812292125357193, + "grad_norm": 0.6289485862553823, + "learning_rate": 1.1461157104565051e-07, + "loss": 0.2643, + "step": 38687 + }, + { + "epoch": 1.8123389703471213, + "grad_norm": 0.5891203582273431, + "learning_rate": 1.1455481339026658e-07, + "loss": 0.2686, + "step": 38688 + }, + { + "epoch": 1.8123858153370498, + "grad_norm": 0.6200492857633568, + "learning_rate": 1.1449806946252001e-07, + "loss": 0.2663, + "step": 38689 + }, + { + "epoch": 1.812432660326978, + "grad_norm": 0.6208430497685786, + "learning_rate": 1.1444133926273748e-07, + "loss": 0.2795, + "step": 38690 + }, + { + "epoch": 1.8124795053169063, + "grad_norm": 0.5941709132202461, + "learning_rate": 1.1438462279124568e-07, + "loss": 0.2584, + "step": 38691 + }, + { + "epoch": 1.8125263503068347, + "grad_norm": 0.5566260664143979, + "learning_rate": 1.1432792004837073e-07, + "loss": 0.2665, + "step": 38692 + }, + { + "epoch": 1.812573195296763, + "grad_norm": 0.6146428013665227, + "learning_rate": 1.1427123103443877e-07, + "loss": 0.2536, + "step": 38693 + }, + { + "epoch": 1.8126200402866912, + "grad_norm": 0.6054223522058778, + "learning_rate": 1.1421455574977647e-07, + "loss": 0.2944, + "step": 38694 + }, + { + "epoch": 1.8126668852766197, + "grad_norm": 0.6091276845232473, + "learning_rate": 1.1415789419470968e-07, + "loss": 0.2658, + "step": 38695 + }, + { + "epoch": 1.8127137302665481, + "grad_norm": 0.5633942833397384, + "learning_rate": 1.1410124636956426e-07, + "loss": 0.2631, + "step": 38696 + }, + { + "epoch": 1.8127605752564762, + "grad_norm": 0.6349467052297851, + "learning_rate": 1.1404461227466717e-07, + "loss": 0.2685, + "step": 38697 + }, + { + "epoch": 1.8128074202464046, + "grad_norm": 0.6144864448377259, + "learning_rate": 1.1398799191034315e-07, + "loss": 0.2806, + "step": 38698 + }, + { + "epoch": 1.812854265236333, + "grad_norm": 0.6025887803462452, + "learning_rate": 1.1393138527691832e-07, + "loss": 0.2648, + "step": 38699 + }, + { + "epoch": 1.8129011102262613, + "grad_norm": 0.6058157944677729, + "learning_rate": 1.1387479237471882e-07, + "loss": 0.268, + "step": 38700 + }, + { + "epoch": 1.8129479552161896, + "grad_norm": 0.5806679334920289, + "learning_rate": 1.138182132040705e-07, + "loss": 0.2778, + "step": 38701 + }, + { + "epoch": 1.812994800206118, + "grad_norm": 0.572098884144273, + "learning_rate": 1.1376164776529836e-07, + "loss": 0.2566, + "step": 38702 + }, + { + "epoch": 1.8130416451960463, + "grad_norm": 0.6610828582346667, + "learning_rate": 1.1370509605872854e-07, + "loss": 0.2848, + "step": 38703 + }, + { + "epoch": 1.8130884901859745, + "grad_norm": 0.6213406105122623, + "learning_rate": 1.1364855808468578e-07, + "loss": 0.2773, + "step": 38704 + }, + { + "epoch": 1.813135335175903, + "grad_norm": 0.5592932401478692, + "learning_rate": 1.1359203384349565e-07, + "loss": 0.2512, + "step": 38705 + }, + { + "epoch": 1.8131821801658312, + "grad_norm": 0.6315662842505109, + "learning_rate": 1.1353552333548346e-07, + "loss": 0.2882, + "step": 38706 + }, + { + "epoch": 1.8132290251557595, + "grad_norm": 0.5903511535201144, + "learning_rate": 1.1347902656097476e-07, + "loss": 0.2604, + "step": 38707 + }, + { + "epoch": 1.813275870145688, + "grad_norm": 0.6244059536382879, + "learning_rate": 1.1342254352029459e-07, + "loss": 0.2652, + "step": 38708 + }, + { + "epoch": 1.8133227151356164, + "grad_norm": 0.5737678976728554, + "learning_rate": 1.1336607421376794e-07, + "loss": 0.272, + "step": 38709 + }, + { + "epoch": 1.8133695601255446, + "grad_norm": 0.6237813173835682, + "learning_rate": 1.133096186417193e-07, + "loss": 0.2776, + "step": 38710 + }, + { + "epoch": 1.8134164051154729, + "grad_norm": 0.6564753622030277, + "learning_rate": 1.1325317680447395e-07, + "loss": 0.2774, + "step": 38711 + }, + { + "epoch": 1.8134632501054013, + "grad_norm": 0.6024752926938257, + "learning_rate": 1.1319674870235692e-07, + "loss": 0.2646, + "step": 38712 + }, + { + "epoch": 1.8135100950953296, + "grad_norm": 0.5813322066557999, + "learning_rate": 1.1314033433569238e-07, + "loss": 0.2674, + "step": 38713 + }, + { + "epoch": 1.8135569400852578, + "grad_norm": 0.6278986625769345, + "learning_rate": 1.1308393370480536e-07, + "loss": 0.278, + "step": 38714 + }, + { + "epoch": 1.8136037850751863, + "grad_norm": 0.5778334931561199, + "learning_rate": 1.1302754681002087e-07, + "loss": 0.2602, + "step": 38715 + }, + { + "epoch": 1.8136506300651145, + "grad_norm": 0.5763614716487224, + "learning_rate": 1.1297117365166255e-07, + "loss": 0.2597, + "step": 38716 + }, + { + "epoch": 1.8136974750550428, + "grad_norm": 0.5823624571744096, + "learning_rate": 1.1291481423005513e-07, + "loss": 0.2533, + "step": 38717 + }, + { + "epoch": 1.8137443200449712, + "grad_norm": 0.5562728730510941, + "learning_rate": 1.1285846854552307e-07, + "loss": 0.2623, + "step": 38718 + }, + { + "epoch": 1.8137911650348997, + "grad_norm": 0.6179413642853651, + "learning_rate": 1.1280213659839029e-07, + "loss": 0.2673, + "step": 38719 + }, + { + "epoch": 1.8138380100248277, + "grad_norm": 0.5908767929264281, + "learning_rate": 1.1274581838898124e-07, + "loss": 0.2676, + "step": 38720 + }, + { + "epoch": 1.8138848550147562, + "grad_norm": 0.5752436414950006, + "learning_rate": 1.1268951391762039e-07, + "loss": 0.2622, + "step": 38721 + }, + { + "epoch": 1.8139317000046846, + "grad_norm": 0.6205274640426263, + "learning_rate": 1.1263322318463111e-07, + "loss": 0.2714, + "step": 38722 + }, + { + "epoch": 1.8139785449946129, + "grad_norm": 0.6035742632033739, + "learning_rate": 1.1257694619033782e-07, + "loss": 0.2653, + "step": 38723 + }, + { + "epoch": 1.814025389984541, + "grad_norm": 0.6011653937818572, + "learning_rate": 1.1252068293506391e-07, + "loss": 0.2637, + "step": 38724 + }, + { + "epoch": 1.8140722349744696, + "grad_norm": 0.5850236731616792, + "learning_rate": 1.1246443341913327e-07, + "loss": 0.279, + "step": 38725 + }, + { + "epoch": 1.8141190799643978, + "grad_norm": 0.6127235913374038, + "learning_rate": 1.1240819764286981e-07, + "loss": 0.2864, + "step": 38726 + }, + { + "epoch": 1.814165924954326, + "grad_norm": 0.6038105667810034, + "learning_rate": 1.1235197560659744e-07, + "loss": 0.2593, + "step": 38727 + }, + { + "epoch": 1.8142127699442545, + "grad_norm": 0.556958478476123, + "learning_rate": 1.1229576731063896e-07, + "loss": 0.2657, + "step": 38728 + }, + { + "epoch": 1.8142596149341828, + "grad_norm": 0.5763460886484449, + "learning_rate": 1.12239572755318e-07, + "loss": 0.2727, + "step": 38729 + }, + { + "epoch": 1.814306459924111, + "grad_norm": 0.5778122038008762, + "learning_rate": 1.1218339194095873e-07, + "loss": 0.2566, + "step": 38730 + }, + { + "epoch": 1.8143533049140395, + "grad_norm": 0.6067434716829136, + "learning_rate": 1.1212722486788368e-07, + "loss": 0.2767, + "step": 38731 + }, + { + "epoch": 1.814400149903968, + "grad_norm": 0.612895417213469, + "learning_rate": 1.1207107153641594e-07, + "loss": 0.2759, + "step": 38732 + }, + { + "epoch": 1.814446994893896, + "grad_norm": 0.5560896161912704, + "learning_rate": 1.1201493194687968e-07, + "loss": 0.246, + "step": 38733 + }, + { + "epoch": 1.8144938398838244, + "grad_norm": 0.6392770954485356, + "learning_rate": 1.1195880609959658e-07, + "loss": 0.2833, + "step": 38734 + }, + { + "epoch": 1.8145406848737529, + "grad_norm": 0.6590295178753603, + "learning_rate": 1.1190269399489056e-07, + "loss": 0.2867, + "step": 38735 + }, + { + "epoch": 1.814587529863681, + "grad_norm": 0.6124469197157096, + "learning_rate": 1.1184659563308442e-07, + "loss": 0.2616, + "step": 38736 + }, + { + "epoch": 1.8146343748536093, + "grad_norm": 0.5675989772821096, + "learning_rate": 1.1179051101450095e-07, + "loss": 0.2643, + "step": 38737 + }, + { + "epoch": 1.8146812198435378, + "grad_norm": 0.6034706696280752, + "learning_rate": 1.1173444013946266e-07, + "loss": 0.2611, + "step": 38738 + }, + { + "epoch": 1.814728064833466, + "grad_norm": 0.5789436842484654, + "learning_rate": 1.1167838300829236e-07, + "loss": 0.2639, + "step": 38739 + }, + { + "epoch": 1.8147749098233943, + "grad_norm": 0.5823239152200633, + "learning_rate": 1.1162233962131314e-07, + "loss": 0.264, + "step": 38740 + }, + { + "epoch": 1.8148217548133228, + "grad_norm": 0.6168389923152572, + "learning_rate": 1.1156630997884666e-07, + "loss": 0.2606, + "step": 38741 + }, + { + "epoch": 1.814868599803251, + "grad_norm": 0.5679532272158712, + "learning_rate": 1.1151029408121572e-07, + "loss": 0.2709, + "step": 38742 + }, + { + "epoch": 1.8149154447931792, + "grad_norm": 0.6227134509276817, + "learning_rate": 1.1145429192874285e-07, + "loss": 0.2719, + "step": 38743 + }, + { + "epoch": 1.8149622897831077, + "grad_norm": 0.5831845290698806, + "learning_rate": 1.1139830352175029e-07, + "loss": 0.248, + "step": 38744 + }, + { + "epoch": 1.8150091347730362, + "grad_norm": 0.6216138759633003, + "learning_rate": 1.1134232886055974e-07, + "loss": 0.2821, + "step": 38745 + }, + { + "epoch": 1.8150559797629644, + "grad_norm": 0.5897786419400868, + "learning_rate": 1.1128636794549424e-07, + "loss": 0.2597, + "step": 38746 + }, + { + "epoch": 1.8151028247528926, + "grad_norm": 0.6070917465085665, + "learning_rate": 1.1123042077687495e-07, + "loss": 0.2669, + "step": 38747 + }, + { + "epoch": 1.815149669742821, + "grad_norm": 0.6245570821911738, + "learning_rate": 1.1117448735502413e-07, + "loss": 0.277, + "step": 38748 + }, + { + "epoch": 1.8151965147327493, + "grad_norm": 0.5848775004761269, + "learning_rate": 1.1111856768026396e-07, + "loss": 0.2664, + "step": 38749 + }, + { + "epoch": 1.8152433597226776, + "grad_norm": 0.5590697597966958, + "learning_rate": 1.1106266175291619e-07, + "loss": 0.2567, + "step": 38750 + }, + { + "epoch": 1.815290204712606, + "grad_norm": 0.6047725667309353, + "learning_rate": 1.1100676957330192e-07, + "loss": 0.2759, + "step": 38751 + }, + { + "epoch": 1.8153370497025343, + "grad_norm": 0.601465658172591, + "learning_rate": 1.1095089114174367e-07, + "loss": 0.261, + "step": 38752 + }, + { + "epoch": 1.8153838946924625, + "grad_norm": 0.5871123748297136, + "learning_rate": 1.1089502645856232e-07, + "loss": 0.2639, + "step": 38753 + }, + { + "epoch": 1.815430739682391, + "grad_norm": 0.5535984963762278, + "learning_rate": 1.1083917552407952e-07, + "loss": 0.2592, + "step": 38754 + }, + { + "epoch": 1.8154775846723195, + "grad_norm": 0.5780309877063936, + "learning_rate": 1.1078333833861671e-07, + "loss": 0.2863, + "step": 38755 + }, + { + "epoch": 1.8155244296622475, + "grad_norm": 0.6477005937520627, + "learning_rate": 1.1072751490249556e-07, + "loss": 0.2795, + "step": 38756 + }, + { + "epoch": 1.815571274652176, + "grad_norm": 0.6209689348310853, + "learning_rate": 1.1067170521603693e-07, + "loss": 0.2749, + "step": 38757 + }, + { + "epoch": 1.8156181196421044, + "grad_norm": 0.5787489421459142, + "learning_rate": 1.106159092795625e-07, + "loss": 0.2714, + "step": 38758 + }, + { + "epoch": 1.8156649646320326, + "grad_norm": 0.5846083846550542, + "learning_rate": 1.1056012709339231e-07, + "loss": 0.2658, + "step": 38759 + }, + { + "epoch": 1.8157118096219609, + "grad_norm": 0.611654703723256, + "learning_rate": 1.1050435865784831e-07, + "loss": 0.2824, + "step": 38760 + }, + { + "epoch": 1.8157586546118893, + "grad_norm": 0.5945254606818525, + "learning_rate": 1.1044860397325107e-07, + "loss": 0.2549, + "step": 38761 + }, + { + "epoch": 1.8158054996018176, + "grad_norm": 0.5786365990650462, + "learning_rate": 1.1039286303992148e-07, + "loss": 0.2717, + "step": 38762 + }, + { + "epoch": 1.8158523445917458, + "grad_norm": 0.6188971899585356, + "learning_rate": 1.1033713585818034e-07, + "loss": 0.2851, + "step": 38763 + }, + { + "epoch": 1.8158991895816743, + "grad_norm": 0.5882510184620804, + "learning_rate": 1.1028142242834883e-07, + "loss": 0.2533, + "step": 38764 + }, + { + "epoch": 1.8159460345716025, + "grad_norm": 0.6150569432659002, + "learning_rate": 1.1022572275074695e-07, + "loss": 0.2727, + "step": 38765 + }, + { + "epoch": 1.8159928795615308, + "grad_norm": 0.5680848748974648, + "learning_rate": 1.10170036825695e-07, + "loss": 0.2613, + "step": 38766 + }, + { + "epoch": 1.8160397245514592, + "grad_norm": 0.6155286821994553, + "learning_rate": 1.1011436465351411e-07, + "loss": 0.2833, + "step": 38767 + }, + { + "epoch": 1.8160865695413877, + "grad_norm": 0.5989228286453377, + "learning_rate": 1.1005870623452403e-07, + "loss": 0.2695, + "step": 38768 + }, + { + "epoch": 1.8161334145313157, + "grad_norm": 0.6293011931973141, + "learning_rate": 1.1000306156904561e-07, + "loss": 0.2683, + "step": 38769 + }, + { + "epoch": 1.8161802595212442, + "grad_norm": 0.5866776632512419, + "learning_rate": 1.0994743065739915e-07, + "loss": 0.2652, + "step": 38770 + }, + { + "epoch": 1.8162271045111726, + "grad_norm": 0.5898442890417954, + "learning_rate": 1.0989181349990413e-07, + "loss": 0.2624, + "step": 38771 + }, + { + "epoch": 1.8162739495011009, + "grad_norm": 0.6317333082224066, + "learning_rate": 1.0983621009688139e-07, + "loss": 0.2747, + "step": 38772 + }, + { + "epoch": 1.8163207944910291, + "grad_norm": 0.6288607956106168, + "learning_rate": 1.0978062044865012e-07, + "loss": 0.2879, + "step": 38773 + }, + { + "epoch": 1.8163676394809576, + "grad_norm": 0.6455721684898857, + "learning_rate": 1.0972504455553062e-07, + "loss": 0.277, + "step": 38774 + }, + { + "epoch": 1.8164144844708858, + "grad_norm": 0.6012668956695569, + "learning_rate": 1.0966948241784292e-07, + "loss": 0.2589, + "step": 38775 + }, + { + "epoch": 1.816461329460814, + "grad_norm": 0.5774506971630714, + "learning_rate": 1.0961393403590675e-07, + "loss": 0.2726, + "step": 38776 + }, + { + "epoch": 1.8165081744507425, + "grad_norm": 0.6100434418276017, + "learning_rate": 1.0955839941004132e-07, + "loss": 0.2764, + "step": 38777 + }, + { + "epoch": 1.8165550194406708, + "grad_norm": 0.6075262736495318, + "learning_rate": 1.0950287854056635e-07, + "loss": 0.2702, + "step": 38778 + }, + { + "epoch": 1.816601864430599, + "grad_norm": 0.5754106666022903, + "learning_rate": 1.0944737142780187e-07, + "loss": 0.2661, + "step": 38779 + }, + { + "epoch": 1.8166487094205275, + "grad_norm": 0.5759554535433634, + "learning_rate": 1.0939187807206653e-07, + "loss": 0.2576, + "step": 38780 + }, + { + "epoch": 1.816695554410456, + "grad_norm": 0.6080162323867373, + "learning_rate": 1.0933639847368033e-07, + "loss": 0.2838, + "step": 38781 + }, + { + "epoch": 1.8167423994003842, + "grad_norm": 0.620182608944764, + "learning_rate": 1.092809326329622e-07, + "loss": 0.2778, + "step": 38782 + }, + { + "epoch": 1.8167892443903124, + "grad_norm": 0.5929275362970283, + "learning_rate": 1.0922548055023158e-07, + "loss": 0.2849, + "step": 38783 + }, + { + "epoch": 1.8168360893802409, + "grad_norm": 0.6318933519676779, + "learning_rate": 1.0917004222580712e-07, + "loss": 0.2918, + "step": 38784 + }, + { + "epoch": 1.8168829343701691, + "grad_norm": 0.5716850798764448, + "learning_rate": 1.091146176600083e-07, + "loss": 0.2665, + "step": 38785 + }, + { + "epoch": 1.8169297793600974, + "grad_norm": 0.6361695488942104, + "learning_rate": 1.0905920685315402e-07, + "loss": 0.2915, + "step": 38786 + }, + { + "epoch": 1.8169766243500258, + "grad_norm": 0.5812535099702422, + "learning_rate": 1.0900380980556291e-07, + "loss": 0.2639, + "step": 38787 + }, + { + "epoch": 1.817023469339954, + "grad_norm": 0.5658937455799314, + "learning_rate": 1.0894842651755416e-07, + "loss": 0.2587, + "step": 38788 + }, + { + "epoch": 1.8170703143298823, + "grad_norm": 0.5776209359966357, + "learning_rate": 1.0889305698944585e-07, + "loss": 0.2723, + "step": 38789 + }, + { + "epoch": 1.8171171593198108, + "grad_norm": 0.6315558323192678, + "learning_rate": 1.0883770122155718e-07, + "loss": 0.2808, + "step": 38790 + }, + { + "epoch": 1.8171640043097392, + "grad_norm": 0.6141776006585972, + "learning_rate": 1.0878235921420649e-07, + "loss": 0.2832, + "step": 38791 + }, + { + "epoch": 1.8172108492996673, + "grad_norm": 0.6055028684356978, + "learning_rate": 1.087270309677127e-07, + "loss": 0.2794, + "step": 38792 + }, + { + "epoch": 1.8172576942895957, + "grad_norm": 0.586987859969744, + "learning_rate": 1.0867171648239333e-07, + "loss": 0.26, + "step": 38793 + }, + { + "epoch": 1.8173045392795242, + "grad_norm": 0.5763845018929578, + "learning_rate": 1.0861641575856785e-07, + "loss": 0.2587, + "step": 38794 + }, + { + "epoch": 1.8173513842694524, + "grad_norm": 0.5867630262012772, + "learning_rate": 1.0856112879655323e-07, + "loss": 0.2588, + "step": 38795 + }, + { + "epoch": 1.8173982292593807, + "grad_norm": 0.6083120294853012, + "learning_rate": 1.0850585559666837e-07, + "loss": 0.2662, + "step": 38796 + }, + { + "epoch": 1.8174450742493091, + "grad_norm": 0.5979091210059907, + "learning_rate": 1.0845059615923109e-07, + "loss": 0.2801, + "step": 38797 + }, + { + "epoch": 1.8174919192392374, + "grad_norm": 0.6079247794160216, + "learning_rate": 1.0839535048455974e-07, + "loss": 0.2767, + "step": 38798 + }, + { + "epoch": 1.8175387642291656, + "grad_norm": 0.5802070611569713, + "learning_rate": 1.0834011857297211e-07, + "loss": 0.2687, + "step": 38799 + }, + { + "epoch": 1.817585609219094, + "grad_norm": 0.6152587714712167, + "learning_rate": 1.0828490042478628e-07, + "loss": 0.2643, + "step": 38800 + }, + { + "epoch": 1.8176324542090223, + "grad_norm": 0.5720219637533385, + "learning_rate": 1.0822969604031924e-07, + "loss": 0.2458, + "step": 38801 + }, + { + "epoch": 1.8176792991989505, + "grad_norm": 0.6311270337016621, + "learning_rate": 1.0817450541988905e-07, + "loss": 0.2709, + "step": 38802 + }, + { + "epoch": 1.817726144188879, + "grad_norm": 0.5629898089280669, + "learning_rate": 1.0811932856381352e-07, + "loss": 0.2453, + "step": 38803 + }, + { + "epoch": 1.8177729891788075, + "grad_norm": 0.6113414035005831, + "learning_rate": 1.0806416547241016e-07, + "loss": 0.2755, + "step": 38804 + }, + { + "epoch": 1.8178198341687355, + "grad_norm": 0.5774612178768513, + "learning_rate": 1.0800901614599624e-07, + "loss": 0.2786, + "step": 38805 + }, + { + "epoch": 1.817866679158664, + "grad_norm": 0.6501989726730262, + "learning_rate": 1.0795388058488954e-07, + "loss": 0.2661, + "step": 38806 + }, + { + "epoch": 1.8179135241485924, + "grad_norm": 0.6027702715241342, + "learning_rate": 1.0789875878940703e-07, + "loss": 0.2824, + "step": 38807 + }, + { + "epoch": 1.8179603691385207, + "grad_norm": 0.5653352406435367, + "learning_rate": 1.078436507598657e-07, + "loss": 0.2583, + "step": 38808 + }, + { + "epoch": 1.818007214128449, + "grad_norm": 0.6459674162403468, + "learning_rate": 1.0778855649658276e-07, + "loss": 0.2885, + "step": 38809 + }, + { + "epoch": 1.8180540591183774, + "grad_norm": 0.6096380197746234, + "learning_rate": 1.077334759998755e-07, + "loss": 0.2775, + "step": 38810 + }, + { + "epoch": 1.8181009041083056, + "grad_norm": 0.6231247463105614, + "learning_rate": 1.0767840927006085e-07, + "loss": 0.2761, + "step": 38811 + }, + { + "epoch": 1.8181477490982338, + "grad_norm": 0.5901206559887558, + "learning_rate": 1.0762335630745552e-07, + "loss": 0.277, + "step": 38812 + }, + { + "epoch": 1.8181945940881623, + "grad_norm": 0.5592274631806647, + "learning_rate": 1.0756831711237703e-07, + "loss": 0.2602, + "step": 38813 + }, + { + "epoch": 1.8182414390780905, + "grad_norm": 0.6259094682837821, + "learning_rate": 1.0751329168514124e-07, + "loss": 0.2659, + "step": 38814 + }, + { + "epoch": 1.8182882840680188, + "grad_norm": 0.5771581917152878, + "learning_rate": 1.0745828002606511e-07, + "loss": 0.2741, + "step": 38815 + }, + { + "epoch": 1.8183351290579473, + "grad_norm": 0.6320659646974619, + "learning_rate": 1.0740328213546508e-07, + "loss": 0.2647, + "step": 38816 + }, + { + "epoch": 1.8183819740478757, + "grad_norm": 0.5663612008182343, + "learning_rate": 1.0734829801365781e-07, + "loss": 0.2557, + "step": 38817 + }, + { + "epoch": 1.818428819037804, + "grad_norm": 0.5587998976554248, + "learning_rate": 1.0729332766095973e-07, + "loss": 0.2697, + "step": 38818 + }, + { + "epoch": 1.8184756640277322, + "grad_norm": 0.6106560670070564, + "learning_rate": 1.0723837107768753e-07, + "loss": 0.2784, + "step": 38819 + }, + { + "epoch": 1.8185225090176607, + "grad_norm": 0.5995585845861763, + "learning_rate": 1.0718342826415679e-07, + "loss": 0.2759, + "step": 38820 + }, + { + "epoch": 1.818569354007589, + "grad_norm": 0.5718668333191312, + "learning_rate": 1.0712849922068447e-07, + "loss": 0.2638, + "step": 38821 + }, + { + "epoch": 1.8186161989975171, + "grad_norm": 0.600072246815348, + "learning_rate": 1.070735839475856e-07, + "loss": 0.2711, + "step": 38822 + }, + { + "epoch": 1.8186630439874456, + "grad_norm": 0.6184441802632955, + "learning_rate": 1.0701868244517716e-07, + "loss": 0.2753, + "step": 38823 + }, + { + "epoch": 1.8187098889773738, + "grad_norm": 0.6012741253902518, + "learning_rate": 1.0696379471377444e-07, + "loss": 0.2776, + "step": 38824 + }, + { + "epoch": 1.818756733967302, + "grad_norm": 0.6514523877359806, + "learning_rate": 1.0690892075369413e-07, + "loss": 0.2746, + "step": 38825 + }, + { + "epoch": 1.8188035789572305, + "grad_norm": 0.5934731019799366, + "learning_rate": 1.0685406056525099e-07, + "loss": 0.2696, + "step": 38826 + }, + { + "epoch": 1.818850423947159, + "grad_norm": 0.5627924782594871, + "learning_rate": 1.0679921414876115e-07, + "loss": 0.2572, + "step": 38827 + }, + { + "epoch": 1.818897268937087, + "grad_norm": 0.6445435227695574, + "learning_rate": 1.0674438150454102e-07, + "loss": 0.2816, + "step": 38828 + }, + { + "epoch": 1.8189441139270155, + "grad_norm": 0.584409124483138, + "learning_rate": 1.066895626329048e-07, + "loss": 0.2628, + "step": 38829 + }, + { + "epoch": 1.818990958916944, + "grad_norm": 0.5947532910976688, + "learning_rate": 1.0663475753416891e-07, + "loss": 0.2646, + "step": 38830 + }, + { + "epoch": 1.8190378039068722, + "grad_norm": 0.6074111078802291, + "learning_rate": 1.0657996620864863e-07, + "loss": 0.2796, + "step": 38831 + }, + { + "epoch": 1.8190846488968004, + "grad_norm": 0.5612096249282094, + "learning_rate": 1.0652518865665874e-07, + "loss": 0.2564, + "step": 38832 + }, + { + "epoch": 1.819131493886729, + "grad_norm": 0.5782477757733583, + "learning_rate": 1.0647042487851478e-07, + "loss": 0.2688, + "step": 38833 + }, + { + "epoch": 1.8191783388766571, + "grad_norm": 0.6103728244396102, + "learning_rate": 1.0641567487453208e-07, + "loss": 0.2889, + "step": 38834 + }, + { + "epoch": 1.8192251838665854, + "grad_norm": 0.603176059804986, + "learning_rate": 1.0636093864502539e-07, + "loss": 0.2725, + "step": 38835 + }, + { + "epoch": 1.8192720288565138, + "grad_norm": 0.6178078805779372, + "learning_rate": 1.0630621619031e-07, + "loss": 0.2805, + "step": 38836 + }, + { + "epoch": 1.819318873846442, + "grad_norm": 0.6743217425787559, + "learning_rate": 1.0625150751070096e-07, + "loss": 0.2906, + "step": 38837 + }, + { + "epoch": 1.8193657188363703, + "grad_norm": 0.629574426070385, + "learning_rate": 1.0619681260651244e-07, + "loss": 0.2824, + "step": 38838 + }, + { + "epoch": 1.8194125638262988, + "grad_norm": 0.6072889176856893, + "learning_rate": 1.0614213147805974e-07, + "loss": 0.2794, + "step": 38839 + }, + { + "epoch": 1.8194594088162273, + "grad_norm": 0.5315919549200624, + "learning_rate": 1.0608746412565734e-07, + "loss": 0.2435, + "step": 38840 + }, + { + "epoch": 1.8195062538061553, + "grad_norm": 0.5821268624344682, + "learning_rate": 1.0603281054962028e-07, + "loss": 0.2808, + "step": 38841 + }, + { + "epoch": 1.8195530987960837, + "grad_norm": 0.6283190523236544, + "learning_rate": 1.0597817075026246e-07, + "loss": 0.2741, + "step": 38842 + }, + { + "epoch": 1.8195999437860122, + "grad_norm": 0.6002953862223037, + "learning_rate": 1.0592354472789861e-07, + "loss": 0.2902, + "step": 38843 + }, + { + "epoch": 1.8196467887759404, + "grad_norm": 0.6483959436039123, + "learning_rate": 1.0586893248284269e-07, + "loss": 0.2712, + "step": 38844 + }, + { + "epoch": 1.8196936337658687, + "grad_norm": 0.5868403431493772, + "learning_rate": 1.0581433401540969e-07, + "loss": 0.2691, + "step": 38845 + }, + { + "epoch": 1.8197404787557971, + "grad_norm": 0.5960308062754703, + "learning_rate": 1.0575974932591299e-07, + "loss": 0.2681, + "step": 38846 + }, + { + "epoch": 1.8197873237457254, + "grad_norm": 0.6341647257466225, + "learning_rate": 1.057051784146676e-07, + "loss": 0.2741, + "step": 38847 + }, + { + "epoch": 1.8198341687356536, + "grad_norm": 0.5791857786250036, + "learning_rate": 1.0565062128198717e-07, + "loss": 0.255, + "step": 38848 + }, + { + "epoch": 1.819881013725582, + "grad_norm": 0.5793684519945864, + "learning_rate": 1.055960779281856e-07, + "loss": 0.2674, + "step": 38849 + }, + { + "epoch": 1.8199278587155103, + "grad_norm": 0.6341530043925389, + "learning_rate": 1.0554154835357655e-07, + "loss": 0.2891, + "step": 38850 + }, + { + "epoch": 1.8199747037054386, + "grad_norm": 0.6458912376961686, + "learning_rate": 1.054870325584742e-07, + "loss": 0.2717, + "step": 38851 + }, + { + "epoch": 1.820021548695367, + "grad_norm": 0.5942261405775117, + "learning_rate": 1.0543253054319191e-07, + "loss": 0.2743, + "step": 38852 + }, + { + "epoch": 1.8200683936852955, + "grad_norm": 0.6205550101376993, + "learning_rate": 1.0537804230804361e-07, + "loss": 0.279, + "step": 38853 + }, + { + "epoch": 1.8201152386752237, + "grad_norm": 0.6000794144615629, + "learning_rate": 1.0532356785334291e-07, + "loss": 0.2622, + "step": 38854 + }, + { + "epoch": 1.820162083665152, + "grad_norm": 0.5670337261059806, + "learning_rate": 1.0526910717940348e-07, + "loss": 0.258, + "step": 38855 + }, + { + "epoch": 1.8202089286550804, + "grad_norm": 0.5955397802064294, + "learning_rate": 1.0521466028653837e-07, + "loss": 0.2591, + "step": 38856 + }, + { + "epoch": 1.8202557736450087, + "grad_norm": 0.599494222809529, + "learning_rate": 1.0516022717506069e-07, + "loss": 0.2634, + "step": 38857 + }, + { + "epoch": 1.820302618634937, + "grad_norm": 0.6111050303809861, + "learning_rate": 1.0510580784528407e-07, + "loss": 0.283, + "step": 38858 + }, + { + "epoch": 1.8203494636248654, + "grad_norm": 0.5259393560289003, + "learning_rate": 1.0505140229752159e-07, + "loss": 0.2529, + "step": 38859 + }, + { + "epoch": 1.8203963086147936, + "grad_norm": 0.5596913489994986, + "learning_rate": 1.0499701053208605e-07, + "loss": 0.2507, + "step": 38860 + }, + { + "epoch": 1.8204431536047219, + "grad_norm": 0.6793716025389026, + "learning_rate": 1.0494263254929138e-07, + "loss": 0.2924, + "step": 38861 + }, + { + "epoch": 1.8204899985946503, + "grad_norm": 0.5603842655182484, + "learning_rate": 1.0488826834944954e-07, + "loss": 0.2501, + "step": 38862 + }, + { + "epoch": 1.8205368435845788, + "grad_norm": 0.6084967104739357, + "learning_rate": 1.048339179328739e-07, + "loss": 0.2685, + "step": 38863 + }, + { + "epoch": 1.8205836885745068, + "grad_norm": 0.594650485525024, + "learning_rate": 1.0477958129987697e-07, + "loss": 0.2799, + "step": 38864 + }, + { + "epoch": 1.8206305335644353, + "grad_norm": 0.5968157109768357, + "learning_rate": 1.0472525845077131e-07, + "loss": 0.2709, + "step": 38865 + }, + { + "epoch": 1.8206773785543637, + "grad_norm": 0.6175075042357745, + "learning_rate": 1.0467094938586997e-07, + "loss": 0.2739, + "step": 38866 + }, + { + "epoch": 1.820724223544292, + "grad_norm": 0.6427806273921328, + "learning_rate": 1.0461665410548549e-07, + "loss": 0.2744, + "step": 38867 + }, + { + "epoch": 1.8207710685342202, + "grad_norm": 0.6376507185379213, + "learning_rate": 1.0456237260992957e-07, + "loss": 0.2846, + "step": 38868 + }, + { + "epoch": 1.8208179135241487, + "grad_norm": 0.6155178274134265, + "learning_rate": 1.045081048995153e-07, + "loss": 0.2793, + "step": 38869 + }, + { + "epoch": 1.820864758514077, + "grad_norm": 0.5670519543071606, + "learning_rate": 1.0445385097455518e-07, + "loss": 0.2723, + "step": 38870 + }, + { + "epoch": 1.8209116035040052, + "grad_norm": 0.5968544250798661, + "learning_rate": 1.0439961083536066e-07, + "loss": 0.2609, + "step": 38871 + }, + { + "epoch": 1.8209584484939336, + "grad_norm": 0.6317046788743567, + "learning_rate": 1.0434538448224451e-07, + "loss": 0.2719, + "step": 38872 + }, + { + "epoch": 1.8210052934838619, + "grad_norm": 0.6063524561541053, + "learning_rate": 1.0429117191551846e-07, + "loss": 0.2845, + "step": 38873 + }, + { + "epoch": 1.82105213847379, + "grad_norm": 0.6183618996751259, + "learning_rate": 1.0423697313549446e-07, + "loss": 0.2685, + "step": 38874 + }, + { + "epoch": 1.8210989834637186, + "grad_norm": 0.5746272379067774, + "learning_rate": 1.0418278814248451e-07, + "loss": 0.258, + "step": 38875 + }, + { + "epoch": 1.821145828453647, + "grad_norm": 0.6132317765152236, + "learning_rate": 1.0412861693680055e-07, + "loss": 0.278, + "step": 38876 + }, + { + "epoch": 1.821192673443575, + "grad_norm": 0.6016096127442041, + "learning_rate": 1.0407445951875456e-07, + "loss": 0.254, + "step": 38877 + }, + { + "epoch": 1.8212395184335035, + "grad_norm": 0.6164871871413307, + "learning_rate": 1.0402031588865741e-07, + "loss": 0.2674, + "step": 38878 + }, + { + "epoch": 1.821286363423432, + "grad_norm": 0.5928457977887306, + "learning_rate": 1.0396618604682108e-07, + "loss": 0.2717, + "step": 38879 + }, + { + "epoch": 1.8213332084133602, + "grad_norm": 0.5674302990946273, + "learning_rate": 1.0391206999355752e-07, + "loss": 0.2701, + "step": 38880 + }, + { + "epoch": 1.8213800534032885, + "grad_norm": 0.5865089479353824, + "learning_rate": 1.038579677291776e-07, + "loss": 0.2659, + "step": 38881 + }, + { + "epoch": 1.821426898393217, + "grad_norm": 0.5543281024342217, + "learning_rate": 1.038038792539925e-07, + "loss": 0.269, + "step": 38882 + }, + { + "epoch": 1.8214737433831452, + "grad_norm": 0.60177696606514, + "learning_rate": 1.037498045683144e-07, + "loss": 0.2681, + "step": 38883 + }, + { + "epoch": 1.8215205883730734, + "grad_norm": 0.5755437931050555, + "learning_rate": 1.0369574367245338e-07, + "loss": 0.2568, + "step": 38884 + }, + { + "epoch": 1.8215674333630019, + "grad_norm": 0.5703705351921222, + "learning_rate": 1.0364169656672112e-07, + "loss": 0.2667, + "step": 38885 + }, + { + "epoch": 1.82161427835293, + "grad_norm": 0.5797222557944376, + "learning_rate": 1.0358766325142905e-07, + "loss": 0.262, + "step": 38886 + }, + { + "epoch": 1.8216611233428583, + "grad_norm": 0.5899998286505355, + "learning_rate": 1.0353364372688746e-07, + "loss": 0.2701, + "step": 38887 + }, + { + "epoch": 1.8217079683327868, + "grad_norm": 0.6180180371631073, + "learning_rate": 1.034796379934072e-07, + "loss": 0.2795, + "step": 38888 + }, + { + "epoch": 1.8217548133227153, + "grad_norm": 0.55314342518387, + "learning_rate": 1.0342564605129918e-07, + "loss": 0.2416, + "step": 38889 + }, + { + "epoch": 1.8218016583126435, + "grad_norm": 0.5567484440058947, + "learning_rate": 1.0337166790087477e-07, + "loss": 0.261, + "step": 38890 + }, + { + "epoch": 1.8218485033025718, + "grad_norm": 0.6160571939829528, + "learning_rate": 1.0331770354244347e-07, + "loss": 0.2764, + "step": 38891 + }, + { + "epoch": 1.8218953482925002, + "grad_norm": 0.6019398690990642, + "learning_rate": 1.0326375297631697e-07, + "loss": 0.2739, + "step": 38892 + }, + { + "epoch": 1.8219421932824285, + "grad_norm": 0.577155411925289, + "learning_rate": 1.0320981620280473e-07, + "loss": 0.2585, + "step": 38893 + }, + { + "epoch": 1.8219890382723567, + "grad_norm": 0.6151343234909689, + "learning_rate": 1.0315589322221763e-07, + "loss": 0.2917, + "step": 38894 + }, + { + "epoch": 1.8220358832622852, + "grad_norm": 0.6268554984840394, + "learning_rate": 1.0310198403486598e-07, + "loss": 0.2748, + "step": 38895 + }, + { + "epoch": 1.8220827282522134, + "grad_norm": 0.5661711430063542, + "learning_rate": 1.0304808864105981e-07, + "loss": 0.2663, + "step": 38896 + }, + { + "epoch": 1.8221295732421416, + "grad_norm": 0.5820952921318612, + "learning_rate": 1.0299420704110996e-07, + "loss": 0.2747, + "step": 38897 + }, + { + "epoch": 1.82217641823207, + "grad_norm": 0.6021295471417717, + "learning_rate": 1.0294033923532565e-07, + "loss": 0.2862, + "step": 38898 + }, + { + "epoch": 1.8222232632219986, + "grad_norm": 0.5479041185028831, + "learning_rate": 1.0288648522401717e-07, + "loss": 0.253, + "step": 38899 + }, + { + "epoch": 1.8222701082119266, + "grad_norm": 0.580516461047927, + "learning_rate": 1.0283264500749458e-07, + "loss": 0.2627, + "step": 38900 + }, + { + "epoch": 1.822316953201855, + "grad_norm": 0.6705499635525859, + "learning_rate": 1.0277881858606731e-07, + "loss": 0.2834, + "step": 38901 + }, + { + "epoch": 1.8223637981917835, + "grad_norm": 0.6275019328363864, + "learning_rate": 1.027250059600457e-07, + "loss": 0.2796, + "step": 38902 + }, + { + "epoch": 1.8224106431817118, + "grad_norm": 0.6635973807858428, + "learning_rate": 1.0267120712973894e-07, + "loss": 0.2774, + "step": 38903 + }, + { + "epoch": 1.82245748817164, + "grad_norm": 0.5519476144404879, + "learning_rate": 1.0261742209545732e-07, + "loss": 0.257, + "step": 38904 + }, + { + "epoch": 1.8225043331615685, + "grad_norm": 0.5988531128278777, + "learning_rate": 1.025636508575098e-07, + "loss": 0.2751, + "step": 38905 + }, + { + "epoch": 1.8225511781514967, + "grad_norm": 0.6210174511979855, + "learning_rate": 1.0250989341620554e-07, + "loss": 0.2915, + "step": 38906 + }, + { + "epoch": 1.822598023141425, + "grad_norm": 0.5725484983195388, + "learning_rate": 1.0245614977185431e-07, + "loss": 0.2643, + "step": 38907 + }, + { + "epoch": 1.8226448681313534, + "grad_norm": 0.6157701698338869, + "learning_rate": 1.024024199247653e-07, + "loss": 0.2785, + "step": 38908 + }, + { + "epoch": 1.8226917131212816, + "grad_norm": 0.6740299262216561, + "learning_rate": 1.023487038752477e-07, + "loss": 0.2658, + "step": 38909 + }, + { + "epoch": 1.8227385581112099, + "grad_norm": 0.5853267375441908, + "learning_rate": 1.02295001623611e-07, + "loss": 0.2839, + "step": 38910 + }, + { + "epoch": 1.8227854031011383, + "grad_norm": 0.6288343013898757, + "learning_rate": 1.0224131317016383e-07, + "loss": 0.2706, + "step": 38911 + }, + { + "epoch": 1.8228322480910668, + "grad_norm": 0.6034927696142861, + "learning_rate": 1.021876385152154e-07, + "loss": 0.2704, + "step": 38912 + }, + { + "epoch": 1.8228790930809948, + "grad_norm": 0.5742114571316876, + "learning_rate": 1.0213397765907406e-07, + "loss": 0.2655, + "step": 38913 + }, + { + "epoch": 1.8229259380709233, + "grad_norm": 0.571603974607148, + "learning_rate": 1.0208033060204903e-07, + "loss": 0.2522, + "step": 38914 + }, + { + "epoch": 1.8229727830608518, + "grad_norm": 0.5828433401606513, + "learning_rate": 1.0202669734444892e-07, + "loss": 0.2799, + "step": 38915 + }, + { + "epoch": 1.82301962805078, + "grad_norm": 0.5758742138527417, + "learning_rate": 1.0197307788658295e-07, + "loss": 0.2589, + "step": 38916 + }, + { + "epoch": 1.8230664730407082, + "grad_norm": 0.5940134945614914, + "learning_rate": 1.0191947222875892e-07, + "loss": 0.2525, + "step": 38917 + }, + { + "epoch": 1.8231133180306367, + "grad_norm": 0.6040616932886574, + "learning_rate": 1.0186588037128548e-07, + "loss": 0.27, + "step": 38918 + }, + { + "epoch": 1.823160163020565, + "grad_norm": 0.5892986580349932, + "learning_rate": 1.0181230231447154e-07, + "loss": 0.2585, + "step": 38919 + }, + { + "epoch": 1.8232070080104932, + "grad_norm": 0.5951789320404598, + "learning_rate": 1.0175873805862463e-07, + "loss": 0.2586, + "step": 38920 + }, + { + "epoch": 1.8232538530004216, + "grad_norm": 0.5780462067589814, + "learning_rate": 1.0170518760405341e-07, + "loss": 0.2443, + "step": 38921 + }, + { + "epoch": 1.8233006979903499, + "grad_norm": 0.6391259788890004, + "learning_rate": 1.0165165095106649e-07, + "loss": 0.2979, + "step": 38922 + }, + { + "epoch": 1.8233475429802781, + "grad_norm": 0.6551244877420704, + "learning_rate": 1.0159812809997116e-07, + "loss": 0.2805, + "step": 38923 + }, + { + "epoch": 1.8233943879702066, + "grad_norm": 0.6131141683818893, + "learning_rate": 1.0154461905107549e-07, + "loss": 0.2685, + "step": 38924 + }, + { + "epoch": 1.823441232960135, + "grad_norm": 0.584930824002958, + "learning_rate": 1.014911238046884e-07, + "loss": 0.2763, + "step": 38925 + }, + { + "epoch": 1.8234880779500633, + "grad_norm": 0.6092550378348562, + "learning_rate": 1.0143764236111659e-07, + "loss": 0.27, + "step": 38926 + }, + { + "epoch": 1.8235349229399915, + "grad_norm": 0.6033862742538818, + "learning_rate": 1.0138417472066842e-07, + "loss": 0.2904, + "step": 38927 + }, + { + "epoch": 1.82358176792992, + "grad_norm": 0.6028600367551087, + "learning_rate": 1.0133072088365142e-07, + "loss": 0.2887, + "step": 38928 + }, + { + "epoch": 1.8236286129198482, + "grad_norm": 0.5750076768991342, + "learning_rate": 1.0127728085037314e-07, + "loss": 0.2598, + "step": 38929 + }, + { + "epoch": 1.8236754579097765, + "grad_norm": 0.587708763810947, + "learning_rate": 1.0122385462114138e-07, + "loss": 0.2672, + "step": 38930 + }, + { + "epoch": 1.823722302899705, + "grad_norm": 0.6262702167987592, + "learning_rate": 1.0117044219626337e-07, + "loss": 0.2746, + "step": 38931 + }, + { + "epoch": 1.8237691478896332, + "grad_norm": 0.577957782771191, + "learning_rate": 1.0111704357604667e-07, + "loss": 0.239, + "step": 38932 + }, + { + "epoch": 1.8238159928795614, + "grad_norm": 0.5900592480719821, + "learning_rate": 1.0106365876079827e-07, + "loss": 0.2787, + "step": 38933 + }, + { + "epoch": 1.8238628378694899, + "grad_norm": 0.5930725974973083, + "learning_rate": 1.0101028775082566e-07, + "loss": 0.2685, + "step": 38934 + }, + { + "epoch": 1.8239096828594183, + "grad_norm": 0.5750695004133843, + "learning_rate": 1.0095693054643585e-07, + "loss": 0.2625, + "step": 38935 + }, + { + "epoch": 1.8239565278493464, + "grad_norm": 0.5867126518860131, + "learning_rate": 1.009035871479358e-07, + "loss": 0.2657, + "step": 38936 + }, + { + "epoch": 1.8240033728392748, + "grad_norm": 0.667057026273192, + "learning_rate": 1.0085025755563277e-07, + "loss": 0.3162, + "step": 38937 + }, + { + "epoch": 1.8240502178292033, + "grad_norm": 0.6185011682562986, + "learning_rate": 1.0079694176983318e-07, + "loss": 0.2756, + "step": 38938 + }, + { + "epoch": 1.8240970628191315, + "grad_norm": 0.6567353157682047, + "learning_rate": 1.0074363979084484e-07, + "loss": 0.2767, + "step": 38939 + }, + { + "epoch": 1.8241439078090598, + "grad_norm": 0.6069825933610864, + "learning_rate": 1.0069035161897361e-07, + "loss": 0.2692, + "step": 38940 + }, + { + "epoch": 1.8241907527989882, + "grad_norm": 0.6160067972919295, + "learning_rate": 1.006370772545262e-07, + "loss": 0.2747, + "step": 38941 + }, + { + "epoch": 1.8242375977889165, + "grad_norm": 0.675283620079249, + "learning_rate": 1.0058381669780904e-07, + "loss": 0.286, + "step": 38942 + }, + { + "epoch": 1.8242844427788447, + "grad_norm": 0.5999964979598394, + "learning_rate": 1.0053056994912935e-07, + "loss": 0.2712, + "step": 38943 + }, + { + "epoch": 1.8243312877687732, + "grad_norm": 0.6139798411564964, + "learning_rate": 1.0047733700879276e-07, + "loss": 0.2615, + "step": 38944 + }, + { + "epoch": 1.8243781327587014, + "grad_norm": 0.5497241205940558, + "learning_rate": 1.004241178771062e-07, + "loss": 0.2453, + "step": 38945 + }, + { + "epoch": 1.8244249777486297, + "grad_norm": 0.5715015008172257, + "learning_rate": 1.0037091255437615e-07, + "loss": 0.2684, + "step": 38946 + }, + { + "epoch": 1.8244718227385581, + "grad_norm": 0.5924028379065388, + "learning_rate": 1.0031772104090815e-07, + "loss": 0.27, + "step": 38947 + }, + { + "epoch": 1.8245186677284866, + "grad_norm": 0.6535330288169512, + "learning_rate": 1.0026454333700835e-07, + "loss": 0.2878, + "step": 38948 + }, + { + "epoch": 1.8245655127184146, + "grad_norm": 0.601489852063654, + "learning_rate": 1.0021137944298293e-07, + "loss": 0.2657, + "step": 38949 + }, + { + "epoch": 1.824612357708343, + "grad_norm": 0.5956166342170643, + "learning_rate": 1.00158229359138e-07, + "loss": 0.2754, + "step": 38950 + }, + { + "epoch": 1.8246592026982715, + "grad_norm": 0.6001256525373315, + "learning_rate": 1.0010509308577915e-07, + "loss": 0.2717, + "step": 38951 + }, + { + "epoch": 1.8247060476881998, + "grad_norm": 0.6070287466097186, + "learning_rate": 1.0005197062321226e-07, + "loss": 0.2679, + "step": 38952 + }, + { + "epoch": 1.824752892678128, + "grad_norm": 0.6107160709618626, + "learning_rate": 9.999886197174347e-08, + "loss": 0.2748, + "step": 38953 + }, + { + "epoch": 1.8247997376680565, + "grad_norm": 0.5942241953419025, + "learning_rate": 9.994576713167781e-08, + "loss": 0.2551, + "step": 38954 + }, + { + "epoch": 1.8248465826579847, + "grad_norm": 0.5477814915807695, + "learning_rate": 9.989268610332087e-08, + "loss": 0.2458, + "step": 38955 + }, + { + "epoch": 1.824893427647913, + "grad_norm": 0.5985461477927799, + "learning_rate": 9.983961888697824e-08, + "loss": 0.281, + "step": 38956 + }, + { + "epoch": 1.8249402726378414, + "grad_norm": 0.6096788072899071, + "learning_rate": 9.97865654829555e-08, + "loss": 0.2572, + "step": 38957 + }, + { + "epoch": 1.8249871176277697, + "grad_norm": 0.61854511372398, + "learning_rate": 9.973352589155772e-08, + "loss": 0.2656, + "step": 38958 + }, + { + "epoch": 1.825033962617698, + "grad_norm": 0.5884939338455778, + "learning_rate": 9.968050011309044e-08, + "loss": 0.2733, + "step": 38959 + }, + { + "epoch": 1.8250808076076264, + "grad_norm": 0.6096671653045341, + "learning_rate": 9.962748814785817e-08, + "loss": 0.2785, + "step": 38960 + }, + { + "epoch": 1.8251276525975548, + "grad_norm": 0.6252483277881801, + "learning_rate": 9.957448999616704e-08, + "loss": 0.2707, + "step": 38961 + }, + { + "epoch": 1.825174497587483, + "grad_norm": 0.636018356424198, + "learning_rate": 9.952150565832069e-08, + "loss": 0.2725, + "step": 38962 + }, + { + "epoch": 1.8252213425774113, + "grad_norm": 0.5935223812581026, + "learning_rate": 9.9468535134625e-08, + "loss": 0.2742, + "step": 38963 + }, + { + "epoch": 1.8252681875673398, + "grad_norm": 0.6235035034420835, + "learning_rate": 9.941557842538446e-08, + "loss": 0.2722, + "step": 38964 + }, + { + "epoch": 1.825315032557268, + "grad_norm": 0.5752023394048453, + "learning_rate": 9.936263553090436e-08, + "loss": 0.2683, + "step": 38965 + }, + { + "epoch": 1.8253618775471963, + "grad_norm": 0.5549152644737225, + "learning_rate": 9.930970645148835e-08, + "loss": 0.2503, + "step": 38966 + }, + { + "epoch": 1.8254087225371247, + "grad_norm": 0.5740124144216032, + "learning_rate": 9.925679118744174e-08, + "loss": 0.2547, + "step": 38967 + }, + { + "epoch": 1.825455567527053, + "grad_norm": 0.6398080253929985, + "learning_rate": 9.920388973906931e-08, + "loss": 0.2657, + "step": 38968 + }, + { + "epoch": 1.8255024125169812, + "grad_norm": 0.5689558641090914, + "learning_rate": 9.915100210667466e-08, + "loss": 0.2712, + "step": 38969 + }, + { + "epoch": 1.8255492575069097, + "grad_norm": 0.6027000473545415, + "learning_rate": 9.909812829056231e-08, + "loss": 0.2714, + "step": 38970 + }, + { + "epoch": 1.8255961024968381, + "grad_norm": 0.6106126311854376, + "learning_rate": 9.904526829103756e-08, + "loss": 0.267, + "step": 38971 + }, + { + "epoch": 1.8256429474867661, + "grad_norm": 0.5596321936831279, + "learning_rate": 9.899242210840321e-08, + "loss": 0.2551, + "step": 38972 + }, + { + "epoch": 1.8256897924766946, + "grad_norm": 0.5661946369011067, + "learning_rate": 9.893958974296402e-08, + "loss": 0.257, + "step": 38973 + }, + { + "epoch": 1.825736637466623, + "grad_norm": 0.6531850933504064, + "learning_rate": 9.888677119502449e-08, + "loss": 0.2758, + "step": 38974 + }, + { + "epoch": 1.8257834824565513, + "grad_norm": 0.590268494365804, + "learning_rate": 9.883396646488769e-08, + "loss": 0.256, + "step": 38975 + }, + { + "epoch": 1.8258303274464796, + "grad_norm": 0.6051611266353017, + "learning_rate": 9.878117555285782e-08, + "loss": 0.2803, + "step": 38976 + }, + { + "epoch": 1.825877172436408, + "grad_norm": 0.5978872613198623, + "learning_rate": 9.872839845923909e-08, + "loss": 0.2646, + "step": 38977 + }, + { + "epoch": 1.8259240174263363, + "grad_norm": 0.5841747986156036, + "learning_rate": 9.86756351843346e-08, + "loss": 0.2616, + "step": 38978 + }, + { + "epoch": 1.8259708624162645, + "grad_norm": 0.5499690693190471, + "learning_rate": 9.862288572844824e-08, + "loss": 0.2593, + "step": 38979 + }, + { + "epoch": 1.826017707406193, + "grad_norm": 0.5865456980609942, + "learning_rate": 9.85701500918837e-08, + "loss": 0.2789, + "step": 38980 + }, + { + "epoch": 1.8260645523961212, + "grad_norm": 0.5588407475567231, + "learning_rate": 9.851742827494487e-08, + "loss": 0.2759, + "step": 38981 + }, + { + "epoch": 1.8261113973860494, + "grad_norm": 0.5485861686352822, + "learning_rate": 9.846472027793403e-08, + "loss": 0.2667, + "step": 38982 + }, + { + "epoch": 1.826158242375978, + "grad_norm": 0.5777759360504074, + "learning_rate": 9.841202610115564e-08, + "loss": 0.2664, + "step": 38983 + }, + { + "epoch": 1.8262050873659064, + "grad_norm": 0.6232615740852825, + "learning_rate": 9.835934574491196e-08, + "loss": 0.278, + "step": 38984 + }, + { + "epoch": 1.8262519323558344, + "grad_norm": 0.5828212720204724, + "learning_rate": 9.830667920950665e-08, + "loss": 0.2562, + "step": 38985 + }, + { + "epoch": 1.8262987773457628, + "grad_norm": 0.5953087960946088, + "learning_rate": 9.825402649524279e-08, + "loss": 0.2494, + "step": 38986 + }, + { + "epoch": 1.8263456223356913, + "grad_norm": 0.6127614996078028, + "learning_rate": 9.820138760242321e-08, + "loss": 0.271, + "step": 38987 + }, + { + "epoch": 1.8263924673256196, + "grad_norm": 0.5630127878258178, + "learning_rate": 9.814876253135152e-08, + "loss": 0.2511, + "step": 38988 + }, + { + "epoch": 1.8264393123155478, + "grad_norm": 0.5855823316371118, + "learning_rate": 9.809615128232974e-08, + "loss": 0.2574, + "step": 38989 + }, + { + "epoch": 1.8264861573054763, + "grad_norm": 0.6441072537736838, + "learning_rate": 9.804355385566094e-08, + "loss": 0.2713, + "step": 38990 + }, + { + "epoch": 1.8265330022954045, + "grad_norm": 0.5974225953286123, + "learning_rate": 9.799097025164739e-08, + "loss": 0.2867, + "step": 38991 + }, + { + "epoch": 1.8265798472853327, + "grad_norm": 0.5447157120473829, + "learning_rate": 9.793840047059244e-08, + "loss": 0.2459, + "step": 38992 + }, + { + "epoch": 1.8266266922752612, + "grad_norm": 0.5920425566588895, + "learning_rate": 9.788584451279808e-08, + "loss": 0.2647, + "step": 38993 + }, + { + "epoch": 1.8266735372651894, + "grad_norm": 0.5842611534309009, + "learning_rate": 9.783330237856687e-08, + "loss": 0.268, + "step": 38994 + }, + { + "epoch": 1.8267203822551177, + "grad_norm": 0.6105350015863403, + "learning_rate": 9.778077406820186e-08, + "loss": 0.2818, + "step": 38995 + }, + { + "epoch": 1.8267672272450461, + "grad_norm": 0.584853771217353, + "learning_rate": 9.772825958200449e-08, + "loss": 0.2711, + "step": 38996 + }, + { + "epoch": 1.8268140722349746, + "grad_norm": 0.5843140623740661, + "learning_rate": 9.767575892027675e-08, + "loss": 0.2777, + "step": 38997 + }, + { + "epoch": 1.8268609172249028, + "grad_norm": 0.5589573630635952, + "learning_rate": 9.762327208332145e-08, + "loss": 0.2518, + "step": 38998 + }, + { + "epoch": 1.826907762214831, + "grad_norm": 0.6195411608528432, + "learning_rate": 9.757079907144029e-08, + "loss": 0.2786, + "step": 38999 + }, + { + "epoch": 1.8269546072047596, + "grad_norm": 0.6337202498681312, + "learning_rate": 9.751833988493553e-08, + "loss": 0.2713, + "step": 39000 + }, + { + "epoch": 1.8270014521946878, + "grad_norm": 0.5711971262069745, + "learning_rate": 9.746589452410887e-08, + "loss": 0.2571, + "step": 39001 + }, + { + "epoch": 1.827048297184616, + "grad_norm": 0.5654296796889065, + "learning_rate": 9.741346298926202e-08, + "loss": 0.2483, + "step": 39002 + }, + { + "epoch": 1.8270951421745445, + "grad_norm": 0.644324779630704, + "learning_rate": 9.736104528069723e-08, + "loss": 0.268, + "step": 39003 + }, + { + "epoch": 1.8271419871644727, + "grad_norm": 0.6606228068340891, + "learning_rate": 9.730864139871537e-08, + "loss": 0.2756, + "step": 39004 + }, + { + "epoch": 1.827188832154401, + "grad_norm": 0.6045394058200504, + "learning_rate": 9.725625134361844e-08, + "loss": 0.259, + "step": 39005 + }, + { + "epoch": 1.8272356771443294, + "grad_norm": 0.6012838671603052, + "learning_rate": 9.720387511570756e-08, + "loss": 0.2699, + "step": 39006 + }, + { + "epoch": 1.827282522134258, + "grad_norm": 0.6171268449510846, + "learning_rate": 9.7151512715285e-08, + "loss": 0.3001, + "step": 39007 + }, + { + "epoch": 1.827329367124186, + "grad_norm": 0.5695896339700532, + "learning_rate": 9.709916414265136e-08, + "loss": 0.264, + "step": 39008 + }, + { + "epoch": 1.8273762121141144, + "grad_norm": 0.5959801913426728, + "learning_rate": 9.704682939810777e-08, + "loss": 0.2834, + "step": 39009 + }, + { + "epoch": 1.8274230571040428, + "grad_norm": 0.5844069533197657, + "learning_rate": 9.699450848195625e-08, + "loss": 0.2594, + "step": 39010 + }, + { + "epoch": 1.827469902093971, + "grad_norm": 0.6186674253189335, + "learning_rate": 9.694220139449706e-08, + "loss": 0.2694, + "step": 39011 + }, + { + "epoch": 1.8275167470838993, + "grad_norm": 0.6293923865625918, + "learning_rate": 9.688990813603139e-08, + "loss": 0.2762, + "step": 39012 + }, + { + "epoch": 1.8275635920738278, + "grad_norm": 0.593082758671668, + "learning_rate": 9.683762870686064e-08, + "loss": 0.2665, + "step": 39013 + }, + { + "epoch": 1.827610437063756, + "grad_norm": 0.5628419591171059, + "learning_rate": 9.678536310728515e-08, + "loss": 0.2731, + "step": 39014 + }, + { + "epoch": 1.8276572820536843, + "grad_norm": 0.6017353312113028, + "learning_rate": 9.673311133760576e-08, + "loss": 0.2669, + "step": 39015 + }, + { + "epoch": 1.8277041270436127, + "grad_norm": 0.6021598733911355, + "learning_rate": 9.668087339812337e-08, + "loss": 0.2581, + "step": 39016 + }, + { + "epoch": 1.827750972033541, + "grad_norm": 0.6623732941919689, + "learning_rate": 9.662864928913885e-08, + "loss": 0.2873, + "step": 39017 + }, + { + "epoch": 1.8277978170234692, + "grad_norm": 0.5738246264574184, + "learning_rate": 9.657643901095193e-08, + "loss": 0.2628, + "step": 39018 + }, + { + "epoch": 1.8278446620133977, + "grad_norm": 0.5904217113168957, + "learning_rate": 9.65242425638635e-08, + "loss": 0.2845, + "step": 39019 + }, + { + "epoch": 1.8278915070033261, + "grad_norm": 0.6152222997707997, + "learning_rate": 9.647205994817443e-08, + "loss": 0.2728, + "step": 39020 + }, + { + "epoch": 1.8279383519932542, + "grad_norm": 0.5952919140253159, + "learning_rate": 9.641989116418421e-08, + "loss": 0.2637, + "step": 39021 + }, + { + "epoch": 1.8279851969831826, + "grad_norm": 0.61658870265459, + "learning_rate": 9.636773621219314e-08, + "loss": 0.2769, + "step": 39022 + }, + { + "epoch": 1.828032041973111, + "grad_norm": 0.5866870157398985, + "learning_rate": 9.63155950925021e-08, + "loss": 0.2661, + "step": 39023 + }, + { + "epoch": 1.8280788869630393, + "grad_norm": 0.595768055899774, + "learning_rate": 9.626346780541058e-08, + "loss": 0.2611, + "step": 39024 + }, + { + "epoch": 1.8281257319529676, + "grad_norm": 0.5897111086537745, + "learning_rate": 9.621135435121831e-08, + "loss": 0.2762, + "step": 39025 + }, + { + "epoch": 1.828172576942896, + "grad_norm": 0.6068412878289994, + "learning_rate": 9.615925473022592e-08, + "loss": 0.2779, + "step": 39026 + }, + { + "epoch": 1.8282194219328243, + "grad_norm": 0.5875209912173559, + "learning_rate": 9.610716894273258e-08, + "loss": 0.2569, + "step": 39027 + }, + { + "epoch": 1.8282662669227525, + "grad_norm": 0.5790711090874665, + "learning_rate": 9.605509698903836e-08, + "loss": 0.2536, + "step": 39028 + }, + { + "epoch": 1.828313111912681, + "grad_norm": 0.580430215011247, + "learning_rate": 9.600303886944273e-08, + "loss": 0.2757, + "step": 39029 + }, + { + "epoch": 1.8283599569026092, + "grad_norm": 0.5737352984706866, + "learning_rate": 9.595099458424573e-08, + "loss": 0.2617, + "step": 39030 + }, + { + "epoch": 1.8284068018925375, + "grad_norm": 0.5961318756180707, + "learning_rate": 9.589896413374627e-08, + "loss": 0.2725, + "step": 39031 + }, + { + "epoch": 1.828453646882466, + "grad_norm": 0.5523806514655826, + "learning_rate": 9.584694751824442e-08, + "loss": 0.2598, + "step": 39032 + }, + { + "epoch": 1.8285004918723944, + "grad_norm": 0.5954385118008024, + "learning_rate": 9.579494473803852e-08, + "loss": 0.2866, + "step": 39033 + }, + { + "epoch": 1.8285473368623226, + "grad_norm": 0.5822191134163283, + "learning_rate": 9.574295579342862e-08, + "loss": 0.2625, + "step": 39034 + }, + { + "epoch": 1.8285941818522509, + "grad_norm": 0.6861803624410571, + "learning_rate": 9.569098068471367e-08, + "loss": 0.2632, + "step": 39035 + }, + { + "epoch": 1.8286410268421793, + "grad_norm": 0.6156621070523024, + "learning_rate": 9.563901941219312e-08, + "loss": 0.2671, + "step": 39036 + }, + { + "epoch": 1.8286878718321076, + "grad_norm": 0.5804484554954485, + "learning_rate": 9.558707197616562e-08, + "loss": 0.2808, + "step": 39037 + }, + { + "epoch": 1.8287347168220358, + "grad_norm": 0.6223919436175899, + "learning_rate": 9.55351383769304e-08, + "loss": 0.2713, + "step": 39038 + }, + { + "epoch": 1.8287815618119643, + "grad_norm": 0.6223908542136548, + "learning_rate": 9.548321861478554e-08, + "loss": 0.272, + "step": 39039 + }, + { + "epoch": 1.8288284068018925, + "grad_norm": 0.586988221877286, + "learning_rate": 9.543131269003081e-08, + "loss": 0.2639, + "step": 39040 + }, + { + "epoch": 1.8288752517918208, + "grad_norm": 0.5712679244099982, + "learning_rate": 9.537942060296429e-08, + "loss": 0.2672, + "step": 39041 + }, + { + "epoch": 1.8289220967817492, + "grad_norm": 0.6132145036039136, + "learning_rate": 9.532754235388463e-08, + "loss": 0.254, + "step": 39042 + }, + { + "epoch": 1.8289689417716777, + "grad_norm": 0.5675628244583023, + "learning_rate": 9.527567794309078e-08, + "loss": 0.2533, + "step": 39043 + }, + { + "epoch": 1.8290157867616057, + "grad_norm": 0.582002204147845, + "learning_rate": 9.522382737088137e-08, + "loss": 0.2847, + "step": 39044 + }, + { + "epoch": 1.8290626317515342, + "grad_norm": 0.607688485346482, + "learning_rate": 9.51719906375545e-08, + "loss": 0.2736, + "step": 39045 + }, + { + "epoch": 1.8291094767414626, + "grad_norm": 0.6517033346900525, + "learning_rate": 9.512016774340799e-08, + "loss": 0.2753, + "step": 39046 + }, + { + "epoch": 1.8291563217313909, + "grad_norm": 0.6075830396363459, + "learning_rate": 9.50683586887402e-08, + "loss": 0.2778, + "step": 39047 + }, + { + "epoch": 1.829203166721319, + "grad_norm": 0.6135880541112586, + "learning_rate": 9.50165634738498e-08, + "loss": 0.2658, + "step": 39048 + }, + { + "epoch": 1.8292500117112476, + "grad_norm": 0.6219687852117306, + "learning_rate": 9.49647820990346e-08, + "loss": 0.2748, + "step": 39049 + }, + { + "epoch": 1.8292968567011758, + "grad_norm": 0.5959318078705428, + "learning_rate": 9.491301456459296e-08, + "loss": 0.2496, + "step": 39050 + }, + { + "epoch": 1.829343701691104, + "grad_norm": 0.5655197842720606, + "learning_rate": 9.486126087082188e-08, + "loss": 0.2582, + "step": 39051 + }, + { + "epoch": 1.8293905466810325, + "grad_norm": 0.6169575200186403, + "learning_rate": 9.480952101802026e-08, + "loss": 0.2899, + "step": 39052 + }, + { + "epoch": 1.8294373916709608, + "grad_norm": 0.5825143601688088, + "learning_rate": 9.475779500648485e-08, + "loss": 0.2674, + "step": 39053 + }, + { + "epoch": 1.829484236660889, + "grad_norm": 0.5840535740324608, + "learning_rate": 9.4706082836514e-08, + "loss": 0.2664, + "step": 39054 + }, + { + "epoch": 1.8295310816508175, + "grad_norm": 0.6101829889871603, + "learning_rate": 9.465438450840498e-08, + "loss": 0.2768, + "step": 39055 + }, + { + "epoch": 1.829577926640746, + "grad_norm": 0.6178762571232345, + "learning_rate": 9.460270002245586e-08, + "loss": 0.281, + "step": 39056 + }, + { + "epoch": 1.829624771630674, + "grad_norm": 0.5658073048200775, + "learning_rate": 9.455102937896309e-08, + "loss": 0.265, + "step": 39057 + }, + { + "epoch": 1.8296716166206024, + "grad_norm": 0.6742800019530155, + "learning_rate": 9.449937257822478e-08, + "loss": 0.2684, + "step": 39058 + }, + { + "epoch": 1.8297184616105309, + "grad_norm": 0.644681659477363, + "learning_rate": 9.444772962053816e-08, + "loss": 0.2767, + "step": 39059 + }, + { + "epoch": 1.829765306600459, + "grad_norm": 0.6303776283762874, + "learning_rate": 9.439610050619996e-08, + "loss": 0.2781, + "step": 39060 + }, + { + "epoch": 1.8298121515903873, + "grad_norm": 0.5746182654047449, + "learning_rate": 9.434448523550743e-08, + "loss": 0.2584, + "step": 39061 + }, + { + "epoch": 1.8298589965803158, + "grad_norm": 0.626139780743503, + "learning_rate": 9.42928838087584e-08, + "loss": 0.2883, + "step": 39062 + }, + { + "epoch": 1.829905841570244, + "grad_norm": 0.5494247314345538, + "learning_rate": 9.424129622624845e-08, + "loss": 0.265, + "step": 39063 + }, + { + "epoch": 1.8299526865601723, + "grad_norm": 0.56663368150324, + "learning_rate": 9.418972248827541e-08, + "loss": 0.2667, + "step": 39064 + }, + { + "epoch": 1.8299995315501008, + "grad_norm": 0.6159569361103553, + "learning_rate": 9.413816259513625e-08, + "loss": 0.2791, + "step": 39065 + }, + { + "epoch": 1.830046376540029, + "grad_norm": 0.6291272443449841, + "learning_rate": 9.408661654712686e-08, + "loss": 0.2906, + "step": 39066 + }, + { + "epoch": 1.8300932215299572, + "grad_norm": 0.5778956998978277, + "learning_rate": 9.403508434454423e-08, + "loss": 0.2597, + "step": 39067 + }, + { + "epoch": 1.8301400665198857, + "grad_norm": 0.6232526670749364, + "learning_rate": 9.398356598768532e-08, + "loss": 0.2655, + "step": 39068 + }, + { + "epoch": 1.8301869115098142, + "grad_norm": 0.5988963732065109, + "learning_rate": 9.393206147684575e-08, + "loss": 0.2883, + "step": 39069 + }, + { + "epoch": 1.8302337564997424, + "grad_norm": 0.6147301256126455, + "learning_rate": 9.388057081232278e-08, + "loss": 0.2747, + "step": 39070 + }, + { + "epoch": 1.8302806014896706, + "grad_norm": 0.6251943980865136, + "learning_rate": 9.382909399441225e-08, + "loss": 0.2796, + "step": 39071 + }, + { + "epoch": 1.830327446479599, + "grad_norm": 0.6084085139311921, + "learning_rate": 9.377763102341065e-08, + "loss": 0.2529, + "step": 39072 + }, + { + "epoch": 1.8303742914695273, + "grad_norm": 0.6032129727264073, + "learning_rate": 9.37261818996138e-08, + "loss": 0.2718, + "step": 39073 + }, + { + "epoch": 1.8304211364594556, + "grad_norm": 0.6200192540123476, + "learning_rate": 9.367474662331843e-08, + "loss": 0.2773, + "step": 39074 + }, + { + "epoch": 1.830467981449384, + "grad_norm": 0.6259728900108285, + "learning_rate": 9.36233251948196e-08, + "loss": 0.2741, + "step": 39075 + }, + { + "epoch": 1.8305148264393123, + "grad_norm": 0.6004166681708183, + "learning_rate": 9.35719176144137e-08, + "loss": 0.2579, + "step": 39076 + }, + { + "epoch": 1.8305616714292405, + "grad_norm": 0.6060789024893484, + "learning_rate": 9.35205238823969e-08, + "loss": 0.2675, + "step": 39077 + }, + { + "epoch": 1.830608516419169, + "grad_norm": 0.5590638500510995, + "learning_rate": 9.346914399906426e-08, + "loss": 0.2507, + "step": 39078 + }, + { + "epoch": 1.8306553614090975, + "grad_norm": 0.6192572250324517, + "learning_rate": 9.341777796471218e-08, + "loss": 0.2769, + "step": 39079 + }, + { + "epoch": 1.8307022063990255, + "grad_norm": 0.6137988524023754, + "learning_rate": 9.336642577963572e-08, + "loss": 0.2759, + "step": 39080 + }, + { + "epoch": 1.830749051388954, + "grad_norm": 0.5983656517544081, + "learning_rate": 9.331508744413104e-08, + "loss": 0.2604, + "step": 39081 + }, + { + "epoch": 1.8307958963788824, + "grad_norm": 0.5842526813180016, + "learning_rate": 9.32637629584926e-08, + "loss": 0.2546, + "step": 39082 + }, + { + "epoch": 1.8308427413688106, + "grad_norm": 0.5903627897384712, + "learning_rate": 9.321245232301629e-08, + "loss": 0.2808, + "step": 39083 + }, + { + "epoch": 1.8308895863587389, + "grad_norm": 0.5509499291004517, + "learning_rate": 9.31611555379977e-08, + "loss": 0.2609, + "step": 39084 + }, + { + "epoch": 1.8309364313486673, + "grad_norm": 0.5810804666556204, + "learning_rate": 9.310987260373134e-08, + "loss": 0.2575, + "step": 39085 + }, + { + "epoch": 1.8309832763385956, + "grad_norm": 0.6755888729279431, + "learning_rate": 9.305860352051333e-08, + "loss": 0.3006, + "step": 39086 + }, + { + "epoch": 1.8310301213285238, + "grad_norm": 0.5720727394318939, + "learning_rate": 9.300734828863788e-08, + "loss": 0.2627, + "step": 39087 + }, + { + "epoch": 1.8310769663184523, + "grad_norm": 0.6013485488599294, + "learning_rate": 9.295610690839979e-08, + "loss": 0.2516, + "step": 39088 + }, + { + "epoch": 1.8311238113083805, + "grad_norm": 0.6172138605328544, + "learning_rate": 9.290487938009462e-08, + "loss": 0.26, + "step": 39089 + }, + { + "epoch": 1.8311706562983088, + "grad_norm": 0.5888435113097071, + "learning_rate": 9.28536657040166e-08, + "loss": 0.2714, + "step": 39090 + }, + { + "epoch": 1.8312175012882372, + "grad_norm": 0.5907277128919524, + "learning_rate": 9.280246588046076e-08, + "loss": 0.2707, + "step": 39091 + }, + { + "epoch": 1.8312643462781657, + "grad_norm": 0.5905491272386033, + "learning_rate": 9.275127990972188e-08, + "loss": 0.2779, + "step": 39092 + }, + { + "epoch": 1.8313111912680937, + "grad_norm": 0.5966628283804075, + "learning_rate": 9.270010779209442e-08, + "loss": 0.2535, + "step": 39093 + }, + { + "epoch": 1.8313580362580222, + "grad_norm": 0.6112868506358445, + "learning_rate": 9.26489495278729e-08, + "loss": 0.2868, + "step": 39094 + }, + { + "epoch": 1.8314048812479506, + "grad_norm": 0.6202073928113114, + "learning_rate": 9.25978051173515e-08, + "loss": 0.2729, + "step": 39095 + }, + { + "epoch": 1.8314517262378789, + "grad_norm": 0.5782801003263528, + "learning_rate": 9.254667456082444e-08, + "loss": 0.2683, + "step": 39096 + }, + { + "epoch": 1.8314985712278071, + "grad_norm": 0.5524433049165137, + "learning_rate": 9.249555785858622e-08, + "loss": 0.2573, + "step": 39097 + }, + { + "epoch": 1.8315454162177356, + "grad_norm": 0.665754649733698, + "learning_rate": 9.244445501093075e-08, + "loss": 0.2937, + "step": 39098 + }, + { + "epoch": 1.8315922612076638, + "grad_norm": 0.6600654205237665, + "learning_rate": 9.239336601815279e-08, + "loss": 0.2806, + "step": 39099 + }, + { + "epoch": 1.831639106197592, + "grad_norm": 0.5815189943070878, + "learning_rate": 9.234229088054575e-08, + "loss": 0.2568, + "step": 39100 + }, + { + "epoch": 1.8316859511875205, + "grad_norm": 0.5982085150634356, + "learning_rate": 9.22912295984038e-08, + "loss": 0.2699, + "step": 39101 + }, + { + "epoch": 1.8317327961774488, + "grad_norm": 0.6187301072508955, + "learning_rate": 9.224018217202036e-08, + "loss": 0.281, + "step": 39102 + }, + { + "epoch": 1.831779641167377, + "grad_norm": 0.5744624248283166, + "learning_rate": 9.218914860168959e-08, + "loss": 0.2746, + "step": 39103 + }, + { + "epoch": 1.8318264861573055, + "grad_norm": 0.5792149807275203, + "learning_rate": 9.213812888770518e-08, + "loss": 0.2543, + "step": 39104 + }, + { + "epoch": 1.831873331147234, + "grad_norm": 0.6061379908329779, + "learning_rate": 9.208712303036078e-08, + "loss": 0.2768, + "step": 39105 + }, + { + "epoch": 1.8319201761371622, + "grad_norm": 0.5789206622292278, + "learning_rate": 9.203613102994946e-08, + "loss": 0.2574, + "step": 39106 + }, + { + "epoch": 1.8319670211270904, + "grad_norm": 0.6279607860582864, + "learning_rate": 9.19851528867649e-08, + "loss": 0.2781, + "step": 39107 + }, + { + "epoch": 1.8320138661170189, + "grad_norm": 0.6147851426203278, + "learning_rate": 9.193418860110104e-08, + "loss": 0.2628, + "step": 39108 + }, + { + "epoch": 1.8320607111069471, + "grad_norm": 0.594686607934151, + "learning_rate": 9.18832381732504e-08, + "loss": 0.2683, + "step": 39109 + }, + { + "epoch": 1.8321075560968754, + "grad_norm": 0.6093651300164602, + "learning_rate": 9.183230160350637e-08, + "loss": 0.2714, + "step": 39110 + }, + { + "epoch": 1.8321544010868038, + "grad_norm": 0.6148857700781398, + "learning_rate": 9.17813788921626e-08, + "loss": 0.2843, + "step": 39111 + }, + { + "epoch": 1.832201246076732, + "grad_norm": 0.6211185610481332, + "learning_rate": 9.173047003951136e-08, + "loss": 0.2645, + "step": 39112 + }, + { + "epoch": 1.8322480910666603, + "grad_norm": 0.5588060358890656, + "learning_rate": 9.167957504584574e-08, + "loss": 0.2593, + "step": 39113 + }, + { + "epoch": 1.8322949360565888, + "grad_norm": 0.5456215914007415, + "learning_rate": 9.16286939114594e-08, + "loss": 0.254, + "step": 39114 + }, + { + "epoch": 1.8323417810465172, + "grad_norm": 0.544493746407112, + "learning_rate": 9.157782663664433e-08, + "loss": 0.2543, + "step": 39115 + }, + { + "epoch": 1.8323886260364453, + "grad_norm": 0.5923745083409679, + "learning_rate": 9.152697322169336e-08, + "loss": 0.2655, + "step": 39116 + }, + { + "epoch": 1.8324354710263737, + "grad_norm": 0.5677013031075325, + "learning_rate": 9.147613366689983e-08, + "loss": 0.2551, + "step": 39117 + }, + { + "epoch": 1.8324823160163022, + "grad_norm": 0.6330135323831343, + "learning_rate": 9.142530797255522e-08, + "loss": 0.2725, + "step": 39118 + }, + { + "epoch": 1.8325291610062304, + "grad_norm": 0.6143348504712327, + "learning_rate": 9.137449613895289e-08, + "loss": 0.278, + "step": 39119 + }, + { + "epoch": 1.8325760059961587, + "grad_norm": 0.6165930252336952, + "learning_rate": 9.132369816638481e-08, + "loss": 0.2832, + "step": 39120 + }, + { + "epoch": 1.8326228509860871, + "grad_norm": 0.5815400771233894, + "learning_rate": 9.127291405514354e-08, + "loss": 0.2735, + "step": 39121 + }, + { + "epoch": 1.8326696959760154, + "grad_norm": 0.5545320892032541, + "learning_rate": 9.122214380552136e-08, + "loss": 0.2511, + "step": 39122 + }, + { + "epoch": 1.8327165409659436, + "grad_norm": 0.5828458308915273, + "learning_rate": 9.117138741781023e-08, + "loss": 0.2621, + "step": 39123 + }, + { + "epoch": 1.832763385955872, + "grad_norm": 0.614782126393993, + "learning_rate": 9.112064489230215e-08, + "loss": 0.2692, + "step": 39124 + }, + { + "epoch": 1.8328102309458003, + "grad_norm": 0.5871829256423349, + "learning_rate": 9.10699162292894e-08, + "loss": 0.2532, + "step": 39125 + }, + { + "epoch": 1.8328570759357286, + "grad_norm": 0.5808196999599172, + "learning_rate": 9.101920142906395e-08, + "loss": 0.2776, + "step": 39126 + }, + { + "epoch": 1.832903920925657, + "grad_norm": 0.6129464425606221, + "learning_rate": 9.096850049191724e-08, + "loss": 0.2678, + "step": 39127 + }, + { + "epoch": 1.8329507659155855, + "grad_norm": 0.5675021971471769, + "learning_rate": 9.091781341814182e-08, + "loss": 0.2544, + "step": 39128 + }, + { + "epoch": 1.8329976109055135, + "grad_norm": 0.5705303332648742, + "learning_rate": 9.086714020802856e-08, + "loss": 0.2631, + "step": 39129 + }, + { + "epoch": 1.833044455895442, + "grad_norm": 0.6146621455373645, + "learning_rate": 9.081648086186945e-08, + "loss": 0.2771, + "step": 39130 + }, + { + "epoch": 1.8330913008853704, + "grad_norm": 0.6206012461796403, + "learning_rate": 9.076583537995593e-08, + "loss": 0.259, + "step": 39131 + }, + { + "epoch": 1.8331381458752987, + "grad_norm": 0.6386180907211549, + "learning_rate": 9.071520376257942e-08, + "loss": 0.2742, + "step": 39132 + }, + { + "epoch": 1.833184990865227, + "grad_norm": 0.597019989213581, + "learning_rate": 9.066458601003136e-08, + "loss": 0.2697, + "step": 39133 + }, + { + "epoch": 1.8332318358551554, + "grad_norm": 0.5984598764705275, + "learning_rate": 9.061398212260292e-08, + "loss": 0.262, + "step": 39134 + }, + { + "epoch": 1.8332786808450836, + "grad_norm": 0.6316693688836766, + "learning_rate": 9.056339210058579e-08, + "loss": 0.2858, + "step": 39135 + }, + { + "epoch": 1.8333255258350118, + "grad_norm": 0.6282980553419022, + "learning_rate": 9.051281594427086e-08, + "loss": 0.2701, + "step": 39136 + }, + { + "epoch": 1.8333723708249403, + "grad_norm": 0.5518750749697491, + "learning_rate": 9.046225365394873e-08, + "loss": 0.2593, + "step": 39137 + }, + { + "epoch": 1.8334192158148686, + "grad_norm": 0.5717797117901308, + "learning_rate": 9.041170522991055e-08, + "loss": 0.2615, + "step": 39138 + }, + { + "epoch": 1.8334660608047968, + "grad_norm": 0.636505477313721, + "learning_rate": 9.036117067244776e-08, + "loss": 0.2549, + "step": 39139 + }, + { + "epoch": 1.8335129057947253, + "grad_norm": 0.637475125387662, + "learning_rate": 9.031064998185041e-08, + "loss": 0.2734, + "step": 39140 + }, + { + "epoch": 1.8335597507846537, + "grad_norm": 0.5930440736424305, + "learning_rate": 9.02601431584102e-08, + "loss": 0.2719, + "step": 39141 + }, + { + "epoch": 1.833606595774582, + "grad_norm": 0.6053403552193958, + "learning_rate": 9.020965020241663e-08, + "loss": 0.2715, + "step": 39142 + }, + { + "epoch": 1.8336534407645102, + "grad_norm": 0.6054924744806703, + "learning_rate": 9.01591711141614e-08, + "loss": 0.2667, + "step": 39143 + }, + { + "epoch": 1.8337002857544387, + "grad_norm": 0.5964749052585528, + "learning_rate": 9.0108705893934e-08, + "loss": 0.2804, + "step": 39144 + }, + { + "epoch": 1.833747130744367, + "grad_norm": 0.6192695810557065, + "learning_rate": 9.005825454202532e-08, + "loss": 0.2826, + "step": 39145 + }, + { + "epoch": 1.8337939757342951, + "grad_norm": 0.6282409635091311, + "learning_rate": 9.000781705872569e-08, + "loss": 0.2688, + "step": 39146 + }, + { + "epoch": 1.8338408207242236, + "grad_norm": 0.6153640224663861, + "learning_rate": 8.995739344432541e-08, + "loss": 0.2444, + "step": 39147 + }, + { + "epoch": 1.8338876657141518, + "grad_norm": 0.563060313982264, + "learning_rate": 8.990698369911482e-08, + "loss": 0.2511, + "step": 39148 + }, + { + "epoch": 1.83393451070408, + "grad_norm": 0.6293918009590298, + "learning_rate": 8.985658782338341e-08, + "loss": 0.2818, + "step": 39149 + }, + { + "epoch": 1.8339813556940086, + "grad_norm": 0.6310461498297442, + "learning_rate": 8.980620581742178e-08, + "loss": 0.2826, + "step": 39150 + }, + { + "epoch": 1.834028200683937, + "grad_norm": 0.61580907703378, + "learning_rate": 8.975583768151968e-08, + "loss": 0.2622, + "step": 39151 + }, + { + "epoch": 1.834075045673865, + "grad_norm": 0.6604121534451834, + "learning_rate": 8.970548341596663e-08, + "loss": 0.2846, + "step": 39152 + }, + { + "epoch": 1.8341218906637935, + "grad_norm": 0.60286647138619, + "learning_rate": 8.965514302105293e-08, + "loss": 0.2799, + "step": 39153 + }, + { + "epoch": 1.834168735653722, + "grad_norm": 0.614826448939202, + "learning_rate": 8.960481649706838e-08, + "loss": 0.2732, + "step": 39154 + }, + { + "epoch": 1.8342155806436502, + "grad_norm": 0.6425914227503731, + "learning_rate": 8.955450384430187e-08, + "loss": 0.2662, + "step": 39155 + }, + { + "epoch": 1.8342624256335784, + "grad_norm": 0.6152471897548929, + "learning_rate": 8.950420506304347e-08, + "loss": 0.2762, + "step": 39156 + }, + { + "epoch": 1.834309270623507, + "grad_norm": 0.6087283777827583, + "learning_rate": 8.945392015358268e-08, + "loss": 0.259, + "step": 39157 + }, + { + "epoch": 1.8343561156134351, + "grad_norm": 0.5855951707285189, + "learning_rate": 8.940364911620842e-08, + "loss": 0.2739, + "step": 39158 + }, + { + "epoch": 1.8344029606033634, + "grad_norm": 0.6279183846893946, + "learning_rate": 8.935339195121017e-08, + "loss": 0.282, + "step": 39159 + }, + { + "epoch": 1.8344498055932918, + "grad_norm": 0.6356546853596498, + "learning_rate": 8.930314865887773e-08, + "loss": 0.2652, + "step": 39160 + }, + { + "epoch": 1.83449665058322, + "grad_norm": 0.6269666768090469, + "learning_rate": 8.925291923949947e-08, + "loss": 0.2771, + "step": 39161 + }, + { + "epoch": 1.8345434955731483, + "grad_norm": 0.6138329947715446, + "learning_rate": 8.920270369336459e-08, + "loss": 0.265, + "step": 39162 + }, + { + "epoch": 1.8345903405630768, + "grad_norm": 0.5776156767325757, + "learning_rate": 8.915250202076287e-08, + "loss": 0.2585, + "step": 39163 + }, + { + "epoch": 1.8346371855530053, + "grad_norm": 0.5880570273855027, + "learning_rate": 8.910231422198185e-08, + "loss": 0.2608, + "step": 39164 + }, + { + "epoch": 1.8346840305429333, + "grad_norm": 0.6028268642031739, + "learning_rate": 8.905214029731129e-08, + "loss": 0.2761, + "step": 39165 + }, + { + "epoch": 1.8347308755328617, + "grad_norm": 0.5651473026507349, + "learning_rate": 8.900198024703987e-08, + "loss": 0.2636, + "step": 39166 + }, + { + "epoch": 1.8347777205227902, + "grad_norm": 0.6585021960671943, + "learning_rate": 8.895183407145597e-08, + "loss": 0.2795, + "step": 39167 + }, + { + "epoch": 1.8348245655127184, + "grad_norm": 0.5508656581193941, + "learning_rate": 8.890170177084794e-08, + "loss": 0.2546, + "step": 39168 + }, + { + "epoch": 1.8348714105026467, + "grad_norm": 0.5715669511473397, + "learning_rate": 8.885158334550503e-08, + "loss": 0.2542, + "step": 39169 + }, + { + "epoch": 1.8349182554925751, + "grad_norm": 0.5813500918591831, + "learning_rate": 8.880147879571505e-08, + "loss": 0.2653, + "step": 39170 + }, + { + "epoch": 1.8349651004825034, + "grad_norm": 0.6252266117439212, + "learning_rate": 8.875138812176664e-08, + "loss": 0.2841, + "step": 39171 + }, + { + "epoch": 1.8350119454724316, + "grad_norm": 0.5759877890427186, + "learning_rate": 8.870131132394793e-08, + "loss": 0.272, + "step": 39172 + }, + { + "epoch": 1.83505879046236, + "grad_norm": 0.6037349240352563, + "learning_rate": 8.865124840254703e-08, + "loss": 0.2732, + "step": 39173 + }, + { + "epoch": 1.8351056354522883, + "grad_norm": 0.5854884919117254, + "learning_rate": 8.860119935785228e-08, + "loss": 0.2645, + "step": 39174 + }, + { + "epoch": 1.8351524804422166, + "grad_norm": 0.6007672738804548, + "learning_rate": 8.855116419015126e-08, + "loss": 0.2551, + "step": 39175 + }, + { + "epoch": 1.835199325432145, + "grad_norm": 0.6061207484606632, + "learning_rate": 8.850114289973232e-08, + "loss": 0.2626, + "step": 39176 + }, + { + "epoch": 1.8352461704220735, + "grad_norm": 0.595417429619405, + "learning_rate": 8.845113548688333e-08, + "loss": 0.271, + "step": 39177 + }, + { + "epoch": 1.8352930154120017, + "grad_norm": 0.5425695760294234, + "learning_rate": 8.840114195189209e-08, + "loss": 0.2548, + "step": 39178 + }, + { + "epoch": 1.83533986040193, + "grad_norm": 0.6126027973724948, + "learning_rate": 8.835116229504586e-08, + "loss": 0.2854, + "step": 39179 + }, + { + "epoch": 1.8353867053918584, + "grad_norm": 0.6086000146916399, + "learning_rate": 8.830119651663249e-08, + "loss": 0.2603, + "step": 39180 + }, + { + "epoch": 1.8354335503817867, + "grad_norm": 0.6077466713728192, + "learning_rate": 8.825124461693952e-08, + "loss": 0.2817, + "step": 39181 + }, + { + "epoch": 1.835480395371715, + "grad_norm": 0.6175500266310152, + "learning_rate": 8.820130659625448e-08, + "loss": 0.298, + "step": 39182 + }, + { + "epoch": 1.8355272403616434, + "grad_norm": 0.591040871843826, + "learning_rate": 8.815138245486493e-08, + "loss": 0.2692, + "step": 39183 + }, + { + "epoch": 1.8355740853515716, + "grad_norm": 0.5633490455314051, + "learning_rate": 8.810147219305815e-08, + "loss": 0.2641, + "step": 39184 + }, + { + "epoch": 1.8356209303414999, + "grad_norm": 0.6655876954188705, + "learning_rate": 8.805157581112111e-08, + "loss": 0.305, + "step": 39185 + }, + { + "epoch": 1.8356677753314283, + "grad_norm": 0.6170005826329988, + "learning_rate": 8.800169330934083e-08, + "loss": 0.2877, + "step": 39186 + }, + { + "epoch": 1.8357146203213568, + "grad_norm": 0.6033520856511433, + "learning_rate": 8.795182468800457e-08, + "loss": 0.2688, + "step": 39187 + }, + { + "epoch": 1.8357614653112848, + "grad_norm": 0.6041259248031671, + "learning_rate": 8.79019699473993e-08, + "loss": 0.2785, + "step": 39188 + }, + { + "epoch": 1.8358083103012133, + "grad_norm": 0.6688806690229673, + "learning_rate": 8.785212908781176e-08, + "loss": 0.2893, + "step": 39189 + }, + { + "epoch": 1.8358551552911417, + "grad_norm": 0.5628768146847709, + "learning_rate": 8.78023021095295e-08, + "loss": 0.2524, + "step": 39190 + }, + { + "epoch": 1.83590200028107, + "grad_norm": 0.5724524819636625, + "learning_rate": 8.775248901283839e-08, + "loss": 0.2589, + "step": 39191 + }, + { + "epoch": 1.8359488452709982, + "grad_norm": 0.6348984542054947, + "learning_rate": 8.770268979802543e-08, + "loss": 0.2837, + "step": 39192 + }, + { + "epoch": 1.8359956902609267, + "grad_norm": 0.5929054705231777, + "learning_rate": 8.765290446537705e-08, + "loss": 0.2788, + "step": 39193 + }, + { + "epoch": 1.836042535250855, + "grad_norm": 0.6120799398408063, + "learning_rate": 8.760313301517998e-08, + "loss": 0.2637, + "step": 39194 + }, + { + "epoch": 1.8360893802407832, + "grad_norm": 0.626882758849027, + "learning_rate": 8.755337544772064e-08, + "loss": 0.2758, + "step": 39195 + }, + { + "epoch": 1.8361362252307116, + "grad_norm": 0.5685475708904489, + "learning_rate": 8.75036317632852e-08, + "loss": 0.2716, + "step": 39196 + }, + { + "epoch": 1.8361830702206399, + "grad_norm": 0.6152026958306268, + "learning_rate": 8.745390196216009e-08, + "loss": 0.2773, + "step": 39197 + }, + { + "epoch": 1.836229915210568, + "grad_norm": 0.5505725853902366, + "learning_rate": 8.740418604463119e-08, + "loss": 0.2544, + "step": 39198 + }, + { + "epoch": 1.8362767602004966, + "grad_norm": 0.5715762004295019, + "learning_rate": 8.735448401098523e-08, + "loss": 0.2658, + "step": 39199 + }, + { + "epoch": 1.836323605190425, + "grad_norm": 0.6351492169702596, + "learning_rate": 8.730479586150726e-08, + "loss": 0.2909, + "step": 39200 + }, + { + "epoch": 1.836370450180353, + "grad_norm": 0.6020549371221692, + "learning_rate": 8.725512159648397e-08, + "loss": 0.2548, + "step": 39201 + }, + { + "epoch": 1.8364172951702815, + "grad_norm": 0.6036668128429065, + "learning_rate": 8.720546121620154e-08, + "loss": 0.2642, + "step": 39202 + }, + { + "epoch": 1.83646414016021, + "grad_norm": 0.6066332255855136, + "learning_rate": 8.715581472094475e-08, + "loss": 0.2742, + "step": 39203 + }, + { + "epoch": 1.8365109851501382, + "grad_norm": 0.5525026481174924, + "learning_rate": 8.710618211099975e-08, + "loss": 0.2497, + "step": 39204 + }, + { + "epoch": 1.8365578301400665, + "grad_norm": 0.5982573093655829, + "learning_rate": 8.705656338665242e-08, + "loss": 0.2727, + "step": 39205 + }, + { + "epoch": 1.836604675129995, + "grad_norm": 0.5235868856980862, + "learning_rate": 8.700695854818808e-08, + "loss": 0.2478, + "step": 39206 + }, + { + "epoch": 1.8366515201199232, + "grad_norm": 0.656649369104556, + "learning_rate": 8.695736759589207e-08, + "loss": 0.2812, + "step": 39207 + }, + { + "epoch": 1.8366983651098514, + "grad_norm": 0.6170308352629328, + "learning_rate": 8.690779053005027e-08, + "loss": 0.2853, + "step": 39208 + }, + { + "epoch": 1.8367452100997799, + "grad_norm": 0.573597720391666, + "learning_rate": 8.685822735094718e-08, + "loss": 0.2688, + "step": 39209 + }, + { + "epoch": 1.836792055089708, + "grad_norm": 0.6050825002910603, + "learning_rate": 8.680867805886866e-08, + "loss": 0.259, + "step": 39210 + }, + { + "epoch": 1.8368389000796363, + "grad_norm": 0.6242721530910207, + "learning_rate": 8.675914265409952e-08, + "loss": 0.2774, + "step": 39211 + }, + { + "epoch": 1.8368857450695648, + "grad_norm": 0.5729327544169749, + "learning_rate": 8.670962113692533e-08, + "loss": 0.2535, + "step": 39212 + }, + { + "epoch": 1.8369325900594933, + "grad_norm": 0.5863878664294437, + "learning_rate": 8.666011350763032e-08, + "loss": 0.2632, + "step": 39213 + }, + { + "epoch": 1.8369794350494215, + "grad_norm": 0.606326656619359, + "learning_rate": 8.661061976650009e-08, + "loss": 0.2762, + "step": 39214 + }, + { + "epoch": 1.8370262800393498, + "grad_norm": 0.6140033787251786, + "learning_rate": 8.656113991381887e-08, + "loss": 0.2765, + "step": 39215 + }, + { + "epoch": 1.8370731250292782, + "grad_norm": 0.5634658171328897, + "learning_rate": 8.651167394987198e-08, + "loss": 0.2554, + "step": 39216 + }, + { + "epoch": 1.8371199700192065, + "grad_norm": 0.6323393785837788, + "learning_rate": 8.646222187494363e-08, + "loss": 0.2861, + "step": 39217 + }, + { + "epoch": 1.8371668150091347, + "grad_norm": 0.6004161051505119, + "learning_rate": 8.641278368931833e-08, + "loss": 0.2624, + "step": 39218 + }, + { + "epoch": 1.8372136599990632, + "grad_norm": 0.6787658161185259, + "learning_rate": 8.636335939328139e-08, + "loss": 0.2725, + "step": 39219 + }, + { + "epoch": 1.8372605049889914, + "grad_norm": 0.6352969805164056, + "learning_rate": 8.631394898711621e-08, + "loss": 0.2769, + "step": 39220 + }, + { + "epoch": 1.8373073499789196, + "grad_norm": 0.6123230660370704, + "learning_rate": 8.626455247110809e-08, + "loss": 0.2757, + "step": 39221 + }, + { + "epoch": 1.837354194968848, + "grad_norm": 0.5573371224116583, + "learning_rate": 8.621516984554046e-08, + "loss": 0.2505, + "step": 39222 + }, + { + "epoch": 1.8374010399587766, + "grad_norm": 0.6055373806941765, + "learning_rate": 8.616580111069778e-08, + "loss": 0.252, + "step": 39223 + }, + { + "epoch": 1.8374478849487046, + "grad_norm": 0.5811259407918197, + "learning_rate": 8.611644626686427e-08, + "loss": 0.2716, + "step": 39224 + }, + { + "epoch": 1.837494729938633, + "grad_norm": 0.6290424252386899, + "learning_rate": 8.606710531432361e-08, + "loss": 0.2753, + "step": 39225 + }, + { + "epoch": 1.8375415749285615, + "grad_norm": 0.5752658879385572, + "learning_rate": 8.601777825336083e-08, + "loss": 0.2641, + "step": 39226 + }, + { + "epoch": 1.8375884199184898, + "grad_norm": 0.5817813768852754, + "learning_rate": 8.596846508425877e-08, + "loss": 0.2681, + "step": 39227 + }, + { + "epoch": 1.837635264908418, + "grad_norm": 0.6124326777835224, + "learning_rate": 8.591916580730109e-08, + "loss": 0.2821, + "step": 39228 + }, + { + "epoch": 1.8376821098983465, + "grad_norm": 0.5736039567222767, + "learning_rate": 8.586988042277172e-08, + "loss": 0.2505, + "step": 39229 + }, + { + "epoch": 1.8377289548882747, + "grad_norm": 0.618588177986457, + "learning_rate": 8.582060893095462e-08, + "loss": 0.2748, + "step": 39230 + }, + { + "epoch": 1.837775799878203, + "grad_norm": 0.5887142585160369, + "learning_rate": 8.577135133213316e-08, + "loss": 0.258, + "step": 39231 + }, + { + "epoch": 1.8378226448681314, + "grad_norm": 0.6133342412420386, + "learning_rate": 8.572210762659073e-08, + "loss": 0.2771, + "step": 39232 + }, + { + "epoch": 1.8378694898580596, + "grad_norm": 0.5613141473241355, + "learning_rate": 8.567287781461126e-08, + "loss": 0.2799, + "step": 39233 + }, + { + "epoch": 1.8379163348479879, + "grad_norm": 0.612583447707381, + "learning_rate": 8.56236618964773e-08, + "loss": 0.2741, + "step": 39234 + }, + { + "epoch": 1.8379631798379163, + "grad_norm": 0.5997265852257331, + "learning_rate": 8.557445987247199e-08, + "loss": 0.2708, + "step": 39235 + }, + { + "epoch": 1.8380100248278448, + "grad_norm": 0.6066710765731662, + "learning_rate": 8.552527174287894e-08, + "loss": 0.2673, + "step": 39236 + }, + { + "epoch": 1.8380568698177728, + "grad_norm": 0.6048293797742237, + "learning_rate": 8.54760975079813e-08, + "loss": 0.2764, + "step": 39237 + }, + { + "epoch": 1.8381037148077013, + "grad_norm": 0.6117859194989529, + "learning_rate": 8.54269371680616e-08, + "loss": 0.2798, + "step": 39238 + }, + { + "epoch": 1.8381505597976298, + "grad_norm": 0.6330813294190154, + "learning_rate": 8.537779072340324e-08, + "loss": 0.2754, + "step": 39239 + }, + { + "epoch": 1.838197404787558, + "grad_norm": 0.6384651795123738, + "learning_rate": 8.532865817428875e-08, + "loss": 0.2734, + "step": 39240 + }, + { + "epoch": 1.8382442497774862, + "grad_norm": 0.6088585533206277, + "learning_rate": 8.527953952100098e-08, + "loss": 0.28, + "step": 39241 + }, + { + "epoch": 1.8382910947674147, + "grad_norm": 0.6373435072868454, + "learning_rate": 8.52304347638222e-08, + "loss": 0.2905, + "step": 39242 + }, + { + "epoch": 1.838337939757343, + "grad_norm": 0.6304708473115734, + "learning_rate": 8.518134390303551e-08, + "loss": 0.2751, + "step": 39243 + }, + { + "epoch": 1.8383847847472712, + "grad_norm": 0.5798547745864623, + "learning_rate": 8.513226693892346e-08, + "loss": 0.2686, + "step": 39244 + }, + { + "epoch": 1.8384316297371996, + "grad_norm": 0.57315475627235, + "learning_rate": 8.508320387176805e-08, + "loss": 0.2718, + "step": 39245 + }, + { + "epoch": 1.8384784747271279, + "grad_norm": 0.6446542701303907, + "learning_rate": 8.503415470185184e-08, + "loss": 0.2801, + "step": 39246 + }, + { + "epoch": 1.8385253197170561, + "grad_norm": 0.6118666403323388, + "learning_rate": 8.498511942945708e-08, + "loss": 0.2759, + "step": 39247 + }, + { + "epoch": 1.8385721647069846, + "grad_norm": 0.6209567942572012, + "learning_rate": 8.493609805486636e-08, + "loss": 0.2815, + "step": 39248 + }, + { + "epoch": 1.838619009696913, + "grad_norm": 0.5692766280335105, + "learning_rate": 8.488709057836081e-08, + "loss": 0.2687, + "step": 39249 + }, + { + "epoch": 1.8386658546868413, + "grad_norm": 0.5989896696449984, + "learning_rate": 8.483809700022328e-08, + "loss": 0.2785, + "step": 39250 + }, + { + "epoch": 1.8387126996767695, + "grad_norm": 0.6357515569380557, + "learning_rate": 8.478911732073574e-08, + "loss": 0.2783, + "step": 39251 + }, + { + "epoch": 1.838759544666698, + "grad_norm": 0.5896247672805208, + "learning_rate": 8.474015154017939e-08, + "loss": 0.262, + "step": 39252 + }, + { + "epoch": 1.8388063896566262, + "grad_norm": 0.6186102932604896, + "learning_rate": 8.469119965883649e-08, + "loss": 0.2831, + "step": 39253 + }, + { + "epoch": 1.8388532346465545, + "grad_norm": 0.5889781954699658, + "learning_rate": 8.464226167698902e-08, + "loss": 0.2509, + "step": 39254 + }, + { + "epoch": 1.838900079636483, + "grad_norm": 0.5891922737131362, + "learning_rate": 8.459333759491789e-08, + "loss": 0.2708, + "step": 39255 + }, + { + "epoch": 1.8389469246264112, + "grad_norm": 0.6361655301490706, + "learning_rate": 8.454442741290509e-08, + "loss": 0.2747, + "step": 39256 + }, + { + "epoch": 1.8389937696163394, + "grad_norm": 0.5635664906609266, + "learning_rate": 8.449553113123204e-08, + "loss": 0.2602, + "step": 39257 + }, + { + "epoch": 1.8390406146062679, + "grad_norm": 0.5844037975188139, + "learning_rate": 8.444664875017994e-08, + "loss": 0.2674, + "step": 39258 + }, + { + "epoch": 1.8390874595961964, + "grad_norm": 0.6087747246084989, + "learning_rate": 8.439778027003049e-08, + "loss": 0.2795, + "step": 39259 + }, + { + "epoch": 1.8391343045861244, + "grad_norm": 0.6873918244401783, + "learning_rate": 8.434892569106429e-08, + "loss": 0.2971, + "step": 39260 + }, + { + "epoch": 1.8391811495760528, + "grad_norm": 0.602328407911905, + "learning_rate": 8.430008501356335e-08, + "loss": 0.2618, + "step": 39261 + }, + { + "epoch": 1.8392279945659813, + "grad_norm": 0.610309649825345, + "learning_rate": 8.4251258237808e-08, + "loss": 0.2751, + "step": 39262 + }, + { + "epoch": 1.8392748395559095, + "grad_norm": 0.6517540810543472, + "learning_rate": 8.420244536407968e-08, + "loss": 0.2698, + "step": 39263 + }, + { + "epoch": 1.8393216845458378, + "grad_norm": 0.6210781121485711, + "learning_rate": 8.4153646392659e-08, + "loss": 0.2818, + "step": 39264 + }, + { + "epoch": 1.8393685295357662, + "grad_norm": 0.5626320034495187, + "learning_rate": 8.410486132382683e-08, + "loss": 0.2635, + "step": 39265 + }, + { + "epoch": 1.8394153745256945, + "grad_norm": 0.5989654022472161, + "learning_rate": 8.405609015786381e-08, + "loss": 0.2751, + "step": 39266 + }, + { + "epoch": 1.8394622195156227, + "grad_norm": 0.5719688918155459, + "learning_rate": 8.400733289505081e-08, + "loss": 0.2575, + "step": 39267 + }, + { + "epoch": 1.8395090645055512, + "grad_norm": 0.5990107681443388, + "learning_rate": 8.395858953566871e-08, + "loss": 0.2718, + "step": 39268 + }, + { + "epoch": 1.8395559094954794, + "grad_norm": 0.6288801193246455, + "learning_rate": 8.390986007999757e-08, + "loss": 0.28, + "step": 39269 + }, + { + "epoch": 1.8396027544854077, + "grad_norm": 0.6767519425435479, + "learning_rate": 8.386114452831801e-08, + "loss": 0.2891, + "step": 39270 + }, + { + "epoch": 1.8396495994753361, + "grad_norm": 0.6183628694014285, + "learning_rate": 8.381244288090978e-08, + "loss": 0.2759, + "step": 39271 + }, + { + "epoch": 1.8396964444652646, + "grad_norm": 0.576871148344616, + "learning_rate": 8.376375513805407e-08, + "loss": 0.2632, + "step": 39272 + }, + { + "epoch": 1.8397432894551926, + "grad_norm": 0.6018285378065094, + "learning_rate": 8.371508130003037e-08, + "loss": 0.2615, + "step": 39273 + }, + { + "epoch": 1.839790134445121, + "grad_norm": 0.6018582964434288, + "learning_rate": 8.366642136711928e-08, + "loss": 0.2671, + "step": 39274 + }, + { + "epoch": 1.8398369794350495, + "grad_norm": 0.5911720735171458, + "learning_rate": 8.361777533960058e-08, + "loss": 0.2771, + "step": 39275 + }, + { + "epoch": 1.8398838244249778, + "grad_norm": 0.5635984719436308, + "learning_rate": 8.356914321775461e-08, + "loss": 0.2596, + "step": 39276 + }, + { + "epoch": 1.839930669414906, + "grad_norm": 0.660381420530069, + "learning_rate": 8.35205250018603e-08, + "loss": 0.2827, + "step": 39277 + }, + { + "epoch": 1.8399775144048345, + "grad_norm": 0.5880639526265469, + "learning_rate": 8.347192069219828e-08, + "loss": 0.2644, + "step": 39278 + }, + { + "epoch": 1.8400243593947627, + "grad_norm": 0.6100204212347949, + "learning_rate": 8.342333028904775e-08, + "loss": 0.2805, + "step": 39279 + }, + { + "epoch": 1.840071204384691, + "grad_norm": 0.6356172077020898, + "learning_rate": 8.337475379268878e-08, + "loss": 0.2809, + "step": 39280 + }, + { + "epoch": 1.8401180493746194, + "grad_norm": 0.5832792786917523, + "learning_rate": 8.332619120340085e-08, + "loss": 0.2473, + "step": 39281 + }, + { + "epoch": 1.8401648943645477, + "grad_norm": 0.5730155583214359, + "learning_rate": 8.327764252146291e-08, + "loss": 0.2471, + "step": 39282 + }, + { + "epoch": 1.840211739354476, + "grad_norm": 0.560013532840179, + "learning_rate": 8.322910774715531e-08, + "loss": 0.2443, + "step": 39283 + }, + { + "epoch": 1.8402585843444044, + "grad_norm": 0.5925240401041552, + "learning_rate": 8.318058688075614e-08, + "loss": 0.2563, + "step": 39284 + }, + { + "epoch": 1.8403054293343328, + "grad_norm": 0.6087292550817053, + "learning_rate": 8.313207992254546e-08, + "loss": 0.274, + "step": 39285 + }, + { + "epoch": 1.840352274324261, + "grad_norm": 0.5787719104385308, + "learning_rate": 8.308358687280222e-08, + "loss": 0.2704, + "step": 39286 + }, + { + "epoch": 1.8403991193141893, + "grad_norm": 0.5649773892041453, + "learning_rate": 8.303510773180534e-08, + "loss": 0.2531, + "step": 39287 + }, + { + "epoch": 1.8404459643041178, + "grad_norm": 0.5989157008735965, + "learning_rate": 8.298664249983435e-08, + "loss": 0.2639, + "step": 39288 + }, + { + "epoch": 1.840492809294046, + "grad_norm": 0.6081369138244093, + "learning_rate": 8.293819117716733e-08, + "loss": 0.2762, + "step": 39289 + }, + { + "epoch": 1.8405396542839743, + "grad_norm": 0.6152438540481807, + "learning_rate": 8.288975376408382e-08, + "loss": 0.2672, + "step": 39290 + }, + { + "epoch": 1.8405864992739027, + "grad_norm": 0.5955804198039614, + "learning_rate": 8.284133026086189e-08, + "loss": 0.2659, + "step": 39291 + }, + { + "epoch": 1.840633344263831, + "grad_norm": 0.6161197728876113, + "learning_rate": 8.279292066778077e-08, + "loss": 0.2925, + "step": 39292 + }, + { + "epoch": 1.8406801892537592, + "grad_norm": 0.6006497983324159, + "learning_rate": 8.274452498511859e-08, + "loss": 0.2553, + "step": 39293 + }, + { + "epoch": 1.8407270342436877, + "grad_norm": 0.6428755974880959, + "learning_rate": 8.269614321315456e-08, + "loss": 0.2692, + "step": 39294 + }, + { + "epoch": 1.8407738792336161, + "grad_norm": 0.5621596323438631, + "learning_rate": 8.26477753521665e-08, + "loss": 0.254, + "step": 39295 + }, + { + "epoch": 1.8408207242235441, + "grad_norm": 0.5678411081754273, + "learning_rate": 8.259942140243282e-08, + "loss": 0.2569, + "step": 39296 + }, + { + "epoch": 1.8408675692134726, + "grad_norm": 0.5908445613348149, + "learning_rate": 8.255108136423217e-08, + "loss": 0.2651, + "step": 39297 + }, + { + "epoch": 1.840914414203401, + "grad_norm": 0.5767554350574229, + "learning_rate": 8.250275523784212e-08, + "loss": 0.2666, + "step": 39298 + }, + { + "epoch": 1.8409612591933293, + "grad_norm": 0.6037031547282591, + "learning_rate": 8.245444302354105e-08, + "loss": 0.2676, + "step": 39299 + }, + { + "epoch": 1.8410081041832576, + "grad_norm": 0.5781438846215178, + "learning_rate": 8.240614472160735e-08, + "loss": 0.2653, + "step": 39300 + }, + { + "epoch": 1.841054949173186, + "grad_norm": 0.5931144382747422, + "learning_rate": 8.23578603323183e-08, + "loss": 0.2637, + "step": 39301 + }, + { + "epoch": 1.8411017941631143, + "grad_norm": 0.614659902190194, + "learning_rate": 8.230958985595228e-08, + "loss": 0.2804, + "step": 39302 + }, + { + "epoch": 1.8411486391530425, + "grad_norm": 0.6035517217618762, + "learning_rate": 8.226133329278713e-08, + "loss": 0.2685, + "step": 39303 + }, + { + "epoch": 1.841195484142971, + "grad_norm": 0.599658522696257, + "learning_rate": 8.221309064310012e-08, + "loss": 0.263, + "step": 39304 + }, + { + "epoch": 1.8412423291328992, + "grad_norm": 0.6315848478260557, + "learning_rate": 8.216486190716883e-08, + "loss": 0.2947, + "step": 39305 + }, + { + "epoch": 1.8412891741228274, + "grad_norm": 0.6042123816765266, + "learning_rate": 8.211664708527161e-08, + "loss": 0.2552, + "step": 39306 + }, + { + "epoch": 1.841336019112756, + "grad_norm": 0.6121300955101288, + "learning_rate": 8.206844617768494e-08, + "loss": 0.2722, + "step": 39307 + }, + { + "epoch": 1.8413828641026844, + "grad_norm": 0.5981954596555709, + "learning_rate": 8.202025918468664e-08, + "loss": 0.2706, + "step": 39308 + }, + { + "epoch": 1.8414297090926124, + "grad_norm": 0.5835176611593883, + "learning_rate": 8.1972086106554e-08, + "loss": 0.2641, + "step": 39309 + }, + { + "epoch": 1.8414765540825409, + "grad_norm": 0.6211496161289772, + "learning_rate": 8.192392694356483e-08, + "loss": 0.2782, + "step": 39310 + }, + { + "epoch": 1.8415233990724693, + "grad_norm": 0.6277076797610798, + "learning_rate": 8.187578169599503e-08, + "loss": 0.2719, + "step": 39311 + }, + { + "epoch": 1.8415702440623976, + "grad_norm": 0.6210846107431394, + "learning_rate": 8.18276503641227e-08, + "loss": 0.2712, + "step": 39312 + }, + { + "epoch": 1.8416170890523258, + "grad_norm": 0.6257086807586474, + "learning_rate": 8.177953294822433e-08, + "loss": 0.2831, + "step": 39313 + }, + { + "epoch": 1.8416639340422543, + "grad_norm": 0.5580127479108008, + "learning_rate": 8.173142944857687e-08, + "loss": 0.2491, + "step": 39314 + }, + { + "epoch": 1.8417107790321825, + "grad_norm": 0.5864491046767624, + "learning_rate": 8.168333986545735e-08, + "loss": 0.2773, + "step": 39315 + }, + { + "epoch": 1.8417576240221107, + "grad_norm": 0.5491265828725956, + "learning_rate": 8.163526419914219e-08, + "loss": 0.2411, + "step": 39316 + }, + { + "epoch": 1.8418044690120392, + "grad_norm": 0.6740520944477877, + "learning_rate": 8.158720244990842e-08, + "loss": 0.3036, + "step": 39317 + }, + { + "epoch": 1.8418513140019674, + "grad_norm": 0.6172907939987232, + "learning_rate": 8.153915461803275e-08, + "loss": 0.2592, + "step": 39318 + }, + { + "epoch": 1.8418981589918957, + "grad_norm": 0.6040396765298813, + "learning_rate": 8.149112070379106e-08, + "loss": 0.2647, + "step": 39319 + }, + { + "epoch": 1.8419450039818241, + "grad_norm": 0.6541103384819088, + "learning_rate": 8.144310070746009e-08, + "loss": 0.2706, + "step": 39320 + }, + { + "epoch": 1.8419918489717526, + "grad_norm": 0.6083895630975251, + "learning_rate": 8.1395094629316e-08, + "loss": 0.2758, + "step": 39321 + }, + { + "epoch": 1.8420386939616809, + "grad_norm": 0.6293964176684781, + "learning_rate": 8.13471024696355e-08, + "loss": 0.2762, + "step": 39322 + }, + { + "epoch": 1.842085538951609, + "grad_norm": 0.562017219068532, + "learning_rate": 8.129912422869424e-08, + "loss": 0.2585, + "step": 39323 + }, + { + "epoch": 1.8421323839415376, + "grad_norm": 0.6231560975660333, + "learning_rate": 8.125115990676919e-08, + "loss": 0.303, + "step": 39324 + }, + { + "epoch": 1.8421792289314658, + "grad_norm": 0.6593705724342723, + "learning_rate": 8.120320950413569e-08, + "loss": 0.2922, + "step": 39325 + }, + { + "epoch": 1.842226073921394, + "grad_norm": 0.6092105949202516, + "learning_rate": 8.115527302106935e-08, + "loss": 0.2798, + "step": 39326 + }, + { + "epoch": 1.8422729189113225, + "grad_norm": 0.5791287323172735, + "learning_rate": 8.110735045784663e-08, + "loss": 0.263, + "step": 39327 + }, + { + "epoch": 1.8423197639012507, + "grad_norm": 0.5489781792175296, + "learning_rate": 8.105944181474284e-08, + "loss": 0.2523, + "step": 39328 + }, + { + "epoch": 1.842366608891179, + "grad_norm": 0.6435238483227627, + "learning_rate": 8.101154709203445e-08, + "loss": 0.2871, + "step": 39329 + }, + { + "epoch": 1.8424134538811074, + "grad_norm": 0.5918466854347265, + "learning_rate": 8.096366628999653e-08, + "loss": 0.275, + "step": 39330 + }, + { + "epoch": 1.842460298871036, + "grad_norm": 0.6019024378294637, + "learning_rate": 8.091579940890465e-08, + "loss": 0.2672, + "step": 39331 + }, + { + "epoch": 1.842507143860964, + "grad_norm": 0.5712506672378999, + "learning_rate": 8.086794644903445e-08, + "loss": 0.2663, + "step": 39332 + }, + { + "epoch": 1.8425539888508924, + "grad_norm": 0.6167976879557165, + "learning_rate": 8.082010741066098e-08, + "loss": 0.2782, + "step": 39333 + }, + { + "epoch": 1.8426008338408209, + "grad_norm": 0.6099753248149441, + "learning_rate": 8.077228229405987e-08, + "loss": 0.2705, + "step": 39334 + }, + { + "epoch": 1.842647678830749, + "grad_norm": 0.5824985959560774, + "learning_rate": 8.072447109950615e-08, + "loss": 0.2695, + "step": 39335 + }, + { + "epoch": 1.8426945238206773, + "grad_norm": 0.5729376226559663, + "learning_rate": 8.067667382727518e-08, + "loss": 0.2507, + "step": 39336 + }, + { + "epoch": 1.8427413688106058, + "grad_norm": 0.5631892111552494, + "learning_rate": 8.062889047764172e-08, + "loss": 0.2596, + "step": 39337 + }, + { + "epoch": 1.842788213800534, + "grad_norm": 0.60235030950406, + "learning_rate": 8.058112105088083e-08, + "loss": 0.2601, + "step": 39338 + }, + { + "epoch": 1.8428350587904623, + "grad_norm": 0.5933748957884731, + "learning_rate": 8.053336554726787e-08, + "loss": 0.2666, + "step": 39339 + }, + { + "epoch": 1.8428819037803907, + "grad_norm": 0.6040771907913983, + "learning_rate": 8.048562396707704e-08, + "loss": 0.2746, + "step": 39340 + }, + { + "epoch": 1.842928748770319, + "grad_norm": 0.6276549608853932, + "learning_rate": 8.043789631058313e-08, + "loss": 0.269, + "step": 39341 + }, + { + "epoch": 1.8429755937602472, + "grad_norm": 0.6623689261551494, + "learning_rate": 8.039018257806147e-08, + "loss": 0.3002, + "step": 39342 + }, + { + "epoch": 1.8430224387501757, + "grad_norm": 0.6339368992248933, + "learning_rate": 8.034248276978573e-08, + "loss": 0.2915, + "step": 39343 + }, + { + "epoch": 1.8430692837401041, + "grad_norm": 0.5881704151781827, + "learning_rate": 8.029479688603097e-08, + "loss": 0.2656, + "step": 39344 + }, + { + "epoch": 1.8431161287300322, + "grad_norm": 0.5996766400933783, + "learning_rate": 8.02471249270717e-08, + "loss": 0.2646, + "step": 39345 + }, + { + "epoch": 1.8431629737199606, + "grad_norm": 0.573819457794182, + "learning_rate": 8.019946689318159e-08, + "loss": 0.2503, + "step": 39346 + }, + { + "epoch": 1.843209818709889, + "grad_norm": 0.5610173267584462, + "learning_rate": 8.015182278463567e-08, + "loss": 0.2645, + "step": 39347 + }, + { + "epoch": 1.8432566636998173, + "grad_norm": 0.5815471520434028, + "learning_rate": 8.010419260170793e-08, + "loss": 0.2637, + "step": 39348 + }, + { + "epoch": 1.8433035086897456, + "grad_norm": 0.5986255667390884, + "learning_rate": 8.005657634467201e-08, + "loss": 0.2691, + "step": 39349 + }, + { + "epoch": 1.843350353679674, + "grad_norm": 0.602439193976143, + "learning_rate": 8.000897401380243e-08, + "loss": 0.2674, + "step": 39350 + }, + { + "epoch": 1.8433971986696023, + "grad_norm": 0.6090437222414973, + "learning_rate": 7.996138560937283e-08, + "loss": 0.2582, + "step": 39351 + }, + { + "epoch": 1.8434440436595305, + "grad_norm": 0.6026518127823444, + "learning_rate": 7.991381113165747e-08, + "loss": 0.2767, + "step": 39352 + }, + { + "epoch": 1.843490888649459, + "grad_norm": 0.622644229178924, + "learning_rate": 7.986625058092973e-08, + "loss": 0.2838, + "step": 39353 + }, + { + "epoch": 1.8435377336393872, + "grad_norm": 0.5679852794658553, + "learning_rate": 7.981870395746327e-08, + "loss": 0.2792, + "step": 39354 + }, + { + "epoch": 1.8435845786293155, + "grad_norm": 0.6119467742320464, + "learning_rate": 7.977117126153233e-08, + "loss": 0.27, + "step": 39355 + }, + { + "epoch": 1.843631423619244, + "grad_norm": 0.5855139317224416, + "learning_rate": 7.972365249340946e-08, + "loss": 0.264, + "step": 39356 + }, + { + "epoch": 1.8436782686091724, + "grad_norm": 0.5717151879535494, + "learning_rate": 7.967614765336889e-08, + "loss": 0.2668, + "step": 39357 + }, + { + "epoch": 1.8437251135991006, + "grad_norm": 0.6328960067298537, + "learning_rate": 7.962865674168374e-08, + "loss": 0.279, + "step": 39358 + }, + { + "epoch": 1.8437719585890289, + "grad_norm": 0.5920746942784848, + "learning_rate": 7.95811797586274e-08, + "loss": 0.2686, + "step": 39359 + }, + { + "epoch": 1.8438188035789573, + "grad_norm": 0.5844914848200486, + "learning_rate": 7.953371670447297e-08, + "loss": 0.2701, + "step": 39360 + }, + { + "epoch": 1.8438656485688856, + "grad_norm": 0.5940789993008679, + "learning_rate": 7.948626757949385e-08, + "loss": 0.2618, + "step": 39361 + }, + { + "epoch": 1.8439124935588138, + "grad_norm": 0.6128278267731061, + "learning_rate": 7.94388323839626e-08, + "loss": 0.2517, + "step": 39362 + }, + { + "epoch": 1.8439593385487423, + "grad_norm": 0.5390737376980974, + "learning_rate": 7.939141111815236e-08, + "loss": 0.2546, + "step": 39363 + }, + { + "epoch": 1.8440061835386705, + "grad_norm": 0.6339341320119529, + "learning_rate": 7.934400378233647e-08, + "loss": 0.2572, + "step": 39364 + }, + { + "epoch": 1.8440530285285988, + "grad_norm": 0.5592198015012312, + "learning_rate": 7.929661037678727e-08, + "loss": 0.2601, + "step": 39365 + }, + { + "epoch": 1.8440998735185272, + "grad_norm": 0.6469987538599365, + "learning_rate": 7.924923090177784e-08, + "loss": 0.2791, + "step": 39366 + }, + { + "epoch": 1.8441467185084557, + "grad_norm": 0.5807984121317711, + "learning_rate": 7.920186535758073e-08, + "loss": 0.2681, + "step": 39367 + }, + { + "epoch": 1.8441935634983837, + "grad_norm": 0.5722410061812728, + "learning_rate": 7.915451374446798e-08, + "loss": 0.258, + "step": 39368 + }, + { + "epoch": 1.8442404084883122, + "grad_norm": 0.6380266463903428, + "learning_rate": 7.910717606271295e-08, + "loss": 0.2669, + "step": 39369 + }, + { + "epoch": 1.8442872534782406, + "grad_norm": 0.5988650523786898, + "learning_rate": 7.90598523125874e-08, + "loss": 0.2634, + "step": 39370 + }, + { + "epoch": 1.8443340984681689, + "grad_norm": 0.5848542223765598, + "learning_rate": 7.901254249436386e-08, + "loss": 0.2593, + "step": 39371 + }, + { + "epoch": 1.844380943458097, + "grad_norm": 0.5825063854511652, + "learning_rate": 7.896524660831462e-08, + "loss": 0.2659, + "step": 39372 + }, + { + "epoch": 1.8444277884480256, + "grad_norm": 0.6213313517699504, + "learning_rate": 7.891796465471224e-08, + "loss": 0.2712, + "step": 39373 + }, + { + "epoch": 1.8444746334379538, + "grad_norm": 0.5574040857981989, + "learning_rate": 7.887069663382873e-08, + "loss": 0.2529, + "step": 39374 + }, + { + "epoch": 1.844521478427882, + "grad_norm": 0.6094353644085951, + "learning_rate": 7.882344254593527e-08, + "loss": 0.2626, + "step": 39375 + }, + { + "epoch": 1.8445683234178105, + "grad_norm": 0.6494981893834697, + "learning_rate": 7.87762023913044e-08, + "loss": 0.2908, + "step": 39376 + }, + { + "epoch": 1.8446151684077388, + "grad_norm": 0.5591028028765099, + "learning_rate": 7.872897617020786e-08, + "loss": 0.2663, + "step": 39377 + }, + { + "epoch": 1.844662013397667, + "grad_norm": 0.6245527061735259, + "learning_rate": 7.868176388291765e-08, + "loss": 0.2807, + "step": 39378 + }, + { + "epoch": 1.8447088583875955, + "grad_norm": 0.6294629528540596, + "learning_rate": 7.86345655297055e-08, + "loss": 0.2713, + "step": 39379 + }, + { + "epoch": 1.844755703377524, + "grad_norm": 0.5798011704338676, + "learning_rate": 7.858738111084258e-08, + "loss": 0.272, + "step": 39380 + }, + { + "epoch": 1.844802548367452, + "grad_norm": 0.640982841213555, + "learning_rate": 7.854021062660089e-08, + "loss": 0.2872, + "step": 39381 + }, + { + "epoch": 1.8448493933573804, + "grad_norm": 0.6728794083295611, + "learning_rate": 7.849305407725133e-08, + "loss": 0.2611, + "step": 39382 + }, + { + "epoch": 1.8448962383473089, + "grad_norm": 0.6188516014861493, + "learning_rate": 7.84459114630659e-08, + "loss": 0.2654, + "step": 39383 + }, + { + "epoch": 1.844943083337237, + "grad_norm": 0.5801011326788557, + "learning_rate": 7.839878278431551e-08, + "loss": 0.2662, + "step": 39384 + }, + { + "epoch": 1.8449899283271654, + "grad_norm": 0.5944161830128721, + "learning_rate": 7.835166804127159e-08, + "loss": 0.2614, + "step": 39385 + }, + { + "epoch": 1.8450367733170938, + "grad_norm": 0.5986876453386291, + "learning_rate": 7.830456723420504e-08, + "loss": 0.2877, + "step": 39386 + }, + { + "epoch": 1.845083618307022, + "grad_norm": 0.6226112085737663, + "learning_rate": 7.825748036338704e-08, + "loss": 0.2651, + "step": 39387 + }, + { + "epoch": 1.8451304632969503, + "grad_norm": 0.6506855051307557, + "learning_rate": 7.821040742908875e-08, + "loss": 0.2747, + "step": 39388 + }, + { + "epoch": 1.8451773082868788, + "grad_norm": 0.5832047110108942, + "learning_rate": 7.816334843158052e-08, + "loss": 0.2507, + "step": 39389 + }, + { + "epoch": 1.845224153276807, + "grad_norm": 0.5740308606373421, + "learning_rate": 7.81163033711338e-08, + "loss": 0.2672, + "step": 39390 + }, + { + "epoch": 1.8452709982667352, + "grad_norm": 0.580489082395037, + "learning_rate": 7.806927224801919e-08, + "loss": 0.2818, + "step": 39391 + }, + { + "epoch": 1.8453178432566637, + "grad_norm": 0.6159327531853638, + "learning_rate": 7.802225506250676e-08, + "loss": 0.2602, + "step": 39392 + }, + { + "epoch": 1.8453646882465922, + "grad_norm": 0.6140662069331312, + "learning_rate": 7.79752518148677e-08, + "loss": 0.2766, + "step": 39393 + }, + { + "epoch": 1.8454115332365204, + "grad_norm": 0.5724893660212746, + "learning_rate": 7.792826250537233e-08, + "loss": 0.282, + "step": 39394 + }, + { + "epoch": 1.8454583782264486, + "grad_norm": 0.62920568682208, + "learning_rate": 7.788128713429099e-08, + "loss": 0.2739, + "step": 39395 + }, + { + "epoch": 1.845505223216377, + "grad_norm": 0.5764947414648023, + "learning_rate": 7.783432570189403e-08, + "loss": 0.2579, + "step": 39396 + }, + { + "epoch": 1.8455520682063054, + "grad_norm": 0.5802517524237124, + "learning_rate": 7.778737820845206e-08, + "loss": 0.2543, + "step": 39397 + }, + { + "epoch": 1.8455989131962336, + "grad_norm": 0.6280702202109377, + "learning_rate": 7.774044465423458e-08, + "loss": 0.2551, + "step": 39398 + }, + { + "epoch": 1.845645758186162, + "grad_norm": 0.5925774711992141, + "learning_rate": 7.769352503951221e-08, + "loss": 0.2695, + "step": 39399 + }, + { + "epoch": 1.8456926031760903, + "grad_norm": 0.6029788152736658, + "learning_rate": 7.764661936455447e-08, + "loss": 0.2664, + "step": 39400 + }, + { + "epoch": 1.8457394481660185, + "grad_norm": 0.6161830937659513, + "learning_rate": 7.759972762963197e-08, + "loss": 0.2754, + "step": 39401 + }, + { + "epoch": 1.845786293155947, + "grad_norm": 0.5879175871723855, + "learning_rate": 7.755284983501393e-08, + "loss": 0.274, + "step": 39402 + }, + { + "epoch": 1.8458331381458755, + "grad_norm": 0.5740313335087639, + "learning_rate": 7.75059859809707e-08, + "loss": 0.2744, + "step": 39403 + }, + { + "epoch": 1.8458799831358035, + "grad_norm": 0.635387630291161, + "learning_rate": 7.74591360677715e-08, + "loss": 0.2776, + "step": 39404 + }, + { + "epoch": 1.845926828125732, + "grad_norm": 0.5910199757569361, + "learning_rate": 7.741230009568584e-08, + "loss": 0.2724, + "step": 39405 + }, + { + "epoch": 1.8459736731156604, + "grad_norm": 0.5969667765101515, + "learning_rate": 7.736547806498379e-08, + "loss": 0.2758, + "step": 39406 + }, + { + "epoch": 1.8460205181055886, + "grad_norm": 0.5913612875218859, + "learning_rate": 7.731866997593428e-08, + "loss": 0.2703, + "step": 39407 + }, + { + "epoch": 1.8460673630955169, + "grad_norm": 0.6443313815888564, + "learning_rate": 7.727187582880713e-08, + "loss": 0.2854, + "step": 39408 + }, + { + "epoch": 1.8461142080854454, + "grad_norm": 0.5898015244137477, + "learning_rate": 7.722509562387153e-08, + "loss": 0.2532, + "step": 39409 + }, + { + "epoch": 1.8461610530753736, + "grad_norm": 0.5825894032963149, + "learning_rate": 7.717832936139646e-08, + "loss": 0.2733, + "step": 39410 + }, + { + "epoch": 1.8462078980653018, + "grad_norm": 0.6048974246053911, + "learning_rate": 7.713157704165087e-08, + "loss": 0.2791, + "step": 39411 + }, + { + "epoch": 1.8462547430552303, + "grad_norm": 0.5960229452407035, + "learning_rate": 7.708483866490424e-08, + "loss": 0.2634, + "step": 39412 + }, + { + "epoch": 1.8463015880451585, + "grad_norm": 0.5679948652676543, + "learning_rate": 7.703811423142526e-08, + "loss": 0.2685, + "step": 39413 + }, + { + "epoch": 1.8463484330350868, + "grad_norm": 0.5575393225066844, + "learning_rate": 7.69914037414829e-08, + "loss": 0.2722, + "step": 39414 + }, + { + "epoch": 1.8463952780250152, + "grad_norm": 0.5811404615037872, + "learning_rate": 7.694470719534635e-08, + "loss": 0.2661, + "step": 39415 + }, + { + "epoch": 1.8464421230149437, + "grad_norm": 0.574030352691785, + "learning_rate": 7.689802459328404e-08, + "loss": 0.258, + "step": 39416 + }, + { + "epoch": 1.8464889680048717, + "grad_norm": 0.5882621414495572, + "learning_rate": 7.685135593556436e-08, + "loss": 0.2683, + "step": 39417 + }, + { + "epoch": 1.8465358129948002, + "grad_norm": 0.6065014328602756, + "learning_rate": 7.680470122245598e-08, + "loss": 0.2782, + "step": 39418 + }, + { + "epoch": 1.8465826579847286, + "grad_norm": 0.5919771052717493, + "learning_rate": 7.675806045422757e-08, + "loss": 0.2729, + "step": 39419 + }, + { + "epoch": 1.846629502974657, + "grad_norm": 0.5868233038003602, + "learning_rate": 7.671143363114753e-08, + "loss": 0.2755, + "step": 39420 + }, + { + "epoch": 1.8466763479645851, + "grad_norm": 0.6326633990965017, + "learning_rate": 7.666482075348397e-08, + "loss": 0.2845, + "step": 39421 + }, + { + "epoch": 1.8467231929545136, + "grad_norm": 0.6395466771916519, + "learning_rate": 7.661822182150558e-08, + "loss": 0.2701, + "step": 39422 + }, + { + "epoch": 1.8467700379444418, + "grad_norm": 0.5649719816252285, + "learning_rate": 7.657163683548047e-08, + "loss": 0.2632, + "step": 39423 + }, + { + "epoch": 1.84681688293437, + "grad_norm": 0.6190789354786039, + "learning_rate": 7.652506579567593e-08, + "loss": 0.2856, + "step": 39424 + }, + { + "epoch": 1.8468637279242985, + "grad_norm": 0.5634616198304693, + "learning_rate": 7.647850870236061e-08, + "loss": 0.2566, + "step": 39425 + }, + { + "epoch": 1.8469105729142268, + "grad_norm": 0.5818403591437968, + "learning_rate": 7.64319655558024e-08, + "loss": 0.2624, + "step": 39426 + }, + { + "epoch": 1.846957417904155, + "grad_norm": 0.5711261079013953, + "learning_rate": 7.638543635626883e-08, + "loss": 0.2573, + "step": 39427 + }, + { + "epoch": 1.8470042628940835, + "grad_norm": 0.6007583583120982, + "learning_rate": 7.633892110402857e-08, + "loss": 0.2656, + "step": 39428 + }, + { + "epoch": 1.847051107884012, + "grad_norm": 0.6050429182445415, + "learning_rate": 7.629241979934809e-08, + "loss": 0.2613, + "step": 39429 + }, + { + "epoch": 1.8470979528739402, + "grad_norm": 0.619929631264408, + "learning_rate": 7.624593244249606e-08, + "loss": 0.2637, + "step": 39430 + }, + { + "epoch": 1.8471447978638684, + "grad_norm": 0.5686562167629775, + "learning_rate": 7.619945903373893e-08, + "loss": 0.2651, + "step": 39431 + }, + { + "epoch": 1.847191642853797, + "grad_norm": 0.6412859342928919, + "learning_rate": 7.615299957334483e-08, + "loss": 0.2787, + "step": 39432 + }, + { + "epoch": 1.8472384878437251, + "grad_norm": 0.564765691682898, + "learning_rate": 7.61065540615813e-08, + "loss": 0.2449, + "step": 39433 + }, + { + "epoch": 1.8472853328336534, + "grad_norm": 0.6161584199646599, + "learning_rate": 7.606012249871536e-08, + "loss": 0.2695, + "step": 39434 + }, + { + "epoch": 1.8473321778235818, + "grad_norm": 0.5941561199872227, + "learning_rate": 7.601370488501375e-08, + "loss": 0.2888, + "step": 39435 + }, + { + "epoch": 1.84737902281351, + "grad_norm": 0.5569882736405318, + "learning_rate": 7.59673012207443e-08, + "loss": 0.2595, + "step": 39436 + }, + { + "epoch": 1.8474258678034383, + "grad_norm": 0.6205059609812609, + "learning_rate": 7.592091150617375e-08, + "loss": 0.2743, + "step": 39437 + }, + { + "epoch": 1.8474727127933668, + "grad_norm": 0.5867808351867159, + "learning_rate": 7.587453574156912e-08, + "loss": 0.2643, + "step": 39438 + }, + { + "epoch": 1.8475195577832952, + "grad_norm": 0.6150906865731722, + "learning_rate": 7.58281739271971e-08, + "loss": 0.2578, + "step": 39439 + }, + { + "epoch": 1.8475664027732233, + "grad_norm": 0.6049702211851585, + "learning_rate": 7.578182606332502e-08, + "loss": 0.277, + "step": 39440 + }, + { + "epoch": 1.8476132477631517, + "grad_norm": 0.6175164358516273, + "learning_rate": 7.573549215021875e-08, + "loss": 0.2769, + "step": 39441 + }, + { + "epoch": 1.8476600927530802, + "grad_norm": 0.6306372179390276, + "learning_rate": 7.568917218814559e-08, + "loss": 0.2873, + "step": 39442 + }, + { + "epoch": 1.8477069377430084, + "grad_norm": 0.5936366642021063, + "learning_rate": 7.564286617737226e-08, + "loss": 0.2723, + "step": 39443 + }, + { + "epoch": 1.8477537827329367, + "grad_norm": 0.5524883097235113, + "learning_rate": 7.55965741181644e-08, + "loss": 0.2692, + "step": 39444 + }, + { + "epoch": 1.8478006277228651, + "grad_norm": 0.6076953084779493, + "learning_rate": 7.5550296010789e-08, + "loss": 0.2695, + "step": 39445 + }, + { + "epoch": 1.8478474727127934, + "grad_norm": 0.5737690697116145, + "learning_rate": 7.550403185551253e-08, + "loss": 0.2658, + "step": 39446 + }, + { + "epoch": 1.8478943177027216, + "grad_norm": 0.5945662034223931, + "learning_rate": 7.545778165260087e-08, + "loss": 0.2644, + "step": 39447 + }, + { + "epoch": 1.84794116269265, + "grad_norm": 0.5820760702633136, + "learning_rate": 7.54115454023202e-08, + "loss": 0.264, + "step": 39448 + }, + { + "epoch": 1.8479880076825783, + "grad_norm": 0.5822643270819166, + "learning_rate": 7.536532310493672e-08, + "loss": 0.2667, + "step": 39449 + }, + { + "epoch": 1.8480348526725066, + "grad_norm": 0.5152133687508853, + "learning_rate": 7.531911476071658e-08, + "loss": 0.2476, + "step": 39450 + }, + { + "epoch": 1.848081697662435, + "grad_norm": 0.5943847868918846, + "learning_rate": 7.527292036992511e-08, + "loss": 0.2791, + "step": 39451 + }, + { + "epoch": 1.8481285426523635, + "grad_norm": 0.5717204752540839, + "learning_rate": 7.522673993282908e-08, + "loss": 0.2646, + "step": 39452 + }, + { + "epoch": 1.8481753876422915, + "grad_norm": 0.647177911830244, + "learning_rate": 7.518057344969326e-08, + "loss": 0.2936, + "step": 39453 + }, + { + "epoch": 1.84822223263222, + "grad_norm": 0.6048876343724235, + "learning_rate": 7.513442092078382e-08, + "loss": 0.2823, + "step": 39454 + }, + { + "epoch": 1.8482690776221484, + "grad_norm": 0.6505252794617788, + "learning_rate": 7.508828234636639e-08, + "loss": 0.2901, + "step": 39455 + }, + { + "epoch": 1.8483159226120767, + "grad_norm": 0.6295782268474667, + "learning_rate": 7.504215772670631e-08, + "loss": 0.2773, + "step": 39456 + }, + { + "epoch": 1.848362767602005, + "grad_norm": 0.5664578662868921, + "learning_rate": 7.499604706206948e-08, + "loss": 0.2643, + "step": 39457 + }, + { + "epoch": 1.8484096125919334, + "grad_norm": 0.6247812935693488, + "learning_rate": 7.494995035272095e-08, + "loss": 0.2677, + "step": 39458 + }, + { + "epoch": 1.8484564575818616, + "grad_norm": 0.6175428457238548, + "learning_rate": 7.490386759892581e-08, + "loss": 0.2757, + "step": 39459 + }, + { + "epoch": 1.8485033025717899, + "grad_norm": 0.5960728205879587, + "learning_rate": 7.485779880094912e-08, + "loss": 0.2558, + "step": 39460 + }, + { + "epoch": 1.8485501475617183, + "grad_norm": 0.6072940667399217, + "learning_rate": 7.481174395905622e-08, + "loss": 0.2659, + "step": 39461 + }, + { + "epoch": 1.8485969925516466, + "grad_norm": 0.6143035100044986, + "learning_rate": 7.476570307351244e-08, + "loss": 0.272, + "step": 39462 + }, + { + "epoch": 1.8486438375415748, + "grad_norm": 0.6212139260183035, + "learning_rate": 7.47196761445823e-08, + "loss": 0.2746, + "step": 39463 + }, + { + "epoch": 1.8486906825315033, + "grad_norm": 0.5603638067478824, + "learning_rate": 7.467366317253117e-08, + "loss": 0.2626, + "step": 39464 + }, + { + "epoch": 1.8487375275214317, + "grad_norm": 0.6245187688132471, + "learning_rate": 7.462766415762351e-08, + "loss": 0.2767, + "step": 39465 + }, + { + "epoch": 1.84878437251136, + "grad_norm": 0.6418025235693684, + "learning_rate": 7.458167910012387e-08, + "loss": 0.2805, + "step": 39466 + }, + { + "epoch": 1.8488312175012882, + "grad_norm": 0.6406542564008556, + "learning_rate": 7.453570800029675e-08, + "loss": 0.283, + "step": 39467 + }, + { + "epoch": 1.8488780624912167, + "grad_norm": 0.6743402991615769, + "learning_rate": 7.448975085840748e-08, + "loss": 0.2989, + "step": 39468 + }, + { + "epoch": 1.848924907481145, + "grad_norm": 0.5797974887363886, + "learning_rate": 7.444380767471975e-08, + "loss": 0.2731, + "step": 39469 + }, + { + "epoch": 1.8489717524710731, + "grad_norm": 0.5872536167336538, + "learning_rate": 7.439787844949864e-08, + "loss": 0.2554, + "step": 39470 + }, + { + "epoch": 1.8490185974610016, + "grad_norm": 0.6042713215468334, + "learning_rate": 7.435196318300781e-08, + "loss": 0.2764, + "step": 39471 + }, + { + "epoch": 1.8490654424509299, + "grad_norm": 0.5863236850425214, + "learning_rate": 7.430606187551203e-08, + "loss": 0.2598, + "step": 39472 + }, + { + "epoch": 1.849112287440858, + "grad_norm": 0.5979999387021666, + "learning_rate": 7.426017452727474e-08, + "loss": 0.285, + "step": 39473 + }, + { + "epoch": 1.8491591324307866, + "grad_norm": 0.6162414693962355, + "learning_rate": 7.42143011385607e-08, + "loss": 0.2705, + "step": 39474 + }, + { + "epoch": 1.849205977420715, + "grad_norm": 0.5768906933566543, + "learning_rate": 7.41684417096336e-08, + "loss": 0.2585, + "step": 39475 + }, + { + "epoch": 1.849252822410643, + "grad_norm": 0.6394411328690112, + "learning_rate": 7.412259624075768e-08, + "loss": 0.2936, + "step": 39476 + }, + { + "epoch": 1.8492996674005715, + "grad_norm": 0.5940405829009485, + "learning_rate": 7.407676473219604e-08, + "loss": 0.2629, + "step": 39477 + }, + { + "epoch": 1.8493465123905, + "grad_norm": 0.6041196409165098, + "learning_rate": 7.403094718421322e-08, + "loss": 0.2748, + "step": 39478 + }, + { + "epoch": 1.8493933573804282, + "grad_norm": 0.6000050477485769, + "learning_rate": 7.39851435970726e-08, + "loss": 0.2828, + "step": 39479 + }, + { + "epoch": 1.8494402023703564, + "grad_norm": 0.6230610983749396, + "learning_rate": 7.393935397103757e-08, + "loss": 0.2861, + "step": 39480 + }, + { + "epoch": 1.849487047360285, + "grad_norm": 0.5984404656543352, + "learning_rate": 7.389357830637156e-08, + "loss": 0.2771, + "step": 39481 + }, + { + "epoch": 1.8495338923502131, + "grad_norm": 0.6326696075566243, + "learning_rate": 7.384781660333878e-08, + "loss": 0.287, + "step": 39482 + }, + { + "epoch": 1.8495807373401414, + "grad_norm": 0.5889032915875988, + "learning_rate": 7.380206886220153e-08, + "loss": 0.2552, + "step": 39483 + }, + { + "epoch": 1.8496275823300699, + "grad_norm": 0.6382110168774038, + "learning_rate": 7.375633508322378e-08, + "loss": 0.2758, + "step": 39484 + }, + { + "epoch": 1.849674427319998, + "grad_norm": 0.6019545856870262, + "learning_rate": 7.371061526666861e-08, + "loss": 0.2655, + "step": 39485 + }, + { + "epoch": 1.8497212723099263, + "grad_norm": 0.566689332808872, + "learning_rate": 7.366490941279892e-08, + "loss": 0.253, + "step": 39486 + }, + { + "epoch": 1.8497681172998548, + "grad_norm": 0.660845740584634, + "learning_rate": 7.361921752187779e-08, + "loss": 0.2969, + "step": 39487 + }, + { + "epoch": 1.8498149622897833, + "grad_norm": 0.593541325566154, + "learning_rate": 7.357353959416807e-08, + "loss": 0.2677, + "step": 39488 + }, + { + "epoch": 1.8498618072797113, + "grad_norm": 0.6144063069382514, + "learning_rate": 7.352787562993319e-08, + "loss": 0.2767, + "step": 39489 + }, + { + "epoch": 1.8499086522696397, + "grad_norm": 0.5565965746846385, + "learning_rate": 7.348222562943514e-08, + "loss": 0.248, + "step": 39490 + }, + { + "epoch": 1.8499554972595682, + "grad_norm": 0.6148145911487449, + "learning_rate": 7.343658959293704e-08, + "loss": 0.2798, + "step": 39491 + }, + { + "epoch": 1.8500023422494964, + "grad_norm": 0.637031687383985, + "learning_rate": 7.339096752070201e-08, + "loss": 0.2938, + "step": 39492 + }, + { + "epoch": 1.8500491872394247, + "grad_norm": 0.6294252246621375, + "learning_rate": 7.334535941299154e-08, + "loss": 0.281, + "step": 39493 + }, + { + "epoch": 1.8500960322293531, + "grad_norm": 0.5779227237794051, + "learning_rate": 7.329976527006843e-08, + "loss": 0.2677, + "step": 39494 + }, + { + "epoch": 1.8501428772192814, + "grad_norm": 0.6409525740739577, + "learning_rate": 7.325418509219584e-08, + "loss": 0.2771, + "step": 39495 + }, + { + "epoch": 1.8501897222092096, + "grad_norm": 0.6346908689993104, + "learning_rate": 7.320861887963494e-08, + "loss": 0.268, + "step": 39496 + }, + { + "epoch": 1.850236567199138, + "grad_norm": 0.6314190595857633, + "learning_rate": 7.316306663264854e-08, + "loss": 0.281, + "step": 39497 + }, + { + "epoch": 1.8502834121890663, + "grad_norm": 0.6054908839411194, + "learning_rate": 7.3117528351499e-08, + "loss": 0.2836, + "step": 39498 + }, + { + "epoch": 1.8503302571789946, + "grad_norm": 0.5641695015644208, + "learning_rate": 7.307200403644798e-08, + "loss": 0.265, + "step": 39499 + }, + { + "epoch": 1.850377102168923, + "grad_norm": 0.6032876496791242, + "learning_rate": 7.302649368775754e-08, + "loss": 0.274, + "step": 39500 + }, + { + "epoch": 1.8504239471588515, + "grad_norm": 0.6171037906530604, + "learning_rate": 7.298099730568997e-08, + "loss": 0.2597, + "step": 39501 + }, + { + "epoch": 1.8504707921487797, + "grad_norm": 0.5736892675299857, + "learning_rate": 7.293551489050643e-08, + "loss": 0.2557, + "step": 39502 + }, + { + "epoch": 1.850517637138708, + "grad_norm": 0.5601659526584142, + "learning_rate": 7.289004644246894e-08, + "loss": 0.2567, + "step": 39503 + }, + { + "epoch": 1.8505644821286364, + "grad_norm": 0.6047060751836904, + "learning_rate": 7.284459196183924e-08, + "loss": 0.2666, + "step": 39504 + }, + { + "epoch": 1.8506113271185647, + "grad_norm": 0.6095013816327175, + "learning_rate": 7.279915144887878e-08, + "loss": 0.2625, + "step": 39505 + }, + { + "epoch": 1.850658172108493, + "grad_norm": 0.5883703295404438, + "learning_rate": 7.275372490384929e-08, + "loss": 0.2723, + "step": 39506 + }, + { + "epoch": 1.8507050170984214, + "grad_norm": 0.5722668558868003, + "learning_rate": 7.270831232701225e-08, + "loss": 0.2678, + "step": 39507 + }, + { + "epoch": 1.8507518620883496, + "grad_norm": 0.6027172836349758, + "learning_rate": 7.266291371862854e-08, + "loss": 0.2564, + "step": 39508 + }, + { + "epoch": 1.8507987070782779, + "grad_norm": 0.608623916474929, + "learning_rate": 7.261752907895964e-08, + "loss": 0.2778, + "step": 39509 + }, + { + "epoch": 1.8508455520682063, + "grad_norm": 0.5792080542304664, + "learning_rate": 7.25721584082667e-08, + "loss": 0.261, + "step": 39510 + }, + { + "epoch": 1.8508923970581348, + "grad_norm": 0.6041982172482336, + "learning_rate": 7.252680170681092e-08, + "loss": 0.2656, + "step": 39511 + }, + { + "epoch": 1.8509392420480628, + "grad_norm": 0.6211926767755735, + "learning_rate": 7.248145897485348e-08, + "loss": 0.2646, + "step": 39512 + }, + { + "epoch": 1.8509860870379913, + "grad_norm": 0.6061644030191177, + "learning_rate": 7.2436130212655e-08, + "loss": 0.2757, + "step": 39513 + }, + { + "epoch": 1.8510329320279197, + "grad_norm": 0.5587004805782818, + "learning_rate": 7.239081542047665e-08, + "loss": 0.2479, + "step": 39514 + }, + { + "epoch": 1.851079777017848, + "grad_norm": 0.6001792581298481, + "learning_rate": 7.23455145985788e-08, + "loss": 0.2585, + "step": 39515 + }, + { + "epoch": 1.8511266220077762, + "grad_norm": 0.5815829219330337, + "learning_rate": 7.230022774722234e-08, + "loss": 0.2785, + "step": 39516 + }, + { + "epoch": 1.8511734669977047, + "grad_norm": 0.6120272064601382, + "learning_rate": 7.22549548666679e-08, + "loss": 0.2765, + "step": 39517 + }, + { + "epoch": 1.851220311987633, + "grad_norm": 0.6205373783824059, + "learning_rate": 7.22096959571758e-08, + "loss": 0.2729, + "step": 39518 + }, + { + "epoch": 1.8512671569775612, + "grad_norm": 0.5986158318588756, + "learning_rate": 7.216445101900699e-08, + "loss": 0.2683, + "step": 39519 + }, + { + "epoch": 1.8513140019674896, + "grad_norm": 0.5868427018357506, + "learning_rate": 7.21192200524215e-08, + "loss": 0.2743, + "step": 39520 + }, + { + "epoch": 1.8513608469574179, + "grad_norm": 0.6407218501444583, + "learning_rate": 7.20740030576797e-08, + "loss": 0.2774, + "step": 39521 + }, + { + "epoch": 1.8514076919473461, + "grad_norm": 0.5949070058141417, + "learning_rate": 7.202880003504165e-08, + "loss": 0.266, + "step": 39522 + }, + { + "epoch": 1.8514545369372746, + "grad_norm": 0.6042772697887147, + "learning_rate": 7.19836109847677e-08, + "loss": 0.2816, + "step": 39523 + }, + { + "epoch": 1.851501381927203, + "grad_norm": 0.6260751582991873, + "learning_rate": 7.193843590711763e-08, + "loss": 0.2664, + "step": 39524 + }, + { + "epoch": 1.851548226917131, + "grad_norm": 0.5880240323536248, + "learning_rate": 7.189327480235181e-08, + "loss": 0.2708, + "step": 39525 + }, + { + "epoch": 1.8515950719070595, + "grad_norm": 0.6116905665812855, + "learning_rate": 7.184812767072974e-08, + "loss": 0.2659, + "step": 39526 + }, + { + "epoch": 1.851641916896988, + "grad_norm": 0.5695153761651883, + "learning_rate": 7.180299451251149e-08, + "loss": 0.2591, + "step": 39527 + }, + { + "epoch": 1.8516887618869162, + "grad_norm": 0.6216368356465883, + "learning_rate": 7.175787532795686e-08, + "loss": 0.2752, + "step": 39528 + }, + { + "epoch": 1.8517356068768445, + "grad_norm": 0.6124899066719964, + "learning_rate": 7.171277011732508e-08, + "loss": 0.2693, + "step": 39529 + }, + { + "epoch": 1.851782451866773, + "grad_norm": 0.5924156280114784, + "learning_rate": 7.166767888087623e-08, + "loss": 0.2815, + "step": 39530 + }, + { + "epoch": 1.8518292968567012, + "grad_norm": 0.586301163421329, + "learning_rate": 7.162260161886952e-08, + "loss": 0.2687, + "step": 39531 + }, + { + "epoch": 1.8518761418466294, + "grad_norm": 0.6311037114345344, + "learning_rate": 7.157753833156422e-08, + "loss": 0.2878, + "step": 39532 + }, + { + "epoch": 1.8519229868365579, + "grad_norm": 0.6084759114210562, + "learning_rate": 7.153248901921983e-08, + "loss": 0.2751, + "step": 39533 + }, + { + "epoch": 1.8519698318264861, + "grad_norm": 0.5969467152306445, + "learning_rate": 7.148745368209586e-08, + "loss": 0.2769, + "step": 39534 + }, + { + "epoch": 1.8520166768164144, + "grad_norm": 0.602427664439061, + "learning_rate": 7.144243232045073e-08, + "loss": 0.2778, + "step": 39535 + }, + { + "epoch": 1.8520635218063428, + "grad_norm": 0.5875507928413162, + "learning_rate": 7.139742493454421e-08, + "loss": 0.269, + "step": 39536 + }, + { + "epoch": 1.8521103667962713, + "grad_norm": 0.6158462830605652, + "learning_rate": 7.135243152463556e-08, + "loss": 0.2676, + "step": 39537 + }, + { + "epoch": 1.8521572117861995, + "grad_norm": 0.5905470800376807, + "learning_rate": 7.13074520909826e-08, + "loss": 0.273, + "step": 39538 + }, + { + "epoch": 1.8522040567761278, + "grad_norm": 0.6384692736523824, + "learning_rate": 7.126248663384517e-08, + "loss": 0.2704, + "step": 39539 + }, + { + "epoch": 1.8522509017660562, + "grad_norm": 0.5681011900609668, + "learning_rate": 7.121753515348134e-08, + "loss": 0.2403, + "step": 39540 + }, + { + "epoch": 1.8522977467559845, + "grad_norm": 0.6206228157263535, + "learning_rate": 7.117259765015067e-08, + "loss": 0.2606, + "step": 39541 + }, + { + "epoch": 1.8523445917459127, + "grad_norm": 0.5993306842119903, + "learning_rate": 7.112767412411098e-08, + "loss": 0.2725, + "step": 39542 + }, + { + "epoch": 1.8523914367358412, + "grad_norm": 0.5613822156633455, + "learning_rate": 7.108276457562124e-08, + "loss": 0.2599, + "step": 39543 + }, + { + "epoch": 1.8524382817257694, + "grad_norm": 0.5837812493179144, + "learning_rate": 7.10378690049393e-08, + "loss": 0.2696, + "step": 39544 + }, + { + "epoch": 1.8524851267156976, + "grad_norm": 0.5926115869494963, + "learning_rate": 7.099298741232414e-08, + "loss": 0.2579, + "step": 39545 + }, + { + "epoch": 1.8525319717056261, + "grad_norm": 0.5897558597289474, + "learning_rate": 7.094811979803356e-08, + "loss": 0.2798, + "step": 39546 + }, + { + "epoch": 1.8525788166955546, + "grad_norm": 0.5857601052566354, + "learning_rate": 7.090326616232629e-08, + "loss": 0.2618, + "step": 39547 + }, + { + "epoch": 1.8526256616854826, + "grad_norm": 0.6148212838043826, + "learning_rate": 7.085842650546043e-08, + "loss": 0.2595, + "step": 39548 + }, + { + "epoch": 1.852672506675411, + "grad_norm": 0.6095120086432837, + "learning_rate": 7.081360082769384e-08, + "loss": 0.265, + "step": 39549 + }, + { + "epoch": 1.8527193516653395, + "grad_norm": 0.6092277984158816, + "learning_rate": 7.076878912928409e-08, + "loss": 0.2922, + "step": 39550 + }, + { + "epoch": 1.8527661966552678, + "grad_norm": 0.5854857547156909, + "learning_rate": 7.07239914104893e-08, + "loss": 0.2712, + "step": 39551 + }, + { + "epoch": 1.852813041645196, + "grad_norm": 0.6050316857622582, + "learning_rate": 7.067920767156732e-08, + "loss": 0.2722, + "step": 39552 + }, + { + "epoch": 1.8528598866351245, + "grad_norm": 0.5982209164544204, + "learning_rate": 7.063443791277602e-08, + "loss": 0.2587, + "step": 39553 + }, + { + "epoch": 1.8529067316250527, + "grad_norm": 0.6396335170098089, + "learning_rate": 7.058968213437295e-08, + "loss": 0.2722, + "step": 39554 + }, + { + "epoch": 1.852953576614981, + "grad_norm": 0.6167758203643283, + "learning_rate": 7.054494033661596e-08, + "loss": 0.2825, + "step": 39555 + }, + { + "epoch": 1.8530004216049094, + "grad_norm": 0.5783896795087945, + "learning_rate": 7.050021251976208e-08, + "loss": 0.276, + "step": 39556 + }, + { + "epoch": 1.8530472665948376, + "grad_norm": 0.5827153204399215, + "learning_rate": 7.04554986840686e-08, + "loss": 0.2721, + "step": 39557 + }, + { + "epoch": 1.853094111584766, + "grad_norm": 0.6964657157622278, + "learning_rate": 7.041079882979334e-08, + "loss": 0.2846, + "step": 39558 + }, + { + "epoch": 1.8531409565746944, + "grad_norm": 0.6043354020908565, + "learning_rate": 7.03661129571931e-08, + "loss": 0.2776, + "step": 39559 + }, + { + "epoch": 1.8531878015646228, + "grad_norm": 0.603975976193684, + "learning_rate": 7.03214410665251e-08, + "loss": 0.2708, + "step": 39560 + }, + { + "epoch": 1.8532346465545508, + "grad_norm": 0.577158037686676, + "learning_rate": 7.027678315804671e-08, + "loss": 0.2633, + "step": 39561 + }, + { + "epoch": 1.8532814915444793, + "grad_norm": 0.5872524912830956, + "learning_rate": 7.023213923201489e-08, + "loss": 0.248, + "step": 39562 + }, + { + "epoch": 1.8533283365344078, + "grad_norm": 0.598019773526494, + "learning_rate": 7.018750928868639e-08, + "loss": 0.2699, + "step": 39563 + }, + { + "epoch": 1.853375181524336, + "grad_norm": 0.6087337791668455, + "learning_rate": 7.014289332831769e-08, + "loss": 0.2841, + "step": 39564 + }, + { + "epoch": 1.8534220265142642, + "grad_norm": 0.597212348166132, + "learning_rate": 7.009829135116608e-08, + "loss": 0.2577, + "step": 39565 + }, + { + "epoch": 1.8534688715041927, + "grad_norm": 0.6006372714657028, + "learning_rate": 7.005370335748829e-08, + "loss": 0.2789, + "step": 39566 + }, + { + "epoch": 1.853515716494121, + "grad_norm": 0.584781622854406, + "learning_rate": 7.000912934754023e-08, + "loss": 0.2788, + "step": 39567 + }, + { + "epoch": 1.8535625614840492, + "grad_norm": 0.5986968305020728, + "learning_rate": 6.996456932157946e-08, + "loss": 0.2757, + "step": 39568 + }, + { + "epoch": 1.8536094064739776, + "grad_norm": 0.5904556580513599, + "learning_rate": 6.992002327986164e-08, + "loss": 0.2583, + "step": 39569 + }, + { + "epoch": 1.853656251463906, + "grad_norm": 0.6207168940396118, + "learning_rate": 6.98754912226432e-08, + "loss": 0.2935, + "step": 39570 + }, + { + "epoch": 1.8537030964538341, + "grad_norm": 0.6226201220335236, + "learning_rate": 6.983097315018062e-08, + "loss": 0.2753, + "step": 39571 + }, + { + "epoch": 1.8537499414437626, + "grad_norm": 0.6188017607650281, + "learning_rate": 6.978646906272979e-08, + "loss": 0.2709, + "step": 39572 + }, + { + "epoch": 1.853796786433691, + "grad_norm": 0.5630620040725778, + "learning_rate": 6.974197896054718e-08, + "loss": 0.2546, + "step": 39573 + }, + { + "epoch": 1.8538436314236193, + "grad_norm": 0.584571126243313, + "learning_rate": 6.969750284388871e-08, + "loss": 0.2677, + "step": 39574 + }, + { + "epoch": 1.8538904764135475, + "grad_norm": 0.6146744985847318, + "learning_rate": 6.965304071300999e-08, + "loss": 0.2827, + "step": 39575 + }, + { + "epoch": 1.853937321403476, + "grad_norm": 0.59079308741108, + "learning_rate": 6.960859256816749e-08, + "loss": 0.2543, + "step": 39576 + }, + { + "epoch": 1.8539841663934042, + "grad_norm": 0.5639157586809821, + "learning_rate": 6.956415840961656e-08, + "loss": 0.2648, + "step": 39577 + }, + { + "epoch": 1.8540310113833325, + "grad_norm": 0.5740216704632933, + "learning_rate": 6.95197382376131e-08, + "loss": 0.2549, + "step": 39578 + }, + { + "epoch": 1.854077856373261, + "grad_norm": 0.5801708126907605, + "learning_rate": 6.947533205241247e-08, + "loss": 0.2859, + "step": 39579 + }, + { + "epoch": 1.8541247013631892, + "grad_norm": 0.6520051432193458, + "learning_rate": 6.943093985427086e-08, + "loss": 0.2826, + "step": 39580 + }, + { + "epoch": 1.8541715463531174, + "grad_norm": 0.5743544791013578, + "learning_rate": 6.938656164344276e-08, + "loss": 0.2532, + "step": 39581 + }, + { + "epoch": 1.854218391343046, + "grad_norm": 0.5534248051131517, + "learning_rate": 6.934219742018439e-08, + "loss": 0.2663, + "step": 39582 + }, + { + "epoch": 1.8542652363329744, + "grad_norm": 0.6271282143141302, + "learning_rate": 6.92978471847508e-08, + "loss": 0.2739, + "step": 39583 + }, + { + "epoch": 1.8543120813229024, + "grad_norm": 0.5968311441002466, + "learning_rate": 6.925351093739679e-08, + "loss": 0.2783, + "step": 39584 + }, + { + "epoch": 1.8543589263128308, + "grad_norm": 0.5993855470937063, + "learning_rate": 6.920918867837829e-08, + "loss": 0.2629, + "step": 39585 + }, + { + "epoch": 1.8544057713027593, + "grad_norm": 0.5970884783547901, + "learning_rate": 6.916488040794978e-08, + "loss": 0.2743, + "step": 39586 + }, + { + "epoch": 1.8544526162926875, + "grad_norm": 0.6249666061305126, + "learning_rate": 6.912058612636635e-08, + "loss": 0.2561, + "step": 39587 + }, + { + "epoch": 1.8544994612826158, + "grad_norm": 0.6415164170290262, + "learning_rate": 6.907630583388309e-08, + "loss": 0.2769, + "step": 39588 + }, + { + "epoch": 1.8545463062725442, + "grad_norm": 0.5663840232129315, + "learning_rate": 6.903203953075449e-08, + "loss": 0.2693, + "step": 39589 + }, + { + "epoch": 1.8545931512624725, + "grad_norm": 0.5798479294967711, + "learning_rate": 6.898778721723592e-08, + "loss": 0.2737, + "step": 39590 + }, + { + "epoch": 1.8546399962524007, + "grad_norm": 0.5776354560235063, + "learning_rate": 6.894354889358135e-08, + "loss": 0.2614, + "step": 39591 + }, + { + "epoch": 1.8546868412423292, + "grad_norm": 0.6076173250389637, + "learning_rate": 6.889932456004583e-08, + "loss": 0.2724, + "step": 39592 + }, + { + "epoch": 1.8547336862322574, + "grad_norm": 0.5709507061499272, + "learning_rate": 6.885511421688335e-08, + "loss": 0.2815, + "step": 39593 + }, + { + "epoch": 1.8547805312221857, + "grad_norm": 0.6290979784169688, + "learning_rate": 6.881091786434868e-08, + "loss": 0.2929, + "step": 39594 + }, + { + "epoch": 1.8548273762121141, + "grad_norm": 0.6234836773779745, + "learning_rate": 6.876673550269608e-08, + "loss": 0.281, + "step": 39595 + }, + { + "epoch": 1.8548742212020426, + "grad_norm": 0.5410608326420943, + "learning_rate": 6.872256713218006e-08, + "loss": 0.245, + "step": 39596 + }, + { + "epoch": 1.8549210661919706, + "grad_norm": 0.5571055842881272, + "learning_rate": 6.867841275305459e-08, + "loss": 0.2469, + "step": 39597 + }, + { + "epoch": 1.854967911181899, + "grad_norm": 0.6139775475787916, + "learning_rate": 6.863427236557391e-08, + "loss": 0.2517, + "step": 39598 + }, + { + "epoch": 1.8550147561718275, + "grad_norm": 0.5785081685028779, + "learning_rate": 6.85901459699917e-08, + "loss": 0.2511, + "step": 39599 + }, + { + "epoch": 1.8550616011617558, + "grad_norm": 0.6173432264301845, + "learning_rate": 6.854603356656193e-08, + "loss": 0.2806, + "step": 39600 + }, + { + "epoch": 1.855108446151684, + "grad_norm": 0.5816684450946272, + "learning_rate": 6.850193515553855e-08, + "loss": 0.2619, + "step": 39601 + }, + { + "epoch": 1.8551552911416125, + "grad_norm": 0.624936831129078, + "learning_rate": 6.845785073717553e-08, + "loss": 0.2711, + "step": 39602 + }, + { + "epoch": 1.8552021361315407, + "grad_norm": 0.5873907822726616, + "learning_rate": 6.841378031172658e-08, + "loss": 0.2659, + "step": 39603 + }, + { + "epoch": 1.855248981121469, + "grad_norm": 0.6024381145920386, + "learning_rate": 6.836972387944507e-08, + "loss": 0.2724, + "step": 39604 + }, + { + "epoch": 1.8552958261113974, + "grad_norm": 0.5878227787751542, + "learning_rate": 6.832568144058499e-08, + "loss": 0.2564, + "step": 39605 + }, + { + "epoch": 1.8553426711013257, + "grad_norm": 0.6028965430240335, + "learning_rate": 6.82816529953989e-08, + "loss": 0.2623, + "step": 39606 + }, + { + "epoch": 1.855389516091254, + "grad_norm": 0.555044958574678, + "learning_rate": 6.823763854414078e-08, + "loss": 0.2449, + "step": 39607 + }, + { + "epoch": 1.8554363610811824, + "grad_norm": 0.5397537311541905, + "learning_rate": 6.819363808706403e-08, + "loss": 0.2557, + "step": 39608 + }, + { + "epoch": 1.8554832060711108, + "grad_norm": 0.5662900165652641, + "learning_rate": 6.81496516244215e-08, + "loss": 0.2666, + "step": 39609 + }, + { + "epoch": 1.855530051061039, + "grad_norm": 0.5517662195496283, + "learning_rate": 6.810567915646688e-08, + "loss": 0.2559, + "step": 39610 + }, + { + "epoch": 1.8555768960509673, + "grad_norm": 0.6230085934210909, + "learning_rate": 6.806172068345246e-08, + "loss": 0.2607, + "step": 39611 + }, + { + "epoch": 1.8556237410408958, + "grad_norm": 0.6279951616422674, + "learning_rate": 6.801777620563194e-08, + "loss": 0.2796, + "step": 39612 + }, + { + "epoch": 1.855670586030824, + "grad_norm": 0.551387885281356, + "learning_rate": 6.79738457232576e-08, + "loss": 0.2468, + "step": 39613 + }, + { + "epoch": 1.8557174310207523, + "grad_norm": 0.5740432130124057, + "learning_rate": 6.792992923658232e-08, + "loss": 0.2673, + "step": 39614 + }, + { + "epoch": 1.8557642760106807, + "grad_norm": 0.6609819531373378, + "learning_rate": 6.788602674585921e-08, + "loss": 0.29, + "step": 39615 + }, + { + "epoch": 1.855811121000609, + "grad_norm": 0.6589948676278089, + "learning_rate": 6.784213825134112e-08, + "loss": 0.2795, + "step": 39616 + }, + { + "epoch": 1.8558579659905372, + "grad_norm": 0.6094180652944969, + "learning_rate": 6.77982637532798e-08, + "loss": 0.2816, + "step": 39617 + }, + { + "epoch": 1.8559048109804657, + "grad_norm": 0.5778710685802934, + "learning_rate": 6.775440325192812e-08, + "loss": 0.2717, + "step": 39618 + }, + { + "epoch": 1.8559516559703941, + "grad_norm": 0.5722678421371655, + "learning_rate": 6.771055674753862e-08, + "loss": 0.2593, + "step": 39619 + }, + { + "epoch": 1.8559985009603222, + "grad_norm": 0.5997477554026972, + "learning_rate": 6.766672424036364e-08, + "loss": 0.2751, + "step": 39620 + }, + { + "epoch": 1.8560453459502506, + "grad_norm": 0.6045511674508732, + "learning_rate": 6.762290573065517e-08, + "loss": 0.2663, + "step": 39621 + }, + { + "epoch": 1.856092190940179, + "grad_norm": 0.5791384826985786, + "learning_rate": 6.757910121866579e-08, + "loss": 0.2559, + "step": 39622 + }, + { + "epoch": 1.8561390359301073, + "grad_norm": 0.5890631766836639, + "learning_rate": 6.753531070464698e-08, + "loss": 0.2891, + "step": 39623 + }, + { + "epoch": 1.8561858809200356, + "grad_norm": 0.6222500112831026, + "learning_rate": 6.749153418885102e-08, + "loss": 0.2946, + "step": 39624 + }, + { + "epoch": 1.856232725909964, + "grad_norm": 0.6026161838454108, + "learning_rate": 6.744777167153022e-08, + "loss": 0.2793, + "step": 39625 + }, + { + "epoch": 1.8562795708998923, + "grad_norm": 0.5978501216113227, + "learning_rate": 6.740402315293604e-08, + "loss": 0.2779, + "step": 39626 + }, + { + "epoch": 1.8563264158898205, + "grad_norm": 0.5928341765094238, + "learning_rate": 6.736028863332022e-08, + "loss": 0.2679, + "step": 39627 + }, + { + "epoch": 1.856373260879749, + "grad_norm": 0.615175992138107, + "learning_rate": 6.731656811293424e-08, + "loss": 0.2744, + "step": 39628 + }, + { + "epoch": 1.8564201058696772, + "grad_norm": 0.5846849119528365, + "learning_rate": 6.727286159203039e-08, + "loss": 0.2735, + "step": 39629 + }, + { + "epoch": 1.8564669508596054, + "grad_norm": 0.5960813383532633, + "learning_rate": 6.722916907085958e-08, + "loss": 0.2541, + "step": 39630 + }, + { + "epoch": 1.856513795849534, + "grad_norm": 0.6226932956002871, + "learning_rate": 6.718549054967355e-08, + "loss": 0.2706, + "step": 39631 + }, + { + "epoch": 1.8565606408394624, + "grad_norm": 0.588245305718597, + "learning_rate": 6.714182602872377e-08, + "loss": 0.2674, + "step": 39632 + }, + { + "epoch": 1.8566074858293904, + "grad_norm": 0.606102825952645, + "learning_rate": 6.709817550826087e-08, + "loss": 0.2713, + "step": 39633 + }, + { + "epoch": 1.8566543308193189, + "grad_norm": 0.5879991973779698, + "learning_rate": 6.705453898853658e-08, + "loss": 0.2713, + "step": 39634 + }, + { + "epoch": 1.8567011758092473, + "grad_norm": 0.5827461346768338, + "learning_rate": 6.701091646980213e-08, + "loss": 0.2587, + "step": 39635 + }, + { + "epoch": 1.8567480207991756, + "grad_norm": 0.6084127630928512, + "learning_rate": 6.696730795230838e-08, + "loss": 0.2759, + "step": 39636 + }, + { + "epoch": 1.8567948657891038, + "grad_norm": 0.6133394195771262, + "learning_rate": 6.6923713436306e-08, + "loss": 0.2664, + "step": 39637 + }, + { + "epoch": 1.8568417107790323, + "grad_norm": 0.569292383655261, + "learning_rate": 6.688013292204615e-08, + "loss": 0.2641, + "step": 39638 + }, + { + "epoch": 1.8568885557689605, + "grad_norm": 0.5795371572943869, + "learning_rate": 6.683656640977976e-08, + "loss": 0.2731, + "step": 39639 + }, + { + "epoch": 1.8569354007588887, + "grad_norm": 0.6203711602052037, + "learning_rate": 6.679301389975718e-08, + "loss": 0.2699, + "step": 39640 + }, + { + "epoch": 1.8569822457488172, + "grad_norm": 0.6507806676423101, + "learning_rate": 6.674947539222959e-08, + "loss": 0.2963, + "step": 39641 + }, + { + "epoch": 1.8570290907387454, + "grad_norm": 0.6706358678311848, + "learning_rate": 6.67059508874468e-08, + "loss": 0.2805, + "step": 39642 + }, + { + "epoch": 1.8570759357286737, + "grad_norm": 0.5917964127549386, + "learning_rate": 6.666244038565973e-08, + "loss": 0.2764, + "step": 39643 + }, + { + "epoch": 1.8571227807186022, + "grad_norm": 0.5998314272850069, + "learning_rate": 6.66189438871187e-08, + "loss": 0.2763, + "step": 39644 + }, + { + "epoch": 1.8571696257085306, + "grad_norm": 0.6056222464004971, + "learning_rate": 6.65754613920741e-08, + "loss": 0.2757, + "step": 39645 + }, + { + "epoch": 1.8572164706984589, + "grad_norm": 0.595116227681567, + "learning_rate": 6.653199290077628e-08, + "loss": 0.2677, + "step": 39646 + }, + { + "epoch": 1.857263315688387, + "grad_norm": 0.5662115932906773, + "learning_rate": 6.648853841347531e-08, + "loss": 0.2648, + "step": 39647 + }, + { + "epoch": 1.8573101606783156, + "grad_norm": 0.5942972336476318, + "learning_rate": 6.64450979304207e-08, + "loss": 0.2672, + "step": 39648 + }, + { + "epoch": 1.8573570056682438, + "grad_norm": 0.6278311541543808, + "learning_rate": 6.64016714518631e-08, + "loss": 0.2763, + "step": 39649 + }, + { + "epoch": 1.857403850658172, + "grad_norm": 0.58609606241488, + "learning_rate": 6.635825897805231e-08, + "loss": 0.2684, + "step": 39650 + }, + { + "epoch": 1.8574506956481005, + "grad_norm": 0.6187587491456366, + "learning_rate": 6.631486050923785e-08, + "loss": 0.2759, + "step": 39651 + }, + { + "epoch": 1.8574975406380287, + "grad_norm": 0.5823012107576324, + "learning_rate": 6.62714760456698e-08, + "loss": 0.2558, + "step": 39652 + }, + { + "epoch": 1.857544385627957, + "grad_norm": 0.6182158363915861, + "learning_rate": 6.622810558759795e-08, + "loss": 0.2738, + "step": 39653 + }, + { + "epoch": 1.8575912306178854, + "grad_norm": 0.6430044649935912, + "learning_rate": 6.618474913527157e-08, + "loss": 0.2759, + "step": 39654 + }, + { + "epoch": 1.857638075607814, + "grad_norm": 0.6053443614619766, + "learning_rate": 6.614140668894015e-08, + "loss": 0.288, + "step": 39655 + }, + { + "epoch": 1.857684920597742, + "grad_norm": 0.6309486609186897, + "learning_rate": 6.609807824885295e-08, + "loss": 0.2643, + "step": 39656 + }, + { + "epoch": 1.8577317655876704, + "grad_norm": 0.5660994287293308, + "learning_rate": 6.605476381525977e-08, + "loss": 0.2632, + "step": 39657 + }, + { + "epoch": 1.8577786105775989, + "grad_norm": 0.6224366953235739, + "learning_rate": 6.601146338840959e-08, + "loss": 0.2694, + "step": 39658 + }, + { + "epoch": 1.857825455567527, + "grad_norm": 0.6150391646756416, + "learning_rate": 6.596817696855217e-08, + "loss": 0.2916, + "step": 39659 + }, + { + "epoch": 1.8578723005574553, + "grad_norm": 0.5457181012825273, + "learning_rate": 6.592490455593542e-08, + "loss": 0.2566, + "step": 39660 + }, + { + "epoch": 1.8579191455473838, + "grad_norm": 0.6463831516109937, + "learning_rate": 6.588164615080967e-08, + "loss": 0.2842, + "step": 39661 + }, + { + "epoch": 1.857965990537312, + "grad_norm": 0.6266914503991864, + "learning_rate": 6.583840175342304e-08, + "loss": 0.2631, + "step": 39662 + }, + { + "epoch": 1.8580128355272403, + "grad_norm": 0.5932197728069837, + "learning_rate": 6.579517136402424e-08, + "loss": 0.2717, + "step": 39663 + }, + { + "epoch": 1.8580596805171687, + "grad_norm": 0.5706373745868145, + "learning_rate": 6.57519549828628e-08, + "loss": 0.2479, + "step": 39664 + }, + { + "epoch": 1.858106525507097, + "grad_norm": 0.5910469438458428, + "learning_rate": 6.570875261018711e-08, + "loss": 0.2573, + "step": 39665 + }, + { + "epoch": 1.8581533704970252, + "grad_norm": 0.589026652049614, + "learning_rate": 6.566556424624532e-08, + "loss": 0.2617, + "step": 39666 + }, + { + "epoch": 1.8582002154869537, + "grad_norm": 0.5816857722609781, + "learning_rate": 6.562238989128666e-08, + "loss": 0.2826, + "step": 39667 + }, + { + "epoch": 1.8582470604768822, + "grad_norm": 0.6334090803888587, + "learning_rate": 6.557922954555929e-08, + "loss": 0.2786, + "step": 39668 + }, + { + "epoch": 1.8582939054668102, + "grad_norm": 0.5922006515974246, + "learning_rate": 6.553608320931159e-08, + "loss": 0.2596, + "step": 39669 + }, + { + "epoch": 1.8583407504567386, + "grad_norm": 0.5775201404317425, + "learning_rate": 6.549295088279173e-08, + "loss": 0.2697, + "step": 39670 + }, + { + "epoch": 1.858387595446667, + "grad_norm": 0.5848650537928517, + "learning_rate": 6.54498325662481e-08, + "loss": 0.2702, + "step": 39671 + }, + { + "epoch": 1.8584344404365953, + "grad_norm": 0.5721578974094256, + "learning_rate": 6.540672825992883e-08, + "loss": 0.2741, + "step": 39672 + }, + { + "epoch": 1.8584812854265236, + "grad_norm": 0.5931416414311982, + "learning_rate": 6.536363796408179e-08, + "loss": 0.2643, + "step": 39673 + }, + { + "epoch": 1.858528130416452, + "grad_norm": 0.5712812269686717, + "learning_rate": 6.532056167895512e-08, + "loss": 0.2506, + "step": 39674 + }, + { + "epoch": 1.8585749754063803, + "grad_norm": 0.6031202046379933, + "learning_rate": 6.527749940479667e-08, + "loss": 0.2834, + "step": 39675 + }, + { + "epoch": 1.8586218203963085, + "grad_norm": 0.6325241074941808, + "learning_rate": 6.523445114185428e-08, + "loss": 0.2771, + "step": 39676 + }, + { + "epoch": 1.858668665386237, + "grad_norm": 0.6399542818749147, + "learning_rate": 6.519141689037612e-08, + "loss": 0.2627, + "step": 39677 + }, + { + "epoch": 1.8587155103761652, + "grad_norm": 0.5502152376805238, + "learning_rate": 6.514839665060891e-08, + "loss": 0.2491, + "step": 39678 + }, + { + "epoch": 1.8587623553660935, + "grad_norm": 0.6024093838888595, + "learning_rate": 6.51053904228005e-08, + "loss": 0.2666, + "step": 39679 + }, + { + "epoch": 1.858809200356022, + "grad_norm": 0.5869996584984444, + "learning_rate": 6.506239820719878e-08, + "loss": 0.2572, + "step": 39680 + }, + { + "epoch": 1.8588560453459504, + "grad_norm": 0.6250262230548349, + "learning_rate": 6.501942000405132e-08, + "loss": 0.2686, + "step": 39681 + }, + { + "epoch": 1.8589028903358786, + "grad_norm": 0.5784286215694607, + "learning_rate": 6.497645581360456e-08, + "loss": 0.2568, + "step": 39682 + }, + { + "epoch": 1.8589497353258069, + "grad_norm": 0.6182154173439499, + "learning_rate": 6.493350563610696e-08, + "loss": 0.288, + "step": 39683 + }, + { + "epoch": 1.8589965803157353, + "grad_norm": 0.5823945177068696, + "learning_rate": 6.489056947180439e-08, + "loss": 0.263, + "step": 39684 + }, + { + "epoch": 1.8590434253056636, + "grad_norm": 0.6212882312656776, + "learning_rate": 6.484764732094473e-08, + "loss": 0.2885, + "step": 39685 + }, + { + "epoch": 1.8590902702955918, + "grad_norm": 0.546910435357108, + "learning_rate": 6.480473918377473e-08, + "loss": 0.2508, + "step": 39686 + }, + { + "epoch": 1.8591371152855203, + "grad_norm": 0.5733481516617225, + "learning_rate": 6.476184506054139e-08, + "loss": 0.2724, + "step": 39687 + }, + { + "epoch": 1.8591839602754485, + "grad_norm": 0.5967919368509833, + "learning_rate": 6.471896495149176e-08, + "loss": 0.2681, + "step": 39688 + }, + { + "epoch": 1.8592308052653768, + "grad_norm": 0.6089892723326911, + "learning_rate": 6.467609885687232e-08, + "loss": 0.2695, + "step": 39689 + }, + { + "epoch": 1.8592776502553052, + "grad_norm": 0.5733611631697544, + "learning_rate": 6.463324677692979e-08, + "loss": 0.2615, + "step": 39690 + }, + { + "epoch": 1.8593244952452337, + "grad_norm": 0.6085998994764282, + "learning_rate": 6.459040871191063e-08, + "loss": 0.2691, + "step": 39691 + }, + { + "epoch": 1.8593713402351617, + "grad_norm": 0.5910570869617273, + "learning_rate": 6.45475846620619e-08, + "loss": 0.2692, + "step": 39692 + }, + { + "epoch": 1.8594181852250902, + "grad_norm": 0.6707324563049485, + "learning_rate": 6.450477462762949e-08, + "loss": 0.2725, + "step": 39693 + }, + { + "epoch": 1.8594650302150186, + "grad_norm": 0.5733894687138108, + "learning_rate": 6.44619786088599e-08, + "loss": 0.2735, + "step": 39694 + }, + { + "epoch": 1.8595118752049469, + "grad_norm": 0.6214892511358975, + "learning_rate": 6.441919660599954e-08, + "loss": 0.279, + "step": 39695 + }, + { + "epoch": 1.8595587201948751, + "grad_norm": 0.6139800834361985, + "learning_rate": 6.437642861929494e-08, + "loss": 0.2687, + "step": 39696 + }, + { + "epoch": 1.8596055651848036, + "grad_norm": 0.5564179960496944, + "learning_rate": 6.433367464899142e-08, + "loss": 0.2729, + "step": 39697 + }, + { + "epoch": 1.8596524101747318, + "grad_norm": 0.5924610975665789, + "learning_rate": 6.429093469533521e-08, + "loss": 0.2817, + "step": 39698 + }, + { + "epoch": 1.85969925516466, + "grad_norm": 0.6138676820506188, + "learning_rate": 6.424820875857274e-08, + "loss": 0.2805, + "step": 39699 + }, + { + "epoch": 1.8597461001545885, + "grad_norm": 0.609474763037427, + "learning_rate": 6.420549683894967e-08, + "loss": 0.2743, + "step": 39700 + }, + { + "epoch": 1.8597929451445168, + "grad_norm": 0.5346681606184198, + "learning_rate": 6.416279893671162e-08, + "loss": 0.2536, + "step": 39701 + }, + { + "epoch": 1.859839790134445, + "grad_norm": 0.6065691722344145, + "learning_rate": 6.412011505210453e-08, + "loss": 0.2784, + "step": 39702 + }, + { + "epoch": 1.8598866351243735, + "grad_norm": 0.6016037674035466, + "learning_rate": 6.407744518537428e-08, + "loss": 0.2581, + "step": 39703 + }, + { + "epoch": 1.859933480114302, + "grad_norm": 0.5594218261371645, + "learning_rate": 6.403478933676543e-08, + "loss": 0.2462, + "step": 39704 + }, + { + "epoch": 1.85998032510423, + "grad_norm": 0.6020972847708708, + "learning_rate": 6.399214750652444e-08, + "loss": 0.2802, + "step": 39705 + }, + { + "epoch": 1.8600271700941584, + "grad_norm": 0.5608616759518084, + "learning_rate": 6.394951969489638e-08, + "loss": 0.2567, + "step": 39706 + }, + { + "epoch": 1.8600740150840869, + "grad_norm": 0.5936688605730772, + "learning_rate": 6.390690590212634e-08, + "loss": 0.2664, + "step": 39707 + }, + { + "epoch": 1.8601208600740151, + "grad_norm": 0.637895850054575, + "learning_rate": 6.386430612846023e-08, + "loss": 0.2611, + "step": 39708 + }, + { + "epoch": 1.8601677050639434, + "grad_norm": 0.6265799400187745, + "learning_rate": 6.38217203741423e-08, + "loss": 0.2804, + "step": 39709 + }, + { + "epoch": 1.8602145500538718, + "grad_norm": 0.5942847680303156, + "learning_rate": 6.377914863941847e-08, + "loss": 0.2754, + "step": 39710 + }, + { + "epoch": 1.8602613950438, + "grad_norm": 0.6143862348998596, + "learning_rate": 6.373659092453299e-08, + "loss": 0.2868, + "step": 39711 + }, + { + "epoch": 1.8603082400337283, + "grad_norm": 0.5665406677530296, + "learning_rate": 6.369404722973122e-08, + "loss": 0.2557, + "step": 39712 + }, + { + "epoch": 1.8603550850236568, + "grad_norm": 0.608403043254855, + "learning_rate": 6.365151755525767e-08, + "loss": 0.2705, + "step": 39713 + }, + { + "epoch": 1.860401930013585, + "grad_norm": 0.6138613745466412, + "learning_rate": 6.360900190135743e-08, + "loss": 0.2787, + "step": 39714 + }, + { + "epoch": 1.8604487750035132, + "grad_norm": 0.6322258559105468, + "learning_rate": 6.356650026827504e-08, + "loss": 0.2892, + "step": 39715 + }, + { + "epoch": 1.8604956199934417, + "grad_norm": 0.555605224235935, + "learning_rate": 6.352401265625501e-08, + "loss": 0.2419, + "step": 39716 + }, + { + "epoch": 1.8605424649833702, + "grad_norm": 0.6515103518562687, + "learning_rate": 6.348153906554216e-08, + "loss": 0.2844, + "step": 39717 + }, + { + "epoch": 1.8605893099732984, + "grad_norm": 0.6109371510669886, + "learning_rate": 6.343907949638046e-08, + "loss": 0.278, + "step": 39718 + }, + { + "epoch": 1.8606361549632267, + "grad_norm": 0.5655100604686264, + "learning_rate": 6.339663394901413e-08, + "loss": 0.2522, + "step": 39719 + }, + { + "epoch": 1.8606829999531551, + "grad_norm": 0.5623348567253061, + "learning_rate": 6.335420242368828e-08, + "loss": 0.2555, + "step": 39720 + }, + { + "epoch": 1.8607298449430834, + "grad_norm": 0.5953845602181281, + "learning_rate": 6.331178492064632e-08, + "loss": 0.2758, + "step": 39721 + }, + { + "epoch": 1.8607766899330116, + "grad_norm": 0.6042725655936992, + "learning_rate": 6.326938144013251e-08, + "loss": 0.2746, + "step": 39722 + }, + { + "epoch": 1.86082353492294, + "grad_norm": 0.5875720393190625, + "learning_rate": 6.322699198239135e-08, + "loss": 0.2612, + "step": 39723 + }, + { + "epoch": 1.8608703799128683, + "grad_norm": 0.5978828352414657, + "learning_rate": 6.3184616547666e-08, + "loss": 0.2703, + "step": 39724 + }, + { + "epoch": 1.8609172249027965, + "grad_norm": 0.5800672367633489, + "learning_rate": 6.31422551362007e-08, + "loss": 0.2636, + "step": 39725 + }, + { + "epoch": 1.860964069892725, + "grad_norm": 0.5785383664515741, + "learning_rate": 6.30999077482397e-08, + "loss": 0.2736, + "step": 39726 + }, + { + "epoch": 1.8610109148826535, + "grad_norm": 0.5729015584639156, + "learning_rate": 6.305757438402588e-08, + "loss": 0.2654, + "step": 39727 + }, + { + "epoch": 1.8610577598725815, + "grad_norm": 0.6068998855822192, + "learning_rate": 6.301525504380318e-08, + "loss": 0.2801, + "step": 39728 + }, + { + "epoch": 1.86110460486251, + "grad_norm": 0.6293766379214428, + "learning_rate": 6.297294972781504e-08, + "loss": 0.2841, + "step": 39729 + }, + { + "epoch": 1.8611514498524384, + "grad_norm": 0.5761146110940428, + "learning_rate": 6.293065843630541e-08, + "loss": 0.2707, + "step": 39730 + }, + { + "epoch": 1.8611982948423667, + "grad_norm": 0.619111752210402, + "learning_rate": 6.288838116951717e-08, + "loss": 0.2876, + "step": 39731 + }, + { + "epoch": 1.861245139832295, + "grad_norm": 0.6053081048044917, + "learning_rate": 6.284611792769374e-08, + "loss": 0.269, + "step": 39732 + }, + { + "epoch": 1.8612919848222234, + "grad_norm": 0.628851845356747, + "learning_rate": 6.280386871107824e-08, + "loss": 0.2781, + "step": 39733 + }, + { + "epoch": 1.8613388298121516, + "grad_norm": 0.5660096988867774, + "learning_rate": 6.27616335199141e-08, + "loss": 0.2571, + "step": 39734 + }, + { + "epoch": 1.8613856748020798, + "grad_norm": 0.6381141037414666, + "learning_rate": 6.27194123544439e-08, + "loss": 0.2729, + "step": 39735 + }, + { + "epoch": 1.8614325197920083, + "grad_norm": 0.6033130380112275, + "learning_rate": 6.267720521491105e-08, + "loss": 0.2758, + "step": 39736 + }, + { + "epoch": 1.8614793647819365, + "grad_norm": 0.6172686298654652, + "learning_rate": 6.263501210155843e-08, + "loss": 0.2566, + "step": 39737 + }, + { + "epoch": 1.8615262097718648, + "grad_norm": 0.5628000091205432, + "learning_rate": 6.259283301462887e-08, + "loss": 0.2586, + "step": 39738 + }, + { + "epoch": 1.8615730547617932, + "grad_norm": 0.6100212692904944, + "learning_rate": 6.255066795436443e-08, + "loss": 0.2832, + "step": 39739 + }, + { + "epoch": 1.8616198997517217, + "grad_norm": 0.5625444287877568, + "learning_rate": 6.250851692100852e-08, + "loss": 0.2564, + "step": 39740 + }, + { + "epoch": 1.8616667447416497, + "grad_norm": 0.5927216661697121, + "learning_rate": 6.246637991480341e-08, + "loss": 0.2781, + "step": 39741 + }, + { + "epoch": 1.8617135897315782, + "grad_norm": 0.5975223631115124, + "learning_rate": 6.242425693599146e-08, + "loss": 0.2551, + "step": 39742 + }, + { + "epoch": 1.8617604347215067, + "grad_norm": 0.6113251578746957, + "learning_rate": 6.238214798481551e-08, + "loss": 0.2766, + "step": 39743 + }, + { + "epoch": 1.861807279711435, + "grad_norm": 0.616214937330539, + "learning_rate": 6.234005306151758e-08, + "loss": 0.2677, + "step": 39744 + }, + { + "epoch": 1.8618541247013631, + "grad_norm": 0.6638854156302241, + "learning_rate": 6.229797216634026e-08, + "loss": 0.2976, + "step": 39745 + }, + { + "epoch": 1.8619009696912916, + "grad_norm": 0.5637488083114004, + "learning_rate": 6.225590529952502e-08, + "loss": 0.2634, + "step": 39746 + }, + { + "epoch": 1.8619478146812198, + "grad_norm": 0.6112016210394124, + "learning_rate": 6.221385246131418e-08, + "loss": 0.276, + "step": 39747 + }, + { + "epoch": 1.861994659671148, + "grad_norm": 0.6272540605667075, + "learning_rate": 6.21718136519503e-08, + "loss": 0.2733, + "step": 39748 + }, + { + "epoch": 1.8620415046610765, + "grad_norm": 0.5813442130293263, + "learning_rate": 6.212978887167459e-08, + "loss": 0.2522, + "step": 39749 + }, + { + "epoch": 1.8620883496510048, + "grad_norm": 0.6469980034613824, + "learning_rate": 6.208777812072964e-08, + "loss": 0.27, + "step": 39750 + }, + { + "epoch": 1.862135194640933, + "grad_norm": 0.5832352333089756, + "learning_rate": 6.204578139935635e-08, + "loss": 0.2658, + "step": 39751 + }, + { + "epoch": 1.8621820396308615, + "grad_norm": 0.5827640654101767, + "learning_rate": 6.200379870779705e-08, + "loss": 0.2649, + "step": 39752 + }, + { + "epoch": 1.86222888462079, + "grad_norm": 0.5658635636052052, + "learning_rate": 6.19618300462932e-08, + "loss": 0.2463, + "step": 39753 + }, + { + "epoch": 1.8622757296107182, + "grad_norm": 0.6172708870811476, + "learning_rate": 6.191987541508598e-08, + "loss": 0.2757, + "step": 39754 + }, + { + "epoch": 1.8623225746006464, + "grad_norm": 0.6064028743015398, + "learning_rate": 6.187793481441717e-08, + "loss": 0.2571, + "step": 39755 + }, + { + "epoch": 1.862369419590575, + "grad_norm": 0.5860268204356301, + "learning_rate": 6.183600824452824e-08, + "loss": 0.2588, + "step": 39756 + }, + { + "epoch": 1.8624162645805031, + "grad_norm": 0.5591380493754606, + "learning_rate": 6.179409570566008e-08, + "loss": 0.2673, + "step": 39757 + }, + { + "epoch": 1.8624631095704314, + "grad_norm": 0.5987102937554246, + "learning_rate": 6.175219719805391e-08, + "loss": 0.2629, + "step": 39758 + }, + { + "epoch": 1.8625099545603598, + "grad_norm": 0.6017500772085208, + "learning_rate": 6.17103127219515e-08, + "loss": 0.2816, + "step": 39759 + }, + { + "epoch": 1.862556799550288, + "grad_norm": 0.6028521316117846, + "learning_rate": 6.166844227759289e-08, + "loss": 0.2805, + "step": 39760 + }, + { + "epoch": 1.8626036445402163, + "grad_norm": 0.576860797454601, + "learning_rate": 6.162658586521986e-08, + "loss": 0.2633, + "step": 39761 + }, + { + "epoch": 1.8626504895301448, + "grad_norm": 0.5766445169365971, + "learning_rate": 6.158474348507277e-08, + "loss": 0.2702, + "step": 39762 + }, + { + "epoch": 1.8626973345200732, + "grad_norm": 0.6082904790238822, + "learning_rate": 6.15429151373928e-08, + "loss": 0.2744, + "step": 39763 + }, + { + "epoch": 1.8627441795100013, + "grad_norm": 0.619933463450353, + "learning_rate": 6.150110082242034e-08, + "loss": 0.2829, + "step": 39764 + }, + { + "epoch": 1.8627910244999297, + "grad_norm": 0.5678794433152707, + "learning_rate": 6.145930054039628e-08, + "loss": 0.2512, + "step": 39765 + }, + { + "epoch": 1.8628378694898582, + "grad_norm": 0.5815040215312693, + "learning_rate": 6.141751429156101e-08, + "loss": 0.2646, + "step": 39766 + }, + { + "epoch": 1.8628847144797864, + "grad_norm": 0.5815586848692865, + "learning_rate": 6.137574207615488e-08, + "loss": 0.2586, + "step": 39767 + }, + { + "epoch": 1.8629315594697147, + "grad_norm": 0.5789425202128087, + "learning_rate": 6.133398389441853e-08, + "loss": 0.2729, + "step": 39768 + }, + { + "epoch": 1.8629784044596431, + "grad_norm": 0.5722368205190657, + "learning_rate": 6.129223974659232e-08, + "loss": 0.2582, + "step": 39769 + }, + { + "epoch": 1.8630252494495714, + "grad_norm": 0.6132505603446604, + "learning_rate": 6.125050963291607e-08, + "loss": 0.2611, + "step": 39770 + }, + { + "epoch": 1.8630720944394996, + "grad_norm": 0.6267197017485646, + "learning_rate": 6.120879355363012e-08, + "loss": 0.2832, + "step": 39771 + }, + { + "epoch": 1.863118939429428, + "grad_norm": 0.6229629203272116, + "learning_rate": 6.116709150897516e-08, + "loss": 0.2799, + "step": 39772 + }, + { + "epoch": 1.8631657844193563, + "grad_norm": 0.6054878848482577, + "learning_rate": 6.112540349919011e-08, + "loss": 0.271, + "step": 39773 + }, + { + "epoch": 1.8632126294092846, + "grad_norm": 0.5887188949184218, + "learning_rate": 6.108372952451536e-08, + "loss": 0.2742, + "step": 39774 + }, + { + "epoch": 1.863259474399213, + "grad_norm": 0.5939795833553155, + "learning_rate": 6.104206958519099e-08, + "loss": 0.2664, + "step": 39775 + }, + { + "epoch": 1.8633063193891415, + "grad_norm": 0.5831637249296463, + "learning_rate": 6.100042368145626e-08, + "loss": 0.2758, + "step": 39776 + }, + { + "epoch": 1.8633531643790695, + "grad_norm": 0.6274932674167851, + "learning_rate": 6.095879181355125e-08, + "loss": 0.2772, + "step": 39777 + }, + { + "epoch": 1.863400009368998, + "grad_norm": 0.5631756723624735, + "learning_rate": 6.09171739817152e-08, + "loss": 0.2571, + "step": 39778 + }, + { + "epoch": 1.8634468543589264, + "grad_norm": 0.5906082791039677, + "learning_rate": 6.087557018618795e-08, + "loss": 0.253, + "step": 39779 + }, + { + "epoch": 1.8634936993488547, + "grad_norm": 0.5788374674072542, + "learning_rate": 6.083398042720872e-08, + "loss": 0.2733, + "step": 39780 + }, + { + "epoch": 1.863540544338783, + "grad_norm": 0.6095491710730755, + "learning_rate": 6.07924047050168e-08, + "loss": 0.2803, + "step": 39781 + }, + { + "epoch": 1.8635873893287114, + "grad_norm": 0.6210529780524904, + "learning_rate": 6.075084301985168e-08, + "loss": 0.2888, + "step": 39782 + }, + { + "epoch": 1.8636342343186396, + "grad_norm": 0.5726421787544265, + "learning_rate": 6.070929537195209e-08, + "loss": 0.2739, + "step": 39783 + }, + { + "epoch": 1.8636810793085679, + "grad_norm": 0.5671081353029604, + "learning_rate": 6.066776176155753e-08, + "loss": 0.2681, + "step": 39784 + }, + { + "epoch": 1.8637279242984963, + "grad_norm": 0.6284041975630525, + "learning_rate": 6.062624218890672e-08, + "loss": 0.2652, + "step": 39785 + }, + { + "epoch": 1.8637747692884246, + "grad_norm": 0.6293050872386676, + "learning_rate": 6.05847366542392e-08, + "loss": 0.2837, + "step": 39786 + }, + { + "epoch": 1.8638216142783528, + "grad_norm": 0.5637272495449801, + "learning_rate": 6.054324515779336e-08, + "loss": 0.2565, + "step": 39787 + }, + { + "epoch": 1.8638684592682813, + "grad_norm": 0.5975719865903567, + "learning_rate": 6.050176769980765e-08, + "loss": 0.2623, + "step": 39788 + }, + { + "epoch": 1.8639153042582097, + "grad_norm": 0.5972784298982715, + "learning_rate": 6.04603042805213e-08, + "loss": 0.2751, + "step": 39789 + }, + { + "epoch": 1.8639621492481377, + "grad_norm": 0.5837195335695038, + "learning_rate": 6.041885490017274e-08, + "loss": 0.2627, + "step": 39790 + }, + { + "epoch": 1.8640089942380662, + "grad_norm": 0.6485297525006202, + "learning_rate": 6.03774195590004e-08, + "loss": 0.2802, + "step": 39791 + }, + { + "epoch": 1.8640558392279947, + "grad_norm": 0.5658029519036787, + "learning_rate": 6.033599825724295e-08, + "loss": 0.2529, + "step": 39792 + }, + { + "epoch": 1.864102684217923, + "grad_norm": 0.6223925976455138, + "learning_rate": 6.029459099513857e-08, + "loss": 0.2644, + "step": 39793 + }, + { + "epoch": 1.8641495292078512, + "grad_norm": 0.6221121351521457, + "learning_rate": 6.025319777292593e-08, + "loss": 0.2841, + "step": 39794 + }, + { + "epoch": 1.8641963741977796, + "grad_norm": 0.6126622713721142, + "learning_rate": 6.021181859084263e-08, + "loss": 0.2724, + "step": 39795 + }, + { + "epoch": 1.8642432191877079, + "grad_norm": 0.6014522389684273, + "learning_rate": 6.017045344912709e-08, + "loss": 0.2785, + "step": 39796 + }, + { + "epoch": 1.864290064177636, + "grad_norm": 0.6077282133421699, + "learning_rate": 6.012910234801744e-08, + "loss": 0.2861, + "step": 39797 + }, + { + "epoch": 1.8643369091675646, + "grad_norm": 0.6525052433742864, + "learning_rate": 6.008776528775129e-08, + "loss": 0.2815, + "step": 39798 + }, + { + "epoch": 1.8643837541574928, + "grad_norm": 0.5225563085009453, + "learning_rate": 6.004644226856732e-08, + "loss": 0.242, + "step": 39799 + }, + { + "epoch": 1.864430599147421, + "grad_norm": 0.6268505245424586, + "learning_rate": 6.000513329070256e-08, + "loss": 0.279, + "step": 39800 + }, + { + "epoch": 1.8644774441373495, + "grad_norm": 0.5535116517933585, + "learning_rate": 5.996383835439517e-08, + "loss": 0.2417, + "step": 39801 + }, + { + "epoch": 1.864524289127278, + "grad_norm": 0.593711508176025, + "learning_rate": 5.992255745988219e-08, + "loss": 0.2584, + "step": 39802 + }, + { + "epoch": 1.8645711341172062, + "grad_norm": 0.6019683037184591, + "learning_rate": 5.988129060740173e-08, + "loss": 0.2801, + "step": 39803 + }, + { + "epoch": 1.8646179791071344, + "grad_norm": 0.557651995991983, + "learning_rate": 5.984003779719138e-08, + "loss": 0.2595, + "step": 39804 + }, + { + "epoch": 1.864664824097063, + "grad_norm": 0.5999032374643121, + "learning_rate": 5.979879902948821e-08, + "loss": 0.2643, + "step": 39805 + }, + { + "epoch": 1.8647116690869912, + "grad_norm": 0.5911088886908352, + "learning_rate": 5.97575743045295e-08, + "loss": 0.2606, + "step": 39806 + }, + { + "epoch": 1.8647585140769194, + "grad_norm": 0.5784208731499616, + "learning_rate": 5.971636362255256e-08, + "loss": 0.2494, + "step": 39807 + }, + { + "epoch": 1.8648053590668479, + "grad_norm": 0.616746648853565, + "learning_rate": 5.9675166983795e-08, + "loss": 0.2699, + "step": 39808 + }, + { + "epoch": 1.864852204056776, + "grad_norm": 0.6121305889027517, + "learning_rate": 5.963398438849327e-08, + "loss": 0.2741, + "step": 39809 + }, + { + "epoch": 1.8648990490467043, + "grad_norm": 0.6086620823275485, + "learning_rate": 5.959281583688442e-08, + "loss": 0.2768, + "step": 39810 + }, + { + "epoch": 1.8649458940366328, + "grad_norm": 0.6500179931728909, + "learning_rate": 5.9551661329206036e-08, + "loss": 0.2722, + "step": 39811 + }, + { + "epoch": 1.8649927390265613, + "grad_norm": 0.6187806600060425, + "learning_rate": 5.951052086569403e-08, + "loss": 0.2803, + "step": 39812 + }, + { + "epoch": 1.8650395840164893, + "grad_norm": 0.5732789164532931, + "learning_rate": 5.946939444658545e-08, + "loss": 0.2515, + "step": 39813 + }, + { + "epoch": 1.8650864290064177, + "grad_norm": 0.603030011342488, + "learning_rate": 5.9428282072117595e-08, + "loss": 0.2679, + "step": 39814 + }, + { + "epoch": 1.8651332739963462, + "grad_norm": 0.6056141181539415, + "learning_rate": 5.938718374252611e-08, + "loss": 0.2787, + "step": 39815 + }, + { + "epoch": 1.8651801189862744, + "grad_norm": 0.5956527267491767, + "learning_rate": 5.934609945804804e-08, + "loss": 0.2728, + "step": 39816 + }, + { + "epoch": 1.8652269639762027, + "grad_norm": 0.5897076989778889, + "learning_rate": 5.930502921892012e-08, + "loss": 0.2651, + "step": 39817 + }, + { + "epoch": 1.8652738089661312, + "grad_norm": 0.5839718234715431, + "learning_rate": 5.926397302537801e-08, + "loss": 0.2638, + "step": 39818 + }, + { + "epoch": 1.8653206539560594, + "grad_norm": 0.593194332886877, + "learning_rate": 5.922293087765818e-08, + "loss": 0.265, + "step": 39819 + }, + { + "epoch": 1.8653674989459876, + "grad_norm": 0.5768270281587328, + "learning_rate": 5.9181902775996836e-08, + "loss": 0.2733, + "step": 39820 + }, + { + "epoch": 1.865414343935916, + "grad_norm": 0.586350732350135, + "learning_rate": 5.914088872063073e-08, + "loss": 0.2568, + "step": 39821 + }, + { + "epoch": 1.8654611889258443, + "grad_norm": 0.6085533811073279, + "learning_rate": 5.909988871179467e-08, + "loss": 0.2713, + "step": 39822 + }, + { + "epoch": 1.8655080339157726, + "grad_norm": 0.5974426876105553, + "learning_rate": 5.90589027497257e-08, + "loss": 0.2679, + "step": 39823 + }, + { + "epoch": 1.865554878905701, + "grad_norm": 0.5874917790307832, + "learning_rate": 5.9017930834659176e-08, + "loss": 0.2591, + "step": 39824 + }, + { + "epoch": 1.8656017238956295, + "grad_norm": 0.6073316471986953, + "learning_rate": 5.8976972966830746e-08, + "loss": 0.2798, + "step": 39825 + }, + { + "epoch": 1.8656485688855575, + "grad_norm": 0.5881270864456045, + "learning_rate": 5.8936029146476334e-08, + "loss": 0.2628, + "step": 39826 + }, + { + "epoch": 1.865695413875486, + "grad_norm": 0.5933412218518548, + "learning_rate": 5.8895099373831586e-08, + "loss": 0.2708, + "step": 39827 + }, + { + "epoch": 1.8657422588654144, + "grad_norm": 0.6103761020240545, + "learning_rate": 5.8854183649132144e-08, + "loss": 0.2682, + "step": 39828 + }, + { + "epoch": 1.8657891038553427, + "grad_norm": 0.5448820812801939, + "learning_rate": 5.88132819726131e-08, + "loss": 0.2497, + "step": 39829 + }, + { + "epoch": 1.865835948845271, + "grad_norm": 0.6045014232472673, + "learning_rate": 5.8772394344510096e-08, + "loss": 0.2573, + "step": 39830 + }, + { + "epoch": 1.8658827938351994, + "grad_norm": 0.6280341581321155, + "learning_rate": 5.87315207650585e-08, + "loss": 0.2777, + "step": 39831 + }, + { + "epoch": 1.8659296388251276, + "grad_norm": 0.5944237905472531, + "learning_rate": 5.869066123449313e-08, + "loss": 0.2762, + "step": 39832 + }, + { + "epoch": 1.8659764838150559, + "grad_norm": 0.6211043649053947, + "learning_rate": 5.864981575304962e-08, + "loss": 0.2758, + "step": 39833 + }, + { + "epoch": 1.8660233288049843, + "grad_norm": 0.6084081147311489, + "learning_rate": 5.8608984320962794e-08, + "loss": 0.2801, + "step": 39834 + }, + { + "epoch": 1.8660701737949126, + "grad_norm": 0.6244682877940532, + "learning_rate": 5.856816693846773e-08, + "loss": 0.2682, + "step": 39835 + }, + { + "epoch": 1.8661170187848408, + "grad_norm": 0.5801277764804135, + "learning_rate": 5.8527363605799246e-08, + "loss": 0.2614, + "step": 39836 + }, + { + "epoch": 1.8661638637747693, + "grad_norm": 0.5312065694991374, + "learning_rate": 5.848657432319216e-08, + "loss": 0.2593, + "step": 39837 + }, + { + "epoch": 1.8662107087646977, + "grad_norm": 0.6235013160505957, + "learning_rate": 5.8445799090880995e-08, + "loss": 0.2779, + "step": 39838 + }, + { + "epoch": 1.866257553754626, + "grad_norm": 0.6090877730592741, + "learning_rate": 5.840503790910057e-08, + "loss": 0.2728, + "step": 39839 + }, + { + "epoch": 1.8663043987445542, + "grad_norm": 0.5695833263694403, + "learning_rate": 5.836429077808542e-08, + "loss": 0.2643, + "step": 39840 + }, + { + "epoch": 1.8663512437344827, + "grad_norm": 0.5587511619406865, + "learning_rate": 5.832355769807035e-08, + "loss": 0.2554, + "step": 39841 + }, + { + "epoch": 1.866398088724411, + "grad_norm": 0.6469212234758391, + "learning_rate": 5.828283866928935e-08, + "loss": 0.2823, + "step": 39842 + }, + { + "epoch": 1.8664449337143392, + "grad_norm": 0.5747440014879961, + "learning_rate": 5.824213369197723e-08, + "loss": 0.2594, + "step": 39843 + }, + { + "epoch": 1.8664917787042676, + "grad_norm": 0.6273991676745823, + "learning_rate": 5.820144276636769e-08, + "loss": 0.2685, + "step": 39844 + }, + { + "epoch": 1.8665386236941959, + "grad_norm": 0.6406179183030124, + "learning_rate": 5.816076589269498e-08, + "loss": 0.2745, + "step": 39845 + }, + { + "epoch": 1.8665854686841241, + "grad_norm": 0.6153269083622798, + "learning_rate": 5.812010307119337e-08, + "loss": 0.2669, + "step": 39846 + }, + { + "epoch": 1.8666323136740526, + "grad_norm": 0.6055944753965339, + "learning_rate": 5.807945430209683e-08, + "loss": 0.2814, + "step": 39847 + }, + { + "epoch": 1.866679158663981, + "grad_norm": 0.6411262951165394, + "learning_rate": 5.803881958563962e-08, + "loss": 0.2986, + "step": 39848 + }, + { + "epoch": 1.866726003653909, + "grad_norm": 0.6209089997830213, + "learning_rate": 5.7998198922054885e-08, + "loss": 0.2745, + "step": 39849 + }, + { + "epoch": 1.8667728486438375, + "grad_norm": 0.5878302353117615, + "learning_rate": 5.7957592311576885e-08, + "loss": 0.2622, + "step": 39850 + }, + { + "epoch": 1.866819693633766, + "grad_norm": 0.592606646915443, + "learning_rate": 5.791699975443904e-08, + "loss": 0.2636, + "step": 39851 + }, + { + "epoch": 1.8668665386236942, + "grad_norm": 0.5884860007251949, + "learning_rate": 5.787642125087506e-08, + "loss": 0.2708, + "step": 39852 + }, + { + "epoch": 1.8669133836136225, + "grad_norm": 0.6377113995758157, + "learning_rate": 5.7835856801118365e-08, + "loss": 0.2807, + "step": 39853 + }, + { + "epoch": 1.866960228603551, + "grad_norm": 0.5875981720561058, + "learning_rate": 5.7795306405402926e-08, + "loss": 0.269, + "step": 39854 + }, + { + "epoch": 1.8670070735934792, + "grad_norm": 0.5958995581508946, + "learning_rate": 5.775477006396135e-08, + "loss": 0.2653, + "step": 39855 + }, + { + "epoch": 1.8670539185834074, + "grad_norm": 0.6134261854937969, + "learning_rate": 5.7714247777027334e-08, + "loss": 0.2632, + "step": 39856 + }, + { + "epoch": 1.8671007635733359, + "grad_norm": 0.6224532826797038, + "learning_rate": 5.7673739544834015e-08, + "loss": 0.2701, + "step": 39857 + }, + { + "epoch": 1.8671476085632641, + "grad_norm": 0.579034704713894, + "learning_rate": 5.763324536761428e-08, + "loss": 0.2544, + "step": 39858 + }, + { + "epoch": 1.8671944535531924, + "grad_norm": 0.6394706467733267, + "learning_rate": 5.759276524560154e-08, + "loss": 0.2738, + "step": 39859 + }, + { + "epoch": 1.8672412985431208, + "grad_norm": 0.5901220925326638, + "learning_rate": 5.7552299179028656e-08, + "loss": 0.2663, + "step": 39860 + }, + { + "epoch": 1.8672881435330493, + "grad_norm": 0.6102884809882163, + "learning_rate": 5.7511847168128245e-08, + "loss": 0.2589, + "step": 39861 + }, + { + "epoch": 1.8673349885229773, + "grad_norm": 0.606464879817508, + "learning_rate": 5.747140921313316e-08, + "loss": 0.2767, + "step": 39862 + }, + { + "epoch": 1.8673818335129058, + "grad_norm": 0.5885924956530123, + "learning_rate": 5.743098531427627e-08, + "loss": 0.2793, + "step": 39863 + }, + { + "epoch": 1.8674286785028342, + "grad_norm": 0.6241663661586363, + "learning_rate": 5.7390575471790166e-08, + "loss": 0.2721, + "step": 39864 + }, + { + "epoch": 1.8674755234927625, + "grad_norm": 0.5812406851425312, + "learning_rate": 5.735017968590745e-08, + "loss": 0.2524, + "step": 39865 + }, + { + "epoch": 1.8675223684826907, + "grad_norm": 0.5876192759046249, + "learning_rate": 5.7309797956860424e-08, + "loss": 0.2612, + "step": 39866 + }, + { + "epoch": 1.8675692134726192, + "grad_norm": 0.6058933907018291, + "learning_rate": 5.726943028488169e-08, + "loss": 0.2596, + "step": 39867 + }, + { + "epoch": 1.8676160584625474, + "grad_norm": 0.5610980605940575, + "learning_rate": 5.722907667020328e-08, + "loss": 0.2702, + "step": 39868 + }, + { + "epoch": 1.8676629034524757, + "grad_norm": 0.5639000426026418, + "learning_rate": 5.71887371130575e-08, + "loss": 0.2483, + "step": 39869 + }, + { + "epoch": 1.8677097484424041, + "grad_norm": 0.6296529046216625, + "learning_rate": 5.714841161367668e-08, + "loss": 0.2794, + "step": 39870 + }, + { + "epoch": 1.8677565934323324, + "grad_norm": 0.5548945800494127, + "learning_rate": 5.7108100172292844e-08, + "loss": 0.2533, + "step": 39871 + }, + { + "epoch": 1.8678034384222606, + "grad_norm": 0.5820934187727189, + "learning_rate": 5.7067802789138035e-08, + "loss": 0.269, + "step": 39872 + }, + { + "epoch": 1.867850283412189, + "grad_norm": 0.5715169858484663, + "learning_rate": 5.702751946444346e-08, + "loss": 0.258, + "step": 39873 + }, + { + "epoch": 1.8678971284021175, + "grad_norm": 0.599624361963056, + "learning_rate": 5.698725019844198e-08, + "loss": 0.2579, + "step": 39874 + }, + { + "epoch": 1.8679439733920458, + "grad_norm": 0.5469595336222085, + "learning_rate": 5.694699499136452e-08, + "loss": 0.261, + "step": 39875 + }, + { + "epoch": 1.867990818381974, + "grad_norm": 0.5843541668496272, + "learning_rate": 5.690675384344313e-08, + "loss": 0.2539, + "step": 39876 + }, + { + "epoch": 1.8680376633719025, + "grad_norm": 0.6201396932031646, + "learning_rate": 5.6866526754909555e-08, + "loss": 0.2864, + "step": 39877 + }, + { + "epoch": 1.8680845083618307, + "grad_norm": 0.6216548795706797, + "learning_rate": 5.682631372599501e-08, + "loss": 0.2669, + "step": 39878 + }, + { + "epoch": 1.868131353351759, + "grad_norm": 0.5779610468282725, + "learning_rate": 5.678611475693097e-08, + "loss": 0.2547, + "step": 39879 + }, + { + "epoch": 1.8681781983416874, + "grad_norm": 0.5690119564005346, + "learning_rate": 5.6745929847948634e-08, + "loss": 0.2601, + "step": 39880 + }, + { + "epoch": 1.8682250433316157, + "grad_norm": 0.5813541839713758, + "learning_rate": 5.670575899927921e-08, + "loss": 0.2637, + "step": 39881 + }, + { + "epoch": 1.868271888321544, + "grad_norm": 0.6060907460545261, + "learning_rate": 5.6665602211154195e-08, + "loss": 0.2719, + "step": 39882 + }, + { + "epoch": 1.8683187333114724, + "grad_norm": 0.6110066655692382, + "learning_rate": 5.662545948380449e-08, + "loss": 0.2685, + "step": 39883 + }, + { + "epoch": 1.8683655783014008, + "grad_norm": 0.6040863866047703, + "learning_rate": 5.658533081746159e-08, + "loss": 0.2673, + "step": 39884 + }, + { + "epoch": 1.8684124232913288, + "grad_norm": 0.5422905666736223, + "learning_rate": 5.654521621235559e-08, + "loss": 0.2455, + "step": 39885 + }, + { + "epoch": 1.8684592682812573, + "grad_norm": 0.603030091390137, + "learning_rate": 5.650511566871769e-08, + "loss": 0.2773, + "step": 39886 + }, + { + "epoch": 1.8685061132711858, + "grad_norm": 0.6043249083976637, + "learning_rate": 5.646502918677882e-08, + "loss": 0.2785, + "step": 39887 + }, + { + "epoch": 1.868552958261114, + "grad_norm": 0.6115403392609813, + "learning_rate": 5.6424956766769346e-08, + "loss": 0.2741, + "step": 39888 + }, + { + "epoch": 1.8685998032510422, + "grad_norm": 0.5968140768393106, + "learning_rate": 5.6384898408920196e-08, + "loss": 0.2699, + "step": 39889 + }, + { + "epoch": 1.8686466482409707, + "grad_norm": 0.5956358374906432, + "learning_rate": 5.634485411346202e-08, + "loss": 0.2603, + "step": 39890 + }, + { + "epoch": 1.868693493230899, + "grad_norm": 0.5708303158532395, + "learning_rate": 5.630482388062492e-08, + "loss": 0.2573, + "step": 39891 + }, + { + "epoch": 1.8687403382208272, + "grad_norm": 0.6399328112902396, + "learning_rate": 5.6264807710639245e-08, + "loss": 0.3036, + "step": 39892 + }, + { + "epoch": 1.8687871832107557, + "grad_norm": 0.5574969001929403, + "learning_rate": 5.622480560373539e-08, + "loss": 0.2485, + "step": 39893 + }, + { + "epoch": 1.868834028200684, + "grad_norm": 0.6176237760321281, + "learning_rate": 5.618481756014343e-08, + "loss": 0.2804, + "step": 39894 + }, + { + "epoch": 1.8688808731906121, + "grad_norm": 0.6506223409004686, + "learning_rate": 5.6144843580093754e-08, + "loss": 0.2869, + "step": 39895 + }, + { + "epoch": 1.8689277181805406, + "grad_norm": 0.6191565755629251, + "learning_rate": 5.610488366381644e-08, + "loss": 0.266, + "step": 39896 + }, + { + "epoch": 1.868974563170469, + "grad_norm": 0.5702605904051721, + "learning_rate": 5.6064937811541045e-08, + "loss": 0.2668, + "step": 39897 + }, + { + "epoch": 1.869021408160397, + "grad_norm": 0.6066278405665086, + "learning_rate": 5.602500602349764e-08, + "loss": 0.2685, + "step": 39898 + }, + { + "epoch": 1.8690682531503255, + "grad_norm": 0.5922161776686602, + "learning_rate": 5.598508829991634e-08, + "loss": 0.2564, + "step": 39899 + }, + { + "epoch": 1.869115098140254, + "grad_norm": 0.6189148298769723, + "learning_rate": 5.594518464102638e-08, + "loss": 0.2821, + "step": 39900 + }, + { + "epoch": 1.8691619431301822, + "grad_norm": 0.5945101744099548, + "learning_rate": 5.590529504705733e-08, + "loss": 0.2781, + "step": 39901 + }, + { + "epoch": 1.8692087881201105, + "grad_norm": 0.6323552968901253, + "learning_rate": 5.5865419518239264e-08, + "loss": 0.2891, + "step": 39902 + }, + { + "epoch": 1.869255633110039, + "grad_norm": 0.6123057861857996, + "learning_rate": 5.582555805480172e-08, + "loss": 0.2644, + "step": 39903 + }, + { + "epoch": 1.8693024780999672, + "grad_norm": 0.589517473829712, + "learning_rate": 5.578571065697341e-08, + "loss": 0.2602, + "step": 39904 + }, + { + "epoch": 1.8693493230898954, + "grad_norm": 0.5887660498043051, + "learning_rate": 5.5745877324984154e-08, + "loss": 0.2722, + "step": 39905 + }, + { + "epoch": 1.869396168079824, + "grad_norm": 0.5935328372704655, + "learning_rate": 5.570605805906293e-08, + "loss": 0.287, + "step": 39906 + }, + { + "epoch": 1.8694430130697521, + "grad_norm": 0.6344297669795466, + "learning_rate": 5.5666252859438715e-08, + "loss": 0.2845, + "step": 39907 + }, + { + "epoch": 1.8694898580596804, + "grad_norm": 0.6146478360652233, + "learning_rate": 5.5626461726341065e-08, + "loss": 0.2722, + "step": 39908 + }, + { + "epoch": 1.8695367030496088, + "grad_norm": 0.6099472836619109, + "learning_rate": 5.5586684659998946e-08, + "loss": 0.2631, + "step": 39909 + }, + { + "epoch": 1.8695835480395373, + "grad_norm": 0.5883550185413751, + "learning_rate": 5.5546921660641076e-08, + "loss": 0.2712, + "step": 39910 + }, + { + "epoch": 1.8696303930294655, + "grad_norm": 0.586474816412384, + "learning_rate": 5.550717272849587e-08, + "loss": 0.2828, + "step": 39911 + }, + { + "epoch": 1.8696772380193938, + "grad_norm": 0.570236904237221, + "learning_rate": 5.546743786379316e-08, + "loss": 0.2484, + "step": 39912 + }, + { + "epoch": 1.8697240830093222, + "grad_norm": 0.6118502549973148, + "learning_rate": 5.5427717066760534e-08, + "loss": 0.2738, + "step": 39913 + }, + { + "epoch": 1.8697709279992505, + "grad_norm": 0.6133232599176438, + "learning_rate": 5.5388010337626693e-08, + "loss": 0.2712, + "step": 39914 + }, + { + "epoch": 1.8698177729891787, + "grad_norm": 0.6137016039029733, + "learning_rate": 5.534831767662091e-08, + "loss": 0.2833, + "step": 39915 + }, + { + "epoch": 1.8698646179791072, + "grad_norm": 0.5898315574048995, + "learning_rate": 5.530863908397105e-08, + "loss": 0.2669, + "step": 39916 + }, + { + "epoch": 1.8699114629690354, + "grad_norm": 0.5906873176706755, + "learning_rate": 5.5268974559905264e-08, + "loss": 0.273, + "step": 39917 + }, + { + "epoch": 1.8699583079589637, + "grad_norm": 0.6248762369858709, + "learning_rate": 5.522932410465198e-08, + "loss": 0.2759, + "step": 39918 + }, + { + "epoch": 1.8700051529488921, + "grad_norm": 0.6067812915042606, + "learning_rate": 5.518968771843991e-08, + "loss": 0.2614, + "step": 39919 + }, + { + "epoch": 1.8700519979388206, + "grad_norm": 0.6046276451369229, + "learning_rate": 5.515006540149637e-08, + "loss": 0.2734, + "step": 39920 + }, + { + "epoch": 1.8700988429287486, + "grad_norm": 0.5980339864361176, + "learning_rate": 5.511045715405006e-08, + "loss": 0.2849, + "step": 39921 + }, + { + "epoch": 1.870145687918677, + "grad_norm": 0.5501615828747792, + "learning_rate": 5.50708629763283e-08, + "loss": 0.2456, + "step": 39922 + }, + { + "epoch": 1.8701925329086055, + "grad_norm": 0.6360292031655772, + "learning_rate": 5.5031282868559246e-08, + "loss": 0.2734, + "step": 39923 + }, + { + "epoch": 1.8702393778985338, + "grad_norm": 0.6359504377244428, + "learning_rate": 5.4991716830970485e-08, + "loss": 0.2678, + "step": 39924 + }, + { + "epoch": 1.870286222888462, + "grad_norm": 0.5716313870517211, + "learning_rate": 5.49521648637899e-08, + "loss": 0.2635, + "step": 39925 + }, + { + "epoch": 1.8703330678783905, + "grad_norm": 0.6210055682500999, + "learning_rate": 5.491262696724536e-08, + "loss": 0.2716, + "step": 39926 + }, + { + "epoch": 1.8703799128683187, + "grad_norm": 0.5659766189255091, + "learning_rate": 5.487310314156419e-08, + "loss": 0.2672, + "step": 39927 + }, + { + "epoch": 1.870426757858247, + "grad_norm": 0.6101447409714098, + "learning_rate": 5.4833593386973413e-08, + "loss": 0.2676, + "step": 39928 + }, + { + "epoch": 1.8704736028481754, + "grad_norm": 0.5949881230465259, + "learning_rate": 5.479409770370092e-08, + "loss": 0.2747, + "step": 39929 + }, + { + "epoch": 1.8705204478381037, + "grad_norm": 0.6135159705517017, + "learning_rate": 5.4754616091973464e-08, + "loss": 0.2668, + "step": 39930 + }, + { + "epoch": 1.870567292828032, + "grad_norm": 0.5855489698980153, + "learning_rate": 5.471514855201893e-08, + "loss": 0.2755, + "step": 39931 + }, + { + "epoch": 1.8706141378179604, + "grad_norm": 0.6382905895195514, + "learning_rate": 5.467569508406378e-08, + "loss": 0.2818, + "step": 39932 + }, + { + "epoch": 1.8706609828078888, + "grad_norm": 0.5713818738001848, + "learning_rate": 5.463625568833592e-08, + "loss": 0.2546, + "step": 39933 + }, + { + "epoch": 1.8707078277978169, + "grad_norm": 0.5722232299295509, + "learning_rate": 5.4596830365061805e-08, + "loss": 0.2754, + "step": 39934 + }, + { + "epoch": 1.8707546727877453, + "grad_norm": 0.6120981456036346, + "learning_rate": 5.4557419114467935e-08, + "loss": 0.2819, + "step": 39935 + }, + { + "epoch": 1.8708015177776738, + "grad_norm": 0.5789799219026265, + "learning_rate": 5.451802193678135e-08, + "loss": 0.2614, + "step": 39936 + }, + { + "epoch": 1.870848362767602, + "grad_norm": 0.6012617509659117, + "learning_rate": 5.447863883222909e-08, + "loss": 0.274, + "step": 39937 + }, + { + "epoch": 1.8708952077575303, + "grad_norm": 0.6057546354683048, + "learning_rate": 5.443926980103764e-08, + "loss": 0.2696, + "step": 39938 + }, + { + "epoch": 1.8709420527474587, + "grad_norm": 0.5842071698746099, + "learning_rate": 5.439991484343377e-08, + "loss": 0.2628, + "step": 39939 + }, + { + "epoch": 1.870988897737387, + "grad_norm": 0.6352801243531832, + "learning_rate": 5.436057395964339e-08, + "loss": 0.2686, + "step": 39940 + }, + { + "epoch": 1.8710357427273152, + "grad_norm": 0.6675755995399728, + "learning_rate": 5.432124714989328e-08, + "loss": 0.2938, + "step": 39941 + }, + { + "epoch": 1.8710825877172437, + "grad_norm": 0.6353503453030461, + "learning_rate": 5.428193441440965e-08, + "loss": 0.2688, + "step": 39942 + }, + { + "epoch": 1.871129432707172, + "grad_norm": 0.5905348582551082, + "learning_rate": 5.4242635753418705e-08, + "loss": 0.2679, + "step": 39943 + }, + { + "epoch": 1.8711762776971002, + "grad_norm": 0.5925118571953915, + "learning_rate": 5.4203351167146926e-08, + "loss": 0.2582, + "step": 39944 + }, + { + "epoch": 1.8712231226870286, + "grad_norm": 0.572367552689428, + "learning_rate": 5.4164080655819965e-08, + "loss": 0.2722, + "step": 39945 + }, + { + "epoch": 1.871269967676957, + "grad_norm": 0.6419302510955636, + "learning_rate": 5.412482421966403e-08, + "loss": 0.2784, + "step": 39946 + }, + { + "epoch": 1.8713168126668853, + "grad_norm": 0.5763801831541207, + "learning_rate": 5.4085581858905055e-08, + "loss": 0.2635, + "step": 39947 + }, + { + "epoch": 1.8713636576568136, + "grad_norm": 0.5979947787523424, + "learning_rate": 5.404635357376869e-08, + "loss": 0.2763, + "step": 39948 + }, + { + "epoch": 1.871410502646742, + "grad_norm": 0.5763877666848625, + "learning_rate": 5.4007139364480874e-08, + "loss": 0.2755, + "step": 39949 + }, + { + "epoch": 1.8714573476366703, + "grad_norm": 0.5919735474837305, + "learning_rate": 5.3967939231266975e-08, + "loss": 0.2691, + "step": 39950 + }, + { + "epoch": 1.8715041926265985, + "grad_norm": 0.6312951123709332, + "learning_rate": 5.39287531743532e-08, + "loss": 0.2699, + "step": 39951 + }, + { + "epoch": 1.871551037616527, + "grad_norm": 0.6222265722650305, + "learning_rate": 5.388958119396437e-08, + "loss": 0.2623, + "step": 39952 + }, + { + "epoch": 1.8715978826064552, + "grad_norm": 0.6244033429990506, + "learning_rate": 5.3850423290326136e-08, + "loss": 0.275, + "step": 39953 + }, + { + "epoch": 1.8716447275963835, + "grad_norm": 0.611674167733722, + "learning_rate": 5.381127946366416e-08, + "loss": 0.2735, + "step": 39954 + }, + { + "epoch": 1.871691572586312, + "grad_norm": 0.5480256386342579, + "learning_rate": 5.377214971420325e-08, + "loss": 0.269, + "step": 39955 + }, + { + "epoch": 1.8717384175762404, + "grad_norm": 0.585786544001593, + "learning_rate": 5.373303404216879e-08, + "loss": 0.2546, + "step": 39956 + }, + { + "epoch": 1.8717852625661684, + "grad_norm": 0.6215970653145346, + "learning_rate": 5.369393244778615e-08, + "loss": 0.2817, + "step": 39957 + }, + { + "epoch": 1.8718321075560969, + "grad_norm": 0.6149981113670634, + "learning_rate": 5.3654844931279595e-08, + "loss": 0.275, + "step": 39958 + }, + { + "epoch": 1.8718789525460253, + "grad_norm": 0.6235634649842682, + "learning_rate": 5.361577149287478e-08, + "loss": 0.2716, + "step": 39959 + }, + { + "epoch": 1.8719257975359536, + "grad_norm": 0.6308204366406802, + "learning_rate": 5.357671213279625e-08, + "loss": 0.2911, + "step": 39960 + }, + { + "epoch": 1.8719726425258818, + "grad_norm": 0.5846165779200676, + "learning_rate": 5.35376668512691e-08, + "loss": 0.2715, + "step": 39961 + }, + { + "epoch": 1.8720194875158103, + "grad_norm": 0.5606530576495776, + "learning_rate": 5.3498635648517595e-08, + "loss": 0.2721, + "step": 39962 + }, + { + "epoch": 1.8720663325057385, + "grad_norm": 0.5770545469447326, + "learning_rate": 5.345961852476655e-08, + "loss": 0.2757, + "step": 39963 + }, + { + "epoch": 1.8721131774956667, + "grad_norm": 0.5655728522744934, + "learning_rate": 5.342061548024052e-08, + "loss": 0.27, + "step": 39964 + }, + { + "epoch": 1.8721600224855952, + "grad_norm": 0.5686389579039981, + "learning_rate": 5.3381626515163765e-08, + "loss": 0.2674, + "step": 39965 + }, + { + "epoch": 1.8722068674755235, + "grad_norm": 0.637249177673216, + "learning_rate": 5.3342651629760825e-08, + "loss": 0.2718, + "step": 39966 + }, + { + "epoch": 1.8722537124654517, + "grad_norm": 0.6449148969178796, + "learning_rate": 5.330369082425624e-08, + "loss": 0.285, + "step": 39967 + }, + { + "epoch": 1.8723005574553802, + "grad_norm": 0.6216104315903824, + "learning_rate": 5.3264744098874e-08, + "loss": 0.2672, + "step": 39968 + }, + { + "epoch": 1.8723474024453086, + "grad_norm": 0.614480625401912, + "learning_rate": 5.322581145383782e-08, + "loss": 0.2907, + "step": 39969 + }, + { + "epoch": 1.8723942474352366, + "grad_norm": 0.6205834569028671, + "learning_rate": 5.318689288937251e-08, + "loss": 0.2652, + "step": 39970 + }, + { + "epoch": 1.872441092425165, + "grad_norm": 0.6248293649523032, + "learning_rate": 5.3147988405701237e-08, + "loss": 0.2685, + "step": 39971 + }, + { + "epoch": 1.8724879374150936, + "grad_norm": 0.5327790707064712, + "learning_rate": 5.310909800304853e-08, + "loss": 0.2442, + "step": 39972 + }, + { + "epoch": 1.8725347824050218, + "grad_norm": 0.5621110308529141, + "learning_rate": 5.307022168163783e-08, + "loss": 0.2558, + "step": 39973 + }, + { + "epoch": 1.87258162739495, + "grad_norm": 0.6424948966491468, + "learning_rate": 5.303135944169313e-08, + "loss": 0.2884, + "step": 39974 + }, + { + "epoch": 1.8726284723848785, + "grad_norm": 0.6667154616613085, + "learning_rate": 5.299251128343813e-08, + "loss": 0.2746, + "step": 39975 + }, + { + "epoch": 1.8726753173748067, + "grad_norm": 0.6350487664154773, + "learning_rate": 5.295367720709599e-08, + "loss": 0.2788, + "step": 39976 + }, + { + "epoch": 1.872722162364735, + "grad_norm": 0.5259026264937557, + "learning_rate": 5.2914857212890414e-08, + "loss": 0.2431, + "step": 39977 + }, + { + "epoch": 1.8727690073546635, + "grad_norm": 0.5906609681632158, + "learning_rate": 5.2876051301044565e-08, + "loss": 0.2609, + "step": 39978 + }, + { + "epoch": 1.8728158523445917, + "grad_norm": 0.5819608159694963, + "learning_rate": 5.283725947178214e-08, + "loss": 0.2737, + "step": 39979 + }, + { + "epoch": 1.87286269733452, + "grad_norm": 0.5775778964489282, + "learning_rate": 5.279848172532631e-08, + "loss": 0.2569, + "step": 39980 + }, + { + "epoch": 1.8729095423244484, + "grad_norm": 0.5827149715858408, + "learning_rate": 5.2759718061899944e-08, + "loss": 0.2608, + "step": 39981 + }, + { + "epoch": 1.8729563873143769, + "grad_norm": 0.5626695166276542, + "learning_rate": 5.272096848172647e-08, + "loss": 0.2677, + "step": 39982 + }, + { + "epoch": 1.873003232304305, + "grad_norm": 0.6234377120298229, + "learning_rate": 5.268223298502878e-08, + "loss": 0.284, + "step": 39983 + }, + { + "epoch": 1.8730500772942333, + "grad_norm": 0.587344257243864, + "learning_rate": 5.2643511572029736e-08, + "loss": 0.274, + "step": 39984 + }, + { + "epoch": 1.8730969222841618, + "grad_norm": 0.6445606214954402, + "learning_rate": 5.260480424295194e-08, + "loss": 0.2744, + "step": 39985 + }, + { + "epoch": 1.87314376727409, + "grad_norm": 0.6542748482398394, + "learning_rate": 5.256611099801856e-08, + "loss": 0.2996, + "step": 39986 + }, + { + "epoch": 1.8731906122640183, + "grad_norm": 0.6003945962396113, + "learning_rate": 5.2527431837451906e-08, + "loss": 0.2748, + "step": 39987 + }, + { + "epoch": 1.8732374572539467, + "grad_norm": 0.5653902350341775, + "learning_rate": 5.248876676147485e-08, + "loss": 0.2544, + "step": 39988 + }, + { + "epoch": 1.873284302243875, + "grad_norm": 0.5763138780604743, + "learning_rate": 5.2450115770309725e-08, + "loss": 0.269, + "step": 39989 + }, + { + "epoch": 1.8733311472338032, + "grad_norm": 0.5940750252481447, + "learning_rate": 5.241147886417913e-08, + "loss": 0.2716, + "step": 39990 + }, + { + "epoch": 1.8733779922237317, + "grad_norm": 0.5768770560667146, + "learning_rate": 5.237285604330511e-08, + "loss": 0.2744, + "step": 39991 + }, + { + "epoch": 1.8734248372136602, + "grad_norm": 0.6124636452593872, + "learning_rate": 5.233424730791026e-08, + "loss": 0.2559, + "step": 39992 + }, + { + "epoch": 1.8734716822035882, + "grad_norm": 0.5687334123795865, + "learning_rate": 5.229565265821662e-08, + "loss": 0.2606, + "step": 39993 + }, + { + "epoch": 1.8735185271935166, + "grad_norm": 0.6029883027680925, + "learning_rate": 5.225707209444625e-08, + "loss": 0.2793, + "step": 39994 + }, + { + "epoch": 1.873565372183445, + "grad_norm": 0.5661936820311774, + "learning_rate": 5.221850561682118e-08, + "loss": 0.2593, + "step": 39995 + }, + { + "epoch": 1.8736122171733733, + "grad_norm": 0.5880030361895403, + "learning_rate": 5.217995322556346e-08, + "loss": 0.2605, + "step": 39996 + }, + { + "epoch": 1.8736590621633016, + "grad_norm": 0.5760831389406971, + "learning_rate": 5.214141492089486e-08, + "loss": 0.2722, + "step": 39997 + }, + { + "epoch": 1.87370590715323, + "grad_norm": 0.5817030616574543, + "learning_rate": 5.2102890703037146e-08, + "loss": 0.2728, + "step": 39998 + }, + { + "epoch": 1.8737527521431583, + "grad_norm": 0.5531715926075538, + "learning_rate": 5.20643805722118e-08, + "loss": 0.2571, + "step": 39999 + }, + { + "epoch": 1.8737995971330865, + "grad_norm": 0.5263506232990599, + "learning_rate": 5.202588452864116e-08, + "loss": 0.2401, + "step": 40000 + }, + { + "epoch": 1.873846442123015, + "grad_norm": 0.5955737163266291, + "learning_rate": 5.198740257254586e-08, + "loss": 0.2623, + "step": 40001 + }, + { + "epoch": 1.8738932871129432, + "grad_norm": 0.6077279466418876, + "learning_rate": 5.194893470414797e-08, + "loss": 0.2803, + "step": 40002 + }, + { + "epoch": 1.8739401321028715, + "grad_norm": 0.6047883038804607, + "learning_rate": 5.1910480923668684e-08, + "loss": 0.2756, + "step": 40003 + }, + { + "epoch": 1.8739869770928, + "grad_norm": 0.5940420338846372, + "learning_rate": 5.187204123132922e-08, + "loss": 0.2777, + "step": 40004 + }, + { + "epoch": 1.8740338220827284, + "grad_norm": 0.6286381752095076, + "learning_rate": 5.183361562735079e-08, + "loss": 0.2866, + "step": 40005 + }, + { + "epoch": 1.8740806670726564, + "grad_norm": 0.5749487684174347, + "learning_rate": 5.179520411195488e-08, + "loss": 0.2597, + "step": 40006 + }, + { + "epoch": 1.8741275120625849, + "grad_norm": 0.6078539458399054, + "learning_rate": 5.175680668536187e-08, + "loss": 0.2665, + "step": 40007 + }, + { + "epoch": 1.8741743570525133, + "grad_norm": 0.616150828158688, + "learning_rate": 5.1718423347793256e-08, + "loss": 0.2768, + "step": 40008 + }, + { + "epoch": 1.8742212020424416, + "grad_norm": 0.5889095871170551, + "learning_rate": 5.168005409946969e-08, + "loss": 0.2693, + "step": 40009 + }, + { + "epoch": 1.8742680470323698, + "grad_norm": 0.6181129611411336, + "learning_rate": 5.1641698940612105e-08, + "loss": 0.2818, + "step": 40010 + }, + { + "epoch": 1.8743148920222983, + "grad_norm": 0.5950839931267169, + "learning_rate": 5.160335787144116e-08, + "loss": 0.2735, + "step": 40011 + }, + { + "epoch": 1.8743617370122265, + "grad_norm": 0.6205218816824242, + "learning_rate": 5.1565030892177793e-08, + "loss": 0.2748, + "step": 40012 + }, + { + "epoch": 1.8744085820021548, + "grad_norm": 0.575006238625284, + "learning_rate": 5.152671800304182e-08, + "loss": 0.2587, + "step": 40013 + }, + { + "epoch": 1.8744554269920832, + "grad_norm": 0.5505584999986579, + "learning_rate": 5.148841920425446e-08, + "loss": 0.2623, + "step": 40014 + }, + { + "epoch": 1.8745022719820115, + "grad_norm": 0.5391174335239738, + "learning_rate": 5.1450134496035534e-08, + "loss": 0.2509, + "step": 40015 + }, + { + "epoch": 1.8745491169719397, + "grad_norm": 0.5856276062584358, + "learning_rate": 5.1411863878605693e-08, + "loss": 0.2626, + "step": 40016 + }, + { + "epoch": 1.8745959619618682, + "grad_norm": 0.6294558079116686, + "learning_rate": 5.1373607352185603e-08, + "loss": 0.2851, + "step": 40017 + }, + { + "epoch": 1.8746428069517966, + "grad_norm": 0.594912980402242, + "learning_rate": 5.133536491699481e-08, + "loss": 0.2612, + "step": 40018 + }, + { + "epoch": 1.8746896519417249, + "grad_norm": 0.5698890506453667, + "learning_rate": 5.129713657325341e-08, + "loss": 0.2697, + "step": 40019 + }, + { + "epoch": 1.8747364969316531, + "grad_norm": 0.5837926786911366, + "learning_rate": 5.125892232118152e-08, + "loss": 0.2465, + "step": 40020 + }, + { + "epoch": 1.8747833419215816, + "grad_norm": 0.5854568054582281, + "learning_rate": 5.122072216099894e-08, + "loss": 0.2658, + "step": 40021 + }, + { + "epoch": 1.8748301869115098, + "grad_norm": 0.6004298873725181, + "learning_rate": 5.118253609292578e-08, + "loss": 0.2761, + "step": 40022 + }, + { + "epoch": 1.874877031901438, + "grad_norm": 0.6497285189784882, + "learning_rate": 5.1144364117181597e-08, + "loss": 0.285, + "step": 40023 + }, + { + "epoch": 1.8749238768913665, + "grad_norm": 0.6285459110912972, + "learning_rate": 5.110620623398621e-08, + "loss": 0.2813, + "step": 40024 + }, + { + "epoch": 1.8749707218812948, + "grad_norm": 0.6264026188916977, + "learning_rate": 5.1068062443559163e-08, + "loss": 0.2702, + "step": 40025 + }, + { + "epoch": 1.875017566871223, + "grad_norm": 0.5947001706835328, + "learning_rate": 5.1029932746119724e-08, + "loss": 0.254, + "step": 40026 + }, + { + "epoch": 1.8750644118611515, + "grad_norm": 0.5908479050174248, + "learning_rate": 5.0991817141887444e-08, + "loss": 0.2623, + "step": 40027 + }, + { + "epoch": 1.87511125685108, + "grad_norm": 0.5790783498381733, + "learning_rate": 5.095371563108159e-08, + "loss": 0.2426, + "step": 40028 + }, + { + "epoch": 1.875158101841008, + "grad_norm": 0.6457175379601203, + "learning_rate": 5.091562821392171e-08, + "loss": 0.2877, + "step": 40029 + }, + { + "epoch": 1.8752049468309364, + "grad_norm": 0.637500053952122, + "learning_rate": 5.087755489062707e-08, + "loss": 0.2826, + "step": 40030 + }, + { + "epoch": 1.8752517918208649, + "grad_norm": 0.5812216098910478, + "learning_rate": 5.083949566141638e-08, + "loss": 0.2767, + "step": 40031 + }, + { + "epoch": 1.8752986368107931, + "grad_norm": 0.6037466804942594, + "learning_rate": 5.080145052650892e-08, + "loss": 0.2838, + "step": 40032 + }, + { + "epoch": 1.8753454818007214, + "grad_norm": 0.5889942594043169, + "learning_rate": 5.07634194861234e-08, + "loss": 0.2768, + "step": 40033 + }, + { + "epoch": 1.8753923267906498, + "grad_norm": 0.601935111947522, + "learning_rate": 5.072540254047881e-08, + "loss": 0.2599, + "step": 40034 + }, + { + "epoch": 1.875439171780578, + "grad_norm": 0.6011019850162795, + "learning_rate": 5.068739968979386e-08, + "loss": 0.2756, + "step": 40035 + }, + { + "epoch": 1.8754860167705063, + "grad_norm": 0.6069770330517362, + "learning_rate": 5.064941093428727e-08, + "loss": 0.2623, + "step": 40036 + }, + { + "epoch": 1.8755328617604348, + "grad_norm": 0.642528469283594, + "learning_rate": 5.061143627417803e-08, + "loss": 0.2819, + "step": 40037 + }, + { + "epoch": 1.875579706750363, + "grad_norm": 0.614274057368157, + "learning_rate": 5.0573475709684015e-08, + "loss": 0.274, + "step": 40038 + }, + { + "epoch": 1.8756265517402912, + "grad_norm": 0.5776706019122054, + "learning_rate": 5.0535529241024226e-08, + "loss": 0.2533, + "step": 40039 + }, + { + "epoch": 1.8756733967302197, + "grad_norm": 0.590499422388232, + "learning_rate": 5.049759686841682e-08, + "loss": 0.2588, + "step": 40040 + }, + { + "epoch": 1.8757202417201482, + "grad_norm": 0.5906449427937229, + "learning_rate": 5.0459678592079954e-08, + "loss": 0.263, + "step": 40041 + }, + { + "epoch": 1.8757670867100762, + "grad_norm": 0.6031522160431761, + "learning_rate": 5.0421774412231785e-08, + "loss": 0.2815, + "step": 40042 + }, + { + "epoch": 1.8758139317000047, + "grad_norm": 0.61317515722346, + "learning_rate": 5.0383884329091027e-08, + "loss": 0.2708, + "step": 40043 + }, + { + "epoch": 1.8758607766899331, + "grad_norm": 0.5592424128404064, + "learning_rate": 5.034600834287501e-08, + "loss": 0.2569, + "step": 40044 + }, + { + "epoch": 1.8759076216798614, + "grad_norm": 0.6022196081661763, + "learning_rate": 5.0308146453802444e-08, + "loss": 0.27, + "step": 40045 + }, + { + "epoch": 1.8759544666697896, + "grad_norm": 0.5997973016139277, + "learning_rate": 5.027029866209038e-08, + "loss": 0.2552, + "step": 40046 + }, + { + "epoch": 1.876001311659718, + "grad_norm": 0.6390483665977145, + "learning_rate": 5.023246496795697e-08, + "loss": 0.2852, + "step": 40047 + }, + { + "epoch": 1.8760481566496463, + "grad_norm": 0.5792757541969904, + "learning_rate": 5.019464537162011e-08, + "loss": 0.2798, + "step": 40048 + }, + { + "epoch": 1.8760950016395745, + "grad_norm": 0.6171520946736935, + "learning_rate": 5.015683987329767e-08, + "loss": 0.2762, + "step": 40049 + }, + { + "epoch": 1.876141846629503, + "grad_norm": 0.6254502277823403, + "learning_rate": 5.011904847320642e-08, + "loss": 0.2798, + "step": 40050 + }, + { + "epoch": 1.8761886916194312, + "grad_norm": 0.5884100028920956, + "learning_rate": 5.0081271171564526e-08, + "loss": 0.2814, + "step": 40051 + }, + { + "epoch": 1.8762355366093595, + "grad_norm": 0.5300396380914364, + "learning_rate": 5.004350796858931e-08, + "loss": 0.234, + "step": 40052 + }, + { + "epoch": 1.876282381599288, + "grad_norm": 0.6297595179279009, + "learning_rate": 5.000575886449754e-08, + "loss": 0.2782, + "step": 40053 + }, + { + "epoch": 1.8763292265892164, + "grad_norm": 0.5924200052383066, + "learning_rate": 4.996802385950711e-08, + "loss": 0.2719, + "step": 40054 + }, + { + "epoch": 1.8763760715791447, + "grad_norm": 0.5876872425595457, + "learning_rate": 4.993030295383478e-08, + "loss": 0.2762, + "step": 40055 + }, + { + "epoch": 1.876422916569073, + "grad_norm": 0.5926460281924688, + "learning_rate": 4.989259614769787e-08, + "loss": 0.2599, + "step": 40056 + }, + { + "epoch": 1.8764697615590014, + "grad_norm": 0.577345731968222, + "learning_rate": 4.985490344131316e-08, + "loss": 0.27, + "step": 40057 + }, + { + "epoch": 1.8765166065489296, + "grad_norm": 0.6402503417070755, + "learning_rate": 4.981722483489743e-08, + "loss": 0.2758, + "step": 40058 + }, + { + "epoch": 1.8765634515388578, + "grad_norm": 0.6253448491509732, + "learning_rate": 4.9779560328668264e-08, + "loss": 0.2842, + "step": 40059 + }, + { + "epoch": 1.8766102965287863, + "grad_norm": 0.623070062270877, + "learning_rate": 4.974190992284161e-08, + "loss": 0.2769, + "step": 40060 + }, + { + "epoch": 1.8766571415187145, + "grad_norm": 0.6062104255143234, + "learning_rate": 4.9704273617634525e-08, + "loss": 0.2749, + "step": 40061 + }, + { + "epoch": 1.8767039865086428, + "grad_norm": 0.6223384181009971, + "learning_rate": 4.9666651413263214e-08, + "loss": 0.2726, + "step": 40062 + }, + { + "epoch": 1.8767508314985712, + "grad_norm": 0.5819194063354651, + "learning_rate": 4.9629043309944725e-08, + "loss": 0.2651, + "step": 40063 + }, + { + "epoch": 1.8767976764884997, + "grad_norm": 0.5980821789078744, + "learning_rate": 4.9591449307895e-08, + "loss": 0.2606, + "step": 40064 + }, + { + "epoch": 1.8768445214784277, + "grad_norm": 0.5412330622123782, + "learning_rate": 4.955386940733054e-08, + "loss": 0.2618, + "step": 40065 + }, + { + "epoch": 1.8768913664683562, + "grad_norm": 0.5839282168283799, + "learning_rate": 4.9516303608468095e-08, + "loss": 0.2718, + "step": 40066 + }, + { + "epoch": 1.8769382114582847, + "grad_norm": 0.6521404183608468, + "learning_rate": 4.947875191152335e-08, + "loss": 0.2953, + "step": 40067 + }, + { + "epoch": 1.876985056448213, + "grad_norm": 0.6295940142194131, + "learning_rate": 4.9441214316712224e-08, + "loss": 0.2844, + "step": 40068 + }, + { + "epoch": 1.8770319014381411, + "grad_norm": 0.639076254588082, + "learning_rate": 4.940369082425095e-08, + "loss": 0.2644, + "step": 40069 + }, + { + "epoch": 1.8770787464280696, + "grad_norm": 0.6031930582869021, + "learning_rate": 4.936618143435545e-08, + "loss": 0.2765, + "step": 40070 + }, + { + "epoch": 1.8771255914179978, + "grad_norm": 0.5606080794097035, + "learning_rate": 4.932868614724168e-08, + "loss": 0.2571, + "step": 40071 + }, + { + "epoch": 1.877172436407926, + "grad_norm": 0.6474688686357064, + "learning_rate": 4.929120496312556e-08, + "loss": 0.2984, + "step": 40072 + }, + { + "epoch": 1.8772192813978545, + "grad_norm": 0.56549878597615, + "learning_rate": 4.9253737882222485e-08, + "loss": 0.256, + "step": 40073 + }, + { + "epoch": 1.8772661263877828, + "grad_norm": 0.5876966250999395, + "learning_rate": 4.921628490474839e-08, + "loss": 0.2641, + "step": 40074 + }, + { + "epoch": 1.877312971377711, + "grad_norm": 0.5739217896744427, + "learning_rate": 4.917884603091838e-08, + "loss": 0.2759, + "step": 40075 + }, + { + "epoch": 1.8773598163676395, + "grad_norm": 0.591055869594857, + "learning_rate": 4.914142126094812e-08, + "loss": 0.2644, + "step": 40076 + }, + { + "epoch": 1.877406661357568, + "grad_norm": 0.6621543630548484, + "learning_rate": 4.9104010595052706e-08, + "loss": 0.2937, + "step": 40077 + }, + { + "epoch": 1.877453506347496, + "grad_norm": 0.6383569844320507, + "learning_rate": 4.906661403344809e-08, + "loss": 0.2793, + "step": 40078 + }, + { + "epoch": 1.8775003513374244, + "grad_norm": 0.5910754200292981, + "learning_rate": 4.902923157634909e-08, + "loss": 0.2545, + "step": 40079 + }, + { + "epoch": 1.877547196327353, + "grad_norm": 0.6444411745515216, + "learning_rate": 4.899186322397054e-08, + "loss": 0.2821, + "step": 40080 + }, + { + "epoch": 1.8775940413172811, + "grad_norm": 0.5951088474023088, + "learning_rate": 4.895450897652837e-08, + "loss": 0.2747, + "step": 40081 + }, + { + "epoch": 1.8776408863072094, + "grad_norm": 0.5877818231003391, + "learning_rate": 4.8917168834236306e-08, + "loss": 0.263, + "step": 40082 + }, + { + "epoch": 1.8776877312971378, + "grad_norm": 0.5852100820738859, + "learning_rate": 4.887984279731001e-08, + "loss": 0.2666, + "step": 40083 + }, + { + "epoch": 1.877734576287066, + "grad_norm": 0.6085517504579824, + "learning_rate": 4.88425308659643e-08, + "loss": 0.2785, + "step": 40084 + }, + { + "epoch": 1.8777814212769943, + "grad_norm": 0.6086198949029267, + "learning_rate": 4.880523304041401e-08, + "loss": 0.2814, + "step": 40085 + }, + { + "epoch": 1.8778282662669228, + "grad_norm": 0.5870735452405296, + "learning_rate": 4.8767949320873144e-08, + "loss": 0.264, + "step": 40086 + }, + { + "epoch": 1.877875111256851, + "grad_norm": 0.6045684507680915, + "learning_rate": 4.8730679707556513e-08, + "loss": 0.2668, + "step": 40087 + }, + { + "epoch": 1.8779219562467793, + "grad_norm": 0.6156621711138465, + "learning_rate": 4.869342420067924e-08, + "loss": 0.2903, + "step": 40088 + }, + { + "epoch": 1.8779688012367077, + "grad_norm": 0.5914407429524118, + "learning_rate": 4.865618280045475e-08, + "loss": 0.2733, + "step": 40089 + }, + { + "epoch": 1.8780156462266362, + "grad_norm": 0.5406427140743707, + "learning_rate": 4.861895550709789e-08, + "loss": 0.2588, + "step": 40090 + }, + { + "epoch": 1.8780624912165644, + "grad_norm": 0.5879548670136235, + "learning_rate": 4.858174232082291e-08, + "loss": 0.2667, + "step": 40091 + }, + { + "epoch": 1.8781093362064927, + "grad_norm": 0.5945999096130352, + "learning_rate": 4.854454324184382e-08, + "loss": 0.2578, + "step": 40092 + }, + { + "epoch": 1.8781561811964211, + "grad_norm": 0.6143364898522808, + "learning_rate": 4.850735827037462e-08, + "loss": 0.282, + "step": 40093 + }, + { + "epoch": 1.8782030261863494, + "grad_norm": 0.6209259846352128, + "learning_rate": 4.8470187406629846e-08, + "loss": 0.2758, + "step": 40094 + }, + { + "epoch": 1.8782498711762776, + "grad_norm": 0.5933507987172609, + "learning_rate": 4.843303065082239e-08, + "loss": 0.2681, + "step": 40095 + }, + { + "epoch": 1.878296716166206, + "grad_norm": 0.5954662174490374, + "learning_rate": 4.83958880031668e-08, + "loss": 0.2756, + "step": 40096 + }, + { + "epoch": 1.8783435611561343, + "grad_norm": 0.5945004958187109, + "learning_rate": 4.835875946387708e-08, + "loss": 0.2626, + "step": 40097 + }, + { + "epoch": 1.8783904061460626, + "grad_norm": 0.5979902735063864, + "learning_rate": 4.832164503316611e-08, + "loss": 0.2757, + "step": 40098 + }, + { + "epoch": 1.878437251135991, + "grad_norm": 0.5824140996166453, + "learning_rate": 4.828454471124788e-08, + "loss": 0.2508, + "step": 40099 + }, + { + "epoch": 1.8784840961259195, + "grad_norm": 0.6296236646586612, + "learning_rate": 4.824745849833612e-08, + "loss": 0.2699, + "step": 40100 + }, + { + "epoch": 1.8785309411158475, + "grad_norm": 0.6037403782452606, + "learning_rate": 4.821038639464398e-08, + "loss": 0.2581, + "step": 40101 + }, + { + "epoch": 1.878577786105776, + "grad_norm": 0.6589456221514912, + "learning_rate": 4.8173328400384635e-08, + "loss": 0.2804, + "step": 40102 + }, + { + "epoch": 1.8786246310957044, + "grad_norm": 0.6038790420537468, + "learning_rate": 4.8136284515771517e-08, + "loss": 0.2775, + "step": 40103 + }, + { + "epoch": 1.8786714760856327, + "grad_norm": 0.6043610048462662, + "learning_rate": 4.8099254741018066e-08, + "loss": 0.2743, + "step": 40104 + }, + { + "epoch": 1.878718321075561, + "grad_norm": 0.5782685127763997, + "learning_rate": 4.806223907633717e-08, + "loss": 0.2641, + "step": 40105 + }, + { + "epoch": 1.8787651660654894, + "grad_norm": 0.60534417347771, + "learning_rate": 4.802523752194144e-08, + "loss": 0.2526, + "step": 40106 + }, + { + "epoch": 1.8788120110554176, + "grad_norm": 0.632473722008837, + "learning_rate": 4.7988250078044305e-08, + "loss": 0.2899, + "step": 40107 + }, + { + "epoch": 1.8788588560453459, + "grad_norm": 0.6072429376561188, + "learning_rate": 4.795127674485894e-08, + "loss": 0.261, + "step": 40108 + }, + { + "epoch": 1.8789057010352743, + "grad_norm": 0.5722015587244673, + "learning_rate": 4.791431752259712e-08, + "loss": 0.256, + "step": 40109 + }, + { + "epoch": 1.8789525460252026, + "grad_norm": 0.5872701631881891, + "learning_rate": 4.787737241147256e-08, + "loss": 0.2658, + "step": 40110 + }, + { + "epoch": 1.8789993910151308, + "grad_norm": 0.5893819144211289, + "learning_rate": 4.7840441411697024e-08, + "loss": 0.2709, + "step": 40111 + }, + { + "epoch": 1.8790462360050593, + "grad_norm": 0.6526071344633545, + "learning_rate": 4.780352452348369e-08, + "loss": 0.2777, + "step": 40112 + }, + { + "epoch": 1.8790930809949877, + "grad_norm": 0.603121671540779, + "learning_rate": 4.776662174704461e-08, + "loss": 0.2543, + "step": 40113 + }, + { + "epoch": 1.8791399259849157, + "grad_norm": 0.5989753496548583, + "learning_rate": 4.77297330825921e-08, + "loss": 0.2656, + "step": 40114 + }, + { + "epoch": 1.8791867709748442, + "grad_norm": 0.5847491271642911, + "learning_rate": 4.769285853033906e-08, + "loss": 0.2633, + "step": 40115 + }, + { + "epoch": 1.8792336159647727, + "grad_norm": 0.6302831495764352, + "learning_rate": 4.765599809049698e-08, + "loss": 0.2687, + "step": 40116 + }, + { + "epoch": 1.879280460954701, + "grad_norm": 0.6037387194598192, + "learning_rate": 4.7619151763278193e-08, + "loss": 0.2596, + "step": 40117 + }, + { + "epoch": 1.8793273059446292, + "grad_norm": 0.5778953295310384, + "learning_rate": 4.758231954889475e-08, + "loss": 0.267, + "step": 40118 + }, + { + "epoch": 1.8793741509345576, + "grad_norm": 0.6287584936279459, + "learning_rate": 4.7545501447558985e-08, + "loss": 0.2914, + "step": 40119 + }, + { + "epoch": 1.8794209959244859, + "grad_norm": 0.6039826212060172, + "learning_rate": 4.750869745948211e-08, + "loss": 0.2733, + "step": 40120 + }, + { + "epoch": 1.879467840914414, + "grad_norm": 0.5969715570952491, + "learning_rate": 4.7471907584876465e-08, + "loss": 0.2759, + "step": 40121 + }, + { + "epoch": 1.8795146859043426, + "grad_norm": 0.5781050179325355, + "learning_rate": 4.743513182395354e-08, + "loss": 0.272, + "step": 40122 + }, + { + "epoch": 1.8795615308942708, + "grad_norm": 0.5883894719873619, + "learning_rate": 4.739837017692511e-08, + "loss": 0.2767, + "step": 40123 + }, + { + "epoch": 1.879608375884199, + "grad_norm": 0.6047012489931327, + "learning_rate": 4.736162264400268e-08, + "loss": 0.296, + "step": 40124 + }, + { + "epoch": 1.8796552208741275, + "grad_norm": 0.6205122493821817, + "learning_rate": 4.7324889225397184e-08, + "loss": 0.2812, + "step": 40125 + }, + { + "epoch": 1.879702065864056, + "grad_norm": 0.6295907226045182, + "learning_rate": 4.728816992132096e-08, + "loss": 0.2835, + "step": 40126 + }, + { + "epoch": 1.8797489108539842, + "grad_norm": 0.6065042286561886, + "learning_rate": 4.7251464731984664e-08, + "loss": 0.2793, + "step": 40127 + }, + { + "epoch": 1.8797957558439125, + "grad_norm": 0.6147891261159427, + "learning_rate": 4.72147736575998e-08, + "loss": 0.278, + "step": 40128 + }, + { + "epoch": 1.879842600833841, + "grad_norm": 0.5977716096771677, + "learning_rate": 4.7178096698377307e-08, + "loss": 0.2837, + "step": 40129 + }, + { + "epoch": 1.8798894458237692, + "grad_norm": 0.6414397541306356, + "learning_rate": 4.7141433854528687e-08, + "loss": 0.2842, + "step": 40130 + }, + { + "epoch": 1.8799362908136974, + "grad_norm": 0.5880228861519824, + "learning_rate": 4.710478512626432e-08, + "loss": 0.2731, + "step": 40131 + }, + { + "epoch": 1.8799831358036259, + "grad_norm": 0.6211466694979526, + "learning_rate": 4.706815051379543e-08, + "loss": 0.2732, + "step": 40132 + }, + { + "epoch": 1.880029980793554, + "grad_norm": 0.6202970233031146, + "learning_rate": 4.7031530017332686e-08, + "loss": 0.2673, + "step": 40133 + }, + { + "epoch": 1.8800768257834823, + "grad_norm": 0.6545910357798845, + "learning_rate": 4.69949236370873e-08, + "loss": 0.2856, + "step": 40134 + }, + { + "epoch": 1.8801236707734108, + "grad_norm": 0.5931357512276378, + "learning_rate": 4.6958331373269386e-08, + "loss": 0.274, + "step": 40135 + }, + { + "epoch": 1.8801705157633393, + "grad_norm": 0.6204705051989882, + "learning_rate": 4.692175322608988e-08, + "loss": 0.28, + "step": 40136 + }, + { + "epoch": 1.8802173607532673, + "grad_norm": 0.6101593813699488, + "learning_rate": 4.6885189195758906e-08, + "loss": 0.2726, + "step": 40137 + }, + { + "epoch": 1.8802642057431957, + "grad_norm": 0.6039217306493588, + "learning_rate": 4.684863928248712e-08, + "loss": 0.2687, + "step": 40138 + }, + { + "epoch": 1.8803110507331242, + "grad_norm": 0.5958863658146586, + "learning_rate": 4.681210348648463e-08, + "loss": 0.2789, + "step": 40139 + }, + { + "epoch": 1.8803578957230525, + "grad_norm": 0.5572946612856772, + "learning_rate": 4.677558180796238e-08, + "loss": 0.258, + "step": 40140 + }, + { + "epoch": 1.8804047407129807, + "grad_norm": 0.6069045192385857, + "learning_rate": 4.6739074247129925e-08, + "loss": 0.271, + "step": 40141 + }, + { + "epoch": 1.8804515857029092, + "grad_norm": 0.568945290360588, + "learning_rate": 4.6702580804197376e-08, + "loss": 0.256, + "step": 40142 + }, + { + "epoch": 1.8804984306928374, + "grad_norm": 0.6351533829523488, + "learning_rate": 4.6666101479374834e-08, + "loss": 0.3008, + "step": 40143 + }, + { + "epoch": 1.8805452756827656, + "grad_norm": 0.6139350365556814, + "learning_rate": 4.662963627287242e-08, + "loss": 0.273, + "step": 40144 + }, + { + "epoch": 1.880592120672694, + "grad_norm": 0.6064211786061176, + "learning_rate": 4.659318518489969e-08, + "loss": 0.2753, + "step": 40145 + }, + { + "epoch": 1.8806389656626223, + "grad_norm": 0.6000198275803004, + "learning_rate": 4.655674821566675e-08, + "loss": 0.2729, + "step": 40146 + }, + { + "epoch": 1.8806858106525506, + "grad_norm": 0.5553581145551153, + "learning_rate": 4.652032536538259e-08, + "loss": 0.2485, + "step": 40147 + }, + { + "epoch": 1.880732655642479, + "grad_norm": 0.5749372661521535, + "learning_rate": 4.648391663425761e-08, + "loss": 0.2531, + "step": 40148 + }, + { + "epoch": 1.8807795006324075, + "grad_norm": 0.6238743936555499, + "learning_rate": 4.644752202250108e-08, + "loss": 0.2718, + "step": 40149 + }, + { + "epoch": 1.8808263456223355, + "grad_norm": 0.6047620084723331, + "learning_rate": 4.641114153032228e-08, + "loss": 0.285, + "step": 40150 + }, + { + "epoch": 1.880873190612264, + "grad_norm": 0.6365142877573727, + "learning_rate": 4.637477515793076e-08, + "loss": 0.273, + "step": 40151 + }, + { + "epoch": 1.8809200356021925, + "grad_norm": 0.5877596002180872, + "learning_rate": 4.633842290553581e-08, + "loss": 0.2681, + "step": 40152 + }, + { + "epoch": 1.8809668805921207, + "grad_norm": 0.607682947973624, + "learning_rate": 4.6302084773346415e-08, + "loss": 0.2779, + "step": 40153 + }, + { + "epoch": 1.881013725582049, + "grad_norm": 0.5943012800331143, + "learning_rate": 4.626576076157158e-08, + "loss": 0.2773, + "step": 40154 + }, + { + "epoch": 1.8810605705719774, + "grad_norm": 0.5808672863319337, + "learning_rate": 4.622945087042086e-08, + "loss": 0.2582, + "step": 40155 + }, + { + "epoch": 1.8811074155619056, + "grad_norm": 0.590305946997166, + "learning_rate": 4.619315510010269e-08, + "loss": 0.2692, + "step": 40156 + }, + { + "epoch": 1.8811542605518339, + "grad_norm": 0.6085253035626742, + "learning_rate": 4.615687345082637e-08, + "loss": 0.2683, + "step": 40157 + }, + { + "epoch": 1.8812011055417623, + "grad_norm": 0.5524665698438276, + "learning_rate": 4.6120605922800597e-08, + "loss": 0.2484, + "step": 40158 + }, + { + "epoch": 1.8812479505316906, + "grad_norm": 0.5956880700920304, + "learning_rate": 4.6084352516233836e-08, + "loss": 0.2703, + "step": 40159 + }, + { + "epoch": 1.8812947955216188, + "grad_norm": 0.5941373843564682, + "learning_rate": 4.604811323133479e-08, + "loss": 0.2677, + "step": 40160 + }, + { + "epoch": 1.8813416405115473, + "grad_norm": 0.6275050864552129, + "learning_rate": 4.6011888068311916e-08, + "loss": 0.2809, + "step": 40161 + }, + { + "epoch": 1.8813884855014757, + "grad_norm": 0.571376217328298, + "learning_rate": 4.5975677027373935e-08, + "loss": 0.2587, + "step": 40162 + }, + { + "epoch": 1.881435330491404, + "grad_norm": 0.5873089584324417, + "learning_rate": 4.5939480108729284e-08, + "loss": 0.2669, + "step": 40163 + }, + { + "epoch": 1.8814821754813322, + "grad_norm": 0.5843634668831713, + "learning_rate": 4.5903297312586137e-08, + "loss": 0.2586, + "step": 40164 + }, + { + "epoch": 1.8815290204712607, + "grad_norm": 0.5990167625708339, + "learning_rate": 4.5867128639152934e-08, + "loss": 0.27, + "step": 40165 + }, + { + "epoch": 1.881575865461189, + "grad_norm": 0.5894580125482536, + "learning_rate": 4.583097408863729e-08, + "loss": 0.2675, + "step": 40166 + }, + { + "epoch": 1.8816227104511172, + "grad_norm": 0.5891623524338039, + "learning_rate": 4.579483366124737e-08, + "loss": 0.2745, + "step": 40167 + }, + { + "epoch": 1.8816695554410456, + "grad_norm": 0.6021212880113042, + "learning_rate": 4.5758707357191626e-08, + "loss": 0.2762, + "step": 40168 + }, + { + "epoch": 1.8817164004309739, + "grad_norm": 0.6335129951867011, + "learning_rate": 4.572259517667737e-08, + "loss": 0.2924, + "step": 40169 + }, + { + "epoch": 1.8817632454209021, + "grad_norm": 0.5918521474875961, + "learning_rate": 4.56864971199128e-08, + "loss": 0.2706, + "step": 40170 + }, + { + "epoch": 1.8818100904108306, + "grad_norm": 0.6336917583382032, + "learning_rate": 4.5650413187106055e-08, + "loss": 0.2699, + "step": 40171 + }, + { + "epoch": 1.881856935400759, + "grad_norm": 0.6131197688911068, + "learning_rate": 4.561434337846393e-08, + "loss": 0.293, + "step": 40172 + }, + { + "epoch": 1.881903780390687, + "grad_norm": 0.5908860321356837, + "learning_rate": 4.557828769419431e-08, + "loss": 0.2598, + "step": 40173 + }, + { + "epoch": 1.8819506253806155, + "grad_norm": 0.6393252480406437, + "learning_rate": 4.5542246134504806e-08, + "loss": 0.2746, + "step": 40174 + }, + { + "epoch": 1.881997470370544, + "grad_norm": 0.6033193867631802, + "learning_rate": 4.5506218699602754e-08, + "loss": 0.2705, + "step": 40175 + }, + { + "epoch": 1.8820443153604722, + "grad_norm": 0.6009880529218058, + "learning_rate": 4.5470205389695486e-08, + "loss": 0.276, + "step": 40176 + }, + { + "epoch": 1.8820911603504005, + "grad_norm": 0.6171190225843488, + "learning_rate": 4.543420620499034e-08, + "loss": 0.2639, + "step": 40177 + }, + { + "epoch": 1.882138005340329, + "grad_norm": 0.6092626726964524, + "learning_rate": 4.539822114569437e-08, + "loss": 0.2842, + "step": 40178 + }, + { + "epoch": 1.8821848503302572, + "grad_norm": 0.6135707248749895, + "learning_rate": 4.53622502120149e-08, + "loss": 0.2629, + "step": 40179 + }, + { + "epoch": 1.8822316953201854, + "grad_norm": 0.6402767282460163, + "learning_rate": 4.532629340415845e-08, + "loss": 0.2832, + "step": 40180 + }, + { + "epoch": 1.8822785403101139, + "grad_norm": 0.6082146305037045, + "learning_rate": 4.529035072233234e-08, + "loss": 0.2739, + "step": 40181 + }, + { + "epoch": 1.8823253853000421, + "grad_norm": 0.6290183193583895, + "learning_rate": 4.5254422166743086e-08, + "loss": 0.2681, + "step": 40182 + }, + { + "epoch": 1.8823722302899704, + "grad_norm": 0.5837109876795125, + "learning_rate": 4.5218507737598006e-08, + "loss": 0.2727, + "step": 40183 + }, + { + "epoch": 1.8824190752798988, + "grad_norm": 0.6416153916388332, + "learning_rate": 4.5182607435103334e-08, + "loss": 0.2764, + "step": 40184 + }, + { + "epoch": 1.8824659202698273, + "grad_norm": 0.6374985162383711, + "learning_rate": 4.514672125946556e-08, + "loss": 0.2788, + "step": 40185 + }, + { + "epoch": 1.8825127652597553, + "grad_norm": 0.6231686221304712, + "learning_rate": 4.5110849210891485e-08, + "loss": 0.2788, + "step": 40186 + }, + { + "epoch": 1.8825596102496838, + "grad_norm": 0.6371380253086493, + "learning_rate": 4.5074991289587587e-08, + "loss": 0.2806, + "step": 40187 + }, + { + "epoch": 1.8826064552396122, + "grad_norm": 0.6076799498327249, + "learning_rate": 4.503914749575983e-08, + "loss": 0.2803, + "step": 40188 + }, + { + "epoch": 1.8826533002295405, + "grad_norm": 0.5619849480997056, + "learning_rate": 4.500331782961526e-08, + "loss": 0.2643, + "step": 40189 + }, + { + "epoch": 1.8827001452194687, + "grad_norm": 0.5690862096607059, + "learning_rate": 4.496750229135899e-08, + "loss": 0.2534, + "step": 40190 + }, + { + "epoch": 1.8827469902093972, + "grad_norm": 0.6387863860888957, + "learning_rate": 4.49317008811978e-08, + "loss": 0.2735, + "step": 40191 + }, + { + "epoch": 1.8827938351993254, + "grad_norm": 0.6211570694034694, + "learning_rate": 4.4895913599337924e-08, + "loss": 0.2678, + "step": 40192 + }, + { + "epoch": 1.8828406801892537, + "grad_norm": 0.6115475041047452, + "learning_rate": 4.486014044598475e-08, + "loss": 0.2668, + "step": 40193 + }, + { + "epoch": 1.8828875251791821, + "grad_norm": 0.5974255454342503, + "learning_rate": 4.4824381421344487e-08, + "loss": 0.2546, + "step": 40194 + }, + { + "epoch": 1.8829343701691104, + "grad_norm": 0.6120411349014865, + "learning_rate": 4.478863652562282e-08, + "loss": 0.277, + "step": 40195 + }, + { + "epoch": 1.8829812151590386, + "grad_norm": 0.588857730004493, + "learning_rate": 4.4752905759025413e-08, + "loss": 0.2704, + "step": 40196 + }, + { + "epoch": 1.883028060148967, + "grad_norm": 0.5750786073061348, + "learning_rate": 4.471718912175793e-08, + "loss": 0.2558, + "step": 40197 + }, + { + "epoch": 1.8830749051388955, + "grad_norm": 0.5940987669140199, + "learning_rate": 4.468148661402577e-08, + "loss": 0.2862, + "step": 40198 + }, + { + "epoch": 1.8831217501288238, + "grad_norm": 0.6408823593471281, + "learning_rate": 4.464579823603488e-08, + "loss": 0.3059, + "step": 40199 + }, + { + "epoch": 1.883168595118752, + "grad_norm": 0.5830786599096363, + "learning_rate": 4.4610123987990086e-08, + "loss": 0.2669, + "step": 40200 + }, + { + "epoch": 1.8832154401086805, + "grad_norm": 0.5985662257949145, + "learning_rate": 4.4574463870097063e-08, + "loss": 0.2744, + "step": 40201 + }, + { + "epoch": 1.8832622850986087, + "grad_norm": 0.6059273842427011, + "learning_rate": 4.453881788256065e-08, + "loss": 0.2695, + "step": 40202 + }, + { + "epoch": 1.883309130088537, + "grad_norm": 0.5999543125516668, + "learning_rate": 4.450318602558595e-08, + "loss": 0.2698, + "step": 40203 + }, + { + "epoch": 1.8833559750784654, + "grad_norm": 0.6487907878555009, + "learning_rate": 4.4467568299378646e-08, + "loss": 0.2629, + "step": 40204 + }, + { + "epoch": 1.8834028200683937, + "grad_norm": 0.640005758692839, + "learning_rate": 4.4431964704143014e-08, + "loss": 0.2759, + "step": 40205 + }, + { + "epoch": 1.883449665058322, + "grad_norm": 0.6368704481531156, + "learning_rate": 4.439637524008444e-08, + "loss": 0.2728, + "step": 40206 + }, + { + "epoch": 1.8834965100482504, + "grad_norm": 0.5620528256143139, + "learning_rate": 4.436079990740777e-08, + "loss": 0.2576, + "step": 40207 + }, + { + "epoch": 1.8835433550381788, + "grad_norm": 0.6275393496477006, + "learning_rate": 4.4325238706317274e-08, + "loss": 0.2804, + "step": 40208 + }, + { + "epoch": 1.8835902000281068, + "grad_norm": 0.5712708410404167, + "learning_rate": 4.428969163701752e-08, + "loss": 0.2515, + "step": 40209 + }, + { + "epoch": 1.8836370450180353, + "grad_norm": 0.6475140099366858, + "learning_rate": 4.425415869971361e-08, + "loss": 0.2635, + "step": 40210 + }, + { + "epoch": 1.8836838900079638, + "grad_norm": 0.5926017229357954, + "learning_rate": 4.4218639894609836e-08, + "loss": 0.2616, + "step": 40211 + }, + { + "epoch": 1.883730734997892, + "grad_norm": 0.5891853444705435, + "learning_rate": 4.4183135221910475e-08, + "loss": 0.2677, + "step": 40212 + }, + { + "epoch": 1.8837775799878202, + "grad_norm": 0.6208600927757234, + "learning_rate": 4.414764468182009e-08, + "loss": 0.2598, + "step": 40213 + }, + { + "epoch": 1.8838244249777487, + "grad_norm": 0.5915045435112316, + "learning_rate": 4.411216827454268e-08, + "loss": 0.265, + "step": 40214 + }, + { + "epoch": 1.883871269967677, + "grad_norm": 0.5983072820112888, + "learning_rate": 4.407670600028252e-08, + "loss": 0.274, + "step": 40215 + }, + { + "epoch": 1.8839181149576052, + "grad_norm": 0.6359541497880805, + "learning_rate": 4.404125785924335e-08, + "loss": 0.2778, + "step": 40216 + }, + { + "epoch": 1.8839649599475337, + "grad_norm": 0.5983174965406857, + "learning_rate": 4.400582385162971e-08, + "loss": 0.2609, + "step": 40217 + }, + { + "epoch": 1.884011804937462, + "grad_norm": 0.6298282984135716, + "learning_rate": 4.397040397764507e-08, + "loss": 0.2807, + "step": 40218 + }, + { + "epoch": 1.8840586499273901, + "grad_norm": 0.6032272435491696, + "learning_rate": 4.39349982374937e-08, + "loss": 0.2778, + "step": 40219 + }, + { + "epoch": 1.8841054949173186, + "grad_norm": 0.572072517921984, + "learning_rate": 4.389960663137904e-08, + "loss": 0.2672, + "step": 40220 + }, + { + "epoch": 1.884152339907247, + "grad_norm": 0.5976488895027556, + "learning_rate": 4.386422915950483e-08, + "loss": 0.2763, + "step": 40221 + }, + { + "epoch": 1.884199184897175, + "grad_norm": 0.58280778550312, + "learning_rate": 4.382886582207452e-08, + "loss": 0.2604, + "step": 40222 + }, + { + "epoch": 1.8842460298871035, + "grad_norm": 0.6089217925232456, + "learning_rate": 4.379351661929182e-08, + "loss": 0.2696, + "step": 40223 + }, + { + "epoch": 1.884292874877032, + "grad_norm": 0.5623843344773922, + "learning_rate": 4.375818155136019e-08, + "loss": 0.2548, + "step": 40224 + }, + { + "epoch": 1.8843397198669602, + "grad_norm": 0.6042380363194655, + "learning_rate": 4.37228606184828e-08, + "loss": 0.2711, + "step": 40225 + }, + { + "epoch": 1.8843865648568885, + "grad_norm": 0.5760583541592427, + "learning_rate": 4.368755382086309e-08, + "loss": 0.2613, + "step": 40226 + }, + { + "epoch": 1.884433409846817, + "grad_norm": 0.6041388207846082, + "learning_rate": 4.365226115870397e-08, + "loss": 0.274, + "step": 40227 + }, + { + "epoch": 1.8844802548367452, + "grad_norm": 0.6109037991615882, + "learning_rate": 4.361698263220887e-08, + "loss": 0.2794, + "step": 40228 + }, + { + "epoch": 1.8845270998266734, + "grad_norm": 0.6513219200232592, + "learning_rate": 4.3581718241580416e-08, + "loss": 0.2658, + "step": 40229 + }, + { + "epoch": 1.884573944816602, + "grad_norm": 0.601410112599377, + "learning_rate": 4.3546467987021776e-08, + "loss": 0.2747, + "step": 40230 + }, + { + "epoch": 1.8846207898065301, + "grad_norm": 0.5959415280127137, + "learning_rate": 4.3511231868736124e-08, + "loss": 0.271, + "step": 40231 + }, + { + "epoch": 1.8846676347964584, + "grad_norm": 0.5787588734495955, + "learning_rate": 4.3476009886925795e-08, + "loss": 0.2695, + "step": 40232 + }, + { + "epoch": 1.8847144797863868, + "grad_norm": 0.5838971306677115, + "learning_rate": 4.344080204179341e-08, + "loss": 0.2561, + "step": 40233 + }, + { + "epoch": 1.8847613247763153, + "grad_norm": 0.6350680358199913, + "learning_rate": 4.3405608333542125e-08, + "loss": 0.2825, + "step": 40234 + }, + { + "epoch": 1.8848081697662435, + "grad_norm": 0.5726193571083245, + "learning_rate": 4.337042876237374e-08, + "loss": 0.262, + "step": 40235 + }, + { + "epoch": 1.8848550147561718, + "grad_norm": 0.6125532809239183, + "learning_rate": 4.333526332849114e-08, + "loss": 0.2779, + "step": 40236 + }, + { + "epoch": 1.8849018597461002, + "grad_norm": 0.5875039025384211, + "learning_rate": 4.3300112032096944e-08, + "loss": 0.2781, + "step": 40237 + }, + { + "epoch": 1.8849487047360285, + "grad_norm": 0.5831401971754864, + "learning_rate": 4.3264974873392926e-08, + "loss": 0.2604, + "step": 40238 + }, + { + "epoch": 1.8849955497259567, + "grad_norm": 0.5535605544414935, + "learning_rate": 4.3229851852581436e-08, + "loss": 0.2487, + "step": 40239 + }, + { + "epoch": 1.8850423947158852, + "grad_norm": 0.6157652994003439, + "learning_rate": 4.319474296986481e-08, + "loss": 0.2577, + "step": 40240 + }, + { + "epoch": 1.8850892397058134, + "grad_norm": 0.6304561381580361, + "learning_rate": 4.315964822544483e-08, + "loss": 0.2774, + "step": 40241 + }, + { + "epoch": 1.8851360846957417, + "grad_norm": 0.586284903759854, + "learning_rate": 4.312456761952355e-08, + "loss": 0.26, + "step": 40242 + }, + { + "epoch": 1.8851829296856701, + "grad_norm": 0.5704214202806811, + "learning_rate": 4.3089501152303036e-08, + "loss": 0.2617, + "step": 40243 + }, + { + "epoch": 1.8852297746755986, + "grad_norm": 0.5974455977225603, + "learning_rate": 4.30544488239848e-08, + "loss": 0.2643, + "step": 40244 + }, + { + "epoch": 1.8852766196655266, + "grad_norm": 0.5429020700355328, + "learning_rate": 4.301941063477061e-08, + "loss": 0.2476, + "step": 40245 + }, + { + "epoch": 1.885323464655455, + "grad_norm": 0.6638817516885434, + "learning_rate": 4.298438658486226e-08, + "loss": 0.2828, + "step": 40246 + }, + { + "epoch": 1.8853703096453835, + "grad_norm": 0.6131329734755407, + "learning_rate": 4.294937667446125e-08, + "loss": 0.2753, + "step": 40247 + }, + { + "epoch": 1.8854171546353118, + "grad_norm": 0.6022392763921998, + "learning_rate": 4.291438090376909e-08, + "loss": 0.2828, + "step": 40248 + }, + { + "epoch": 1.88546399962524, + "grad_norm": 0.617203380990746, + "learning_rate": 4.287939927298701e-08, + "loss": 0.2923, + "step": 40249 + }, + { + "epoch": 1.8855108446151685, + "grad_norm": 0.5933173991059078, + "learning_rate": 4.284443178231651e-08, + "loss": 0.2781, + "step": 40250 + }, + { + "epoch": 1.8855576896050967, + "grad_norm": 0.5819105563539039, + "learning_rate": 4.280947843195854e-08, + "loss": 0.2699, + "step": 40251 + }, + { + "epoch": 1.885604534595025, + "grad_norm": 0.5655307607209074, + "learning_rate": 4.277453922211433e-08, + "loss": 0.2576, + "step": 40252 + }, + { + "epoch": 1.8856513795849534, + "grad_norm": 0.5995415244851192, + "learning_rate": 4.273961415298539e-08, + "loss": 0.2606, + "step": 40253 + }, + { + "epoch": 1.8856982245748817, + "grad_norm": 0.592358746427583, + "learning_rate": 4.27047032247721e-08, + "loss": 0.2753, + "step": 40254 + }, + { + "epoch": 1.88574506956481, + "grad_norm": 0.5904452844855785, + "learning_rate": 4.266980643767571e-08, + "loss": 0.2647, + "step": 40255 + }, + { + "epoch": 1.8857919145547384, + "grad_norm": 0.548220845973998, + "learning_rate": 4.2634923791897154e-08, + "loss": 0.2613, + "step": 40256 + }, + { + "epoch": 1.8858387595446668, + "grad_norm": 0.6002384852727, + "learning_rate": 4.260005528763683e-08, + "loss": 0.2793, + "step": 40257 + }, + { + "epoch": 1.8858856045345949, + "grad_norm": 0.6473344304988704, + "learning_rate": 4.256520092509542e-08, + "loss": 0.287, + "step": 40258 + }, + { + "epoch": 1.8859324495245233, + "grad_norm": 0.5613228507172874, + "learning_rate": 4.253036070447358e-08, + "loss": 0.2532, + "step": 40259 + }, + { + "epoch": 1.8859792945144518, + "grad_norm": 0.6472713110322843, + "learning_rate": 4.2495534625972e-08, + "loss": 0.2749, + "step": 40260 + }, + { + "epoch": 1.88602613950438, + "grad_norm": 0.5985732321033989, + "learning_rate": 4.246072268979079e-08, + "loss": 0.2645, + "step": 40261 + }, + { + "epoch": 1.8860729844943083, + "grad_norm": 0.5589828764064674, + "learning_rate": 4.24259248961309e-08, + "loss": 0.2653, + "step": 40262 + }, + { + "epoch": 1.8861198294842367, + "grad_norm": 0.5956980897823214, + "learning_rate": 4.239114124519189e-08, + "loss": 0.2666, + "step": 40263 + }, + { + "epoch": 1.886166674474165, + "grad_norm": 0.5837691936571514, + "learning_rate": 4.235637173717389e-08, + "loss": 0.271, + "step": 40264 + }, + { + "epoch": 1.8862135194640932, + "grad_norm": 0.5746471793236156, + "learning_rate": 4.2321616372277555e-08, + "loss": 0.265, + "step": 40265 + }, + { + "epoch": 1.8862603644540217, + "grad_norm": 0.6318377808088146, + "learning_rate": 4.228687515070246e-08, + "loss": 0.2775, + "step": 40266 + }, + { + "epoch": 1.88630720944395, + "grad_norm": 0.5809127274828967, + "learning_rate": 4.225214807264871e-08, + "loss": 0.2686, + "step": 40267 + }, + { + "epoch": 1.8863540544338782, + "grad_norm": 0.645463990967441, + "learning_rate": 4.221743513831616e-08, + "loss": 0.2886, + "step": 40268 + }, + { + "epoch": 1.8864008994238066, + "grad_norm": 0.6165028217553471, + "learning_rate": 4.2182736347904373e-08, + "loss": 0.2875, + "step": 40269 + }, + { + "epoch": 1.886447744413735, + "grad_norm": 0.5991139791731289, + "learning_rate": 4.214805170161346e-08, + "loss": 0.2815, + "step": 40270 + }, + { + "epoch": 1.8864945894036633, + "grad_norm": 0.5701874340021481, + "learning_rate": 4.21133811996427e-08, + "loss": 0.2627, + "step": 40271 + }, + { + "epoch": 1.8865414343935916, + "grad_norm": 0.618820356117372, + "learning_rate": 4.207872484219139e-08, + "loss": 0.281, + "step": 40272 + }, + { + "epoch": 1.88658827938352, + "grad_norm": 0.6253601235021615, + "learning_rate": 4.204408262945936e-08, + "loss": 0.2969, + "step": 40273 + }, + { + "epoch": 1.8866351243734483, + "grad_norm": 0.6227203996143739, + "learning_rate": 4.200945456164618e-08, + "loss": 0.2673, + "step": 40274 + }, + { + "epoch": 1.8866819693633765, + "grad_norm": 0.6560742960336623, + "learning_rate": 4.1974840638950296e-08, + "loss": 0.2769, + "step": 40275 + }, + { + "epoch": 1.886728814353305, + "grad_norm": 0.6022687412433155, + "learning_rate": 4.194024086157183e-08, + "loss": 0.2698, + "step": 40276 + }, + { + "epoch": 1.8867756593432332, + "grad_norm": 0.6250523006825512, + "learning_rate": 4.190565522970924e-08, + "loss": 0.2746, + "step": 40277 + }, + { + "epoch": 1.8868225043331615, + "grad_norm": 0.6461457627778003, + "learning_rate": 4.18710837435618e-08, + "loss": 0.2965, + "step": 40278 + }, + { + "epoch": 1.88686934932309, + "grad_norm": 0.6107670894677012, + "learning_rate": 4.183652640332852e-08, + "loss": 0.2661, + "step": 40279 + }, + { + "epoch": 1.8869161943130184, + "grad_norm": 0.6223079556128666, + "learning_rate": 4.180198320920814e-08, + "loss": 0.273, + "step": 40280 + }, + { + "epoch": 1.8869630393029464, + "grad_norm": 0.603914583904149, + "learning_rate": 4.176745416139938e-08, + "loss": 0.2846, + "step": 40281 + }, + { + "epoch": 1.8870098842928749, + "grad_norm": 0.5979481643482186, + "learning_rate": 4.1732939260101244e-08, + "loss": 0.2806, + "step": 40282 + }, + { + "epoch": 1.8870567292828033, + "grad_norm": 0.6070683190897754, + "learning_rate": 4.1698438505512196e-08, + "loss": 0.2721, + "step": 40283 + }, + { + "epoch": 1.8871035742727316, + "grad_norm": 0.5983377715547368, + "learning_rate": 4.166395189783068e-08, + "loss": 0.2641, + "step": 40284 + }, + { + "epoch": 1.8871504192626598, + "grad_norm": 0.5691426688325157, + "learning_rate": 4.162947943725515e-08, + "loss": 0.2561, + "step": 40285 + }, + { + "epoch": 1.8871972642525883, + "grad_norm": 0.6055881616891486, + "learning_rate": 4.1595021123984335e-08, + "loss": 0.2751, + "step": 40286 + }, + { + "epoch": 1.8872441092425165, + "grad_norm": 0.627419630741509, + "learning_rate": 4.1560576958216134e-08, + "loss": 0.2741, + "step": 40287 + }, + { + "epoch": 1.8872909542324448, + "grad_norm": 0.6043962259964, + "learning_rate": 4.1526146940148726e-08, + "loss": 0.2874, + "step": 40288 + }, + { + "epoch": 1.8873377992223732, + "grad_norm": 0.5412372151886323, + "learning_rate": 4.1491731069980554e-08, + "loss": 0.2653, + "step": 40289 + }, + { + "epoch": 1.8873846442123015, + "grad_norm": 0.5699878627401409, + "learning_rate": 4.1457329347909804e-08, + "loss": 0.2626, + "step": 40290 + }, + { + "epoch": 1.8874314892022297, + "grad_norm": 0.6180187232792778, + "learning_rate": 4.1422941774134086e-08, + "loss": 0.2708, + "step": 40291 + }, + { + "epoch": 1.8874783341921582, + "grad_norm": 0.6589593983267731, + "learning_rate": 4.13885683488513e-08, + "loss": 0.2762, + "step": 40292 + }, + { + "epoch": 1.8875251791820866, + "grad_norm": 0.6099314851822026, + "learning_rate": 4.1354209072259346e-08, + "loss": 0.2791, + "step": 40293 + }, + { + "epoch": 1.8875720241720146, + "grad_norm": 0.5911105362320422, + "learning_rate": 4.1319863944555836e-08, + "loss": 0.276, + "step": 40294 + }, + { + "epoch": 1.887618869161943, + "grad_norm": 0.5864646577921003, + "learning_rate": 4.128553296593868e-08, + "loss": 0.2725, + "step": 40295 + }, + { + "epoch": 1.8876657141518716, + "grad_norm": 0.5819839137639047, + "learning_rate": 4.125121613660521e-08, + "loss": 0.2596, + "step": 40296 + }, + { + "epoch": 1.8877125591417998, + "grad_norm": 0.6160306384835145, + "learning_rate": 4.1216913456753325e-08, + "loss": 0.2814, + "step": 40297 + }, + { + "epoch": 1.887759404131728, + "grad_norm": 0.6290771070171015, + "learning_rate": 4.1182624926579815e-08, + "loss": 0.2906, + "step": 40298 + }, + { + "epoch": 1.8878062491216565, + "grad_norm": 0.6079832097596122, + "learning_rate": 4.1148350546282304e-08, + "loss": 0.2581, + "step": 40299 + }, + { + "epoch": 1.8878530941115848, + "grad_norm": 0.5951358205083127, + "learning_rate": 4.111409031605812e-08, + "loss": 0.2743, + "step": 40300 + }, + { + "epoch": 1.887899939101513, + "grad_norm": 0.6428683194833741, + "learning_rate": 4.107984423610434e-08, + "loss": 0.2652, + "step": 40301 + }, + { + "epoch": 1.8879467840914415, + "grad_norm": 0.5525831181973202, + "learning_rate": 4.1045612306617746e-08, + "loss": 0.2613, + "step": 40302 + }, + { + "epoch": 1.8879936290813697, + "grad_norm": 0.6263012779559914, + "learning_rate": 4.101139452779595e-08, + "loss": 0.2682, + "step": 40303 + }, + { + "epoch": 1.888040474071298, + "grad_norm": 0.6138508196109363, + "learning_rate": 4.0977190899835484e-08, + "loss": 0.2639, + "step": 40304 + }, + { + "epoch": 1.8880873190612264, + "grad_norm": 0.6011992219588144, + "learning_rate": 4.094300142293339e-08, + "loss": 0.2705, + "step": 40305 + }, + { + "epoch": 1.8881341640511549, + "grad_norm": 0.6652507676978932, + "learning_rate": 4.090882609728591e-08, + "loss": 0.2813, + "step": 40306 + }, + { + "epoch": 1.888181009041083, + "grad_norm": 0.5684527520610536, + "learning_rate": 4.087466492309011e-08, + "loss": 0.2614, + "step": 40307 + }, + { + "epoch": 1.8882278540310113, + "grad_norm": 0.5555399769267504, + "learning_rate": 4.0840517900542774e-08, + "loss": 0.2493, + "step": 40308 + }, + { + "epoch": 1.8882746990209398, + "grad_norm": 0.6016669707745519, + "learning_rate": 4.0806385029839854e-08, + "loss": 0.2743, + "step": 40309 + }, + { + "epoch": 1.888321544010868, + "grad_norm": 0.5893688616752382, + "learning_rate": 4.077226631117842e-08, + "loss": 0.2648, + "step": 40310 + }, + { + "epoch": 1.8883683890007963, + "grad_norm": 0.59800493265255, + "learning_rate": 4.0738161744754426e-08, + "loss": 0.2601, + "step": 40311 + }, + { + "epoch": 1.8884152339907248, + "grad_norm": 0.6085676235878135, + "learning_rate": 4.0704071330764094e-08, + "loss": 0.2775, + "step": 40312 + }, + { + "epoch": 1.888462078980653, + "grad_norm": 0.5950483558716807, + "learning_rate": 4.0669995069403664e-08, + "loss": 0.2892, + "step": 40313 + }, + { + "epoch": 1.8885089239705812, + "grad_norm": 0.5936056022207269, + "learning_rate": 4.063593296086937e-08, + "loss": 0.2626, + "step": 40314 + }, + { + "epoch": 1.8885557689605097, + "grad_norm": 0.5736307067134802, + "learning_rate": 4.060188500535717e-08, + "loss": 0.2599, + "step": 40315 + }, + { + "epoch": 1.8886026139504382, + "grad_norm": 0.5666019803804977, + "learning_rate": 4.056785120306273e-08, + "loss": 0.2634, + "step": 40316 + }, + { + "epoch": 1.8886494589403662, + "grad_norm": 0.597711090602163, + "learning_rate": 4.0533831554182555e-08, + "loss": 0.2734, + "step": 40317 + }, + { + "epoch": 1.8886963039302946, + "grad_norm": 0.5870493151133179, + "learning_rate": 4.0499826058911786e-08, + "loss": 0.2598, + "step": 40318 + }, + { + "epoch": 1.888743148920223, + "grad_norm": 0.5549533779287056, + "learning_rate": 4.046583471744664e-08, + "loss": 0.2518, + "step": 40319 + }, + { + "epoch": 1.8887899939101513, + "grad_norm": 0.5617233995914077, + "learning_rate": 4.043185752998197e-08, + "loss": 0.2665, + "step": 40320 + }, + { + "epoch": 1.8888368389000796, + "grad_norm": 0.5939426131828438, + "learning_rate": 4.0397894496714e-08, + "loss": 0.2558, + "step": 40321 + }, + { + "epoch": 1.888883683890008, + "grad_norm": 0.5829800174455141, + "learning_rate": 4.036394561783785e-08, + "loss": 0.2736, + "step": 40322 + }, + { + "epoch": 1.8889305288799363, + "grad_norm": 0.6119572697465688, + "learning_rate": 4.033001089354921e-08, + "loss": 0.2798, + "step": 40323 + }, + { + "epoch": 1.8889773738698645, + "grad_norm": 0.5961453324238569, + "learning_rate": 4.029609032404291e-08, + "loss": 0.2653, + "step": 40324 + }, + { + "epoch": 1.889024218859793, + "grad_norm": 0.5432496111461272, + "learning_rate": 4.026218390951464e-08, + "loss": 0.2405, + "step": 40325 + }, + { + "epoch": 1.8890710638497212, + "grad_norm": 0.5955489669120012, + "learning_rate": 4.022829165015896e-08, + "loss": 0.2541, + "step": 40326 + }, + { + "epoch": 1.8891179088396495, + "grad_norm": 0.6125755145702741, + "learning_rate": 4.019441354617126e-08, + "loss": 0.2782, + "step": 40327 + }, + { + "epoch": 1.889164753829578, + "grad_norm": 0.5622891639764037, + "learning_rate": 4.016054959774668e-08, + "loss": 0.26, + "step": 40328 + }, + { + "epoch": 1.8892115988195064, + "grad_norm": 0.5810217090834674, + "learning_rate": 4.012669980507977e-08, + "loss": 0.2619, + "step": 40329 + }, + { + "epoch": 1.8892584438094344, + "grad_norm": 0.6013241660177073, + "learning_rate": 4.0092864168365384e-08, + "loss": 0.2747, + "step": 40330 + }, + { + "epoch": 1.8893052887993629, + "grad_norm": 0.6294335331164342, + "learning_rate": 4.0059042687798365e-08, + "loss": 0.2865, + "step": 40331 + }, + { + "epoch": 1.8893521337892913, + "grad_norm": 0.6138042665830638, + "learning_rate": 4.002523536357328e-08, + "loss": 0.2702, + "step": 40332 + }, + { + "epoch": 1.8893989787792196, + "grad_norm": 0.6082054861135616, + "learning_rate": 3.9991442195884686e-08, + "loss": 0.2753, + "step": 40333 + }, + { + "epoch": 1.8894458237691478, + "grad_norm": 0.5781017432341083, + "learning_rate": 3.995766318492689e-08, + "loss": 0.2601, + "step": 40334 + }, + { + "epoch": 1.8894926687590763, + "grad_norm": 0.6251053197226489, + "learning_rate": 3.992389833089472e-08, + "loss": 0.2762, + "step": 40335 + }, + { + "epoch": 1.8895395137490045, + "grad_norm": 0.5718793307720003, + "learning_rate": 3.989014763398191e-08, + "loss": 0.255, + "step": 40336 + }, + { + "epoch": 1.8895863587389328, + "grad_norm": 0.5678439939225505, + "learning_rate": 3.985641109438304e-08, + "loss": 0.2666, + "step": 40337 + }, + { + "epoch": 1.8896332037288612, + "grad_norm": 0.6373330349566904, + "learning_rate": 3.982268871229211e-08, + "loss": 0.2792, + "step": 40338 + }, + { + "epoch": 1.8896800487187895, + "grad_norm": 0.5664366455617353, + "learning_rate": 3.9788980487903696e-08, + "loss": 0.2641, + "step": 40339 + }, + { + "epoch": 1.8897268937087177, + "grad_norm": 0.5931474187700934, + "learning_rate": 3.975528642141097e-08, + "loss": 0.2564, + "step": 40340 + }, + { + "epoch": 1.8897737386986462, + "grad_norm": 0.6008998975763916, + "learning_rate": 3.9721606513008214e-08, + "loss": 0.2788, + "step": 40341 + }, + { + "epoch": 1.8898205836885746, + "grad_norm": 0.6177481134382778, + "learning_rate": 3.9687940762889454e-08, + "loss": 0.2838, + "step": 40342 + }, + { + "epoch": 1.8898674286785029, + "grad_norm": 0.5617723514715379, + "learning_rate": 3.965428917124786e-08, + "loss": 0.2429, + "step": 40343 + }, + { + "epoch": 1.8899142736684311, + "grad_norm": 0.6161675164067651, + "learning_rate": 3.962065173827773e-08, + "loss": 0.2762, + "step": 40344 + }, + { + "epoch": 1.8899611186583596, + "grad_norm": 0.5504435952485653, + "learning_rate": 3.958702846417223e-08, + "loss": 0.2426, + "step": 40345 + }, + { + "epoch": 1.8900079636482878, + "grad_norm": 0.5947027603114893, + "learning_rate": 3.95534193491251e-08, + "loss": 0.2566, + "step": 40346 + }, + { + "epoch": 1.890054808638216, + "grad_norm": 0.6423076428968003, + "learning_rate": 3.95198243933298e-08, + "loss": 0.2908, + "step": 40347 + }, + { + "epoch": 1.8901016536281445, + "grad_norm": 0.6068581080237563, + "learning_rate": 3.948624359697922e-08, + "loss": 0.2639, + "step": 40348 + }, + { + "epoch": 1.8901484986180728, + "grad_norm": 0.6520575199179479, + "learning_rate": 3.945267696026683e-08, + "loss": 0.2817, + "step": 40349 + }, + { + "epoch": 1.890195343608001, + "grad_norm": 0.5602202468228512, + "learning_rate": 3.9419124483385805e-08, + "loss": 0.257, + "step": 40350 + }, + { + "epoch": 1.8902421885979295, + "grad_norm": 0.6233778298908084, + "learning_rate": 3.9385586166529324e-08, + "loss": 0.2808, + "step": 40351 + }, + { + "epoch": 1.890289033587858, + "grad_norm": 0.5644657060841499, + "learning_rate": 3.935206200989056e-08, + "loss": 0.2616, + "step": 40352 + }, + { + "epoch": 1.890335878577786, + "grad_norm": 0.6512245659301484, + "learning_rate": 3.931855201366214e-08, + "loss": 0.2887, + "step": 40353 + }, + { + "epoch": 1.8903827235677144, + "grad_norm": 0.5903228249299588, + "learning_rate": 3.9285056178036964e-08, + "loss": 0.2841, + "step": 40354 + }, + { + "epoch": 1.8904295685576429, + "grad_norm": 0.6152306974876374, + "learning_rate": 3.925157450320794e-08, + "loss": 0.2753, + "step": 40355 + }, + { + "epoch": 1.8904764135475711, + "grad_norm": 0.5388875351033188, + "learning_rate": 3.92181069893674e-08, + "loss": 0.2501, + "step": 40356 + }, + { + "epoch": 1.8905232585374994, + "grad_norm": 0.6213459521054912, + "learning_rate": 3.918465363670798e-08, + "loss": 0.2824, + "step": 40357 + }, + { + "epoch": 1.8905701035274278, + "grad_norm": 0.5690569757532786, + "learning_rate": 3.915121444542258e-08, + "loss": 0.2569, + "step": 40358 + }, + { + "epoch": 1.890616948517356, + "grad_norm": 0.5759715641458328, + "learning_rate": 3.911778941570354e-08, + "loss": 0.2668, + "step": 40359 + }, + { + "epoch": 1.8906637935072843, + "grad_norm": 0.5722125119170957, + "learning_rate": 3.9084378547742944e-08, + "loss": 0.266, + "step": 40360 + }, + { + "epoch": 1.8907106384972128, + "grad_norm": 0.6083636451981531, + "learning_rate": 3.905098184173367e-08, + "loss": 0.2578, + "step": 40361 + }, + { + "epoch": 1.890757483487141, + "grad_norm": 0.6097239015890487, + "learning_rate": 3.901759929786697e-08, + "loss": 0.2848, + "step": 40362 + }, + { + "epoch": 1.8908043284770693, + "grad_norm": 0.5884615663465517, + "learning_rate": 3.898423091633546e-08, + "loss": 0.2707, + "step": 40363 + }, + { + "epoch": 1.8908511734669977, + "grad_norm": 0.6177206658565015, + "learning_rate": 3.895087669733122e-08, + "loss": 0.2821, + "step": 40364 + }, + { + "epoch": 1.8908980184569262, + "grad_norm": 0.5622871740192593, + "learning_rate": 3.89175366410463e-08, + "loss": 0.2594, + "step": 40365 + }, + { + "epoch": 1.8909448634468542, + "grad_norm": 0.5898904423559164, + "learning_rate": 3.888421074767224e-08, + "loss": 0.2856, + "step": 40366 + }, + { + "epoch": 1.8909917084367827, + "grad_norm": 0.5706556960018855, + "learning_rate": 3.88508990174008e-08, + "loss": 0.254, + "step": 40367 + }, + { + "epoch": 1.8910385534267111, + "grad_norm": 0.5736384236580369, + "learning_rate": 3.881760145042435e-08, + "loss": 0.249, + "step": 40368 + }, + { + "epoch": 1.8910853984166394, + "grad_norm": 0.6124634350430472, + "learning_rate": 3.8784318046933554e-08, + "loss": 0.2767, + "step": 40369 + }, + { + "epoch": 1.8911322434065676, + "grad_norm": 0.5865550032909472, + "learning_rate": 3.875104880712049e-08, + "loss": 0.2525, + "step": 40370 + }, + { + "epoch": 1.891179088396496, + "grad_norm": 0.6053376498552681, + "learning_rate": 3.871779373117668e-08, + "loss": 0.2706, + "step": 40371 + }, + { + "epoch": 1.8912259333864243, + "grad_norm": 0.5966941657517731, + "learning_rate": 3.868455281929306e-08, + "loss": 0.2625, + "step": 40372 + }, + { + "epoch": 1.8912727783763525, + "grad_norm": 0.5797577948781261, + "learning_rate": 3.865132607166145e-08, + "loss": 0.271, + "step": 40373 + }, + { + "epoch": 1.891319623366281, + "grad_norm": 0.5863674654666592, + "learning_rate": 3.861811348847277e-08, + "loss": 0.2618, + "step": 40374 + }, + { + "epoch": 1.8913664683562093, + "grad_norm": 0.5880975024854411, + "learning_rate": 3.8584915069918295e-08, + "loss": 0.2603, + "step": 40375 + }, + { + "epoch": 1.8914133133461375, + "grad_norm": 0.6676627661196817, + "learning_rate": 3.855173081618868e-08, + "loss": 0.3041, + "step": 40376 + }, + { + "epoch": 1.891460158336066, + "grad_norm": 0.6193423059741557, + "learning_rate": 3.851856072747545e-08, + "loss": 0.2713, + "step": 40377 + }, + { + "epoch": 1.8915070033259944, + "grad_norm": 0.5957885863698499, + "learning_rate": 3.848540480396928e-08, + "loss": 0.2507, + "step": 40378 + }, + { + "epoch": 1.8915538483159227, + "grad_norm": 0.6079739295169242, + "learning_rate": 3.845226304586058e-08, + "loss": 0.2629, + "step": 40379 + }, + { + "epoch": 1.891600693305851, + "grad_norm": 0.6177446381797697, + "learning_rate": 3.841913545334086e-08, + "loss": 0.2797, + "step": 40380 + }, + { + "epoch": 1.8916475382957794, + "grad_norm": 0.6093253437581266, + "learning_rate": 3.8386022026600247e-08, + "loss": 0.2602, + "step": 40381 + }, + { + "epoch": 1.8916943832857076, + "grad_norm": 0.6232386761648034, + "learning_rate": 3.835292276582914e-08, + "loss": 0.2711, + "step": 40382 + }, + { + "epoch": 1.8917412282756358, + "grad_norm": 0.6116905546215793, + "learning_rate": 3.8319837671218215e-08, + "loss": 0.2631, + "step": 40383 + }, + { + "epoch": 1.8917880732655643, + "grad_norm": 0.6003800702228456, + "learning_rate": 3.828676674295817e-08, + "loss": 0.274, + "step": 40384 + }, + { + "epoch": 1.8918349182554925, + "grad_norm": 0.6049432386297338, + "learning_rate": 3.825370998123884e-08, + "loss": 0.2767, + "step": 40385 + }, + { + "epoch": 1.8918817632454208, + "grad_norm": 0.6337651746287757, + "learning_rate": 3.822066738625063e-08, + "loss": 0.27, + "step": 40386 + }, + { + "epoch": 1.8919286082353493, + "grad_norm": 0.5935671832870868, + "learning_rate": 3.818763895818395e-08, + "loss": 0.2735, + "step": 40387 + }, + { + "epoch": 1.8919754532252777, + "grad_norm": 0.6071022658601368, + "learning_rate": 3.815462469722864e-08, + "loss": 0.273, + "step": 40388 + }, + { + "epoch": 1.8920222982152057, + "grad_norm": 0.6155000946244367, + "learning_rate": 3.812162460357455e-08, + "loss": 0.2631, + "step": 40389 + }, + { + "epoch": 1.8920691432051342, + "grad_norm": 0.6001615120701504, + "learning_rate": 3.808863867741208e-08, + "loss": 0.2594, + "step": 40390 + }, + { + "epoch": 1.8921159881950627, + "grad_norm": 0.5723967411395292, + "learning_rate": 3.8055666918930255e-08, + "loss": 0.2644, + "step": 40391 + }, + { + "epoch": 1.892162833184991, + "grad_norm": 0.597719042376822, + "learning_rate": 3.802270932831947e-08, + "loss": 0.273, + "step": 40392 + }, + { + "epoch": 1.8922096781749191, + "grad_norm": 0.6050013173757057, + "learning_rate": 3.79897659057693e-08, + "loss": 0.2769, + "step": 40393 + }, + { + "epoch": 1.8922565231648476, + "grad_norm": 0.5812071977949202, + "learning_rate": 3.795683665146904e-08, + "loss": 0.2612, + "step": 40394 + }, + { + "epoch": 1.8923033681547758, + "grad_norm": 0.5956982185182372, + "learning_rate": 3.7923921565608534e-08, + "loss": 0.2849, + "step": 40395 + }, + { + "epoch": 1.892350213144704, + "grad_norm": 0.6316014677662394, + "learning_rate": 3.789102064837708e-08, + "loss": 0.2849, + "step": 40396 + }, + { + "epoch": 1.8923970581346325, + "grad_norm": 0.6370527981224171, + "learning_rate": 3.7858133899963957e-08, + "loss": 0.2716, + "step": 40397 + }, + { + "epoch": 1.8924439031245608, + "grad_norm": 0.6056983940928748, + "learning_rate": 3.782526132055819e-08, + "loss": 0.27, + "step": 40398 + }, + { + "epoch": 1.892490748114489, + "grad_norm": 0.6037587309061349, + "learning_rate": 3.7792402910349356e-08, + "loss": 0.2652, + "step": 40399 + }, + { + "epoch": 1.8925375931044175, + "grad_norm": 0.594171338399054, + "learning_rate": 3.7759558669526466e-08, + "loss": 0.2647, + "step": 40400 + }, + { + "epoch": 1.892584438094346, + "grad_norm": 0.6546258484875942, + "learning_rate": 3.7726728598278254e-08, + "loss": 0.2845, + "step": 40401 + }, + { + "epoch": 1.892631283084274, + "grad_norm": 0.6246261582946657, + "learning_rate": 3.769391269679401e-08, + "loss": 0.2892, + "step": 40402 + }, + { + "epoch": 1.8926781280742024, + "grad_norm": 0.6029495019964927, + "learning_rate": 3.766111096526248e-08, + "loss": 0.2668, + "step": 40403 + }, + { + "epoch": 1.892724973064131, + "grad_norm": 0.5733090096547426, + "learning_rate": 3.76283234038724e-08, + "loss": 0.2797, + "step": 40404 + }, + { + "epoch": 1.8927718180540591, + "grad_norm": 0.6100958766909199, + "learning_rate": 3.759555001281223e-08, + "loss": 0.2761, + "step": 40405 + }, + { + "epoch": 1.8928186630439874, + "grad_norm": 0.5942019509458, + "learning_rate": 3.756279079227071e-08, + "loss": 0.2661, + "step": 40406 + }, + { + "epoch": 1.8928655080339158, + "grad_norm": 0.605770950840022, + "learning_rate": 3.753004574243657e-08, + "loss": 0.2751, + "step": 40407 + }, + { + "epoch": 1.892912353023844, + "grad_norm": 0.6476580818196057, + "learning_rate": 3.749731486349828e-08, + "loss": 0.2705, + "step": 40408 + }, + { + "epoch": 1.8929591980137723, + "grad_norm": 0.6093399739636984, + "learning_rate": 3.7464598155643736e-08, + "loss": 0.2656, + "step": 40409 + }, + { + "epoch": 1.8930060430037008, + "grad_norm": 0.599374213591509, + "learning_rate": 3.743189561906169e-08, + "loss": 0.2768, + "step": 40410 + }, + { + "epoch": 1.893052887993629, + "grad_norm": 0.5832208185266539, + "learning_rate": 3.739920725394003e-08, + "loss": 0.2617, + "step": 40411 + }, + { + "epoch": 1.8930997329835573, + "grad_norm": 0.6075459193031452, + "learning_rate": 3.736653306046695e-08, + "loss": 0.2598, + "step": 40412 + }, + { + "epoch": 1.8931465779734857, + "grad_norm": 0.5580512503263682, + "learning_rate": 3.733387303883035e-08, + "loss": 0.2576, + "step": 40413 + }, + { + "epoch": 1.8931934229634142, + "grad_norm": 0.6032305869572588, + "learning_rate": 3.730122718921869e-08, + "loss": 0.2765, + "step": 40414 + }, + { + "epoch": 1.8932402679533424, + "grad_norm": 0.5734041505754289, + "learning_rate": 3.726859551181933e-08, + "loss": 0.2605, + "step": 40415 + }, + { + "epoch": 1.8932871129432707, + "grad_norm": 0.6292938569295998, + "learning_rate": 3.723597800682016e-08, + "loss": 0.2841, + "step": 40416 + }, + { + "epoch": 1.8933339579331991, + "grad_norm": 0.6111068106651929, + "learning_rate": 3.72033746744091e-08, + "loss": 0.254, + "step": 40417 + }, + { + "epoch": 1.8933808029231274, + "grad_norm": 0.632560431034984, + "learning_rate": 3.717078551477349e-08, + "loss": 0.2739, + "step": 40418 + }, + { + "epoch": 1.8934276479130556, + "grad_norm": 0.5927621856040881, + "learning_rate": 3.713821052810096e-08, + "loss": 0.2622, + "step": 40419 + }, + { + "epoch": 1.893474492902984, + "grad_norm": 0.6188661799728498, + "learning_rate": 3.710564971457914e-08, + "loss": 0.2624, + "step": 40420 + }, + { + "epoch": 1.8935213378929123, + "grad_norm": 0.5803760317096844, + "learning_rate": 3.707310307439538e-08, + "loss": 0.2507, + "step": 40421 + }, + { + "epoch": 1.8935681828828406, + "grad_norm": 0.6186371705201255, + "learning_rate": 3.704057060773647e-08, + "loss": 0.2804, + "step": 40422 + }, + { + "epoch": 1.893615027872769, + "grad_norm": 0.5703354563442944, + "learning_rate": 3.7008052314790596e-08, + "loss": 0.2625, + "step": 40423 + }, + { + "epoch": 1.8936618728626975, + "grad_norm": 0.621691883950183, + "learning_rate": 3.6975548195744e-08, + "loss": 0.2793, + "step": 40424 + }, + { + "epoch": 1.8937087178526255, + "grad_norm": 0.611346155156504, + "learning_rate": 3.6943058250784036e-08, + "loss": 0.2903, + "step": 40425 + }, + { + "epoch": 1.893755562842554, + "grad_norm": 0.5850566163358809, + "learning_rate": 3.691058248009777e-08, + "loss": 0.2616, + "step": 40426 + }, + { + "epoch": 1.8938024078324824, + "grad_norm": 0.5937943144946671, + "learning_rate": 3.687812088387199e-08, + "loss": 0.2693, + "step": 40427 + }, + { + "epoch": 1.8938492528224107, + "grad_norm": 0.5696349287937594, + "learning_rate": 3.6845673462293506e-08, + "loss": 0.2628, + "step": 40428 + }, + { + "epoch": 1.893896097812339, + "grad_norm": 0.6048210114171999, + "learning_rate": 3.6813240215549104e-08, + "loss": 0.2759, + "step": 40429 + }, + { + "epoch": 1.8939429428022674, + "grad_norm": 0.5915077038177651, + "learning_rate": 3.678082114382558e-08, + "loss": 0.2521, + "step": 40430 + }, + { + "epoch": 1.8939897877921956, + "grad_norm": 0.5849496878092286, + "learning_rate": 3.674841624730918e-08, + "loss": 0.2695, + "step": 40431 + }, + { + "epoch": 1.8940366327821239, + "grad_norm": 0.6263068516787061, + "learning_rate": 3.6716025526186694e-08, + "loss": 0.2771, + "step": 40432 + }, + { + "epoch": 1.8940834777720523, + "grad_norm": 0.6195009626602951, + "learning_rate": 3.668364898064408e-08, + "loss": 0.2816, + "step": 40433 + }, + { + "epoch": 1.8941303227619806, + "grad_norm": 0.5666469525795406, + "learning_rate": 3.6651286610867867e-08, + "loss": 0.273, + "step": 40434 + }, + { + "epoch": 1.8941771677519088, + "grad_norm": 0.5849426020713621, + "learning_rate": 3.661893841704456e-08, + "loss": 0.2725, + "step": 40435 + }, + { + "epoch": 1.8942240127418373, + "grad_norm": 0.5659429532189867, + "learning_rate": 3.658660439936013e-08, + "loss": 0.2619, + "step": 40436 + }, + { + "epoch": 1.8942708577317657, + "grad_norm": 0.553018060427613, + "learning_rate": 3.655428455800081e-08, + "loss": 0.2588, + "step": 40437 + }, + { + "epoch": 1.8943177027216938, + "grad_norm": 0.5908549852344982, + "learning_rate": 3.652197889315229e-08, + "loss": 0.2571, + "step": 40438 + }, + { + "epoch": 1.8943645477116222, + "grad_norm": 0.5707964055344344, + "learning_rate": 3.6489687405000526e-08, + "loss": 0.2545, + "step": 40439 + }, + { + "epoch": 1.8944113927015507, + "grad_norm": 0.5923222892557564, + "learning_rate": 3.6457410093731215e-08, + "loss": 0.2723, + "step": 40440 + }, + { + "epoch": 1.894458237691479, + "grad_norm": 0.5778456724422667, + "learning_rate": 3.6425146959530586e-08, + "loss": 0.265, + "step": 40441 + }, + { + "epoch": 1.8945050826814072, + "grad_norm": 0.5635931037962618, + "learning_rate": 3.6392898002584054e-08, + "loss": 0.2709, + "step": 40442 + }, + { + "epoch": 1.8945519276713356, + "grad_norm": 0.5752335089428154, + "learning_rate": 3.636066322307702e-08, + "loss": 0.2697, + "step": 40443 + }, + { + "epoch": 1.8945987726612639, + "grad_norm": 0.6008593232915459, + "learning_rate": 3.632844262119545e-08, + "loss": 0.2656, + "step": 40444 + }, + { + "epoch": 1.894645617651192, + "grad_norm": 0.63397710625271, + "learning_rate": 3.629623619712447e-08, + "loss": 0.2851, + "step": 40445 + }, + { + "epoch": 1.8946924626411206, + "grad_norm": 0.576850110251982, + "learning_rate": 3.626404395104921e-08, + "loss": 0.2899, + "step": 40446 + }, + { + "epoch": 1.8947393076310488, + "grad_norm": 0.6376295024445303, + "learning_rate": 3.623186588315508e-08, + "loss": 0.2786, + "step": 40447 + }, + { + "epoch": 1.894786152620977, + "grad_norm": 0.6478532205342769, + "learning_rate": 3.619970199362749e-08, + "loss": 0.266, + "step": 40448 + }, + { + "epoch": 1.8948329976109055, + "grad_norm": 0.5835228504614267, + "learning_rate": 3.616755228265101e-08, + "loss": 0.2656, + "step": 40449 + }, + { + "epoch": 1.894879842600834, + "grad_norm": 0.6019797980082924, + "learning_rate": 3.613541675041132e-08, + "loss": 0.2693, + "step": 40450 + }, + { + "epoch": 1.8949266875907622, + "grad_norm": 0.5966339634777272, + "learning_rate": 3.610329539709329e-08, + "loss": 0.2775, + "step": 40451 + }, + { + "epoch": 1.8949735325806905, + "grad_norm": 0.6287604886476529, + "learning_rate": 3.6071188222881195e-08, + "loss": 0.2585, + "step": 40452 + }, + { + "epoch": 1.895020377570619, + "grad_norm": 0.6380359081465409, + "learning_rate": 3.6039095227960174e-08, + "loss": 0.2801, + "step": 40453 + }, + { + "epoch": 1.8950672225605472, + "grad_norm": 0.6191379117298327, + "learning_rate": 3.6007016412514804e-08, + "loss": 0.2751, + "step": 40454 + }, + { + "epoch": 1.8951140675504754, + "grad_norm": 0.5970032292557679, + "learning_rate": 3.597495177672966e-08, + "loss": 0.2671, + "step": 40455 + }, + { + "epoch": 1.8951609125404039, + "grad_norm": 0.5894250186429267, + "learning_rate": 3.594290132078959e-08, + "loss": 0.2693, + "step": 40456 + }, + { + "epoch": 1.895207757530332, + "grad_norm": 0.6301815858287377, + "learning_rate": 3.591086504487889e-08, + "loss": 0.2732, + "step": 40457 + }, + { + "epoch": 1.8952546025202603, + "grad_norm": 0.6109845140989941, + "learning_rate": 3.587884294918159e-08, + "loss": 0.2908, + "step": 40458 + }, + { + "epoch": 1.8953014475101888, + "grad_norm": 0.590964983708423, + "learning_rate": 3.584683503388226e-08, + "loss": 0.2598, + "step": 40459 + }, + { + "epoch": 1.8953482925001173, + "grad_norm": 0.5526030068352805, + "learning_rate": 3.581484129916518e-08, + "loss": 0.2568, + "step": 40460 + }, + { + "epoch": 1.8953951374900453, + "grad_norm": 0.5129313234445719, + "learning_rate": 3.5782861745214117e-08, + "loss": 0.2403, + "step": 40461 + }, + { + "epoch": 1.8954419824799738, + "grad_norm": 0.6142522424569574, + "learning_rate": 3.575089637221335e-08, + "loss": 0.2772, + "step": 40462 + }, + { + "epoch": 1.8954888274699022, + "grad_norm": 0.5678253900035861, + "learning_rate": 3.571894518034719e-08, + "loss": 0.2615, + "step": 40463 + }, + { + "epoch": 1.8955356724598305, + "grad_norm": 0.5695811655404052, + "learning_rate": 3.568700816979881e-08, + "loss": 0.2547, + "step": 40464 + }, + { + "epoch": 1.8955825174497587, + "grad_norm": 0.5878162741845271, + "learning_rate": 3.5655085340752515e-08, + "loss": 0.2629, + "step": 40465 + }, + { + "epoch": 1.8956293624396872, + "grad_norm": 0.5848584563784696, + "learning_rate": 3.562317669339149e-08, + "loss": 0.2671, + "step": 40466 + }, + { + "epoch": 1.8956762074296154, + "grad_norm": 0.5757145393030564, + "learning_rate": 3.559128222789976e-08, + "loss": 0.2593, + "step": 40467 + }, + { + "epoch": 1.8957230524195436, + "grad_norm": 0.6101198965120694, + "learning_rate": 3.555940194446078e-08, + "loss": 0.27, + "step": 40468 + }, + { + "epoch": 1.895769897409472, + "grad_norm": 0.5814664393245068, + "learning_rate": 3.55275358432583e-08, + "loss": 0.2578, + "step": 40469 + }, + { + "epoch": 1.8958167423994003, + "grad_norm": 0.5983981296120195, + "learning_rate": 3.549568392447522e-08, + "loss": 0.2514, + "step": 40470 + }, + { + "epoch": 1.8958635873893286, + "grad_norm": 0.5947609110834132, + "learning_rate": 3.546384618829502e-08, + "loss": 0.2692, + "step": 40471 + }, + { + "epoch": 1.895910432379257, + "grad_norm": 0.6176962805281249, + "learning_rate": 3.543202263490114e-08, + "loss": 0.2799, + "step": 40472 + }, + { + "epoch": 1.8959572773691855, + "grad_norm": 0.5838018456169529, + "learning_rate": 3.540021326447651e-08, + "loss": 0.2508, + "step": 40473 + }, + { + "epoch": 1.8960041223591135, + "grad_norm": 0.591405867476178, + "learning_rate": 3.536841807720404e-08, + "loss": 0.2586, + "step": 40474 + }, + { + "epoch": 1.896050967349042, + "grad_norm": 0.6098297037033712, + "learning_rate": 3.53366370732669e-08, + "loss": 0.2696, + "step": 40475 + }, + { + "epoch": 1.8960978123389705, + "grad_norm": 0.6287457523318524, + "learning_rate": 3.530487025284801e-08, + "loss": 0.2619, + "step": 40476 + }, + { + "epoch": 1.8961446573288987, + "grad_norm": 0.5635868831638976, + "learning_rate": 3.527311761613e-08, + "loss": 0.2646, + "step": 40477 + }, + { + "epoch": 1.896191502318827, + "grad_norm": 0.5959156742380277, + "learning_rate": 3.5241379163295785e-08, + "loss": 0.2774, + "step": 40478 + }, + { + "epoch": 1.8962383473087554, + "grad_norm": 0.631945506843569, + "learning_rate": 3.520965489452799e-08, + "loss": 0.289, + "step": 40479 + }, + { + "epoch": 1.8962851922986836, + "grad_norm": 0.5597994228096355, + "learning_rate": 3.5177944810008966e-08, + "loss": 0.2651, + "step": 40480 + }, + { + "epoch": 1.8963320372886119, + "grad_norm": 0.573401310913741, + "learning_rate": 3.514624890992163e-08, + "loss": 0.2606, + "step": 40481 + }, + { + "epoch": 1.8963788822785403, + "grad_norm": 0.5972984439327881, + "learning_rate": 3.511456719444778e-08, + "loss": 0.2834, + "step": 40482 + }, + { + "epoch": 1.8964257272684686, + "grad_norm": 0.6198045191155844, + "learning_rate": 3.508289966377032e-08, + "loss": 0.2717, + "step": 40483 + }, + { + "epoch": 1.8964725722583968, + "grad_norm": 0.6003258366122559, + "learning_rate": 3.5051246318071056e-08, + "loss": 0.2719, + "step": 40484 + }, + { + "epoch": 1.8965194172483253, + "grad_norm": 0.6405589341307696, + "learning_rate": 3.5019607157532335e-08, + "loss": 0.2923, + "step": 40485 + }, + { + "epoch": 1.8965662622382538, + "grad_norm": 0.5956598513258891, + "learning_rate": 3.498798218233651e-08, + "loss": 0.2718, + "step": 40486 + }, + { + "epoch": 1.896613107228182, + "grad_norm": 0.5607549945752932, + "learning_rate": 3.495637139266511e-08, + "loss": 0.2561, + "step": 40487 + }, + { + "epoch": 1.8966599522181102, + "grad_norm": 0.5564531628268413, + "learning_rate": 3.492477478870021e-08, + "loss": 0.2705, + "step": 40488 + }, + { + "epoch": 1.8967067972080387, + "grad_norm": 0.6791161508002894, + "learning_rate": 3.489319237062361e-08, + "loss": 0.2973, + "step": 40489 + }, + { + "epoch": 1.896753642197967, + "grad_norm": 0.5511874060702443, + "learning_rate": 3.48616241386171e-08, + "loss": 0.2439, + "step": 40490 + }, + { + "epoch": 1.8968004871878952, + "grad_norm": 0.6025456951930541, + "learning_rate": 3.483007009286249e-08, + "loss": 0.2676, + "step": 40491 + }, + { + "epoch": 1.8968473321778236, + "grad_norm": 0.594358831442709, + "learning_rate": 3.4798530233541014e-08, + "loss": 0.2755, + "step": 40492 + }, + { + "epoch": 1.8968941771677519, + "grad_norm": 0.613684440661602, + "learning_rate": 3.476700456083476e-08, + "loss": 0.2792, + "step": 40493 + }, + { + "epoch": 1.8969410221576801, + "grad_norm": 0.5757983105789904, + "learning_rate": 3.4735493074924684e-08, + "loss": 0.267, + "step": 40494 + }, + { + "epoch": 1.8969878671476086, + "grad_norm": 0.6334466734628889, + "learning_rate": 3.470399577599204e-08, + "loss": 0.2728, + "step": 40495 + }, + { + "epoch": 1.897034712137537, + "grad_norm": 0.5773193162275604, + "learning_rate": 3.467251266421806e-08, + "loss": 0.262, + "step": 40496 + }, + { + "epoch": 1.897081557127465, + "grad_norm": 0.5839260922425357, + "learning_rate": 3.464104373978455e-08, + "loss": 0.2765, + "step": 40497 + }, + { + "epoch": 1.8971284021173935, + "grad_norm": 0.6134951650472504, + "learning_rate": 3.4609589002871926e-08, + "loss": 0.276, + "step": 40498 + }, + { + "epoch": 1.897175247107322, + "grad_norm": 0.6152275725532781, + "learning_rate": 3.4578148453661696e-08, + "loss": 0.2779, + "step": 40499 + }, + { + "epoch": 1.8972220920972502, + "grad_norm": 0.5741814207547361, + "learning_rate": 3.454672209233428e-08, + "loss": 0.2699, + "step": 40500 + }, + { + "epoch": 1.8972689370871785, + "grad_norm": 0.592352890859809, + "learning_rate": 3.4515309919071204e-08, + "loss": 0.2653, + "step": 40501 + }, + { + "epoch": 1.897315782077107, + "grad_norm": 0.604060362967585, + "learning_rate": 3.4483911934052594e-08, + "loss": 0.2757, + "step": 40502 + }, + { + "epoch": 1.8973626270670352, + "grad_norm": 0.5903266365759893, + "learning_rate": 3.445252813745914e-08, + "loss": 0.2586, + "step": 40503 + }, + { + "epoch": 1.8974094720569634, + "grad_norm": 0.6326677454900663, + "learning_rate": 3.442115852947209e-08, + "loss": 0.2975, + "step": 40504 + }, + { + "epoch": 1.8974563170468919, + "grad_norm": 0.5630236699821555, + "learning_rate": 3.438980311027157e-08, + "loss": 0.2599, + "step": 40505 + }, + { + "epoch": 1.8975031620368201, + "grad_norm": 0.5893072266258751, + "learning_rate": 3.435846188003772e-08, + "loss": 0.2591, + "step": 40506 + }, + { + "epoch": 1.8975500070267484, + "grad_norm": 0.5662495324656281, + "learning_rate": 3.4327134838951224e-08, + "loss": 0.2612, + "step": 40507 + }, + { + "epoch": 1.8975968520166768, + "grad_norm": 0.6142130929545951, + "learning_rate": 3.429582198719278e-08, + "loss": 0.2732, + "step": 40508 + }, + { + "epoch": 1.8976436970066053, + "grad_norm": 0.5735999249417734, + "learning_rate": 3.426452332494168e-08, + "loss": 0.2512, + "step": 40509 + }, + { + "epoch": 1.8976905419965333, + "grad_norm": 0.5746186745334212, + "learning_rate": 3.423323885237889e-08, + "loss": 0.2506, + "step": 40510 + }, + { + "epoch": 1.8977373869864618, + "grad_norm": 0.57444923581532, + "learning_rate": 3.420196856968372e-08, + "loss": 0.2548, + "step": 40511 + }, + { + "epoch": 1.8977842319763902, + "grad_norm": 0.568571282505093, + "learning_rate": 3.417071247703685e-08, + "loss": 0.2632, + "step": 40512 + }, + { + "epoch": 1.8978310769663185, + "grad_norm": 0.5958369686141443, + "learning_rate": 3.413947057461731e-08, + "loss": 0.263, + "step": 40513 + }, + { + "epoch": 1.8978779219562467, + "grad_norm": 0.5922510074448625, + "learning_rate": 3.410824286260578e-08, + "loss": 0.2622, + "step": 40514 + }, + { + "epoch": 1.8979247669461752, + "grad_norm": 0.6076650193903238, + "learning_rate": 3.407702934118129e-08, + "loss": 0.2744, + "step": 40515 + }, + { + "epoch": 1.8979716119361034, + "grad_norm": 0.5921493200811712, + "learning_rate": 3.4045830010523694e-08, + "loss": 0.2863, + "step": 40516 + }, + { + "epoch": 1.8980184569260317, + "grad_norm": 0.622855822617305, + "learning_rate": 3.401464487081257e-08, + "loss": 0.2802, + "step": 40517 + }, + { + "epoch": 1.8980653019159601, + "grad_norm": 0.6570794336462636, + "learning_rate": 3.3983473922227506e-08, + "loss": 0.282, + "step": 40518 + }, + { + "epoch": 1.8981121469058884, + "grad_norm": 0.6202595597410355, + "learning_rate": 3.3952317164947514e-08, + "loss": 0.2817, + "step": 40519 + }, + { + "epoch": 1.8981589918958166, + "grad_norm": 0.5797511178570913, + "learning_rate": 3.3921174599152175e-08, + "loss": 0.2598, + "step": 40520 + }, + { + "epoch": 1.898205836885745, + "grad_norm": 0.5692580717008316, + "learning_rate": 3.389004622502079e-08, + "loss": 0.2766, + "step": 40521 + }, + { + "epoch": 1.8982526818756735, + "grad_norm": 0.5887010660377185, + "learning_rate": 3.385893204273211e-08, + "loss": 0.2909, + "step": 40522 + }, + { + "epoch": 1.8982995268656018, + "grad_norm": 0.6381867343215035, + "learning_rate": 3.38278320524657e-08, + "loss": 0.2871, + "step": 40523 + }, + { + "epoch": 1.89834637185553, + "grad_norm": 0.6346603299605174, + "learning_rate": 3.3796746254400324e-08, + "loss": 0.2651, + "step": 40524 + }, + { + "epoch": 1.8983932168454585, + "grad_norm": 0.616112677658672, + "learning_rate": 3.3765674648714445e-08, + "loss": 0.2705, + "step": 40525 + }, + { + "epoch": 1.8984400618353867, + "grad_norm": 0.5605274723067794, + "learning_rate": 3.373461723558763e-08, + "loss": 0.2635, + "step": 40526 + }, + { + "epoch": 1.898486906825315, + "grad_norm": 0.5942983506536559, + "learning_rate": 3.37035740151978e-08, + "loss": 0.2788, + "step": 40527 + }, + { + "epoch": 1.8985337518152434, + "grad_norm": 0.5471275870060774, + "learning_rate": 3.3672544987724544e-08, + "loss": 0.2533, + "step": 40528 + }, + { + "epoch": 1.8985805968051717, + "grad_norm": 0.6123309167728385, + "learning_rate": 3.364153015334548e-08, + "loss": 0.268, + "step": 40529 + }, + { + "epoch": 1.8986274417951, + "grad_norm": 0.598692339359727, + "learning_rate": 3.361052951223992e-08, + "loss": 0.2671, + "step": 40530 + }, + { + "epoch": 1.8986742867850284, + "grad_norm": 0.6341242562182761, + "learning_rate": 3.35795430645855e-08, + "loss": 0.2871, + "step": 40531 + }, + { + "epoch": 1.8987211317749568, + "grad_norm": 0.5527652946589181, + "learning_rate": 3.354857081056095e-08, + "loss": 0.2675, + "step": 40532 + }, + { + "epoch": 1.8987679767648848, + "grad_norm": 0.5659899494399846, + "learning_rate": 3.3517612750344485e-08, + "loss": 0.2654, + "step": 40533 + }, + { + "epoch": 1.8988148217548133, + "grad_norm": 0.5996899961756766, + "learning_rate": 3.3486668884114006e-08, + "loss": 0.2715, + "step": 40534 + }, + { + "epoch": 1.8988616667447418, + "grad_norm": 0.55650183494721, + "learning_rate": 3.345573921204826e-08, + "loss": 0.2492, + "step": 40535 + }, + { + "epoch": 1.89890851173467, + "grad_norm": 0.6138209078333118, + "learning_rate": 3.3424823734324596e-08, + "loss": 0.2705, + "step": 40536 + }, + { + "epoch": 1.8989553567245983, + "grad_norm": 0.5840236862695513, + "learning_rate": 3.339392245112122e-08, + "loss": 0.2501, + "step": 40537 + }, + { + "epoch": 1.8990022017145267, + "grad_norm": 0.5790830031413249, + "learning_rate": 3.336303536261576e-08, + "loss": 0.2664, + "step": 40538 + }, + { + "epoch": 1.899049046704455, + "grad_norm": 0.6276083129148109, + "learning_rate": 3.333216246898585e-08, + "loss": 0.2879, + "step": 40539 + }, + { + "epoch": 1.8990958916943832, + "grad_norm": 0.5955036661673241, + "learning_rate": 3.330130377040969e-08, + "loss": 0.2703, + "step": 40540 + }, + { + "epoch": 1.8991427366843117, + "grad_norm": 0.5944233648718912, + "learning_rate": 3.327045926706435e-08, + "loss": 0.2847, + "step": 40541 + }, + { + "epoch": 1.89918958167424, + "grad_norm": 0.5798681440929592, + "learning_rate": 3.3239628959127745e-08, + "loss": 0.2725, + "step": 40542 + }, + { + "epoch": 1.8992364266641681, + "grad_norm": 0.5925924092217668, + "learning_rate": 3.320881284677696e-08, + "loss": 0.2605, + "step": 40543 + }, + { + "epoch": 1.8992832716540966, + "grad_norm": 0.6106053702381464, + "learning_rate": 3.317801093018935e-08, + "loss": 0.274, + "step": 40544 + }, + { + "epoch": 1.899330116644025, + "grad_norm": 0.5937573192940775, + "learning_rate": 3.314722320954228e-08, + "loss": 0.262, + "step": 40545 + }, + { + "epoch": 1.899376961633953, + "grad_norm": 0.5771559258104123, + "learning_rate": 3.31164496850131e-08, + "loss": 0.2591, + "step": 40546 + }, + { + "epoch": 1.8994238066238815, + "grad_norm": 0.5602456165751384, + "learning_rate": 3.308569035677861e-08, + "loss": 0.2476, + "step": 40547 + }, + { + "epoch": 1.89947065161381, + "grad_norm": 0.5700121624636355, + "learning_rate": 3.30549452250159e-08, + "loss": 0.2727, + "step": 40548 + }, + { + "epoch": 1.8995174966037383, + "grad_norm": 0.5860190486362723, + "learning_rate": 3.302421428990204e-08, + "loss": 0.2645, + "step": 40549 + }, + { + "epoch": 1.8995643415936665, + "grad_norm": 0.5794638184543769, + "learning_rate": 3.299349755161385e-08, + "loss": 0.2631, + "step": 40550 + }, + { + "epoch": 1.899611186583595, + "grad_norm": 0.568321593796018, + "learning_rate": 3.296279501032784e-08, + "loss": 0.2657, + "step": 40551 + }, + { + "epoch": 1.8996580315735232, + "grad_norm": 0.6064396930035496, + "learning_rate": 3.293210666622082e-08, + "loss": 0.2719, + "step": 40552 + }, + { + "epoch": 1.8997048765634514, + "grad_norm": 0.6252944770995515, + "learning_rate": 3.290143251946959e-08, + "loss": 0.2783, + "step": 40553 + }, + { + "epoch": 1.89975172155338, + "grad_norm": 0.5995999057702359, + "learning_rate": 3.2870772570250675e-08, + "loss": 0.2744, + "step": 40554 + }, + { + "epoch": 1.8997985665433081, + "grad_norm": 0.6507032399102647, + "learning_rate": 3.284012681874005e-08, + "loss": 0.2804, + "step": 40555 + }, + { + "epoch": 1.8998454115332364, + "grad_norm": 0.6070705750000148, + "learning_rate": 3.280949526511479e-08, + "loss": 0.2781, + "step": 40556 + }, + { + "epoch": 1.8998922565231648, + "grad_norm": 0.6059365848654165, + "learning_rate": 3.2778877909550585e-08, + "loss": 0.2661, + "step": 40557 + }, + { + "epoch": 1.8999391015130933, + "grad_norm": 0.6025229142647467, + "learning_rate": 3.2748274752223694e-08, + "loss": 0.2599, + "step": 40558 + }, + { + "epoch": 1.8999859465030215, + "grad_norm": 0.6421681165382492, + "learning_rate": 3.2717685793310363e-08, + "loss": 0.2737, + "step": 40559 + }, + { + "epoch": 1.9000327914929498, + "grad_norm": 0.6064912935423797, + "learning_rate": 3.268711103298683e-08, + "loss": 0.2617, + "step": 40560 + }, + { + "epoch": 1.9000796364828783, + "grad_norm": 0.6172497991151394, + "learning_rate": 3.2656550471428794e-08, + "loss": 0.2607, + "step": 40561 + }, + { + "epoch": 1.9001264814728065, + "grad_norm": 0.6341819239229363, + "learning_rate": 3.262600410881195e-08, + "loss": 0.2783, + "step": 40562 + }, + { + "epoch": 1.9001733264627347, + "grad_norm": 0.6614334589828302, + "learning_rate": 3.259547194531254e-08, + "loss": 0.2867, + "step": 40563 + }, + { + "epoch": 1.9002201714526632, + "grad_norm": 0.5898462221902951, + "learning_rate": 3.256495398110598e-08, + "loss": 0.2681, + "step": 40564 + }, + { + "epoch": 1.9002670164425914, + "grad_norm": 0.6380324898485961, + "learning_rate": 3.253445021636797e-08, + "loss": 0.2887, + "step": 40565 + }, + { + "epoch": 1.9003138614325197, + "grad_norm": 0.5805072442763345, + "learning_rate": 3.2503960651273924e-08, + "loss": 0.2764, + "step": 40566 + }, + { + "epoch": 1.9003607064224481, + "grad_norm": 0.6442953115232606, + "learning_rate": 3.247348528599953e-08, + "loss": 0.2861, + "step": 40567 + }, + { + "epoch": 1.9004075514123766, + "grad_norm": 0.6427157484208, + "learning_rate": 3.244302412071965e-08, + "loss": 0.2696, + "step": 40568 + }, + { + "epoch": 1.9004543964023046, + "grad_norm": 0.6038429016361506, + "learning_rate": 3.241257715561025e-08, + "loss": 0.2614, + "step": 40569 + }, + { + "epoch": 1.900501241392233, + "grad_norm": 0.5715266964069481, + "learning_rate": 3.238214439084647e-08, + "loss": 0.2783, + "step": 40570 + }, + { + "epoch": 1.9005480863821616, + "grad_norm": 0.6161829751883378, + "learning_rate": 3.2351725826602896e-08, + "loss": 0.2492, + "step": 40571 + }, + { + "epoch": 1.9005949313720898, + "grad_norm": 0.5659195289854964, + "learning_rate": 3.232132146305522e-08, + "loss": 0.2743, + "step": 40572 + }, + { + "epoch": 1.900641776362018, + "grad_norm": 0.6416235978608833, + "learning_rate": 3.229093130037803e-08, + "loss": 0.2811, + "step": 40573 + }, + { + "epoch": 1.9006886213519465, + "grad_norm": 0.5447650683515701, + "learning_rate": 3.226055533874617e-08, + "loss": 0.264, + "step": 40574 + }, + { + "epoch": 1.9007354663418747, + "grad_norm": 0.6078064898118204, + "learning_rate": 3.223019357833451e-08, + "loss": 0.2764, + "step": 40575 + }, + { + "epoch": 1.900782311331803, + "grad_norm": 0.567274176855584, + "learning_rate": 3.219984601931792e-08, + "loss": 0.2779, + "step": 40576 + }, + { + "epoch": 1.9008291563217314, + "grad_norm": 0.5990921985924028, + "learning_rate": 3.216951266187124e-08, + "loss": 0.2681, + "step": 40577 + }, + { + "epoch": 1.9008760013116597, + "grad_norm": 0.6440744706037167, + "learning_rate": 3.213919350616851e-08, + "loss": 0.2692, + "step": 40578 + }, + { + "epoch": 1.900922846301588, + "grad_norm": 0.5868745563296807, + "learning_rate": 3.210888855238431e-08, + "loss": 0.2701, + "step": 40579 + }, + { + "epoch": 1.9009696912915164, + "grad_norm": 0.6363263741571303, + "learning_rate": 3.2078597800693223e-08, + "loss": 0.2884, + "step": 40580 + }, + { + "epoch": 1.9010165362814448, + "grad_norm": 0.5928872440199964, + "learning_rate": 3.2048321251269554e-08, + "loss": 0.2712, + "step": 40581 + }, + { + "epoch": 1.9010633812713729, + "grad_norm": 0.5602905273994042, + "learning_rate": 3.2018058904287604e-08, + "loss": 0.2605, + "step": 40582 + }, + { + "epoch": 1.9011102262613013, + "grad_norm": 0.5526048002910603, + "learning_rate": 3.198781075992141e-08, + "loss": 0.2513, + "step": 40583 + }, + { + "epoch": 1.9011570712512298, + "grad_norm": 0.5822855565812152, + "learning_rate": 3.195757681834499e-08, + "loss": 0.2762, + "step": 40584 + }, + { + "epoch": 1.901203916241158, + "grad_norm": 0.6321715305715531, + "learning_rate": 3.192735707973266e-08, + "loss": 0.279, + "step": 40585 + }, + { + "epoch": 1.9012507612310863, + "grad_norm": 0.584251412678831, + "learning_rate": 3.1897151544257595e-08, + "loss": 0.2591, + "step": 40586 + }, + { + "epoch": 1.9012976062210147, + "grad_norm": 0.6030729769828233, + "learning_rate": 3.18669602120944e-08, + "loss": 0.2709, + "step": 40587 + }, + { + "epoch": 1.901344451210943, + "grad_norm": 0.5995806146646969, + "learning_rate": 3.183678308341626e-08, + "loss": 0.2727, + "step": 40588 + }, + { + "epoch": 1.9013912962008712, + "grad_norm": 0.5867184113809892, + "learning_rate": 3.180662015839719e-08, + "loss": 0.2622, + "step": 40589 + }, + { + "epoch": 1.9014381411907997, + "grad_norm": 0.5616117344836669, + "learning_rate": 3.177647143721069e-08, + "loss": 0.2639, + "step": 40590 + }, + { + "epoch": 1.901484986180728, + "grad_norm": 0.6179242519128824, + "learning_rate": 3.1746336920030485e-08, + "loss": 0.2878, + "step": 40591 + }, + { + "epoch": 1.9015318311706562, + "grad_norm": 0.6052992283829417, + "learning_rate": 3.1716216607029504e-08, + "loss": 0.2786, + "step": 40592 + }, + { + "epoch": 1.9015786761605846, + "grad_norm": 0.5830429696068474, + "learning_rate": 3.1686110498381215e-08, + "loss": 0.2709, + "step": 40593 + }, + { + "epoch": 1.901625521150513, + "grad_norm": 0.5804460685549956, + "learning_rate": 3.16560185942591e-08, + "loss": 0.2814, + "step": 40594 + }, + { + "epoch": 1.9016723661404413, + "grad_norm": 0.592916407067423, + "learning_rate": 3.162594089483606e-08, + "loss": 0.2722, + "step": 40595 + }, + { + "epoch": 1.9017192111303696, + "grad_norm": 0.5769035452572931, + "learning_rate": 3.159587740028558e-08, + "loss": 0.2631, + "step": 40596 + }, + { + "epoch": 1.901766056120298, + "grad_norm": 0.5798907683484181, + "learning_rate": 3.1565828110780294e-08, + "loss": 0.2728, + "step": 40597 + }, + { + "epoch": 1.9018129011102263, + "grad_norm": 0.5526491472305609, + "learning_rate": 3.153579302649312e-08, + "loss": 0.2516, + "step": 40598 + }, + { + "epoch": 1.9018597461001545, + "grad_norm": 0.5677308021811994, + "learning_rate": 3.1505772147597256e-08, + "loss": 0.2752, + "step": 40599 + }, + { + "epoch": 1.901906591090083, + "grad_norm": 0.5898651843091209, + "learning_rate": 3.147576547426506e-08, + "loss": 0.2618, + "step": 40600 + }, + { + "epoch": 1.9019534360800112, + "grad_norm": 0.5364783288423991, + "learning_rate": 3.144577300666946e-08, + "loss": 0.2525, + "step": 40601 + }, + { + "epoch": 1.9020002810699395, + "grad_norm": 0.5692469729903558, + "learning_rate": 3.1415794744983075e-08, + "loss": 0.2501, + "step": 40602 + }, + { + "epoch": 1.902047126059868, + "grad_norm": 0.5667117037349474, + "learning_rate": 3.1385830689378284e-08, + "loss": 0.253, + "step": 40603 + }, + { + "epoch": 1.9020939710497964, + "grad_norm": 0.5876024371676458, + "learning_rate": 3.135588084002744e-08, + "loss": 0.255, + "step": 40604 + }, + { + "epoch": 1.9021408160397244, + "grad_norm": 0.5920749090973825, + "learning_rate": 3.1325945197103466e-08, + "loss": 0.2573, + "step": 40605 + }, + { + "epoch": 1.9021876610296529, + "grad_norm": 0.603007809217537, + "learning_rate": 3.129602376077762e-08, + "loss": 0.2644, + "step": 40606 + }, + { + "epoch": 1.9022345060195813, + "grad_norm": 0.6023222793622621, + "learning_rate": 3.126611653122308e-08, + "loss": 0.2829, + "step": 40607 + }, + { + "epoch": 1.9022813510095096, + "grad_norm": 0.6058937795650283, + "learning_rate": 3.123622350861111e-08, + "loss": 0.278, + "step": 40608 + }, + { + "epoch": 1.9023281959994378, + "grad_norm": 0.6014572396322015, + "learning_rate": 3.120634469311462e-08, + "loss": 0.2652, + "step": 40609 + }, + { + "epoch": 1.9023750409893663, + "grad_norm": 0.5922115637970489, + "learning_rate": 3.117648008490487e-08, + "loss": 0.2607, + "step": 40610 + }, + { + "epoch": 1.9024218859792945, + "grad_norm": 0.5770824252506104, + "learning_rate": 3.1146629684154214e-08, + "loss": 0.2699, + "step": 40611 + }, + { + "epoch": 1.9024687309692228, + "grad_norm": 0.5719601162187603, + "learning_rate": 3.111679349103392e-08, + "loss": 0.2675, + "step": 40612 + }, + { + "epoch": 1.9025155759591512, + "grad_norm": 0.5741735084894312, + "learning_rate": 3.1086971505716045e-08, + "loss": 0.2605, + "step": 40613 + }, + { + "epoch": 1.9025624209490795, + "grad_norm": 0.5623822405275505, + "learning_rate": 3.105716372837214e-08, + "loss": 0.2401, + "step": 40614 + }, + { + "epoch": 1.9026092659390077, + "grad_norm": 0.6166366552595738, + "learning_rate": 3.102737015917373e-08, + "loss": 0.2724, + "step": 40615 + }, + { + "epoch": 1.9026561109289362, + "grad_norm": 0.6036490348975507, + "learning_rate": 3.099759079829206e-08, + "loss": 0.2706, + "step": 40616 + }, + { + "epoch": 1.9027029559188646, + "grad_norm": 0.5950254310889868, + "learning_rate": 3.0967825645898956e-08, + "loss": 0.2622, + "step": 40617 + }, + { + "epoch": 1.9027498009087926, + "grad_norm": 0.5931945959334805, + "learning_rate": 3.093807470216509e-08, + "loss": 0.2482, + "step": 40618 + }, + { + "epoch": 1.902796645898721, + "grad_norm": 0.6661834211890025, + "learning_rate": 3.090833796726256e-08, + "loss": 0.2943, + "step": 40619 + }, + { + "epoch": 1.9028434908886496, + "grad_norm": 0.6239755447434938, + "learning_rate": 3.087861544136151e-08, + "loss": 0.277, + "step": 40620 + }, + { + "epoch": 1.9028903358785778, + "grad_norm": 0.6487139401267448, + "learning_rate": 3.084890712463373e-08, + "loss": 0.277, + "step": 40621 + }, + { + "epoch": 1.902937180868506, + "grad_norm": 0.577451190378543, + "learning_rate": 3.081921301724966e-08, + "loss": 0.272, + "step": 40622 + }, + { + "epoch": 1.9029840258584345, + "grad_norm": 0.5802145403592244, + "learning_rate": 3.0789533119380545e-08, + "loss": 0.2676, + "step": 40623 + }, + { + "epoch": 1.9030308708483628, + "grad_norm": 0.5840476149309137, + "learning_rate": 3.07598674311968e-08, + "loss": 0.26, + "step": 40624 + }, + { + "epoch": 1.903077715838291, + "grad_norm": 0.6268830771913781, + "learning_rate": 3.073021595286968e-08, + "loss": 0.2793, + "step": 40625 + }, + { + "epoch": 1.9031245608282195, + "grad_norm": 0.5821295551846887, + "learning_rate": 3.07005786845696e-08, + "loss": 0.2624, + "step": 40626 + }, + { + "epoch": 1.9031714058181477, + "grad_norm": 0.5863032712870258, + "learning_rate": 3.067095562646699e-08, + "loss": 0.275, + "step": 40627 + }, + { + "epoch": 1.903218250808076, + "grad_norm": 0.5440496801366418, + "learning_rate": 3.064134677873226e-08, + "loss": 0.2404, + "step": 40628 + }, + { + "epoch": 1.9032650957980044, + "grad_norm": 0.6064106345882099, + "learning_rate": 3.0611752141535834e-08, + "loss": 0.2708, + "step": 40629 + }, + { + "epoch": 1.9033119407879329, + "grad_norm": 0.6010485993966553, + "learning_rate": 3.0582171715048406e-08, + "loss": 0.2636, + "step": 40630 + }, + { + "epoch": 1.903358785777861, + "grad_norm": 0.6161145624837692, + "learning_rate": 3.0552605499439556e-08, + "loss": 0.2746, + "step": 40631 + }, + { + "epoch": 1.9034056307677893, + "grad_norm": 0.5799236722239237, + "learning_rate": 3.0523053494879994e-08, + "loss": 0.2521, + "step": 40632 + }, + { + "epoch": 1.9034524757577178, + "grad_norm": 0.5342771410017237, + "learning_rate": 3.0493515701539854e-08, + "loss": 0.2505, + "step": 40633 + }, + { + "epoch": 1.903499320747646, + "grad_norm": 0.5964180279847522, + "learning_rate": 3.046399211958873e-08, + "loss": 0.2594, + "step": 40634 + }, + { + "epoch": 1.9035461657375743, + "grad_norm": 0.6300016449189028, + "learning_rate": 3.043448274919647e-08, + "loss": 0.2711, + "step": 40635 + }, + { + "epoch": 1.9035930107275028, + "grad_norm": 0.6117545093422395, + "learning_rate": 3.0404987590532954e-08, + "loss": 0.2629, + "step": 40636 + }, + { + "epoch": 1.903639855717431, + "grad_norm": 0.6175077065009447, + "learning_rate": 3.037550664376804e-08, + "loss": 0.2703, + "step": 40637 + }, + { + "epoch": 1.9036867007073592, + "grad_norm": 0.6190256563315999, + "learning_rate": 3.034603990907159e-08, + "loss": 0.2858, + "step": 40638 + }, + { + "epoch": 1.9037335456972877, + "grad_norm": 0.5953417885154719, + "learning_rate": 3.031658738661292e-08, + "loss": 0.2706, + "step": 40639 + }, + { + "epoch": 1.9037803906872162, + "grad_norm": 0.5915708376730521, + "learning_rate": 3.0287149076561326e-08, + "loss": 0.2856, + "step": 40640 + }, + { + "epoch": 1.9038272356771442, + "grad_norm": 0.5207016672186268, + "learning_rate": 3.025772497908669e-08, + "loss": 0.2571, + "step": 40641 + }, + { + "epoch": 1.9038740806670726, + "grad_norm": 0.5953040860142632, + "learning_rate": 3.022831509435803e-08, + "loss": 0.2767, + "step": 40642 + }, + { + "epoch": 1.903920925657001, + "grad_norm": 0.5603666100307382, + "learning_rate": 3.019891942254466e-08, + "loss": 0.2543, + "step": 40643 + }, + { + "epoch": 1.9039677706469293, + "grad_norm": 0.5706047363515458, + "learning_rate": 3.016953796381561e-08, + "loss": 0.2583, + "step": 40644 + }, + { + "epoch": 1.9040146156368576, + "grad_norm": 0.5879813349410252, + "learning_rate": 3.0140170718340464e-08, + "loss": 0.2536, + "step": 40645 + }, + { + "epoch": 1.904061460626786, + "grad_norm": 0.6056168667102182, + "learning_rate": 3.011081768628743e-08, + "loss": 0.2643, + "step": 40646 + }, + { + "epoch": 1.9041083056167143, + "grad_norm": 0.650482119158659, + "learning_rate": 3.008147886782609e-08, + "loss": 0.2924, + "step": 40647 + }, + { + "epoch": 1.9041551506066425, + "grad_norm": 0.641950342766223, + "learning_rate": 3.0052154263125186e-08, + "loss": 0.2811, + "step": 40648 + }, + { + "epoch": 1.904201995596571, + "grad_norm": 0.625295989094081, + "learning_rate": 3.0022843872353214e-08, + "loss": 0.2633, + "step": 40649 + }, + { + "epoch": 1.9042488405864992, + "grad_norm": 0.6055941241326787, + "learning_rate": 2.9993547695678915e-08, + "loss": 0.2649, + "step": 40650 + }, + { + "epoch": 1.9042956855764275, + "grad_norm": 0.5961220298443889, + "learning_rate": 2.996426573327077e-08, + "loss": 0.2679, + "step": 40651 + }, + { + "epoch": 1.904342530566356, + "grad_norm": 0.6354795853710984, + "learning_rate": 2.9934997985297807e-08, + "loss": 0.2917, + "step": 40652 + }, + { + "epoch": 1.9043893755562844, + "grad_norm": 0.5712879710238606, + "learning_rate": 2.9905744451927674e-08, + "loss": 0.2712, + "step": 40653 + }, + { + "epoch": 1.9044362205462124, + "grad_norm": 0.6235533346260929, + "learning_rate": 2.9876505133329684e-08, + "loss": 0.2711, + "step": 40654 + }, + { + "epoch": 1.9044830655361409, + "grad_norm": 0.5977991726236117, + "learning_rate": 2.9847280029671186e-08, + "loss": 0.268, + "step": 40655 + }, + { + "epoch": 1.9045299105260693, + "grad_norm": 0.5841443725696291, + "learning_rate": 2.9818069141120673e-08, + "loss": 0.2604, + "step": 40656 + }, + { + "epoch": 1.9045767555159976, + "grad_norm": 0.5668254821661558, + "learning_rate": 2.978887246784662e-08, + "loss": 0.2542, + "step": 40657 + }, + { + "epoch": 1.9046236005059258, + "grad_norm": 0.6284978794353209, + "learning_rate": 2.975969001001666e-08, + "loss": 0.2834, + "step": 40658 + }, + { + "epoch": 1.9046704454958543, + "grad_norm": 0.6388070264010249, + "learning_rate": 2.9730521767798725e-08, + "loss": 0.2643, + "step": 40659 + }, + { + "epoch": 1.9047172904857825, + "grad_norm": 0.928855588375785, + "learning_rate": 2.970136774136073e-08, + "loss": 0.2674, + "step": 40660 + }, + { + "epoch": 1.9047641354757108, + "grad_norm": 0.5823405398657913, + "learning_rate": 2.96722279308706e-08, + "loss": 0.2615, + "step": 40661 + }, + { + "epoch": 1.9048109804656392, + "grad_norm": 0.6312647397551273, + "learning_rate": 2.96431023364957e-08, + "loss": 0.256, + "step": 40662 + }, + { + "epoch": 1.9048578254555675, + "grad_norm": 0.5380129422287306, + "learning_rate": 2.961399095840395e-08, + "loss": 0.2496, + "step": 40663 + }, + { + "epoch": 1.9049046704454957, + "grad_norm": 0.5846924996013393, + "learning_rate": 2.9584893796762993e-08, + "loss": 0.2681, + "step": 40664 + }, + { + "epoch": 1.9049515154354242, + "grad_norm": 0.6052579750117119, + "learning_rate": 2.955581085173992e-08, + "loss": 0.2799, + "step": 40665 + }, + { + "epoch": 1.9049983604253526, + "grad_norm": 0.6139795408272342, + "learning_rate": 2.9526742123502093e-08, + "loss": 0.2756, + "step": 40666 + }, + { + "epoch": 1.9050452054152809, + "grad_norm": 0.6056075097293439, + "learning_rate": 2.9497687612216885e-08, + "loss": 0.2902, + "step": 40667 + }, + { + "epoch": 1.9050920504052091, + "grad_norm": 0.6103281565217247, + "learning_rate": 2.9468647318051936e-08, + "loss": 0.2869, + "step": 40668 + }, + { + "epoch": 1.9051388953951376, + "grad_norm": 0.571238057889081, + "learning_rate": 2.94396212411735e-08, + "loss": 0.2522, + "step": 40669 + }, + { + "epoch": 1.9051857403850658, + "grad_norm": 0.5463517325039071, + "learning_rate": 2.94106093817495e-08, + "loss": 0.2369, + "step": 40670 + }, + { + "epoch": 1.905232585374994, + "grad_norm": 0.5861258014935451, + "learning_rate": 2.938161173994619e-08, + "loss": 0.2625, + "step": 40671 + }, + { + "epoch": 1.9052794303649225, + "grad_norm": 0.6354240094895324, + "learning_rate": 2.9352628315930943e-08, + "loss": 0.2876, + "step": 40672 + }, + { + "epoch": 1.9053262753548508, + "grad_norm": 0.604410405335194, + "learning_rate": 2.9323659109870284e-08, + "loss": 0.2876, + "step": 40673 + }, + { + "epoch": 1.905373120344779, + "grad_norm": 0.6327116882004191, + "learning_rate": 2.929470412193075e-08, + "loss": 0.2848, + "step": 40674 + }, + { + "epoch": 1.9054199653347075, + "grad_norm": 0.6322325667886988, + "learning_rate": 2.926576335227971e-08, + "loss": 0.2903, + "step": 40675 + }, + { + "epoch": 1.905466810324636, + "grad_norm": 0.6061258453633063, + "learning_rate": 2.923683680108286e-08, + "loss": 0.2712, + "step": 40676 + }, + { + "epoch": 1.905513655314564, + "grad_norm": 0.5889140406338029, + "learning_rate": 2.920792446850701e-08, + "loss": 0.266, + "step": 40677 + }, + { + "epoch": 1.9055605003044924, + "grad_norm": 0.5858680609076233, + "learning_rate": 2.91790263547187e-08, + "loss": 0.2709, + "step": 40678 + }, + { + "epoch": 1.9056073452944209, + "grad_norm": 0.5878546457073087, + "learning_rate": 2.91501424598839e-08, + "loss": 0.2553, + "step": 40679 + }, + { + "epoch": 1.9056541902843491, + "grad_norm": 0.6336505296445473, + "learning_rate": 2.9121272784168876e-08, + "loss": 0.2858, + "step": 40680 + }, + { + "epoch": 1.9057010352742774, + "grad_norm": 0.5568217908688454, + "learning_rate": 2.9092417327740153e-08, + "loss": 0.2531, + "step": 40681 + }, + { + "epoch": 1.9057478802642058, + "grad_norm": 0.6101868715645042, + "learning_rate": 2.9063576090763434e-08, + "loss": 0.2746, + "step": 40682 + }, + { + "epoch": 1.905794725254134, + "grad_norm": 0.5909798311533516, + "learning_rate": 2.9034749073404978e-08, + "loss": 0.2628, + "step": 40683 + }, + { + "epoch": 1.9058415702440623, + "grad_norm": 0.5887409813546467, + "learning_rate": 2.900593627583048e-08, + "loss": 0.2793, + "step": 40684 + }, + { + "epoch": 1.9058884152339908, + "grad_norm": 0.5837977989688271, + "learning_rate": 2.897713769820537e-08, + "loss": 0.2605, + "step": 40685 + }, + { + "epoch": 1.905935260223919, + "grad_norm": 0.6239026249142479, + "learning_rate": 2.89483533406959e-08, + "loss": 0.2895, + "step": 40686 + }, + { + "epoch": 1.9059821052138473, + "grad_norm": 0.6185981483255761, + "learning_rate": 2.8919583203467772e-08, + "loss": 0.2685, + "step": 40687 + }, + { + "epoch": 1.9060289502037757, + "grad_norm": 0.5939649661903987, + "learning_rate": 2.8890827286686406e-08, + "loss": 0.2584, + "step": 40688 + }, + { + "epoch": 1.9060757951937042, + "grad_norm": 0.6103293230707773, + "learning_rate": 2.886208559051695e-08, + "loss": 0.2711, + "step": 40689 + }, + { + "epoch": 1.9061226401836322, + "grad_norm": 0.6102497381545334, + "learning_rate": 2.883335811512539e-08, + "loss": 0.2658, + "step": 40690 + }, + { + "epoch": 1.9061694851735607, + "grad_norm": 0.6395116949105492, + "learning_rate": 2.8804644860676578e-08, + "loss": 0.2838, + "step": 40691 + }, + { + "epoch": 1.9062163301634891, + "grad_norm": 0.591720292401607, + "learning_rate": 2.877594582733595e-08, + "loss": 0.2797, + "step": 40692 + }, + { + "epoch": 1.9062631751534174, + "grad_norm": 0.6218024067295547, + "learning_rate": 2.8747261015268645e-08, + "loss": 0.2714, + "step": 40693 + }, + { + "epoch": 1.9063100201433456, + "grad_norm": 0.604732990501903, + "learning_rate": 2.8718590424639814e-08, + "loss": 0.275, + "step": 40694 + }, + { + "epoch": 1.906356865133274, + "grad_norm": 0.5650242054369434, + "learning_rate": 2.868993405561432e-08, + "loss": 0.2592, + "step": 40695 + }, + { + "epoch": 1.9064037101232023, + "grad_norm": 0.5850300569023048, + "learning_rate": 2.8661291908357035e-08, + "loss": 0.2644, + "step": 40696 + }, + { + "epoch": 1.9064505551131306, + "grad_norm": 0.5866309508988515, + "learning_rate": 2.8632663983032826e-08, + "loss": 0.2689, + "step": 40697 + }, + { + "epoch": 1.906497400103059, + "grad_norm": 0.6386050169454741, + "learning_rate": 2.860405027980656e-08, + "loss": 0.2881, + "step": 40698 + }, + { + "epoch": 1.9065442450929873, + "grad_norm": 0.6557578313671321, + "learning_rate": 2.857545079884283e-08, + "loss": 0.2814, + "step": 40699 + }, + { + "epoch": 1.9065910900829155, + "grad_norm": 0.6117528753146027, + "learning_rate": 2.8546865540306224e-08, + "loss": 0.2735, + "step": 40700 + }, + { + "epoch": 1.906637935072844, + "grad_norm": 0.5610656777368338, + "learning_rate": 2.8518294504361056e-08, + "loss": 0.2645, + "step": 40701 + }, + { + "epoch": 1.9066847800627724, + "grad_norm": 0.5776834391341666, + "learning_rate": 2.8489737691172193e-08, + "loss": 0.2683, + "step": 40702 + }, + { + "epoch": 1.9067316250527007, + "grad_norm": 0.5849877882206418, + "learning_rate": 2.8461195100903672e-08, + "loss": 0.2731, + "step": 40703 + }, + { + "epoch": 1.906778470042629, + "grad_norm": 0.5812626312627491, + "learning_rate": 2.8432666733719527e-08, + "loss": 0.255, + "step": 40704 + }, + { + "epoch": 1.9068253150325574, + "grad_norm": 0.6460672285079506, + "learning_rate": 2.840415258978435e-08, + "loss": 0.2804, + "step": 40705 + }, + { + "epoch": 1.9068721600224856, + "grad_norm": 0.564004511669994, + "learning_rate": 2.8375652669262176e-08, + "loss": 0.2585, + "step": 40706 + }, + { + "epoch": 1.9069190050124138, + "grad_norm": 0.5971322006163157, + "learning_rate": 2.8347166972316766e-08, + "loss": 0.2606, + "step": 40707 + }, + { + "epoch": 1.9069658500023423, + "grad_norm": 0.5896072867461368, + "learning_rate": 2.8318695499112148e-08, + "loss": 0.2794, + "step": 40708 + }, + { + "epoch": 1.9070126949922706, + "grad_norm": 0.5661137091879053, + "learning_rate": 2.8290238249812086e-08, + "loss": 0.2593, + "step": 40709 + }, + { + "epoch": 1.9070595399821988, + "grad_norm": 0.6002261764441882, + "learning_rate": 2.826179522458089e-08, + "loss": 0.2681, + "step": 40710 + }, + { + "epoch": 1.9071063849721273, + "grad_norm": 0.5356626177112579, + "learning_rate": 2.8233366423581486e-08, + "loss": 0.2551, + "step": 40711 + }, + { + "epoch": 1.9071532299620557, + "grad_norm": 0.5640372471326465, + "learning_rate": 2.8204951846977914e-08, + "loss": 0.2524, + "step": 40712 + }, + { + "epoch": 1.9072000749519837, + "grad_norm": 0.5750312782891351, + "learning_rate": 2.817655149493337e-08, + "loss": 0.2575, + "step": 40713 + }, + { + "epoch": 1.9072469199419122, + "grad_norm": 0.6151723815155705, + "learning_rate": 2.8148165367611612e-08, + "loss": 0.2752, + "step": 40714 + }, + { + "epoch": 1.9072937649318407, + "grad_norm": 0.6165058826142644, + "learning_rate": 2.811979346517585e-08, + "loss": 0.2792, + "step": 40715 + }, + { + "epoch": 1.907340609921769, + "grad_norm": 0.6321378455979938, + "learning_rate": 2.8091435787789557e-08, + "loss": 0.272, + "step": 40716 + }, + { + "epoch": 1.9073874549116971, + "grad_norm": 0.5969458883335995, + "learning_rate": 2.806309233561566e-08, + "loss": 0.2511, + "step": 40717 + }, + { + "epoch": 1.9074342999016256, + "grad_norm": 0.6119409142183244, + "learning_rate": 2.8034763108817363e-08, + "loss": 0.2801, + "step": 40718 + }, + { + "epoch": 1.9074811448915538, + "grad_norm": 0.6071998017270933, + "learning_rate": 2.800644810755787e-08, + "loss": 0.2621, + "step": 40719 + }, + { + "epoch": 1.907527989881482, + "grad_norm": 0.5817695893246353, + "learning_rate": 2.797814733199983e-08, + "loss": 0.2693, + "step": 40720 + }, + { + "epoch": 1.9075748348714106, + "grad_norm": 0.5798070913870371, + "learning_rate": 2.794986078230616e-08, + "loss": 0.2499, + "step": 40721 + }, + { + "epoch": 1.9076216798613388, + "grad_norm": 0.5730928876122041, + "learning_rate": 2.792158845863979e-08, + "loss": 0.2573, + "step": 40722 + }, + { + "epoch": 1.907668524851267, + "grad_norm": 0.6010946845263879, + "learning_rate": 2.7893330361163096e-08, + "loss": 0.2714, + "step": 40723 + }, + { + "epoch": 1.9077153698411955, + "grad_norm": 0.5701326906686955, + "learning_rate": 2.7865086490039274e-08, + "loss": 0.2586, + "step": 40724 + }, + { + "epoch": 1.907762214831124, + "grad_norm": 0.6059537287653325, + "learning_rate": 2.7836856845430692e-08, + "loss": 0.2615, + "step": 40725 + }, + { + "epoch": 1.907809059821052, + "grad_norm": 0.567589535028514, + "learning_rate": 2.7808641427499172e-08, + "loss": 0.2577, + "step": 40726 + }, + { + "epoch": 1.9078559048109804, + "grad_norm": 0.5894388203408438, + "learning_rate": 2.7780440236407914e-08, + "loss": 0.2669, + "step": 40727 + }, + { + "epoch": 1.907902749800909, + "grad_norm": 0.6025465649938397, + "learning_rate": 2.7752253272318453e-08, + "loss": 0.2754, + "step": 40728 + }, + { + "epoch": 1.9079495947908371, + "grad_norm": 0.5878485862395396, + "learning_rate": 2.7724080535393717e-08, + "loss": 0.2621, + "step": 40729 + }, + { + "epoch": 1.9079964397807654, + "grad_norm": 0.5481367508762373, + "learning_rate": 2.769592202579552e-08, + "loss": 0.2648, + "step": 40730 + }, + { + "epoch": 1.9080432847706938, + "grad_norm": 0.6106279511439628, + "learning_rate": 2.7667777743685953e-08, + "loss": 0.2692, + "step": 40731 + }, + { + "epoch": 1.908090129760622, + "grad_norm": 0.6530178698098531, + "learning_rate": 2.7639647689226833e-08, + "loss": 0.2796, + "step": 40732 + }, + { + "epoch": 1.9081369747505503, + "grad_norm": 0.61141496896607, + "learning_rate": 2.7611531862579978e-08, + "loss": 0.2794, + "step": 40733 + }, + { + "epoch": 1.9081838197404788, + "grad_norm": 0.6005361112517992, + "learning_rate": 2.7583430263907472e-08, + "loss": 0.2838, + "step": 40734 + }, + { + "epoch": 1.908230664730407, + "grad_norm": 0.5685689106989488, + "learning_rate": 2.755534289337086e-08, + "loss": 0.2658, + "step": 40735 + }, + { + "epoch": 1.9082775097203353, + "grad_norm": 0.5883530060855064, + "learning_rate": 2.7527269751131958e-08, + "loss": 0.2563, + "step": 40736 + }, + { + "epoch": 1.9083243547102637, + "grad_norm": 0.573030307456287, + "learning_rate": 2.749921083735202e-08, + "loss": 0.2725, + "step": 40737 + }, + { + "epoch": 1.9083711997001922, + "grad_norm": 0.6154088678421895, + "learning_rate": 2.7471166152192863e-08, + "loss": 0.2809, + "step": 40738 + }, + { + "epoch": 1.9084180446901204, + "grad_norm": 0.6270679377583713, + "learning_rate": 2.7443135695815748e-08, + "loss": 0.2928, + "step": 40739 + }, + { + "epoch": 1.9084648896800487, + "grad_norm": 0.5935225851532495, + "learning_rate": 2.7415119468381657e-08, + "loss": 0.2634, + "step": 40740 + }, + { + "epoch": 1.9085117346699771, + "grad_norm": 0.5921478297729064, + "learning_rate": 2.7387117470052126e-08, + "loss": 0.2825, + "step": 40741 + }, + { + "epoch": 1.9085585796599054, + "grad_norm": 0.5805022530344868, + "learning_rate": 2.7359129700988418e-08, + "loss": 0.2705, + "step": 40742 + }, + { + "epoch": 1.9086054246498336, + "grad_norm": 0.5753323853614715, + "learning_rate": 2.7331156161351513e-08, + "loss": 0.2716, + "step": 40743 + }, + { + "epoch": 1.908652269639762, + "grad_norm": 0.6605928672101556, + "learning_rate": 2.7303196851302115e-08, + "loss": 0.2836, + "step": 40744 + }, + { + "epoch": 1.9086991146296903, + "grad_norm": 0.5813706883470264, + "learning_rate": 2.7275251771001488e-08, + "loss": 0.2533, + "step": 40745 + }, + { + "epoch": 1.9087459596196186, + "grad_norm": 0.6046307155862222, + "learning_rate": 2.7247320920610056e-08, + "loss": 0.2633, + "step": 40746 + }, + { + "epoch": 1.908792804609547, + "grad_norm": 0.5551545240652889, + "learning_rate": 2.7219404300289075e-08, + "loss": 0.2586, + "step": 40747 + }, + { + "epoch": 1.9088396495994755, + "grad_norm": 0.555700406069411, + "learning_rate": 2.7191501910198705e-08, + "loss": 0.2614, + "step": 40748 + }, + { + "epoch": 1.9088864945894035, + "grad_norm": 0.6088501464270789, + "learning_rate": 2.7163613750499918e-08, + "loss": 0.2755, + "step": 40749 + }, + { + "epoch": 1.908933339579332, + "grad_norm": 0.5907264382192821, + "learning_rate": 2.713573982135287e-08, + "loss": 0.2563, + "step": 40750 + }, + { + "epoch": 1.9089801845692604, + "grad_norm": 0.6270946585824931, + "learning_rate": 2.7107880122917985e-08, + "loss": 0.2844, + "step": 40751 + }, + { + "epoch": 1.9090270295591887, + "grad_norm": 0.6059305507332442, + "learning_rate": 2.7080034655355968e-08, + "loss": 0.2785, + "step": 40752 + }, + { + "epoch": 1.909073874549117, + "grad_norm": 0.5994658816995503, + "learning_rate": 2.7052203418826695e-08, + "loss": 0.2618, + "step": 40753 + }, + { + "epoch": 1.9091207195390454, + "grad_norm": 0.606255550527834, + "learning_rate": 2.7024386413490312e-08, + "loss": 0.2857, + "step": 40754 + }, + { + "epoch": 1.9091675645289736, + "grad_norm": 0.5731058634115723, + "learning_rate": 2.6996583639507244e-08, + "loss": 0.2503, + "step": 40755 + }, + { + "epoch": 1.9092144095189019, + "grad_norm": 0.5733386153095397, + "learning_rate": 2.696879509703737e-08, + "loss": 0.2503, + "step": 40756 + }, + { + "epoch": 1.9092612545088303, + "grad_norm": 0.6584474462784765, + "learning_rate": 2.694102078624028e-08, + "loss": 0.2774, + "step": 40757 + }, + { + "epoch": 1.9093080994987586, + "grad_norm": 0.6228579098711551, + "learning_rate": 2.6913260707275847e-08, + "loss": 0.2801, + "step": 40758 + }, + { + "epoch": 1.9093549444886868, + "grad_norm": 0.580592500713264, + "learning_rate": 2.6885514860304495e-08, + "loss": 0.2664, + "step": 40759 + }, + { + "epoch": 1.9094017894786153, + "grad_norm": 0.569934281891308, + "learning_rate": 2.6857783245484993e-08, + "loss": 0.2558, + "step": 40760 + }, + { + "epoch": 1.9094486344685437, + "grad_norm": 0.5549817425535555, + "learning_rate": 2.683006586297776e-08, + "loss": 0.2482, + "step": 40761 + }, + { + "epoch": 1.9094954794584718, + "grad_norm": 0.5485076416408728, + "learning_rate": 2.6802362712941564e-08, + "loss": 0.2503, + "step": 40762 + }, + { + "epoch": 1.9095423244484002, + "grad_norm": 0.5830476006722238, + "learning_rate": 2.6774673795536277e-08, + "loss": 0.259, + "step": 40763 + }, + { + "epoch": 1.9095891694383287, + "grad_norm": 0.6103162639928952, + "learning_rate": 2.6746999110920934e-08, + "loss": 0.2674, + "step": 40764 + }, + { + "epoch": 1.909636014428257, + "grad_norm": 0.5849019591839945, + "learning_rate": 2.6719338659255134e-08, + "loss": 0.2704, + "step": 40765 + }, + { + "epoch": 1.9096828594181852, + "grad_norm": 0.5969507880963315, + "learning_rate": 2.6691692440697914e-08, + "loss": 0.2797, + "step": 40766 + }, + { + "epoch": 1.9097297044081136, + "grad_norm": 0.6551063637067668, + "learning_rate": 2.666406045540859e-08, + "loss": 0.2946, + "step": 40767 + }, + { + "epoch": 1.9097765493980419, + "grad_norm": 0.6711721651142346, + "learning_rate": 2.6636442703545652e-08, + "loss": 0.2858, + "step": 40768 + }, + { + "epoch": 1.90982339438797, + "grad_norm": 0.5983839190419138, + "learning_rate": 2.660883918526813e-08, + "loss": 0.2809, + "step": 40769 + }, + { + "epoch": 1.9098702393778986, + "grad_norm": 0.5649853759473185, + "learning_rate": 2.658124990073535e-08, + "loss": 0.2669, + "step": 40770 + }, + { + "epoch": 1.9099170843678268, + "grad_norm": 0.6732402461350419, + "learning_rate": 2.6553674850105794e-08, + "loss": 0.2777, + "step": 40771 + }, + { + "epoch": 1.909963929357755, + "grad_norm": 0.5848393775917243, + "learning_rate": 2.652611403353794e-08, + "loss": 0.2691, + "step": 40772 + }, + { + "epoch": 1.9100107743476835, + "grad_norm": 0.5993638484391304, + "learning_rate": 2.6498567451191113e-08, + "loss": 0.2576, + "step": 40773 + }, + { + "epoch": 1.910057619337612, + "grad_norm": 0.5744457170348704, + "learning_rate": 2.647103510322324e-08, + "loss": 0.2621, + "step": 40774 + }, + { + "epoch": 1.9101044643275402, + "grad_norm": 0.6024471024164276, + "learning_rate": 2.644351698979253e-08, + "loss": 0.2761, + "step": 40775 + }, + { + "epoch": 1.9101513093174685, + "grad_norm": 0.6294098008295718, + "learning_rate": 2.6416013111057736e-08, + "loss": 0.2937, + "step": 40776 + }, + { + "epoch": 1.910198154307397, + "grad_norm": 0.6143814272259998, + "learning_rate": 2.6388523467177074e-08, + "loss": 0.272, + "step": 40777 + }, + { + "epoch": 1.9102449992973252, + "grad_norm": 0.6454038231749791, + "learning_rate": 2.6361048058308748e-08, + "loss": 0.2779, + "step": 40778 + }, + { + "epoch": 1.9102918442872534, + "grad_norm": 0.6334268640306171, + "learning_rate": 2.633358688461124e-08, + "loss": 0.2916, + "step": 40779 + }, + { + "epoch": 1.9103386892771819, + "grad_norm": 0.6374609231379029, + "learning_rate": 2.630613994624165e-08, + "loss": 0.2804, + "step": 40780 + }, + { + "epoch": 1.91038553426711, + "grad_norm": 0.6590352950784666, + "learning_rate": 2.6278707243359014e-08, + "loss": 0.2828, + "step": 40781 + }, + { + "epoch": 1.9104323792570383, + "grad_norm": 0.6107939495383049, + "learning_rate": 2.6251288776120153e-08, + "loss": 0.2869, + "step": 40782 + }, + { + "epoch": 1.9104792242469668, + "grad_norm": 0.6459923584632811, + "learning_rate": 2.6223884544683832e-08, + "loss": 0.2727, + "step": 40783 + }, + { + "epoch": 1.9105260692368953, + "grad_norm": 0.5966873267472671, + "learning_rate": 2.6196494549206863e-08, + "loss": 0.2805, + "step": 40784 + }, + { + "epoch": 1.9105729142268233, + "grad_norm": 0.601473826663684, + "learning_rate": 2.616911878984746e-08, + "loss": 0.2605, + "step": 40785 + }, + { + "epoch": 1.9106197592167518, + "grad_norm": 0.6007427092644357, + "learning_rate": 2.6141757266763268e-08, + "loss": 0.278, + "step": 40786 + }, + { + "epoch": 1.9106666042066802, + "grad_norm": 0.6445395074294287, + "learning_rate": 2.611440998011111e-08, + "loss": 0.2747, + "step": 40787 + }, + { + "epoch": 1.9107134491966085, + "grad_norm": 0.6029952226269726, + "learning_rate": 2.6087076930049192e-08, + "loss": 0.2735, + "step": 40788 + }, + { + "epoch": 1.9107602941865367, + "grad_norm": 0.6125992224719055, + "learning_rate": 2.6059758116734056e-08, + "loss": 0.2635, + "step": 40789 + }, + { + "epoch": 1.9108071391764652, + "grad_norm": 0.617135444427265, + "learning_rate": 2.6032453540323078e-08, + "loss": 0.2812, + "step": 40790 + }, + { + "epoch": 1.9108539841663934, + "grad_norm": 0.5757423295396046, + "learning_rate": 2.6005163200973627e-08, + "loss": 0.2658, + "step": 40791 + }, + { + "epoch": 1.9109008291563216, + "grad_norm": 0.6191720775180038, + "learning_rate": 2.5977887098842802e-08, + "loss": 0.2672, + "step": 40792 + }, + { + "epoch": 1.91094767414625, + "grad_norm": 0.5756989054597796, + "learning_rate": 2.5950625234087145e-08, + "loss": 0.2575, + "step": 40793 + }, + { + "epoch": 1.9109945191361783, + "grad_norm": 0.6090520414376234, + "learning_rate": 2.5923377606864032e-08, + "loss": 0.2915, + "step": 40794 + }, + { + "epoch": 1.9110413641261066, + "grad_norm": 0.5753143738735156, + "learning_rate": 2.589614421733e-08, + "loss": 0.2669, + "step": 40795 + }, + { + "epoch": 1.911088209116035, + "grad_norm": 0.6133113810009984, + "learning_rate": 2.586892506564187e-08, + "loss": 0.2687, + "step": 40796 + }, + { + "epoch": 1.9111350541059635, + "grad_norm": 0.5840344812004744, + "learning_rate": 2.5841720151955908e-08, + "loss": 0.2587, + "step": 40797 + }, + { + "epoch": 1.9111818990958915, + "grad_norm": 0.6435118075253334, + "learning_rate": 2.5814529476429486e-08, + "loss": 0.2721, + "step": 40798 + }, + { + "epoch": 1.91122874408582, + "grad_norm": 0.6087740708637882, + "learning_rate": 2.578735303921831e-08, + "loss": 0.2652, + "step": 40799 + }, + { + "epoch": 1.9112755890757485, + "grad_norm": 0.5441751413275626, + "learning_rate": 2.5760190840478925e-08, + "loss": 0.2464, + "step": 40800 + }, + { + "epoch": 1.9113224340656767, + "grad_norm": 0.5930501786379321, + "learning_rate": 2.5733042880367876e-08, + "loss": 0.2784, + "step": 40801 + }, + { + "epoch": 1.911369279055605, + "grad_norm": 0.5966411137573739, + "learning_rate": 2.570590915904142e-08, + "loss": 0.2799, + "step": 40802 + }, + { + "epoch": 1.9114161240455334, + "grad_norm": 0.5859584730555071, + "learning_rate": 2.5678789676655268e-08, + "loss": 0.2603, + "step": 40803 + }, + { + "epoch": 1.9114629690354616, + "grad_norm": 0.611655932307087, + "learning_rate": 2.565168443336624e-08, + "loss": 0.2954, + "step": 40804 + }, + { + "epoch": 1.9115098140253899, + "grad_norm": 0.6405947507837432, + "learning_rate": 2.562459342932949e-08, + "loss": 0.269, + "step": 40805 + }, + { + "epoch": 1.9115566590153183, + "grad_norm": 0.5983866219086865, + "learning_rate": 2.5597516664701283e-08, + "loss": 0.2731, + "step": 40806 + }, + { + "epoch": 1.9116035040052466, + "grad_norm": 0.6123528068735113, + "learning_rate": 2.5570454139637602e-08, + "loss": 0.2787, + "step": 40807 + }, + { + "epoch": 1.9116503489951748, + "grad_norm": 0.5995415997055565, + "learning_rate": 2.5543405854294156e-08, + "loss": 0.2582, + "step": 40808 + }, + { + "epoch": 1.9116971939851033, + "grad_norm": 0.6149157550482932, + "learning_rate": 2.55163718088261e-08, + "loss": 0.2722, + "step": 40809 + }, + { + "epoch": 1.9117440389750318, + "grad_norm": 0.5976944323587344, + "learning_rate": 2.54893520033897e-08, + "loss": 0.2573, + "step": 40810 + }, + { + "epoch": 1.91179088396496, + "grad_norm": 0.5977223325233494, + "learning_rate": 2.5462346438140105e-08, + "loss": 0.263, + "step": 40811 + }, + { + "epoch": 1.9118377289548882, + "grad_norm": 0.6358584822662108, + "learning_rate": 2.543535511323275e-08, + "loss": 0.2782, + "step": 40812 + }, + { + "epoch": 1.9118845739448167, + "grad_norm": 0.6132680047537736, + "learning_rate": 2.5408378028822787e-08, + "loss": 0.2781, + "step": 40813 + }, + { + "epoch": 1.911931418934745, + "grad_norm": 0.5654949140084226, + "learning_rate": 2.538141518506565e-08, + "loss": 0.2624, + "step": 40814 + }, + { + "epoch": 1.9119782639246732, + "grad_norm": 0.6201111375293744, + "learning_rate": 2.5354466582116765e-08, + "loss": 0.2587, + "step": 40815 + }, + { + "epoch": 1.9120251089146016, + "grad_norm": 0.576299597730008, + "learning_rate": 2.5327532220130734e-08, + "loss": 0.2586, + "step": 40816 + }, + { + "epoch": 1.9120719539045299, + "grad_norm": 0.6138630188548562, + "learning_rate": 2.530061209926299e-08, + "loss": 0.2627, + "step": 40817 + }, + { + "epoch": 1.9121187988944581, + "grad_norm": 0.5709392223771051, + "learning_rate": 2.5273706219667848e-08, + "loss": 0.253, + "step": 40818 + }, + { + "epoch": 1.9121656438843866, + "grad_norm": 0.6278444585796772, + "learning_rate": 2.5246814581500746e-08, + "loss": 0.2683, + "step": 40819 + }, + { + "epoch": 1.912212488874315, + "grad_norm": 0.575783912325006, + "learning_rate": 2.5219937184916276e-08, + "loss": 0.272, + "step": 40820 + }, + { + "epoch": 1.912259333864243, + "grad_norm": 0.6127281621739419, + "learning_rate": 2.5193074030068766e-08, + "loss": 0.2751, + "step": 40821 + }, + { + "epoch": 1.9123061788541715, + "grad_norm": 0.5758905318703004, + "learning_rate": 2.516622511711364e-08, + "loss": 0.2606, + "step": 40822 + }, + { + "epoch": 1.9123530238441, + "grad_norm": 0.6265709528936352, + "learning_rate": 2.513939044620467e-08, + "loss": 0.2825, + "step": 40823 + }, + { + "epoch": 1.9123998688340282, + "grad_norm": 0.5716664456774567, + "learning_rate": 2.5112570017496173e-08, + "loss": 0.2714, + "step": 40824 + }, + { + "epoch": 1.9124467138239565, + "grad_norm": 0.5495441066024346, + "learning_rate": 2.5085763831143027e-08, + "loss": 0.246, + "step": 40825 + }, + { + "epoch": 1.912493558813885, + "grad_norm": 0.5792432419159068, + "learning_rate": 2.5058971887298998e-08, + "loss": 0.2719, + "step": 40826 + }, + { + "epoch": 1.9125404038038132, + "grad_norm": 0.5895731315708191, + "learning_rate": 2.5032194186118686e-08, + "loss": 0.2612, + "step": 40827 + }, + { + "epoch": 1.9125872487937414, + "grad_norm": 0.6086191755547508, + "learning_rate": 2.5005430727756407e-08, + "loss": 0.2648, + "step": 40828 + }, + { + "epoch": 1.9126340937836699, + "grad_norm": 0.569512914827929, + "learning_rate": 2.4978681512365378e-08, + "loss": 0.2573, + "step": 40829 + }, + { + "epoch": 1.9126809387735981, + "grad_norm": 0.6203081302105421, + "learning_rate": 2.4951946540100193e-08, + "loss": 0.289, + "step": 40830 + }, + { + "epoch": 1.9127277837635264, + "grad_norm": 0.5876599954224011, + "learning_rate": 2.492522581111434e-08, + "loss": 0.2672, + "step": 40831 + }, + { + "epoch": 1.9127746287534548, + "grad_norm": 0.6218601612034678, + "learning_rate": 2.489851932556159e-08, + "loss": 0.2887, + "step": 40832 + }, + { + "epoch": 1.9128214737433833, + "grad_norm": 0.6384553699399451, + "learning_rate": 2.4871827083595978e-08, + "loss": 0.2987, + "step": 40833 + }, + { + "epoch": 1.9128683187333113, + "grad_norm": 0.6239842437301665, + "learning_rate": 2.4845149085371e-08, + "loss": 0.2772, + "step": 40834 + }, + { + "epoch": 1.9129151637232398, + "grad_norm": 0.6065214170674341, + "learning_rate": 2.4818485331039865e-08, + "loss": 0.279, + "step": 40835 + }, + { + "epoch": 1.9129620087131682, + "grad_norm": 0.6099454791296015, + "learning_rate": 2.479183582075634e-08, + "loss": 0.2676, + "step": 40836 + }, + { + "epoch": 1.9130088537030965, + "grad_norm": 0.6194719248380529, + "learning_rate": 2.476520055467363e-08, + "loss": 0.2882, + "step": 40837 + }, + { + "epoch": 1.9130556986930247, + "grad_norm": 0.6000665080346371, + "learning_rate": 2.4738579532945228e-08, + "loss": 0.2672, + "step": 40838 + }, + { + "epoch": 1.9131025436829532, + "grad_norm": 0.6386954175875379, + "learning_rate": 2.471197275572379e-08, + "loss": 0.2746, + "step": 40839 + }, + { + "epoch": 1.9131493886728814, + "grad_norm": 0.6251502112531506, + "learning_rate": 2.468538022316308e-08, + "loss": 0.2671, + "step": 40840 + }, + { + "epoch": 1.9131962336628097, + "grad_norm": 0.6296143005480063, + "learning_rate": 2.4658801935415755e-08, + "loss": 0.2675, + "step": 40841 + }, + { + "epoch": 1.9132430786527381, + "grad_norm": 0.5796068969107014, + "learning_rate": 2.4632237892635025e-08, + "loss": 0.2727, + "step": 40842 + }, + { + "epoch": 1.9132899236426664, + "grad_norm": 0.6333734446888832, + "learning_rate": 2.4605688094973545e-08, + "loss": 0.2741, + "step": 40843 + }, + { + "epoch": 1.9133367686325946, + "grad_norm": 0.6224481837313385, + "learning_rate": 2.457915254258425e-08, + "loss": 0.2725, + "step": 40844 + }, + { + "epoch": 1.913383613622523, + "grad_norm": 0.5745024970836489, + "learning_rate": 2.455263123561952e-08, + "loss": 0.2594, + "step": 40845 + }, + { + "epoch": 1.9134304586124515, + "grad_norm": 0.5766210736314075, + "learning_rate": 2.4526124174232557e-08, + "loss": 0.2624, + "step": 40846 + }, + { + "epoch": 1.9134773036023798, + "grad_norm": 0.6175758570911548, + "learning_rate": 2.4499631358575195e-08, + "loss": 0.2669, + "step": 40847 + }, + { + "epoch": 1.913524148592308, + "grad_norm": 0.5604222383982808, + "learning_rate": 2.4473152788800358e-08, + "loss": 0.2675, + "step": 40848 + }, + { + "epoch": 1.9135709935822365, + "grad_norm": 0.6137524305927577, + "learning_rate": 2.4446688465060432e-08, + "loss": 0.2539, + "step": 40849 + }, + { + "epoch": 1.9136178385721647, + "grad_norm": 0.609797284026779, + "learning_rate": 2.442023838750751e-08, + "loss": 0.2746, + "step": 40850 + }, + { + "epoch": 1.913664683562093, + "grad_norm": 0.6139001170214572, + "learning_rate": 2.4393802556293977e-08, + "loss": 0.2965, + "step": 40851 + }, + { + "epoch": 1.9137115285520214, + "grad_norm": 0.5858145192146899, + "learning_rate": 2.4367380971571653e-08, + "loss": 0.2767, + "step": 40852 + }, + { + "epoch": 1.9137583735419497, + "grad_norm": 0.5860944944584093, + "learning_rate": 2.4340973633493192e-08, + "loss": 0.2477, + "step": 40853 + }, + { + "epoch": 1.913805218531878, + "grad_norm": 0.6324195535240604, + "learning_rate": 2.4314580542210143e-08, + "loss": 0.2768, + "step": 40854 + }, + { + "epoch": 1.9138520635218064, + "grad_norm": 0.5782772076620796, + "learning_rate": 2.4288201697874326e-08, + "loss": 0.2553, + "step": 40855 + }, + { + "epoch": 1.9138989085117348, + "grad_norm": 0.5881063082948661, + "learning_rate": 2.4261837100637563e-08, + "loss": 0.2566, + "step": 40856 + }, + { + "epoch": 1.9139457535016628, + "grad_norm": 0.6037633618313596, + "learning_rate": 2.4235486750651683e-08, + "loss": 0.2797, + "step": 40857 + }, + { + "epoch": 1.9139925984915913, + "grad_norm": 0.5732620531406393, + "learning_rate": 2.42091506480685e-08, + "loss": 0.2547, + "step": 40858 + }, + { + "epoch": 1.9140394434815198, + "grad_norm": 0.5781457087500173, + "learning_rate": 2.4182828793039292e-08, + "loss": 0.2578, + "step": 40859 + }, + { + "epoch": 1.914086288471448, + "grad_norm": 0.5980233101186583, + "learning_rate": 2.41565211857156e-08, + "loss": 0.2629, + "step": 40860 + }, + { + "epoch": 1.9141331334613763, + "grad_norm": 0.5340531064444631, + "learning_rate": 2.4130227826248964e-08, + "loss": 0.2335, + "step": 40861 + }, + { + "epoch": 1.9141799784513047, + "grad_norm": 0.5910881000562718, + "learning_rate": 2.410394871479038e-08, + "loss": 0.2643, + "step": 40862 + }, + { + "epoch": 1.914226823441233, + "grad_norm": 0.6322853751372811, + "learning_rate": 2.4077683851491395e-08, + "loss": 0.2823, + "step": 40863 + }, + { + "epoch": 1.9142736684311612, + "grad_norm": 0.5702270325910809, + "learning_rate": 2.4051433236503276e-08, + "loss": 0.2553, + "step": 40864 + }, + { + "epoch": 1.9143205134210897, + "grad_norm": 0.5917822565587687, + "learning_rate": 2.402519686997673e-08, + "loss": 0.261, + "step": 40865 + }, + { + "epoch": 1.914367358411018, + "grad_norm": 0.6216313149107504, + "learning_rate": 2.3998974752062753e-08, + "loss": 0.2793, + "step": 40866 + }, + { + "epoch": 1.9144142034009461, + "grad_norm": 0.5721827069184271, + "learning_rate": 2.397276688291206e-08, + "loss": 0.2622, + "step": 40867 + }, + { + "epoch": 1.9144610483908746, + "grad_norm": 0.6086068769073915, + "learning_rate": 2.394657326267619e-08, + "loss": 0.271, + "step": 40868 + }, + { + "epoch": 1.914507893380803, + "grad_norm": 0.5962668887880661, + "learning_rate": 2.3920393891505302e-08, + "loss": 0.2778, + "step": 40869 + }, + { + "epoch": 1.914554738370731, + "grad_norm": 0.5877600470384993, + "learning_rate": 2.389422876955011e-08, + "loss": 0.2651, + "step": 40870 + }, + { + "epoch": 1.9146015833606596, + "grad_norm": 0.5945753566133526, + "learning_rate": 2.3868077896961606e-08, + "loss": 0.2731, + "step": 40871 + }, + { + "epoch": 1.914648428350588, + "grad_norm": 0.5767750946172887, + "learning_rate": 2.3841941273889947e-08, + "loss": 0.2526, + "step": 40872 + }, + { + "epoch": 1.9146952733405163, + "grad_norm": 0.625649694268692, + "learning_rate": 2.3815818900485287e-08, + "loss": 0.2857, + "step": 40873 + }, + { + "epoch": 1.9147421183304445, + "grad_norm": 0.6145135108233805, + "learning_rate": 2.3789710776898345e-08, + "loss": 0.2808, + "step": 40874 + }, + { + "epoch": 1.914788963320373, + "grad_norm": 0.6386883241353704, + "learning_rate": 2.3763616903278996e-08, + "loss": 0.2974, + "step": 40875 + }, + { + "epoch": 1.9148358083103012, + "grad_norm": 0.5507369274782214, + "learning_rate": 2.3737537279777956e-08, + "loss": 0.2419, + "step": 40876 + }, + { + "epoch": 1.9148826533002294, + "grad_norm": 0.6220226092719059, + "learning_rate": 2.3711471906545103e-08, + "loss": 0.2717, + "step": 40877 + }, + { + "epoch": 1.914929498290158, + "grad_norm": 0.6076574495541346, + "learning_rate": 2.368542078373004e-08, + "loss": 0.2751, + "step": 40878 + }, + { + "epoch": 1.9149763432800861, + "grad_norm": 0.6338428979766615, + "learning_rate": 2.36593839114832e-08, + "loss": 0.2727, + "step": 40879 + }, + { + "epoch": 1.9150231882700144, + "grad_norm": 0.5689695310134463, + "learning_rate": 2.3633361289953916e-08, + "loss": 0.2692, + "step": 40880 + }, + { + "epoch": 1.9150700332599428, + "grad_norm": 0.585429682892876, + "learning_rate": 2.3607352919292336e-08, + "loss": 0.2688, + "step": 40881 + }, + { + "epoch": 1.9151168782498713, + "grad_norm": 0.6153540954088818, + "learning_rate": 2.3581358799648067e-08, + "loss": 0.273, + "step": 40882 + }, + { + "epoch": 1.9151637232397996, + "grad_norm": 0.5653133322328223, + "learning_rate": 2.3555378931170993e-08, + "loss": 0.2634, + "step": 40883 + }, + { + "epoch": 1.9152105682297278, + "grad_norm": 0.573136910382805, + "learning_rate": 2.3529413314009875e-08, + "loss": 0.2541, + "step": 40884 + }, + { + "epoch": 1.9152574132196563, + "grad_norm": 0.5905454203786864, + "learning_rate": 2.35034619483146e-08, + "loss": 0.2571, + "step": 40885 + }, + { + "epoch": 1.9153042582095845, + "grad_norm": 0.5806835434129729, + "learning_rate": 2.347752483423449e-08, + "loss": 0.2622, + "step": 40886 + }, + { + "epoch": 1.9153511031995127, + "grad_norm": 0.584925913970786, + "learning_rate": 2.345160197191887e-08, + "loss": 0.2692, + "step": 40887 + }, + { + "epoch": 1.9153979481894412, + "grad_norm": 0.6058818876287295, + "learning_rate": 2.3425693361516788e-08, + "loss": 0.2643, + "step": 40888 + }, + { + "epoch": 1.9154447931793694, + "grad_norm": 0.5983662172529338, + "learning_rate": 2.3399799003177293e-08, + "loss": 0.2653, + "step": 40889 + }, + { + "epoch": 1.9154916381692977, + "grad_norm": 0.6206587571084187, + "learning_rate": 2.337391889704943e-08, + "loss": 0.2748, + "step": 40890 + }, + { + "epoch": 1.9155384831592261, + "grad_norm": 0.5646745908917353, + "learning_rate": 2.3348053043282247e-08, + "loss": 0.265, + "step": 40891 + }, + { + "epoch": 1.9155853281491546, + "grad_norm": 0.5860217001885025, + "learning_rate": 2.3322201442024795e-08, + "loss": 0.286, + "step": 40892 + }, + { + "epoch": 1.9156321731390826, + "grad_norm": 0.5929347268404442, + "learning_rate": 2.3296364093425285e-08, + "loss": 0.2661, + "step": 40893 + }, + { + "epoch": 1.915679018129011, + "grad_norm": 0.6268499678353572, + "learning_rate": 2.327054099763276e-08, + "loss": 0.2761, + "step": 40894 + }, + { + "epoch": 1.9157258631189396, + "grad_norm": 0.6177125764915952, + "learning_rate": 2.3244732154795722e-08, + "loss": 0.2752, + "step": 40895 + }, + { + "epoch": 1.9157727081088678, + "grad_norm": 0.6264591520042885, + "learning_rate": 2.321893756506294e-08, + "loss": 0.3005, + "step": 40896 + }, + { + "epoch": 1.915819553098796, + "grad_norm": 0.5785872270935346, + "learning_rate": 2.319315722858234e-08, + "loss": 0.2707, + "step": 40897 + }, + { + "epoch": 1.9158663980887245, + "grad_norm": 0.6426102632696632, + "learning_rate": 2.3167391145502705e-08, + "loss": 0.2628, + "step": 40898 + }, + { + "epoch": 1.9159132430786527, + "grad_norm": 0.6013361939212688, + "learning_rate": 2.314163931597252e-08, + "loss": 0.281, + "step": 40899 + }, + { + "epoch": 1.915960088068581, + "grad_norm": 0.6237247208406022, + "learning_rate": 2.3115901740139446e-08, + "loss": 0.2663, + "step": 40900 + }, + { + "epoch": 1.9160069330585094, + "grad_norm": 0.6108625953755975, + "learning_rate": 2.30901784181517e-08, + "loss": 0.2799, + "step": 40901 + }, + { + "epoch": 1.9160537780484377, + "grad_norm": 0.5893555356288593, + "learning_rate": 2.3064469350157492e-08, + "loss": 0.2592, + "step": 40902 + }, + { + "epoch": 1.916100623038366, + "grad_norm": 0.644336825778218, + "learning_rate": 2.3038774536304763e-08, + "loss": 0.2707, + "step": 40903 + }, + { + "epoch": 1.9161474680282944, + "grad_norm": 0.5741289026984081, + "learning_rate": 2.3013093976741176e-08, + "loss": 0.2628, + "step": 40904 + }, + { + "epoch": 1.9161943130182229, + "grad_norm": 0.5950184550024439, + "learning_rate": 2.2987427671614938e-08, + "loss": 0.2664, + "step": 40905 + }, + { + "epoch": 1.9162411580081509, + "grad_norm": 0.5659528365467789, + "learning_rate": 2.2961775621073434e-08, + "loss": 0.2546, + "step": 40906 + }, + { + "epoch": 1.9162880029980793, + "grad_norm": 0.5592113975206677, + "learning_rate": 2.2936137825264328e-08, + "loss": 0.263, + "step": 40907 + }, + { + "epoch": 1.9163348479880078, + "grad_norm": 0.5689863012342851, + "learning_rate": 2.2910514284334994e-08, + "loss": 0.2626, + "step": 40908 + }, + { + "epoch": 1.916381692977936, + "grad_norm": 0.5818354320213286, + "learning_rate": 2.28849049984331e-08, + "loss": 0.2416, + "step": 40909 + }, + { + "epoch": 1.9164285379678643, + "grad_norm": 0.6301003952267662, + "learning_rate": 2.285930996770602e-08, + "loss": 0.2835, + "step": 40910 + }, + { + "epoch": 1.9164753829577927, + "grad_norm": 0.6093751037734492, + "learning_rate": 2.283372919230087e-08, + "loss": 0.2814, + "step": 40911 + }, + { + "epoch": 1.916522227947721, + "grad_norm": 0.6021870254363768, + "learning_rate": 2.280816267236502e-08, + "loss": 0.2781, + "step": 40912 + }, + { + "epoch": 1.9165690729376492, + "grad_norm": 0.6752440203402802, + "learning_rate": 2.2782610408045858e-08, + "loss": 0.2836, + "step": 40913 + }, + { + "epoch": 1.9166159179275777, + "grad_norm": 0.5982172609555401, + "learning_rate": 2.2757072399489934e-08, + "loss": 0.2816, + "step": 40914 + }, + { + "epoch": 1.916662762917506, + "grad_norm": 0.5787946785882687, + "learning_rate": 2.2731548646844624e-08, + "loss": 0.2801, + "step": 40915 + }, + { + "epoch": 1.9167096079074342, + "grad_norm": 0.6116162483224009, + "learning_rate": 2.270603915025621e-08, + "loss": 0.2836, + "step": 40916 + }, + { + "epoch": 1.9167564528973626, + "grad_norm": 0.5799214111416465, + "learning_rate": 2.2680543909872065e-08, + "loss": 0.2835, + "step": 40917 + }, + { + "epoch": 1.916803297887291, + "grad_norm": 0.616985191450778, + "learning_rate": 2.2655062925838745e-08, + "loss": 0.2712, + "step": 40918 + }, + { + "epoch": 1.9168501428772193, + "grad_norm": 0.5703487572275977, + "learning_rate": 2.262959619830307e-08, + "loss": 0.2583, + "step": 40919 + }, + { + "epoch": 1.9168969878671476, + "grad_norm": 0.573167770991848, + "learning_rate": 2.260414372741132e-08, + "loss": 0.2541, + "step": 40920 + }, + { + "epoch": 1.916943832857076, + "grad_norm": 0.5826930834632794, + "learning_rate": 2.257870551331004e-08, + "loss": 0.2679, + "step": 40921 + }, + { + "epoch": 1.9169906778470043, + "grad_norm": 0.6339912234958388, + "learning_rate": 2.255328155614578e-08, + "loss": 0.2804, + "step": 40922 + }, + { + "epoch": 1.9170375228369325, + "grad_norm": 0.6180577352110386, + "learning_rate": 2.2527871856064254e-08, + "loss": 0.2963, + "step": 40923 + }, + { + "epoch": 1.917084367826861, + "grad_norm": 0.6444359398636558, + "learning_rate": 2.250247641321257e-08, + "loss": 0.2763, + "step": 40924 + }, + { + "epoch": 1.9171312128167892, + "grad_norm": 0.5615720516633202, + "learning_rate": 2.2477095227736167e-08, + "loss": 0.2437, + "step": 40925 + }, + { + "epoch": 1.9171780578067175, + "grad_norm": 0.6141183932327057, + "learning_rate": 2.2451728299781594e-08, + "loss": 0.2676, + "step": 40926 + }, + { + "epoch": 1.917224902796646, + "grad_norm": 0.5673911466897507, + "learning_rate": 2.2426375629494566e-08, + "loss": 0.2655, + "step": 40927 + }, + { + "epoch": 1.9172717477865744, + "grad_norm": 0.5932745393605128, + "learning_rate": 2.2401037217021083e-08, + "loss": 0.272, + "step": 40928 + }, + { + "epoch": 1.9173185927765024, + "grad_norm": 0.6139458860115984, + "learning_rate": 2.2375713062506577e-08, + "loss": 0.2818, + "step": 40929 + }, + { + "epoch": 1.9173654377664309, + "grad_norm": 0.6045724092627003, + "learning_rate": 2.2350403166097322e-08, + "loss": 0.2576, + "step": 40930 + }, + { + "epoch": 1.9174122827563593, + "grad_norm": 0.5824538957483987, + "learning_rate": 2.2325107527938762e-08, + "loss": 0.2705, + "step": 40931 + }, + { + "epoch": 1.9174591277462876, + "grad_norm": 0.5885074057147921, + "learning_rate": 2.2299826148176607e-08, + "loss": 0.2639, + "step": 40932 + }, + { + "epoch": 1.9175059727362158, + "grad_norm": 0.5438024133270435, + "learning_rate": 2.227455902695602e-08, + "loss": 0.2501, + "step": 40933 + }, + { + "epoch": 1.9175528177261443, + "grad_norm": 0.5844717779985277, + "learning_rate": 2.2249306164422725e-08, + "loss": 0.2818, + "step": 40934 + }, + { + "epoch": 1.9175996627160725, + "grad_norm": 0.6172394204838194, + "learning_rate": 2.2224067560721874e-08, + "loss": 0.2789, + "step": 40935 + }, + { + "epoch": 1.9176465077060008, + "grad_norm": 0.5892849094506695, + "learning_rate": 2.2198843215998635e-08, + "loss": 0.2595, + "step": 40936 + }, + { + "epoch": 1.9176933526959292, + "grad_norm": 0.5809682865884758, + "learning_rate": 2.2173633130398442e-08, + "loss": 0.2689, + "step": 40937 + }, + { + "epoch": 1.9177401976858575, + "grad_norm": 0.6069935620172406, + "learning_rate": 2.2148437304066185e-08, + "loss": 0.2817, + "step": 40938 + }, + { + "epoch": 1.9177870426757857, + "grad_norm": 0.6020721778594458, + "learning_rate": 2.2123255737146742e-08, + "loss": 0.2813, + "step": 40939 + }, + { + "epoch": 1.9178338876657142, + "grad_norm": 0.5935892874508228, + "learning_rate": 2.209808842978528e-08, + "loss": 0.2731, + "step": 40940 + }, + { + "epoch": 1.9178807326556426, + "grad_norm": 0.599223209663805, + "learning_rate": 2.2072935382126683e-08, + "loss": 0.2642, + "step": 40941 + }, + { + "epoch": 1.9179275776455706, + "grad_norm": 0.58480017890016, + "learning_rate": 2.2047796594315275e-08, + "loss": 0.2531, + "step": 40942 + }, + { + "epoch": 1.917974422635499, + "grad_norm": 0.6176552535711793, + "learning_rate": 2.2022672066495944e-08, + "loss": 0.2721, + "step": 40943 + }, + { + "epoch": 1.9180212676254276, + "grad_norm": 0.5783249460499339, + "learning_rate": 2.199756179881357e-08, + "loss": 0.2563, + "step": 40944 + }, + { + "epoch": 1.9180681126153558, + "grad_norm": 0.6345686067582207, + "learning_rate": 2.197246579141221e-08, + "loss": 0.2882, + "step": 40945 + }, + { + "epoch": 1.918114957605284, + "grad_norm": 0.6322632877194302, + "learning_rate": 2.194738404443675e-08, + "loss": 0.2593, + "step": 40946 + }, + { + "epoch": 1.9181618025952125, + "grad_norm": 0.5874958432481399, + "learning_rate": 2.1922316558030954e-08, + "loss": 0.2587, + "step": 40947 + }, + { + "epoch": 1.9182086475851408, + "grad_norm": 0.5681669726804844, + "learning_rate": 2.1897263332339437e-08, + "loss": 0.2655, + "step": 40948 + }, + { + "epoch": 1.918255492575069, + "grad_norm": 0.5862002695179226, + "learning_rate": 2.1872224367506523e-08, + "loss": 0.2792, + "step": 40949 + }, + { + "epoch": 1.9183023375649975, + "grad_norm": 0.5957688882676556, + "learning_rate": 2.1847199663675987e-08, + "loss": 0.2615, + "step": 40950 + }, + { + "epoch": 1.9183491825549257, + "grad_norm": 0.6283453741408959, + "learning_rate": 2.1822189220991884e-08, + "loss": 0.2862, + "step": 40951 + }, + { + "epoch": 1.918396027544854, + "grad_norm": 0.5981744877646125, + "learning_rate": 2.1797193039598263e-08, + "loss": 0.2705, + "step": 40952 + }, + { + "epoch": 1.9184428725347824, + "grad_norm": 0.6440825526294092, + "learning_rate": 2.1772211119638896e-08, + "loss": 0.2901, + "step": 40953 + }, + { + "epoch": 1.9184897175247109, + "grad_norm": 0.5993424945205612, + "learning_rate": 2.1747243461257563e-08, + "loss": 0.27, + "step": 40954 + }, + { + "epoch": 1.918536562514639, + "grad_norm": 0.5799121490196016, + "learning_rate": 2.172229006459803e-08, + "loss": 0.2435, + "step": 40955 + }, + { + "epoch": 1.9185834075045674, + "grad_norm": 0.6245137950626996, + "learning_rate": 2.1697350929804072e-08, + "loss": 0.2694, + "step": 40956 + }, + { + "epoch": 1.9186302524944958, + "grad_norm": 0.5701036439243355, + "learning_rate": 2.1672426057018636e-08, + "loss": 0.2594, + "step": 40957 + }, + { + "epoch": 1.918677097484424, + "grad_norm": 0.6204260159914141, + "learning_rate": 2.1647515446385494e-08, + "loss": 0.2644, + "step": 40958 + }, + { + "epoch": 1.9187239424743523, + "grad_norm": 0.6025978560973343, + "learning_rate": 2.1622619098047858e-08, + "loss": 0.2821, + "step": 40959 + }, + { + "epoch": 1.9187707874642808, + "grad_norm": 0.6311290487342648, + "learning_rate": 2.1597737012149233e-08, + "loss": 0.2887, + "step": 40960 + }, + { + "epoch": 1.918817632454209, + "grad_norm": 0.5931629170870182, + "learning_rate": 2.1572869188832835e-08, + "loss": 0.2648, + "step": 40961 + }, + { + "epoch": 1.9188644774441372, + "grad_norm": 0.5979582514123751, + "learning_rate": 2.1548015628241604e-08, + "loss": 0.2899, + "step": 40962 + }, + { + "epoch": 1.9189113224340657, + "grad_norm": 0.5684216366561097, + "learning_rate": 2.1523176330518758e-08, + "loss": 0.2613, + "step": 40963 + }, + { + "epoch": 1.9189581674239942, + "grad_norm": 0.5993828807989798, + "learning_rate": 2.149835129580696e-08, + "loss": 0.2724, + "step": 40964 + }, + { + "epoch": 1.9190050124139222, + "grad_norm": 0.6115783422265666, + "learning_rate": 2.147354052424916e-08, + "loss": 0.2611, + "step": 40965 + }, + { + "epoch": 1.9190518574038506, + "grad_norm": 0.5979238204342598, + "learning_rate": 2.144874401598801e-08, + "loss": 0.2635, + "step": 40966 + }, + { + "epoch": 1.919098702393779, + "grad_norm": 0.5562140981830223, + "learning_rate": 2.1423961771166457e-08, + "loss": 0.2452, + "step": 40967 + }, + { + "epoch": 1.9191455473837074, + "grad_norm": 0.5802644148589889, + "learning_rate": 2.1399193789927442e-08, + "loss": 0.2585, + "step": 40968 + }, + { + "epoch": 1.9191923923736356, + "grad_norm": 0.6050621560929728, + "learning_rate": 2.1374440072412795e-08, + "loss": 0.2802, + "step": 40969 + }, + { + "epoch": 1.919239237363564, + "grad_norm": 0.5912933599827617, + "learning_rate": 2.1349700618765178e-08, + "loss": 0.2686, + "step": 40970 + }, + { + "epoch": 1.9192860823534923, + "grad_norm": 0.6164631383107901, + "learning_rate": 2.132497542912726e-08, + "loss": 0.27, + "step": 40971 + }, + { + "epoch": 1.9193329273434205, + "grad_norm": 0.6265644749460612, + "learning_rate": 2.1300264503640867e-08, + "loss": 0.2747, + "step": 40972 + }, + { + "epoch": 1.919379772333349, + "grad_norm": 0.6124717418813177, + "learning_rate": 2.1275567842448665e-08, + "loss": 0.2705, + "step": 40973 + }, + { + "epoch": 1.9194266173232772, + "grad_norm": 0.5745149322679571, + "learning_rate": 2.125088544569248e-08, + "loss": 0.2598, + "step": 40974 + }, + { + "epoch": 1.9194734623132055, + "grad_norm": 0.5615584868014458, + "learning_rate": 2.122621731351443e-08, + "loss": 0.258, + "step": 40975 + }, + { + "epoch": 1.919520307303134, + "grad_norm": 0.5815896741596533, + "learning_rate": 2.1201563446056616e-08, + "loss": 0.254, + "step": 40976 + }, + { + "epoch": 1.9195671522930624, + "grad_norm": 0.640721455610965, + "learning_rate": 2.1176923843460596e-08, + "loss": 0.2852, + "step": 40977 + }, + { + "epoch": 1.9196139972829904, + "grad_norm": 0.6071006932611999, + "learning_rate": 2.1152298505868195e-08, + "loss": 0.261, + "step": 40978 + }, + { + "epoch": 1.9196608422729189, + "grad_norm": 0.5842062302063855, + "learning_rate": 2.112768743342153e-08, + "loss": 0.2606, + "step": 40979 + }, + { + "epoch": 1.9197076872628474, + "grad_norm": 0.6080804994326944, + "learning_rate": 2.1103090626262147e-08, + "loss": 0.2674, + "step": 40980 + }, + { + "epoch": 1.9197545322527756, + "grad_norm": 0.5558917066215284, + "learning_rate": 2.1078508084531048e-08, + "loss": 0.2501, + "step": 40981 + }, + { + "epoch": 1.9198013772427038, + "grad_norm": 0.5609211513738067, + "learning_rate": 2.105393980837034e-08, + "loss": 0.2591, + "step": 40982 + }, + { + "epoch": 1.9198482222326323, + "grad_norm": 0.592299858371521, + "learning_rate": 2.102938579792102e-08, + "loss": 0.2565, + "step": 40983 + }, + { + "epoch": 1.9198950672225605, + "grad_norm": 0.5973990316955997, + "learning_rate": 2.1004846053324647e-08, + "loss": 0.2645, + "step": 40984 + }, + { + "epoch": 1.9199419122124888, + "grad_norm": 0.6034403977716818, + "learning_rate": 2.0980320574722214e-08, + "loss": 0.2852, + "step": 40985 + }, + { + "epoch": 1.9199887572024172, + "grad_norm": 0.6428829270509188, + "learning_rate": 2.0955809362255e-08, + "loss": 0.287, + "step": 40986 + }, + { + "epoch": 1.9200356021923455, + "grad_norm": 0.6279098514875756, + "learning_rate": 2.0931312416063997e-08, + "loss": 0.2745, + "step": 40987 + }, + { + "epoch": 1.9200824471822737, + "grad_norm": 0.5893155626239456, + "learning_rate": 2.0906829736290214e-08, + "loss": 0.2699, + "step": 40988 + }, + { + "epoch": 1.9201292921722022, + "grad_norm": 0.6007167184592043, + "learning_rate": 2.0882361323074364e-08, + "loss": 0.2777, + "step": 40989 + }, + { + "epoch": 1.9201761371621306, + "grad_norm": 0.602125745121152, + "learning_rate": 2.0857907176557722e-08, + "loss": 0.2602, + "step": 40990 + }, + { + "epoch": 1.9202229821520589, + "grad_norm": 0.5808293019028278, + "learning_rate": 2.083346729688074e-08, + "loss": 0.2731, + "step": 40991 + }, + { + "epoch": 1.9202698271419871, + "grad_norm": 0.5795730671028811, + "learning_rate": 2.0809041684183572e-08, + "loss": 0.2545, + "step": 40992 + }, + { + "epoch": 1.9203166721319156, + "grad_norm": 0.6123273073072028, + "learning_rate": 2.0784630338607782e-08, + "loss": 0.2712, + "step": 40993 + }, + { + "epoch": 1.9203635171218438, + "grad_norm": 0.6080509271966964, + "learning_rate": 2.0760233260292973e-08, + "loss": 0.2782, + "step": 40994 + }, + { + "epoch": 1.920410362111772, + "grad_norm": 0.6151061368049713, + "learning_rate": 2.0735850449380145e-08, + "loss": 0.288, + "step": 40995 + }, + { + "epoch": 1.9204572071017005, + "grad_norm": 0.5883383754127356, + "learning_rate": 2.0711481906009188e-08, + "loss": 0.2552, + "step": 40996 + }, + { + "epoch": 1.9205040520916288, + "grad_norm": 0.6002413684852445, + "learning_rate": 2.0687127630320546e-08, + "loss": 0.2695, + "step": 40997 + }, + { + "epoch": 1.920550897081557, + "grad_norm": 0.6301465213040814, + "learning_rate": 2.0662787622454105e-08, + "loss": 0.2824, + "step": 40998 + }, + { + "epoch": 1.9205977420714855, + "grad_norm": 0.6160614546905535, + "learning_rate": 2.0638461882550588e-08, + "loss": 0.2632, + "step": 40999 + }, + { + "epoch": 1.920644587061414, + "grad_norm": 0.6390753510733046, + "learning_rate": 2.061415041074932e-08, + "loss": 0.275, + "step": 41000 + }, + { + "epoch": 1.920691432051342, + "grad_norm": 0.5550946443288934, + "learning_rate": 2.058985320719048e-08, + "loss": 0.2662, + "step": 41001 + }, + { + "epoch": 1.9207382770412704, + "grad_norm": 0.5884525601772088, + "learning_rate": 2.0565570272013668e-08, + "loss": 0.2654, + "step": 41002 + }, + { + "epoch": 1.9207851220311989, + "grad_norm": 0.6094527133129488, + "learning_rate": 2.0541301605359054e-08, + "loss": 0.2577, + "step": 41003 + }, + { + "epoch": 1.9208319670211271, + "grad_norm": 0.5643496377336938, + "learning_rate": 2.051704720736597e-08, + "loss": 0.2578, + "step": 41004 + }, + { + "epoch": 1.9208788120110554, + "grad_norm": 0.5635860833396735, + "learning_rate": 2.0492807078174304e-08, + "loss": 0.256, + "step": 41005 + }, + { + "epoch": 1.9209256570009838, + "grad_norm": 0.5820729122851848, + "learning_rate": 2.0468581217923113e-08, + "loss": 0.2598, + "step": 41006 + }, + { + "epoch": 1.920972501990912, + "grad_norm": 0.5441892356124235, + "learning_rate": 2.0444369626752003e-08, + "loss": 0.2554, + "step": 41007 + }, + { + "epoch": 1.9210193469808403, + "grad_norm": 0.6082193392769242, + "learning_rate": 2.042017230480031e-08, + "loss": 0.2726, + "step": 41008 + }, + { + "epoch": 1.9210661919707688, + "grad_norm": 0.6151022684448636, + "learning_rate": 2.039598925220737e-08, + "loss": 0.2658, + "step": 41009 + }, + { + "epoch": 1.921113036960697, + "grad_norm": 0.5748975000745341, + "learning_rate": 2.0371820469112235e-08, + "loss": 0.2613, + "step": 41010 + }, + { + "epoch": 1.9211598819506253, + "grad_norm": 0.6683075959420434, + "learning_rate": 2.034766595565424e-08, + "loss": 0.2812, + "step": 41011 + }, + { + "epoch": 1.9212067269405537, + "grad_norm": 0.6403899575110242, + "learning_rate": 2.032352571197216e-08, + "loss": 0.2775, + "step": 41012 + }, + { + "epoch": 1.9212535719304822, + "grad_norm": 0.6435291921896796, + "learning_rate": 2.029939973820505e-08, + "loss": 0.2883, + "step": 41013 + }, + { + "epoch": 1.9213004169204102, + "grad_norm": 0.5442052993199664, + "learning_rate": 2.0275288034491414e-08, + "loss": 0.2469, + "step": 41014 + }, + { + "epoch": 1.9213472619103387, + "grad_norm": 0.5671907217581036, + "learning_rate": 2.0251190600970026e-08, + "loss": 0.241, + "step": 41015 + }, + { + "epoch": 1.9213941069002671, + "grad_norm": 0.5514536167484335, + "learning_rate": 2.0227107437779947e-08, + "loss": 0.249, + "step": 41016 + }, + { + "epoch": 1.9214409518901954, + "grad_norm": 0.5803857574663019, + "learning_rate": 2.020303854505995e-08, + "loss": 0.2689, + "step": 41017 + }, + { + "epoch": 1.9214877968801236, + "grad_norm": 0.6021489499964391, + "learning_rate": 2.0178983922947702e-08, + "loss": 0.2652, + "step": 41018 + }, + { + "epoch": 1.921534641870052, + "grad_norm": 0.6074072987184717, + "learning_rate": 2.015494357158254e-08, + "loss": 0.2659, + "step": 41019 + }, + { + "epoch": 1.9215814868599803, + "grad_norm": 0.6038551852498979, + "learning_rate": 2.0130917491102133e-08, + "loss": 0.2718, + "step": 41020 + }, + { + "epoch": 1.9216283318499086, + "grad_norm": 0.6212788361700992, + "learning_rate": 2.0106905681644972e-08, + "loss": 0.2744, + "step": 41021 + }, + { + "epoch": 1.921675176839837, + "grad_norm": 0.6275829145591083, + "learning_rate": 2.0082908143349287e-08, + "loss": 0.2785, + "step": 41022 + }, + { + "epoch": 1.9217220218297653, + "grad_norm": 0.6477219017255025, + "learning_rate": 2.0058924876353304e-08, + "loss": 0.3009, + "step": 41023 + }, + { + "epoch": 1.9217688668196935, + "grad_norm": 0.6234971189515923, + "learning_rate": 2.003495588079496e-08, + "loss": 0.2886, + "step": 41024 + }, + { + "epoch": 1.921815711809622, + "grad_norm": 0.6126969959182379, + "learning_rate": 2.0011001156811927e-08, + "loss": 0.2639, + "step": 41025 + }, + { + "epoch": 1.9218625567995504, + "grad_norm": 0.5942992505644977, + "learning_rate": 1.9987060704542426e-08, + "loss": 0.2738, + "step": 41026 + }, + { + "epoch": 1.9219094017894787, + "grad_norm": 0.5380599143858774, + "learning_rate": 1.9963134524123852e-08, + "loss": 0.2492, + "step": 41027 + }, + { + "epoch": 1.921956246779407, + "grad_norm": 0.5927169373111736, + "learning_rate": 1.9939222615694143e-08, + "loss": 0.2738, + "step": 41028 + }, + { + "epoch": 1.9220030917693354, + "grad_norm": 0.5935790050156166, + "learning_rate": 1.991532497939125e-08, + "loss": 0.2754, + "step": 41029 + }, + { + "epoch": 1.9220499367592636, + "grad_norm": 0.5869168627474326, + "learning_rate": 1.9891441615351725e-08, + "loss": 0.2672, + "step": 41030 + }, + { + "epoch": 1.9220967817491919, + "grad_norm": 0.5947144098897786, + "learning_rate": 1.9867572523714074e-08, + "loss": 0.2718, + "step": 41031 + }, + { + "epoch": 1.9221436267391203, + "grad_norm": 0.6161501097902969, + "learning_rate": 1.984371770461513e-08, + "loss": 0.2521, + "step": 41032 + }, + { + "epoch": 1.9221904717290486, + "grad_norm": 0.6399841869451083, + "learning_rate": 1.9819877158192e-08, + "loss": 0.2857, + "step": 41033 + }, + { + "epoch": 1.9222373167189768, + "grad_norm": 0.6327014343458638, + "learning_rate": 1.9796050884582087e-08, + "loss": 0.2919, + "step": 41034 + }, + { + "epoch": 1.9222841617089053, + "grad_norm": 0.6376000808618971, + "learning_rate": 1.9772238883922767e-08, + "loss": 0.2676, + "step": 41035 + }, + { + "epoch": 1.9223310066988337, + "grad_norm": 0.5716064909252292, + "learning_rate": 1.9748441156350606e-08, + "loss": 0.2629, + "step": 41036 + }, + { + "epoch": 1.9223778516887617, + "grad_norm": 0.6408826780460407, + "learning_rate": 1.9724657702002993e-08, + "loss": 0.2755, + "step": 41037 + }, + { + "epoch": 1.9224246966786902, + "grad_norm": 0.6365002040645351, + "learning_rate": 1.9700888521016202e-08, + "loss": 0.2673, + "step": 41038 + }, + { + "epoch": 1.9224715416686187, + "grad_norm": 0.6300040733928788, + "learning_rate": 1.967713361352791e-08, + "loss": 0.2764, + "step": 41039 + }, + { + "epoch": 1.922518386658547, + "grad_norm": 0.6150254102621414, + "learning_rate": 1.9653392979674115e-08, + "loss": 0.2676, + "step": 41040 + }, + { + "epoch": 1.9225652316484751, + "grad_norm": 0.6243803940435064, + "learning_rate": 1.9629666619591648e-08, + "loss": 0.2691, + "step": 41041 + }, + { + "epoch": 1.9226120766384036, + "grad_norm": 0.6212798064393378, + "learning_rate": 1.9605954533416794e-08, + "loss": 0.2775, + "step": 41042 + }, + { + "epoch": 1.9226589216283319, + "grad_norm": 0.6051259837593339, + "learning_rate": 1.9582256721286385e-08, + "loss": 0.2666, + "step": 41043 + }, + { + "epoch": 1.92270576661826, + "grad_norm": 0.5855865478895063, + "learning_rate": 1.9558573183336704e-08, + "loss": 0.2783, + "step": 41044 + }, + { + "epoch": 1.9227526116081886, + "grad_norm": 0.5473540792207621, + "learning_rate": 1.953490391970375e-08, + "loss": 0.2664, + "step": 41045 + }, + { + "epoch": 1.9227994565981168, + "grad_norm": 0.6133144203409914, + "learning_rate": 1.951124893052436e-08, + "loss": 0.2711, + "step": 41046 + }, + { + "epoch": 1.922846301588045, + "grad_norm": 0.6373448477563816, + "learning_rate": 1.948760821593426e-08, + "loss": 0.2925, + "step": 41047 + }, + { + "epoch": 1.9228931465779735, + "grad_norm": 0.5925592613424099, + "learning_rate": 1.9463981776069452e-08, + "loss": 0.2712, + "step": 41048 + }, + { + "epoch": 1.922939991567902, + "grad_norm": 0.6040877249983267, + "learning_rate": 1.9440369611065936e-08, + "loss": 0.2741, + "step": 41049 + }, + { + "epoch": 1.92298683655783, + "grad_norm": 0.6157550746554936, + "learning_rate": 1.9416771721059436e-08, + "loss": 0.2747, + "step": 41050 + }, + { + "epoch": 1.9230336815477584, + "grad_norm": 0.5357898807170222, + "learning_rate": 1.9393188106186235e-08, + "loss": 0.25, + "step": 41051 + }, + { + "epoch": 1.923080526537687, + "grad_norm": 0.5934496312438485, + "learning_rate": 1.9369618766581498e-08, + "loss": 0.2705, + "step": 41052 + }, + { + "epoch": 1.9231273715276151, + "grad_norm": 0.5996478626760278, + "learning_rate": 1.9346063702381233e-08, + "loss": 0.2688, + "step": 41053 + }, + { + "epoch": 1.9231742165175434, + "grad_norm": 0.6084248595876398, + "learning_rate": 1.9322522913721165e-08, + "loss": 0.2674, + "step": 41054 + }, + { + "epoch": 1.9232210615074719, + "grad_norm": 0.5873059345575284, + "learning_rate": 1.929899640073618e-08, + "loss": 0.2736, + "step": 41055 + }, + { + "epoch": 1.9232679064974, + "grad_norm": 0.5921089093888972, + "learning_rate": 1.9275484163562007e-08, + "loss": 0.2658, + "step": 41056 + }, + { + "epoch": 1.9233147514873283, + "grad_norm": 0.6042117018598997, + "learning_rate": 1.9251986202333815e-08, + "loss": 0.2627, + "step": 41057 + }, + { + "epoch": 1.9233615964772568, + "grad_norm": 0.6500474410334491, + "learning_rate": 1.9228502517187054e-08, + "loss": 0.2791, + "step": 41058 + }, + { + "epoch": 1.923408441467185, + "grad_norm": 0.5895243642759675, + "learning_rate": 1.9205033108256888e-08, + "loss": 0.2491, + "step": 41059 + }, + { + "epoch": 1.9234552864571133, + "grad_norm": 0.5871730215720229, + "learning_rate": 1.9181577975677936e-08, + "loss": 0.265, + "step": 41060 + }, + { + "epoch": 1.9235021314470417, + "grad_norm": 0.5566221259810931, + "learning_rate": 1.915813711958564e-08, + "loss": 0.2571, + "step": 41061 + }, + { + "epoch": 1.9235489764369702, + "grad_norm": 0.5872273811275844, + "learning_rate": 1.91347105401149e-08, + "loss": 0.2472, + "step": 41062 + }, + { + "epoch": 1.9235958214268984, + "grad_norm": 0.5673077173053706, + "learning_rate": 1.9111298237400046e-08, + "loss": 0.2636, + "step": 41063 + }, + { + "epoch": 1.9236426664168267, + "grad_norm": 0.5693614696899165, + "learning_rate": 1.908790021157625e-08, + "loss": 0.26, + "step": 41064 + }, + { + "epoch": 1.9236895114067551, + "grad_norm": 0.5850515509273165, + "learning_rate": 1.906451646277785e-08, + "loss": 0.2673, + "step": 41065 + }, + { + "epoch": 1.9237363563966834, + "grad_norm": 0.6026410955266599, + "learning_rate": 1.9041146991140014e-08, + "loss": 0.294, + "step": 41066 + }, + { + "epoch": 1.9237832013866116, + "grad_norm": 0.5625630777928468, + "learning_rate": 1.9017791796796526e-08, + "loss": 0.2633, + "step": 41067 + }, + { + "epoch": 1.92383004637654, + "grad_norm": 0.5628315628381418, + "learning_rate": 1.8994450879882277e-08, + "loss": 0.2585, + "step": 41068 + }, + { + "epoch": 1.9238768913664683, + "grad_norm": 0.6493771297072712, + "learning_rate": 1.8971124240531047e-08, + "loss": 0.2685, + "step": 41069 + }, + { + "epoch": 1.9239237363563966, + "grad_norm": 0.5911396073201802, + "learning_rate": 1.8947811878877453e-08, + "loss": 0.2752, + "step": 41070 + }, + { + "epoch": 1.923970581346325, + "grad_norm": 0.6039414493288692, + "learning_rate": 1.8924513795055832e-08, + "loss": 0.2651, + "step": 41071 + }, + { + "epoch": 1.9240174263362535, + "grad_norm": 0.6188020984711409, + "learning_rate": 1.8901229989199965e-08, + "loss": 0.2833, + "step": 41072 + }, + { + "epoch": 1.9240642713261815, + "grad_norm": 0.5997510239035931, + "learning_rate": 1.887796046144391e-08, + "loss": 0.2739, + "step": 41073 + }, + { + "epoch": 1.92411111631611, + "grad_norm": 0.5626588509089606, + "learning_rate": 1.8854705211921732e-08, + "loss": 0.2568, + "step": 41074 + }, + { + "epoch": 1.9241579613060384, + "grad_norm": 0.5778966365895953, + "learning_rate": 1.883146424076693e-08, + "loss": 0.2571, + "step": 41075 + }, + { + "epoch": 1.9242048062959667, + "grad_norm": 0.6105087274648013, + "learning_rate": 1.8808237548113563e-08, + "loss": 0.2693, + "step": 41076 + }, + { + "epoch": 1.924251651285895, + "grad_norm": 0.6209667383882685, + "learning_rate": 1.878502513409486e-08, + "loss": 0.27, + "step": 41077 + }, + { + "epoch": 1.9242984962758234, + "grad_norm": 0.5975749582029163, + "learning_rate": 1.8761826998845157e-08, + "loss": 0.271, + "step": 41078 + }, + { + "epoch": 1.9243453412657516, + "grad_norm": 0.6144430894382419, + "learning_rate": 1.8738643142497126e-08, + "loss": 0.2629, + "step": 41079 + }, + { + "epoch": 1.9243921862556799, + "grad_norm": 0.6030231128500012, + "learning_rate": 1.8715473565184826e-08, + "loss": 0.2629, + "step": 41080 + }, + { + "epoch": 1.9244390312456083, + "grad_norm": 0.5669864593797356, + "learning_rate": 1.869231826704121e-08, + "loss": 0.2612, + "step": 41081 + }, + { + "epoch": 1.9244858762355366, + "grad_norm": 0.6507197149683118, + "learning_rate": 1.86691772481995e-08, + "loss": 0.2821, + "step": 41082 + }, + { + "epoch": 1.9245327212254648, + "grad_norm": 0.5747707258888521, + "learning_rate": 1.86460505087932e-08, + "loss": 0.2628, + "step": 41083 + }, + { + "epoch": 1.9245795662153933, + "grad_norm": 0.6371704014684646, + "learning_rate": 1.8622938048955263e-08, + "loss": 0.2869, + "step": 41084 + }, + { + "epoch": 1.9246264112053217, + "grad_norm": 0.617088093985166, + "learning_rate": 1.8599839868818637e-08, + "loss": 0.2709, + "step": 41085 + }, + { + "epoch": 1.9246732561952498, + "grad_norm": 0.5969236351511831, + "learning_rate": 1.857675596851599e-08, + "loss": 0.2841, + "step": 41086 + }, + { + "epoch": 1.9247201011851782, + "grad_norm": 0.5537044990452905, + "learning_rate": 1.8553686348180554e-08, + "loss": 0.2511, + "step": 41087 + }, + { + "epoch": 1.9247669461751067, + "grad_norm": 0.5769384893799308, + "learning_rate": 1.8530631007945275e-08, + "loss": 0.2702, + "step": 41088 + }, + { + "epoch": 1.924813791165035, + "grad_norm": 0.5906550880160077, + "learning_rate": 1.8507589947942273e-08, + "loss": 0.2727, + "step": 41089 + }, + { + "epoch": 1.9248606361549632, + "grad_norm": 0.6011454804730835, + "learning_rate": 1.8484563168304493e-08, + "loss": 0.2696, + "step": 41090 + }, + { + "epoch": 1.9249074811448916, + "grad_norm": 0.6097226591492106, + "learning_rate": 1.846155066916405e-08, + "loss": 0.2654, + "step": 41091 + }, + { + "epoch": 1.9249543261348199, + "grad_norm": 0.6587713110257502, + "learning_rate": 1.84385524506539e-08, + "loss": 0.2945, + "step": 41092 + }, + { + "epoch": 1.9250011711247481, + "grad_norm": 0.6422794026730891, + "learning_rate": 1.8415568512906156e-08, + "loss": 0.2741, + "step": 41093 + }, + { + "epoch": 1.9250480161146766, + "grad_norm": 0.5854199645375328, + "learning_rate": 1.8392598856052934e-08, + "loss": 0.2533, + "step": 41094 + }, + { + "epoch": 1.9250948611046048, + "grad_norm": 0.5845562193981516, + "learning_rate": 1.8369643480226905e-08, + "loss": 0.2709, + "step": 41095 + }, + { + "epoch": 1.925141706094533, + "grad_norm": 0.6422104856748384, + "learning_rate": 1.8346702385559633e-08, + "loss": 0.2862, + "step": 41096 + }, + { + "epoch": 1.9251885510844615, + "grad_norm": 0.5752956113639621, + "learning_rate": 1.832377557218351e-08, + "loss": 0.2599, + "step": 41097 + }, + { + "epoch": 1.92523539607439, + "grad_norm": 0.5856824945620409, + "learning_rate": 1.83008630402301e-08, + "loss": 0.2841, + "step": 41098 + }, + { + "epoch": 1.9252822410643182, + "grad_norm": 0.5648227710168467, + "learning_rate": 1.8277964789831514e-08, + "loss": 0.258, + "step": 41099 + }, + { + "epoch": 1.9253290860542465, + "grad_norm": 0.62901904846166, + "learning_rate": 1.825508082111932e-08, + "loss": 0.2829, + "step": 41100 + }, + { + "epoch": 1.925375931044175, + "grad_norm": 0.5540363822510953, + "learning_rate": 1.823221113422563e-08, + "loss": 0.26, + "step": 41101 + }, + { + "epoch": 1.9254227760341032, + "grad_norm": 0.5797294969003174, + "learning_rate": 1.820935572928173e-08, + "loss": 0.2607, + "step": 41102 + }, + { + "epoch": 1.9254696210240314, + "grad_norm": 0.5737480751048908, + "learning_rate": 1.8186514606419458e-08, + "loss": 0.2675, + "step": 41103 + }, + { + "epoch": 1.9255164660139599, + "grad_norm": 0.5727557870263645, + "learning_rate": 1.8163687765769545e-08, + "loss": 0.2433, + "step": 41104 + }, + { + "epoch": 1.9255633110038881, + "grad_norm": 0.6188003520079102, + "learning_rate": 1.8140875207464104e-08, + "loss": 0.2894, + "step": 41105 + }, + { + "epoch": 1.9256101559938164, + "grad_norm": 0.5920799892777254, + "learning_rate": 1.8118076931634143e-08, + "loss": 0.2802, + "step": 41106 + }, + { + "epoch": 1.9256570009837448, + "grad_norm": 0.5838735345192874, + "learning_rate": 1.8095292938410668e-08, + "loss": 0.2569, + "step": 41107 + }, + { + "epoch": 1.9257038459736733, + "grad_norm": 0.5886995812869597, + "learning_rate": 1.8072523227925243e-08, + "loss": 0.2747, + "step": 41108 + }, + { + "epoch": 1.9257506909636013, + "grad_norm": 0.6083061648887399, + "learning_rate": 1.8049767800308315e-08, + "loss": 0.268, + "step": 41109 + }, + { + "epoch": 1.9257975359535298, + "grad_norm": 0.5936340290086367, + "learning_rate": 1.802702665569145e-08, + "loss": 0.275, + "step": 41110 + }, + { + "epoch": 1.9258443809434582, + "grad_norm": 0.6073240687032642, + "learning_rate": 1.8004299794205093e-08, + "loss": 0.273, + "step": 41111 + }, + { + "epoch": 1.9258912259333865, + "grad_norm": 0.6150153136011081, + "learning_rate": 1.7981587215980255e-08, + "loss": 0.277, + "step": 41112 + }, + { + "epoch": 1.9259380709233147, + "grad_norm": 0.5701948797511294, + "learning_rate": 1.7958888921147387e-08, + "loss": 0.2561, + "step": 41113 + }, + { + "epoch": 1.9259849159132432, + "grad_norm": 0.6074049993451084, + "learning_rate": 1.7936204909837497e-08, + "loss": 0.2692, + "step": 41114 + }, + { + "epoch": 1.9260317609031714, + "grad_norm": 0.6179963707116852, + "learning_rate": 1.791353518218075e-08, + "loss": 0.2876, + "step": 41115 + }, + { + "epoch": 1.9260786058930996, + "grad_norm": 0.614791165154929, + "learning_rate": 1.7890879738307886e-08, + "loss": 0.2741, + "step": 41116 + }, + { + "epoch": 1.9261254508830281, + "grad_norm": 0.5979428407178479, + "learning_rate": 1.7868238578349072e-08, + "loss": 0.2811, + "step": 41117 + }, + { + "epoch": 1.9261722958729564, + "grad_norm": 0.6109364183002263, + "learning_rate": 1.7845611702434485e-08, + "loss": 0.2592, + "step": 41118 + }, + { + "epoch": 1.9262191408628846, + "grad_norm": 0.6120564353232323, + "learning_rate": 1.782299911069485e-08, + "loss": 0.2767, + "step": 41119 + }, + { + "epoch": 1.926265985852813, + "grad_norm": 0.5896617959508288, + "learning_rate": 1.780040080325979e-08, + "loss": 0.2583, + "step": 41120 + }, + { + "epoch": 1.9263128308427415, + "grad_norm": 0.6203406072284576, + "learning_rate": 1.7777816780259748e-08, + "loss": 0.2636, + "step": 41121 + }, + { + "epoch": 1.9263596758326695, + "grad_norm": 0.5999607488636448, + "learning_rate": 1.7755247041824077e-08, + "loss": 0.2715, + "step": 41122 + }, + { + "epoch": 1.926406520822598, + "grad_norm": 0.6023716323137905, + "learning_rate": 1.7732691588083495e-08, + "loss": 0.2725, + "step": 41123 + }, + { + "epoch": 1.9264533658125265, + "grad_norm": 0.5829726790209739, + "learning_rate": 1.771015041916735e-08, + "loss": 0.2571, + "step": 41124 + }, + { + "epoch": 1.9265002108024547, + "grad_norm": 0.6072069313292419, + "learning_rate": 1.768762353520498e-08, + "loss": 0.2689, + "step": 41125 + }, + { + "epoch": 1.926547055792383, + "grad_norm": 0.6744040646092797, + "learning_rate": 1.7665110936326835e-08, + "loss": 0.2823, + "step": 41126 + }, + { + "epoch": 1.9265939007823114, + "grad_norm": 0.6086333683043854, + "learning_rate": 1.764261262266198e-08, + "loss": 0.2621, + "step": 41127 + }, + { + "epoch": 1.9266407457722396, + "grad_norm": 0.5809409348197985, + "learning_rate": 1.7620128594340036e-08, + "loss": 0.2636, + "step": 41128 + }, + { + "epoch": 1.926687590762168, + "grad_norm": 0.5774761889985198, + "learning_rate": 1.759765885149006e-08, + "loss": 0.2772, + "step": 41129 + }, + { + "epoch": 1.9267344357520964, + "grad_norm": 0.6065622470516892, + "learning_rate": 1.7575203394241958e-08, + "loss": 0.2812, + "step": 41130 + }, + { + "epoch": 1.9267812807420246, + "grad_norm": 0.5878165867269087, + "learning_rate": 1.7552762222724506e-08, + "loss": 0.2645, + "step": 41131 + }, + { + "epoch": 1.9268281257319528, + "grad_norm": 0.5843487023738079, + "learning_rate": 1.7530335337066772e-08, + "loss": 0.2661, + "step": 41132 + }, + { + "epoch": 1.9268749707218813, + "grad_norm": 0.5586439786486755, + "learning_rate": 1.7507922737398375e-08, + "loss": 0.2631, + "step": 41133 + }, + { + "epoch": 1.9269218157118098, + "grad_norm": 0.6197870823347282, + "learning_rate": 1.7485524423847543e-08, + "loss": 0.2701, + "step": 41134 + }, + { + "epoch": 1.926968660701738, + "grad_norm": 0.604588109055076, + "learning_rate": 1.746314039654362e-08, + "loss": 0.2808, + "step": 41135 + }, + { + "epoch": 1.9270155056916662, + "grad_norm": 0.5623778134238897, + "learning_rate": 1.7440770655615667e-08, + "loss": 0.2588, + "step": 41136 + }, + { + "epoch": 1.9270623506815947, + "grad_norm": 0.6049827548557424, + "learning_rate": 1.741841520119192e-08, + "loss": 0.276, + "step": 41137 + }, + { + "epoch": 1.927109195671523, + "grad_norm": 0.6028535679632259, + "learning_rate": 1.7396074033401156e-08, + "loss": 0.2658, + "step": 41138 + }, + { + "epoch": 1.9271560406614512, + "grad_norm": 0.6522915844244996, + "learning_rate": 1.7373747152372167e-08, + "loss": 0.2745, + "step": 41139 + }, + { + "epoch": 1.9272028856513796, + "grad_norm": 0.650070959726814, + "learning_rate": 1.7351434558233183e-08, + "loss": 0.2836, + "step": 41140 + }, + { + "epoch": 1.927249730641308, + "grad_norm": 0.6586873484134862, + "learning_rate": 1.732913625111271e-08, + "loss": 0.2825, + "step": 41141 + }, + { + "epoch": 1.9272965756312361, + "grad_norm": 0.6270491120646535, + "learning_rate": 1.7306852231138982e-08, + "loss": 0.28, + "step": 41142 + }, + { + "epoch": 1.9273434206211646, + "grad_norm": 0.6369276328827888, + "learning_rate": 1.7284582498440506e-08, + "loss": 0.2696, + "step": 41143 + }, + { + "epoch": 1.927390265611093, + "grad_norm": 0.6594567453918053, + "learning_rate": 1.7262327053145234e-08, + "loss": 0.283, + "step": 41144 + }, + { + "epoch": 1.927437110601021, + "grad_norm": 0.6324848546535308, + "learning_rate": 1.724008589538112e-08, + "loss": 0.2717, + "step": 41145 + }, + { + "epoch": 1.9274839555909495, + "grad_norm": 0.6048752351517543, + "learning_rate": 1.7217859025276397e-08, + "loss": 0.2774, + "step": 41146 + }, + { + "epoch": 1.927530800580878, + "grad_norm": 0.5692269517083869, + "learning_rate": 1.7195646442958735e-08, + "loss": 0.2593, + "step": 41147 + }, + { + "epoch": 1.9275776455708062, + "grad_norm": 0.5474661436758018, + "learning_rate": 1.7173448148556094e-08, + "loss": 0.2608, + "step": 41148 + }, + { + "epoch": 1.9276244905607345, + "grad_norm": 0.5823912951798675, + "learning_rate": 1.715126414219642e-08, + "loss": 0.2509, + "step": 41149 + }, + { + "epoch": 1.927671335550663, + "grad_norm": 0.6000861084788749, + "learning_rate": 1.7129094424006844e-08, + "loss": 0.2879, + "step": 41150 + }, + { + "epoch": 1.9277181805405912, + "grad_norm": 0.6216159946320532, + "learning_rate": 1.7106938994115585e-08, + "loss": 0.2722, + "step": 41151 + }, + { + "epoch": 1.9277650255305194, + "grad_norm": 0.6374392747132381, + "learning_rate": 1.7084797852650048e-08, + "loss": 0.2681, + "step": 41152 + }, + { + "epoch": 1.927811870520448, + "grad_norm": 0.6479787153436982, + "learning_rate": 1.7062670999737075e-08, + "loss": 0.2785, + "step": 41153 + }, + { + "epoch": 1.9278587155103761, + "grad_norm": 0.6095997307741474, + "learning_rate": 1.704055843550434e-08, + "loss": 0.2727, + "step": 41154 + }, + { + "epoch": 1.9279055605003044, + "grad_norm": 0.5962890422630455, + "learning_rate": 1.7018460160079243e-08, + "loss": 0.2617, + "step": 41155 + }, + { + "epoch": 1.9279524054902328, + "grad_norm": 0.5752210497737343, + "learning_rate": 1.6996376173588904e-08, + "loss": 0.2619, + "step": 41156 + }, + { + "epoch": 1.9279992504801613, + "grad_norm": 0.5977106052250682, + "learning_rate": 1.6974306476160162e-08, + "loss": 0.2677, + "step": 41157 + }, + { + "epoch": 1.9280460954700893, + "grad_norm": 0.5811680606384756, + "learning_rate": 1.695225106792042e-08, + "loss": 0.2545, + "step": 41158 + }, + { + "epoch": 1.9280929404600178, + "grad_norm": 0.547782434782335, + "learning_rate": 1.6930209948996246e-08, + "loss": 0.2428, + "step": 41159 + }, + { + "epoch": 1.9281397854499462, + "grad_norm": 0.5907808172739444, + "learning_rate": 1.6908183119514754e-08, + "loss": 0.2631, + "step": 41160 + }, + { + "epoch": 1.9281866304398745, + "grad_norm": 0.5974885902049274, + "learning_rate": 1.6886170579602234e-08, + "loss": 0.2709, + "step": 41161 + }, + { + "epoch": 1.9282334754298027, + "grad_norm": 0.5590732607230486, + "learning_rate": 1.6864172329385808e-08, + "loss": 0.2489, + "step": 41162 + }, + { + "epoch": 1.9282803204197312, + "grad_norm": 0.5595868323059154, + "learning_rate": 1.6842188368992318e-08, + "loss": 0.2609, + "step": 41163 + }, + { + "epoch": 1.9283271654096594, + "grad_norm": 0.574513631666438, + "learning_rate": 1.6820218698547497e-08, + "loss": 0.2701, + "step": 41164 + }, + { + "epoch": 1.9283740103995877, + "grad_norm": 0.5863585237959621, + "learning_rate": 1.6798263318178464e-08, + "loss": 0.2736, + "step": 41165 + }, + { + "epoch": 1.9284208553895161, + "grad_norm": 0.589598410468906, + "learning_rate": 1.677632222801123e-08, + "loss": 0.2563, + "step": 41166 + }, + { + "epoch": 1.9284677003794444, + "grad_norm": 0.5712703517352605, + "learning_rate": 1.6754395428172087e-08, + "loss": 0.2539, + "step": 41167 + }, + { + "epoch": 1.9285145453693726, + "grad_norm": 0.6626597351974592, + "learning_rate": 1.673248291878704e-08, + "loss": 0.2802, + "step": 41168 + }, + { + "epoch": 1.928561390359301, + "grad_norm": 0.5836364454402725, + "learning_rate": 1.6710584699982935e-08, + "loss": 0.2623, + "step": 41169 + }, + { + "epoch": 1.9286082353492295, + "grad_norm": 0.6231369361813186, + "learning_rate": 1.668870077188467e-08, + "loss": 0.2713, + "step": 41170 + }, + { + "epoch": 1.9286550803391578, + "grad_norm": 0.5955984182735452, + "learning_rate": 1.6666831134619087e-08, + "loss": 0.2791, + "step": 41171 + }, + { + "epoch": 1.928701925329086, + "grad_norm": 0.6175041632924791, + "learning_rate": 1.6644975788311925e-08, + "loss": 0.2727, + "step": 41172 + }, + { + "epoch": 1.9287487703190145, + "grad_norm": 0.5782029033972412, + "learning_rate": 1.6623134733088354e-08, + "loss": 0.2641, + "step": 41173 + }, + { + "epoch": 1.9287956153089427, + "grad_norm": 0.6025929201483576, + "learning_rate": 1.6601307969074665e-08, + "loss": 0.2693, + "step": 41174 + }, + { + "epoch": 1.928842460298871, + "grad_norm": 0.6303679740713353, + "learning_rate": 1.6579495496396315e-08, + "loss": 0.2839, + "step": 41175 + }, + { + "epoch": 1.9288893052887994, + "grad_norm": 0.636038455106684, + "learning_rate": 1.6557697315178757e-08, + "loss": 0.2843, + "step": 41176 + }, + { + "epoch": 1.9289361502787277, + "grad_norm": 0.6159633068979354, + "learning_rate": 1.6535913425547446e-08, + "loss": 0.2738, + "step": 41177 + }, + { + "epoch": 1.928982995268656, + "grad_norm": 0.6067554432456068, + "learning_rate": 1.651414382762756e-08, + "loss": 0.2766, + "step": 41178 + }, + { + "epoch": 1.9290298402585844, + "grad_norm": 0.6407711094303495, + "learning_rate": 1.6492388521544832e-08, + "loss": 0.287, + "step": 41179 + }, + { + "epoch": 1.9290766852485128, + "grad_norm": 0.57454189017261, + "learning_rate": 1.6470647507424165e-08, + "loss": 0.2716, + "step": 41180 + }, + { + "epoch": 1.9291235302384409, + "grad_norm": 0.5785074020812455, + "learning_rate": 1.644892078539073e-08, + "loss": 0.2607, + "step": 41181 + }, + { + "epoch": 1.9291703752283693, + "grad_norm": 0.6114805488734794, + "learning_rate": 1.6427208355569435e-08, + "loss": 0.2689, + "step": 41182 + }, + { + "epoch": 1.9292172202182978, + "grad_norm": 0.6025391209017004, + "learning_rate": 1.640551021808545e-08, + "loss": 0.2747, + "step": 41183 + }, + { + "epoch": 1.929264065208226, + "grad_norm": 0.5689965797475479, + "learning_rate": 1.6383826373063407e-08, + "loss": 0.2516, + "step": 41184 + }, + { + "epoch": 1.9293109101981543, + "grad_norm": 0.6153000495168548, + "learning_rate": 1.6362156820628195e-08, + "loss": 0.2729, + "step": 41185 + }, + { + "epoch": 1.9293577551880827, + "grad_norm": 0.570655064689598, + "learning_rate": 1.6340501560904722e-08, + "loss": 0.2616, + "step": 41186 + }, + { + "epoch": 1.929404600178011, + "grad_norm": 0.5678432508285091, + "learning_rate": 1.6318860594017328e-08, + "loss": 0.2644, + "step": 41187 + }, + { + "epoch": 1.9294514451679392, + "grad_norm": 0.5475923987361849, + "learning_rate": 1.629723392009036e-08, + "loss": 0.2478, + "step": 41188 + }, + { + "epoch": 1.9294982901578677, + "grad_norm": 0.5869327325535171, + "learning_rate": 1.6275621539249e-08, + "loss": 0.2604, + "step": 41189 + }, + { + "epoch": 1.929545135147796, + "grad_norm": 0.561479477138429, + "learning_rate": 1.625402345161675e-08, + "loss": 0.2657, + "step": 41190 + }, + { + "epoch": 1.9295919801377241, + "grad_norm": 0.6788561428555787, + "learning_rate": 1.623243965731852e-08, + "loss": 0.2722, + "step": 41191 + }, + { + "epoch": 1.9296388251276526, + "grad_norm": 0.5950810111269159, + "learning_rate": 1.6210870156478375e-08, + "loss": 0.263, + "step": 41192 + }, + { + "epoch": 1.929685670117581, + "grad_norm": 0.6138800230412852, + "learning_rate": 1.618931494922038e-08, + "loss": 0.2703, + "step": 41193 + }, + { + "epoch": 1.929732515107509, + "grad_norm": 0.5842199751421726, + "learning_rate": 1.6167774035668882e-08, + "loss": 0.2654, + "step": 41194 + }, + { + "epoch": 1.9297793600974376, + "grad_norm": 0.629503891624515, + "learning_rate": 1.6146247415947113e-08, + "loss": 0.2918, + "step": 41195 + }, + { + "epoch": 1.929826205087366, + "grad_norm": 0.6087600978199104, + "learning_rate": 1.612473509017942e-08, + "loss": 0.2697, + "step": 41196 + }, + { + "epoch": 1.9298730500772943, + "grad_norm": 0.6032940932589449, + "learning_rate": 1.6103237058489873e-08, + "loss": 0.2591, + "step": 41197 + }, + { + "epoch": 1.9299198950672225, + "grad_norm": 0.6108362708048469, + "learning_rate": 1.6081753321001703e-08, + "loss": 0.2535, + "step": 41198 + }, + { + "epoch": 1.929966740057151, + "grad_norm": 0.5758223140706787, + "learning_rate": 1.60602838778387e-08, + "loss": 0.264, + "step": 41199 + }, + { + "epoch": 1.9300135850470792, + "grad_norm": 0.5966012502923838, + "learning_rate": 1.6038828729124654e-08, + "loss": 0.2666, + "step": 41200 + }, + { + "epoch": 1.9300604300370074, + "grad_norm": 0.6026977372705636, + "learning_rate": 1.6017387874982528e-08, + "loss": 0.2548, + "step": 41201 + }, + { + "epoch": 1.930107275026936, + "grad_norm": 0.5974980867742695, + "learning_rate": 1.599596131553638e-08, + "loss": 0.2647, + "step": 41202 + }, + { + "epoch": 1.9301541200168641, + "grad_norm": 0.6126807153858692, + "learning_rate": 1.5974549050908895e-08, + "loss": 0.2813, + "step": 41203 + }, + { + "epoch": 1.9302009650067924, + "grad_norm": 0.5858794007988908, + "learning_rate": 1.5953151081223305e-08, + "loss": 0.2549, + "step": 41204 + }, + { + "epoch": 1.9302478099967209, + "grad_norm": 0.5658030689084109, + "learning_rate": 1.5931767406603405e-08, + "loss": 0.2637, + "step": 41205 + }, + { + "epoch": 1.9302946549866493, + "grad_norm": 0.6348763576134868, + "learning_rate": 1.5910398027171592e-08, + "loss": 0.2858, + "step": 41206 + }, + { + "epoch": 1.9303414999765776, + "grad_norm": 0.5683771558159049, + "learning_rate": 1.588904294305138e-08, + "loss": 0.2601, + "step": 41207 + }, + { + "epoch": 1.9303883449665058, + "grad_norm": 0.5899654866625245, + "learning_rate": 1.586770215436517e-08, + "loss": 0.261, + "step": 41208 + }, + { + "epoch": 1.9304351899564343, + "grad_norm": 0.5669187067922428, + "learning_rate": 1.5846375661235925e-08, + "loss": 0.268, + "step": 41209 + }, + { + "epoch": 1.9304820349463625, + "grad_norm": 0.5823641548253157, + "learning_rate": 1.5825063463786317e-08, + "loss": 0.2663, + "step": 41210 + }, + { + "epoch": 1.9305288799362907, + "grad_norm": 0.617803130091256, + "learning_rate": 1.5803765562139307e-08, + "loss": 0.288, + "step": 41211 + }, + { + "epoch": 1.9305757249262192, + "grad_norm": 0.573880680740867, + "learning_rate": 1.5782481956417296e-08, + "loss": 0.2611, + "step": 41212 + }, + { + "epoch": 1.9306225699161474, + "grad_norm": 0.6061056775624122, + "learning_rate": 1.5761212646742686e-08, + "loss": 0.2627, + "step": 41213 + }, + { + "epoch": 1.9306694149060757, + "grad_norm": 0.5499472424426487, + "learning_rate": 1.5739957633237878e-08, + "loss": 0.2493, + "step": 41214 + }, + { + "epoch": 1.9307162598960041, + "grad_norm": 0.5863829724922386, + "learning_rate": 1.5718716916025e-08, + "loss": 0.2564, + "step": 41215 + }, + { + "epoch": 1.9307631048859326, + "grad_norm": 0.5648938477873362, + "learning_rate": 1.5697490495226732e-08, + "loss": 0.2501, + "step": 41216 + }, + { + "epoch": 1.9308099498758606, + "grad_norm": 0.5945025782361261, + "learning_rate": 1.5676278370964915e-08, + "loss": 0.2709, + "step": 41217 + }, + { + "epoch": 1.930856794865789, + "grad_norm": 0.5926985179687323, + "learning_rate": 1.565508054336168e-08, + "loss": 0.2529, + "step": 41218 + }, + { + "epoch": 1.9309036398557176, + "grad_norm": 0.5779613644039676, + "learning_rate": 1.5633897012539145e-08, + "loss": 0.2689, + "step": 41219 + }, + { + "epoch": 1.9309504848456458, + "grad_norm": 0.5790609232181422, + "learning_rate": 1.5612727778619163e-08, + "loss": 0.2693, + "step": 41220 + }, + { + "epoch": 1.930997329835574, + "grad_norm": 0.6063048399278519, + "learning_rate": 1.5591572841723302e-08, + "loss": 0.2764, + "step": 41221 + }, + { + "epoch": 1.9310441748255025, + "grad_norm": 0.6413571452814164, + "learning_rate": 1.5570432201973685e-08, + "loss": 0.2766, + "step": 41222 + }, + { + "epoch": 1.9310910198154307, + "grad_norm": 0.5677642746018804, + "learning_rate": 1.5549305859491605e-08, + "loss": 0.2466, + "step": 41223 + }, + { + "epoch": 1.931137864805359, + "grad_norm": 0.5967046620038992, + "learning_rate": 1.5528193814399184e-08, + "loss": 0.2695, + "step": 41224 + }, + { + "epoch": 1.9311847097952874, + "grad_norm": 0.5929352832247119, + "learning_rate": 1.5507096066817164e-08, + "loss": 0.2754, + "step": 41225 + }, + { + "epoch": 1.9312315547852157, + "grad_norm": 0.5843063120938866, + "learning_rate": 1.5486012616867385e-08, + "loss": 0.2731, + "step": 41226 + }, + { + "epoch": 1.931278399775144, + "grad_norm": 0.6652952563544451, + "learning_rate": 1.5464943464671424e-08, + "loss": 0.2839, + "step": 41227 + }, + { + "epoch": 1.9313252447650724, + "grad_norm": 0.6076282182791501, + "learning_rate": 1.544388861035001e-08, + "loss": 0.2682, + "step": 41228 + }, + { + "epoch": 1.9313720897550009, + "grad_norm": 0.6764509579257216, + "learning_rate": 1.542284805402472e-08, + "loss": 0.2928, + "step": 41229 + }, + { + "epoch": 1.9314189347449289, + "grad_norm": 0.5594985759148218, + "learning_rate": 1.5401821795816287e-08, + "loss": 0.2599, + "step": 41230 + }, + { + "epoch": 1.9314657797348573, + "grad_norm": 0.6145902925109208, + "learning_rate": 1.5380809835846e-08, + "loss": 0.2626, + "step": 41231 + }, + { + "epoch": 1.9315126247247858, + "grad_norm": 0.6221226143071734, + "learning_rate": 1.53598121742346e-08, + "loss": 0.2701, + "step": 41232 + }, + { + "epoch": 1.931559469714714, + "grad_norm": 0.5817490281707222, + "learning_rate": 1.53388288111031e-08, + "loss": 0.2711, + "step": 41233 + }, + { + "epoch": 1.9316063147046423, + "grad_norm": 0.5661606401053425, + "learning_rate": 1.531785974657196e-08, + "loss": 0.2577, + "step": 41234 + }, + { + "epoch": 1.9316531596945707, + "grad_norm": 0.5957810230137338, + "learning_rate": 1.5296904980762195e-08, + "loss": 0.2873, + "step": 41235 + }, + { + "epoch": 1.931700004684499, + "grad_norm": 0.5958198349676398, + "learning_rate": 1.527596451379426e-08, + "loss": 0.2607, + "step": 41236 + }, + { + "epoch": 1.9317468496744272, + "grad_norm": 0.6118643681156564, + "learning_rate": 1.525503834578862e-08, + "loss": 0.2812, + "step": 41237 + }, + { + "epoch": 1.9317936946643557, + "grad_norm": 0.601043023361597, + "learning_rate": 1.523412647686545e-08, + "loss": 0.2845, + "step": 41238 + }, + { + "epoch": 1.931840539654284, + "grad_norm": 0.578369141566432, + "learning_rate": 1.5213228907145494e-08, + "loss": 0.2677, + "step": 41239 + }, + { + "epoch": 1.9318873846442122, + "grad_norm": 0.5993120085839669, + "learning_rate": 1.5192345636748927e-08, + "loss": 0.2762, + "step": 41240 + }, + { + "epoch": 1.9319342296341406, + "grad_norm": 0.6250524821484791, + "learning_rate": 1.5171476665795938e-08, + "loss": 0.2732, + "step": 41241 + }, + { + "epoch": 1.931981074624069, + "grad_norm": 0.571365164862776, + "learning_rate": 1.51506219944067e-08, + "loss": 0.2744, + "step": 41242 + }, + { + "epoch": 1.9320279196139973, + "grad_norm": 0.6595553537136182, + "learning_rate": 1.5129781622700844e-08, + "loss": 0.2916, + "step": 41243 + }, + { + "epoch": 1.9320747646039256, + "grad_norm": 0.5690961949157879, + "learning_rate": 1.5108955550798833e-08, + "loss": 0.2663, + "step": 41244 + }, + { + "epoch": 1.932121609593854, + "grad_norm": 0.6113164563705422, + "learning_rate": 1.508814377881973e-08, + "loss": 0.2804, + "step": 41245 + }, + { + "epoch": 1.9321684545837823, + "grad_norm": 0.6337021637281199, + "learning_rate": 1.5067346306884278e-08, + "loss": 0.2798, + "step": 41246 + }, + { + "epoch": 1.9322152995737105, + "grad_norm": 0.6187355737344158, + "learning_rate": 1.504656313511127e-08, + "loss": 0.2818, + "step": 41247 + }, + { + "epoch": 1.932262144563639, + "grad_norm": 0.6042237879658341, + "learning_rate": 1.5025794263621163e-08, + "loss": 0.2663, + "step": 41248 + }, + { + "epoch": 1.9323089895535672, + "grad_norm": 0.618885114739099, + "learning_rate": 1.500503969253275e-08, + "loss": 0.2675, + "step": 41249 + }, + { + "epoch": 1.9323558345434955, + "grad_norm": 0.6027079788170097, + "learning_rate": 1.4984299421965943e-08, + "loss": 0.2704, + "step": 41250 + }, + { + "epoch": 1.932402679533424, + "grad_norm": 0.6029637315076951, + "learning_rate": 1.4963573452039802e-08, + "loss": 0.269, + "step": 41251 + }, + { + "epoch": 1.9324495245233524, + "grad_norm": 0.6323704141984107, + "learning_rate": 1.4942861782873407e-08, + "loss": 0.2508, + "step": 41252 + }, + { + "epoch": 1.9324963695132804, + "grad_norm": 0.5939590471987491, + "learning_rate": 1.492216441458666e-08, + "loss": 0.2691, + "step": 41253 + }, + { + "epoch": 1.9325432145032089, + "grad_norm": 0.557858897924052, + "learning_rate": 1.4901481347298073e-08, + "loss": 0.263, + "step": 41254 + }, + { + "epoch": 1.9325900594931373, + "grad_norm": 0.589185421501096, + "learning_rate": 1.4880812581127002e-08, + "loss": 0.2676, + "step": 41255 + }, + { + "epoch": 1.9326369044830656, + "grad_norm": 0.5958134266871847, + "learning_rate": 1.4860158116192235e-08, + "loss": 0.2813, + "step": 41256 + }, + { + "epoch": 1.9326837494729938, + "grad_norm": 0.6419750048473497, + "learning_rate": 1.4839517952612569e-08, + "loss": 0.284, + "step": 41257 + }, + { + "epoch": 1.9327305944629223, + "grad_norm": 0.562356395592547, + "learning_rate": 1.4818892090506798e-08, + "loss": 0.2589, + "step": 41258 + }, + { + "epoch": 1.9327774394528505, + "grad_norm": 0.6345530657023947, + "learning_rate": 1.4798280529993714e-08, + "loss": 0.2678, + "step": 41259 + }, + { + "epoch": 1.9328242844427788, + "grad_norm": 0.6161661162772512, + "learning_rate": 1.4777683271191834e-08, + "loss": 0.289, + "step": 41260 + }, + { + "epoch": 1.9328711294327072, + "grad_norm": 0.5571018470893161, + "learning_rate": 1.4757100314219953e-08, + "loss": 0.2531, + "step": 41261 + }, + { + "epoch": 1.9329179744226355, + "grad_norm": 0.579966616125706, + "learning_rate": 1.473653165919603e-08, + "loss": 0.2605, + "step": 41262 + }, + { + "epoch": 1.9329648194125637, + "grad_norm": 0.5654070087349817, + "learning_rate": 1.471597730623886e-08, + "loss": 0.2532, + "step": 41263 + }, + { + "epoch": 1.9330116644024922, + "grad_norm": 0.624877787745103, + "learning_rate": 1.4695437255466683e-08, + "loss": 0.2738, + "step": 41264 + }, + { + "epoch": 1.9330585093924206, + "grad_norm": 0.5859285172569729, + "learning_rate": 1.4674911506997457e-08, + "loss": 0.2626, + "step": 41265 + }, + { + "epoch": 1.9331053543823487, + "grad_norm": 0.5819120237141749, + "learning_rate": 1.4654400060949426e-08, + "loss": 0.2552, + "step": 41266 + }, + { + "epoch": 1.9331521993722771, + "grad_norm": 0.5929886341421481, + "learning_rate": 1.4633902917440823e-08, + "loss": 0.2601, + "step": 41267 + }, + { + "epoch": 1.9331990443622056, + "grad_norm": 0.5840108303886431, + "learning_rate": 1.4613420076589336e-08, + "loss": 0.266, + "step": 41268 + }, + { + "epoch": 1.9332458893521338, + "grad_norm": 0.5930981474388652, + "learning_rate": 1.4592951538512922e-08, + "loss": 0.2701, + "step": 41269 + }, + { + "epoch": 1.933292734342062, + "grad_norm": 0.5834889349814536, + "learning_rate": 1.4572497303329547e-08, + "loss": 0.2847, + "step": 41270 + }, + { + "epoch": 1.9333395793319905, + "grad_norm": 0.6094974247341077, + "learning_rate": 1.4552057371156614e-08, + "loss": 0.2814, + "step": 41271 + }, + { + "epoch": 1.9333864243219188, + "grad_norm": 0.5998917307464505, + "learning_rate": 1.4531631742111807e-08, + "loss": 0.2695, + "step": 41272 + }, + { + "epoch": 1.933433269311847, + "grad_norm": 0.6170378719157993, + "learning_rate": 1.4511220416313088e-08, + "loss": 0.2629, + "step": 41273 + }, + { + "epoch": 1.9334801143017755, + "grad_norm": 0.6288309414428954, + "learning_rate": 1.4490823393877307e-08, + "loss": 0.2562, + "step": 41274 + }, + { + "epoch": 1.9335269592917037, + "grad_norm": 0.599588504783558, + "learning_rate": 1.4470440674922148e-08, + "loss": 0.2642, + "step": 41275 + }, + { + "epoch": 1.933573804281632, + "grad_norm": 0.639546091827621, + "learning_rate": 1.445007225956474e-08, + "loss": 0.2724, + "step": 41276 + }, + { + "epoch": 1.9336206492715604, + "grad_norm": 0.6118356142909832, + "learning_rate": 1.4429718147922767e-08, + "loss": 0.267, + "step": 41277 + }, + { + "epoch": 1.9336674942614889, + "grad_norm": 0.5842329785560096, + "learning_rate": 1.4409378340113078e-08, + "loss": 0.2571, + "step": 41278 + }, + { + "epoch": 1.9337143392514171, + "grad_norm": 0.6134937162873177, + "learning_rate": 1.4389052836252526e-08, + "loss": 0.2808, + "step": 41279 + }, + { + "epoch": 1.9337611842413454, + "grad_norm": 0.5699597300251195, + "learning_rate": 1.4368741636458238e-08, + "loss": 0.2639, + "step": 41280 + }, + { + "epoch": 1.9338080292312738, + "grad_norm": 0.6118465233559531, + "learning_rate": 1.4348444740847067e-08, + "loss": 0.2816, + "step": 41281 + }, + { + "epoch": 1.933854874221202, + "grad_norm": 0.5902682360822127, + "learning_rate": 1.4328162149535863e-08, + "loss": 0.2774, + "step": 41282 + }, + { + "epoch": 1.9339017192111303, + "grad_norm": 0.5967315295213735, + "learning_rate": 1.4307893862641197e-08, + "loss": 0.2588, + "step": 41283 + }, + { + "epoch": 1.9339485642010588, + "grad_norm": 0.5780772568989351, + "learning_rate": 1.4287639880280202e-08, + "loss": 0.267, + "step": 41284 + }, + { + "epoch": 1.933995409190987, + "grad_norm": 0.5777016813899538, + "learning_rate": 1.4267400202568894e-08, + "loss": 0.269, + "step": 41285 + }, + { + "epoch": 1.9340422541809152, + "grad_norm": 0.648628577797196, + "learning_rate": 1.4247174829623844e-08, + "loss": 0.2786, + "step": 41286 + }, + { + "epoch": 1.9340890991708437, + "grad_norm": 0.5965159901346441, + "learning_rate": 1.4226963761561629e-08, + "loss": 0.2657, + "step": 41287 + }, + { + "epoch": 1.9341359441607722, + "grad_norm": 0.5977526472510416, + "learning_rate": 1.4206766998498544e-08, + "loss": 0.2751, + "step": 41288 + }, + { + "epoch": 1.9341827891507002, + "grad_norm": 0.6014803025973564, + "learning_rate": 1.4186584540550607e-08, + "loss": 0.2544, + "step": 41289 + }, + { + "epoch": 1.9342296341406287, + "grad_norm": 0.6492525797071905, + "learning_rate": 1.4166416387834115e-08, + "loss": 0.2834, + "step": 41290 + }, + { + "epoch": 1.9342764791305571, + "grad_norm": 0.5622829878163498, + "learning_rate": 1.4146262540465084e-08, + "loss": 0.2689, + "step": 41291 + }, + { + "epoch": 1.9343233241204854, + "grad_norm": 0.599694917440185, + "learning_rate": 1.412612299855981e-08, + "loss": 0.2605, + "step": 41292 + }, + { + "epoch": 1.9343701691104136, + "grad_norm": 0.6027724506980763, + "learning_rate": 1.4105997762233481e-08, + "loss": 0.2656, + "step": 41293 + }, + { + "epoch": 1.934417014100342, + "grad_norm": 0.5957152724433652, + "learning_rate": 1.408588683160239e-08, + "loss": 0.2844, + "step": 41294 + }, + { + "epoch": 1.9344638590902703, + "grad_norm": 0.6047085068762023, + "learning_rate": 1.4065790206782281e-08, + "loss": 0.2665, + "step": 41295 + }, + { + "epoch": 1.9345107040801985, + "grad_norm": 0.5946115070617253, + "learning_rate": 1.4045707887888615e-08, + "loss": 0.2575, + "step": 41296 + }, + { + "epoch": 1.934557549070127, + "grad_norm": 0.5777778575888324, + "learning_rate": 1.4025639875037412e-08, + "loss": 0.2457, + "step": 41297 + }, + { + "epoch": 1.9346043940600552, + "grad_norm": 0.6177391161763498, + "learning_rate": 1.4005586168343577e-08, + "loss": 0.273, + "step": 41298 + }, + { + "epoch": 1.9346512390499835, + "grad_norm": 0.6200586407728379, + "learning_rate": 1.3985546767922852e-08, + "loss": 0.2839, + "step": 41299 + }, + { + "epoch": 1.934698084039912, + "grad_norm": 0.570223064164053, + "learning_rate": 1.3965521673890425e-08, + "loss": 0.2519, + "step": 41300 + }, + { + "epoch": 1.9347449290298404, + "grad_norm": 0.6043282837727377, + "learning_rate": 1.3945510886361758e-08, + "loss": 0.2779, + "step": 41301 + }, + { + "epoch": 1.9347917740197684, + "grad_norm": 0.5877072821633544, + "learning_rate": 1.3925514405451478e-08, + "loss": 0.2806, + "step": 41302 + }, + { + "epoch": 1.934838619009697, + "grad_norm": 0.6082627391623032, + "learning_rate": 1.3905532231275332e-08, + "loss": 0.2869, + "step": 41303 + }, + { + "epoch": 1.9348854639996254, + "grad_norm": 0.5900256684210475, + "learning_rate": 1.3885564363947668e-08, + "loss": 0.2706, + "step": 41304 + }, + { + "epoch": 1.9349323089895536, + "grad_norm": 0.5767146321944463, + "learning_rate": 1.3865610803584228e-08, + "loss": 0.253, + "step": 41305 + }, + { + "epoch": 1.9349791539794818, + "grad_norm": 0.5517590215146139, + "learning_rate": 1.3845671550298811e-08, + "loss": 0.2601, + "step": 41306 + }, + { + "epoch": 1.9350259989694103, + "grad_norm": 0.5751219447942814, + "learning_rate": 1.382574660420688e-08, + "loss": 0.2673, + "step": 41307 + }, + { + "epoch": 1.9350728439593385, + "grad_norm": 0.6228140219659251, + "learning_rate": 1.3805835965422787e-08, + "loss": 0.2701, + "step": 41308 + }, + { + "epoch": 1.9351196889492668, + "grad_norm": 0.5545023269712129, + "learning_rate": 1.378593963406144e-08, + "loss": 0.2575, + "step": 41309 + }, + { + "epoch": 1.9351665339391952, + "grad_norm": 0.6233053050970587, + "learning_rate": 1.3766057610236915e-08, + "loss": 0.2826, + "step": 41310 + }, + { + "epoch": 1.9352133789291235, + "grad_norm": 0.599852323194331, + "learning_rate": 1.3746189894064121e-08, + "loss": 0.2633, + "step": 41311 + }, + { + "epoch": 1.9352602239190517, + "grad_norm": 0.5949923075215325, + "learning_rate": 1.3726336485656855e-08, + "loss": 0.2611, + "step": 41312 + }, + { + "epoch": 1.9353070689089802, + "grad_norm": 0.5976273729572379, + "learning_rate": 1.3706497385129746e-08, + "loss": 0.2707, + "step": 41313 + }, + { + "epoch": 1.9353539138989087, + "grad_norm": 0.637414210384351, + "learning_rate": 1.3686672592596873e-08, + "loss": 0.3007, + "step": 41314 + }, + { + "epoch": 1.935400758888837, + "grad_norm": 0.6123727753681399, + "learning_rate": 1.3666862108172308e-08, + "loss": 0.2704, + "step": 41315 + }, + { + "epoch": 1.9354476038787651, + "grad_norm": 0.6434253685849217, + "learning_rate": 1.364706593196985e-08, + "loss": 0.2848, + "step": 41316 + }, + { + "epoch": 1.9354944488686936, + "grad_norm": 0.6188190795089419, + "learning_rate": 1.3627284064103574e-08, + "loss": 0.2868, + "step": 41317 + }, + { + "epoch": 1.9355412938586218, + "grad_norm": 0.6001478853493931, + "learning_rate": 1.3607516504687556e-08, + "loss": 0.2657, + "step": 41318 + }, + { + "epoch": 1.93558813884855, + "grad_norm": 0.561459753196544, + "learning_rate": 1.3587763253835318e-08, + "loss": 0.2603, + "step": 41319 + }, + { + "epoch": 1.9356349838384785, + "grad_norm": 0.5863398193545426, + "learning_rate": 1.3568024311660377e-08, + "loss": 0.2751, + "step": 41320 + }, + { + "epoch": 1.9356818288284068, + "grad_norm": 0.6162686197286902, + "learning_rate": 1.354829967827681e-08, + "loss": 0.2672, + "step": 41321 + }, + { + "epoch": 1.935728673818335, + "grad_norm": 0.600158759140803, + "learning_rate": 1.3528589353797583e-08, + "loss": 0.2663, + "step": 41322 + }, + { + "epoch": 1.9357755188082635, + "grad_norm": 0.5933888601091302, + "learning_rate": 1.3508893338336492e-08, + "loss": 0.2687, + "step": 41323 + }, + { + "epoch": 1.935822363798192, + "grad_norm": 0.6243429369529473, + "learning_rate": 1.3489211632006505e-08, + "loss": 0.276, + "step": 41324 + }, + { + "epoch": 1.93586920878812, + "grad_norm": 0.6422536099903544, + "learning_rate": 1.3469544234921416e-08, + "loss": 0.2691, + "step": 41325 + }, + { + "epoch": 1.9359160537780484, + "grad_norm": 0.5953407675672127, + "learning_rate": 1.3449891147194194e-08, + "loss": 0.2737, + "step": 41326 + }, + { + "epoch": 1.935962898767977, + "grad_norm": 0.6040990464373681, + "learning_rate": 1.3430252368937803e-08, + "loss": 0.2646, + "step": 41327 + }, + { + "epoch": 1.9360097437579051, + "grad_norm": 0.6075469752470704, + "learning_rate": 1.3410627900265205e-08, + "loss": 0.2674, + "step": 41328 + }, + { + "epoch": 1.9360565887478334, + "grad_norm": 0.5828940558984809, + "learning_rate": 1.3391017741289646e-08, + "loss": 0.2617, + "step": 41329 + }, + { + "epoch": 1.9361034337377618, + "grad_norm": 0.5751203363475101, + "learning_rate": 1.3371421892123814e-08, + "loss": 0.2641, + "step": 41330 + }, + { + "epoch": 1.93615027872769, + "grad_norm": 0.6304427548789633, + "learning_rate": 1.335184035288012e-08, + "loss": 0.2785, + "step": 41331 + }, + { + "epoch": 1.9361971237176183, + "grad_norm": 0.611755224141276, + "learning_rate": 1.3332273123671802e-08, + "loss": 0.2733, + "step": 41332 + }, + { + "epoch": 1.9362439687075468, + "grad_norm": 0.5585486528235851, + "learning_rate": 1.3312720204611274e-08, + "loss": 0.2512, + "step": 41333 + }, + { + "epoch": 1.936290813697475, + "grad_norm": 0.5768051171072845, + "learning_rate": 1.3293181595811222e-08, + "loss": 0.2779, + "step": 41334 + }, + { + "epoch": 1.9363376586874033, + "grad_norm": 0.6011118520144818, + "learning_rate": 1.327365729738378e-08, + "loss": 0.2854, + "step": 41335 + }, + { + "epoch": 1.9363845036773317, + "grad_norm": 0.575799583035925, + "learning_rate": 1.3254147309441357e-08, + "loss": 0.2685, + "step": 41336 + }, + { + "epoch": 1.9364313486672602, + "grad_norm": 0.5885542509964875, + "learning_rate": 1.3234651632096086e-08, + "loss": 0.2624, + "step": 41337 + }, + { + "epoch": 1.9364781936571882, + "grad_norm": 0.5801119189560691, + "learning_rate": 1.3215170265460653e-08, + "loss": 0.27, + "step": 41338 + }, + { + "epoch": 1.9365250386471167, + "grad_norm": 0.5895609514811461, + "learning_rate": 1.3195703209646915e-08, + "loss": 0.2736, + "step": 41339 + }, + { + "epoch": 1.9365718836370451, + "grad_norm": 0.6248188841449763, + "learning_rate": 1.3176250464766727e-08, + "loss": 0.2634, + "step": 41340 + }, + { + "epoch": 1.9366187286269734, + "grad_norm": 0.5565474179570029, + "learning_rate": 1.315681203093222e-08, + "loss": 0.2531, + "step": 41341 + }, + { + "epoch": 1.9366655736169016, + "grad_norm": 0.5627494121231758, + "learning_rate": 1.3137387908255251e-08, + "loss": 0.2486, + "step": 41342 + }, + { + "epoch": 1.93671241860683, + "grad_norm": 0.5937223555500093, + "learning_rate": 1.3117978096847395e-08, + "loss": 0.2691, + "step": 41343 + }, + { + "epoch": 1.9367592635967583, + "grad_norm": 0.5524030509408968, + "learning_rate": 1.3098582596820786e-08, + "loss": 0.2567, + "step": 41344 + }, + { + "epoch": 1.9368061085866866, + "grad_norm": 0.5815591498904742, + "learning_rate": 1.3079201408286446e-08, + "loss": 0.2687, + "step": 41345 + }, + { + "epoch": 1.936852953576615, + "grad_norm": 0.6150999802981346, + "learning_rate": 1.3059834531356507e-08, + "loss": 0.2648, + "step": 41346 + }, + { + "epoch": 1.9368997985665433, + "grad_norm": 0.5812050169194136, + "learning_rate": 1.3040481966141993e-08, + "loss": 0.2644, + "step": 41347 + }, + { + "epoch": 1.9369466435564715, + "grad_norm": 0.6053729519601703, + "learning_rate": 1.3021143712754759e-08, + "loss": 0.2861, + "step": 41348 + }, + { + "epoch": 1.9369934885464, + "grad_norm": 0.597834675810876, + "learning_rate": 1.3001819771305546e-08, + "loss": 0.2645, + "step": 41349 + }, + { + "epoch": 1.9370403335363284, + "grad_norm": 0.6109084401312156, + "learning_rate": 1.2982510141905657e-08, + "loss": 0.2714, + "step": 41350 + }, + { + "epoch": 1.9370871785262567, + "grad_norm": 0.5653001389868214, + "learning_rate": 1.2963214824666393e-08, + "loss": 0.2535, + "step": 41351 + }, + { + "epoch": 1.937134023516185, + "grad_norm": 0.5985028233120001, + "learning_rate": 1.2943933819698772e-08, + "loss": 0.2775, + "step": 41352 + }, + { + "epoch": 1.9371808685061134, + "grad_norm": 0.57824454722799, + "learning_rate": 1.2924667127113822e-08, + "loss": 0.2664, + "step": 41353 + }, + { + "epoch": 1.9372277134960416, + "grad_norm": 0.6193485261868877, + "learning_rate": 1.2905414747022282e-08, + "loss": 0.2746, + "step": 41354 + }, + { + "epoch": 1.9372745584859699, + "grad_norm": 0.5998137231981232, + "learning_rate": 1.2886176679534624e-08, + "loss": 0.279, + "step": 41355 + }, + { + "epoch": 1.9373214034758983, + "grad_norm": 0.609159140317301, + "learning_rate": 1.2866952924762143e-08, + "loss": 0.2591, + "step": 41356 + }, + { + "epoch": 1.9373682484658266, + "grad_norm": 0.5821639999739511, + "learning_rate": 1.284774348281531e-08, + "loss": 0.252, + "step": 41357 + }, + { + "epoch": 1.9374150934557548, + "grad_norm": 0.5739080982382263, + "learning_rate": 1.2828548353804593e-08, + "loss": 0.2696, + "step": 41358 + }, + { + "epoch": 1.9374619384456833, + "grad_norm": 0.6246770956042573, + "learning_rate": 1.2809367537840456e-08, + "loss": 0.2799, + "step": 41359 + }, + { + "epoch": 1.9375087834356117, + "grad_norm": 0.5667364593085369, + "learning_rate": 1.279020103503309e-08, + "loss": 0.2595, + "step": 41360 + }, + { + "epoch": 1.9375556284255397, + "grad_norm": 0.6210797838904168, + "learning_rate": 1.277104884549324e-08, + "loss": 0.2683, + "step": 41361 + }, + { + "epoch": 1.9376024734154682, + "grad_norm": 0.6438030320913574, + "learning_rate": 1.2751910969330817e-08, + "loss": 0.2777, + "step": 41362 + }, + { + "epoch": 1.9376493184053967, + "grad_norm": 0.6319807337618353, + "learning_rate": 1.2732787406655734e-08, + "loss": 0.2789, + "step": 41363 + }, + { + "epoch": 1.937696163395325, + "grad_norm": 0.5883407742019544, + "learning_rate": 1.2713678157578734e-08, + "loss": 0.2658, + "step": 41364 + }, + { + "epoch": 1.9377430083852532, + "grad_norm": 0.6258644582470424, + "learning_rate": 1.2694583222208901e-08, + "loss": 0.2824, + "step": 41365 + }, + { + "epoch": 1.9377898533751816, + "grad_norm": 0.5884501913729854, + "learning_rate": 1.2675502600656974e-08, + "loss": 0.2759, + "step": 41366 + }, + { + "epoch": 1.9378366983651099, + "grad_norm": 0.5532327140666095, + "learning_rate": 1.2656436293032037e-08, + "loss": 0.2464, + "step": 41367 + }, + { + "epoch": 1.937883543355038, + "grad_norm": 0.5474203484224683, + "learning_rate": 1.2637384299444278e-08, + "loss": 0.2474, + "step": 41368 + }, + { + "epoch": 1.9379303883449666, + "grad_norm": 0.604397208086283, + "learning_rate": 1.261834662000333e-08, + "loss": 0.2726, + "step": 41369 + }, + { + "epoch": 1.9379772333348948, + "grad_norm": 0.5775253870279476, + "learning_rate": 1.2599323254818553e-08, + "loss": 0.2546, + "step": 41370 + }, + { + "epoch": 1.938024078324823, + "grad_norm": 0.6006165767297663, + "learning_rate": 1.25803142039993e-08, + "loss": 0.2619, + "step": 41371 + }, + { + "epoch": 1.9380709233147515, + "grad_norm": 0.5694050362283694, + "learning_rate": 1.2561319467655208e-08, + "loss": 0.2709, + "step": 41372 + }, + { + "epoch": 1.93811776830468, + "grad_norm": 0.5735073777394292, + "learning_rate": 1.2542339045895634e-08, + "loss": 0.2679, + "step": 41373 + }, + { + "epoch": 1.938164613294608, + "grad_norm": 0.5825085932851506, + "learning_rate": 1.2523372938829658e-08, + "loss": 0.2725, + "step": 41374 + }, + { + "epoch": 1.9382114582845364, + "grad_norm": 0.6074987384436669, + "learning_rate": 1.2504421146566636e-08, + "loss": 0.2898, + "step": 41375 + }, + { + "epoch": 1.938258303274465, + "grad_norm": 0.6887347590145048, + "learning_rate": 1.2485483669215371e-08, + "loss": 0.3027, + "step": 41376 + }, + { + "epoch": 1.9383051482643932, + "grad_norm": 0.5708954009730554, + "learning_rate": 1.2466560506884939e-08, + "loss": 0.2473, + "step": 41377 + }, + { + "epoch": 1.9383519932543214, + "grad_norm": 0.6139100021169825, + "learning_rate": 1.2447651659683869e-08, + "loss": 0.2692, + "step": 41378 + }, + { + "epoch": 1.9383988382442499, + "grad_norm": 0.61119174903358, + "learning_rate": 1.242875712772179e-08, + "loss": 0.2765, + "step": 41379 + }, + { + "epoch": 1.938445683234178, + "grad_norm": 0.603141605859602, + "learning_rate": 1.2409876911106677e-08, + "loss": 0.2699, + "step": 41380 + }, + { + "epoch": 1.9384925282241063, + "grad_norm": 0.6153811097650479, + "learning_rate": 1.2391011009947606e-08, + "loss": 0.2764, + "step": 41381 + }, + { + "epoch": 1.9385393732140348, + "grad_norm": 0.5915227853411541, + "learning_rate": 1.2372159424353103e-08, + "loss": 0.2821, + "step": 41382 + }, + { + "epoch": 1.938586218203963, + "grad_norm": 0.5928686322286588, + "learning_rate": 1.235332215443169e-08, + "loss": 0.2677, + "step": 41383 + }, + { + "epoch": 1.9386330631938913, + "grad_norm": 0.5725379647284895, + "learning_rate": 1.2334499200291616e-08, + "loss": 0.2741, + "step": 41384 + }, + { + "epoch": 1.9386799081838197, + "grad_norm": 0.6032673453964179, + "learning_rate": 1.2315690562041127e-08, + "loss": 0.2617, + "step": 41385 + }, + { + "epoch": 1.9387267531737482, + "grad_norm": 0.5888626360911676, + "learning_rate": 1.229689623978847e-08, + "loss": 0.2746, + "step": 41386 + }, + { + "epoch": 1.9387735981636764, + "grad_norm": 0.6422539781872116, + "learning_rate": 1.227811623364189e-08, + "loss": 0.2811, + "step": 41387 + }, + { + "epoch": 1.9388204431536047, + "grad_norm": 0.5740678434952571, + "learning_rate": 1.2259350543709914e-08, + "loss": 0.2503, + "step": 41388 + }, + { + "epoch": 1.9388672881435332, + "grad_norm": 0.5840016175262875, + "learning_rate": 1.2240599170099676e-08, + "loss": 0.264, + "step": 41389 + }, + { + "epoch": 1.9389141331334614, + "grad_norm": 0.5986246676782371, + "learning_rate": 1.2221862112919702e-08, + "loss": 0.2544, + "step": 41390 + }, + { + "epoch": 1.9389609781233896, + "grad_norm": 0.587167270899548, + "learning_rate": 1.2203139372277684e-08, + "loss": 0.2586, + "step": 41391 + }, + { + "epoch": 1.939007823113318, + "grad_norm": 0.5727304064197424, + "learning_rate": 1.2184430948281034e-08, + "loss": 0.2628, + "step": 41392 + }, + { + "epoch": 1.9390546681032463, + "grad_norm": 0.6247494153828991, + "learning_rate": 1.2165736841038e-08, + "loss": 0.2751, + "step": 41393 + }, + { + "epoch": 1.9391015130931746, + "grad_norm": 0.5902950593957649, + "learning_rate": 1.214705705065572e-08, + "loss": 0.2495, + "step": 41394 + }, + { + "epoch": 1.939148358083103, + "grad_norm": 0.5846531320028606, + "learning_rate": 1.2128391577241883e-08, + "loss": 0.2673, + "step": 41395 + }, + { + "epoch": 1.9391952030730315, + "grad_norm": 0.572127407038913, + "learning_rate": 1.2109740420903903e-08, + "loss": 0.2668, + "step": 41396 + }, + { + "epoch": 1.9392420480629595, + "grad_norm": 0.594092002368428, + "learning_rate": 1.209110358174892e-08, + "loss": 0.2707, + "step": 41397 + }, + { + "epoch": 1.939288893052888, + "grad_norm": 0.5745308698265348, + "learning_rate": 1.2072481059884344e-08, + "loss": 0.2556, + "step": 41398 + }, + { + "epoch": 1.9393357380428164, + "grad_norm": 0.556244062878364, + "learning_rate": 1.2053872855417315e-08, + "loss": 0.2615, + "step": 41399 + }, + { + "epoch": 1.9393825830327447, + "grad_norm": 0.5819811855530136, + "learning_rate": 1.2035278968454967e-08, + "loss": 0.2601, + "step": 41400 + }, + { + "epoch": 1.939429428022673, + "grad_norm": 0.6170995971307153, + "learning_rate": 1.2016699399104437e-08, + "loss": 0.253, + "step": 41401 + }, + { + "epoch": 1.9394762730126014, + "grad_norm": 0.593726211163491, + "learning_rate": 1.1998134147472306e-08, + "loss": 0.2714, + "step": 41402 + }, + { + "epoch": 1.9395231180025296, + "grad_norm": 0.5902610940847909, + "learning_rate": 1.1979583213665713e-08, + "loss": 0.2707, + "step": 41403 + }, + { + "epoch": 1.9395699629924579, + "grad_norm": 0.639471969436233, + "learning_rate": 1.196104659779096e-08, + "loss": 0.2891, + "step": 41404 + }, + { + "epoch": 1.9396168079823863, + "grad_norm": 0.5528675123608568, + "learning_rate": 1.1942524299955182e-08, + "loss": 0.2521, + "step": 41405 + }, + { + "epoch": 1.9396636529723146, + "grad_norm": 0.5634073121101821, + "learning_rate": 1.1924016320264964e-08, + "loss": 0.2571, + "step": 41406 + }, + { + "epoch": 1.9397104979622428, + "grad_norm": 0.5913957190000964, + "learning_rate": 1.1905522658826608e-08, + "loss": 0.263, + "step": 41407 + }, + { + "epoch": 1.9397573429521713, + "grad_norm": 0.6368607423747507, + "learning_rate": 1.1887043315746417e-08, + "loss": 0.2643, + "step": 41408 + }, + { + "epoch": 1.9398041879420997, + "grad_norm": 0.5994289897539674, + "learning_rate": 1.1868578291131249e-08, + "loss": 0.2903, + "step": 41409 + }, + { + "epoch": 1.9398510329320278, + "grad_norm": 0.6066318897234645, + "learning_rate": 1.1850127585086856e-08, + "loss": 0.2667, + "step": 41410 + }, + { + "epoch": 1.9398978779219562, + "grad_norm": 0.6047915980709069, + "learning_rate": 1.1831691197719541e-08, + "loss": 0.2738, + "step": 41411 + }, + { + "epoch": 1.9399447229118847, + "grad_norm": 0.5841535215729222, + "learning_rate": 1.1813269129135607e-08, + "loss": 0.2627, + "step": 41412 + }, + { + "epoch": 1.939991567901813, + "grad_norm": 0.6126432607066091, + "learning_rate": 1.1794861379440803e-08, + "loss": 0.2615, + "step": 41413 + }, + { + "epoch": 1.9400384128917412, + "grad_norm": 0.6031015254425449, + "learning_rate": 1.1776467948741432e-08, + "loss": 0.2721, + "step": 41414 + }, + { + "epoch": 1.9400852578816696, + "grad_norm": 0.6115166857962401, + "learning_rate": 1.1758088837142967e-08, + "loss": 0.2795, + "step": 41415 + }, + { + "epoch": 1.9401321028715979, + "grad_norm": 0.6532965075472086, + "learning_rate": 1.1739724044751155e-08, + "loss": 0.2855, + "step": 41416 + }, + { + "epoch": 1.9401789478615261, + "grad_norm": 0.562867214036642, + "learning_rate": 1.1721373571672023e-08, + "loss": 0.261, + "step": 41417 + }, + { + "epoch": 1.9402257928514546, + "grad_norm": 0.5953657346061414, + "learning_rate": 1.1703037418010765e-08, + "loss": 0.2692, + "step": 41418 + }, + { + "epoch": 1.9402726378413828, + "grad_norm": 0.6065873708533948, + "learning_rate": 1.1684715583873408e-08, + "loss": 0.2609, + "step": 41419 + }, + { + "epoch": 1.940319482831311, + "grad_norm": 0.6485068817168416, + "learning_rate": 1.1666408069364866e-08, + "loss": 0.2695, + "step": 41420 + }, + { + "epoch": 1.9403663278212395, + "grad_norm": 0.5410384782472479, + "learning_rate": 1.1648114874590887e-08, + "loss": 0.255, + "step": 41421 + }, + { + "epoch": 1.940413172811168, + "grad_norm": 0.5877893574781218, + "learning_rate": 1.1629835999656392e-08, + "loss": 0.2773, + "step": 41422 + }, + { + "epoch": 1.9404600178010962, + "grad_norm": 0.6057411749949332, + "learning_rate": 1.1611571444666847e-08, + "loss": 0.2734, + "step": 41423 + }, + { + "epoch": 1.9405068627910245, + "grad_norm": 0.5855890746036976, + "learning_rate": 1.159332120972717e-08, + "loss": 0.2652, + "step": 41424 + }, + { + "epoch": 1.940553707780953, + "grad_norm": 0.5795384015945206, + "learning_rate": 1.1575085294942835e-08, + "loss": 0.2716, + "step": 41425 + }, + { + "epoch": 1.9406005527708812, + "grad_norm": 0.5929978204594679, + "learning_rate": 1.1556863700418198e-08, + "loss": 0.258, + "step": 41426 + }, + { + "epoch": 1.9406473977608094, + "grad_norm": 0.6193753924482055, + "learning_rate": 1.1538656426258454e-08, + "loss": 0.2737, + "step": 41427 + }, + { + "epoch": 1.9406942427507379, + "grad_norm": 0.6083300820619991, + "learning_rate": 1.1520463472567967e-08, + "loss": 0.2755, + "step": 41428 + }, + { + "epoch": 1.9407410877406661, + "grad_norm": 0.6515194190155941, + "learning_rate": 1.1502284839452205e-08, + "loss": 0.3044, + "step": 41429 + }, + { + "epoch": 1.9407879327305944, + "grad_norm": 0.5877040928378816, + "learning_rate": 1.1484120527014975e-08, + "loss": 0.2719, + "step": 41430 + }, + { + "epoch": 1.9408347777205228, + "grad_norm": 0.636675264660817, + "learning_rate": 1.146597053536147e-08, + "loss": 0.2707, + "step": 41431 + }, + { + "epoch": 1.9408816227104513, + "grad_norm": 0.6231607179644064, + "learning_rate": 1.1447834864595774e-08, + "loss": 0.2695, + "step": 41432 + }, + { + "epoch": 1.9409284677003793, + "grad_norm": 0.5991310734690727, + "learning_rate": 1.142971351482225e-08, + "loss": 0.2621, + "step": 41433 + }, + { + "epoch": 1.9409753126903078, + "grad_norm": 0.6419131058161834, + "learning_rate": 1.1411606486145254e-08, + "loss": 0.2576, + "step": 41434 + }, + { + "epoch": 1.9410221576802362, + "grad_norm": 0.6416120137590366, + "learning_rate": 1.1393513778668875e-08, + "loss": 0.2681, + "step": 41435 + }, + { + "epoch": 1.9410690026701645, + "grad_norm": 0.6251838668377125, + "learning_rate": 1.137543539249747e-08, + "loss": 0.2834, + "step": 41436 + }, + { + "epoch": 1.9411158476600927, + "grad_norm": 0.6014235343875348, + "learning_rate": 1.1357371327734844e-08, + "loss": 0.2808, + "step": 41437 + }, + { + "epoch": 1.9411626926500212, + "grad_norm": 0.5406246655319777, + "learning_rate": 1.1339321584485086e-08, + "loss": 0.252, + "step": 41438 + }, + { + "epoch": 1.9412095376399494, + "grad_norm": 0.62641296976782, + "learning_rate": 1.1321286162851996e-08, + "loss": 0.2769, + "step": 41439 + }, + { + "epoch": 1.9412563826298777, + "grad_norm": 0.5493728941164323, + "learning_rate": 1.130326506293966e-08, + "loss": 0.2683, + "step": 41440 + }, + { + "epoch": 1.9413032276198061, + "grad_norm": 0.5898526254111278, + "learning_rate": 1.1285258284851053e-08, + "loss": 0.2665, + "step": 41441 + }, + { + "epoch": 1.9413500726097344, + "grad_norm": 0.6177358775762889, + "learning_rate": 1.1267265828690533e-08, + "loss": 0.2505, + "step": 41442 + }, + { + "epoch": 1.9413969175996626, + "grad_norm": 0.6099144921232097, + "learning_rate": 1.1249287694561628e-08, + "loss": 0.2709, + "step": 41443 + }, + { + "epoch": 1.941443762589591, + "grad_norm": 0.5743787645310576, + "learning_rate": 1.1231323882567035e-08, + "loss": 0.2597, + "step": 41444 + }, + { + "epoch": 1.9414906075795195, + "grad_norm": 0.6281196076969783, + "learning_rate": 1.1213374392811117e-08, + "loss": 0.2673, + "step": 41445 + }, + { + "epoch": 1.9415374525694475, + "grad_norm": 0.5706284751060651, + "learning_rate": 1.1195439225396287e-08, + "loss": 0.2644, + "step": 41446 + }, + { + "epoch": 1.941584297559376, + "grad_norm": 0.6404052130936416, + "learning_rate": 1.1177518380426355e-08, + "loss": 0.2656, + "step": 41447 + }, + { + "epoch": 1.9416311425493045, + "grad_norm": 0.5942320130407504, + "learning_rate": 1.1159611858004294e-08, + "loss": 0.281, + "step": 41448 + }, + { + "epoch": 1.9416779875392327, + "grad_norm": 0.5716749138653526, + "learning_rate": 1.1141719658233353e-08, + "loss": 0.2613, + "step": 41449 + }, + { + "epoch": 1.941724832529161, + "grad_norm": 0.6003211323984987, + "learning_rate": 1.112384178121595e-08, + "loss": 0.2741, + "step": 41450 + }, + { + "epoch": 1.9417716775190894, + "grad_norm": 0.5770550316170178, + "learning_rate": 1.1105978227055336e-08, + "loss": 0.2656, + "step": 41451 + }, + { + "epoch": 1.9418185225090177, + "grad_norm": 0.6109080215823305, + "learning_rate": 1.1088128995854208e-08, + "loss": 0.2744, + "step": 41452 + }, + { + "epoch": 1.941865367498946, + "grad_norm": 0.6024583611728667, + "learning_rate": 1.1070294087715261e-08, + "loss": 0.2618, + "step": 41453 + }, + { + "epoch": 1.9419122124888744, + "grad_norm": 0.5797651335804217, + "learning_rate": 1.1052473502741467e-08, + "loss": 0.2538, + "step": 41454 + }, + { + "epoch": 1.9419590574788026, + "grad_norm": 0.551840776081418, + "learning_rate": 1.1034667241034968e-08, + "loss": 0.2741, + "step": 41455 + }, + { + "epoch": 1.9420059024687308, + "grad_norm": 0.604563497947849, + "learning_rate": 1.101687530269846e-08, + "loss": 0.2774, + "step": 41456 + }, + { + "epoch": 1.9420527474586593, + "grad_norm": 0.6110262997760012, + "learning_rate": 1.0999097687834082e-08, + "loss": 0.2769, + "step": 41457 + }, + { + "epoch": 1.9420995924485878, + "grad_norm": 0.6157915818392687, + "learning_rate": 1.0981334396544253e-08, + "loss": 0.2736, + "step": 41458 + }, + { + "epoch": 1.942146437438516, + "grad_norm": 0.5824457093025268, + "learning_rate": 1.0963585428931668e-08, + "loss": 0.2796, + "step": 41459 + }, + { + "epoch": 1.9421932824284442, + "grad_norm": 0.5932380240519255, + "learning_rate": 1.0945850785097633e-08, + "loss": 0.2683, + "step": 41460 + }, + { + "epoch": 1.9422401274183727, + "grad_norm": 0.6066754706402677, + "learning_rate": 1.0928130465144848e-08, + "loss": 0.2745, + "step": 41461 + }, + { + "epoch": 1.942286972408301, + "grad_norm": 0.6094809580742857, + "learning_rate": 1.0910424469175173e-08, + "loss": 0.2648, + "step": 41462 + }, + { + "epoch": 1.9423338173982292, + "grad_norm": 0.6209952302455577, + "learning_rate": 1.0892732797290196e-08, + "loss": 0.2558, + "step": 41463 + }, + { + "epoch": 1.9423806623881577, + "grad_norm": 0.5718129873247831, + "learning_rate": 1.0875055449591777e-08, + "loss": 0.2748, + "step": 41464 + }, + { + "epoch": 1.942427507378086, + "grad_norm": 0.5769829999488085, + "learning_rate": 1.0857392426182057e-08, + "loss": 0.2615, + "step": 41465 + }, + { + "epoch": 1.9424743523680141, + "grad_norm": 0.6153410430027014, + "learning_rate": 1.0839743727162344e-08, + "loss": 0.2784, + "step": 41466 + }, + { + "epoch": 1.9425211973579426, + "grad_norm": 0.5831636977883787, + "learning_rate": 1.0822109352634225e-08, + "loss": 0.2731, + "step": 41467 + }, + { + "epoch": 1.942568042347871, + "grad_norm": 0.5933757409384014, + "learning_rate": 1.0804489302699283e-08, + "loss": 0.2632, + "step": 41468 + }, + { + "epoch": 1.942614887337799, + "grad_norm": 0.6047320973997086, + "learning_rate": 1.0786883577458829e-08, + "loss": 0.2769, + "step": 41469 + }, + { + "epoch": 1.9426617323277275, + "grad_norm": 0.6017198940500853, + "learning_rate": 1.0769292177014445e-08, + "loss": 0.2782, + "step": 41470 + }, + { + "epoch": 1.942708577317656, + "grad_norm": 0.6108582681959153, + "learning_rate": 1.0751715101466887e-08, + "loss": 0.2834, + "step": 41471 + }, + { + "epoch": 1.9427554223075842, + "grad_norm": 0.6071672850152829, + "learning_rate": 1.073415235091746e-08, + "loss": 0.2693, + "step": 41472 + }, + { + "epoch": 1.9428022672975125, + "grad_norm": 0.5742367524097332, + "learning_rate": 1.071660392546775e-08, + "loss": 0.2478, + "step": 41473 + }, + { + "epoch": 1.942849112287441, + "grad_norm": 0.6286657745012056, + "learning_rate": 1.0699069825218233e-08, + "loss": 0.2803, + "step": 41474 + }, + { + "epoch": 1.9428959572773692, + "grad_norm": 0.592074372636775, + "learning_rate": 1.0681550050269662e-08, + "loss": 0.2688, + "step": 41475 + }, + { + "epoch": 1.9429428022672974, + "grad_norm": 0.6230192490464276, + "learning_rate": 1.0664044600723066e-08, + "loss": 0.2712, + "step": 41476 + }, + { + "epoch": 1.942989647257226, + "grad_norm": 0.6684110450655779, + "learning_rate": 1.0646553476679478e-08, + "loss": 0.2875, + "step": 41477 + }, + { + "epoch": 1.9430364922471541, + "grad_norm": 0.5837396861768955, + "learning_rate": 1.0629076678239092e-08, + "loss": 0.2632, + "step": 41478 + }, + { + "epoch": 1.9430833372370824, + "grad_norm": 0.6485359211033697, + "learning_rate": 1.0611614205502662e-08, + "loss": 0.2837, + "step": 41479 + }, + { + "epoch": 1.9431301822270108, + "grad_norm": 0.5808027263354336, + "learning_rate": 1.0594166058570942e-08, + "loss": 0.2617, + "step": 41480 + }, + { + "epoch": 1.9431770272169393, + "grad_norm": 0.6199359468820483, + "learning_rate": 1.057673223754413e-08, + "loss": 0.2807, + "step": 41481 + }, + { + "epoch": 1.9432238722068673, + "grad_norm": 0.5627876881530959, + "learning_rate": 1.0559312742522143e-08, + "loss": 0.2553, + "step": 41482 + }, + { + "epoch": 1.9432707171967958, + "grad_norm": 0.5899032600276446, + "learning_rate": 1.0541907573606015e-08, + "loss": 0.2684, + "step": 41483 + }, + { + "epoch": 1.9433175621867242, + "grad_norm": 0.5834679099564664, + "learning_rate": 1.0524516730895384e-08, + "loss": 0.2711, + "step": 41484 + }, + { + "epoch": 1.9433644071766525, + "grad_norm": 0.5779970266873452, + "learning_rate": 1.0507140214490174e-08, + "loss": 0.2705, + "step": 41485 + }, + { + "epoch": 1.9434112521665807, + "grad_norm": 0.5508077240845161, + "learning_rate": 1.0489778024491137e-08, + "loss": 0.2436, + "step": 41486 + }, + { + "epoch": 1.9434580971565092, + "grad_norm": 0.6234656642317568, + "learning_rate": 1.047243016099736e-08, + "loss": 0.2851, + "step": 41487 + }, + { + "epoch": 1.9435049421464374, + "grad_norm": 0.606167448966346, + "learning_rate": 1.0455096624109318e-08, + "loss": 0.2715, + "step": 41488 + }, + { + "epoch": 1.9435517871363657, + "grad_norm": 0.5613478750336007, + "learning_rate": 1.0437777413926098e-08, + "loss": 0.2531, + "step": 41489 + }, + { + "epoch": 1.9435986321262941, + "grad_norm": 0.6611692828897123, + "learning_rate": 1.0420472530548175e-08, + "loss": 0.2748, + "step": 41490 + }, + { + "epoch": 1.9436454771162224, + "grad_norm": 0.6228959625962915, + "learning_rate": 1.040318197407436e-08, + "loss": 0.2775, + "step": 41491 + }, + { + "epoch": 1.9436923221061506, + "grad_norm": 0.5978624181939818, + "learning_rate": 1.0385905744604574e-08, + "loss": 0.2664, + "step": 41492 + }, + { + "epoch": 1.943739167096079, + "grad_norm": 0.5815600735710438, + "learning_rate": 1.036864384223818e-08, + "loss": 0.2753, + "step": 41493 + }, + { + "epoch": 1.9437860120860075, + "grad_norm": 0.5863255886559691, + "learning_rate": 1.0351396267074542e-08, + "loss": 0.2477, + "step": 41494 + }, + { + "epoch": 1.9438328570759358, + "grad_norm": 0.6290317647849702, + "learning_rate": 1.0334163019213027e-08, + "loss": 0.2831, + "step": 41495 + }, + { + "epoch": 1.943879702065864, + "grad_norm": 0.6321286603989876, + "learning_rate": 1.031694409875217e-08, + "loss": 0.2766, + "step": 41496 + }, + { + "epoch": 1.9439265470557925, + "grad_norm": 0.6297329457108896, + "learning_rate": 1.0299739505791883e-08, + "loss": 0.2779, + "step": 41497 + }, + { + "epoch": 1.9439733920457207, + "grad_norm": 0.5965685616808131, + "learning_rate": 1.0282549240430984e-08, + "loss": 0.2717, + "step": 41498 + }, + { + "epoch": 1.944020237035649, + "grad_norm": 0.6324325830523385, + "learning_rate": 1.0265373302768e-08, + "loss": 0.2561, + "step": 41499 + }, + { + "epoch": 1.9440670820255774, + "grad_norm": 0.5418278041434126, + "learning_rate": 1.0248211692902021e-08, + "loss": 0.2457, + "step": 41500 + }, + { + "epoch": 1.9441139270155057, + "grad_norm": 0.612675747659721, + "learning_rate": 1.0231064410931857e-08, + "loss": 0.2809, + "step": 41501 + }, + { + "epoch": 1.944160772005434, + "grad_norm": 0.6067465435451205, + "learning_rate": 1.0213931456956038e-08, + "loss": 0.2766, + "step": 41502 + }, + { + "epoch": 1.9442076169953624, + "grad_norm": 0.6544583049633654, + "learning_rate": 1.0196812831073377e-08, + "loss": 0.2696, + "step": 41503 + }, + { + "epoch": 1.9442544619852908, + "grad_norm": 0.6477825694209282, + "learning_rate": 1.0179708533382404e-08, + "loss": 0.2955, + "step": 41504 + }, + { + "epoch": 1.9443013069752189, + "grad_norm": 0.6087133030064084, + "learning_rate": 1.0162618563981097e-08, + "loss": 0.2786, + "step": 41505 + }, + { + "epoch": 1.9443481519651473, + "grad_norm": 0.6366341542913827, + "learning_rate": 1.0145542922968266e-08, + "loss": 0.2769, + "step": 41506 + }, + { + "epoch": 1.9443949969550758, + "grad_norm": 0.5784022127492304, + "learning_rate": 1.0128481610442165e-08, + "loss": 0.2607, + "step": 41507 + }, + { + "epoch": 1.944441841945004, + "grad_norm": 0.6082947705568879, + "learning_rate": 1.011143462650077e-08, + "loss": 0.2756, + "step": 41508 + }, + { + "epoch": 1.9444886869349323, + "grad_norm": 0.5898049187244542, + "learning_rate": 1.0094401971242063e-08, + "loss": 0.2742, + "step": 41509 + }, + { + "epoch": 1.9445355319248607, + "grad_norm": 0.594822872472372, + "learning_rate": 1.0077383644764571e-08, + "loss": 0.265, + "step": 41510 + }, + { + "epoch": 1.944582376914789, + "grad_norm": 0.6355102081942401, + "learning_rate": 1.0060379647165718e-08, + "loss": 0.2697, + "step": 41511 + }, + { + "epoch": 1.9446292219047172, + "grad_norm": 0.6233619306008298, + "learning_rate": 1.0043389978543484e-08, + "loss": 0.2773, + "step": 41512 + }, + { + "epoch": 1.9446760668946457, + "grad_norm": 0.5932715482960041, + "learning_rate": 1.002641463899584e-08, + "loss": 0.2749, + "step": 41513 + }, + { + "epoch": 1.944722911884574, + "grad_norm": 0.6302549548836152, + "learning_rate": 1.0009453628620492e-08, + "loss": 0.2648, + "step": 41514 + }, + { + "epoch": 1.9447697568745022, + "grad_norm": 0.7035923880587642, + "learning_rate": 9.992506947514858e-09, + "loss": 0.284, + "step": 41515 + }, + { + "epoch": 1.9448166018644306, + "grad_norm": 0.6087374007859132, + "learning_rate": 9.97557459577636e-09, + "loss": 0.2742, + "step": 41516 + }, + { + "epoch": 1.944863446854359, + "grad_norm": 0.6055933944245796, + "learning_rate": 9.958656573502702e-09, + "loss": 0.2697, + "step": 41517 + }, + { + "epoch": 1.944910291844287, + "grad_norm": 0.5813578270412301, + "learning_rate": 9.941752880791023e-09, + "loss": 0.2615, + "step": 41518 + }, + { + "epoch": 1.9449571368342156, + "grad_norm": 0.5673236987144584, + "learning_rate": 9.92486351773847e-09, + "loss": 0.2406, + "step": 41519 + }, + { + "epoch": 1.945003981824144, + "grad_norm": 0.6142952037708808, + "learning_rate": 9.907988484442742e-09, + "loss": 0.2648, + "step": 41520 + }, + { + "epoch": 1.9450508268140723, + "grad_norm": 0.611505284801608, + "learning_rate": 9.891127781000709e-09, + "loss": 0.2838, + "step": 41521 + }, + { + "epoch": 1.9450976718040005, + "grad_norm": 0.592990490521992, + "learning_rate": 9.874281407509511e-09, + "loss": 0.2659, + "step": 41522 + }, + { + "epoch": 1.945144516793929, + "grad_norm": 0.5780075560337392, + "learning_rate": 9.85744936406574e-09, + "loss": 0.2761, + "step": 41523 + }, + { + "epoch": 1.9451913617838572, + "grad_norm": 0.6387434559642104, + "learning_rate": 9.840631650766541e-09, + "loss": 0.2612, + "step": 41524 + }, + { + "epoch": 1.9452382067737854, + "grad_norm": 0.6511747397149698, + "learning_rate": 9.823828267708502e-09, + "loss": 0.2698, + "step": 41525 + }, + { + "epoch": 1.945285051763714, + "grad_norm": 0.5821295312676453, + "learning_rate": 9.80703921498849e-09, + "loss": 0.2571, + "step": 41526 + }, + { + "epoch": 1.9453318967536422, + "grad_norm": 0.6013063474038667, + "learning_rate": 9.790264492703094e-09, + "loss": 0.2649, + "step": 41527 + }, + { + "epoch": 1.9453787417435704, + "grad_norm": 0.6165458154477355, + "learning_rate": 9.773504100948905e-09, + "loss": 0.285, + "step": 41528 + }, + { + "epoch": 1.9454255867334989, + "grad_norm": 0.5706972971901396, + "learning_rate": 9.756758039822233e-09, + "loss": 0.2753, + "step": 41529 + }, + { + "epoch": 1.9454724317234273, + "grad_norm": 0.6110550111975088, + "learning_rate": 9.74002630941967e-09, + "loss": 0.2791, + "step": 41530 + }, + { + "epoch": 1.9455192767133556, + "grad_norm": 0.5887121726241086, + "learning_rate": 9.723308909837248e-09, + "loss": 0.2585, + "step": 41531 + }, + { + "epoch": 1.9455661217032838, + "grad_norm": 0.6204701574562487, + "learning_rate": 9.70660584117128e-09, + "loss": 0.2858, + "step": 41532 + }, + { + "epoch": 1.9456129666932123, + "grad_norm": 0.5499744130700682, + "learning_rate": 9.689917103517798e-09, + "loss": 0.2547, + "step": 41533 + }, + { + "epoch": 1.9456598116831405, + "grad_norm": 0.6241700242150846, + "learning_rate": 9.673242696973117e-09, + "loss": 0.2827, + "step": 41534 + }, + { + "epoch": 1.9457066566730687, + "grad_norm": 0.5980138778884968, + "learning_rate": 9.656582621632993e-09, + "loss": 0.2746, + "step": 41535 + }, + { + "epoch": 1.9457535016629972, + "grad_norm": 0.5836995701018092, + "learning_rate": 9.639936877593181e-09, + "loss": 0.2527, + "step": 41536 + }, + { + "epoch": 1.9458003466529254, + "grad_norm": 0.6135824751504816, + "learning_rate": 9.623305464949716e-09, + "loss": 0.2686, + "step": 41537 + }, + { + "epoch": 1.9458471916428537, + "grad_norm": 0.5565409998137827, + "learning_rate": 9.606688383798079e-09, + "loss": 0.2645, + "step": 41538 + }, + { + "epoch": 1.9458940366327822, + "grad_norm": 0.6010004598539191, + "learning_rate": 9.590085634234025e-09, + "loss": 0.2841, + "step": 41539 + }, + { + "epoch": 1.9459408816227106, + "grad_norm": 0.5998174484841943, + "learning_rate": 9.573497216353034e-09, + "loss": 0.2661, + "step": 41540 + }, + { + "epoch": 1.9459877266126386, + "grad_norm": 0.5919665449509339, + "learning_rate": 9.556923130250583e-09, + "loss": 0.2666, + "step": 41541 + }, + { + "epoch": 1.946034571602567, + "grad_norm": 0.5944761246194588, + "learning_rate": 9.540363376022155e-09, + "loss": 0.2744, + "step": 41542 + }, + { + "epoch": 1.9460814165924956, + "grad_norm": 0.6009708751438888, + "learning_rate": 9.523817953763226e-09, + "loss": 0.2695, + "step": 41543 + }, + { + "epoch": 1.9461282615824238, + "grad_norm": 0.6381883465216426, + "learning_rate": 9.507286863568444e-09, + "loss": 0.2846, + "step": 41544 + }, + { + "epoch": 1.946175106572352, + "grad_norm": 0.5817114085564468, + "learning_rate": 9.490770105533564e-09, + "loss": 0.2537, + "step": 41545 + }, + { + "epoch": 1.9462219515622805, + "grad_norm": 0.6571459624744335, + "learning_rate": 9.474267679752958e-09, + "loss": 0.2632, + "step": 41546 + }, + { + "epoch": 1.9462687965522087, + "grad_norm": 0.6045048950634042, + "learning_rate": 9.45777958632238e-09, + "loss": 0.2756, + "step": 41547 + }, + { + "epoch": 1.946315641542137, + "grad_norm": 0.5926065450741586, + "learning_rate": 9.441305825335922e-09, + "loss": 0.2788, + "step": 41548 + }, + { + "epoch": 1.9463624865320655, + "grad_norm": 0.5869830973414666, + "learning_rate": 9.424846396889065e-09, + "loss": 0.2627, + "step": 41549 + }, + { + "epoch": 1.9464093315219937, + "grad_norm": 0.6236753164885062, + "learning_rate": 9.408401301076175e-09, + "loss": 0.2904, + "step": 41550 + }, + { + "epoch": 1.946456176511922, + "grad_norm": 0.6232556196172875, + "learning_rate": 9.3919705379919e-09, + "loss": 0.278, + "step": 41551 + }, + { + "epoch": 1.9465030215018504, + "grad_norm": 0.5699312212387732, + "learning_rate": 9.375554107730889e-09, + "loss": 0.2483, + "step": 41552 + }, + { + "epoch": 1.9465498664917789, + "grad_norm": 0.560250119792656, + "learning_rate": 9.359152010387506e-09, + "loss": 0.2511, + "step": 41553 + }, + { + "epoch": 1.9465967114817069, + "grad_norm": 0.5746509024242059, + "learning_rate": 9.342764246056123e-09, + "loss": 0.2766, + "step": 41554 + }, + { + "epoch": 1.9466435564716353, + "grad_norm": 0.5681173932149675, + "learning_rate": 9.32639081483111e-09, + "loss": 0.2606, + "step": 41555 + }, + { + "epoch": 1.9466904014615638, + "grad_norm": 0.608812004920662, + "learning_rate": 9.310031716806833e-09, + "loss": 0.2643, + "step": 41556 + }, + { + "epoch": 1.946737246451492, + "grad_norm": 0.6275448121980206, + "learning_rate": 9.293686952077385e-09, + "loss": 0.2804, + "step": 41557 + }, + { + "epoch": 1.9467840914414203, + "grad_norm": 0.6233592839451629, + "learning_rate": 9.27735652073658e-09, + "loss": 0.2698, + "step": 41558 + }, + { + "epoch": 1.9468309364313487, + "grad_norm": 0.6089297242063711, + "learning_rate": 9.261040422878786e-09, + "loss": 0.2672, + "step": 41559 + }, + { + "epoch": 1.946877781421277, + "grad_norm": 0.5628404159162709, + "learning_rate": 9.244738658597541e-09, + "loss": 0.2642, + "step": 41560 + }, + { + "epoch": 1.9469246264112052, + "grad_norm": 0.6035699968537805, + "learning_rate": 9.228451227986935e-09, + "loss": 0.2761, + "step": 41561 + }, + { + "epoch": 1.9469714714011337, + "grad_norm": 0.6276657239050463, + "learning_rate": 9.212178131140226e-09, + "loss": 0.2675, + "step": 41562 + }, + { + "epoch": 1.947018316391062, + "grad_norm": 0.6100252386230978, + "learning_rate": 9.195919368151784e-09, + "loss": 0.27, + "step": 41563 + }, + { + "epoch": 1.9470651613809902, + "grad_norm": 0.5813765833348487, + "learning_rate": 9.179674939114591e-09, + "loss": 0.2654, + "step": 41564 + }, + { + "epoch": 1.9471120063709186, + "grad_norm": 0.6166347874257011, + "learning_rate": 9.16344484412246e-09, + "loss": 0.2609, + "step": 41565 + }, + { + "epoch": 1.947158851360847, + "grad_norm": 0.6115293494324819, + "learning_rate": 9.147229083268372e-09, + "loss": 0.2642, + "step": 41566 + }, + { + "epoch": 1.9472056963507753, + "grad_norm": 0.5728894016575952, + "learning_rate": 9.13102765664614e-09, + "loss": 0.2512, + "step": 41567 + }, + { + "epoch": 1.9472525413407036, + "grad_norm": 0.5658302268572785, + "learning_rate": 9.114840564348749e-09, + "loss": 0.2582, + "step": 41568 + }, + { + "epoch": 1.947299386330632, + "grad_norm": 0.611273726309578, + "learning_rate": 9.098667806469452e-09, + "loss": 0.2597, + "step": 41569 + }, + { + "epoch": 1.9473462313205603, + "grad_norm": 0.6094047565923911, + "learning_rate": 9.082509383100957e-09, + "loss": 0.2677, + "step": 41570 + }, + { + "epoch": 1.9473930763104885, + "grad_norm": 0.612937621227923, + "learning_rate": 9.066365294336798e-09, + "loss": 0.283, + "step": 41571 + }, + { + "epoch": 1.947439921300417, + "grad_norm": 0.5764786025975204, + "learning_rate": 9.05023554026968e-09, + "loss": 0.2633, + "step": 41572 + }, + { + "epoch": 1.9474867662903452, + "grad_norm": 0.6264597899127043, + "learning_rate": 9.034120120992307e-09, + "loss": 0.2824, + "step": 41573 + }, + { + "epoch": 1.9475336112802735, + "grad_norm": 0.6339484562882556, + "learning_rate": 9.018019036597658e-09, + "loss": 0.272, + "step": 41574 + }, + { + "epoch": 1.947580456270202, + "grad_norm": 0.6325726528680307, + "learning_rate": 9.001932287177883e-09, + "loss": 0.2559, + "step": 41575 + }, + { + "epoch": 1.9476273012601304, + "grad_norm": 0.6141467110001886, + "learning_rate": 8.985859872826242e-09, + "loss": 0.269, + "step": 41576 + }, + { + "epoch": 1.9476741462500584, + "grad_norm": 0.5833008302832281, + "learning_rate": 8.969801793634602e-09, + "loss": 0.2599, + "step": 41577 + }, + { + "epoch": 1.9477209912399869, + "grad_norm": 0.6040803246831209, + "learning_rate": 8.953758049695948e-09, + "loss": 0.2742, + "step": 41578 + }, + { + "epoch": 1.9477678362299153, + "grad_norm": 0.5906152293814315, + "learning_rate": 8.93772864110215e-09, + "loss": 0.2779, + "step": 41579 + }, + { + "epoch": 1.9478146812198436, + "grad_norm": 0.5785136196030191, + "learning_rate": 8.921713567945633e-09, + "loss": 0.2645, + "step": 41580 + }, + { + "epoch": 1.9478615262097718, + "grad_norm": 0.5828629080918851, + "learning_rate": 8.905712830318547e-09, + "loss": 0.2503, + "step": 41581 + }, + { + "epoch": 1.9479083711997003, + "grad_norm": 0.6351724234325491, + "learning_rate": 8.889726428312762e-09, + "loss": 0.2772, + "step": 41582 + }, + { + "epoch": 1.9479552161896285, + "grad_norm": 0.569722613687044, + "learning_rate": 8.873754362020704e-09, + "loss": 0.2612, + "step": 41583 + }, + { + "epoch": 1.9480020611795568, + "grad_norm": 0.6061378974782177, + "learning_rate": 8.857796631534243e-09, + "loss": 0.2676, + "step": 41584 + }, + { + "epoch": 1.9480489061694852, + "grad_norm": 0.6011492271249227, + "learning_rate": 8.841853236944697e-09, + "loss": 0.2842, + "step": 41585 + }, + { + "epoch": 1.9480957511594135, + "grad_norm": 0.6010534611361702, + "learning_rate": 8.825924178344492e-09, + "loss": 0.2804, + "step": 41586 + }, + { + "epoch": 1.9481425961493417, + "grad_norm": 0.6447504749408903, + "learning_rate": 8.810009455824665e-09, + "loss": 0.2848, + "step": 41587 + }, + { + "epoch": 1.9481894411392702, + "grad_norm": 0.5759787407859508, + "learning_rate": 8.794109069477086e-09, + "loss": 0.2733, + "step": 41588 + }, + { + "epoch": 1.9482362861291986, + "grad_norm": 0.5801972173897948, + "learning_rate": 8.778223019393351e-09, + "loss": 0.2636, + "step": 41589 + }, + { + "epoch": 1.9482831311191267, + "grad_norm": 0.6199817455631468, + "learning_rate": 8.762351305664773e-09, + "loss": 0.2656, + "step": 41590 + }, + { + "epoch": 1.9483299761090551, + "grad_norm": 0.6119746872585979, + "learning_rate": 8.746493928382949e-09, + "loss": 0.287, + "step": 41591 + }, + { + "epoch": 1.9483768210989836, + "grad_norm": 0.6500683974870801, + "learning_rate": 8.730650887638636e-09, + "loss": 0.2654, + "step": 41592 + }, + { + "epoch": 1.9484236660889118, + "grad_norm": 0.6155557980333246, + "learning_rate": 8.714822183523431e-09, + "loss": 0.2692, + "step": 41593 + }, + { + "epoch": 1.94847051107884, + "grad_norm": 0.5605588726687256, + "learning_rate": 8.699007816128091e-09, + "loss": 0.2497, + "step": 41594 + }, + { + "epoch": 1.9485173560687685, + "grad_norm": 0.6089067729145147, + "learning_rate": 8.683207785543934e-09, + "loss": 0.2779, + "step": 41595 + }, + { + "epoch": 1.9485642010586968, + "grad_norm": 0.5931610310024265, + "learning_rate": 8.667422091861722e-09, + "loss": 0.2684, + "step": 41596 + }, + { + "epoch": 1.948611046048625, + "grad_norm": 0.586107228491228, + "learning_rate": 8.651650735172212e-09, + "loss": 0.2858, + "step": 41597 + }, + { + "epoch": 1.9486578910385535, + "grad_norm": 0.5773492827276006, + "learning_rate": 8.635893715566445e-09, + "loss": 0.255, + "step": 41598 + }, + { + "epoch": 1.9487047360284817, + "grad_norm": 0.5533654902233672, + "learning_rate": 8.620151033134905e-09, + "loss": 0.245, + "step": 41599 + }, + { + "epoch": 1.94875158101841, + "grad_norm": 0.6332170964669367, + "learning_rate": 8.604422687968072e-09, + "loss": 0.2718, + "step": 41600 + }, + { + "epoch": 1.9487984260083384, + "grad_norm": 0.5803038141952891, + "learning_rate": 8.58870868015671e-09, + "loss": 0.2762, + "step": 41601 + }, + { + "epoch": 1.9488452709982669, + "grad_norm": 0.5600863822112246, + "learning_rate": 8.573009009791022e-09, + "loss": 0.2565, + "step": 41602 + }, + { + "epoch": 1.9488921159881951, + "grad_norm": 0.6209865070341865, + "learning_rate": 8.55732367696177e-09, + "loss": 0.2721, + "step": 41603 + }, + { + "epoch": 1.9489389609781234, + "grad_norm": 0.5855047072510602, + "learning_rate": 8.541652681758606e-09, + "loss": 0.2755, + "step": 41604 + }, + { + "epoch": 1.9489858059680518, + "grad_norm": 0.5782884999372816, + "learning_rate": 8.52599602427201e-09, + "loss": 0.278, + "step": 41605 + }, + { + "epoch": 1.94903265095798, + "grad_norm": 0.6188049448242934, + "learning_rate": 8.510353704592189e-09, + "loss": 0.2709, + "step": 41606 + }, + { + "epoch": 1.9490794959479083, + "grad_norm": 0.5760987919923508, + "learning_rate": 8.494725722809071e-09, + "loss": 0.2758, + "step": 41607 + }, + { + "epoch": 1.9491263409378368, + "grad_norm": 0.5955623350638511, + "learning_rate": 8.479112079012863e-09, + "loss": 0.2854, + "step": 41608 + }, + { + "epoch": 1.949173185927765, + "grad_norm": 0.6310822305276685, + "learning_rate": 8.463512773292936e-09, + "loss": 0.2894, + "step": 41609 + }, + { + "epoch": 1.9492200309176932, + "grad_norm": 0.6023930881493671, + "learning_rate": 8.44792780573922e-09, + "loss": 0.2631, + "step": 41610 + }, + { + "epoch": 1.9492668759076217, + "grad_norm": 0.5749555328519219, + "learning_rate": 8.432357176441364e-09, + "loss": 0.252, + "step": 41611 + }, + { + "epoch": 1.9493137208975502, + "grad_norm": 0.570466031560433, + "learning_rate": 8.416800885489018e-09, + "loss": 0.2798, + "step": 41612 + }, + { + "epoch": 1.9493605658874782, + "grad_norm": 0.6692769620636079, + "learning_rate": 8.401258932972112e-09, + "loss": 0.2833, + "step": 41613 + }, + { + "epoch": 1.9494074108774067, + "grad_norm": 0.605613270612999, + "learning_rate": 8.385731318979462e-09, + "loss": 0.2751, + "step": 41614 + }, + { + "epoch": 1.9494542558673351, + "grad_norm": 0.6467530711586538, + "learning_rate": 8.37021804360072e-09, + "loss": 0.2818, + "step": 41615 + }, + { + "epoch": 1.9495011008572634, + "grad_norm": 0.578123961884406, + "learning_rate": 8.35471910692498e-09, + "loss": 0.2629, + "step": 41616 + }, + { + "epoch": 1.9495479458471916, + "grad_norm": 0.6518963350074524, + "learning_rate": 8.339234509041893e-09, + "loss": 0.2838, + "step": 41617 + }, + { + "epoch": 1.94959479083712, + "grad_norm": 0.606980537684437, + "learning_rate": 8.32376425004e-09, + "loss": 0.2686, + "step": 41618 + }, + { + "epoch": 1.9496416358270483, + "grad_norm": 0.5685832089473793, + "learning_rate": 8.308308330008675e-09, + "loss": 0.2513, + "step": 41619 + }, + { + "epoch": 1.9496884808169765, + "grad_norm": 0.6026850234575035, + "learning_rate": 8.292866749036732e-09, + "loss": 0.2673, + "step": 41620 + }, + { + "epoch": 1.949735325806905, + "grad_norm": 0.5918762641384354, + "learning_rate": 8.27743950721327e-09, + "loss": 0.2603, + "step": 41621 + }, + { + "epoch": 1.9497821707968332, + "grad_norm": 0.6067897005071407, + "learning_rate": 8.26202660462655e-09, + "loss": 0.2731, + "step": 41622 + }, + { + "epoch": 1.9498290157867615, + "grad_norm": 0.6147425843375923, + "learning_rate": 8.246628041365668e-09, + "loss": 0.271, + "step": 41623 + }, + { + "epoch": 1.94987586077669, + "grad_norm": 0.5572510334175013, + "learning_rate": 8.231243817519442e-09, + "loss": 0.254, + "step": 41624 + }, + { + "epoch": 1.9499227057666184, + "grad_norm": 0.5726177228561818, + "learning_rate": 8.215873933175855e-09, + "loss": 0.2715, + "step": 41625 + }, + { + "epoch": 1.9499695507565464, + "grad_norm": 0.5876857068262672, + "learning_rate": 8.200518388423729e-09, + "loss": 0.2686, + "step": 41626 + }, + { + "epoch": 1.950016395746475, + "grad_norm": 0.650088850664109, + "learning_rate": 8.185177183351323e-09, + "loss": 0.2881, + "step": 41627 + }, + { + "epoch": 1.9500632407364034, + "grad_norm": 0.5935212251307085, + "learning_rate": 8.169850318046902e-09, + "loss": 0.2699, + "step": 41628 + }, + { + "epoch": 1.9501100857263316, + "grad_norm": 0.6011214241016947, + "learning_rate": 8.154537792598726e-09, + "loss": 0.2631, + "step": 41629 + }, + { + "epoch": 1.9501569307162598, + "grad_norm": 0.5948584702755534, + "learning_rate": 8.139239607094784e-09, + "loss": 0.2829, + "step": 41630 + }, + { + "epoch": 1.9502037757061883, + "grad_norm": 0.5766899269059309, + "learning_rate": 8.123955761623337e-09, + "loss": 0.2562, + "step": 41631 + }, + { + "epoch": 1.9502506206961165, + "grad_norm": 0.6103202366337032, + "learning_rate": 8.108686256272092e-09, + "loss": 0.2745, + "step": 41632 + }, + { + "epoch": 1.9502974656860448, + "grad_norm": 0.578569187844867, + "learning_rate": 8.093431091129034e-09, + "loss": 0.2658, + "step": 41633 + }, + { + "epoch": 1.9503443106759732, + "grad_norm": 0.602420211278726, + "learning_rate": 8.078190266282149e-09, + "loss": 0.2837, + "step": 41634 + }, + { + "epoch": 1.9503911556659015, + "grad_norm": 0.6429170478350825, + "learning_rate": 8.062963781818867e-09, + "loss": 0.2853, + "step": 41635 + }, + { + "epoch": 1.9504380006558297, + "grad_norm": 0.5699801033418576, + "learning_rate": 8.047751637826896e-09, + "loss": 0.2517, + "step": 41636 + }, + { + "epoch": 1.9504848456457582, + "grad_norm": 0.6062749690036693, + "learning_rate": 8.032553834393664e-09, + "loss": 0.2639, + "step": 41637 + }, + { + "epoch": 1.9505316906356867, + "grad_norm": 0.606011002878068, + "learning_rate": 8.017370371607158e-09, + "loss": 0.2638, + "step": 41638 + }, + { + "epoch": 1.950578535625615, + "grad_norm": 0.6110131257484915, + "learning_rate": 8.002201249553976e-09, + "loss": 0.2789, + "step": 41639 + }, + { + "epoch": 1.9506253806155431, + "grad_norm": 0.6235985254786494, + "learning_rate": 7.987046468321824e-09, + "loss": 0.2934, + "step": 41640 + }, + { + "epoch": 1.9506722256054716, + "grad_norm": 0.5583604149588923, + "learning_rate": 7.971906027997855e-09, + "loss": 0.2389, + "step": 41641 + }, + { + "epoch": 1.9507190705953998, + "grad_norm": 0.5894017696488854, + "learning_rate": 7.9567799286695e-09, + "loss": 0.2697, + "step": 41642 + }, + { + "epoch": 1.950765915585328, + "grad_norm": 0.6382525169322512, + "learning_rate": 7.941668170423078e-09, + "loss": 0.2822, + "step": 41643 + }, + { + "epoch": 1.9508127605752565, + "grad_norm": 0.6082617067429987, + "learning_rate": 7.926570753346296e-09, + "loss": 0.2582, + "step": 41644 + }, + { + "epoch": 1.9508596055651848, + "grad_norm": 0.6144205273587758, + "learning_rate": 7.911487677525753e-09, + "loss": 0.2895, + "step": 41645 + }, + { + "epoch": 1.950906450555113, + "grad_norm": 0.5706360390920511, + "learning_rate": 7.896418943048045e-09, + "loss": 0.2592, + "step": 41646 + }, + { + "epoch": 1.9509532955450415, + "grad_norm": 0.6182058188922203, + "learning_rate": 7.881364550000325e-09, + "loss": 0.2955, + "step": 41647 + }, + { + "epoch": 1.95100014053497, + "grad_norm": 0.607161319265655, + "learning_rate": 7.866324498468913e-09, + "loss": 0.2791, + "step": 41648 + }, + { + "epoch": 1.951046985524898, + "grad_norm": 0.6317085243974653, + "learning_rate": 7.851298788540406e-09, + "loss": 0.2821, + "step": 41649 + }, + { + "epoch": 1.9510938305148264, + "grad_norm": 0.5549217814813416, + "learning_rate": 7.836287420301125e-09, + "loss": 0.2547, + "step": 41650 + }, + { + "epoch": 1.951140675504755, + "grad_norm": 0.6025277711854238, + "learning_rate": 7.821290393837944e-09, + "loss": 0.2604, + "step": 41651 + }, + { + "epoch": 1.9511875204946831, + "grad_norm": 0.6327150227072997, + "learning_rate": 7.806307709236627e-09, + "loss": 0.2619, + "step": 41652 + }, + { + "epoch": 1.9512343654846114, + "grad_norm": 0.5674355368428418, + "learning_rate": 7.791339366583494e-09, + "loss": 0.2645, + "step": 41653 + }, + { + "epoch": 1.9512812104745398, + "grad_norm": 0.5974229509208955, + "learning_rate": 7.776385365964866e-09, + "loss": 0.271, + "step": 41654 + }, + { + "epoch": 1.951328055464468, + "grad_norm": 0.6011975692898919, + "learning_rate": 7.761445707467064e-09, + "loss": 0.2896, + "step": 41655 + }, + { + "epoch": 1.9513749004543963, + "grad_norm": 0.6135510954203571, + "learning_rate": 7.746520391175572e-09, + "loss": 0.2629, + "step": 41656 + }, + { + "epoch": 1.9514217454443248, + "grad_norm": 0.6024983699853368, + "learning_rate": 7.731609417176434e-09, + "loss": 0.2778, + "step": 41657 + }, + { + "epoch": 1.951468590434253, + "grad_norm": 0.5631968223777399, + "learning_rate": 7.716712785555413e-09, + "loss": 0.2594, + "step": 41658 + }, + { + "epoch": 1.9515154354241813, + "grad_norm": 0.622707037494778, + "learning_rate": 7.701830496398278e-09, + "loss": 0.273, + "step": 41659 + }, + { + "epoch": 1.9515622804141097, + "grad_norm": 0.5847861993883601, + "learning_rate": 7.686962549790788e-09, + "loss": 0.2714, + "step": 41660 + }, + { + "epoch": 1.9516091254040382, + "grad_norm": 0.6548489983820505, + "learning_rate": 7.672108945818157e-09, + "loss": 0.2783, + "step": 41661 + }, + { + "epoch": 1.9516559703939662, + "grad_norm": 0.613836352929806, + "learning_rate": 7.657269684566426e-09, + "loss": 0.2637, + "step": 41662 + }, + { + "epoch": 1.9517028153838947, + "grad_norm": 0.6221945941496745, + "learning_rate": 7.642444766120804e-09, + "loss": 0.2916, + "step": 41663 + }, + { + "epoch": 1.9517496603738231, + "grad_norm": 0.5908628659830919, + "learning_rate": 7.627634190566225e-09, + "loss": 0.2428, + "step": 41664 + }, + { + "epoch": 1.9517965053637514, + "grad_norm": 0.6060981694814013, + "learning_rate": 7.612837957988173e-09, + "loss": 0.2815, + "step": 41665 + }, + { + "epoch": 1.9518433503536796, + "grad_norm": 0.6177811950360038, + "learning_rate": 7.59805606847186e-09, + "loss": 0.2685, + "step": 41666 + }, + { + "epoch": 1.951890195343608, + "grad_norm": 0.6075392314363113, + "learning_rate": 7.583288522102216e-09, + "loss": 0.2659, + "step": 41667 + }, + { + "epoch": 1.9519370403335363, + "grad_norm": 0.6236811768455104, + "learning_rate": 7.568535318964454e-09, + "loss": 0.2844, + "step": 41668 + }, + { + "epoch": 1.9519838853234646, + "grad_norm": 0.5701376040070911, + "learning_rate": 7.553796459143226e-09, + "loss": 0.2658, + "step": 41669 + }, + { + "epoch": 1.952030730313393, + "grad_norm": 0.6150334264545049, + "learning_rate": 7.539071942723464e-09, + "loss": 0.2811, + "step": 41670 + }, + { + "epoch": 1.9520775753033213, + "grad_norm": 0.5677811846508246, + "learning_rate": 7.524361769789823e-09, + "loss": 0.2541, + "step": 41671 + }, + { + "epoch": 1.9521244202932495, + "grad_norm": 0.6366827029157962, + "learning_rate": 7.509665940426959e-09, + "loss": 0.2756, + "step": 41672 + }, + { + "epoch": 1.952171265283178, + "grad_norm": 0.6085199877034849, + "learning_rate": 7.494984454719801e-09, + "loss": 0.2767, + "step": 41673 + }, + { + "epoch": 1.9522181102731064, + "grad_norm": 0.6216190742338134, + "learning_rate": 7.480317312752172e-09, + "loss": 0.2703, + "step": 41674 + }, + { + "epoch": 1.9522649552630347, + "grad_norm": 0.5825246765607311, + "learning_rate": 7.465664514609283e-09, + "loss": 0.2685, + "step": 41675 + }, + { + "epoch": 1.952311800252963, + "grad_norm": 0.61657099159876, + "learning_rate": 7.451026060374677e-09, + "loss": 0.2768, + "step": 41676 + }, + { + "epoch": 1.9523586452428914, + "grad_norm": 0.5822277551698682, + "learning_rate": 7.43640195013301e-09, + "loss": 0.271, + "step": 41677 + }, + { + "epoch": 1.9524054902328196, + "grad_norm": 0.5980140813237275, + "learning_rate": 7.42179218396838e-09, + "loss": 0.285, + "step": 41678 + }, + { + "epoch": 1.9524523352227479, + "grad_norm": 0.6213240557519142, + "learning_rate": 7.407196761964885e-09, + "loss": 0.2731, + "step": 41679 + }, + { + "epoch": 1.9524991802126763, + "grad_norm": 0.5615802252737655, + "learning_rate": 7.39261568420635e-09, + "loss": 0.259, + "step": 41680 + }, + { + "epoch": 1.9525460252026046, + "grad_norm": 0.5777950329073483, + "learning_rate": 7.378048950776873e-09, + "loss": 0.2485, + "step": 41681 + }, + { + "epoch": 1.9525928701925328, + "grad_norm": 0.5903607161566664, + "learning_rate": 7.363496561760275e-09, + "loss": 0.266, + "step": 41682 + }, + { + "epoch": 1.9526397151824613, + "grad_norm": 0.6306201529527439, + "learning_rate": 7.348958517240379e-09, + "loss": 0.2736, + "step": 41683 + }, + { + "epoch": 1.9526865601723897, + "grad_norm": 0.602948017885359, + "learning_rate": 7.334434817300728e-09, + "loss": 0.2629, + "step": 41684 + }, + { + "epoch": 1.9527334051623177, + "grad_norm": 0.6075947807163977, + "learning_rate": 7.319925462024591e-09, + "loss": 0.2635, + "step": 41685 + }, + { + "epoch": 1.9527802501522462, + "grad_norm": 0.5859894340326913, + "learning_rate": 7.305430451496065e-09, + "loss": 0.2564, + "step": 41686 + }, + { + "epoch": 1.9528270951421747, + "grad_norm": 0.5884079000998103, + "learning_rate": 7.2909497857981405e-09, + "loss": 0.2546, + "step": 41687 + }, + { + "epoch": 1.952873940132103, + "grad_norm": 0.5761081250328843, + "learning_rate": 7.276483465014361e-09, + "loss": 0.2655, + "step": 41688 + }, + { + "epoch": 1.9529207851220312, + "grad_norm": 0.6055067910398872, + "learning_rate": 7.262031489227716e-09, + "loss": 0.2788, + "step": 41689 + }, + { + "epoch": 1.9529676301119596, + "grad_norm": 0.5897973499480226, + "learning_rate": 7.2475938585217505e-09, + "loss": 0.262, + "step": 41690 + }, + { + "epoch": 1.9530144751018879, + "grad_norm": 0.639329701960578, + "learning_rate": 7.233170572979176e-09, + "loss": 0.2778, + "step": 41691 + }, + { + "epoch": 1.953061320091816, + "grad_norm": 0.6007134029926039, + "learning_rate": 7.218761632683258e-09, + "loss": 0.2807, + "step": 41692 + }, + { + "epoch": 1.9531081650817446, + "grad_norm": 0.6137503809414725, + "learning_rate": 7.204367037716986e-09, + "loss": 0.2718, + "step": 41693 + }, + { + "epoch": 1.9531550100716728, + "grad_norm": 0.6011259795200338, + "learning_rate": 7.189986788162795e-09, + "loss": 0.2764, + "step": 41694 + }, + { + "epoch": 1.953201855061601, + "grad_norm": 0.6314894803929826, + "learning_rate": 7.175620884103673e-09, + "loss": 0.2769, + "step": 41695 + }, + { + "epoch": 1.9532487000515295, + "grad_norm": 0.6075198753188011, + "learning_rate": 7.161269325622333e-09, + "loss": 0.2838, + "step": 41696 + }, + { + "epoch": 1.953295545041458, + "grad_norm": 0.5439966201055402, + "learning_rate": 7.146932112801485e-09, + "loss": 0.2403, + "step": 41697 + }, + { + "epoch": 1.953342390031386, + "grad_norm": 0.6373046085599989, + "learning_rate": 7.132609245723565e-09, + "loss": 0.2751, + "step": 41698 + }, + { + "epoch": 1.9533892350213145, + "grad_norm": 0.6313854658393147, + "learning_rate": 7.1183007244707284e-09, + "loss": 0.2847, + "step": 41699 + }, + { + "epoch": 1.953436080011243, + "grad_norm": 0.6024399768560635, + "learning_rate": 7.104006549125686e-09, + "loss": 0.263, + "step": 41700 + }, + { + "epoch": 1.9534829250011712, + "grad_norm": 0.6027732675495595, + "learning_rate": 7.089726719770318e-09, + "loss": 0.2798, + "step": 41701 + }, + { + "epoch": 1.9535297699910994, + "grad_norm": 0.5872124019117035, + "learning_rate": 7.075461236487058e-09, + "loss": 0.257, + "step": 41702 + }, + { + "epoch": 1.9535766149810279, + "grad_norm": 0.6430740990110321, + "learning_rate": 7.061210099357785e-09, + "loss": 0.2703, + "step": 41703 + }, + { + "epoch": 1.953623459970956, + "grad_norm": 0.576712062193696, + "learning_rate": 7.046973308464933e-09, + "loss": 0.2493, + "step": 41704 + }, + { + "epoch": 1.9536703049608843, + "grad_norm": 0.6257338830675403, + "learning_rate": 7.032750863890103e-09, + "loss": 0.2777, + "step": 41705 + }, + { + "epoch": 1.9537171499508128, + "grad_norm": 0.6234843646945892, + "learning_rate": 7.018542765715175e-09, + "loss": 0.2871, + "step": 41706 + }, + { + "epoch": 1.953763994940741, + "grad_norm": 0.6050274595506652, + "learning_rate": 7.004349014022027e-09, + "loss": 0.2613, + "step": 41707 + }, + { + "epoch": 1.9538108399306693, + "grad_norm": 0.623309177780887, + "learning_rate": 6.9901696088922614e-09, + "loss": 0.2745, + "step": 41708 + }, + { + "epoch": 1.9538576849205977, + "grad_norm": 0.6281791331251702, + "learning_rate": 6.976004550407201e-09, + "loss": 0.2828, + "step": 41709 + }, + { + "epoch": 1.9539045299105262, + "grad_norm": 0.6637400348600667, + "learning_rate": 6.961853838649002e-09, + "loss": 0.2765, + "step": 41710 + }, + { + "epoch": 1.9539513749004545, + "grad_norm": 0.5766372086609972, + "learning_rate": 6.947717473698435e-09, + "loss": 0.2501, + "step": 41711 + }, + { + "epoch": 1.9539982198903827, + "grad_norm": 0.6200980616059568, + "learning_rate": 6.933595455637376e-09, + "loss": 0.2919, + "step": 41712 + }, + { + "epoch": 1.9540450648803112, + "grad_norm": 0.6058299443649218, + "learning_rate": 6.919487784546875e-09, + "loss": 0.2756, + "step": 41713 + }, + { + "epoch": 1.9540919098702394, + "grad_norm": 0.6100426526510749, + "learning_rate": 6.905394460507974e-09, + "loss": 0.2911, + "step": 41714 + }, + { + "epoch": 1.9541387548601676, + "grad_norm": 0.5961635875659715, + "learning_rate": 6.891315483602001e-09, + "loss": 0.2691, + "step": 41715 + }, + { + "epoch": 1.954185599850096, + "grad_norm": 0.6131270631143658, + "learning_rate": 6.877250853909723e-09, + "loss": 0.27, + "step": 41716 + }, + { + "epoch": 1.9542324448400243, + "grad_norm": 0.5928001862610573, + "learning_rate": 6.8632005715124624e-09, + "loss": 0.2697, + "step": 41717 + }, + { + "epoch": 1.9542792898299526, + "grad_norm": 0.6304615747034121, + "learning_rate": 6.849164636490713e-09, + "loss": 0.2804, + "step": 41718 + }, + { + "epoch": 1.954326134819881, + "grad_norm": 0.6016579063341493, + "learning_rate": 6.835143048925519e-09, + "loss": 0.2787, + "step": 41719 + }, + { + "epoch": 1.9543729798098095, + "grad_norm": 0.6062267075525472, + "learning_rate": 6.821135808897372e-09, + "loss": 0.2773, + "step": 41720 + }, + { + "epoch": 1.9544198247997375, + "grad_norm": 0.6214021154948561, + "learning_rate": 6.8071429164867645e-09, + "loss": 0.2955, + "step": 41721 + }, + { + "epoch": 1.954466669789666, + "grad_norm": 0.5587989668036304, + "learning_rate": 6.793164371774464e-09, + "loss": 0.2448, + "step": 41722 + }, + { + "epoch": 1.9545135147795945, + "grad_norm": 0.6258405346535579, + "learning_rate": 6.779200174841239e-09, + "loss": 0.2703, + "step": 41723 + }, + { + "epoch": 1.9545603597695227, + "grad_norm": 0.6047555758651922, + "learning_rate": 6.765250325766748e-09, + "loss": 0.2717, + "step": 41724 + }, + { + "epoch": 1.954607204759451, + "grad_norm": 0.5746428556110996, + "learning_rate": 6.751314824631761e-09, + "loss": 0.267, + "step": 41725 + }, + { + "epoch": 1.9546540497493794, + "grad_norm": 0.614216594893617, + "learning_rate": 6.737393671515935e-09, + "loss": 0.2878, + "step": 41726 + }, + { + "epoch": 1.9547008947393076, + "grad_norm": 0.60859467529903, + "learning_rate": 6.7234868665000394e-09, + "loss": 0.2684, + "step": 41727 + }, + { + "epoch": 1.9547477397292359, + "grad_norm": 0.557382974467946, + "learning_rate": 6.7095944096637335e-09, + "loss": 0.25, + "step": 41728 + }, + { + "epoch": 1.9547945847191643, + "grad_norm": 0.6096288543708075, + "learning_rate": 6.6957163010872294e-09, + "loss": 0.2697, + "step": 41729 + }, + { + "epoch": 1.9548414297090926, + "grad_norm": 0.62311793976332, + "learning_rate": 6.681852540849909e-09, + "loss": 0.2711, + "step": 41730 + }, + { + "epoch": 1.9548882746990208, + "grad_norm": 0.5713125462594563, + "learning_rate": 6.6680031290319855e-09, + "loss": 0.2667, + "step": 41731 + }, + { + "epoch": 1.9549351196889493, + "grad_norm": 0.5963296228823026, + "learning_rate": 6.654168065713118e-09, + "loss": 0.2768, + "step": 41732 + }, + { + "epoch": 1.9549819646788777, + "grad_norm": 0.6291694709164966, + "learning_rate": 6.640347350972965e-09, + "loss": 0.2727, + "step": 41733 + }, + { + "epoch": 1.9550288096688058, + "grad_norm": 0.6204945702087064, + "learning_rate": 6.6265409848906284e-09, + "loss": 0.2771, + "step": 41734 + }, + { + "epoch": 1.9550756546587342, + "grad_norm": 0.6604711205708107, + "learning_rate": 6.6127489675460456e-09, + "loss": 0.2831, + "step": 41735 + }, + { + "epoch": 1.9551224996486627, + "grad_norm": 0.6349398697489056, + "learning_rate": 6.5989712990183194e-09, + "loss": 0.2697, + "step": 41736 + }, + { + "epoch": 1.955169344638591, + "grad_norm": 0.5850119823421946, + "learning_rate": 6.585207979386832e-09, + "loss": 0.2546, + "step": 41737 + }, + { + "epoch": 1.9552161896285192, + "grad_norm": 0.6173092523185119, + "learning_rate": 6.571459008730685e-09, + "loss": 0.2917, + "step": 41738 + }, + { + "epoch": 1.9552630346184476, + "grad_norm": 0.5918065432631512, + "learning_rate": 6.5577243871292605e-09, + "loss": 0.2666, + "step": 41739 + }, + { + "epoch": 1.9553098796083759, + "grad_norm": 0.6265707467708935, + "learning_rate": 6.5440041146613845e-09, + "loss": 0.2798, + "step": 41740 + }, + { + "epoch": 1.9553567245983041, + "grad_norm": 0.5753497649393905, + "learning_rate": 6.530298191405882e-09, + "loss": 0.2725, + "step": 41741 + }, + { + "epoch": 1.9554035695882326, + "grad_norm": 0.5752706065870393, + "learning_rate": 6.516606617442134e-09, + "loss": 0.2671, + "step": 41742 + }, + { + "epoch": 1.9554504145781608, + "grad_norm": 0.6033690619057871, + "learning_rate": 6.502929392848412e-09, + "loss": 0.244, + "step": 41743 + }, + { + "epoch": 1.955497259568089, + "grad_norm": 0.5991305856681468, + "learning_rate": 6.489266517703541e-09, + "loss": 0.2636, + "step": 41744 + }, + { + "epoch": 1.9555441045580175, + "grad_norm": 0.6237789234790219, + "learning_rate": 6.475617992086347e-09, + "loss": 0.2757, + "step": 41745 + }, + { + "epoch": 1.955590949547946, + "grad_norm": 0.6643761250387119, + "learning_rate": 6.461983816075101e-09, + "loss": 0.2861, + "step": 41746 + }, + { + "epoch": 1.955637794537874, + "grad_norm": 0.6094455460630975, + "learning_rate": 6.448363989748629e-09, + "loss": 0.2766, + "step": 41747 + }, + { + "epoch": 1.9556846395278025, + "grad_norm": 0.5891798804885, + "learning_rate": 6.434758513184924e-09, + "loss": 0.2777, + "step": 41748 + }, + { + "epoch": 1.955731484517731, + "grad_norm": 0.5630935364049304, + "learning_rate": 6.421167386462535e-09, + "loss": 0.2599, + "step": 41749 + }, + { + "epoch": 1.9557783295076592, + "grad_norm": 0.6394355287349657, + "learning_rate": 6.407590609659453e-09, + "loss": 0.2732, + "step": 41750 + }, + { + "epoch": 1.9558251744975874, + "grad_norm": 0.6073174086124631, + "learning_rate": 6.394028182854228e-09, + "loss": 0.2872, + "step": 41751 + }, + { + "epoch": 1.9558720194875159, + "grad_norm": 0.5748832042169395, + "learning_rate": 6.380480106124298e-09, + "loss": 0.2733, + "step": 41752 + }, + { + "epoch": 1.9559188644774441, + "grad_norm": 0.6266416757790844, + "learning_rate": 6.366946379548211e-09, + "loss": 0.2813, + "step": 41753 + }, + { + "epoch": 1.9559657094673724, + "grad_norm": 0.5645872443025638, + "learning_rate": 6.353427003203405e-09, + "loss": 0.2679, + "step": 41754 + }, + { + "epoch": 1.9560125544573008, + "grad_norm": 0.5737780500270866, + "learning_rate": 6.339921977168151e-09, + "loss": 0.257, + "step": 41755 + }, + { + "epoch": 1.956059399447229, + "grad_norm": 0.5920809581588249, + "learning_rate": 6.3264313015196086e-09, + "loss": 0.2673, + "step": 41756 + }, + { + "epoch": 1.9561062444371573, + "grad_norm": 0.5737051688976762, + "learning_rate": 6.312954976335772e-09, + "loss": 0.2497, + "step": 41757 + }, + { + "epoch": 1.9561530894270858, + "grad_norm": 0.6062327157896802, + "learning_rate": 6.299493001694079e-09, + "loss": 0.2765, + "step": 41758 + }, + { + "epoch": 1.9561999344170142, + "grad_norm": 0.608505530535984, + "learning_rate": 6.286045377671968e-09, + "loss": 0.2681, + "step": 41759 + }, + { + "epoch": 1.9562467794069425, + "grad_norm": 0.6101820596039447, + "learning_rate": 6.272612104347153e-09, + "loss": 0.2571, + "step": 41760 + }, + { + "epoch": 1.9562936243968707, + "grad_norm": 0.5547638539742469, + "learning_rate": 6.259193181796519e-09, + "loss": 0.2696, + "step": 41761 + }, + { + "epoch": 1.9563404693867992, + "grad_norm": 0.6173932525461615, + "learning_rate": 6.245788610097503e-09, + "loss": 0.2737, + "step": 41762 + }, + { + "epoch": 1.9563873143767274, + "grad_norm": 0.6082054931722817, + "learning_rate": 6.232398389326988e-09, + "loss": 0.2723, + "step": 41763 + }, + { + "epoch": 1.9564341593666557, + "grad_norm": 0.568960251056512, + "learning_rate": 6.219022519562412e-09, + "loss": 0.2545, + "step": 41764 + }, + { + "epoch": 1.9564810043565841, + "grad_norm": 0.6101944325190831, + "learning_rate": 6.2056610008806584e-09, + "loss": 0.2773, + "step": 41765 + }, + { + "epoch": 1.9565278493465124, + "grad_norm": 0.6171522351310353, + "learning_rate": 6.192313833358332e-09, + "loss": 0.2759, + "step": 41766 + }, + { + "epoch": 1.9565746943364406, + "grad_norm": 0.6139214296325088, + "learning_rate": 6.178981017072594e-09, + "loss": 0.2643, + "step": 41767 + }, + { + "epoch": 1.956621539326369, + "grad_norm": 0.6045825466688746, + "learning_rate": 6.165662552100049e-09, + "loss": 0.2674, + "step": 41768 + }, + { + "epoch": 1.9566683843162975, + "grad_norm": 0.5929794784537457, + "learning_rate": 6.152358438517303e-09, + "loss": 0.2569, + "step": 41769 + }, + { + "epoch": 1.9567152293062255, + "grad_norm": 0.6281794341038581, + "learning_rate": 6.1390686764006835e-09, + "loss": 0.2791, + "step": 41770 + }, + { + "epoch": 1.956762074296154, + "grad_norm": 0.6263448198991832, + "learning_rate": 6.125793265827074e-09, + "loss": 0.2721, + "step": 41771 + }, + { + "epoch": 1.9568089192860825, + "grad_norm": 0.6209614408218523, + "learning_rate": 6.112532206872801e-09, + "loss": 0.2766, + "step": 41772 + }, + { + "epoch": 1.9568557642760107, + "grad_norm": 0.5853950997869448, + "learning_rate": 6.099285499614194e-09, + "loss": 0.2801, + "step": 41773 + }, + { + "epoch": 1.956902609265939, + "grad_norm": 0.6172632390881442, + "learning_rate": 6.086053144127302e-09, + "loss": 0.2693, + "step": 41774 + }, + { + "epoch": 1.9569494542558674, + "grad_norm": 0.5730993888760267, + "learning_rate": 6.072835140488176e-09, + "loss": 0.2573, + "step": 41775 + }, + { + "epoch": 1.9569962992457957, + "grad_norm": 0.548205179094567, + "learning_rate": 6.059631488773143e-09, + "loss": 0.248, + "step": 41776 + }, + { + "epoch": 1.957043144235724, + "grad_norm": 0.5931012052657103, + "learning_rate": 6.0464421890579775e-09, + "loss": 0.2604, + "step": 41777 + }, + { + "epoch": 1.9570899892256524, + "grad_norm": 0.6000542402078749, + "learning_rate": 6.0332672414190055e-09, + "loss": 0.2843, + "step": 41778 + }, + { + "epoch": 1.9571368342155806, + "grad_norm": 0.5795150215401191, + "learning_rate": 6.020106645931445e-09, + "loss": 0.2622, + "step": 41779 + }, + { + "epoch": 1.9571836792055088, + "grad_norm": 0.5849801449267463, + "learning_rate": 6.006960402671347e-09, + "loss": 0.2556, + "step": 41780 + }, + { + "epoch": 1.9572305241954373, + "grad_norm": 0.5683704481839023, + "learning_rate": 5.993828511714761e-09, + "loss": 0.2565, + "step": 41781 + }, + { + "epoch": 1.9572773691853658, + "grad_norm": 0.5622240492766541, + "learning_rate": 5.98071097313635e-09, + "loss": 0.2681, + "step": 41782 + }, + { + "epoch": 1.9573242141752938, + "grad_norm": 0.5373105252567175, + "learning_rate": 5.967607787012441e-09, + "loss": 0.2541, + "step": 41783 + }, + { + "epoch": 1.9573710591652222, + "grad_norm": 0.5682385510788637, + "learning_rate": 5.9545189534179755e-09, + "loss": 0.2553, + "step": 41784 + }, + { + "epoch": 1.9574179041551507, + "grad_norm": 0.6009145136587702, + "learning_rate": 5.941444472428448e-09, + "loss": 0.2622, + "step": 41785 + }, + { + "epoch": 1.957464749145079, + "grad_norm": 0.5938703140624351, + "learning_rate": 5.928384344118798e-09, + "loss": 0.2742, + "step": 41786 + }, + { + "epoch": 1.9575115941350072, + "grad_norm": 0.6311001561046513, + "learning_rate": 5.915338568564521e-09, + "loss": 0.2528, + "step": 41787 + }, + { + "epoch": 1.9575584391249357, + "grad_norm": 0.596559134450917, + "learning_rate": 5.902307145840836e-09, + "loss": 0.2799, + "step": 41788 + }, + { + "epoch": 1.957605284114864, + "grad_norm": 0.6418866566871377, + "learning_rate": 5.889290076022403e-09, + "loss": 0.2813, + "step": 41789 + }, + { + "epoch": 1.9576521291047921, + "grad_norm": 0.6227083433959897, + "learning_rate": 5.876287359184163e-09, + "loss": 0.2673, + "step": 41790 + }, + { + "epoch": 1.9576989740947206, + "grad_norm": 0.5746219281456282, + "learning_rate": 5.8632989954010564e-09, + "loss": 0.2557, + "step": 41791 + }, + { + "epoch": 1.9577458190846488, + "grad_norm": 0.6079997254241032, + "learning_rate": 5.850324984747746e-09, + "loss": 0.2664, + "step": 41792 + }, + { + "epoch": 1.957792664074577, + "grad_norm": 0.5521488877393592, + "learning_rate": 5.8373653272988936e-09, + "loss": 0.2662, + "step": 41793 + }, + { + "epoch": 1.9578395090645055, + "grad_norm": 0.6005539600206022, + "learning_rate": 5.82442002312944e-09, + "loss": 0.27, + "step": 41794 + }, + { + "epoch": 1.957886354054434, + "grad_norm": 0.5973397171133255, + "learning_rate": 5.811489072313215e-09, + "loss": 0.27, + "step": 41795 + }, + { + "epoch": 1.9579331990443622, + "grad_norm": 0.6141051847443548, + "learning_rate": 5.798572474925157e-09, + "loss": 0.2677, + "step": 41796 + }, + { + "epoch": 1.9579800440342905, + "grad_norm": 0.6364484914340034, + "learning_rate": 5.7856702310393755e-09, + "loss": 0.2783, + "step": 41797 + }, + { + "epoch": 1.958026889024219, + "grad_norm": 0.5941296312848332, + "learning_rate": 5.772782340729976e-09, + "loss": 0.2667, + "step": 41798 + }, + { + "epoch": 1.9580737340141472, + "grad_norm": 0.6667470088354295, + "learning_rate": 5.7599088040716235e-09, + "loss": 0.2601, + "step": 41799 + }, + { + "epoch": 1.9581205790040754, + "grad_norm": 0.6269084310704485, + "learning_rate": 5.747049621137868e-09, + "loss": 0.2746, + "step": 41800 + }, + { + "epoch": 1.958167423994004, + "grad_norm": 0.5873952225641262, + "learning_rate": 5.734204792003095e-09, + "loss": 0.2704, + "step": 41801 + }, + { + "epoch": 1.9582142689839321, + "grad_norm": 0.6113780317241363, + "learning_rate": 5.721374316740857e-09, + "loss": 0.2608, + "step": 41802 + }, + { + "epoch": 1.9582611139738604, + "grad_norm": 0.583785988838063, + "learning_rate": 5.70855819542554e-09, + "loss": 0.2644, + "step": 41803 + }, + { + "epoch": 1.9583079589637888, + "grad_norm": 0.597690354554665, + "learning_rate": 5.695756428130139e-09, + "loss": 0.264, + "step": 41804 + }, + { + "epoch": 1.9583548039537173, + "grad_norm": 0.5610037093462742, + "learning_rate": 5.6829690149290404e-09, + "loss": 0.2592, + "step": 41805 + }, + { + "epoch": 1.9584016489436453, + "grad_norm": 0.5990027035645049, + "learning_rate": 5.670195955895241e-09, + "loss": 0.2696, + "step": 41806 + }, + { + "epoch": 1.9584484939335738, + "grad_norm": 0.6006256340688871, + "learning_rate": 5.657437251102849e-09, + "loss": 0.2751, + "step": 41807 + }, + { + "epoch": 1.9584953389235022, + "grad_norm": 0.5718074779417794, + "learning_rate": 5.64469290062486e-09, + "loss": 0.2694, + "step": 41808 + }, + { + "epoch": 1.9585421839134305, + "grad_norm": 0.5822782767965595, + "learning_rate": 5.631962904534827e-09, + "loss": 0.2695, + "step": 41809 + }, + { + "epoch": 1.9585890289033587, + "grad_norm": 0.6145721371767237, + "learning_rate": 5.619247262905747e-09, + "loss": 0.2769, + "step": 41810 + }, + { + "epoch": 1.9586358738932872, + "grad_norm": 0.6113929699267913, + "learning_rate": 5.606545975810896e-09, + "loss": 0.2595, + "step": 41811 + }, + { + "epoch": 1.9586827188832154, + "grad_norm": 0.5393912489732435, + "learning_rate": 5.5938590433235466e-09, + "loss": 0.2483, + "step": 41812 + }, + { + "epoch": 1.9587295638731437, + "grad_norm": 0.5512614323475205, + "learning_rate": 5.581186465516697e-09, + "loss": 0.2497, + "step": 41813 + }, + { + "epoch": 1.9587764088630721, + "grad_norm": 0.6006811361489169, + "learning_rate": 5.568528242463067e-09, + "loss": 0.2602, + "step": 41814 + }, + { + "epoch": 1.9588232538530004, + "grad_norm": 0.5965912879131594, + "learning_rate": 5.555884374235654e-09, + "loss": 0.2501, + "step": 41815 + }, + { + "epoch": 1.9588700988429286, + "grad_norm": 0.5993624666368387, + "learning_rate": 5.543254860907177e-09, + "loss": 0.26, + "step": 41816 + }, + { + "epoch": 1.958916943832857, + "grad_norm": 0.5420459478097566, + "learning_rate": 5.530639702550355e-09, + "loss": 0.2563, + "step": 41817 + }, + { + "epoch": 1.9589637888227855, + "grad_norm": 0.6317729998780689, + "learning_rate": 5.5180388992376325e-09, + "loss": 0.2769, + "step": 41818 + }, + { + "epoch": 1.9590106338127136, + "grad_norm": 0.5664563318347828, + "learning_rate": 5.505452451041448e-09, + "loss": 0.2874, + "step": 41819 + }, + { + "epoch": 1.959057478802642, + "grad_norm": 0.6722643970223212, + "learning_rate": 5.492880358034802e-09, + "loss": 0.2829, + "step": 41820 + }, + { + "epoch": 1.9591043237925705, + "grad_norm": 0.6011021315077705, + "learning_rate": 5.480322620289302e-09, + "loss": 0.2718, + "step": 41821 + }, + { + "epoch": 1.9591511687824987, + "grad_norm": 0.6172064397335226, + "learning_rate": 5.467779237877946e-09, + "loss": 0.247, + "step": 41822 + }, + { + "epoch": 1.959198013772427, + "grad_norm": 0.5882920718319818, + "learning_rate": 5.455250210872342e-09, + "loss": 0.2467, + "step": 41823 + }, + { + "epoch": 1.9592448587623554, + "grad_norm": 0.6416213280277356, + "learning_rate": 5.442735539344657e-09, + "loss": 0.2765, + "step": 41824 + }, + { + "epoch": 1.9592917037522837, + "grad_norm": 0.6010461174160621, + "learning_rate": 5.430235223367053e-09, + "loss": 0.2681, + "step": 41825 + }, + { + "epoch": 1.959338548742212, + "grad_norm": 0.6049691857439614, + "learning_rate": 5.417749263011695e-09, + "loss": 0.2901, + "step": 41826 + }, + { + "epoch": 1.9593853937321404, + "grad_norm": 0.5971956173245078, + "learning_rate": 5.405277658350194e-09, + "loss": 0.2753, + "step": 41827 + }, + { + "epoch": 1.9594322387220686, + "grad_norm": 0.5720866877881375, + "learning_rate": 5.392820409454158e-09, + "loss": 0.2845, + "step": 41828 + }, + { + "epoch": 1.9594790837119969, + "grad_norm": 0.5978811032589961, + "learning_rate": 5.380377516395474e-09, + "loss": 0.2669, + "step": 41829 + }, + { + "epoch": 1.9595259287019253, + "grad_norm": 0.6699133812626537, + "learning_rate": 5.367948979245751e-09, + "loss": 0.2551, + "step": 41830 + }, + { + "epoch": 1.9595727736918538, + "grad_norm": 0.586096919981953, + "learning_rate": 5.355534798076601e-09, + "loss": 0.2849, + "step": 41831 + }, + { + "epoch": 1.959619618681782, + "grad_norm": 0.5802329575442559, + "learning_rate": 5.343134972959074e-09, + "loss": 0.2492, + "step": 41832 + }, + { + "epoch": 1.9596664636717103, + "grad_norm": 0.5974226364613119, + "learning_rate": 5.33074950396506e-09, + "loss": 0.2701, + "step": 41833 + }, + { + "epoch": 1.9597133086616387, + "grad_norm": 0.56016418552872, + "learning_rate": 5.318378391165335e-09, + "loss": 0.2643, + "step": 41834 + }, + { + "epoch": 1.959760153651567, + "grad_norm": 0.5788371460052338, + "learning_rate": 5.306021634631509e-09, + "loss": 0.2609, + "step": 41835 + }, + { + "epoch": 1.9598069986414952, + "grad_norm": 0.6036993657657459, + "learning_rate": 5.293679234434357e-09, + "loss": 0.2706, + "step": 41836 + }, + { + "epoch": 1.9598538436314237, + "grad_norm": 0.5595236690637801, + "learning_rate": 5.281351190645212e-09, + "loss": 0.2725, + "step": 41837 + }, + { + "epoch": 1.959900688621352, + "grad_norm": 0.577574003811645, + "learning_rate": 5.269037503334851e-09, + "loss": 0.2692, + "step": 41838 + }, + { + "epoch": 1.9599475336112802, + "grad_norm": 0.6032360791110892, + "learning_rate": 5.256738172574327e-09, + "loss": 0.2725, + "step": 41839 + }, + { + "epoch": 1.9599943786012086, + "grad_norm": 0.6921647402465563, + "learning_rate": 5.244453198433863e-09, + "loss": 0.2781, + "step": 41840 + }, + { + "epoch": 1.960041223591137, + "grad_norm": 0.6158394343214084, + "learning_rate": 5.232182580984791e-09, + "loss": 0.2718, + "step": 41841 + }, + { + "epoch": 1.960088068581065, + "grad_norm": 0.5699957302051588, + "learning_rate": 5.219926320297331e-09, + "loss": 0.2685, + "step": 41842 + }, + { + "epoch": 1.9601349135709936, + "grad_norm": 0.6411916951250018, + "learning_rate": 5.20768441644226e-09, + "loss": 0.292, + "step": 41843 + }, + { + "epoch": 1.960181758560922, + "grad_norm": 0.5909094417774412, + "learning_rate": 5.195456869489801e-09, + "loss": 0.2466, + "step": 41844 + }, + { + "epoch": 1.9602286035508503, + "grad_norm": 0.6364628918222331, + "learning_rate": 5.1832436795104524e-09, + "loss": 0.2845, + "step": 41845 + }, + { + "epoch": 1.9602754485407785, + "grad_norm": 0.5826381914331117, + "learning_rate": 5.171044846574713e-09, + "loss": 0.2632, + "step": 41846 + }, + { + "epoch": 1.960322293530707, + "grad_norm": 0.6134846610299685, + "learning_rate": 5.15886037075225e-09, + "loss": 0.2747, + "step": 41847 + }, + { + "epoch": 1.9603691385206352, + "grad_norm": 0.5584240928091895, + "learning_rate": 5.146690252113562e-09, + "loss": 0.2565, + "step": 41848 + }, + { + "epoch": 1.9604159835105635, + "grad_norm": 0.6238603182854491, + "learning_rate": 5.134534490728871e-09, + "loss": 0.2791, + "step": 41849 + }, + { + "epoch": 1.960462828500492, + "grad_norm": 0.5933469131406883, + "learning_rate": 5.122393086667566e-09, + "loss": 0.2567, + "step": 41850 + }, + { + "epoch": 1.9605096734904202, + "grad_norm": 0.6169966411282629, + "learning_rate": 5.1102660400001445e-09, + "loss": 0.2656, + "step": 41851 + }, + { + "epoch": 1.9605565184803484, + "grad_norm": 0.6527343586832198, + "learning_rate": 5.09815335079572e-09, + "loss": 0.2641, + "step": 41852 + }, + { + "epoch": 1.9606033634702769, + "grad_norm": 0.6024101761080789, + "learning_rate": 5.0860550191245135e-09, + "loss": 0.2662, + "step": 41853 + }, + { + "epoch": 1.9606502084602053, + "grad_norm": 0.6400243257132644, + "learning_rate": 5.0739710450559145e-09, + "loss": 0.2824, + "step": 41854 + }, + { + "epoch": 1.9606970534501333, + "grad_norm": 0.6409025947739582, + "learning_rate": 5.061901428659588e-09, + "loss": 0.2815, + "step": 41855 + }, + { + "epoch": 1.9607438984400618, + "grad_norm": 0.5801004786016476, + "learning_rate": 5.049846170005201e-09, + "loss": 0.2591, + "step": 41856 + }, + { + "epoch": 1.9607907434299903, + "grad_norm": 0.581562406859693, + "learning_rate": 5.0378052691615886e-09, + "loss": 0.2513, + "step": 41857 + }, + { + "epoch": 1.9608375884199185, + "grad_norm": 0.5827500357266905, + "learning_rate": 5.025778726198416e-09, + "loss": 0.2651, + "step": 41858 + }, + { + "epoch": 1.9608844334098467, + "grad_norm": 0.6016252966263486, + "learning_rate": 5.013766541184794e-09, + "loss": 0.2573, + "step": 41859 + }, + { + "epoch": 1.9609312783997752, + "grad_norm": 0.5838799190128496, + "learning_rate": 5.0017687141895586e-09, + "loss": 0.2647, + "step": 41860 + }, + { + "epoch": 1.9609781233897035, + "grad_norm": 0.5496805026794067, + "learning_rate": 4.9897852452823744e-09, + "loss": 0.2552, + "step": 41861 + }, + { + "epoch": 1.9610249683796317, + "grad_norm": 0.6020106359022516, + "learning_rate": 4.977816134531799e-09, + "loss": 0.2555, + "step": 41862 + }, + { + "epoch": 1.9610718133695602, + "grad_norm": 0.6247950460814347, + "learning_rate": 4.965861382006942e-09, + "loss": 0.276, + "step": 41863 + }, + { + "epoch": 1.9611186583594884, + "grad_norm": 0.5779173412985561, + "learning_rate": 4.953920987776084e-09, + "loss": 0.2692, + "step": 41864 + }, + { + "epoch": 1.9611655033494166, + "grad_norm": 0.6121773129283088, + "learning_rate": 4.941994951908613e-09, + "loss": 0.2711, + "step": 41865 + }, + { + "epoch": 1.961212348339345, + "grad_norm": 0.6407252536044664, + "learning_rate": 4.930083274472808e-09, + "loss": 0.2881, + "step": 41866 + }, + { + "epoch": 1.9612591933292736, + "grad_norm": 0.6008428989672326, + "learning_rate": 4.918185955537224e-09, + "loss": 0.2692, + "step": 41867 + }, + { + "epoch": 1.9613060383192018, + "grad_norm": 0.6297803546451645, + "learning_rate": 4.906302995170142e-09, + "loss": 0.2919, + "step": 41868 + }, + { + "epoch": 1.96135288330913, + "grad_norm": 0.5734157889550828, + "learning_rate": 4.894434393440672e-09, + "loss": 0.2442, + "step": 41869 + }, + { + "epoch": 1.9613997282990585, + "grad_norm": 0.6112656366518464, + "learning_rate": 4.88258015041626e-09, + "loss": 0.2599, + "step": 41870 + }, + { + "epoch": 1.9614465732889867, + "grad_norm": 0.6141064426709696, + "learning_rate": 4.8707402661654635e-09, + "loss": 0.2456, + "step": 41871 + }, + { + "epoch": 1.961493418278915, + "grad_norm": 0.5793390355004112, + "learning_rate": 4.858914740756559e-09, + "loss": 0.2568, + "step": 41872 + }, + { + "epoch": 1.9615402632688435, + "grad_norm": 0.640252844393563, + "learning_rate": 4.847103574257273e-09, + "loss": 0.2765, + "step": 41873 + }, + { + "epoch": 1.9615871082587717, + "grad_norm": 0.5981696512394569, + "learning_rate": 4.835306766735881e-09, + "loss": 0.2764, + "step": 41874 + }, + { + "epoch": 1.9616339532487, + "grad_norm": 0.6196741820489386, + "learning_rate": 4.823524318260386e-09, + "loss": 0.2749, + "step": 41875 + }, + { + "epoch": 1.9616807982386284, + "grad_norm": 0.5779233782624318, + "learning_rate": 4.811756228897957e-09, + "loss": 0.2603, + "step": 41876 + }, + { + "epoch": 1.9617276432285569, + "grad_norm": 0.5706069219212236, + "learning_rate": 4.800002498717149e-09, + "loss": 0.2729, + "step": 41877 + }, + { + "epoch": 1.9617744882184849, + "grad_norm": 0.6184044176504899, + "learning_rate": 4.788263127784854e-09, + "loss": 0.2891, + "step": 41878 + }, + { + "epoch": 1.9618213332084133, + "grad_norm": 0.6038230284538936, + "learning_rate": 4.776538116169072e-09, + "loss": 0.2824, + "step": 41879 + }, + { + "epoch": 1.9618681781983418, + "grad_norm": 0.6015322009976147, + "learning_rate": 4.764827463937249e-09, + "loss": 0.2491, + "step": 41880 + }, + { + "epoch": 1.96191502318827, + "grad_norm": 0.5777967608290737, + "learning_rate": 4.753131171156555e-09, + "loss": 0.2699, + "step": 41881 + }, + { + "epoch": 1.9619618681781983, + "grad_norm": 0.6732495350553276, + "learning_rate": 4.741449237894435e-09, + "loss": 0.3006, + "step": 41882 + }, + { + "epoch": 1.9620087131681268, + "grad_norm": 0.6555775931534454, + "learning_rate": 4.7297816642183355e-09, + "loss": 0.2726, + "step": 41883 + }, + { + "epoch": 1.962055558158055, + "grad_norm": 0.5773565734782425, + "learning_rate": 4.71812845019487e-09, + "loss": 0.2603, + "step": 41884 + }, + { + "epoch": 1.9621024031479832, + "grad_norm": 0.6029676075239646, + "learning_rate": 4.706489595891484e-09, + "loss": 0.2765, + "step": 41885 + }, + { + "epoch": 1.9621492481379117, + "grad_norm": 0.6599494478897501, + "learning_rate": 4.694865101375346e-09, + "loss": 0.3115, + "step": 41886 + }, + { + "epoch": 1.96219609312784, + "grad_norm": 0.5901301897613541, + "learning_rate": 4.683254966712791e-09, + "loss": 0.2753, + "step": 41887 + }, + { + "epoch": 1.9622429381177682, + "grad_norm": 0.609483261661625, + "learning_rate": 4.671659191971268e-09, + "loss": 0.2713, + "step": 41888 + }, + { + "epoch": 1.9622897831076966, + "grad_norm": 0.5795190993579697, + "learning_rate": 4.660077777216831e-09, + "loss": 0.2749, + "step": 41889 + }, + { + "epoch": 1.962336628097625, + "grad_norm": 0.602328631817799, + "learning_rate": 4.648510722516375e-09, + "loss": 0.2592, + "step": 41890 + }, + { + "epoch": 1.9623834730875531, + "grad_norm": 0.5883528260943632, + "learning_rate": 4.636958027936789e-09, + "loss": 0.2809, + "step": 41891 + }, + { + "epoch": 1.9624303180774816, + "grad_norm": 0.57771436413541, + "learning_rate": 4.6254196935444085e-09, + "loss": 0.2843, + "step": 41892 + }, + { + "epoch": 1.96247716306741, + "grad_norm": 0.5836003152137075, + "learning_rate": 4.613895719405292e-09, + "loss": 0.2589, + "step": 41893 + }, + { + "epoch": 1.9625240080573383, + "grad_norm": 0.556758442791052, + "learning_rate": 4.6023861055863315e-09, + "loss": 0.2592, + "step": 41894 + }, + { + "epoch": 1.9625708530472665, + "grad_norm": 0.5673793706510112, + "learning_rate": 4.590890852153307e-09, + "loss": 0.2544, + "step": 41895 + }, + { + "epoch": 1.962617698037195, + "grad_norm": 0.62346027286744, + "learning_rate": 4.579409959172277e-09, + "loss": 0.286, + "step": 41896 + }, + { + "epoch": 1.9626645430271232, + "grad_norm": 0.6135928122580245, + "learning_rate": 4.567943426709853e-09, + "loss": 0.274, + "step": 41897 + }, + { + "epoch": 1.9627113880170515, + "grad_norm": 0.5817505909844962, + "learning_rate": 4.556491254831541e-09, + "loss": 0.2406, + "step": 41898 + }, + { + "epoch": 1.96275823300698, + "grad_norm": 0.6033368834238955, + "learning_rate": 4.5450534436033974e-09, + "loss": 0.2673, + "step": 41899 + }, + { + "epoch": 1.9628050779969082, + "grad_norm": 0.5715386139496674, + "learning_rate": 4.533629993091482e-09, + "loss": 0.2643, + "step": 41900 + }, + { + "epoch": 1.9628519229868364, + "grad_norm": 0.6004458856674961, + "learning_rate": 4.5222209033610185e-09, + "loss": 0.2786, + "step": 41901 + }, + { + "epoch": 1.9628987679767649, + "grad_norm": 0.634342100847371, + "learning_rate": 4.510826174478067e-09, + "loss": 0.2697, + "step": 41902 + }, + { + "epoch": 1.9629456129666933, + "grad_norm": 0.6240445131459766, + "learning_rate": 4.499445806508129e-09, + "loss": 0.2822, + "step": 41903 + }, + { + "epoch": 1.9629924579566216, + "grad_norm": 0.6809054988697267, + "learning_rate": 4.488079799516709e-09, + "loss": 0.286, + "step": 41904 + }, + { + "epoch": 1.9630393029465498, + "grad_norm": 0.582126964488681, + "learning_rate": 4.476728153569032e-09, + "loss": 0.2566, + "step": 41905 + }, + { + "epoch": 1.9630861479364783, + "grad_norm": 0.5905232591368151, + "learning_rate": 4.465390868730879e-09, + "loss": 0.2758, + "step": 41906 + }, + { + "epoch": 1.9631329929264065, + "grad_norm": 0.5706904626697967, + "learning_rate": 4.45406794506692e-09, + "loss": 0.2509, + "step": 41907 + }, + { + "epoch": 1.9631798379163348, + "grad_norm": 0.6396683186752982, + "learning_rate": 4.442759382642381e-09, + "loss": 0.2742, + "step": 41908 + }, + { + "epoch": 1.9632266829062632, + "grad_norm": 0.5692754692337609, + "learning_rate": 4.431465181522765e-09, + "loss": 0.2513, + "step": 41909 + }, + { + "epoch": 1.9632735278961915, + "grad_norm": 0.5733834195533076, + "learning_rate": 4.420185341772743e-09, + "loss": 0.2635, + "step": 41910 + }, + { + "epoch": 1.9633203728861197, + "grad_norm": 0.6157515677138932, + "learning_rate": 4.40891986345754e-09, + "loss": 0.2639, + "step": 41911 + }, + { + "epoch": 1.9633672178760482, + "grad_norm": 0.599993365060881, + "learning_rate": 4.397668746641548e-09, + "loss": 0.2651, + "step": 41912 + }, + { + "epoch": 1.9634140628659766, + "grad_norm": 0.5634543625652728, + "learning_rate": 4.3864319913897165e-09, + "loss": 0.2731, + "step": 41913 + }, + { + "epoch": 1.9634609078559047, + "grad_norm": 0.5776896507380489, + "learning_rate": 4.375209597766717e-09, + "loss": 0.2674, + "step": 41914 + }, + { + "epoch": 1.9635077528458331, + "grad_norm": 0.6193059296945759, + "learning_rate": 4.364001565837217e-09, + "loss": 0.281, + "step": 41915 + }, + { + "epoch": 1.9635545978357616, + "grad_norm": 0.5958806994595404, + "learning_rate": 4.352807895665612e-09, + "loss": 0.2869, + "step": 41916 + }, + { + "epoch": 1.9636014428256898, + "grad_norm": 0.5897632876591274, + "learning_rate": 4.341628587316294e-09, + "loss": 0.2595, + "step": 41917 + }, + { + "epoch": 1.963648287815618, + "grad_norm": 0.6134786432127354, + "learning_rate": 4.330463640853655e-09, + "loss": 0.2791, + "step": 41918 + }, + { + "epoch": 1.9636951328055465, + "grad_norm": 0.641468513926397, + "learning_rate": 4.319313056342089e-09, + "loss": 0.2753, + "step": 41919 + }, + { + "epoch": 1.9637419777954748, + "grad_norm": 0.611077216721197, + "learning_rate": 4.3081768338454345e-09, + "loss": 0.2759, + "step": 41920 + }, + { + "epoch": 1.963788822785403, + "grad_norm": 0.5835842025471722, + "learning_rate": 4.297054973428083e-09, + "loss": 0.2684, + "step": 41921 + }, + { + "epoch": 1.9638356677753315, + "grad_norm": 0.5587974795305005, + "learning_rate": 4.2859474751535955e-09, + "loss": 0.2654, + "step": 41922 + }, + { + "epoch": 1.9638825127652597, + "grad_norm": 0.5752448689608631, + "learning_rate": 4.274854339086642e-09, + "loss": 0.2542, + "step": 41923 + }, + { + "epoch": 1.963929357755188, + "grad_norm": 0.6009457759578303, + "learning_rate": 4.263775565290507e-09, + "loss": 0.2842, + "step": 41924 + }, + { + "epoch": 1.9639762027451164, + "grad_norm": 0.6320890452680212, + "learning_rate": 4.252711153828748e-09, + "loss": 0.2826, + "step": 41925 + }, + { + "epoch": 1.9640230477350449, + "grad_norm": 0.6075082186434769, + "learning_rate": 4.241661104765759e-09, + "loss": 0.2827, + "step": 41926 + }, + { + "epoch": 1.964069892724973, + "grad_norm": 0.6162486025240477, + "learning_rate": 4.230625418164269e-09, + "loss": 0.2808, + "step": 41927 + }, + { + "epoch": 1.9641167377149014, + "grad_norm": 0.5864456077726159, + "learning_rate": 4.21960409408867e-09, + "loss": 0.2798, + "step": 41928 + }, + { + "epoch": 1.9641635827048298, + "grad_norm": 0.5383438474276424, + "learning_rate": 4.208597132601688e-09, + "loss": 0.2478, + "step": 41929 + }, + { + "epoch": 1.964210427694758, + "grad_norm": 0.6407610847872478, + "learning_rate": 4.197604533767163e-09, + "loss": 0.2726, + "step": 41930 + }, + { + "epoch": 1.9642572726846863, + "grad_norm": 0.6401270475437502, + "learning_rate": 4.186626297648099e-09, + "loss": 0.2834, + "step": 41931 + }, + { + "epoch": 1.9643041176746148, + "grad_norm": 0.5646877882150211, + "learning_rate": 4.1756624243075026e-09, + "loss": 0.2554, + "step": 41932 + }, + { + "epoch": 1.964350962664543, + "grad_norm": 0.5856605742260341, + "learning_rate": 4.164712913808932e-09, + "loss": 0.2827, + "step": 41933 + }, + { + "epoch": 1.9643978076544713, + "grad_norm": 0.6471460028019387, + "learning_rate": 4.153777766214839e-09, + "loss": 0.2962, + "step": 41934 + }, + { + "epoch": 1.9644446526443997, + "grad_norm": 0.604114831153639, + "learning_rate": 4.142856981588783e-09, + "loss": 0.2687, + "step": 41935 + }, + { + "epoch": 1.964491497634328, + "grad_norm": 0.6619430956055355, + "learning_rate": 4.131950559992937e-09, + "loss": 0.2847, + "step": 41936 + }, + { + "epoch": 1.9645383426242562, + "grad_norm": 0.6175916360260241, + "learning_rate": 4.1210585014905825e-09, + "loss": 0.2797, + "step": 41937 + }, + { + "epoch": 1.9645851876141847, + "grad_norm": 0.6107820295286296, + "learning_rate": 4.110180806144171e-09, + "loss": 0.2785, + "step": 41938 + }, + { + "epoch": 1.9646320326041131, + "grad_norm": 0.6062484542052511, + "learning_rate": 4.099317474016428e-09, + "loss": 0.2781, + "step": 41939 + }, + { + "epoch": 1.9646788775940414, + "grad_norm": 0.5959960012685844, + "learning_rate": 4.088468505169807e-09, + "loss": 0.2617, + "step": 41940 + }, + { + "epoch": 1.9647257225839696, + "grad_norm": 0.6309615807582766, + "learning_rate": 4.0776338996664776e-09, + "loss": 0.2839, + "step": 41941 + }, + { + "epoch": 1.964772567573898, + "grad_norm": 0.6071128222438477, + "learning_rate": 4.0668136575691686e-09, + "loss": 0.2767, + "step": 41942 + }, + { + "epoch": 1.9648194125638263, + "grad_norm": 0.5646396096451876, + "learning_rate": 4.05600777894033e-09, + "loss": 0.2546, + "step": 41943 + }, + { + "epoch": 1.9648662575537545, + "grad_norm": 0.6012613788528397, + "learning_rate": 4.045216263841578e-09, + "loss": 0.2635, + "step": 41944 + }, + { + "epoch": 1.964913102543683, + "grad_norm": 0.5835585556419186, + "learning_rate": 4.034439112335087e-09, + "loss": 0.2669, + "step": 41945 + }, + { + "epoch": 1.9649599475336113, + "grad_norm": 0.5581111365755053, + "learning_rate": 4.023676324483306e-09, + "loss": 0.2529, + "step": 41946 + }, + { + "epoch": 1.9650067925235395, + "grad_norm": 0.547840941963122, + "learning_rate": 4.012927900347851e-09, + "loss": 0.2431, + "step": 41947 + }, + { + "epoch": 1.965053637513468, + "grad_norm": 0.6185984553060285, + "learning_rate": 4.0021938399906206e-09, + "loss": 0.2587, + "step": 41948 + }, + { + "epoch": 1.9651004825033964, + "grad_norm": 0.6001644210659665, + "learning_rate": 3.991474143473506e-09, + "loss": 0.2647, + "step": 41949 + }, + { + "epoch": 1.9651473274933244, + "grad_norm": 0.5985420877526901, + "learning_rate": 3.980768810858126e-09, + "loss": 0.2729, + "step": 41950 + }, + { + "epoch": 1.965194172483253, + "grad_norm": 0.5652480109953385, + "learning_rate": 3.970077842205821e-09, + "loss": 0.2566, + "step": 41951 + }, + { + "epoch": 1.9652410174731814, + "grad_norm": 0.5628730275903615, + "learning_rate": 3.959401237578764e-09, + "loss": 0.2633, + "step": 41952 + }, + { + "epoch": 1.9652878624631096, + "grad_norm": 0.5785760533234816, + "learning_rate": 3.948738997037738e-09, + "loss": 0.2669, + "step": 41953 + }, + { + "epoch": 1.9653347074530378, + "grad_norm": 0.6056965712211806, + "learning_rate": 3.9380911206443605e-09, + "loss": 0.2737, + "step": 41954 + }, + { + "epoch": 1.9653815524429663, + "grad_norm": 0.6012328729773238, + "learning_rate": 3.927457608459973e-09, + "loss": 0.2696, + "step": 41955 + }, + { + "epoch": 1.9654283974328945, + "grad_norm": 0.6436996950247326, + "learning_rate": 3.916838460545636e-09, + "loss": 0.2822, + "step": 41956 + }, + { + "epoch": 1.9654752424228228, + "grad_norm": 0.6186304504257653, + "learning_rate": 3.9062336769626895e-09, + "loss": 0.2763, + "step": 41957 + }, + { + "epoch": 1.9655220874127513, + "grad_norm": 0.545768393317864, + "learning_rate": 3.8956432577719196e-09, + "loss": 0.2487, + "step": 41958 + }, + { + "epoch": 1.9655689324026795, + "grad_norm": 0.6623951811544592, + "learning_rate": 3.88506720303411e-09, + "loss": 0.2844, + "step": 41959 + }, + { + "epoch": 1.9656157773926077, + "grad_norm": 0.6089603692387087, + "learning_rate": 3.8745055128106e-09, + "loss": 0.2638, + "step": 41960 + }, + { + "epoch": 1.9656626223825362, + "grad_norm": 0.5972938012899084, + "learning_rate": 3.863958187161898e-09, + "loss": 0.271, + "step": 41961 + }, + { + "epoch": 1.9657094673724647, + "grad_norm": 0.604461240120759, + "learning_rate": 3.853425226148788e-09, + "loss": 0.2704, + "step": 41962 + }, + { + "epoch": 1.9657563123623927, + "grad_norm": 0.5724075096906773, + "learning_rate": 3.8429066298317775e-09, + "loss": 0.2646, + "step": 41963 + }, + { + "epoch": 1.9658031573523211, + "grad_norm": 0.6017580465041723, + "learning_rate": 3.832402398271373e-09, + "loss": 0.2638, + "step": 41964 + }, + { + "epoch": 1.9658500023422496, + "grad_norm": 0.597821116557017, + "learning_rate": 3.82191253152836e-09, + "loss": 0.279, + "step": 41965 + }, + { + "epoch": 1.9658968473321778, + "grad_norm": 0.6044086125133608, + "learning_rate": 3.81143702966269e-09, + "loss": 0.269, + "step": 41966 + }, + { + "epoch": 1.965943692322106, + "grad_norm": 0.6139776933837267, + "learning_rate": 3.800975892734593e-09, + "loss": 0.2806, + "step": 41967 + }, + { + "epoch": 1.9659905373120345, + "grad_norm": 0.6353975096876284, + "learning_rate": 3.790529120804853e-09, + "loss": 0.2839, + "step": 41968 + }, + { + "epoch": 1.9660373823019628, + "grad_norm": 0.5965408395952793, + "learning_rate": 3.780096713932868e-09, + "loss": 0.276, + "step": 41969 + }, + { + "epoch": 1.966084227291891, + "grad_norm": 0.6172331631865654, + "learning_rate": 3.769678672179422e-09, + "loss": 0.282, + "step": 41970 + }, + { + "epoch": 1.9661310722818195, + "grad_norm": 0.548802577730136, + "learning_rate": 3.759274995603912e-09, + "loss": 0.2544, + "step": 41971 + }, + { + "epoch": 1.9661779172717477, + "grad_norm": 0.6050923676450917, + "learning_rate": 3.74888568426629e-09, + "loss": 0.2661, + "step": 41972 + }, + { + "epoch": 1.966224762261676, + "grad_norm": 0.6054189010305854, + "learning_rate": 3.738510738226508e-09, + "loss": 0.2807, + "step": 41973 + }, + { + "epoch": 1.9662716072516044, + "grad_norm": 0.5861884891453412, + "learning_rate": 3.7281501575439635e-09, + "loss": 0.2627, + "step": 41974 + }, + { + "epoch": 1.966318452241533, + "grad_norm": 0.5938723879689226, + "learning_rate": 3.7178039422786083e-09, + "loss": 0.263, + "step": 41975 + }, + { + "epoch": 1.9663652972314611, + "grad_norm": 0.5783516780524065, + "learning_rate": 3.7074720924898387e-09, + "loss": 0.2671, + "step": 41976 + }, + { + "epoch": 1.9664121422213894, + "grad_norm": 0.6228839051704717, + "learning_rate": 3.69715460823733e-09, + "loss": 0.2633, + "step": 41977 + }, + { + "epoch": 1.9664589872113178, + "grad_norm": 0.5601629875281875, + "learning_rate": 3.686851489580201e-09, + "loss": 0.2797, + "step": 41978 + }, + { + "epoch": 1.966505832201246, + "grad_norm": 0.5950755820081316, + "learning_rate": 3.676562736577849e-09, + "loss": 0.2558, + "step": 41979 + }, + { + "epoch": 1.9665526771911743, + "grad_norm": 0.5903524466293474, + "learning_rate": 3.666288349289393e-09, + "loss": 0.2809, + "step": 41980 + }, + { + "epoch": 1.9665995221811028, + "grad_norm": 0.6099248154170994, + "learning_rate": 3.6560283277739528e-09, + "loss": 0.2594, + "step": 41981 + }, + { + "epoch": 1.966646367171031, + "grad_norm": 0.5967263893277068, + "learning_rate": 3.6457826720906476e-09, + "loss": 0.266, + "step": 41982 + }, + { + "epoch": 1.9666932121609593, + "grad_norm": 0.6039145674933226, + "learning_rate": 3.6355513822985966e-09, + "loss": 0.2787, + "step": 41983 + }, + { + "epoch": 1.9667400571508877, + "grad_norm": 0.6161413523231543, + "learning_rate": 3.6253344584563643e-09, + "loss": 0.2641, + "step": 41984 + }, + { + "epoch": 1.9667869021408162, + "grad_norm": 0.5911621035011715, + "learning_rate": 3.61513190062307e-09, + "loss": 0.265, + "step": 41985 + }, + { + "epoch": 1.9668337471307442, + "grad_norm": 0.5857755231882917, + "learning_rate": 3.6049437088570005e-09, + "loss": 0.268, + "step": 41986 + }, + { + "epoch": 1.9668805921206727, + "grad_norm": 0.6062729328539752, + "learning_rate": 3.594769883217275e-09, + "loss": 0.2794, + "step": 41987 + }, + { + "epoch": 1.9669274371106011, + "grad_norm": 0.6063773409254256, + "learning_rate": 3.5846104237619028e-09, + "loss": 0.2714, + "step": 41988 + }, + { + "epoch": 1.9669742821005294, + "grad_norm": 0.6057229838259744, + "learning_rate": 3.574465330550003e-09, + "loss": 0.2806, + "step": 41989 + }, + { + "epoch": 1.9670211270904576, + "grad_norm": 0.5387182387044085, + "learning_rate": 3.5643346036393077e-09, + "loss": 0.2533, + "step": 41990 + }, + { + "epoch": 1.967067972080386, + "grad_norm": 0.6372243601990134, + "learning_rate": 3.5542182430886587e-09, + "loss": 0.2837, + "step": 41991 + }, + { + "epoch": 1.9671148170703143, + "grad_norm": 0.601970758564685, + "learning_rate": 3.5441162489557866e-09, + "loss": 0.2759, + "step": 41992 + }, + { + "epoch": 1.9671616620602426, + "grad_norm": 0.6067237672697086, + "learning_rate": 3.534028621299257e-09, + "loss": 0.2791, + "step": 41993 + }, + { + "epoch": 1.967208507050171, + "grad_norm": 0.5833848666142751, + "learning_rate": 3.5239553601768007e-09, + "loss": 0.2612, + "step": 41994 + }, + { + "epoch": 1.9672553520400993, + "grad_norm": 0.5577017126597952, + "learning_rate": 3.513896465646427e-09, + "loss": 0.2682, + "step": 41995 + }, + { + "epoch": 1.9673021970300275, + "grad_norm": 0.5625228876801592, + "learning_rate": 3.503851937766145e-09, + "loss": 0.2559, + "step": 41996 + }, + { + "epoch": 1.967349042019956, + "grad_norm": 0.5802158548168642, + "learning_rate": 3.493821776593964e-09, + "loss": 0.2475, + "step": 41997 + }, + { + "epoch": 1.9673958870098844, + "grad_norm": 0.6412700359004158, + "learning_rate": 3.4838059821870606e-09, + "loss": 0.2805, + "step": 41998 + }, + { + "epoch": 1.9674427319998125, + "grad_norm": 0.5919641363859758, + "learning_rate": 3.473804554603721e-09, + "loss": 0.2664, + "step": 41999 + }, + { + "epoch": 1.967489576989741, + "grad_norm": 0.5777543560257997, + "learning_rate": 3.463817493900845e-09, + "loss": 0.2708, + "step": 42000 + }, + { + "epoch": 1.9675364219796694, + "grad_norm": 0.5825612902715172, + "learning_rate": 3.453844800136441e-09, + "loss": 0.2751, + "step": 42001 + }, + { + "epoch": 1.9675832669695976, + "grad_norm": 0.5980137420205017, + "learning_rate": 3.4438864733674084e-09, + "loss": 0.2693, + "step": 42002 + }, + { + "epoch": 1.9676301119595259, + "grad_norm": 0.6321081032760596, + "learning_rate": 3.4339425136517555e-09, + "loss": 0.276, + "step": 42003 + }, + { + "epoch": 1.9676769569494543, + "grad_norm": 0.6282145092032856, + "learning_rate": 3.4240129210458274e-09, + "loss": 0.2823, + "step": 42004 + }, + { + "epoch": 1.9677238019393826, + "grad_norm": 0.5826648168258162, + "learning_rate": 3.414097695607632e-09, + "loss": 0.2452, + "step": 42005 + }, + { + "epoch": 1.9677706469293108, + "grad_norm": 0.580865268090276, + "learning_rate": 3.404196837393514e-09, + "loss": 0.2705, + "step": 42006 + }, + { + "epoch": 1.9678174919192393, + "grad_norm": 0.6298592329667682, + "learning_rate": 3.39431034646065e-09, + "loss": 0.2749, + "step": 42007 + }, + { + "epoch": 1.9678643369091675, + "grad_norm": 0.5809718451992779, + "learning_rate": 3.3844382228662154e-09, + "loss": 0.2596, + "step": 42008 + }, + { + "epoch": 1.9679111818990958, + "grad_norm": 0.6149766667653525, + "learning_rate": 3.3745804666668327e-09, + "loss": 0.2767, + "step": 42009 + }, + { + "epoch": 1.9679580268890242, + "grad_norm": 0.6143587971286817, + "learning_rate": 3.3647370779194e-09, + "loss": 0.2741, + "step": 42010 + }, + { + "epoch": 1.9680048718789527, + "grad_norm": 0.591764977099131, + "learning_rate": 3.354908056679984e-09, + "loss": 0.2663, + "step": 42011 + }, + { + "epoch": 1.968051716868881, + "grad_norm": 0.5995980863879622, + "learning_rate": 3.3450934030060388e-09, + "loss": 0.2744, + "step": 42012 + }, + { + "epoch": 1.9680985618588092, + "grad_norm": 0.6107522291629454, + "learning_rate": 3.3352931169530755e-09, + "loss": 0.2749, + "step": 42013 + }, + { + "epoch": 1.9681454068487376, + "grad_norm": 0.6346665867013417, + "learning_rate": 3.3255071985782705e-09, + "loss": 0.2801, + "step": 42014 + }, + { + "epoch": 1.9681922518386659, + "grad_norm": 0.6360616587145608, + "learning_rate": 3.31573564793769e-09, + "loss": 0.2795, + "step": 42015 + }, + { + "epoch": 1.968239096828594, + "grad_norm": 0.5972742027011401, + "learning_rate": 3.3059784650874006e-09, + "loss": 0.2684, + "step": 42016 + }, + { + "epoch": 1.9682859418185226, + "grad_norm": 0.6119120749670005, + "learning_rate": 3.2962356500837455e-09, + "loss": 0.2782, + "step": 42017 + }, + { + "epoch": 1.9683327868084508, + "grad_norm": 0.6062640382368761, + "learning_rate": 3.2865072029827918e-09, + "loss": 0.2693, + "step": 42018 + }, + { + "epoch": 1.968379631798379, + "grad_norm": 0.652717269751401, + "learning_rate": 3.2767931238406046e-09, + "loss": 0.2646, + "step": 42019 + }, + { + "epoch": 1.9684264767883075, + "grad_norm": 0.604997832805181, + "learning_rate": 3.2670934127126965e-09, + "loss": 0.2637, + "step": 42020 + }, + { + "epoch": 1.968473321778236, + "grad_norm": 0.603542826697499, + "learning_rate": 3.2574080696551324e-09, + "loss": 0.2839, + "step": 42021 + }, + { + "epoch": 1.968520166768164, + "grad_norm": 0.6142591340788958, + "learning_rate": 3.247737094723702e-09, + "loss": 0.2792, + "step": 42022 + }, + { + "epoch": 1.9685670117580925, + "grad_norm": 0.6361210592043814, + "learning_rate": 3.2380804879739158e-09, + "loss": 0.2819, + "step": 42023 + }, + { + "epoch": 1.968613856748021, + "grad_norm": 0.6069255865801485, + "learning_rate": 3.228438249461563e-09, + "loss": 0.2899, + "step": 42024 + }, + { + "epoch": 1.9686607017379492, + "grad_norm": 0.5926950562575272, + "learning_rate": 3.2188103792421545e-09, + "loss": 0.266, + "step": 42025 + }, + { + "epoch": 1.9687075467278774, + "grad_norm": 0.5945535411910158, + "learning_rate": 3.209196877370646e-09, + "loss": 0.2871, + "step": 42026 + }, + { + "epoch": 1.9687543917178059, + "grad_norm": 0.582857619814824, + "learning_rate": 3.199597743902827e-09, + "loss": 0.2756, + "step": 42027 + }, + { + "epoch": 1.968801236707734, + "grad_norm": 0.5899090412607172, + "learning_rate": 3.1900129788939303e-09, + "loss": 0.2794, + "step": 42028 + }, + { + "epoch": 1.9688480816976623, + "grad_norm": 0.6409478742391544, + "learning_rate": 3.1804425823986353e-09, + "loss": 0.2879, + "step": 42029 + }, + { + "epoch": 1.9688949266875908, + "grad_norm": 0.6287978762788208, + "learning_rate": 3.170886554472452e-09, + "loss": 0.2687, + "step": 42030 + }, + { + "epoch": 1.968941771677519, + "grad_norm": 0.6099895650528823, + "learning_rate": 3.1613448951703373e-09, + "loss": 0.2548, + "step": 42031 + }, + { + "epoch": 1.9689886166674473, + "grad_norm": 0.5748250400539697, + "learning_rate": 3.1518176045469697e-09, + "loss": 0.2605, + "step": 42032 + }, + { + "epoch": 1.9690354616573758, + "grad_norm": 0.5756166869818594, + "learning_rate": 3.142304682657582e-09, + "loss": 0.2734, + "step": 42033 + }, + { + "epoch": 1.9690823066473042, + "grad_norm": 0.6259607681739592, + "learning_rate": 3.1328061295565758e-09, + "loss": 0.2727, + "step": 42034 + }, + { + "epoch": 1.9691291516372322, + "grad_norm": 0.637291775864714, + "learning_rate": 3.1233219452986298e-09, + "loss": 0.2702, + "step": 42035 + }, + { + "epoch": 1.9691759966271607, + "grad_norm": 0.6204061187452561, + "learning_rate": 3.1138521299384215e-09, + "loss": 0.2656, + "step": 42036 + }, + { + "epoch": 1.9692228416170892, + "grad_norm": 0.6538022392908641, + "learning_rate": 3.104396683530353e-09, + "loss": 0.2953, + "step": 42037 + }, + { + "epoch": 1.9692696866070174, + "grad_norm": 0.5596186216065996, + "learning_rate": 3.094955606129102e-09, + "loss": 0.2502, + "step": 42038 + }, + { + "epoch": 1.9693165315969456, + "grad_norm": 0.614471840940702, + "learning_rate": 3.0855288977887922e-09, + "loss": 0.2629, + "step": 42039 + }, + { + "epoch": 1.969363376586874, + "grad_norm": 0.5733231233270036, + "learning_rate": 3.0761165585635466e-09, + "loss": 0.2534, + "step": 42040 + }, + { + "epoch": 1.9694102215768023, + "grad_norm": 0.6561503049172805, + "learning_rate": 3.0667185885077666e-09, + "loss": 0.2818, + "step": 42041 + }, + { + "epoch": 1.9694570665667306, + "grad_norm": 0.6117934509761414, + "learning_rate": 3.0573349876752977e-09, + "loss": 0.2795, + "step": 42042 + }, + { + "epoch": 1.969503911556659, + "grad_norm": 0.5716897424817853, + "learning_rate": 3.0479657561202635e-09, + "loss": 0.2723, + "step": 42043 + }, + { + "epoch": 1.9695507565465873, + "grad_norm": 0.6446215489229494, + "learning_rate": 3.0386108938967872e-09, + "loss": 0.2753, + "step": 42044 + }, + { + "epoch": 1.9695976015365155, + "grad_norm": 0.6199377473356923, + "learning_rate": 3.029270401058437e-09, + "loss": 0.2746, + "step": 42045 + }, + { + "epoch": 1.969644446526444, + "grad_norm": 0.5738609448184682, + "learning_rate": 3.0199442776590593e-09, + "loss": 0.277, + "step": 42046 + }, + { + "epoch": 1.9696912915163725, + "grad_norm": 0.6678701323168468, + "learning_rate": 3.010632523752499e-09, + "loss": 0.2721, + "step": 42047 + }, + { + "epoch": 1.9697381365063007, + "grad_norm": 0.5840556159850749, + "learning_rate": 3.00133513939177e-09, + "loss": 0.2796, + "step": 42048 + }, + { + "epoch": 1.969784981496229, + "grad_norm": 0.603855304105125, + "learning_rate": 2.9920521246309952e-09, + "loss": 0.272, + "step": 42049 + }, + { + "epoch": 1.9698318264861574, + "grad_norm": 0.5920190877579894, + "learning_rate": 2.9827834795234655e-09, + "loss": 0.2698, + "step": 42050 + }, + { + "epoch": 1.9698786714760856, + "grad_norm": 0.6187267029140373, + "learning_rate": 2.973529204122194e-09, + "loss": 0.2769, + "step": 42051 + }, + { + "epoch": 1.9699255164660139, + "grad_norm": 0.601880534633269, + "learning_rate": 2.9642892984807493e-09, + "loss": 0.2758, + "step": 42052 + }, + { + "epoch": 1.9699723614559423, + "grad_norm": 0.6089187675284042, + "learning_rate": 2.955063762652144e-09, + "loss": 0.2754, + "step": 42053 + }, + { + "epoch": 1.9700192064458706, + "grad_norm": 0.559909082400049, + "learning_rate": 2.945852596689669e-09, + "loss": 0.258, + "step": 42054 + }, + { + "epoch": 1.9700660514357988, + "grad_norm": 0.5721477336483436, + "learning_rate": 2.93665580064606e-09, + "loss": 0.2565, + "step": 42055 + }, + { + "epoch": 1.9701128964257273, + "grad_norm": 0.6198549713406272, + "learning_rate": 2.9274733745746078e-09, + "loss": 0.2665, + "step": 42056 + }, + { + "epoch": 1.9701597414156558, + "grad_norm": 0.6344638922245229, + "learning_rate": 2.9183053185274922e-09, + "loss": 0.2696, + "step": 42057 + }, + { + "epoch": 1.9702065864055838, + "grad_norm": 0.6000843924845508, + "learning_rate": 2.9091516325582823e-09, + "loss": 0.2604, + "step": 42058 + }, + { + "epoch": 1.9702534313955122, + "grad_norm": 0.5628502008390529, + "learning_rate": 2.9000123167191587e-09, + "loss": 0.2548, + "step": 42059 + }, + { + "epoch": 1.9703002763854407, + "grad_norm": 0.5736911491782956, + "learning_rate": 2.890887371062856e-09, + "loss": 0.2594, + "step": 42060 + }, + { + "epoch": 1.970347121375369, + "grad_norm": 0.5983219742414028, + "learning_rate": 2.8817767956421104e-09, + "loss": 0.2637, + "step": 42061 + }, + { + "epoch": 1.9703939663652972, + "grad_norm": 0.5808309236322344, + "learning_rate": 2.8726805905088253e-09, + "loss": 0.2659, + "step": 42062 + }, + { + "epoch": 1.9704408113552256, + "grad_norm": 0.6212594057132387, + "learning_rate": 2.8635987557157352e-09, + "loss": 0.2744, + "step": 42063 + }, + { + "epoch": 1.9704876563451539, + "grad_norm": 0.6007979175658387, + "learning_rate": 2.8545312913150214e-09, + "loss": 0.2682, + "step": 42064 + }, + { + "epoch": 1.9705345013350821, + "grad_norm": 0.6290852407623096, + "learning_rate": 2.8454781973585867e-09, + "loss": 0.2825, + "step": 42065 + }, + { + "epoch": 1.9705813463250106, + "grad_norm": 0.5828082747246874, + "learning_rate": 2.836439473899166e-09, + "loss": 0.2581, + "step": 42066 + }, + { + "epoch": 1.9706281913149388, + "grad_norm": 0.5596933304617702, + "learning_rate": 2.8274151209881085e-09, + "loss": 0.2451, + "step": 42067 + }, + { + "epoch": 1.970675036304867, + "grad_norm": 0.6395210892030792, + "learning_rate": 2.818405138677871e-09, + "loss": 0.2812, + "step": 42068 + }, + { + "epoch": 1.9707218812947955, + "grad_norm": 0.5808966319931969, + "learning_rate": 2.8094095270198017e-09, + "loss": 0.2556, + "step": 42069 + }, + { + "epoch": 1.970768726284724, + "grad_norm": 0.6172229403824621, + "learning_rate": 2.8004282860660813e-09, + "loss": 0.2699, + "step": 42070 + }, + { + "epoch": 1.970815571274652, + "grad_norm": 0.6240186937451162, + "learning_rate": 2.791461415868335e-09, + "loss": 0.2681, + "step": 42071 + }, + { + "epoch": 1.9708624162645805, + "grad_norm": 0.5795483305386971, + "learning_rate": 2.7825089164779107e-09, + "loss": 0.2789, + "step": 42072 + }, + { + "epoch": 1.970909261254509, + "grad_norm": 0.5978684693881561, + "learning_rate": 2.773570787946711e-09, + "loss": 0.2699, + "step": 42073 + }, + { + "epoch": 1.9709561062444372, + "grad_norm": 0.5819520905031602, + "learning_rate": 2.7646470303258068e-09, + "loss": 0.2563, + "step": 42074 + }, + { + "epoch": 1.9710029512343654, + "grad_norm": 0.5486246407101713, + "learning_rate": 2.7557376436668227e-09, + "loss": 0.2523, + "step": 42075 + }, + { + "epoch": 1.9710497962242939, + "grad_norm": 0.6147798064216609, + "learning_rate": 2.7468426280208293e-09, + "loss": 0.2651, + "step": 42076 + }, + { + "epoch": 1.9710966412142221, + "grad_norm": 0.5907005450416335, + "learning_rate": 2.737961983438897e-09, + "loss": 0.2697, + "step": 42077 + }, + { + "epoch": 1.9711434862041504, + "grad_norm": 0.6399591669110729, + "learning_rate": 2.729095709972651e-09, + "loss": 0.2948, + "step": 42078 + }, + { + "epoch": 1.9711903311940788, + "grad_norm": 0.6483499520739543, + "learning_rate": 2.720243807672607e-09, + "loss": 0.3051, + "step": 42079 + }, + { + "epoch": 1.971237176184007, + "grad_norm": 0.6047076297612345, + "learning_rate": 2.711406276589834e-09, + "loss": 0.2578, + "step": 42080 + }, + { + "epoch": 1.9712840211739353, + "grad_norm": 0.577165771656802, + "learning_rate": 2.7025831167754037e-09, + "loss": 0.269, + "step": 42081 + }, + { + "epoch": 1.9713308661638638, + "grad_norm": 0.5856657415306132, + "learning_rate": 2.6937743282798303e-09, + "loss": 0.2609, + "step": 42082 + }, + { + "epoch": 1.9713777111537922, + "grad_norm": 0.5825248764334808, + "learning_rate": 2.684979911154184e-09, + "loss": 0.2664, + "step": 42083 + }, + { + "epoch": 1.9714245561437205, + "grad_norm": 0.6013311585778373, + "learning_rate": 2.6761998654484254e-09, + "loss": 0.2739, + "step": 42084 + }, + { + "epoch": 1.9714714011336487, + "grad_norm": 0.5628032267628144, + "learning_rate": 2.667434191213625e-09, + "loss": 0.258, + "step": 42085 + }, + { + "epoch": 1.9715182461235772, + "grad_norm": 0.6460151289845425, + "learning_rate": 2.6586828885000192e-09, + "loss": 0.265, + "step": 42086 + }, + { + "epoch": 1.9715650911135054, + "grad_norm": 0.601393276924848, + "learning_rate": 2.6499459573581242e-09, + "loss": 0.2786, + "step": 42087 + }, + { + "epoch": 1.9716119361034337, + "grad_norm": 0.5957813526235121, + "learning_rate": 2.6412233978379e-09, + "loss": 0.2712, + "step": 42088 + }, + { + "epoch": 1.9716587810933621, + "grad_norm": 0.5673013263315752, + "learning_rate": 2.632515209989861e-09, + "loss": 0.2599, + "step": 42089 + }, + { + "epoch": 1.9717056260832904, + "grad_norm": 0.5450560370178444, + "learning_rate": 2.6238213938642453e-09, + "loss": 0.2444, + "step": 42090 + }, + { + "epoch": 1.9717524710732186, + "grad_norm": 0.6045950711066048, + "learning_rate": 2.615141949510458e-09, + "loss": 0.2709, + "step": 42091 + }, + { + "epoch": 1.971799316063147, + "grad_norm": 0.5894940814069901, + "learning_rate": 2.606476876979014e-09, + "loss": 0.2622, + "step": 42092 + }, + { + "epoch": 1.9718461610530755, + "grad_norm": 0.5559682374643375, + "learning_rate": 2.5978261763195954e-09, + "loss": 0.2544, + "step": 42093 + }, + { + "epoch": 1.9718930060430035, + "grad_norm": 0.5987460685385262, + "learning_rate": 2.5891898475818853e-09, + "loss": 0.2589, + "step": 42094 + }, + { + "epoch": 1.971939851032932, + "grad_norm": 0.5962680958120464, + "learning_rate": 2.580567890815844e-09, + "loss": 0.2777, + "step": 42095 + }, + { + "epoch": 1.9719866960228605, + "grad_norm": 0.5876454000666027, + "learning_rate": 2.5719603060708754e-09, + "loss": 0.2588, + "step": 42096 + }, + { + "epoch": 1.9720335410127887, + "grad_norm": 0.6133834100128955, + "learning_rate": 2.5633670933966626e-09, + "loss": 0.2677, + "step": 42097 + }, + { + "epoch": 1.972080386002717, + "grad_norm": 0.5925735079729725, + "learning_rate": 2.554788252842333e-09, + "loss": 0.2838, + "step": 42098 + }, + { + "epoch": 1.9721272309926454, + "grad_norm": 0.5873555004498437, + "learning_rate": 2.5462237844575686e-09, + "loss": 0.2723, + "step": 42099 + }, + { + "epoch": 1.9721740759825737, + "grad_norm": 0.5845299974547853, + "learning_rate": 2.5376736882917753e-09, + "loss": 0.2718, + "step": 42100 + }, + { + "epoch": 1.972220920972502, + "grad_norm": 0.5961865137733381, + "learning_rate": 2.529137964393802e-09, + "loss": 0.2616, + "step": 42101 + }, + { + "epoch": 1.9722677659624304, + "grad_norm": 0.6256638092110924, + "learning_rate": 2.5206166128127763e-09, + "loss": 0.2628, + "step": 42102 + }, + { + "epoch": 1.9723146109523586, + "grad_norm": 0.6005423784393672, + "learning_rate": 2.512109633598103e-09, + "loss": 0.2647, + "step": 42103 + }, + { + "epoch": 1.9723614559422868, + "grad_norm": 0.5877580394073, + "learning_rate": 2.5036170267986327e-09, + "loss": 0.2736, + "step": 42104 + }, + { + "epoch": 1.9724083009322153, + "grad_norm": 0.6157759994534139, + "learning_rate": 2.4951387924626593e-09, + "loss": 0.2674, + "step": 42105 + }, + { + "epoch": 1.9724551459221438, + "grad_norm": 0.5496934586521288, + "learning_rate": 2.486674930639865e-09, + "loss": 0.252, + "step": 42106 + }, + { + "epoch": 1.9725019909120718, + "grad_norm": 0.6934168188886656, + "learning_rate": 2.478225441378268e-09, + "loss": 0.2749, + "step": 42107 + }, + { + "epoch": 1.9725488359020003, + "grad_norm": 0.5922222941309578, + "learning_rate": 2.469790324726995e-09, + "loss": 0.2599, + "step": 42108 + }, + { + "epoch": 1.9725956808919287, + "grad_norm": 0.6119154928484786, + "learning_rate": 2.461369580734063e-09, + "loss": 0.281, + "step": 42109 + }, + { + "epoch": 1.972642525881857, + "grad_norm": 0.6119056598384598, + "learning_rate": 2.4529632094486e-09, + "loss": 0.2644, + "step": 42110 + }, + { + "epoch": 1.9726893708717852, + "grad_norm": 0.6424985295478016, + "learning_rate": 2.444571210918345e-09, + "loss": 0.2915, + "step": 42111 + }, + { + "epoch": 1.9727362158617137, + "grad_norm": 0.574842838150189, + "learning_rate": 2.4361935851918705e-09, + "loss": 0.264, + "step": 42112 + }, + { + "epoch": 1.972783060851642, + "grad_norm": 0.5931599901166827, + "learning_rate": 2.4278303323174713e-09, + "loss": 0.2762, + "step": 42113 + }, + { + "epoch": 1.9728299058415701, + "grad_norm": 0.5757879615306577, + "learning_rate": 2.4194814523428866e-09, + "loss": 0.2669, + "step": 42114 + }, + { + "epoch": 1.9728767508314986, + "grad_norm": 0.5913277761596293, + "learning_rate": 2.4111469453166892e-09, + "loss": 0.2786, + "step": 42115 + }, + { + "epoch": 1.9729235958214268, + "grad_norm": 0.6006613010893745, + "learning_rate": 2.4028268112866184e-09, + "loss": 0.2749, + "step": 42116 + }, + { + "epoch": 1.972970440811355, + "grad_norm": 0.6069239960172499, + "learning_rate": 2.3945210503004134e-09, + "loss": 0.2801, + "step": 42117 + }, + { + "epoch": 1.9730172858012835, + "grad_norm": 0.6300931328838076, + "learning_rate": 2.3862296624058144e-09, + "loss": 0.2843, + "step": 42118 + }, + { + "epoch": 1.973064130791212, + "grad_norm": 0.6083591110780963, + "learning_rate": 2.377952647651116e-09, + "loss": 0.2717, + "step": 42119 + }, + { + "epoch": 1.9731109757811403, + "grad_norm": 0.6139715610846311, + "learning_rate": 2.369690006083225e-09, + "loss": 0.2848, + "step": 42120 + }, + { + "epoch": 1.9731578207710685, + "grad_norm": 0.6211076360508531, + "learning_rate": 2.3614417377501584e-09, + "loss": 0.2566, + "step": 42121 + }, + { + "epoch": 1.973204665760997, + "grad_norm": 0.598399429905467, + "learning_rate": 2.3532078426991014e-09, + "loss": 0.2658, + "step": 42122 + }, + { + "epoch": 1.9732515107509252, + "grad_norm": 0.597788251770631, + "learning_rate": 2.3449883209777926e-09, + "loss": 0.2739, + "step": 42123 + }, + { + "epoch": 1.9732983557408534, + "grad_norm": 0.5890079763033284, + "learning_rate": 2.336783172633139e-09, + "loss": 0.2663, + "step": 42124 + }, + { + "epoch": 1.973345200730782, + "grad_norm": 0.6322330566223562, + "learning_rate": 2.3285923977126033e-09, + "loss": 0.2802, + "step": 42125 + }, + { + "epoch": 1.9733920457207101, + "grad_norm": 0.5506146460941528, + "learning_rate": 2.3204159962633697e-09, + "loss": 0.2449, + "step": 42126 + }, + { + "epoch": 1.9734388907106384, + "grad_norm": 0.6217721851033522, + "learning_rate": 2.3122539683323454e-09, + "loss": 0.2661, + "step": 42127 + }, + { + "epoch": 1.9734857357005668, + "grad_norm": 0.5932943757810272, + "learning_rate": 2.3041063139664366e-09, + "loss": 0.2788, + "step": 42128 + }, + { + "epoch": 1.9735325806904953, + "grad_norm": 0.5696938791760023, + "learning_rate": 2.2959730332125506e-09, + "loss": 0.2589, + "step": 42129 + }, + { + "epoch": 1.9735794256804233, + "grad_norm": 0.5520581919760433, + "learning_rate": 2.2878541261175944e-09, + "loss": 0.2463, + "step": 42130 + }, + { + "epoch": 1.9736262706703518, + "grad_norm": 0.5706824285935875, + "learning_rate": 2.2797495927281977e-09, + "loss": 0.2484, + "step": 42131 + }, + { + "epoch": 1.9736731156602803, + "grad_norm": 0.5886827700474099, + "learning_rate": 2.2716594330912667e-09, + "loss": 0.2638, + "step": 42132 + }, + { + "epoch": 1.9737199606502085, + "grad_norm": 0.623520238400063, + "learning_rate": 2.2635836472531537e-09, + "loss": 0.2681, + "step": 42133 + }, + { + "epoch": 1.9737668056401367, + "grad_norm": 0.5779730426204996, + "learning_rate": 2.25552223526021e-09, + "loss": 0.2648, + "step": 42134 + }, + { + "epoch": 1.9738136506300652, + "grad_norm": 0.6632903562652117, + "learning_rate": 2.2474751971587883e-09, + "loss": 0.257, + "step": 42135 + }, + { + "epoch": 1.9738604956199934, + "grad_norm": 0.5778009799433177, + "learning_rate": 2.239442532995795e-09, + "loss": 0.2645, + "step": 42136 + }, + { + "epoch": 1.9739073406099217, + "grad_norm": 0.6484374274669663, + "learning_rate": 2.231424242816749e-09, + "loss": 0.2857, + "step": 42137 + }, + { + "epoch": 1.9739541855998501, + "grad_norm": 0.5552246235750226, + "learning_rate": 2.223420326668002e-09, + "loss": 0.2471, + "step": 42138 + }, + { + "epoch": 1.9740010305897784, + "grad_norm": 0.5913653018230798, + "learning_rate": 2.2154307845959067e-09, + "loss": 0.2688, + "step": 42139 + }, + { + "epoch": 1.9740478755797066, + "grad_norm": 0.5949289519141822, + "learning_rate": 2.2074556166459816e-09, + "loss": 0.2694, + "step": 42140 + }, + { + "epoch": 1.974094720569635, + "grad_norm": 0.6117648207835977, + "learning_rate": 2.1994948228645784e-09, + "loss": 0.2852, + "step": 42141 + }, + { + "epoch": 1.9741415655595635, + "grad_norm": 0.6180778438848937, + "learning_rate": 2.191548403296939e-09, + "loss": 0.2729, + "step": 42142 + }, + { + "epoch": 1.9741884105494916, + "grad_norm": 0.6256359324920072, + "learning_rate": 2.1836163579894154e-09, + "loss": 0.2728, + "step": 42143 + }, + { + "epoch": 1.97423525553942, + "grad_norm": 0.541957167268977, + "learning_rate": 2.1756986869872486e-09, + "loss": 0.2435, + "step": 42144 + }, + { + "epoch": 1.9742821005293485, + "grad_norm": 0.6545350682008266, + "learning_rate": 2.167795390336236e-09, + "loss": 0.282, + "step": 42145 + }, + { + "epoch": 1.9743289455192767, + "grad_norm": 0.5908615482308585, + "learning_rate": 2.159906468081896e-09, + "loss": 0.2742, + "step": 42146 + }, + { + "epoch": 1.974375790509205, + "grad_norm": 0.6308272097268883, + "learning_rate": 2.152031920269193e-09, + "loss": 0.2768, + "step": 42147 + }, + { + "epoch": 1.9744226354991334, + "grad_norm": 0.6291355962489648, + "learning_rate": 2.144171746943924e-09, + "loss": 0.2848, + "step": 42148 + }, + { + "epoch": 1.9744694804890617, + "grad_norm": 0.5801215957419303, + "learning_rate": 2.136325948151052e-09, + "loss": 0.2501, + "step": 42149 + }, + { + "epoch": 1.97451632547899, + "grad_norm": 0.5847614358607159, + "learning_rate": 2.1284945239358203e-09, + "loss": 0.2718, + "step": 42150 + }, + { + "epoch": 1.9745631704689184, + "grad_norm": 0.6276006474059027, + "learning_rate": 2.120677474343191e-09, + "loss": 0.2741, + "step": 42151 + }, + { + "epoch": 1.9746100154588466, + "grad_norm": 0.5842718342186167, + "learning_rate": 2.1128747994184075e-09, + "loss": 0.2659, + "step": 42152 + }, + { + "epoch": 1.9746568604487749, + "grad_norm": 0.5979185183630593, + "learning_rate": 2.1050864992061547e-09, + "loss": 0.2625, + "step": 42153 + }, + { + "epoch": 1.9747037054387033, + "grad_norm": 0.6163261758676168, + "learning_rate": 2.0973125737511203e-09, + "loss": 0.2596, + "step": 42154 + }, + { + "epoch": 1.9747505504286318, + "grad_norm": 0.5422552963424215, + "learning_rate": 2.089553023098545e-09, + "loss": 0.2517, + "step": 42155 + }, + { + "epoch": 1.97479739541856, + "grad_norm": 0.583206039414556, + "learning_rate": 2.0818078472925605e-09, + "loss": 0.2619, + "step": 42156 + }, + { + "epoch": 1.9748442404084883, + "grad_norm": 0.6024718895848786, + "learning_rate": 2.074077046378131e-09, + "loss": 0.2634, + "step": 42157 + }, + { + "epoch": 1.9748910853984167, + "grad_norm": 0.5526847754412106, + "learning_rate": 2.066360620399388e-09, + "loss": 0.2566, + "step": 42158 + }, + { + "epoch": 1.974937930388345, + "grad_norm": 0.5967473596167608, + "learning_rate": 2.0586585694010175e-09, + "loss": 0.2786, + "step": 42159 + }, + { + "epoch": 1.9749847753782732, + "grad_norm": 0.6129566111452466, + "learning_rate": 2.050970893427151e-09, + "loss": 0.2749, + "step": 42160 + }, + { + "epoch": 1.9750316203682017, + "grad_norm": 0.7040678969214874, + "learning_rate": 2.0432975925219202e-09, + "loss": 0.2658, + "step": 42161 + }, + { + "epoch": 1.97507846535813, + "grad_norm": 0.6456536414661954, + "learning_rate": 2.035638666730011e-09, + "loss": 0.2827, + "step": 42162 + }, + { + "epoch": 1.9751253103480582, + "grad_norm": 0.5782253545740585, + "learning_rate": 2.027994116095e-09, + "loss": 0.2694, + "step": 42163 + }, + { + "epoch": 1.9751721553379866, + "grad_norm": 0.5697158278039895, + "learning_rate": 2.0203639406610187e-09, + "loss": 0.2459, + "step": 42164 + }, + { + "epoch": 1.975219000327915, + "grad_norm": 0.6030774723162836, + "learning_rate": 2.0127481404721982e-09, + "loss": 0.2611, + "step": 42165 + }, + { + "epoch": 1.975265845317843, + "grad_norm": 0.582519290912395, + "learning_rate": 2.005146715571837e-09, + "loss": 0.2693, + "step": 42166 + }, + { + "epoch": 1.9753126903077716, + "grad_norm": 0.6110599829693157, + "learning_rate": 1.997559666004345e-09, + "loss": 0.2637, + "step": 42167 + }, + { + "epoch": 1.9753595352977, + "grad_norm": 0.607862865380688, + "learning_rate": 1.98998699181302e-09, + "loss": 0.2676, + "step": 42168 + }, + { + "epoch": 1.9754063802876283, + "grad_norm": 0.5910994585270853, + "learning_rate": 1.982428693041161e-09, + "loss": 0.2692, + "step": 42169 + }, + { + "epoch": 1.9754532252775565, + "grad_norm": 0.5727241535747477, + "learning_rate": 1.974884769732899e-09, + "loss": 0.2579, + "step": 42170 + }, + { + "epoch": 1.975500070267485, + "grad_norm": 0.6514790642538241, + "learning_rate": 1.9673552219309778e-09, + "loss": 0.2737, + "step": 42171 + }, + { + "epoch": 1.9755469152574132, + "grad_norm": 0.5865082324657059, + "learning_rate": 1.9598400496792514e-09, + "loss": 0.2707, + "step": 42172 + }, + { + "epoch": 1.9755937602473415, + "grad_norm": 0.5509938713072875, + "learning_rate": 1.9523392530210184e-09, + "loss": 0.2555, + "step": 42173 + }, + { + "epoch": 1.97564060523727, + "grad_norm": 0.5833093771565623, + "learning_rate": 1.944852831999022e-09, + "loss": 0.2769, + "step": 42174 + }, + { + "epoch": 1.9756874502271982, + "grad_norm": 0.5882587744932405, + "learning_rate": 1.9373807866562843e-09, + "loss": 0.2704, + "step": 42175 + }, + { + "epoch": 1.9757342952171264, + "grad_norm": 0.6269340351619132, + "learning_rate": 1.9299231170363807e-09, + "loss": 0.2666, + "step": 42176 + }, + { + "epoch": 1.9757811402070549, + "grad_norm": 0.5702460264573154, + "learning_rate": 1.9224798231817777e-09, + "loss": 0.2731, + "step": 42177 + }, + { + "epoch": 1.9758279851969833, + "grad_norm": 0.6083524383713617, + "learning_rate": 1.915050905135496e-09, + "loss": 0.2483, + "step": 42178 + }, + { + "epoch": 1.9758748301869113, + "grad_norm": 0.6099613113261216, + "learning_rate": 1.9076363629402794e-09, + "loss": 0.2754, + "step": 42179 + }, + { + "epoch": 1.9759216751768398, + "grad_norm": 0.577185703198822, + "learning_rate": 1.900236196638594e-09, + "loss": 0.2629, + "step": 42180 + }, + { + "epoch": 1.9759685201667683, + "grad_norm": 0.5877000758188958, + "learning_rate": 1.8928504062731833e-09, + "loss": 0.2643, + "step": 42181 + }, + { + "epoch": 1.9760153651566965, + "grad_norm": 0.5860468234556494, + "learning_rate": 1.8854789918865134e-09, + "loss": 0.2618, + "step": 42182 + }, + { + "epoch": 1.9760622101466248, + "grad_norm": 0.5946294598991846, + "learning_rate": 1.878121953521328e-09, + "loss": 0.2616, + "step": 42183 + }, + { + "epoch": 1.9761090551365532, + "grad_norm": 0.5588266613853797, + "learning_rate": 1.87077929121926e-09, + "loss": 0.2577, + "step": 42184 + }, + { + "epoch": 1.9761559001264815, + "grad_norm": 0.575563982387106, + "learning_rate": 1.863451005023331e-09, + "loss": 0.2681, + "step": 42185 + }, + { + "epoch": 1.9762027451164097, + "grad_norm": 0.601312165447798, + "learning_rate": 1.8561370949751744e-09, + "loss": 0.2751, + "step": 42186 + }, + { + "epoch": 1.9762495901063382, + "grad_norm": 0.570030408526158, + "learning_rate": 1.848837561117256e-09, + "loss": 0.2628, + "step": 42187 + }, + { + "epoch": 1.9762964350962664, + "grad_norm": 0.5580451584682944, + "learning_rate": 1.841552403491209e-09, + "loss": 0.2555, + "step": 42188 + }, + { + "epoch": 1.9763432800861946, + "grad_norm": 0.5357538178092003, + "learning_rate": 1.8342816221389447e-09, + "loss": 0.2507, + "step": 42189 + }, + { + "epoch": 1.976390125076123, + "grad_norm": 0.5967625454880748, + "learning_rate": 1.8270252171026515e-09, + "loss": 0.2558, + "step": 42190 + }, + { + "epoch": 1.9764369700660516, + "grad_norm": 0.6401868648527059, + "learning_rate": 1.81978318842424e-09, + "loss": 0.2838, + "step": 42191 + }, + { + "epoch": 1.9764838150559798, + "grad_norm": 0.611017833740856, + "learning_rate": 1.8125555361447889e-09, + "loss": 0.2714, + "step": 42192 + }, + { + "epoch": 1.976530660045908, + "grad_norm": 0.6219563856873626, + "learning_rate": 1.8053422603062086e-09, + "loss": 0.2777, + "step": 42193 + }, + { + "epoch": 1.9765775050358365, + "grad_norm": 0.5518741493346677, + "learning_rate": 1.7981433609501332e-09, + "loss": 0.2529, + "step": 42194 + }, + { + "epoch": 1.9766243500257648, + "grad_norm": 0.5903307471923976, + "learning_rate": 1.79095883811764e-09, + "loss": 0.2574, + "step": 42195 + }, + { + "epoch": 1.976671195015693, + "grad_norm": 0.5864190135392352, + "learning_rate": 1.7837886918503634e-09, + "loss": 0.2609, + "step": 42196 + }, + { + "epoch": 1.9767180400056215, + "grad_norm": 0.56615253465701, + "learning_rate": 1.7766329221893807e-09, + "loss": 0.2421, + "step": 42197 + }, + { + "epoch": 1.9767648849955497, + "grad_norm": 0.5740902371151853, + "learning_rate": 1.7694915291760483e-09, + "loss": 0.2703, + "step": 42198 + }, + { + "epoch": 1.976811729985478, + "grad_norm": 0.5800206939262381, + "learning_rate": 1.7623645128514443e-09, + "loss": 0.248, + "step": 42199 + }, + { + "epoch": 1.9768585749754064, + "grad_norm": 0.6150845945026517, + "learning_rate": 1.755251873256647e-09, + "loss": 0.2864, + "step": 42200 + }, + { + "epoch": 1.9769054199653349, + "grad_norm": 0.5922784834715553, + "learning_rate": 1.7481536104321795e-09, + "loss": 0.2739, + "step": 42201 + }, + { + "epoch": 1.9769522649552629, + "grad_norm": 0.589158846550512, + "learning_rate": 1.7410697244193976e-09, + "loss": 0.2612, + "step": 42202 + }, + { + "epoch": 1.9769991099451913, + "grad_norm": 0.6474915426746045, + "learning_rate": 1.7340002152585467e-09, + "loss": 0.2748, + "step": 42203 + }, + { + "epoch": 1.9770459549351198, + "grad_norm": 0.5665401396848777, + "learning_rate": 1.7269450829909829e-09, + "loss": 0.2639, + "step": 42204 + }, + { + "epoch": 1.977092799925048, + "grad_norm": 0.6116093490294785, + "learning_rate": 1.7199043276566741e-09, + "loss": 0.2717, + "step": 42205 + }, + { + "epoch": 1.9771396449149763, + "grad_norm": 0.5478226457681884, + "learning_rate": 1.712877949296421e-09, + "loss": 0.2541, + "step": 42206 + }, + { + "epoch": 1.9771864899049048, + "grad_norm": 0.6211485069907443, + "learning_rate": 1.7058659479507467e-09, + "loss": 0.2734, + "step": 42207 + }, + { + "epoch": 1.977233334894833, + "grad_norm": 0.6418713394654011, + "learning_rate": 1.698868323659897e-09, + "loss": 0.2849, + "step": 42208 + }, + { + "epoch": 1.9772801798847612, + "grad_norm": 0.5934790956285065, + "learning_rate": 1.691885076464117e-09, + "loss": 0.2688, + "step": 42209 + }, + { + "epoch": 1.9773270248746897, + "grad_norm": 0.5710252108315762, + "learning_rate": 1.6849162064036529e-09, + "loss": 0.2578, + "step": 42210 + }, + { + "epoch": 1.977373869864618, + "grad_norm": 0.6067048084373878, + "learning_rate": 1.6779617135184723e-09, + "loss": 0.2787, + "step": 42211 + }, + { + "epoch": 1.9774207148545462, + "grad_norm": 0.6501885510051959, + "learning_rate": 1.6710215978485433e-09, + "loss": 0.2938, + "step": 42212 + }, + { + "epoch": 1.9774675598444746, + "grad_norm": 0.5534894460006808, + "learning_rate": 1.6640958594341117e-09, + "loss": 0.2737, + "step": 42213 + }, + { + "epoch": 1.977514404834403, + "grad_norm": 0.5607526508267325, + "learning_rate": 1.6571844983148677e-09, + "loss": 0.2566, + "step": 42214 + }, + { + "epoch": 1.9775612498243311, + "grad_norm": 0.6121047468031727, + "learning_rate": 1.6502875145307794e-09, + "loss": 0.2896, + "step": 42215 + }, + { + "epoch": 1.9776080948142596, + "grad_norm": 0.6191811703485537, + "learning_rate": 1.64340490812126e-09, + "loss": 0.2834, + "step": 42216 + }, + { + "epoch": 1.977654939804188, + "grad_norm": 0.624331576963108, + "learning_rate": 1.636536679125722e-09, + "loss": 0.2829, + "step": 42217 + }, + { + "epoch": 1.9777017847941163, + "grad_norm": 0.5685188948336865, + "learning_rate": 1.6296828275841337e-09, + "loss": 0.2555, + "step": 42218 + }, + { + "epoch": 1.9777486297840445, + "grad_norm": 0.7054308905240104, + "learning_rate": 1.6228433535359078e-09, + "loss": 0.2838, + "step": 42219 + }, + { + "epoch": 1.977795474773973, + "grad_norm": 0.5691292370299809, + "learning_rate": 1.61601825702018e-09, + "loss": 0.2515, + "step": 42220 + }, + { + "epoch": 1.9778423197639012, + "grad_norm": 0.5882406753012278, + "learning_rate": 1.6092075380763628e-09, + "loss": 0.2704, + "step": 42221 + }, + { + "epoch": 1.9778891647538295, + "grad_norm": 0.5753960040210326, + "learning_rate": 1.6024111967435918e-09, + "loss": 0.255, + "step": 42222 + }, + { + "epoch": 1.977936009743758, + "grad_norm": 0.6021104011255435, + "learning_rate": 1.5956292330610024e-09, + "loss": 0.2726, + "step": 42223 + }, + { + "epoch": 1.9779828547336862, + "grad_norm": 0.5985235894047284, + "learning_rate": 1.5888616470674523e-09, + "loss": 0.2649, + "step": 42224 + }, + { + "epoch": 1.9780296997236144, + "grad_norm": 0.6679590111596431, + "learning_rate": 1.5821084388023545e-09, + "loss": 0.2913, + "step": 42225 + }, + { + "epoch": 1.9780765447135429, + "grad_norm": 0.5928010774644377, + "learning_rate": 1.5753696083040116e-09, + "loss": 0.2621, + "step": 42226 + }, + { + "epoch": 1.9781233897034713, + "grad_norm": 0.5998521720407287, + "learning_rate": 1.568645155611559e-09, + "loss": 0.2495, + "step": 42227 + }, + { + "epoch": 1.9781702346933996, + "grad_norm": 0.6155361962526561, + "learning_rate": 1.561935080763577e-09, + "loss": 0.2814, + "step": 42228 + }, + { + "epoch": 1.9782170796833278, + "grad_norm": 0.6114317196800184, + "learning_rate": 1.5552393837989232e-09, + "loss": 0.2602, + "step": 42229 + }, + { + "epoch": 1.9782639246732563, + "grad_norm": 0.5970681312273509, + "learning_rate": 1.5485580647556232e-09, + "loss": 0.2656, + "step": 42230 + }, + { + "epoch": 1.9783107696631845, + "grad_norm": 0.6149595225922155, + "learning_rate": 1.5418911236725342e-09, + "loss": 0.2833, + "step": 42231 + }, + { + "epoch": 1.9783576146531128, + "grad_norm": 0.5855897853942769, + "learning_rate": 1.5352385605876818e-09, + "loss": 0.2663, + "step": 42232 + }, + { + "epoch": 1.9784044596430412, + "grad_norm": 0.5697228767807007, + "learning_rate": 1.5286003755399237e-09, + "loss": 0.2567, + "step": 42233 + }, + { + "epoch": 1.9784513046329695, + "grad_norm": 0.6097841925618619, + "learning_rate": 1.5219765685667299e-09, + "loss": 0.2732, + "step": 42234 + }, + { + "epoch": 1.9784981496228977, + "grad_norm": 0.6386366567531002, + "learning_rate": 1.515367139706958e-09, + "loss": 0.2864, + "step": 42235 + }, + { + "epoch": 1.9785449946128262, + "grad_norm": 0.6046835607516797, + "learning_rate": 1.5087720889980783e-09, + "loss": 0.2744, + "step": 42236 + }, + { + "epoch": 1.9785918396027546, + "grad_norm": 0.6036385236632104, + "learning_rate": 1.5021914164781159e-09, + "loss": 0.2619, + "step": 42237 + }, + { + "epoch": 1.9786386845926827, + "grad_norm": 0.5949651020981281, + "learning_rate": 1.4956251221853734e-09, + "loss": 0.265, + "step": 42238 + }, + { + "epoch": 1.9786855295826111, + "grad_norm": 0.5933486545716294, + "learning_rate": 1.4890732061570435e-09, + "loss": 0.2448, + "step": 42239 + }, + { + "epoch": 1.9787323745725396, + "grad_norm": 0.5549591877740964, + "learning_rate": 1.4825356684314284e-09, + "loss": 0.2699, + "step": 42240 + }, + { + "epoch": 1.9787792195624678, + "grad_norm": 0.5767843468432726, + "learning_rate": 1.4760125090457212e-09, + "loss": 0.2666, + "step": 42241 + }, + { + "epoch": 1.978826064552396, + "grad_norm": 0.5683235537545701, + "learning_rate": 1.4695037280373914e-09, + "loss": 0.2695, + "step": 42242 + }, + { + "epoch": 1.9788729095423245, + "grad_norm": 0.5813634456313205, + "learning_rate": 1.4630093254441868e-09, + "loss": 0.2658, + "step": 42243 + }, + { + "epoch": 1.9789197545322528, + "grad_norm": 0.612604508990656, + "learning_rate": 1.4565293013035774e-09, + "loss": 0.2872, + "step": 42244 + }, + { + "epoch": 1.978966599522181, + "grad_norm": 0.6475590480294261, + "learning_rate": 1.4500636556524783e-09, + "loss": 0.2953, + "step": 42245 + }, + { + "epoch": 1.9790134445121095, + "grad_norm": 0.6169414139931974, + "learning_rate": 1.4436123885280818e-09, + "loss": 0.2665, + "step": 42246 + }, + { + "epoch": 1.9790602895020377, + "grad_norm": 0.5995022782180037, + "learning_rate": 1.4371754999681354e-09, + "loss": 0.273, + "step": 42247 + }, + { + "epoch": 1.979107134491966, + "grad_norm": 0.6206531609395951, + "learning_rate": 1.4307529900089988e-09, + "loss": 0.2957, + "step": 42248 + }, + { + "epoch": 1.9791539794818944, + "grad_norm": 0.6616131845191118, + "learning_rate": 1.424344858687865e-09, + "loss": 0.3042, + "step": 42249 + }, + { + "epoch": 1.9792008244718229, + "grad_norm": 0.6399753967264232, + "learning_rate": 1.4179511060416484e-09, + "loss": 0.2898, + "step": 42250 + }, + { + "epoch": 1.979247669461751, + "grad_norm": 0.5917282776037711, + "learning_rate": 1.411571732107264e-09, + "loss": 0.2692, + "step": 42251 + }, + { + "epoch": 1.9792945144516794, + "grad_norm": 0.5544685773556014, + "learning_rate": 1.405206736921072e-09, + "loss": 0.2675, + "step": 42252 + }, + { + "epoch": 1.9793413594416078, + "grad_norm": 0.5694084092143408, + "learning_rate": 1.3988561205199868e-09, + "loss": 0.2725, + "step": 42253 + }, + { + "epoch": 1.979388204431536, + "grad_norm": 0.5800415987952203, + "learning_rate": 1.392519882940646e-09, + "loss": 0.2618, + "step": 42254 + }, + { + "epoch": 1.9794350494214643, + "grad_norm": 0.5983149573699629, + "learning_rate": 1.386198024219132e-09, + "loss": 0.276, + "step": 42255 + }, + { + "epoch": 1.9794818944113928, + "grad_norm": 0.6199107325285401, + "learning_rate": 1.3798905443923595e-09, + "loss": 0.2759, + "step": 42256 + }, + { + "epoch": 1.979528739401321, + "grad_norm": 0.589656115472818, + "learning_rate": 1.373597443496133e-09, + "loss": 0.2612, + "step": 42257 + }, + { + "epoch": 1.9795755843912493, + "grad_norm": 0.5796884739328086, + "learning_rate": 1.3673187215668127e-09, + "loss": 0.2614, + "step": 42258 + }, + { + "epoch": 1.9796224293811777, + "grad_norm": 0.5985575031052478, + "learning_rate": 1.3610543786404805e-09, + "loss": 0.2498, + "step": 42259 + }, + { + "epoch": 1.979669274371106, + "grad_norm": 0.6068141363168922, + "learning_rate": 1.3548044147534967e-09, + "loss": 0.2546, + "step": 42260 + }, + { + "epoch": 1.9797161193610342, + "grad_norm": 0.5734656333846744, + "learning_rate": 1.348568829941388e-09, + "loss": 0.2594, + "step": 42261 + }, + { + "epoch": 1.9797629643509627, + "grad_norm": 0.5815721831737652, + "learning_rate": 1.3423476242402368e-09, + "loss": 0.2581, + "step": 42262 + }, + { + "epoch": 1.9798098093408911, + "grad_norm": 0.5653453830628754, + "learning_rate": 1.3361407976861251e-09, + "loss": 0.2552, + "step": 42263 + }, + { + "epoch": 1.9798566543308194, + "grad_norm": 0.6170172626294179, + "learning_rate": 1.3299483503143028e-09, + "loss": 0.2604, + "step": 42264 + }, + { + "epoch": 1.9799034993207476, + "grad_norm": 0.6071229264568749, + "learning_rate": 1.3237702821605747e-09, + "loss": 0.253, + "step": 42265 + }, + { + "epoch": 1.979950344310676, + "grad_norm": 0.5812081096007403, + "learning_rate": 1.3176065932607452e-09, + "loss": 0.2697, + "step": 42266 + }, + { + "epoch": 1.9799971893006043, + "grad_norm": 0.5536890281723558, + "learning_rate": 1.3114572836497864e-09, + "loss": 0.2521, + "step": 42267 + }, + { + "epoch": 1.9800440342905326, + "grad_norm": 0.6150199830006312, + "learning_rate": 1.3053223533635028e-09, + "loss": 0.2825, + "step": 42268 + }, + { + "epoch": 1.980090879280461, + "grad_norm": 0.5362108602309529, + "learning_rate": 1.2992018024371443e-09, + "loss": 0.2584, + "step": 42269 + }, + { + "epoch": 1.9801377242703893, + "grad_norm": 0.5768174848099383, + "learning_rate": 1.293095630905683e-09, + "loss": 0.2517, + "step": 42270 + }, + { + "epoch": 1.9801845692603175, + "grad_norm": 0.5685531132508924, + "learning_rate": 1.2870038388046458e-09, + "loss": 0.2656, + "step": 42271 + }, + { + "epoch": 1.980231414250246, + "grad_norm": 0.6686613005715348, + "learning_rate": 1.2809264261687271e-09, + "loss": 0.2884, + "step": 42272 + }, + { + "epoch": 1.9802782592401744, + "grad_norm": 0.5841838982106501, + "learning_rate": 1.2748633930331767e-09, + "loss": 0.2649, + "step": 42273 + }, + { + "epoch": 1.9803251042301024, + "grad_norm": 0.5466190123486107, + "learning_rate": 1.2688147394326888e-09, + "loss": 0.2549, + "step": 42274 + }, + { + "epoch": 1.980371949220031, + "grad_norm": 0.6083567371747264, + "learning_rate": 1.2627804654019582e-09, + "loss": 0.2743, + "step": 42275 + }, + { + "epoch": 1.9804187942099594, + "grad_norm": 0.5882068782873615, + "learning_rate": 1.2567605709762342e-09, + "loss": 0.27, + "step": 42276 + }, + { + "epoch": 1.9804656391998876, + "grad_norm": 0.5818243223576628, + "learning_rate": 1.2507550561896565e-09, + "loss": 0.2855, + "step": 42277 + }, + { + "epoch": 1.9805124841898158, + "grad_norm": 0.6140186900534821, + "learning_rate": 1.244763921076919e-09, + "loss": 0.267, + "step": 42278 + }, + { + "epoch": 1.9805593291797443, + "grad_norm": 0.5640536314617379, + "learning_rate": 1.2387871656724392e-09, + "loss": 0.2576, + "step": 42279 + }, + { + "epoch": 1.9806061741696726, + "grad_norm": 0.594429621952248, + "learning_rate": 1.2328247900106337e-09, + "loss": 0.2779, + "step": 42280 + }, + { + "epoch": 1.9806530191596008, + "grad_norm": 0.5939927488612671, + "learning_rate": 1.2268767941261971e-09, + "loss": 0.2701, + "step": 42281 + }, + { + "epoch": 1.9806998641495293, + "grad_norm": 0.5973577214399449, + "learning_rate": 1.2209431780527137e-09, + "loss": 0.2749, + "step": 42282 + }, + { + "epoch": 1.9807467091394575, + "grad_norm": 0.6048603114260084, + "learning_rate": 1.2150239418248777e-09, + "loss": 0.282, + "step": 42283 + }, + { + "epoch": 1.9807935541293857, + "grad_norm": 0.5927897199256749, + "learning_rate": 1.2091190854765512e-09, + "loss": 0.2726, + "step": 42284 + }, + { + "epoch": 1.9808403991193142, + "grad_norm": 0.6002605875048641, + "learning_rate": 1.2032286090415957e-09, + "loss": 0.2701, + "step": 42285 + }, + { + "epoch": 1.9808872441092427, + "grad_norm": 0.6261587361473233, + "learning_rate": 1.1973525125541507e-09, + "loss": 0.2788, + "step": 42286 + }, + { + "epoch": 1.9809340890991707, + "grad_norm": 0.6093771039385586, + "learning_rate": 1.1914907960478006e-09, + "loss": 0.2841, + "step": 42287 + }, + { + "epoch": 1.9809809340890991, + "grad_norm": 0.5795923842895867, + "learning_rate": 1.1856434595561295e-09, + "loss": 0.2654, + "step": 42288 + }, + { + "epoch": 1.9810277790790276, + "grad_norm": 0.6084518556861759, + "learning_rate": 1.1798105031132767e-09, + "loss": 0.2798, + "step": 42289 + }, + { + "epoch": 1.9810746240689558, + "grad_norm": 0.5897926827741907, + "learning_rate": 1.173991926752549e-09, + "loss": 0.2721, + "step": 42290 + }, + { + "epoch": 1.981121469058884, + "grad_norm": 0.5682316604939859, + "learning_rate": 1.1681877305075306e-09, + "loss": 0.2695, + "step": 42291 + }, + { + "epoch": 1.9811683140488126, + "grad_norm": 0.5967617769170703, + "learning_rate": 1.1623979144115284e-09, + "loss": 0.2807, + "step": 42292 + }, + { + "epoch": 1.9812151590387408, + "grad_norm": 0.6236578560323836, + "learning_rate": 1.156622478497571e-09, + "loss": 0.285, + "step": 42293 + }, + { + "epoch": 1.981262004028669, + "grad_norm": 0.6049455297668792, + "learning_rate": 1.1508614227995208e-09, + "loss": 0.2875, + "step": 42294 + }, + { + "epoch": 1.9813088490185975, + "grad_norm": 0.6002083898033373, + "learning_rate": 1.145114747350129e-09, + "loss": 0.2658, + "step": 42295 + }, + { + "epoch": 1.9813556940085257, + "grad_norm": 0.5869546178820623, + "learning_rate": 1.1393824521824249e-09, + "loss": 0.2751, + "step": 42296 + }, + { + "epoch": 1.981402538998454, + "grad_norm": 0.7141338357586754, + "learning_rate": 1.1336645373294374e-09, + "loss": 0.2648, + "step": 42297 + }, + { + "epoch": 1.9814493839883824, + "grad_norm": 0.6656516195054495, + "learning_rate": 1.127961002824196e-09, + "loss": 0.2956, + "step": 42298 + }, + { + "epoch": 1.981496228978311, + "grad_norm": 0.6247597477153684, + "learning_rate": 1.1222718486997296e-09, + "loss": 0.2633, + "step": 42299 + }, + { + "epoch": 1.9815430739682391, + "grad_norm": 0.5984193120967456, + "learning_rate": 1.116597074988235e-09, + "loss": 0.2696, + "step": 42300 + }, + { + "epoch": 1.9815899189581674, + "grad_norm": 0.5618524753381974, + "learning_rate": 1.1109366817227407e-09, + "loss": 0.246, + "step": 42301 + }, + { + "epoch": 1.9816367639480958, + "grad_norm": 0.6329648031700572, + "learning_rate": 1.1052906689357212e-09, + "loss": 0.2825, + "step": 42302 + }, + { + "epoch": 1.981683608938024, + "grad_norm": 0.5720606974193686, + "learning_rate": 1.0996590366596505e-09, + "loss": 0.273, + "step": 42303 + }, + { + "epoch": 1.9817304539279523, + "grad_norm": 0.6194817689306541, + "learning_rate": 1.0940417849270024e-09, + "loss": 0.2647, + "step": 42304 + }, + { + "epoch": 1.9817772989178808, + "grad_norm": 0.5845658557186851, + "learning_rate": 1.088438913770251e-09, + "loss": 0.2712, + "step": 42305 + }, + { + "epoch": 1.981824143907809, + "grad_norm": 0.5843176040578159, + "learning_rate": 1.0828504232213156e-09, + "loss": 0.271, + "step": 42306 + }, + { + "epoch": 1.9818709888977373, + "grad_norm": 0.6208213530859927, + "learning_rate": 1.077276313312392e-09, + "loss": 0.2854, + "step": 42307 + }, + { + "epoch": 1.9819178338876657, + "grad_norm": 0.611579161827028, + "learning_rate": 1.071716584075677e-09, + "loss": 0.2796, + "step": 42308 + }, + { + "epoch": 1.9819646788775942, + "grad_norm": 0.5834997750327213, + "learning_rate": 1.0661712355433674e-09, + "loss": 0.2734, + "step": 42309 + }, + { + "epoch": 1.9820115238675222, + "grad_norm": 0.5941777201334393, + "learning_rate": 1.0606402677468263e-09, + "loss": 0.2724, + "step": 42310 + }, + { + "epoch": 1.9820583688574507, + "grad_norm": 0.60491503993643, + "learning_rate": 1.0551236807185284e-09, + "loss": 0.2778, + "step": 42311 + }, + { + "epoch": 1.9821052138473791, + "grad_norm": 0.6154447360316461, + "learning_rate": 1.049621474489837e-09, + "loss": 0.2772, + "step": 42312 + }, + { + "epoch": 1.9821520588373074, + "grad_norm": 0.5800309772395289, + "learning_rate": 1.0441336490923938e-09, + "loss": 0.2642, + "step": 42313 + }, + { + "epoch": 1.9821989038272356, + "grad_norm": 0.5502083091113291, + "learning_rate": 1.0386602045578397e-09, + "loss": 0.2511, + "step": 42314 + }, + { + "epoch": 1.982245748817164, + "grad_norm": 0.6001887766814862, + "learning_rate": 1.0332011409178167e-09, + "loss": 0.2643, + "step": 42315 + }, + { + "epoch": 1.9822925938070923, + "grad_norm": 0.5995851377023487, + "learning_rate": 1.0277564582034106e-09, + "loss": 0.2728, + "step": 42316 + }, + { + "epoch": 1.9823394387970206, + "grad_norm": 0.5556120606040957, + "learning_rate": 1.0223261564465402e-09, + "loss": 0.2555, + "step": 42317 + }, + { + "epoch": 1.982386283786949, + "grad_norm": 0.6344757184875278, + "learning_rate": 1.016910235677737e-09, + "loss": 0.2801, + "step": 42318 + }, + { + "epoch": 1.9824331287768773, + "grad_norm": 0.6109048556018676, + "learning_rate": 1.0115086959286424e-09, + "loss": 0.2681, + "step": 42319 + }, + { + "epoch": 1.9824799737668055, + "grad_norm": 0.6476409376542118, + "learning_rate": 1.0061215372303423e-09, + "loss": 0.2902, + "step": 42320 + }, + { + "epoch": 1.982526818756734, + "grad_norm": 0.5805793660325665, + "learning_rate": 1.0007487596136456e-09, + "loss": 0.2811, + "step": 42321 + }, + { + "epoch": 1.9825736637466624, + "grad_norm": 0.6866738410186204, + "learning_rate": 9.953903631096384e-10, + "loss": 0.2888, + "step": 42322 + }, + { + "epoch": 1.9826205087365905, + "grad_norm": 0.5844298515979205, + "learning_rate": 9.90046347748852e-10, + "loss": 0.2571, + "step": 42323 + }, + { + "epoch": 1.982667353726519, + "grad_norm": 0.6316561867221576, + "learning_rate": 9.847167135623725e-10, + "loss": 0.275, + "step": 42324 + }, + { + "epoch": 1.9827141987164474, + "grad_norm": 0.650360625241153, + "learning_rate": 9.79401460580731e-10, + "loss": 0.2616, + "step": 42325 + }, + { + "epoch": 1.9827610437063756, + "grad_norm": 0.6125917732687761, + "learning_rate": 9.74100588834459e-10, + "loss": 0.2726, + "step": 42326 + }, + { + "epoch": 1.9828078886963039, + "grad_norm": 0.6141336115513726, + "learning_rate": 9.688140983540873e-10, + "loss": 0.2745, + "step": 42327 + }, + { + "epoch": 1.9828547336862323, + "grad_norm": 0.5718487005357198, + "learning_rate": 9.63541989170147e-10, + "loss": 0.2589, + "step": 42328 + }, + { + "epoch": 1.9829015786761606, + "grad_norm": 0.5623222934656056, + "learning_rate": 9.582842613128918e-10, + "loss": 0.2607, + "step": 42329 + }, + { + "epoch": 1.9829484236660888, + "grad_norm": 0.6098761500270987, + "learning_rate": 9.530409148125752e-10, + "loss": 0.2675, + "step": 42330 + }, + { + "epoch": 1.9829952686560173, + "grad_norm": 0.6295250590830331, + "learning_rate": 9.478119496994508e-10, + "loss": 0.2684, + "step": 42331 + }, + { + "epoch": 1.9830421136459455, + "grad_norm": 0.5987698835335967, + "learning_rate": 9.425973660037725e-10, + "loss": 0.2679, + "step": 42332 + }, + { + "epoch": 1.9830889586358738, + "grad_norm": 0.6057997949897012, + "learning_rate": 9.373971637549606e-10, + "loss": 0.2704, + "step": 42333 + }, + { + "epoch": 1.9831358036258022, + "grad_norm": 0.6263375420236916, + "learning_rate": 9.322113429835466e-10, + "loss": 0.2828, + "step": 42334 + }, + { + "epoch": 1.9831826486157307, + "grad_norm": 0.640142380190416, + "learning_rate": 9.27039903719229e-10, + "loss": 0.2723, + "step": 42335 + }, + { + "epoch": 1.983229493605659, + "grad_norm": 0.5573276528741568, + "learning_rate": 9.218828459914287e-10, + "loss": 0.2644, + "step": 42336 + }, + { + "epoch": 1.9832763385955872, + "grad_norm": 0.6279135018952234, + "learning_rate": 9.167401698301215e-10, + "loss": 0.2737, + "step": 42337 + }, + { + "epoch": 1.9833231835855156, + "grad_norm": 0.5897254649833567, + "learning_rate": 9.11611875265006e-10, + "loss": 0.2653, + "step": 42338 + }, + { + "epoch": 1.9833700285754439, + "grad_norm": 0.5764095211069415, + "learning_rate": 9.064979623252257e-10, + "loss": 0.2738, + "step": 42339 + }, + { + "epoch": 1.983416873565372, + "grad_norm": 0.6127969103625377, + "learning_rate": 9.013984310404789e-10, + "loss": 0.278, + "step": 42340 + }, + { + "epoch": 1.9834637185553006, + "grad_norm": 0.5881076619196871, + "learning_rate": 8.963132814401865e-10, + "loss": 0.2483, + "step": 42341 + }, + { + "epoch": 1.9835105635452288, + "grad_norm": 0.6236477223193921, + "learning_rate": 8.912425135534919e-10, + "loss": 0.2842, + "step": 42342 + }, + { + "epoch": 1.983557408535157, + "grad_norm": 0.582407445173274, + "learning_rate": 8.861861274095385e-10, + "loss": 0.2619, + "step": 42343 + }, + { + "epoch": 1.9836042535250855, + "grad_norm": 0.6075887957926029, + "learning_rate": 8.81144123037192e-10, + "loss": 0.2609, + "step": 42344 + }, + { + "epoch": 1.983651098515014, + "grad_norm": 0.6460367948678439, + "learning_rate": 8.761165004661509e-10, + "loss": 0.263, + "step": 42345 + }, + { + "epoch": 1.983697943504942, + "grad_norm": 0.5861859541580406, + "learning_rate": 8.711032597247259e-10, + "loss": 0.2618, + "step": 42346 + }, + { + "epoch": 1.9837447884948705, + "grad_norm": 0.5492085624600578, + "learning_rate": 8.661044008417829e-10, + "loss": 0.2625, + "step": 42347 + }, + { + "epoch": 1.983791633484799, + "grad_norm": 0.594883938100924, + "learning_rate": 8.611199238464651e-10, + "loss": 0.2735, + "step": 42348 + }, + { + "epoch": 1.9838384784747272, + "grad_norm": 0.6244036578710043, + "learning_rate": 8.561498287673608e-10, + "loss": 0.2706, + "step": 42349 + }, + { + "epoch": 1.9838853234646554, + "grad_norm": 0.6245175118553862, + "learning_rate": 8.511941156327807e-10, + "loss": 0.2673, + "step": 42350 + }, + { + "epoch": 1.9839321684545839, + "grad_norm": 0.6070840569768522, + "learning_rate": 8.462527844715907e-10, + "loss": 0.2731, + "step": 42351 + }, + { + "epoch": 1.983979013444512, + "grad_norm": 0.6098065436207029, + "learning_rate": 8.413258353121012e-10, + "loss": 0.2753, + "step": 42352 + }, + { + "epoch": 1.9840258584344403, + "grad_norm": 0.5964982486561324, + "learning_rate": 8.364132681826232e-10, + "loss": 0.2528, + "step": 42353 + }, + { + "epoch": 1.9840727034243688, + "grad_norm": 0.6232709853238799, + "learning_rate": 8.315150831111896e-10, + "loss": 0.2674, + "step": 42354 + }, + { + "epoch": 1.984119548414297, + "grad_norm": 0.6590038152815761, + "learning_rate": 8.266312801266663e-10, + "loss": 0.292, + "step": 42355 + }, + { + "epoch": 1.9841663934042253, + "grad_norm": 0.6281333546652083, + "learning_rate": 8.217618592562538e-10, + "loss": 0.2781, + "step": 42356 + }, + { + "epoch": 1.9842132383941538, + "grad_norm": 0.6282148761800054, + "learning_rate": 8.169068205288177e-10, + "loss": 0.2773, + "step": 42357 + }, + { + "epoch": 1.9842600833840822, + "grad_norm": 0.6134575570830695, + "learning_rate": 8.120661639718364e-10, + "loss": 0.2685, + "step": 42358 + }, + { + "epoch": 1.9843069283740102, + "grad_norm": 0.6469784335347649, + "learning_rate": 8.072398896133426e-10, + "loss": 0.2743, + "step": 42359 + }, + { + "epoch": 1.9843537733639387, + "grad_norm": 0.5773756956014774, + "learning_rate": 8.024279974808147e-10, + "loss": 0.2665, + "step": 42360 + }, + { + "epoch": 1.9844006183538672, + "grad_norm": 0.5573565859596485, + "learning_rate": 7.976304876022856e-10, + "loss": 0.2638, + "step": 42361 + }, + { + "epoch": 1.9844474633437954, + "grad_norm": 0.6414927587724037, + "learning_rate": 7.928473600055109e-10, + "loss": 0.2916, + "step": 42362 + }, + { + "epoch": 1.9844943083337236, + "grad_norm": 0.6180554975423066, + "learning_rate": 7.880786147174136e-10, + "loss": 0.2642, + "step": 42363 + }, + { + "epoch": 1.984541153323652, + "grad_norm": 0.5744923661924801, + "learning_rate": 7.833242517657491e-10, + "loss": 0.2606, + "step": 42364 + }, + { + "epoch": 1.9845879983135803, + "grad_norm": 0.6227197972123051, + "learning_rate": 7.785842711779956e-10, + "loss": 0.2846, + "step": 42365 + }, + { + "epoch": 1.9846348433035086, + "grad_norm": 0.6192086943095273, + "learning_rate": 7.738586729813535e-10, + "loss": 0.2782, + "step": 42366 + }, + { + "epoch": 1.984681688293437, + "grad_norm": 0.5445216253894724, + "learning_rate": 7.691474572030233e-10, + "loss": 0.2571, + "step": 42367 + }, + { + "epoch": 1.9847285332833653, + "grad_norm": 0.6015130886461536, + "learning_rate": 7.644506238702054e-10, + "loss": 0.2641, + "step": 42368 + }, + { + "epoch": 1.9847753782732935, + "grad_norm": 0.6108962589021072, + "learning_rate": 7.597681730095451e-10, + "loss": 0.2711, + "step": 42369 + }, + { + "epoch": 1.984822223263222, + "grad_norm": 0.5825055782789682, + "learning_rate": 7.551001046485207e-10, + "loss": 0.2523, + "step": 42370 + }, + { + "epoch": 1.9848690682531505, + "grad_norm": 0.5977528915948566, + "learning_rate": 7.504464188134997e-10, + "loss": 0.258, + "step": 42371 + }, + { + "epoch": 1.9849159132430787, + "grad_norm": 0.6182237564367689, + "learning_rate": 7.458071155316826e-10, + "loss": 0.2631, + "step": 42372 + }, + { + "epoch": 1.984962758233007, + "grad_norm": 0.6147444649816801, + "learning_rate": 7.411821948294373e-10, + "loss": 0.272, + "step": 42373 + }, + { + "epoch": 1.9850096032229354, + "grad_norm": 0.6099554660035397, + "learning_rate": 7.365716567334091e-10, + "loss": 0.2793, + "step": 42374 + }, + { + "epoch": 1.9850564482128636, + "grad_norm": 0.6220126698589822, + "learning_rate": 7.31975501270521e-10, + "loss": 0.2779, + "step": 42375 + }, + { + "epoch": 1.9851032932027919, + "grad_norm": 0.613882015229163, + "learning_rate": 7.273937284668631e-10, + "loss": 0.2736, + "step": 42376 + }, + { + "epoch": 1.9851501381927203, + "grad_norm": 0.5564689114282204, + "learning_rate": 7.228263383488033e-10, + "loss": 0.2669, + "step": 42377 + }, + { + "epoch": 1.9851969831826486, + "grad_norm": 0.5563317110236272, + "learning_rate": 7.182733309427092e-10, + "loss": 0.2563, + "step": 42378 + }, + { + "epoch": 1.9852438281725768, + "grad_norm": 0.5859315329431718, + "learning_rate": 7.137347062749489e-10, + "loss": 0.2557, + "step": 42379 + }, + { + "epoch": 1.9852906731625053, + "grad_norm": 0.5657388721821034, + "learning_rate": 7.092104643713349e-10, + "loss": 0.2581, + "step": 42380 + }, + { + "epoch": 1.9853375181524338, + "grad_norm": 0.6136504890027514, + "learning_rate": 7.047006052579575e-10, + "loss": 0.2745, + "step": 42381 + }, + { + "epoch": 1.9853843631423618, + "grad_norm": 0.6210272407992244, + "learning_rate": 7.002051289611844e-10, + "loss": 0.2756, + "step": 42382 + }, + { + "epoch": 1.9854312081322902, + "grad_norm": 0.6160134476387575, + "learning_rate": 6.957240355062733e-10, + "loss": 0.2691, + "step": 42383 + }, + { + "epoch": 1.9854780531222187, + "grad_norm": 0.5921736904173626, + "learning_rate": 6.912573249193145e-10, + "loss": 0.2712, + "step": 42384 + }, + { + "epoch": 1.985524898112147, + "grad_norm": 0.6221197335980753, + "learning_rate": 6.868049972261204e-10, + "loss": 0.2667, + "step": 42385 + }, + { + "epoch": 1.9855717431020752, + "grad_norm": 0.6215641719802417, + "learning_rate": 6.823670524519488e-10, + "loss": 0.2794, + "step": 42386 + }, + { + "epoch": 1.9856185880920036, + "grad_norm": 0.5929985581328833, + "learning_rate": 6.779434906228899e-10, + "loss": 0.2757, + "step": 42387 + }, + { + "epoch": 1.9856654330819319, + "grad_norm": 0.5601992371752761, + "learning_rate": 6.735343117639237e-10, + "loss": 0.2702, + "step": 42388 + }, + { + "epoch": 1.9857122780718601, + "grad_norm": 0.6385786221706461, + "learning_rate": 6.691395159005854e-10, + "loss": 0.2873, + "step": 42389 + }, + { + "epoch": 1.9857591230617886, + "grad_norm": 0.6477161610165669, + "learning_rate": 6.647591030581324e-10, + "loss": 0.2986, + "step": 42390 + }, + { + "epoch": 1.9858059680517168, + "grad_norm": 0.5947823835195013, + "learning_rate": 6.603930732618224e-10, + "loss": 0.2749, + "step": 42391 + }, + { + "epoch": 1.985852813041645, + "grad_norm": 0.6213494145535269, + "learning_rate": 6.560414265366355e-10, + "loss": 0.2755, + "step": 42392 + }, + { + "epoch": 1.9858996580315735, + "grad_norm": 0.6461509655539696, + "learning_rate": 6.517041629081066e-10, + "loss": 0.2858, + "step": 42393 + }, + { + "epoch": 1.985946503021502, + "grad_norm": 0.5790801161974348, + "learning_rate": 6.473812824003833e-10, + "loss": 0.2643, + "step": 42394 + }, + { + "epoch": 1.98599334801143, + "grad_norm": 0.6051293047415944, + "learning_rate": 6.430727850390006e-10, + "loss": 0.2717, + "step": 42395 + }, + { + "epoch": 1.9860401930013585, + "grad_norm": 0.5926154373524712, + "learning_rate": 6.38778670848661e-10, + "loss": 0.2736, + "step": 42396 + }, + { + "epoch": 1.986087037991287, + "grad_norm": 0.558232931533007, + "learning_rate": 6.344989398537893e-10, + "loss": 0.2598, + "step": 42397 + }, + { + "epoch": 1.9861338829812152, + "grad_norm": 0.5901684828443876, + "learning_rate": 6.302335920793657e-10, + "loss": 0.2709, + "step": 42398 + }, + { + "epoch": 1.9861807279711434, + "grad_norm": 0.6048835116495881, + "learning_rate": 6.259826275495373e-10, + "loss": 0.2634, + "step": 42399 + }, + { + "epoch": 1.9862275729610719, + "grad_norm": 0.5849557239382731, + "learning_rate": 6.217460462892843e-10, + "loss": 0.2733, + "step": 42400 + }, + { + "epoch": 1.9862744179510001, + "grad_norm": 0.5923345077779305, + "learning_rate": 6.175238483224766e-10, + "loss": 0.2708, + "step": 42401 + }, + { + "epoch": 1.9863212629409284, + "grad_norm": 0.6171430324179717, + "learning_rate": 6.133160336735389e-10, + "loss": 0.2608, + "step": 42402 + }, + { + "epoch": 1.9863681079308568, + "grad_norm": 0.5585844680586021, + "learning_rate": 6.091226023668961e-10, + "loss": 0.2559, + "step": 42403 + }, + { + "epoch": 1.986414952920785, + "grad_norm": 0.5713531159823595, + "learning_rate": 6.049435544264182e-10, + "loss": 0.2602, + "step": 42404 + }, + { + "epoch": 1.9864617979107133, + "grad_norm": 0.6065071119154972, + "learning_rate": 6.007788898765298e-10, + "loss": 0.287, + "step": 42405 + }, + { + "epoch": 1.9865086429006418, + "grad_norm": 0.6331519723752782, + "learning_rate": 5.966286087408235e-10, + "loss": 0.2873, + "step": 42406 + }, + { + "epoch": 1.9865554878905702, + "grad_norm": 0.5998745264681153, + "learning_rate": 5.924927110434464e-10, + "loss": 0.2603, + "step": 42407 + }, + { + "epoch": 1.9866023328804985, + "grad_norm": 0.6242145322791207, + "learning_rate": 5.883711968079908e-10, + "loss": 0.2943, + "step": 42408 + }, + { + "epoch": 1.9866491778704267, + "grad_norm": 0.5744080613769126, + "learning_rate": 5.84264066058049e-10, + "loss": 0.2691, + "step": 42409 + }, + { + "epoch": 1.9866960228603552, + "grad_norm": 0.6133742132523197, + "learning_rate": 5.801713188177682e-10, + "loss": 0.2795, + "step": 42410 + }, + { + "epoch": 1.9867428678502834, + "grad_norm": 0.6145758859043849, + "learning_rate": 5.760929551101857e-10, + "loss": 0.2742, + "step": 42411 + }, + { + "epoch": 1.9867897128402117, + "grad_norm": 0.5723856795195723, + "learning_rate": 5.720289749591712e-10, + "loss": 0.2713, + "step": 42412 + }, + { + "epoch": 1.9868365578301401, + "grad_norm": 0.6091776909676593, + "learning_rate": 5.679793783877619e-10, + "loss": 0.2701, + "step": 42413 + }, + { + "epoch": 1.9868834028200684, + "grad_norm": 0.5992015552479649, + "learning_rate": 5.639441654192724e-10, + "loss": 0.2639, + "step": 42414 + }, + { + "epoch": 1.9869302478099966, + "grad_norm": 0.5667262635800789, + "learning_rate": 5.599233360772948e-10, + "loss": 0.2655, + "step": 42415 + }, + { + "epoch": 1.986977092799925, + "grad_norm": 0.6477488385659871, + "learning_rate": 5.559168903848666e-10, + "loss": 0.2486, + "step": 42416 + }, + { + "epoch": 1.9870239377898535, + "grad_norm": 0.5357715652334315, + "learning_rate": 5.519248283647472e-10, + "loss": 0.2469, + "step": 42417 + }, + { + "epoch": 1.9870707827797816, + "grad_norm": 0.5927499440940156, + "learning_rate": 5.479471500399736e-10, + "loss": 0.2678, + "step": 42418 + }, + { + "epoch": 1.98711762776971, + "grad_norm": 0.5818567663780181, + "learning_rate": 5.439838554338606e-10, + "loss": 0.2621, + "step": 42419 + }, + { + "epoch": 1.9871644727596385, + "grad_norm": 0.5909174768885461, + "learning_rate": 5.400349445686126e-10, + "loss": 0.2551, + "step": 42420 + }, + { + "epoch": 1.9872113177495667, + "grad_norm": 0.6141331400980358, + "learning_rate": 5.361004174675444e-10, + "loss": 0.2778, + "step": 42421 + }, + { + "epoch": 1.987258162739495, + "grad_norm": 0.6011867906485521, + "learning_rate": 5.321802741528603e-10, + "loss": 0.2595, + "step": 42422 + }, + { + "epoch": 1.9873050077294234, + "grad_norm": 0.5402072986741803, + "learning_rate": 5.282745146470425e-10, + "loss": 0.2567, + "step": 42423 + }, + { + "epoch": 1.9873518527193517, + "grad_norm": 0.6274443612870205, + "learning_rate": 5.24383138973128e-10, + "loss": 0.2694, + "step": 42424 + }, + { + "epoch": 1.98739869770928, + "grad_norm": 0.6394692483302333, + "learning_rate": 5.205061471530437e-10, + "loss": 0.2935, + "step": 42425 + }, + { + "epoch": 1.9874455426992084, + "grad_norm": 0.6050989282322334, + "learning_rate": 5.166435392092717e-10, + "loss": 0.2656, + "step": 42426 + }, + { + "epoch": 1.9874923876891366, + "grad_norm": 0.6045571139233483, + "learning_rate": 5.127953151637388e-10, + "loss": 0.2513, + "step": 42427 + }, + { + "epoch": 1.9875392326790648, + "grad_norm": 0.5672565317171232, + "learning_rate": 5.089614750392047e-10, + "loss": 0.2548, + "step": 42428 + }, + { + "epoch": 1.9875860776689933, + "grad_norm": 0.5913124941921784, + "learning_rate": 5.051420188570411e-10, + "loss": 0.269, + "step": 42429 + }, + { + "epoch": 1.9876329226589218, + "grad_norm": 0.6628180360637016, + "learning_rate": 5.013369466397299e-10, + "loss": 0.2716, + "step": 42430 + }, + { + "epoch": 1.9876797676488498, + "grad_norm": 0.6469033977510104, + "learning_rate": 4.975462584091984e-10, + "loss": 0.2838, + "step": 42431 + }, + { + "epoch": 1.9877266126387783, + "grad_norm": 0.589310645436833, + "learning_rate": 4.937699541868179e-10, + "loss": 0.2613, + "step": 42432 + }, + { + "epoch": 1.9877734576287067, + "grad_norm": 0.6256313789353609, + "learning_rate": 4.900080339945157e-10, + "loss": 0.2758, + "step": 42433 + }, + { + "epoch": 1.987820302618635, + "grad_norm": 0.587447264282205, + "learning_rate": 4.862604978539409e-10, + "loss": 0.2735, + "step": 42434 + }, + { + "epoch": 1.9878671476085632, + "grad_norm": 0.5918963576974742, + "learning_rate": 4.825273457870205e-10, + "loss": 0.2678, + "step": 42435 + }, + { + "epoch": 1.9879139925984917, + "grad_norm": 0.6627263697916805, + "learning_rate": 4.788085778148488e-10, + "loss": 0.2865, + "step": 42436 + }, + { + "epoch": 1.98796083758842, + "grad_norm": 0.6296563487515964, + "learning_rate": 4.751041939587975e-10, + "loss": 0.2944, + "step": 42437 + }, + { + "epoch": 1.9880076825783481, + "grad_norm": 0.5970421765485887, + "learning_rate": 4.714141942402383e-10, + "loss": 0.2681, + "step": 42438 + }, + { + "epoch": 1.9880545275682766, + "grad_norm": 0.6005558496706774, + "learning_rate": 4.677385786805433e-10, + "loss": 0.2881, + "step": 42439 + }, + { + "epoch": 1.9881013725582048, + "grad_norm": 0.5667631538457001, + "learning_rate": 4.640773473005289e-10, + "loss": 0.2609, + "step": 42440 + }, + { + "epoch": 1.988148217548133, + "grad_norm": 0.5831946196140719, + "learning_rate": 4.604305001218445e-10, + "loss": 0.2546, + "step": 42441 + }, + { + "epoch": 1.9881950625380616, + "grad_norm": 0.5757213414203662, + "learning_rate": 4.5679803716502934e-10, + "loss": 0.2554, + "step": 42442 + }, + { + "epoch": 1.98824190752799, + "grad_norm": 0.5838746411584304, + "learning_rate": 4.5317995845117757e-10, + "loss": 0.2615, + "step": 42443 + }, + { + "epoch": 1.9882887525179183, + "grad_norm": 0.5953532255253845, + "learning_rate": 4.4957626400082835e-10, + "loss": 0.2744, + "step": 42444 + }, + { + "epoch": 1.9883355975078465, + "grad_norm": 0.5823840127199926, + "learning_rate": 4.4598695383507587e-10, + "loss": 0.2719, + "step": 42445 + }, + { + "epoch": 1.988382442497775, + "grad_norm": 0.6023182734185089, + "learning_rate": 4.424120279744593e-10, + "loss": 0.2932, + "step": 42446 + }, + { + "epoch": 1.9884292874877032, + "grad_norm": 0.5682784232254109, + "learning_rate": 4.388514864395177e-10, + "loss": 0.2617, + "step": 42447 + }, + { + "epoch": 1.9884761324776314, + "grad_norm": 0.5852998523776691, + "learning_rate": 4.3530532925079026e-10, + "loss": 0.2823, + "step": 42448 + }, + { + "epoch": 1.98852297746756, + "grad_norm": 0.5816785377297274, + "learning_rate": 4.3177355642853856e-10, + "loss": 0.2522, + "step": 42449 + }, + { + "epoch": 1.9885698224574881, + "grad_norm": 0.6120328573937196, + "learning_rate": 4.2825616799330174e-10, + "loss": 0.2877, + "step": 42450 + }, + { + "epoch": 1.9886166674474164, + "grad_norm": 0.6160001295506597, + "learning_rate": 4.247531639650637e-10, + "loss": 0.2774, + "step": 42451 + }, + { + "epoch": 1.9886635124373448, + "grad_norm": 0.6093838236291013, + "learning_rate": 4.2126454436408617e-10, + "loss": 0.2641, + "step": 42452 + }, + { + "epoch": 1.9887103574272733, + "grad_norm": 0.5836607985444418, + "learning_rate": 4.177903092106306e-10, + "loss": 0.263, + "step": 42453 + }, + { + "epoch": 1.9887572024172013, + "grad_norm": 0.6235684630042173, + "learning_rate": 4.1433045852440345e-10, + "loss": 0.2759, + "step": 42454 + }, + { + "epoch": 1.9888040474071298, + "grad_norm": 0.5688218559030892, + "learning_rate": 4.1088499232566635e-10, + "loss": 0.2652, + "step": 42455 + }, + { + "epoch": 1.9888508923970583, + "grad_norm": 0.5707124490895122, + "learning_rate": 4.074539106338482e-10, + "loss": 0.2617, + "step": 42456 + }, + { + "epoch": 1.9888977373869865, + "grad_norm": 0.6186849681654386, + "learning_rate": 4.04037213468933e-10, + "loss": 0.2755, + "step": 42457 + }, + { + "epoch": 1.9889445823769147, + "grad_norm": 0.6278470781246508, + "learning_rate": 4.0063490085062715e-10, + "loss": 0.2757, + "step": 42458 + }, + { + "epoch": 1.9889914273668432, + "grad_norm": 0.6154691922121084, + "learning_rate": 3.9724697279835966e-10, + "loss": 0.2632, + "step": 42459 + }, + { + "epoch": 1.9890382723567714, + "grad_norm": 0.565242065124324, + "learning_rate": 3.9387342933155936e-10, + "loss": 0.2611, + "step": 42460 + }, + { + "epoch": 1.9890851173466997, + "grad_norm": 0.6651635203449358, + "learning_rate": 3.9051427046993276e-10, + "loss": 0.2872, + "step": 42461 + }, + { + "epoch": 1.9891319623366281, + "grad_norm": 0.5975345884906555, + "learning_rate": 3.871694962326311e-10, + "loss": 0.2752, + "step": 42462 + }, + { + "epoch": 1.9891788073265564, + "grad_norm": 0.5764611699575914, + "learning_rate": 3.8383910663880587e-10, + "loss": 0.2717, + "step": 42463 + }, + { + "epoch": 1.9892256523164846, + "grad_norm": 0.6274468250727311, + "learning_rate": 3.8052310170788586e-10, + "loss": 0.2877, + "step": 42464 + }, + { + "epoch": 1.989272497306413, + "grad_norm": 0.6148593298216263, + "learning_rate": 3.7722148145874494e-10, + "loss": 0.2832, + "step": 42465 + }, + { + "epoch": 1.9893193422963416, + "grad_norm": 0.5933465952319066, + "learning_rate": 3.7393424591025685e-10, + "loss": 0.2614, + "step": 42466 + }, + { + "epoch": 1.9893661872862696, + "grad_norm": 0.5868234789472915, + "learning_rate": 3.706613950818505e-10, + "loss": 0.2706, + "step": 42467 + }, + { + "epoch": 1.989413032276198, + "grad_norm": 0.609120546991648, + "learning_rate": 3.674029289918446e-10, + "loss": 0.2853, + "step": 42468 + }, + { + "epoch": 1.9894598772661265, + "grad_norm": 0.5766042555604682, + "learning_rate": 3.641588476591129e-10, + "loss": 0.2594, + "step": 42469 + }, + { + "epoch": 1.9895067222560547, + "grad_norm": 0.5689982248653216, + "learning_rate": 3.6092915110252924e-10, + "loss": 0.2614, + "step": 42470 + }, + { + "epoch": 1.989553567245983, + "grad_norm": 0.6050681184091784, + "learning_rate": 3.5771383934041227e-10, + "loss": 0.2938, + "step": 42471 + }, + { + "epoch": 1.9896004122359114, + "grad_norm": 0.60038928292431, + "learning_rate": 3.545129123913582e-10, + "loss": 0.2714, + "step": 42472 + }, + { + "epoch": 1.9896472572258397, + "grad_norm": 0.5706210844735651, + "learning_rate": 3.5132637027396333e-10, + "loss": 0.2619, + "step": 42473 + }, + { + "epoch": 1.989694102215768, + "grad_norm": 0.611539922519122, + "learning_rate": 3.4815421300626874e-10, + "loss": 0.2785, + "step": 42474 + }, + { + "epoch": 1.9897409472056964, + "grad_norm": 0.6103583710813919, + "learning_rate": 3.4499644060687063e-10, + "loss": 0.2661, + "step": 42475 + }, + { + "epoch": 1.9897877921956246, + "grad_norm": 0.5905683499841569, + "learning_rate": 3.4185305309353266e-10, + "loss": 0.2795, + "step": 42476 + }, + { + "epoch": 1.9898346371855529, + "grad_norm": 0.6110111426683461, + "learning_rate": 3.3872405048485103e-10, + "loss": 0.282, + "step": 42477 + }, + { + "epoch": 1.9898814821754813, + "grad_norm": 0.6051328741731685, + "learning_rate": 3.356094327983117e-10, + "loss": 0.2766, + "step": 42478 + }, + { + "epoch": 1.9899283271654098, + "grad_norm": 0.6027205260583457, + "learning_rate": 3.3250920005223343e-10, + "loss": 0.2823, + "step": 42479 + }, + { + "epoch": 1.989975172155338, + "grad_norm": 0.6086902241730094, + "learning_rate": 3.294233522641022e-10, + "loss": 0.2689, + "step": 42480 + }, + { + "epoch": 1.9900220171452663, + "grad_norm": 0.5645687930166533, + "learning_rate": 3.2635188945195904e-10, + "loss": 0.2552, + "step": 42481 + }, + { + "epoch": 1.9900688621351947, + "grad_norm": 0.5763996260195948, + "learning_rate": 3.2329481163356766e-10, + "loss": 0.2591, + "step": 42482 + }, + { + "epoch": 1.990115707125123, + "grad_norm": 0.5353062629069707, + "learning_rate": 3.202521188261365e-10, + "loss": 0.2419, + "step": 42483 + }, + { + "epoch": 1.9901625521150512, + "grad_norm": 0.6269669827848082, + "learning_rate": 3.1722381104742906e-10, + "loss": 0.2748, + "step": 42484 + }, + { + "epoch": 1.9902093971049797, + "grad_norm": 0.6298976929808835, + "learning_rate": 3.1420988831493137e-10, + "loss": 0.2855, + "step": 42485 + }, + { + "epoch": 1.990256242094908, + "grad_norm": 0.5925152978340856, + "learning_rate": 3.1121035064557436e-10, + "loss": 0.2731, + "step": 42486 + }, + { + "epoch": 1.9903030870848362, + "grad_norm": 0.6145050318080959, + "learning_rate": 3.082251980571216e-10, + "loss": 0.2872, + "step": 42487 + }, + { + "epoch": 1.9903499320747646, + "grad_norm": 0.5991719169566357, + "learning_rate": 3.052544305667815e-10, + "loss": 0.2549, + "step": 42488 + }, + { + "epoch": 1.990396777064693, + "grad_norm": 0.5797747460916616, + "learning_rate": 3.0229804819120744e-10, + "loss": 0.2554, + "step": 42489 + }, + { + "epoch": 1.990443622054621, + "grad_norm": 0.5789214994934792, + "learning_rate": 2.993560509476079e-10, + "loss": 0.2649, + "step": 42490 + }, + { + "epoch": 1.9904904670445496, + "grad_norm": 0.6142758079078369, + "learning_rate": 2.964284388529137e-10, + "loss": 0.2739, + "step": 42491 + }, + { + "epoch": 1.990537312034478, + "grad_norm": 0.5917222719211608, + "learning_rate": 2.9351521192405586e-10, + "loss": 0.2712, + "step": 42492 + }, + { + "epoch": 1.9905841570244063, + "grad_norm": 0.6189746201252061, + "learning_rate": 2.9061637017768764e-10, + "loss": 0.263, + "step": 42493 + }, + { + "epoch": 1.9906310020143345, + "grad_norm": 0.6033307599740582, + "learning_rate": 2.877319136307399e-10, + "loss": 0.27, + "step": 42494 + }, + { + "epoch": 1.990677847004263, + "grad_norm": 0.6463689131657399, + "learning_rate": 2.84861842299311e-10, + "loss": 0.2882, + "step": 42495 + }, + { + "epoch": 1.9907246919941912, + "grad_norm": 0.6361623223480789, + "learning_rate": 2.8200615620060936e-10, + "loss": 0.2604, + "step": 42496 + }, + { + "epoch": 1.9907715369841195, + "grad_norm": 0.5958558853825677, + "learning_rate": 2.7916485535045555e-10, + "loss": 0.2723, + "step": 42497 + }, + { + "epoch": 1.990818381974048, + "grad_norm": 0.609517555240366, + "learning_rate": 2.763379397652255e-10, + "loss": 0.2682, + "step": 42498 + }, + { + "epoch": 1.9908652269639762, + "grad_norm": 0.5992876048164079, + "learning_rate": 2.735254094615725e-10, + "loss": 0.2642, + "step": 42499 + }, + { + "epoch": 1.9909120719539044, + "grad_norm": 0.6409050629479603, + "learning_rate": 2.707272644555947e-10, + "loss": 0.2699, + "step": 42500 + }, + { + "epoch": 1.9909589169438329, + "grad_norm": 0.5853660282515414, + "learning_rate": 2.6794350476339047e-10, + "loss": 0.2643, + "step": 42501 + }, + { + "epoch": 1.9910057619337613, + "grad_norm": 0.5804747895850846, + "learning_rate": 2.651741304005029e-10, + "loss": 0.2583, + "step": 42502 + }, + { + "epoch": 1.9910526069236893, + "grad_norm": 0.6279054924489662, + "learning_rate": 2.6241914138358526e-10, + "loss": 0.27, + "step": 42503 + }, + { + "epoch": 1.9910994519136178, + "grad_norm": 0.624780437601725, + "learning_rate": 2.5967853772818075e-10, + "loss": 0.2873, + "step": 42504 + }, + { + "epoch": 1.9911462969035463, + "grad_norm": 0.5999141269130843, + "learning_rate": 2.569523194498325e-10, + "loss": 0.2667, + "step": 42505 + }, + { + "epoch": 1.9911931418934745, + "grad_norm": 0.5832787280597764, + "learning_rate": 2.5424048656436106e-10, + "loss": 0.2693, + "step": 42506 + }, + { + "epoch": 1.9912399868834028, + "grad_norm": 0.5706764521027318, + "learning_rate": 2.5154303908758725e-10, + "loss": 0.2615, + "step": 42507 + }, + { + "epoch": 1.9912868318733312, + "grad_norm": 0.5751465792989094, + "learning_rate": 2.4885997703505414e-10, + "loss": 0.2752, + "step": 42508 + }, + { + "epoch": 1.9913336768632595, + "grad_norm": 0.5684682451965556, + "learning_rate": 2.461913004217498e-10, + "loss": 0.2571, + "step": 42509 + }, + { + "epoch": 1.9913805218531877, + "grad_norm": 0.6700160962637134, + "learning_rate": 2.4353700926349476e-10, + "loss": 0.3003, + "step": 42510 + }, + { + "epoch": 1.9914273668431162, + "grad_norm": 0.5566307283908735, + "learning_rate": 2.4089710357555476e-10, + "loss": 0.2618, + "step": 42511 + }, + { + "epoch": 1.9914742118330444, + "grad_norm": 0.5599170995220021, + "learning_rate": 2.382715833726401e-10, + "loss": 0.2692, + "step": 42512 + }, + { + "epoch": 1.9915210568229726, + "grad_norm": 0.609480302360934, + "learning_rate": 2.35660448670294e-10, + "loss": 0.288, + "step": 42513 + }, + { + "epoch": 1.991567901812901, + "grad_norm": 0.6500856489990022, + "learning_rate": 2.330636994835045e-10, + "loss": 0.2859, + "step": 42514 + }, + { + "epoch": 1.9916147468028296, + "grad_norm": 0.5872408688274128, + "learning_rate": 2.3048133582698195e-10, + "loss": 0.2715, + "step": 42515 + }, + { + "epoch": 1.9916615917927578, + "grad_norm": 0.5755309462487385, + "learning_rate": 2.2791335771599199e-10, + "loss": 0.2621, + "step": 42516 + }, + { + "epoch": 1.991708436782686, + "grad_norm": 0.6165281819040321, + "learning_rate": 2.253597651649675e-10, + "loss": 0.2833, + "step": 42517 + }, + { + "epoch": 1.9917552817726145, + "grad_norm": 0.5487166510540056, + "learning_rate": 2.2282055818861893e-10, + "loss": 0.2567, + "step": 42518 + }, + { + "epoch": 1.9918021267625428, + "grad_norm": 0.6133757476401819, + "learning_rate": 2.202957368016567e-10, + "loss": 0.2705, + "step": 42519 + }, + { + "epoch": 1.991848971752471, + "grad_norm": 0.6130877567407501, + "learning_rate": 2.177853010185138e-10, + "loss": 0.2642, + "step": 42520 + }, + { + "epoch": 1.9918958167423995, + "grad_norm": 0.6125821292473104, + "learning_rate": 2.1528925085390062e-10, + "loss": 0.2789, + "step": 42521 + }, + { + "epoch": 1.9919426617323277, + "grad_norm": 0.609502570780935, + "learning_rate": 2.128075863219725e-10, + "loss": 0.2757, + "step": 42522 + }, + { + "epoch": 1.991989506722256, + "grad_norm": 0.6079596902218902, + "learning_rate": 2.1034030743688482e-10, + "loss": 0.2657, + "step": 42523 + }, + { + "epoch": 1.9920363517121844, + "grad_norm": 0.6157864084090339, + "learning_rate": 2.07887414213348e-10, + "loss": 0.2699, + "step": 42524 + }, + { + "epoch": 1.9920831967021129, + "grad_norm": 0.5945166279202868, + "learning_rate": 2.0544890666468476e-10, + "loss": 0.2716, + "step": 42525 + }, + { + "epoch": 1.9921300416920409, + "grad_norm": 0.5768916316527818, + "learning_rate": 2.030247848056055e-10, + "loss": 0.249, + "step": 42526 + }, + { + "epoch": 1.9921768866819693, + "grad_norm": 0.658951889042525, + "learning_rate": 2.006150486497105e-10, + "loss": 0.2888, + "step": 42527 + }, + { + "epoch": 1.9922237316718978, + "grad_norm": 0.5807868175944203, + "learning_rate": 1.9821969821115505e-10, + "loss": 0.2684, + "step": 42528 + }, + { + "epoch": 1.992270576661826, + "grad_norm": 0.6137036039261912, + "learning_rate": 1.9583873350326188e-10, + "loss": 0.275, + "step": 42529 + }, + { + "epoch": 1.9923174216517543, + "grad_norm": 0.5842374980336873, + "learning_rate": 1.9347215454018631e-10, + "loss": 0.2595, + "step": 42530 + }, + { + "epoch": 1.9923642666416828, + "grad_norm": 0.6368595942881083, + "learning_rate": 1.9111996133552858e-10, + "loss": 0.2766, + "step": 42531 + }, + { + "epoch": 1.992411111631611, + "grad_norm": 0.5357418424524784, + "learning_rate": 1.8878215390233379e-10, + "loss": 0.2539, + "step": 42532 + }, + { + "epoch": 1.9924579566215392, + "grad_norm": 0.6134893299044004, + "learning_rate": 1.864587322547573e-10, + "loss": 0.2777, + "step": 42533 + }, + { + "epoch": 1.9925048016114677, + "grad_norm": 0.5532984179348518, + "learning_rate": 1.841496964055667e-10, + "loss": 0.2551, + "step": 42534 + }, + { + "epoch": 1.992551646601396, + "grad_norm": 0.624394984730982, + "learning_rate": 1.8185504636836214e-10, + "loss": 0.2736, + "step": 42535 + }, + { + "epoch": 1.9925984915913242, + "grad_norm": 0.6401594669128329, + "learning_rate": 1.7957478215618885e-10, + "loss": 0.2789, + "step": 42536 + }, + { + "epoch": 1.9926453365812526, + "grad_norm": 0.5842225788497895, + "learning_rate": 1.7730890378236942e-10, + "loss": 0.2756, + "step": 42537 + }, + { + "epoch": 1.992692181571181, + "grad_norm": 0.6009008621361236, + "learning_rate": 1.7505741125967146e-10, + "loss": 0.2776, + "step": 42538 + }, + { + "epoch": 1.9927390265611091, + "grad_norm": 0.6450605322203129, + "learning_rate": 1.7282030460141764e-10, + "loss": 0.2797, + "step": 42539 + }, + { + "epoch": 1.9927858715510376, + "grad_norm": 0.6466829315762578, + "learning_rate": 1.7059758382009795e-10, + "loss": 0.2991, + "step": 42540 + }, + { + "epoch": 1.992832716540966, + "grad_norm": 0.6003616590524211, + "learning_rate": 1.6838924892875752e-10, + "loss": 0.2584, + "step": 42541 + }, + { + "epoch": 1.9928795615308943, + "grad_norm": 0.5727337435752186, + "learning_rate": 1.6619529994016393e-10, + "loss": 0.2709, + "step": 42542 + }, + { + "epoch": 1.9929264065208225, + "grad_norm": 0.5794160002108076, + "learning_rate": 1.640157368665296e-10, + "loss": 0.2689, + "step": 42543 + }, + { + "epoch": 1.992973251510751, + "grad_norm": 0.5804662941330923, + "learning_rate": 1.618505597208997e-10, + "loss": 0.265, + "step": 42544 + }, + { + "epoch": 1.9930200965006792, + "grad_norm": 0.5775939640380662, + "learning_rate": 1.5969976851548662e-10, + "loss": 0.2826, + "step": 42545 + }, + { + "epoch": 1.9930669414906075, + "grad_norm": 0.5822567889078247, + "learning_rate": 1.5756336326250288e-10, + "loss": 0.265, + "step": 42546 + }, + { + "epoch": 1.993113786480536, + "grad_norm": 0.6332156918501939, + "learning_rate": 1.55441343974716e-10, + "loss": 0.2713, + "step": 42547 + }, + { + "epoch": 1.9931606314704642, + "grad_norm": 0.6228145972239919, + "learning_rate": 1.5333371066378332e-10, + "loss": 0.2889, + "step": 42548 + }, + { + "epoch": 1.9932074764603924, + "grad_norm": 0.6597928095043407, + "learning_rate": 1.5124046334219488e-10, + "loss": 0.2921, + "step": 42549 + }, + { + "epoch": 1.9932543214503209, + "grad_norm": 0.6573006740343205, + "learning_rate": 1.4916160202188556e-10, + "loss": 0.2881, + "step": 42550 + }, + { + "epoch": 1.9933011664402494, + "grad_norm": 0.662482482792523, + "learning_rate": 1.4709712671479026e-10, + "loss": 0.2753, + "step": 42551 + }, + { + "epoch": 1.9933480114301776, + "grad_norm": 0.5886503132141403, + "learning_rate": 1.4504703743312143e-10, + "loss": 0.263, + "step": 42552 + }, + { + "epoch": 1.9933948564201058, + "grad_norm": 0.5687750201628693, + "learning_rate": 1.430113341879813e-10, + "loss": 0.2622, + "step": 42553 + }, + { + "epoch": 1.9934417014100343, + "grad_norm": 0.624080614821908, + "learning_rate": 1.4099001699185987e-10, + "loss": 0.2826, + "step": 42554 + }, + { + "epoch": 1.9934885463999625, + "grad_norm": 0.5402758970904378, + "learning_rate": 1.389830858558594e-10, + "loss": 0.2497, + "step": 42555 + }, + { + "epoch": 1.9935353913898908, + "grad_norm": 0.5946358810660821, + "learning_rate": 1.3699054079163722e-10, + "loss": 0.2662, + "step": 42556 + }, + { + "epoch": 1.9935822363798192, + "grad_norm": 0.5926573134884894, + "learning_rate": 1.3501238181085063e-10, + "loss": 0.277, + "step": 42557 + }, + { + "epoch": 1.9936290813697475, + "grad_norm": 0.5942485850461373, + "learning_rate": 1.330486089246019e-10, + "loss": 0.2639, + "step": 42558 + }, + { + "epoch": 1.9936759263596757, + "grad_norm": 0.6062187609096089, + "learning_rate": 1.310992221445484e-10, + "loss": 0.2716, + "step": 42559 + }, + { + "epoch": 1.9937227713496042, + "grad_norm": 0.5628478037277278, + "learning_rate": 1.2916422148151476e-10, + "loss": 0.2645, + "step": 42560 + }, + { + "epoch": 1.9937696163395326, + "grad_norm": 0.589975684727608, + "learning_rate": 1.2724360694688075e-10, + "loss": 0.2695, + "step": 42561 + }, + { + "epoch": 1.9938164613294607, + "grad_norm": 0.5764011392199507, + "learning_rate": 1.2533737855174865e-10, + "loss": 0.2733, + "step": 42562 + }, + { + "epoch": 1.9938633063193891, + "grad_norm": 0.583711453448934, + "learning_rate": 1.2344553630666556e-10, + "loss": 0.2653, + "step": 42563 + }, + { + "epoch": 1.9939101513093176, + "grad_norm": 0.6202854846364143, + "learning_rate": 1.2156808022301126e-10, + "loss": 0.2592, + "step": 42564 + }, + { + "epoch": 1.9939569962992458, + "grad_norm": 0.5781277465071312, + "learning_rate": 1.197050103116104e-10, + "loss": 0.2575, + "step": 42565 + }, + { + "epoch": 1.994003841289174, + "grad_norm": 0.5477400286882369, + "learning_rate": 1.1785632658273262e-10, + "loss": 0.2559, + "step": 42566 + }, + { + "epoch": 1.9940506862791025, + "grad_norm": 0.5712509067329313, + "learning_rate": 1.1602202904748006e-10, + "loss": 0.2677, + "step": 42567 + }, + { + "epoch": 1.9940975312690308, + "grad_norm": 0.627702396978015, + "learning_rate": 1.1420211771612233e-10, + "loss": 0.2724, + "step": 42568 + }, + { + "epoch": 1.994144376258959, + "grad_norm": 0.615780909209503, + "learning_rate": 1.1239659259892899e-10, + "loss": 0.289, + "step": 42569 + }, + { + "epoch": 1.9941912212488875, + "grad_norm": 0.6385374639313223, + "learning_rate": 1.1060545370700227e-10, + "loss": 0.2654, + "step": 42570 + }, + { + "epoch": 1.9942380662388157, + "grad_norm": 0.6195368247082288, + "learning_rate": 1.0882870105005661e-10, + "loss": 0.2804, + "step": 42571 + }, + { + "epoch": 1.994284911228744, + "grad_norm": 0.6170537303060879, + "learning_rate": 1.0706633463836158e-10, + "loss": 0.291, + "step": 42572 + }, + { + "epoch": 1.9943317562186724, + "grad_norm": 0.6083943730648005, + "learning_rate": 1.0531835448218675e-10, + "loss": 0.2736, + "step": 42573 + }, + { + "epoch": 1.9943786012086009, + "grad_norm": 0.6054610232958143, + "learning_rate": 1.0358476059152412e-10, + "loss": 0.2763, + "step": 42574 + }, + { + "epoch": 1.994425446198529, + "grad_norm": 0.568971645152949, + "learning_rate": 1.0186555297664324e-10, + "loss": 0.258, + "step": 42575 + }, + { + "epoch": 1.9944722911884574, + "grad_norm": 0.6043115895891861, + "learning_rate": 1.0016073164698103e-10, + "loss": 0.2724, + "step": 42576 + }, + { + "epoch": 1.9945191361783858, + "grad_norm": 0.5850511148853884, + "learning_rate": 9.847029661280705e-11, + "loss": 0.2582, + "step": 42577 + }, + { + "epoch": 1.994565981168314, + "grad_norm": 0.6012397196123325, + "learning_rate": 9.679424788355818e-11, + "loss": 0.2739, + "step": 42578 + }, + { + "epoch": 1.9946128261582423, + "grad_norm": 0.7171137328513731, + "learning_rate": 9.513258546867132e-11, + "loss": 0.2919, + "step": 42579 + }, + { + "epoch": 1.9946596711481708, + "grad_norm": 0.5781005123788928, + "learning_rate": 9.348530937813849e-11, + "loss": 0.2605, + "step": 42580 + }, + { + "epoch": 1.994706516138099, + "grad_norm": 0.5700615699887863, + "learning_rate": 9.185241962139658e-11, + "loss": 0.2514, + "step": 42581 + }, + { + "epoch": 1.9947533611280273, + "grad_norm": 0.5926910201821369, + "learning_rate": 9.02339162076049e-11, + "loss": 0.2526, + "step": 42582 + }, + { + "epoch": 1.9948002061179557, + "grad_norm": 0.6056028640572838, + "learning_rate": 8.86297991462004e-11, + "loss": 0.2759, + "step": 42583 + }, + { + "epoch": 1.994847051107884, + "grad_norm": 0.6464384663713225, + "learning_rate": 8.704006844634238e-11, + "loss": 0.2877, + "step": 42584 + }, + { + "epoch": 1.9948938960978122, + "grad_norm": 0.5875671534786063, + "learning_rate": 8.54647241171902e-11, + "loss": 0.2764, + "step": 42585 + }, + { + "epoch": 1.9949407410877407, + "grad_norm": 0.6123897918807686, + "learning_rate": 8.39037661679032e-11, + "loss": 0.2771, + "step": 42586 + }, + { + "epoch": 1.9949875860776691, + "grad_norm": 0.5742883770851598, + "learning_rate": 8.235719460764068e-11, + "loss": 0.2673, + "step": 42587 + }, + { + "epoch": 1.9950344310675974, + "grad_norm": 0.6178254691557042, + "learning_rate": 8.082500944472938e-11, + "loss": 0.2796, + "step": 42588 + }, + { + "epoch": 1.9950812760575256, + "grad_norm": 0.6309730371403978, + "learning_rate": 7.930721068860613e-11, + "loss": 0.2666, + "step": 42589 + }, + { + "epoch": 1.995128121047454, + "grad_norm": 0.5884360853631915, + "learning_rate": 7.780379834759766e-11, + "loss": 0.2741, + "step": 42590 + }, + { + "epoch": 1.9951749660373823, + "grad_norm": 0.6044023580700943, + "learning_rate": 7.631477243058571e-11, + "loss": 0.2745, + "step": 42591 + }, + { + "epoch": 1.9952218110273106, + "grad_norm": 0.5938452916494995, + "learning_rate": 7.484013294589699e-11, + "loss": 0.2675, + "step": 42592 + }, + { + "epoch": 1.995268656017239, + "grad_norm": 0.6543863717841031, + "learning_rate": 7.337987990241324e-11, + "loss": 0.2858, + "step": 42593 + }, + { + "epoch": 1.9953155010071673, + "grad_norm": 0.6058242801711768, + "learning_rate": 7.193401330818361e-11, + "loss": 0.2621, + "step": 42594 + }, + { + "epoch": 1.9953623459970955, + "grad_norm": 0.607390960000491, + "learning_rate": 7.050253317153477e-11, + "loss": 0.2561, + "step": 42595 + }, + { + "epoch": 1.995409190987024, + "grad_norm": 0.5495830151442884, + "learning_rate": 6.908543950079338e-11, + "loss": 0.2416, + "step": 42596 + }, + { + "epoch": 1.9954560359769524, + "grad_norm": 0.6277320895736285, + "learning_rate": 6.768273230428612e-11, + "loss": 0.2763, + "step": 42597 + }, + { + "epoch": 1.9955028809668804, + "grad_norm": 0.566208343841006, + "learning_rate": 6.629441159006211e-11, + "loss": 0.2858, + "step": 42598 + }, + { + "epoch": 1.995549725956809, + "grad_norm": 0.6151254395162219, + "learning_rate": 6.492047736561535e-11, + "loss": 0.2672, + "step": 42599 + }, + { + "epoch": 1.9955965709467374, + "grad_norm": 0.5933741639709738, + "learning_rate": 6.356092963955007e-11, + "loss": 0.2693, + "step": 42600 + }, + { + "epoch": 1.9956434159366656, + "grad_norm": 0.5807991484578755, + "learning_rate": 6.221576841908273e-11, + "loss": 0.2591, + "step": 42601 + }, + { + "epoch": 1.9956902609265939, + "grad_norm": 0.6109554941079841, + "learning_rate": 6.088499371226242e-11, + "loss": 0.2686, + "step": 42602 + }, + { + "epoch": 1.9957371059165223, + "grad_norm": 0.5989177002071683, + "learning_rate": 5.956860552686072e-11, + "loss": 0.2636, + "step": 42603 + }, + { + "epoch": 1.9957839509064506, + "grad_norm": 0.586890358576786, + "learning_rate": 5.826660387037163e-11, + "loss": 0.2585, + "step": 42604 + }, + { + "epoch": 1.9958307958963788, + "grad_norm": 0.5432208783053355, + "learning_rate": 5.697898875001162e-11, + "loss": 0.2538, + "step": 42605 + }, + { + "epoch": 1.9958776408863073, + "grad_norm": 0.5690833618971376, + "learning_rate": 5.570576017355223e-11, + "loss": 0.2545, + "step": 42606 + }, + { + "epoch": 1.9959244858762355, + "grad_norm": 0.6215571286147479, + "learning_rate": 5.444691814793235e-11, + "loss": 0.2739, + "step": 42607 + }, + { + "epoch": 1.9959713308661637, + "grad_norm": 0.6103103354024216, + "learning_rate": 5.320246268092355e-11, + "loss": 0.2694, + "step": 42608 + }, + { + "epoch": 1.9960181758560922, + "grad_norm": 0.5992470340759409, + "learning_rate": 5.197239377918717e-11, + "loss": 0.2662, + "step": 42609 + }, + { + "epoch": 1.9960650208460207, + "grad_norm": 0.6090762614530397, + "learning_rate": 5.0756711450217213e-11, + "loss": 0.2651, + "step": 42610 + }, + { + "epoch": 1.9961118658359487, + "grad_norm": 0.5874521546592536, + "learning_rate": 4.955541570067501e-11, + "loss": 0.2706, + "step": 42611 + }, + { + "epoch": 1.9961587108258771, + "grad_norm": 0.5858390095210475, + "learning_rate": 4.836850653749947e-11, + "loss": 0.265, + "step": 42612 + }, + { + "epoch": 1.9962055558158056, + "grad_norm": 0.5858528133700789, + "learning_rate": 4.719598396790703e-11, + "loss": 0.2765, + "step": 42613 + }, + { + "epoch": 1.9962524008057339, + "grad_norm": 0.6247805350907267, + "learning_rate": 4.6037847998003925e-11, + "loss": 0.2687, + "step": 42614 + }, + { + "epoch": 1.996299245795662, + "grad_norm": 0.6489415178067108, + "learning_rate": 4.48940986350066e-11, + "loss": 0.2917, + "step": 42615 + }, + { + "epoch": 1.9963460907855906, + "grad_norm": 0.6305832485635487, + "learning_rate": 4.376473588529884e-11, + "loss": 0.2828, + "step": 42616 + }, + { + "epoch": 1.9963929357755188, + "grad_norm": 0.6084718459582632, + "learning_rate": 4.2649759755541976e-11, + "loss": 0.2644, + "step": 42617 + }, + { + "epoch": 1.996439780765447, + "grad_norm": 0.5597842277164599, + "learning_rate": 4.154917025184224e-11, + "loss": 0.2645, + "step": 42618 + }, + { + "epoch": 1.9964866257553755, + "grad_norm": 0.612832645083769, + "learning_rate": 4.046296738058342e-11, + "loss": 0.2775, + "step": 42619 + }, + { + "epoch": 1.9965334707453037, + "grad_norm": 0.5600020294160452, + "learning_rate": 3.9391151148149286e-11, + "loss": 0.2565, + "step": 42620 + }, + { + "epoch": 1.996580315735232, + "grad_norm": 0.5989164317459603, + "learning_rate": 3.8333721560923634e-11, + "loss": 0.2511, + "step": 42621 + }, + { + "epoch": 1.9966271607251604, + "grad_norm": 0.6171395112465782, + "learning_rate": 3.7290678624457565e-11, + "loss": 0.2716, + "step": 42622 + }, + { + "epoch": 1.996674005715089, + "grad_norm": 0.6179015308960836, + "learning_rate": 3.626202234513487e-11, + "loss": 0.2914, + "step": 42623 + }, + { + "epoch": 1.9967208507050171, + "grad_norm": 0.6000937826224775, + "learning_rate": 3.5247752728784224e-11, + "loss": 0.279, + "step": 42624 + }, + { + "epoch": 1.9967676956949454, + "grad_norm": 1.4803577716940417, + "learning_rate": 3.4247869781234286e-11, + "loss": 0.2828, + "step": 42625 + }, + { + "epoch": 1.9968145406848739, + "grad_norm": 0.6212429267377264, + "learning_rate": 3.326237350831374e-11, + "loss": 0.2917, + "step": 42626 + }, + { + "epoch": 1.996861385674802, + "grad_norm": 0.6009575163803148, + "learning_rate": 3.2291263915573687e-11, + "loss": 0.2819, + "step": 42627 + }, + { + "epoch": 1.9969082306647303, + "grad_norm": 0.6501692781599675, + "learning_rate": 3.1334541008565256e-11, + "loss": 0.3021, + "step": 42628 + }, + { + "epoch": 1.9969550756546588, + "grad_norm": 0.5997678408764334, + "learning_rate": 3.0392204793117105e-11, + "loss": 0.2695, + "step": 42629 + }, + { + "epoch": 1.997001920644587, + "grad_norm": 0.6238010985517677, + "learning_rate": 2.946425527422525e-11, + "loss": 0.2661, + "step": 42630 + }, + { + "epoch": 1.9970487656345153, + "grad_norm": 0.5984522002245639, + "learning_rate": 2.8550692457440798e-11, + "loss": 0.2713, + "step": 42631 + }, + { + "epoch": 1.9970956106244437, + "grad_norm": 0.5789694930752792, + "learning_rate": 2.765151634803731e-11, + "loss": 0.2643, + "step": 42632 + }, + { + "epoch": 1.9971424556143722, + "grad_norm": 0.642273457874385, + "learning_rate": 2.676672695101079e-11, + "loss": 0.2614, + "step": 42633 + }, + { + "epoch": 1.9971893006043002, + "grad_norm": 0.5796627210007053, + "learning_rate": 2.5896324271912355e-11, + "loss": 0.2626, + "step": 42634 + }, + { + "epoch": 1.9972361455942287, + "grad_norm": 0.5793159046701026, + "learning_rate": 2.5040308315182892e-11, + "loss": 0.2655, + "step": 42635 + }, + { + "epoch": 1.9972829905841571, + "grad_norm": 0.5971411127115264, + "learning_rate": 2.4198679086095967e-11, + "loss": 0.271, + "step": 42636 + }, + { + "epoch": 1.9973298355740854, + "grad_norm": 0.5915042003518448, + "learning_rate": 2.3371436589370023e-11, + "loss": 0.2792, + "step": 42637 + }, + { + "epoch": 1.9973766805640136, + "grad_norm": 0.5631553435177137, + "learning_rate": 2.255858082972351e-11, + "loss": 0.2525, + "step": 42638 + }, + { + "epoch": 1.997423525553942, + "grad_norm": 0.5725835027914723, + "learning_rate": 2.1760111812152428e-11, + "loss": 0.2659, + "step": 42639 + }, + { + "epoch": 1.9974703705438703, + "grad_norm": 0.6036809599500517, + "learning_rate": 2.097602954082012e-11, + "loss": 0.2761, + "step": 42640 + }, + { + "epoch": 1.9975172155337986, + "grad_norm": 0.6383210046780988, + "learning_rate": 2.020633402044503e-11, + "loss": 0.2751, + "step": 42641 + }, + { + "epoch": 1.997564060523727, + "grad_norm": 0.5781290527938695, + "learning_rate": 1.945102525546805e-11, + "loss": 0.263, + "step": 42642 + }, + { + "epoch": 1.9976109055136553, + "grad_norm": 0.5869648830480293, + "learning_rate": 1.8710103250052512e-11, + "loss": 0.2593, + "step": 42643 + }, + { + "epoch": 1.9976577505035835, + "grad_norm": 0.618111246409027, + "learning_rate": 1.7983568008639317e-11, + "loss": 0.2461, + "step": 42644 + }, + { + "epoch": 1.997704595493512, + "grad_norm": 0.5940206779803724, + "learning_rate": 1.7271419535669354e-11, + "loss": 0.2861, + "step": 42645 + }, + { + "epoch": 1.9977514404834404, + "grad_norm": 0.5789650138214149, + "learning_rate": 1.6573657834750845e-11, + "loss": 0.2507, + "step": 42646 + }, + { + "epoch": 1.9977982854733685, + "grad_norm": 0.5980686298809759, + "learning_rate": 1.5890282910047128e-11, + "loss": 0.2728, + "step": 42647 + }, + { + "epoch": 1.997845130463297, + "grad_norm": 0.6145650345761163, + "learning_rate": 1.522129476572154e-11, + "loss": 0.2735, + "step": 42648 + }, + { + "epoch": 1.9978919754532254, + "grad_norm": 0.6139896702668912, + "learning_rate": 1.4566693405382305e-11, + "loss": 0.263, + "step": 42649 + }, + { + "epoch": 1.9979388204431536, + "grad_norm": 0.5850771907020262, + "learning_rate": 1.3926478832637647e-11, + "loss": 0.278, + "step": 42650 + }, + { + "epoch": 1.9979856654330819, + "grad_norm": 0.6129396743858738, + "learning_rate": 1.3300651051650904e-11, + "loss": 0.2688, + "step": 42651 + }, + { + "epoch": 1.9980325104230103, + "grad_norm": 0.627350113324727, + "learning_rate": 1.2689210065752743e-11, + "loss": 0.2759, + "step": 42652 + }, + { + "epoch": 1.9980793554129386, + "grad_norm": 0.5856337821388942, + "learning_rate": 1.2092155878551393e-11, + "loss": 0.2752, + "step": 42653 + }, + { + "epoch": 1.9981262004028668, + "grad_norm": 0.5717786455793525, + "learning_rate": 1.1509488493099963e-11, + "loss": 0.2656, + "step": 42654 + }, + { + "epoch": 1.9981730453927953, + "grad_norm": 0.6318984614462267, + "learning_rate": 1.0941207913284235e-11, + "loss": 0.2836, + "step": 42655 + }, + { + "epoch": 1.9982198903827235, + "grad_norm": 0.5484295720487737, + "learning_rate": 1.0387314142157323e-11, + "loss": 0.2535, + "step": 42656 + }, + { + "epoch": 1.9982667353726518, + "grad_norm": 0.578552907185465, + "learning_rate": 9.84780718277234e-12, + "loss": 0.26, + "step": 42657 + }, + { + "epoch": 1.9983135803625802, + "grad_norm": 0.5757510495852265, + "learning_rate": 9.32268703873751e-12, + "loss": 0.248, + "step": 42658 + }, + { + "epoch": 1.9983604253525087, + "grad_norm": 0.6032086867745604, + "learning_rate": 8.81195371227328e-12, + "loss": 0.2607, + "step": 42659 + }, + { + "epoch": 1.998407270342437, + "grad_norm": 0.5374791204630749, + "learning_rate": 8.315607206987874e-12, + "loss": 0.2532, + "step": 42660 + }, + { + "epoch": 1.9984541153323652, + "grad_norm": 0.567934339087134, + "learning_rate": 7.833647525379295e-12, + "loss": 0.2629, + "step": 42661 + }, + { + "epoch": 1.9985009603222936, + "grad_norm": 0.6065988796245896, + "learning_rate": 7.366074670223101e-12, + "loss": 0.2672, + "step": 42662 + }, + { + "epoch": 1.9985478053122219, + "grad_norm": 0.615421531407821, + "learning_rate": 6.9128886445724016e-12, + "loss": 0.277, + "step": 42663 + }, + { + "epoch": 1.99859465030215, + "grad_norm": 0.5847810721281181, + "learning_rate": 6.474089450647647e-12, + "loss": 0.2805, + "step": 42664 + }, + { + "epoch": 1.9986414952920786, + "grad_norm": 0.594796393903766, + "learning_rate": 6.049677091224393e-12, + "loss": 0.2653, + "step": 42665 + }, + { + "epoch": 1.9986883402820068, + "grad_norm": 0.6163324229000838, + "learning_rate": 5.6396515685230855e-12, + "loss": 0.272, + "step": 42666 + }, + { + "epoch": 1.998735185271935, + "grad_norm": 0.6414868428786016, + "learning_rate": 5.244012885041727e-12, + "loss": 0.2881, + "step": 42667 + }, + { + "epoch": 1.9987820302618635, + "grad_norm": 0.5811714064166205, + "learning_rate": 4.8627610430007636e-12, + "loss": 0.2757, + "step": 42668 + }, + { + "epoch": 1.998828875251792, + "grad_norm": 0.6191586626402131, + "learning_rate": 4.4958960448981956e-12, + "loss": 0.259, + "step": 42669 + }, + { + "epoch": 1.99887572024172, + "grad_norm": 0.6209272081281508, + "learning_rate": 4.14341789239936e-12, + "loss": 0.272, + "step": 42670 + }, + { + "epoch": 1.9989225652316485, + "grad_norm": 0.6265855775093583, + "learning_rate": 3.805326587447144e-12, + "loss": 0.2725, + "step": 42671 + }, + { + "epoch": 1.998969410221577, + "grad_norm": 0.5874970562900451, + "learning_rate": 3.481622132539553e-12, + "loss": 0.2716, + "step": 42672 + }, + { + "epoch": 1.9990162552115052, + "grad_norm": 0.561853667621646, + "learning_rate": 3.1723045290643628e-12, + "loss": 0.243, + "step": 42673 + }, + { + "epoch": 1.9990631002014334, + "grad_norm": 0.6337232949863664, + "learning_rate": 2.877373779242021e-12, + "loss": 0.2665, + "step": 42674 + }, + { + "epoch": 1.9991099451913619, + "grad_norm": 0.5994641311615381, + "learning_rate": 2.59682988418275e-12, + "loss": 0.2618, + "step": 42675 + }, + { + "epoch": 1.99915679018129, + "grad_norm": 0.6569828009120615, + "learning_rate": 2.3306728461069963e-12, + "loss": 0.2904, + "step": 42676 + }, + { + "epoch": 1.9992036351712184, + "grad_norm": 0.6199397848464381, + "learning_rate": 2.0789026661249824e-12, + "loss": 0.2779, + "step": 42677 + }, + { + "epoch": 1.9992504801611468, + "grad_norm": 0.6304231140737662, + "learning_rate": 1.841519345902043e-12, + "loss": 0.2624, + "step": 42678 + }, + { + "epoch": 1.999297325151075, + "grad_norm": 0.5771093114797315, + "learning_rate": 1.6185228865484015e-12, + "loss": 0.2552, + "step": 42679 + }, + { + "epoch": 1.9993441701410033, + "grad_norm": 0.6080988071677794, + "learning_rate": 1.4099132897293922e-12, + "loss": 0.2816, + "step": 42680 + }, + { + "epoch": 1.9993910151309318, + "grad_norm": 0.6150521399364919, + "learning_rate": 1.2156905562776822e-12, + "loss": 0.2664, + "step": 42681 + }, + { + "epoch": 1.9994378601208602, + "grad_norm": 0.5960918410952083, + "learning_rate": 1.0358546875810505e-12, + "loss": 0.2771, + "step": 42682 + }, + { + "epoch": 1.9994847051107882, + "grad_norm": 0.6161401839411397, + "learning_rate": 8.704056847497199e-13, + "loss": 0.2722, + "step": 42683 + }, + { + "epoch": 1.9995315501007167, + "grad_norm": 0.5909515487517819, + "learning_rate": 7.193435483388023e-13, + "loss": 0.276, + "step": 42684 + }, + { + "epoch": 1.9995783950906452, + "grad_norm": 0.628595368427143, + "learning_rate": 5.826682794585204e-13, + "loss": 0.2786, + "step": 42685 + }, + { + "epoch": 1.9996252400805734, + "grad_norm": 0.6292391668209067, + "learning_rate": 4.603798789415415e-13, + "loss": 0.2605, + "step": 42686 + }, + { + "epoch": 1.9996720850705016, + "grad_norm": 0.5759611095544911, + "learning_rate": 3.5247834734297716e-13, + "loss": 0.2657, + "step": 42687 + }, + { + "epoch": 1.99971893006043, + "grad_norm": 0.596295249926667, + "learning_rate": 2.589636852179389e-13, + "loss": 0.2686, + "step": 42688 + }, + { + "epoch": 1.9997657750503584, + "grad_norm": 0.5670874675160223, + "learning_rate": 1.7983589339909402e-13, + "loss": 0.2654, + "step": 42689 + }, + { + "epoch": 1.9998126200402866, + "grad_norm": 0.5898473214908776, + "learning_rate": 1.1509497244155398e-13, + "loss": 0.272, + "step": 42690 + }, + { + "epoch": 1.999859465030215, + "grad_norm": 0.5717304450441908, + "learning_rate": 6.474092206776306e-14, + "loss": 0.2683, + "step": 42691 + }, + { + "epoch": 1.9999063100201433, + "grad_norm": 0.5854867450162711, + "learning_rate": 2.877374338794425e-14, + "loss": 0.2654, + "step": 42692 + }, + { + "epoch": 1.9999531550100715, + "grad_norm": 0.559714501038162, + "learning_rate": 7.193435846986063e-15, + "loss": 0.2514, + "step": 42693 + }, + { + "epoch": 2.0, + "grad_norm": 0.5678060447658159, + "learning_rate": 0.0, + "loss": 0.2521, + "step": 42694 + }, + { + "epoch": 2.0, + "step": 42694, + "total_flos": 1.18789275810005e+17, + "train_loss": 0.04862895955609489, + "train_runtime": 106793.1001, + "train_samples_per_second": 204.685, + "train_steps_per_second": 0.4 + } + ], + "logging_steps": 1.0, + "max_steps": 42694, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "total_flos": 1.18789275810005e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}